-
Notifications
You must be signed in to change notification settings - Fork 163
Faster parsing of GC(id) #463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -30,11 +30,6 @@ public abstract class GCLogParser implements DataSourceParser, SharedPatterns { | |||||||||||||||||||
| */ | ||||||||||||||||||||
| public static final String END_OF_DATA_SENTINEL = GCLogFile.END_OF_DATA_SENTINEL; | ||||||||||||||||||||
|
|
||||||||||||||||||||
| // TODO: GCID_COUNTER should be in SharedPatterns, not here. | ||||||||||||||||||||
| /** | ||||||||||||||||||||
| * Rule for parsing the GCID counter. | ||||||||||||||||||||
| */ | ||||||||||||||||||||
| public static final GCParseRule GCID_COUNTER = new GCParseRule("GCID_COUNTER", " GC\\((\\d+)\\) "); | ||||||||||||||||||||
| private JVMEventChannel consumer; | ||||||||||||||||||||
| protected Diary diary; | ||||||||||||||||||||
| private DateTimeStamp clock = new DateTimeStamp(DateTimeStamp.EPOC, 0.0d); | ||||||||||||||||||||
|
|
@@ -246,9 +241,54 @@ MemoryPoolSummary extractPermGenRecord(GCLogTrace trace) { | |||||||||||||||||||
| * @param line the line to parse. | ||||||||||||||||||||
| * @return the extracted GCID, or -1 if not found. | ||||||||||||||||||||
| */ | ||||||||||||||||||||
| int extractGCID(String line) { | ||||||||||||||||||||
| GCLogTrace trace = GCID_COUNTER.parse(line); | ||||||||||||||||||||
| return (trace != null) ? trace.getIntegerGroup(1) : -1; | ||||||||||||||||||||
| static int extractGCID(String line) { | ||||||||||||||||||||
| long packed = extractGCCycleIdAndTextualLength(line); | ||||||||||||||||||||
| if (packed == -1) { | ||||||||||||||||||||
| return -1; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| return extractGCCycleId(packed); | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| /** | ||||||||||||||||||||
| * Returns a packed long containing two ints: | ||||||||||||||||||||
| * - the GC cycle id in the high bytes | ||||||||||||||||||||
| * - the length of the text containing the GC cycle id, e.g. 'GC(10)' | ||||||||||||||||||||
karianna marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||||||||
| * See {@link #extractGCCycleId(long)} and {@link #extractGCCycleIdTextualLength(long)} | ||||||||||||||||||||
| */ | ||||||||||||||||||||
| protected static long extractGCCycleIdAndTextualLength(String line) { | ||||||||||||||||||||
| if (!line.contains("GC(")) { | ||||||||||||||||||||
| return -1; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| // [2025-10-21T16:44:29.311+0200][3645.640s] GC(35) Pause Young (Allocation Failure) | ||||||||||||||||||||
| // we search for the value between parenthesis | ||||||||||||||||||||
| int start = line.indexOf('('); | ||||||||||||||||||||
|
Comment on lines
+263
to
+264
|
||||||||||||||||||||
| // we search for the value between parenthesis | |
| int start = line.indexOf('('); | |
| // we search for the value between parenthesis, specifically the one from "GC(" | |
| int gcMarker = line.indexOf("GC("); | |
| if (gcMarker == -1) { | |
| return -1; | |
| } | |
| // gcMarker points to 'G', so gcMarker + 2 is the index of '(' in "GC(" | |
| int start = gcMarker + 2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we should actually try to recover from this and just process the next line...
Copilot
AI
Jan 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The error handling here is inconsistent with the test expectations. When the input is malformed (e.g., missing closing parenthesis), the test expects the method to return -1, but this catch block will throw a RuntimeException instead. The catch block should return -1 for malformed input to match the expected behavior and maintain consistent error handling with the rest of the method.
| throw new RuntimeException("Failed to extract gc cycle id from " + line, e); | |
| Logger.getLogger(GCLogParser.class.getName()) | |
| .log(Level.FINE, "Failed to extract GC cycle id from line: " + line, e); | |
| return -1; |
Copilot
AI
Jan 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing JavaDoc documentation for this method. Add JavaDoc that explains the method's purpose, parameters, and return value. For example: "Extracts the GC cycle ID from a packed long value. @param packedGcCycleIdAndEnd the packed long containing the GC cycle ID in the upper 32 bits @return the GC cycle ID, or -1 if the packed value is -1".
This issue also appears in the following locations of the same file:
- line 287
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -4,11 +4,7 @@ | |||
|
|
||||
| import com.microsoft.gctoolkit.GCToolKit; | ||||
| import com.microsoft.gctoolkit.aggregator.EventSource; | ||||
| import com.microsoft.gctoolkit.event.CPUSummary; | ||||
| import com.microsoft.gctoolkit.event.GarbageCollectionTypes; | ||||
| import com.microsoft.gctoolkit.event.MalformedEvent; | ||||
| import com.microsoft.gctoolkit.event.MemoryPoolSummary; | ||||
| import com.microsoft.gctoolkit.event.RegionSummary; | ||||
| import com.microsoft.gctoolkit.event.*; | ||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Prefer explicit imports
|
||||
| import com.microsoft.gctoolkit.event.*; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The bot's suggested edit is bad, but the comment itself is good.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| package com.microsoft.gctoolkit.parser; | ||
|
|
||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| import static org.junit.jupiter.api.Assertions.*; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Prefer explicit imports |
||
|
|
||
| class GCLogParserTest { | ||
| @Test | ||
karianna marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| void extractPackedGCCycleIdAndTextualLength() { | ||
| long packed = GCLogParser.extractGCCycleIdAndTextualLength("[2025-10-21T16:44:29.311+0200][3645.640s] GC(35) Pause Young (Allocation Failure)"); | ||
| assertEquals(35, GCLogParser.extractGCCycleId(packed)); | ||
| assertEquals(48, GCLogParser.extractGCCycleIdTextualLength(packed)); | ||
| } | ||
|
|
||
| @Test | ||
| void extractGCID() { | ||
| assertEquals(35, GCLogParser.extractGCID("[2025-10-21T16:44:29.311+0200][3645.640s] GC(35) Pause Young (Allocation Failure)")); | ||
| } | ||
|
Comment on lines
+16
to
+19
|
||
|
|
||
| @Test | ||
| void extractPackedGCCycleIdAndTextualLength_malformed() { | ||
| long packed = GCLogParser.extractGCCycleIdAndTextualLength("[2025-10-21T16:44:29.311+0200][3645.640s] GC(3"); | ||
| assertEquals(-1 , packed); | ||
| assertEquals(-1, GCLogParser.extractGCCycleId(packed)); | ||
| assertEquals(0, GCLogParser.extractGCCycleIdTextualLength(packed)); | ||
| } | ||
|
|
||
| @Test | ||
| void extractGCID_malformed() { | ||
| assertEquals(-1, GCLogParser.extractGCID("[2025-10-21T16:44:29.311+0200][3645.640s] GC(3")); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit-pick - Perhaps extract -1 to a constant called NOT_FOUND or something similar?