-
Notifications
You must be signed in to change notification settings - Fork 169
Fix flaky test #2469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Fix flaky test #2469
Changes from all commits
b963213
a8ba1cb
b065a8d
30aa33a
cd3b46b
3765386
373a947
7ecda2d
6678fc6
40834c4
500018f
7101cfb
1dbda97
8fa0815
1b5207c
da32568
2113a0a
010cac8
126ef4b
1c85f06
0bfc257
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,6 +43,7 @@ | |
| import java.util.concurrent.ScheduledExecutorService; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.locks.LockSupport; | ||
| import java.util.concurrent.locks.ReentrantLock; | ||
| import java.util.function.Supplier; | ||
| import java.util.logging.Level; | ||
| import java.util.logging.Logger; | ||
|
|
@@ -153,6 +154,7 @@ public class SamplingProfiler implements Runnable { | |
| @Nullable private final File tempDir; | ||
|
|
||
| private final AsyncProfiler profiler; | ||
| private final ReentrantLock profilerLock = new ReentrantLock(); | ||
| @Nullable private volatile Future<?> profilingTask; | ||
|
|
||
| /** | ||
|
|
@@ -317,6 +319,9 @@ public boolean onActivation(Span activeSpan, @Nullable Span previouslyActive) { | |
| if (previouslyActive == null) { | ||
| profiler.addThread(Thread.currentThread()); | ||
| } | ||
| if (!config.isPostProcessingEnabled()) { | ||
| return true; | ||
| } | ||
| boolean success = | ||
| eventBuffer.tryPublishEvent(activationEventTranslator, activeSpan, previouslyActive); | ||
| if (!success) { | ||
|
|
@@ -343,6 +348,9 @@ public boolean onDeactivation(Span deactivatedSpan, @Nullable Span previouslyAct | |
| if (previouslyActive == null) { | ||
| profiler.removeThread(Thread.currentThread()); | ||
| } | ||
| if (!config.isPostProcessingEnabled()) { | ||
| return true; | ||
| } | ||
| boolean success = | ||
| eventBuffer.tryPublishEvent( | ||
| deactivationEventTranslator, deactivatedSpan, previouslyActive); | ||
|
|
@@ -373,7 +381,9 @@ public void run() { | |
| Duration profilingDuration = config.getProfilingDuration(); | ||
| boolean postProcessingEnabled = config.isPostProcessingEnabled(); | ||
|
|
||
| setProfilingSessionOngoing(postProcessingEnabled); | ||
| // We need to enable the session so that onActivation is called and threads are added to the | ||
| // profiler (profiler.addThread). Otherwise, with the "filter" option, nothing is profiled. | ||
| setProfilingSessionOngoing(true); | ||
|
|
||
| if (postProcessingEnabled) { | ||
| logger.fine("Start full profiling session (async-profiler and agent processing)"); | ||
|
|
@@ -394,37 +404,62 @@ public void run() { | |
| config.isNonStopProfiling() && !interrupted && postProcessingEnabled; | ||
jackshirazi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| setProfilingSessionOngoing(continueProfilingSession); | ||
|
|
||
| if (!interrupted && !scheduler.isShutdown()) { | ||
| long delay = config.getProfilingInterval().toMillis() - profilingDuration.toMillis(); | ||
| profilingTask = scheduler.schedule(this, delay, TimeUnit.MILLISECONDS); | ||
| profilerLock.lock(); | ||
| try { | ||
| // it's possible for an interruption to occur just before the lock was acquired. This is | ||
| // handled by re-reading Thread.currentThread().isInterrupted() to ensure no task is scheduled | ||
| // if an interruption occurred just before acquiring the lock | ||
| if (!Thread.currentThread().isInterrupted() && !scheduler.isShutdown()) { | ||
| long delay = config.getProfilingInterval().toMillis() - profilingDuration.toMillis(); | ||
| profilingTask = scheduler.schedule(this, delay, TimeUnit.MILLISECONDS); | ||
| } | ||
| } finally { | ||
| profilerLock.unlock(); | ||
jackshirazi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
|
|
||
| @SuppressWarnings({"NonAtomicVolatileUpdate", "EmptyCatch"}) | ||
| private void profile(Duration profilingDuration) throws Exception { | ||
| try { | ||
| String startCommand = createStartCommand(); | ||
| String startMessage = profiler.execute(startCommand); | ||
| String startMessage; | ||
| try { | ||
| startMessage = profiler.execute(startCommand); | ||
| } catch (IllegalStateException e) { | ||
| if (e.getMessage() != null && e.getMessage().contains("already started")) { | ||
| logger.fine("Profiler already started. Stopping and restarting."); | ||
| try { | ||
| profiler.stop(); | ||
| } catch (RuntimeException ignore) { | ||
| logger.log(Level.FINE, "Ignored error on stopping profiler", ignore); | ||
| } | ||
| startMessage = profiler.execute(startCommand); | ||
| } else { | ||
| throw e; | ||
| } | ||
|
Comment on lines
+429
to
+439
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is is a known failure mode of async profiler here ? If so, then maybe having a dedicated method to wrap this "when exception is thrown stop, start and try again" logic could make it slightly more readeable.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is testing fragility under concurrent test runs, I'd be surprised to see it get hit in production. Especially as dynamic updates to the test durations will be few and far in between in reality |
||
| } | ||
| logger.fine(startMessage); | ||
| if (!profiledThreads.isEmpty()) { | ||
| restoreFilterState(profiler); | ||
| try { | ||
| // try-finally because if the code is interrupted we want to ensure the | ||
| // profiler.execute("stop") is called | ||
| if (!profiledThreads.isEmpty()) { | ||
| restoreFilterState(profiler); | ||
| } | ||
| // Doesn't need to be atomic as this field is being updated only by a single thread | ||
| profilingSessions++; | ||
|
|
||
| // When post-processing is disabled activation events are ignored, but we still need to | ||
| // invoke this method as it is the one enforcing the sampling session duration. As a side | ||
| // effect it will also consume residual activation events if post-processing is disabled | ||
| // dynamically | ||
| consumeActivationEventsFromRingBufferAndWriteToFile(profilingDuration); | ||
| } finally { | ||
| String stopMessage = profiler.execute("stop"); | ||
| logger.fine(stopMessage); | ||
jackshirazi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| // Doesn't need to be atomic as this field is being updated only by a single thread | ||
| profilingSessions++; | ||
|
|
||
| // When post-processing is disabled activation events are ignored, but we still need to invoke | ||
| // this method | ||
| // as it is the one enforcing the sampling session duration. As a side effect it will also | ||
| // consume | ||
| // residual activation events if post-processing is disabled dynamically | ||
| consumeActivationEventsFromRingBufferAndWriteToFile(profilingDuration); | ||
|
|
||
| String stopMessage = profiler.execute("stop"); | ||
| logger.fine(stopMessage); | ||
|
|
||
| // When post-processing is disabled, jfr file will not be parsed and the heavy processing will | ||
| // not occur | ||
| // as this method aborts when no activation events are buffered | ||
| // not occur as this method aborts when no activation events are buffered | ||
| processTraces(); | ||
| } catch (InterruptedException | ClosedByInterruptException e) { | ||
| try { | ||
|
|
@@ -505,6 +540,9 @@ EventPoller.PollState consumeActivationEventsFromRingBufferAndWriteToFile() thro | |
| } | ||
|
|
||
| public void processTraces() throws IOException { | ||
| if (!config.isPostProcessingEnabled()) { | ||
| return; | ||
| } | ||
| if (jfrParser == null) { | ||
| jfrParser = new JfrParser(); | ||
| } | ||
|
|
@@ -739,18 +777,25 @@ public void start() { | |
|
|
||
| @SuppressWarnings({"FutureReturnValueIgnored", "Interruption"}) | ||
| public void reschedule() { | ||
| Future<?> future = this.profilingTask; | ||
| if (future != null) { | ||
| if (future.cancel(true)) { | ||
| profilerLock.lock(); | ||
| try { | ||
| Future<?> future = this.profilingTask; | ||
| if (future != null && future.cancel(true)) { | ||
| Duration profilingDuration = config.getProfilingDuration(); | ||
| long delay = config.getProfilingInterval().toMillis() - profilingDuration.toMillis(); | ||
| profilingTask = scheduler.schedule(this, delay, TimeUnit.MILLISECONDS); | ||
| } | ||
| } finally { | ||
| profilerLock.unlock(); | ||
jackshirazi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
|
|
||
| @SuppressWarnings({"FutureReturnValueIgnored", "Interruption"}) | ||
| public void stop() throws InterruptedException, IOException { | ||
| // cancels/interrupts the profiling thread | ||
| if (profilingTask != null) { | ||
| profilingTask.cancel(true); | ||
| } | ||
|
Comment on lines
+796
to
+798
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [minor] I would suggest to also wrap the access to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. leaving this for another time |
||
| // implicitly clears profiled threads | ||
| scheduler.shutdown(); | ||
| scheduler.awaitTermination(10, TimeUnit.SECONDS); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -93,6 +93,9 @@ public void parse( | |
| this.includedClasses = includedClasses; | ||
| bufferedFile.setFile(file); | ||
| long fileSize = bufferedFile.size(); | ||
| if (fileSize == 0) { | ||
| return; | ||
| } | ||
|
Comment on lines
+96
to
+98
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the case of empty file covered by any existing test ? I haven't seen any change related to this in the tests.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good point, test added now |
||
|
|
||
| int chunkSize = readChunk(0); | ||
| if (chunkSize < fileSize) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,6 @@ | |
| import java.lang.reflect.Method; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import java.nio.file.Paths; | ||
| import java.time.Duration; | ||
| import java.util.List; | ||
| import java.util.Optional; | ||
|
|
@@ -44,6 +43,11 @@ | |
| @DisabledOnOpenJ9 | ||
| class SamplingProfilerTest { | ||
|
|
||
| static { | ||
| // Needed to ensure ordering because tests things out of order | ||
| ProfilingActivationListener.ensureInitialized(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the benefit of explicitly calling this in the tests and not in production code ? is there any benefit of doing this and if yes maybe a comment could be welcome.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the ProfilingActivationListener has a static initializer which has to be executed before any Context access. This happens in production, but is not guaranteed in test because we start things out of order to setup tests |
||
| } | ||
|
|
||
| private ProfilerTestSetup setup; | ||
|
|
||
| @TempDir private Path tempDir; | ||
|
|
@@ -58,12 +62,6 @@ void tearDown() { | |
|
|
||
| @Test | ||
| void shouldLazilyCreateTempFilesAndCleanThem() { | ||
| for (Path file : getProfilerTempFiles()) { | ||
| if (!file.toFile().delete()) { | ||
| throw new IllegalStateException("Could not delete temp file: " + file); | ||
| } | ||
| } | ||
|
|
||
| // temporary files should be created on-demand, and properly deleted afterwards | ||
| setupProfiler(false); | ||
|
|
||
|
|
@@ -91,9 +89,8 @@ void shouldLazilyCreateTempFilesAndCleanThem() { | |
| .isEmpty(); | ||
| } | ||
|
|
||
| private static List<Path> getProfilerTempFiles() { | ||
| Path tempFolder = Paths.get(System.getProperty("java.io.tmpdir")); | ||
| try (Stream<Path> files = Files.list(tempFolder)) { | ||
| private List<Path> getProfilerTempFiles() { | ||
| try (Stream<Path> files = Files.list(tempDir)) { | ||
| return files | ||
| .filter(f -> f.getFileName().toString().startsWith("otel-inferred-")) | ||
| .sorted() | ||
|
|
@@ -327,7 +324,8 @@ private void setupProfiler(Consumer<InferredSpansProcessorBuilder> configCustomi | |
| config | ||
| .profilingDuration(Duration.ofMillis(500)) | ||
| .profilerInterval(Duration.ofMillis(500)) | ||
| .samplingInterval(Duration.ofMillis(5)); | ||
| .samplingInterval(Duration.ofMillis(5)) | ||
| .tempDir(tempDir.toFile()); | ||
| configCustomizer.accept(config); | ||
| }); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[minor] not related to this PR, but we could probably replace usage of
FilewithPathto prevent having to calltoFile.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
leaving this for another time