-
Notifications
You must be signed in to change notification settings - Fork 383
Add --diagnostic-port option to dotnet-stack #5592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
47ce0d2
682a87d
1c97f11
07c8f6c
a72e921
877a49d
ea77b81
87df662
3a3a765
7e8ef6b
79364ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,22 +27,44 @@ internal static class ReportCommandHandler | |
| /// <param name="processId">The process to report the stack from.</param> | ||
| /// <param name="name">The name of process to report the stack from.</param> | ||
| /// <param name="duration">The duration of to trace the target for. </param> | ||
| /// <param name="diagnosticPort">The diagnostic port to connect to.</param> | ||
| /// <returns></returns> | ||
| private static async Task<int> Report(CancellationToken ct, TextWriter stdOutput, TextWriter stdError, int processId, string name, TimeSpan duration) | ||
| private static async Task<int> Report(CancellationToken ct, TextWriter stdOutput, TextWriter stdError, int processId, string name, TimeSpan duration, string diagnosticPort) | ||
| { | ||
| string tempNetTraceFilename = Path.Join(Path.GetTempPath(), Path.GetRandomFileName() + ".nettrace"); | ||
| string tempEtlxFilename = ""; | ||
|
|
||
| try | ||
| { | ||
| // Either processName or processId has to be specified. | ||
| // Validate that only one of processId, name, or diagnosticPort is specified | ||
| int optionCount = 0; | ||
| if (processId != 0) | ||
| { | ||
| optionCount++; | ||
| } | ||
| if (!string.IsNullOrEmpty(name)) | ||
| { | ||
| optionCount++; | ||
| } | ||
| if (!string.IsNullOrEmpty(diagnosticPort)) | ||
| { | ||
| optionCount++; | ||
| } | ||
|
|
||
| if (optionCount == 0) | ||
| { | ||
| stdError.WriteLine("--process-id, --name, or --diagnostic-port is required"); | ||
| return -1; | ||
| } | ||
| else if (optionCount > 1) | ||
| { | ||
| stdError.WriteLine("Only one of --process-id, --name, or --diagnostic-port can be specified"); | ||
| return -1; | ||
| } | ||
|
|
||
| // Resolve process name to ID if needed | ||
| if (!string.IsNullOrEmpty(name)) | ||
| { | ||
| if (processId != 0) | ||
| { | ||
| Console.WriteLine("Can only specify either --name or --process-id option."); | ||
| return -1; | ||
| } | ||
| processId = CommandUtils.FindProcessIdWithName(name); | ||
| if (processId < 0) | ||
| { | ||
|
|
@@ -55,91 +77,125 @@ private static async Task<int> Report(CancellationToken ct, TextWriter stdOutput | |
| stdError.WriteLine("Process ID should not be negative."); | ||
| return -1; | ||
| } | ||
| else if (processId == 0) | ||
|
|
||
| DiagnosticsClientBuilder builder = new("dotnet-stack", 10); | ||
| using (DiagnosticsClientHolder holder = await builder.Build(ct, processId, diagnosticPort, showChildIO: false, printLaunchCommand: false).ConfigureAwait(false)) | ||
| { | ||
| stdError.WriteLine("--process-id is required"); | ||
| return -1; | ||
| } | ||
| if (holder == null) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| DiagnosticsClient client = holder.Client; | ||
|
|
||
| DiagnosticsClient client = new(processId); | ||
| List<EventPipeProvider> providers = new() | ||
| { | ||
| new EventPipeProvider("Microsoft-DotNETCore-SampleProfiler", EventLevel.Informational) | ||
| }; | ||
|
|
||
| // collect a *short* trace with stack samples | ||
| // the hidden '--duration' flag can increase the time of this trace in case 10ms | ||
| // is too short in a given environment, e.g., resource constrained systems | ||
| // N.B. - This trace INCLUDES rundown. For sufficiently large applications, it may take non-trivial time to collect | ||
| // the symbol data in rundown. | ||
| EventPipeSession session = await client.StartEventPipeSessionAsync(providers, requestRundown:true, token:ct).ConfigureAwait(false); | ||
| using (session) | ||
| using (FileStream fs = File.OpenWrite(tempNetTraceFilename)) | ||
| { | ||
| Task copyTask = session.EventStream.CopyToAsync(fs, ct); | ||
| await Task.Delay(duration, ct).ConfigureAwait(false); | ||
| await session.StopAsync(ct).ConfigureAwait(false); | ||
|
|
||
| // check if rundown is taking more than 5 seconds and add comment to report | ||
| Task timeoutTask = Task.Delay(TimeSpan.FromSeconds(5)); | ||
| Task completedTask = await Task.WhenAny(copyTask, timeoutTask).ConfigureAwait(false); | ||
| if (completedTask == timeoutTask) | ||
| // Resume runtime if it was suspended (similar to --resume-runtime:true in other tools) | ||
| // This is safe to call even if the runtime wasn't suspended - it's a no-op in that case | ||
| try | ||
| { | ||
| stdOutput.WriteLine($"# Sufficiently large applications can cause this reportCommand to take non-trivial amounts of time"); | ||
| await client.ResumeRuntimeAsync(ct).ConfigureAwait(false); | ||
| } | ||
| catch (Exception) | ||
| { | ||
| // ResumeRuntime is a no-op if the runtime wasn't suspended, | ||
| // so we can safely ignore exceptions here | ||
| } | ||
| await copyTask.ConfigureAwait(false); | ||
| } | ||
|
|
||
| // using the generated trace file, symbolicate and compute stacks. | ||
| tempEtlxFilename = TraceLog.CreateFromEventPipeDataFile(tempNetTraceFilename); | ||
| using (SymbolReader symbolReader = new(TextWriter.Null) { SymbolPath = SymbolPath.MicrosoftSymbolServerPath }) | ||
| using (TraceLog eventLog = new(tempEtlxFilename)) | ||
| { | ||
| MutableTraceEventStackSource stackSource = new(eventLog) | ||
| List<EventPipeProvider> providers = new() | ||
| { | ||
| OnlyManagedCodeStacks = true | ||
| new EventPipeProvider("Microsoft-DotNETCore-SampleProfiler", EventLevel.Informational) | ||
| }; | ||
|
|
||
| SampleProfilerThreadTimeComputer computer = new(eventLog, symbolReader); | ||
| computer.GenerateThreadTimeStacks(stackSource); | ||
|
|
||
| Dictionary<int, List<StackSourceSample>> samplesForThread = new(); | ||
| // collect a *short* trace with stack samples | ||
| // the hidden '--duration' flag can increase the time of this trace in case 10ms | ||
| // is too short in a given environment, e.g., resource constrained systems | ||
| // N.B. - This trace INCLUDES rundown. For sufficiently large applications, it may take non-trivial time to collect | ||
| // the symbol data in rundown. | ||
| EventPipeSession session; | ||
| try | ||
| { | ||
| session = await client.StartEventPipeSessionAsync(providers, requestRundown:true, token:ct).ConfigureAwait(false); | ||
| } | ||
| catch (Exception) | ||
| { | ||
| stdError.WriteLine(EventPipeErrorMessage); | ||
| return -1; | ||
| } | ||
|
||
|
|
||
| stackSource.ForEach((sample) => { | ||
| StackSourceCallStackIndex stackIndex = sample.StackIndex; | ||
| while (!stackSource.GetFrameName(stackSource.GetFrameIndex(stackIndex), false).StartsWith("Thread (")) | ||
| try | ||
|
||
| { | ||
| using (session) | ||
| using (FileStream fs = File.OpenWrite(tempNetTraceFilename)) | ||
| { | ||
| stackIndex = stackSource.GetCallerIndex(stackIndex); | ||
| Task copyTask = session.EventStream.CopyToAsync(fs, ct); | ||
| await Task.Delay(duration, ct).ConfigureAwait(false); | ||
| await session.StopAsync(ct).ConfigureAwait(false); | ||
|
|
||
| // check if rundown is taking more than 5 seconds and add comment to report | ||
| Task timeoutTask = Task.Delay(TimeSpan.FromSeconds(5)); | ||
| Task completedTask = await Task.WhenAny(copyTask, timeoutTask).ConfigureAwait(false); | ||
| if (completedTask == timeoutTask) | ||
| { | ||
| stdOutput.WriteLine($"# Sufficiently large applications can cause this reportCommand to take non-trivial amounts of time"); | ||
| } | ||
| await copyTask.ConfigureAwait(false); | ||
| } | ||
| } | ||
| catch (Exception) | ||
| { | ||
| stdError.WriteLine(EventPipeErrorMessage); | ||
| return -1; | ||
| } | ||
|
|
||
| // long form for: int.Parse(threadFrame["Thread (".Length..^1)]) | ||
| // Thread id is in the frame name as "Thread (<ID>)" | ||
| string template = "Thread ("; | ||
| string threadFrame = stackSource.GetFrameName(stackSource.GetFrameIndex(stackIndex), false); | ||
| // using the generated trace file, symbolicate and compute stacks. | ||
| tempEtlxFilename = TraceLog.CreateFromEventPipeDataFile(tempNetTraceFilename); | ||
| using (SymbolReader symbolReader = new(TextWriter.Null) { SymbolPath = SymbolPath.MicrosoftSymbolServerPath }) | ||
| using (TraceLog eventLog = new(tempEtlxFilename)) | ||
| { | ||
| MutableTraceEventStackSource stackSource = new(eventLog) | ||
| { | ||
| OnlyManagedCodeStacks = true | ||
| }; | ||
|
|
||
| // we are looking for the first index of ) because | ||
| // we need to handle a thread name like: Thread (4008) (.NET IO ThreadPool Worker) | ||
| int firstIndex = threadFrame.IndexOf(')'); | ||
| int threadId = int.Parse(threadFrame.AsSpan(template.Length, firstIndex - template.Length)); | ||
| SampleProfilerThreadTimeComputer computer = new(eventLog, symbolReader); | ||
| computer.GenerateThreadTimeStacks(stackSource); | ||
|
|
||
| if (samplesForThread.TryGetValue(threadId, out List<StackSourceSample> samples)) | ||
| { | ||
| samples.Add(sample); | ||
| } | ||
| else | ||
| { | ||
| samplesForThread[threadId] = new List<StackSourceSample>() { sample }; | ||
| } | ||
| }); | ||
| Dictionary<int, List<StackSourceSample>> samplesForThread = new(); | ||
|
|
||
| // For every thread recorded in our trace, print the first stack | ||
| foreach ((int threadId, List<StackSourceSample> samples) in samplesForThread) | ||
| { | ||
| stackSource.ForEach((sample) => { | ||
| StackSourceCallStackIndex stackIndex = sample.StackIndex; | ||
| while (!stackSource.GetFrameName(stackSource.GetFrameIndex(stackIndex), false).StartsWith("Thread (")) | ||
| { | ||
| stackIndex = stackSource.GetCallerIndex(stackIndex); | ||
| } | ||
|
|
||
| // long form for: int.Parse(threadFrame["Thread (".Length..^1)]) | ||
| // Thread id is in the frame name as "Thread (<ID>)" | ||
| string template = "Thread ("; | ||
| string threadFrame = stackSource.GetFrameName(stackSource.GetFrameIndex(stackIndex), false); | ||
|
|
||
| // we are looking for the first index of ) because | ||
| // we need to handle a thread name like: Thread (4008) (.NET IO ThreadPool Worker) | ||
| int firstIndex = threadFrame.IndexOf(')'); | ||
| int threadId = int.Parse(threadFrame.AsSpan(template.Length, firstIndex - template.Length)); | ||
|
|
||
| if (samplesForThread.TryGetValue(threadId, out List<StackSourceSample> samples)) | ||
| { | ||
| samples.Add(sample); | ||
| } | ||
| else | ||
| { | ||
| samplesForThread[threadId] = new List<StackSourceSample>() { sample }; | ||
| } | ||
| }); | ||
|
|
||
| // For every thread recorded in our trace, print the first stack | ||
| foreach ((int threadId, List<StackSourceSample> samples) in samplesForThread) | ||
| { | ||
| #if DEBUG | ||
| stdOutput.WriteLine($"Found {samples.Count} stacks for thread 0x{threadId:X}"); | ||
| stdOutput.WriteLine($"Found {samples.Count} stacks for thread 0x{threadId:X}"); | ||
| #endif | ||
| PrintStack(stdOutput, threadId, samples[0], stackSource); | ||
| PrintStack(stdOutput, threadId, samples[0], stackSource); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -189,15 +245,17 @@ public static Command ReportCommand() | |
| { | ||
| ProcessIdOption, | ||
| NameOption, | ||
| DurationOption | ||
| DurationOption, | ||
| DiagnosticPortOption | ||
| }; | ||
|
|
||
| reportCommand.SetAction((parseResult, ct) => Report(ct, | ||
| stdOutput: parseResult.Configuration.Output, | ||
| stdError: parseResult.Configuration.Error, | ||
| processId: parseResult.GetValue(ProcessIdOption), | ||
| name: parseResult.GetValue(NameOption), | ||
| duration: parseResult.GetValue(DurationOption))); | ||
| duration: parseResult.GetValue(DurationOption), | ||
| diagnosticPort: parseResult.GetValue(DiagnosticPortOption))); | ||
|
|
||
| return reportCommand; | ||
| } | ||
|
|
@@ -221,5 +279,14 @@ public static Command ReportCommand() | |
| { | ||
| Description = "The name of the process to report the stack." | ||
| }; | ||
|
|
||
| private static readonly Option<string> DiagnosticPortOption = | ||
| new("--diagnostic-port", "--dport") | ||
| { | ||
| Description = "The path to a diagnostic port to be used." | ||
| }; | ||
|
|
||
| private const string EventPipeErrorMessage = | ||
| "There was a failure in reading stack data. Possible reasons could be trying to connect to a runtime that has been suspended, an unexpected close of the IPC channel, etc."; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Its a little confusing that if it we deemed it safe to call ResumeRuntimeAsync, that we would expect exceptions. If the runtime was suspended and we hit an exception, we are now ignoring it, is that right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed, we probably want to follow the precedent established by other tools:
dotnet-trace
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot - please follow the Exception handling pattern used by dotnet-trace for this call as shown here: https://github.com/dotnet/diagnostics/blob/main/src/Tools/dotnet-trace/CommandLine/Commands/CollectCommand.cs#L341-L348
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done! Updated the exception handling to follow the dotnet-trace pattern. Now only catching
UnsupportedCommandExceptionfor the ResumeRuntimeAsync call, and removed the overly broad try-catch blocks that were hiding diagnostic information. Exceptions now bubble up with full stack traces. Commit f3a8c2d.