| | | 1 | | using System.Diagnostics; |
| | | 2 | | using System.Diagnostics.Metrics; |
| | | 3 | | |
| | | 4 | | namespace NexusLabs.Needlr.AgentFramework.Diagnostics; |
| | | 5 | | |
/// <summary>
/// Default <see cref="IPipelineMetrics"/> implementation using <see cref="Meter"/>
/// for counters/histograms and <see cref="ActivitySource"/> for distributed
/// tracing spans. Compatible with OpenTelemetry — both metrics and traces are
/// exported when listeners are registered against the configured source name.
/// </summary>
/// <remarks>
/// Source names default to <c>"NexusLabs.Needlr.AgentFramework.Pipelines"</c> but
/// can be overridden via <see cref="PipelineMetricsOptions.MeterName"/> and
/// <see cref="PipelineMetricsOptions.ActivitySourceName"/> to match consumers'
/// existing dashboard queries.
/// </remarks>
[DoNotAutoRegister]
internal sealed class PipelineMetrics : IPipelineMetrics, IDisposable
{
    private readonly Meter _meter;
    private readonly ActivitySource _activitySource;
    private readonly Counter<long> _runsStarted;
    private readonly Counter<long> _runsCompleted;
    private readonly Histogram<double> _runDuration;
    private readonly Counter<long> _stagesCompleted;
    private readonly Histogram<double> _stageDuration;
    private readonly Counter<long> _stageTokens;
    private readonly Counter<long> _stageToolFailed;

    /// <summary>
    /// Creates an instance configured from a default-constructed
    /// <see cref="PipelineMetricsOptions"/>.
    /// </summary>
    public PipelineMetrics() : this(new PipelineMetricsOptions()) { }

    /// <summary>
    /// Creates the <see cref="Meter"/>, the <see cref="ActivitySource"/> and every
    /// pipeline instrument using the names supplied by <paramref name="options"/>.
    /// </summary>
    /// <param name="options">Provides the meter name and the resolved activity source name.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <see langword="null"/>.</exception>
    public PipelineMetrics(PipelineMetricsOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        _meter = new Meter(options.MeterName);

        try
        {
            _activitySource = new ActivitySource(options.ResolvedActivitySourceName);
        }
        catch
        {
            // The meter is disposable and already constructed; release it so a
            // failed ActivitySource construction does not leave it undisposed.
            _meter.Dispose();
            throw;
        }

        _runsStarted = _meter.CreateCounter<long>(
            "pipeline.run.started",
            description: "Pipeline runs started");

        _runsCompleted = _meter.CreateCounter<long>(
            "pipeline.run.completed",
            description: "Pipeline runs completed");

        _runDuration = _meter.CreateHistogram<double>(
            "pipeline.run.duration",
            unit: "s",
            description: "Pipeline run execution duration");

        _stagesCompleted = _meter.CreateCounter<long>(
            "pipeline.stage.completed",
            description: "Pipeline stages completed");

        _stageDuration = _meter.CreateHistogram<double>(
            "pipeline.stage.duration",
            unit: "s",
            description: "Pipeline stage execution duration");

        _stageTokens = _meter.CreateCounter<long>(
            "pipeline.stage.tokens",
            description: "Tokens consumed by a pipeline stage, broken down by token kind");

        _stageToolFailed = _meter.CreateCounter<long>(
            "pipeline.stage.tool.failed",
            description: "Failed tool invocations in a pipeline stage");
    }

    /// <inheritdoc />
    public ActivitySource ActivitySource => _activitySource;

    /// <inheritdoc />
    public void RecordPipelineStarted(string pipelineName) =>
        _runsStarted.Add(1, new KeyValuePair<string, object?>("pipeline_name", pipelineName));

    /// <inheritdoc />
    public void RecordPipelineCompleted(string pipelineName, bool succeeded, TimeSpan duration)
    {
        var pipelineTag = new KeyValuePair<string, object?>("pipeline_name", pipelineName);
        var outcomeTag = new KeyValuePair<string, object?>("outcome", succeeded ? "Succeeded" : "Failed");

        // The completion counter and the duration histogram share the same tag set.
        _runsCompleted.Add(1, pipelineTag, outcomeTag);
        _runDuration.Record(duration.TotalSeconds, pipelineTag, outcomeTag);
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="stage"/> is <see langword="null"/>.</exception>
    public void RecordStageCompleted(string pipelineName, IAgentStageResult stage, TimeSpan duration)
    {
        ArgumentNullException.ThrowIfNull(stage);

        var pipelineTag = new KeyValuePair<string, object?>("pipeline_name", pipelineName);
        var stageTag = new KeyValuePair<string, object?>("stage_name", stage.AgentName);
        var phaseTag = new KeyValuePair<string, object?>("phase_name", stage.PhaseName ?? "(none)");
        var outcomeTag = new KeyValuePair<string, object?>("outcome", stage.Outcome.ToString());
        var terminationTag = new KeyValuePair<string, object?>(
            "termination_cause",
            stage.Termination?.ToTagValue() ?? "Unspecified");

        // More than three tags: pass a TagList rather than relying on the params
        // overload, which can allocate an array for every measurement.
        var completedTags = new TagList { pipelineTag, stageTag, outcomeTag, terminationTag, phaseTag };
        _stagesCompleted.Add(1, completedTags);

        // Skipped stages are counted above but contribute no duration, token or
        // tool-failure measurements.
        if (stage.Outcome == StageOutcome.Skipped)
        {
            return;
        }

        var durationTags = new TagList { pipelineTag, stageTag, outcomeTag, phaseTag };
        _stageDuration.Record(duration.TotalSeconds, durationTags);

        if (stage.Diagnostics is { } diagnostics)
        {
            EmitTokenCounts(diagnostics.AggregateTokenUsage, pipelineTag, stageTag);
            EmitFailedToolCalls(diagnostics.ToolCalls, pipelineTag, stageTag);
        }
    }

    /// <summary>
    /// Emits one token measurement per token kind (input, output, cached input,
    /// reasoning) taken from <paramref name="tokens"/>.
    /// </summary>
    private void EmitTokenCounts(
        TokenUsage tokens,
        KeyValuePair<string, object?> pipelineTag,
        KeyValuePair<string, object?> stageTag)
    {
        EmitTokenKind(tokens.InputTokens, "input", pipelineTag, stageTag);
        EmitTokenKind(tokens.OutputTokens, "output", pipelineTag, stageTag);
        EmitTokenKind(tokens.CachedInputTokens, "cached_input", pipelineTag, stageTag);
        EmitTokenKind(tokens.ReasoningTokens, "reasoning", pipelineTag, stageTag);
    }

    /// <summary>
    /// Adds <paramref name="count"/> to the stage token counter tagged with
    /// <paramref name="kind"/>; counts of zero or less are not recorded.
    /// </summary>
    private void EmitTokenKind(
        long count,
        string kind,
        KeyValuePair<string, object?> pipelineTag,
        KeyValuePair<string, object?> stageTag)
    {
        if (count <= 0)
        {
            return;
        }

        _stageTokens.Add(
            count,
            pipelineTag,
            stageTag,
            new KeyValuePair<string, object?>("token_kind", kind));
    }

    /// <summary>
    /// Increments the failed-tool counter once for every entry in
    /// <paramref name="toolCalls"/> that did not succeed, tagged with the tool name.
    /// </summary>
    private void EmitFailedToolCalls(
        IReadOnlyList<ToolCallDiagnostics> toolCalls,
        KeyValuePair<string, object?> pipelineTag,
        KeyValuePair<string, object?> stageTag)
    {
        foreach (var tool in toolCalls)
        {
            if (tool.Succeeded)
            {
                continue;
            }

            _stageToolFailed.Add(
                1,
                pipelineTag,
                stageTag,
                new KeyValuePair<string, object?>("tool_name", tool.ToolName));
        }
    }

    /// <summary>Disposes the underlying <see cref="Meter"/> and <see cref="ActivitySource"/>.</summary>
    public void Dispose()
    {
        _meter.Dispose();
        _activitySource.Dispose();
    }
}