| | | 1 | | using System.Collections.Concurrent; |
| | | 2 | | using System.Diagnostics; |
| | | 3 | | using System.Runtime.CompilerServices; |
| | | 4 | | |
| | | 5 | | using Microsoft.Extensions.AI; |
| | | 6 | | |
| | | 7 | | using NexusLabs.Needlr.AgentFramework.Progress; |
| | | 8 | | |
| | | 9 | | namespace NexusLabs.Needlr.AgentFramework.Diagnostics; |
| | | 10 | | |
| | | 11 | | /// <summary> |
| | | 12 | | /// Single writer for chat completion diagnostics. Wraps each |
| | | 13 | | /// <c>IChatClient.GetResponseAsync()</c> call to capture per-completion timing, |
| | | 14 | | /// token usage, and full request/response payloads. Records to the AsyncLocal |
| | | 15 | | /// <see cref="AgentRunDiagnosticsBuilder"/> and a thread-safe collection (for |
| | | 16 | | /// workflow runs where AsyncLocal doesn't propagate). Optionally emits |
| | | 17 | | /// <see cref="LlmCallStartedEvent"/>/<see cref="LlmCallCompletedEvent"/> to the |
| | | 18 | | /// progress reporter and OTel metrics via <see cref="IAgentMetrics"/>. |
| | | 19 | | /// </summary> |
| | | 20 | | /// <remarks> |
| | | 21 | | /// <para> |
| | | 22 | | /// <c>IterativeAgentLoop</c> wraps its chat client with this middleware |
| | | 23 | | /// internally, making it the sole writer for <see cref="ChatCompletionDiagnostics"/>. |
| | | 24 | | /// No other code should call <see cref="AgentRunDiagnosticsBuilder.AddChatCompletion"/> |
| | | 25 | | /// for calls that pass through this middleware. |
| | | 26 | | /// </para> |
| | | 27 | | /// <para> |
| | | 28 | | /// <see cref="IAgentMetrics"/> and <see cref="IProgressReporterAccessor"/> are optional. |
| | | 29 | | /// When null, recording still occurs but OTel metrics and progress events are skipped. |
| | | 30 | | /// </para> |
| | | 31 | | /// </remarks> |
| | | 32 | | [DoNotAutoRegister] |
| | | 33 | | internal sealed class DiagnosticsChatClientMiddleware : IChatCompletionCollector |
| | | 34 | | { |
| | | 35 | | private readonly IAgentMetrics? _metrics; |
| | | 36 | | private readonly IGenAiTokenMetrics? _genAiTokenMetrics; |
| | | 37 | | private readonly IProgressReporterAccessor? _progressAccessor; |
| | | 38 | | private readonly ChatCompletionActivityMode _activityMode; |
| | 206 | 39 | | private readonly ConcurrentQueue<ChatCompletionDiagnostics> _allCompletions = new(); |
| | | 40 | | private int _sequenceCounter; |
| | | 41 | | |
/// <summary>
/// Creates the middleware. Every collaborator is optional.
/// </summary>
/// <param name="metrics">
/// Metrics sink; also the source of the <see cref="Activity"/> instances this class starts.
/// When <see langword="null"/>, no activity is started and no completion metric is recorded.
/// </param>
/// <param name="progressAccessor">
/// Accessor for the current progress reporter. When <see langword="null"/>, no LLM call
/// started/completed/failed events are reported.
/// </param>
/// <param name="activityMode">
/// Controls whether a chat completion gets its own activity or enriches a parent one.
/// </param>
/// <param name="genAiTokenMetrics">
/// Sink for cached-input and reasoning token measurements. When <see langword="null"/>,
/// those measurements are not recorded.
/// </param>
internal DiagnosticsChatClientMiddleware(
    IAgentMetrics? metrics = null,
    IProgressReporterAccessor? progressAccessor = null,
    ChatCompletionActivityMode activityMode = ChatCompletionActivityMode.Always,
    IGenAiTokenMetrics? genAiTokenMetrics = null)
{
    (_metrics, _genAiTokenMetrics, _progressAccessor, _activityMode) =
        (metrics, genAiTokenMetrics, progressAccessor, activityMode);
}
| | | 53 | | |
/// <summary>
/// Removes every completion currently held in the internal concurrent queue and returns
/// them in dequeue order. Completions returned here are not returned by a later call.
/// </summary>
/// <returns>The drained completions; empty when nothing was queued.</returns>
public IReadOnlyList<ChatCompletionDiagnostics> DrainCompletions()
{
    var drained = new List<ChatCompletionDiagnostics>();

    while (true)
    {
        if (!_allCompletions.TryDequeue(out var next))
        {
            return drained;
        }

        drained.Add(next);
    }
}
| | | 66 | | |
/// <summary>
/// Forwards a non-streaming chat request to <paramref name="innerChatClient"/> and records
/// one <see cref="ChatCompletionDiagnostics"/> entry for it, whether the call succeeds or throws.
/// </summary>
/// <remarks>
/// The entry is added to the current <see cref="AgentRunDiagnosticsBuilder"/> (when one exists)
/// and always enqueued for <see cref="DrainCompletions"/>. When configured, the call is also
/// reflected in activity tags, completion metrics, token-usage metrics and progress events.
/// </remarks>
/// <param name="messages">The request messages. Enumerated at most once by this method.</param>
/// <param name="options">Chat options passed through to the inner client; may be <see langword="null"/>.</param>
/// <param name="innerChatClient">The client that performs the actual completion.</param>
/// <param name="cancellationToken">Token passed through to the inner client.</param>
/// <returns>The response returned by <paramref name="innerChatClient"/>, unmodified.</returns>
/// <exception cref="Exception">
/// Any exception thrown by <paramref name="innerChatClient"/> is recorded as a failed
/// completion and then rethrown unchanged.
/// </exception>
internal async Task<ChatResponse> HandleAsync(
    IEnumerable<ChatMessage> messages,
    ChatOptions? options,
    IChatClient innerChatClient,
    CancellationToken cancellationToken)
{
    // Materialize once so a lazy sequence is not enumerated a second time when the
    // diagnostics are built. A caller-supplied list is reused as-is (no copy).
    var messageList = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();

    var builder = AgentRunDiagnosticsBuilder.GetCurrent();

    // Prefer the per-run sequence; fall back to this instance's counter when no builder is current.
    var sequence = builder?.NextChatCompletionSequence()
        ?? Interlocked.Increment(ref _sequenceCounter) - 1;
    var startedAt = DateTimeOffset.UtcNow;
    var stopwatch = Stopwatch.StartNew();

    // Only the activity created here is disposed; a parent activity that is merely enriched is not.
    var (ownedActivity, targetActivity) = StartChatActivity("agent.chat");
    using var _ = ownedActivity;

    if (_progressAccessor is not null)
    {
        _progressAccessor.Current.Report(new LlmCallStartedEvent(
            Timestamp: startedAt,
            WorkflowId: _progressAccessor.Current.WorkflowId,
            AgentId: _progressAccessor.Current.AgentId,
            ParentAgentId: builder?.ParentAgentName,
            Depth: _progressAccessor.Current.Depth,
            SequenceNumber: _progressAccessor.Current.NextSequence(),
            CallSequence: sequence));
    }

    try
    {
        var response = await innerChatClient.GetResponseAsync(messageList, options, cancellationToken)
            .ConfigureAwait(false);

        stopwatch.Stop();

        var model = response.ModelId ?? "unknown";

        targetActivity?.SetTag("gen_ai.response.model", model);
        targetActivity?.SetTag("agent.chat.sequence", sequence);
        targetActivity?.SetTag("status", "success");

        _metrics?.RecordChatCompletion(model, stopwatch.Elapsed, succeeded: true, agentName: builder?.AgentName);

        // Cached/reasoning counts come from the typed properties first, then from
        // AdditionalCounts, and default to zero when neither is present.
        var usage = response.Usage;
        var tokens = new TokenUsage(
            InputTokens: usage?.InputTokenCount ?? 0,
            OutputTokens: usage?.OutputTokenCount ?? 0,
            TotalTokens: usage?.TotalTokenCount ?? 0,
            CachedInputTokens:
                usage?.CachedInputTokenCount
                ?? usage?.AdditionalCounts?.GetValueOrDefault("CachedInputTokens")
                ?? 0,
            ReasoningTokens:
                usage?.ReasoningTokenCount
                ?? usage?.AdditionalCounts?.GetValueOrDefault("ReasoningTokens")
                ?? 0);

        targetActivity?.SetTag("gen_ai.usage.input_tokens", tokens.InputTokens);
        targetActivity?.SetTag("gen_ai.usage.output_tokens", tokens.OutputTokens);
        targetActivity?.SetTag("gen_ai.usage.cached_input_tokens", tokens.CachedInputTokens);
        targetActivity?.SetTag("gen_ai.usage.reasoning_tokens", tokens.ReasoningTokens);

        EmitGenAiTokenUsage(tokens, options?.ModelId, response, innerChatClient);

        var diagnostics = new ChatCompletionDiagnostics(
            Sequence: sequence,
            Model: model,
            Tokens: tokens,
            InputMessageCount: messageList.Count,
            Duration: stopwatch.Elapsed,
            Succeeded: true,
            ErrorMessage: null,
            StartedAt: startedAt,
            CompletedAt: DateTimeOffset.UtcNow)
        {
            AgentName = builder?.AgentName,
            RequestMessages = messageList,
            Response = response,
            RequestCharCount = DiagnosticsCharCounter.ChatMessagesLength(messageList),
            ResponseCharCount = DiagnosticsCharCounter.ChatResponseLength(response),
        };

        builder?.AddChatCompletion(diagnostics);
        _allCompletions.Enqueue(diagnostics);

        if (_progressAccessor is not null)
        {
            _progressAccessor.Current.Report(new LlmCallCompletedEvent(
                Timestamp: DateTimeOffset.UtcNow,
                WorkflowId: _progressAccessor.Current.WorkflowId,
                AgentId: _progressAccessor.Current.AgentId,
                ParentAgentId: builder?.ParentAgentName,
                Depth: _progressAccessor.Current.Depth,
                SequenceNumber: _progressAccessor.Current.NextSequence(),
                CallSequence: sequence,
                Model: model,
                Duration: stopwatch.Elapsed,
                InputTokens: tokens.InputTokens,
                OutputTokens: tokens.OutputTokens,
                TotalTokens: tokens.TotalTokens));
        }

        return response;
    }
    catch (Exception ex)
    {
        stopwatch.Stop();

        targetActivity?.SetStatus(ActivityStatusCode.Error, ex.Message);
        targetActivity?.SetTag("status", "failed");

        _metrics?.RecordChatCompletion("unknown", stopwatch.Elapsed, succeeded: false, agentName: builder?.AgentName);

        var diagnostics = new ChatCompletionDiagnostics(
            Sequence: sequence,
            Model: "unknown",
            Tokens: new TokenUsage(0, 0, 0, 0, 0),
            // Report the real request size: the messages are captured on this record, so a
            // zero count would contradict RequestMessages/RequestCharCount.
            InputMessageCount: messageList.Count,
            Duration: stopwatch.Elapsed,
            Succeeded: false,
            ErrorMessage: ex.Message,
            StartedAt: startedAt,
            CompletedAt: DateTimeOffset.UtcNow)
        {
            AgentName = builder?.AgentName,
            RequestMessages = messageList,
            RequestCharCount = DiagnosticsCharCounter.ChatMessagesLength(messageList),
        };

        builder?.AddChatCompletion(diagnostics);
        _allCompletions.Enqueue(diagnostics);

        if (_progressAccessor is not null)
        {
            _progressAccessor.Current.Report(new LlmCallFailedEvent(
                Timestamp: DateTimeOffset.UtcNow,
                WorkflowId: _progressAccessor.Current.WorkflowId,
                AgentId: _progressAccessor.Current.AgentId,
                ParentAgentId: builder?.ParentAgentName,
                Depth: _progressAccessor.Current.Depth,
                SequenceNumber: _progressAccessor.Current.NextSequence(),
                CallSequence: sequence,
                ErrorMessage: ex.Message,
                Duration: stopwatch.Elapsed));
        }

        // Rethrow without resetting the stack trace.
        throw;
    }
}
| | | 219 | | |
/// <summary>
/// Forwards a streaming chat request to <paramref name="innerChatClient"/>, yields each update
/// to the caller as it arrives, and records one <see cref="ChatCompletionDiagnostics"/> entry
/// once the stream ends or fails.
/// </summary>
/// <remarks>
/// Updates are buffered so they can be aggregated into a single <see cref="ChatResponse"/> for
/// the diagnostics entry. If the inner stream throws while advancing, updates received so far
/// are still aggregated and recorded on a failed entry before the exception is rethrown.
/// </remarks>
/// <param name="messages">The request messages. Enumerated at most once by this method.</param>
/// <param name="options">Chat options passed through to the inner client; may be <see langword="null"/>.</param>
/// <param name="innerChatClient">The client that produces the streaming response.</param>
/// <param name="cancellationToken">Token passed to the inner client and to its enumerator.</param>
/// <returns>The updates produced by <paramref name="innerChatClient"/>, in order and unmodified.</returns>
/// <exception cref="Exception">
/// Any exception thrown while advancing the inner stream is recorded and then rethrown with
/// its original stack trace preserved.
/// </exception>
internal async IAsyncEnumerable<ChatResponseUpdate> HandleStreamingAsync(
    IEnumerable<ChatMessage> messages,
    ChatOptions? options,
    IChatClient innerChatClient,
    [EnumeratorCancellation] CancellationToken cancellationToken)
{
    var builder = AgentRunDiagnosticsBuilder.GetCurrent();

    // Prefer the per-run sequence; fall back to this instance's counter when no builder is current.
    var sequence = builder?.NextChatCompletionSequence()
        ?? Interlocked.Increment(ref _sequenceCounter) - 1;
    var startedAt = DateTimeOffset.UtcNow;
    var stopwatch = Stopwatch.StartNew();

    // Only the activity created here is disposed; a parent activity that is merely enriched is not.
    var (ownedStreamActivity, targetActivity) = StartChatActivity("agent.chat.stream");
    using var _s = ownedStreamActivity;

    if (_progressAccessor is not null)
    {
        _progressAccessor.Current.Report(new LlmCallStartedEvent(
            Timestamp: startedAt,
            WorkflowId: _progressAccessor.Current.WorkflowId,
            AgentId: _progressAccessor.Current.AgentId,
            ParentAgentId: builder?.ParentAgentName,
            Depth: _progressAccessor.Current.Depth,
            SequenceNumber: _progressAccessor.Current.NextSequence(),
            CallSequence: sequence));
    }

    // Materialize once and hand the same list to the inner client, so a lazy sequence is
    // not enumerated twice. A caller-supplied list is reused as-is (no copy).
    var messageList = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();
    var buffered = new List<ChatResponseUpdate>();
    Exception? failure = null;

    var enumerable = innerChatClient.GetStreamingResponseAsync(messageList, options, cancellationToken);
    var enumerator = enumerable.GetAsyncEnumerator(cancellationToken);

    // C# does not allow `yield return` inside a try block that has a catch clause, so the
    // stream is advanced manually: the inner try/catch guards only MoveNextAsync, and the
    // yield sits outside it (inside the outer try/finally that disposes the enumerator).
    try
    {
        while (true)
        {
            ChatResponseUpdate update;
            try
            {
                if (!await enumerator.MoveNextAsync().ConfigureAwait(false))
                {
                    break;
                }

                update = enumerator.Current;
            }
            catch (Exception ex)
            {
                failure = ex;
                break;
            }

            buffered.Add(update);
            yield return update;
        }
    }
    finally
    {
        await enumerator.DisposeAsync().ConfigureAwait(false);
    }

    // NOTE(review): if the consumer stops enumerating before the stream ends, execution does
    // not resume past the last yield, so nothing below runs for that call.
    stopwatch.Stop();

    var aggregated = buffered.ToChatResponse();
    var model = aggregated.ModelId ?? "unknown";

    // Cached/reasoning counts come from the typed properties first, then from
    // AdditionalCounts, and default to zero when neither is present. Computed once
    // because both the success and the failure entry report them.
    var usage = aggregated.Usage;
    var tokens = new TokenUsage(
        InputTokens: usage?.InputTokenCount ?? 0,
        OutputTokens: usage?.OutputTokenCount ?? 0,
        TotalTokens: usage?.TotalTokenCount ?? 0,
        CachedInputTokens:
            usage?.CachedInputTokenCount
            ?? usage?.AdditionalCounts?.GetValueOrDefault("CachedInputTokens")
            ?? 0,
        ReasoningTokens:
            usage?.ReasoningTokenCount
            ?? usage?.AdditionalCounts?.GetValueOrDefault("ReasoningTokens")
            ?? 0);

    if (failure is null)
    {
        targetActivity?.SetTag("gen_ai.response.model", model);
        targetActivity?.SetTag("agent.chat.sequence", sequence);
        targetActivity?.SetTag("status", "success");

        _metrics?.RecordChatCompletion(model, stopwatch.Elapsed, succeeded: true, agentName: builder?.AgentName);

        targetActivity?.SetTag("gen_ai.usage.input_tokens", tokens.InputTokens);
        targetActivity?.SetTag("gen_ai.usage.output_tokens", tokens.OutputTokens);
        targetActivity?.SetTag("gen_ai.usage.cached_input_tokens", tokens.CachedInputTokens);
        targetActivity?.SetTag("gen_ai.usage.reasoning_tokens", tokens.ReasoningTokens);

        EmitGenAiTokenUsage(tokens, options?.ModelId, aggregated, innerChatClient);

        var diagnostics = new ChatCompletionDiagnostics(
            Sequence: sequence,
            Model: model,
            Tokens: tokens,
            InputMessageCount: messageList.Count,
            Duration: stopwatch.Elapsed,
            Succeeded: true,
            ErrorMessage: null,
            StartedAt: startedAt,
            CompletedAt: DateTimeOffset.UtcNow)
        {
            AgentName = builder?.AgentName,
            RequestMessages = messageList,
            Response = aggregated,
            RequestCharCount = DiagnosticsCharCounter.ChatMessagesLength(messageList),
            ResponseCharCount = DiagnosticsCharCounter.ChatResponseLength(aggregated),
        };

        builder?.AddChatCompletion(diagnostics);
        _allCompletions.Enqueue(diagnostics);

        if (_progressAccessor is not null)
        {
            _progressAccessor.Current.Report(new LlmCallCompletedEvent(
                Timestamp: DateTimeOffset.UtcNow,
                WorkflowId: _progressAccessor.Current.WorkflowId,
                AgentId: _progressAccessor.Current.AgentId,
                ParentAgentId: builder?.ParentAgentName,
                Depth: _progressAccessor.Current.Depth,
                SequenceNumber: _progressAccessor.Current.NextSequence(),
                CallSequence: sequence,
                Model: model,
                Duration: stopwatch.Elapsed,
                InputTokens: tokens.InputTokens,
                OutputTokens: tokens.OutputTokens,
                TotalTokens: tokens.TotalTokens));
        }
    }
    else
    {
        targetActivity?.SetStatus(ActivityStatusCode.Error, failure.Message);
        targetActivity?.SetTag("status", "failed");

        _metrics?.RecordChatCompletion("unknown", stopwatch.Elapsed, succeeded: false, agentName: builder?.AgentName);

        // Usage seen before the failure is still emitted and recorded on the failed entry.
        EmitGenAiTokenUsage(tokens, options?.ModelId, aggregated, innerChatClient);

        var diagnostics = new ChatCompletionDiagnostics(
            Sequence: sequence,
            Model: model,
            Tokens: tokens,
            InputMessageCount: messageList.Count,
            Duration: stopwatch.Elapsed,
            Succeeded: false,
            ErrorMessage: failure.Message,
            StartedAt: startedAt,
            CompletedAt: DateTimeOffset.UtcNow)
        {
            AgentName = builder?.AgentName,
            RequestMessages = messageList,
            Response = aggregated,
            RequestCharCount = DiagnosticsCharCounter.ChatMessagesLength(messageList),
            ResponseCharCount = DiagnosticsCharCounter.ChatResponseLength(aggregated),
        };

        builder?.AddChatCompletion(diagnostics);
        _allCompletions.Enqueue(diagnostics);

        if (_progressAccessor is not null)
        {
            _progressAccessor.Current.Report(new LlmCallFailedEvent(
                Timestamp: DateTimeOffset.UtcNow,
                WorkflowId: _progressAccessor.Current.WorkflowId,
                AgentId: _progressAccessor.Current.AgentId,
                ParentAgentId: builder?.ParentAgentName,
                Depth: _progressAccessor.Current.Depth,
                SequenceNumber: _progressAccessor.Current.NextSequence(),
                CallSequence: sequence,
                ErrorMessage: failure.Message,
                Duration: stopwatch.Elapsed));
        }

        // `throw failure;` would reset the stack trace to this line; rethrow through
        // ExceptionDispatchInfo so the inner client's original trace is preserved.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(failure).Throw();
    }
}
| | | 416 | | |
/// <summary>
/// Records <c>cache_read</c> and/or <c>reasoning</c> token measurements for one chat call
/// through <see cref="IGenAiTokenMetrics"/> (the <c>gen_ai.client.token.usage</c> histogram).
/// </summary>
/// <remarks>
/// Returns immediately, before resolving <see cref="Microsoft.Extensions.AI.ChatClientMetadata"/>
/// or building any tags, when no token-metrics sink is configured or when both the cached-input
/// and reasoning counts are zero or less.
/// </remarks>
/// <param name="tokens">Token counts for the call.</param>
/// <param name="requestModel">
/// Model id taken from the request options; when <see langword="null"/>, the inner client's
/// default model id (if its metadata is available) is used instead.
/// </param>
/// <param name="response">The response whose model id is reported as the response model.</param>
/// <param name="innerChatClient">Client queried for provider metadata.</param>
private void EmitGenAiTokenUsage(
    TokenUsage tokens,
    string? requestModel,
    ChatResponse response,
    IChatClient innerChatClient)
{
    if (_genAiTokenMetrics is not { } recorder)
    {
        return;
    }

    var hasCacheReads = tokens.CachedInputTokens > 0;
    var hasReasoning = tokens.ReasoningTokens > 0;
    if (!(hasCacheReads || hasReasoning))
    {
        return;
    }

    // Metadata is optional: every tag derived from it falls back to null when it is absent.
    var clientInfo = innerChatClient.GetService(typeof(ChatClientMetadata)) as ChatClientMetadata;
    var endpoint = clientInfo?.ProviderUri;

    var usageTags = new GenAiTokenUsageTags(
        OperationName: "chat",
        RequestModel: requestModel ?? clientInfo?.DefaultModelId,
        ResponseModel: response.ModelId,
        ProviderName: clientInfo?.ProviderName,
        ServerAddress: endpoint?.Host,
        ServerPort: endpoint?.Port);

    if (hasCacheReads)
    {
        recorder.RecordTokenUsage(GenAiTokenTypes.CacheRead, tokens.CachedInputTokens, usageTags);
    }

    if (hasReasoning)
    {
        recorder.RecordTokenUsage(GenAiTokenTypes.Reasoning, tokens.ReasoningTokens, usageTags);
    }
}
| | | 452 | | |
/// <summary>
/// Chooses the <see cref="Activity"/> that a chat completion should tag, honouring
/// <see cref="_activityMode"/>.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>No metrics configured: returns <c>(null, null)</c>.</item>
/// <item>
/// <see cref="ChatCompletionActivityMode.EnrichParent"/> with a current activity whose
/// operation name starts with <c>gen_ai.</c>: returns <c>Created = null</c> and
/// <c>Target</c> = that activity, so no child span is created.
/// </item>
/// <item>Otherwise: starts a new client activity and returns it as both elements.</item>
/// </list>
/// Callers dispose <c>Created</c> only; <c>Target</c> may be a parent they do not own.
/// </remarks>
/// <param name="operationName">Operation name for a newly started activity.</param>
private (Activity? Created, Activity? Target) StartChatActivity(string operationName)
{
    if (_metrics is not { } metrics)
    {
        return (null, null);
    }

    if (_activityMode == ChatCompletionActivityMode.EnrichParent
        && Activity.Current is { } ambient
        && ambient.OperationName.StartsWith("gen_ai.", StringComparison.Ordinal))
    {
        return (Created: null, Target: ambient);
    }

    var started = metrics.ActivitySource.StartActivity(operationName, ActivityKind.Client);
    return (Created: started, Target: started);
}
| | | 479 | | } |