| | | 1 | | using Microsoft.Extensions.AI; |
| | | 2 | | |
| | | 3 | | namespace NexusLabs.Needlr.AgentFramework.Diagnostics; |
| | | 4 | | |
/// <summary>
/// Diagnostics for a single LLM chat completion call within an agent run.
/// </summary>
/// <remarks>
/// <para>
/// Each time the agent calls the underlying <c>IChatClient.GetResponseAsync</c>, the
/// diagnostics chat client middleware captures timing, token usage, and model metadata
/// into one of these records. An agent that makes multiple LLM calls (e.g., a tool-call
/// loop) produces multiple <see cref="ChatCompletionDiagnostics"/> entries within a
/// single <see cref="IAgentRunDiagnostics"/>.
/// </para>
/// </remarks>
/// <example>
/// <code>
/// foreach (var call in diagnostics.ChatCompletions)
/// {
///     var fresh = call.Tokens.InputTokens - call.Tokens.CachedInputTokens;
///     Console.WriteLine($"[{call.Model}] in:{call.Tokens.InputTokens} " +
///         $"(cached:{call.Tokens.CachedInputTokens} fresh:{fresh}) " +
///         $"out:{call.Tokens.OutputTokens} | {call.Duration.TotalMilliseconds}ms");
/// }
/// </code>
/// </example>
/// <param name="Sequence">Zero-based invocation order within the agent run.</param>
/// <param name="Model">The model identifier returned by the LLM provider (e.g., <c>"gpt-4o"</c>, <c>"claude-sonnet-4-20250514"</c>).</param>
/// <param name="Tokens">Token usage breakdown for this single call.</param>
/// <param name="InputMessageCount">Number of <c>ChatMessage</c> entries sent to the model.</param>
/// <param name="Duration">Wall-clock time for the API call.</param>
/// <param name="Succeeded">Whether the call returned without throwing.</param>
/// <param name="ErrorMessage">The exception message if the call failed; <see langword="null"/> on success.</param>
/// <param name="StartedAt">UTC timestamp when the API call began.</param>
/// <param name="CompletedAt">UTC timestamp when the API call finished.</param>
public sealed record ChatCompletionDiagnostics(
    int Sequence,
    string Model,
    TokenUsage Tokens,
    int InputMessageCount,
    TimeSpan Duration,
    bool Succeeded,
    string? ErrorMessage,
    DateTimeOffset StartedAt,
    DateTimeOffset CompletedAt)
{
    /// <summary>
    /// The name of the agent that triggered this completion, or <see langword="null"/>
    /// if the agent name was not available. Used to attribute completions to the
    /// correct stage in group chat workflows where multiple agents share a single
    /// chat client.
    /// </summary>
    public string? AgentName { get; init; }

    /// <summary>
    /// The full list of <see cref="ChatMessage"/> instances sent to the model for this
    /// completion. Captured losslessly to enable post-hoc replay and evaluation
    /// without re-invoking the agent. <see langword="null"/> if capture was unavailable
    /// (e.g., on call failure before messages were materialized).
    /// </summary>
    /// <remarks>
    /// Populated on success. This is the input side of the call and is directly
    /// consumable by <c>Microsoft.Extensions.AI.Evaluation</c> evaluators.
    /// </remarks>
    public IReadOnlyList<ChatMessage>? RequestMessages { get; init; }

    /// <summary>
    /// The full <see cref="ChatResponse"/> returned by the model for this completion,
    /// or <see langword="null"/> if the call failed or the response was not captured.
    /// Captured losslessly to enable post-hoc replay and evaluation.
    /// </summary>
    public ChatResponse? Response { get; init; }

    /// <summary>
    /// Total character count of the text content across every message in
    /// <see cref="RequestMessages"/>. Complements token-based metrics with a
    /// direct programmatic measure of payload size. Defaults to <c>0</c>
    /// when not populated by the capture middleware.
    /// </summary>
    public long RequestCharCount { get; init; }

    /// <summary>
    /// Total character count of the text content across every message in
    /// <see cref="Response"/>. Defaults to <c>0</c> when not populated by
    /// the capture middleware.
    /// </summary>
    public long ResponseCharCount { get; init; }
}