namespace NexusLabs.Needlr.AgentFramework.Diagnostics;

/// <summary>
/// Token usage breakdown for a single LLM call or aggregate across an agent run.
/// </summary>
/// <remarks>
/// <para>
/// Each <see cref="ChatCompletionDiagnostics"/> carries a <see cref="TokenUsage"/> for
/// that individual call. <see cref="IAgentRunDiagnostics.AggregateTokenUsage"/> sums
/// across all calls in one agent run. <see cref="IPipelineRunResult.AggregateTokenUsage"/>
/// sums across all stages in a pipeline.
/// </para>
/// <para>
/// Token counts come from the LLM provider's response metadata. If the provider does
/// not report a field, the corresponding value is <c>0</c>.
/// </para>
/// </remarks>
/// <example>
/// <code>
/// var diagnostics = accessor.LastRunDiagnostics;
/// var usage = diagnostics.AggregateTokenUsage;
/// Console.WriteLine($"Input: {usage.InputTokens} (cached: {usage.CachedInputTokens})");
/// Console.WriteLine($"Output: {usage.OutputTokens} (reasoning: {usage.ReasoningTokens})");
/// Console.WriteLine($"Total: {usage.TotalTokens}");
/// </code>
/// </example>
/// <param name="InputTokens">Number of tokens in the prompt sent to the model.</param>
/// <param name="OutputTokens">Number of tokens generated by the model.</param>
/// <param name="TotalTokens">Sum of input and output tokens (may include overhead tokens not in either count).</param>
/// <param name="CachedInputTokens">Subset of <paramref name="InputTokens"/> served from the provider's prompt cache. Can be <c>0</c> when the provider does not report cache usage.</param>
/// <param name="ReasoningTokens">Tokens consumed by chain-of-thought reasoning (e.g., OpenAI o-series models). Zero for models or providers that do not report reasoning tokens.</param>
public sealed record TokenUsage(
    long InputTokens,
    long OutputTokens,
    long TotalTokens,
    long CachedInputTokens,
    long ReasoningTokens);