| | | 1 | | using Microsoft.Extensions.AI; |
| | | 2 | | |
| | | 3 | | using NexusLabs.Needlr.AgentFramework.Budget; |
| | | 4 | | |
| | | 5 | | namespace NexusLabs.Needlr.AgentFramework.Workflows.Budget; |
| | | 6 | | |
/// <summary>
/// A thin <see cref="DelegatingChatClient"/> that forwards every non-streaming
/// request to the inner client and then reports the token usage found on the
/// response to an <see cref="ITokenBudgetTracker"/>. This type only records
/// usage; budget enforcement is the job of <see cref="TokenBudgetChatMiddleware"/>.
/// </summary>
/// <remarks>
/// Added to the pipeline automatically by <c>UsingTokenTracking()</c>,
/// <c>UsingTokenBudget()</c>, and <c>UsingDiagnostics()</c>. The wiring is
/// idempotent: a single instance is installed no matter how many of those
/// extensions ask for it.
/// </remarks>
public sealed class TokenUsageRecordingMiddleware : DelegatingChatClient
{
    private readonly ITokenBudgetTracker _tracker;

    /// <summary>
    /// Creates the middleware around <paramref name="innerClient"/>.
    /// </summary>
    /// <param name="innerClient">The chat client that actually services requests.</param>
    /// <param name="tracker">The tracker that receives the recorded token counts.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tracker"/> is <see langword="null"/>.
    /// </exception>
    public TokenUsageRecordingMiddleware(IChatClient innerClient, ITokenBudgetTracker tracker)
        : base(innerClient)
    {
        _tracker = tracker ?? throw new ArgumentNullException(nameof(tracker));
    }

    /// <inheritdoc />
    /// <remarks>
    /// After the inner client responds, the response's usage (if any) is recorded:
    /// the input/output pair when at least one of them is positive, otherwise the
    /// total count when one is present. The response itself is returned unchanged.
    /// </remarks>
    public override async Task<ChatResponse> GetResponseAsync(
        IEnumerable<ChatMessage> messages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        var reply = await base
            .GetResponseAsync(messages, options, cancellationToken)
            .ConfigureAwait(false);

        // No usage reported on the response: nothing to record.
        if (reply.Usage is not { } details)
        {
            return reply;
        }

        // Missing counts are treated as zero.
        var promptTokens = details.InputTokenCount ?? 0;
        var completionTokens = details.OutputTokenCount ?? 0;
        var hasSplitCounts = promptTokens > 0 || completionTokens > 0;

        if (hasSplitCounts)
        {
            _tracker.Record(promptTokens, completionTokens);
        }
        else if (details.TotalTokenCount is long combinedTokens)
        {
            // Only an aggregate figure is available, so record it on its own.
            _tracker.Record(combinedTokens);
        }

        return reply;
    }
}