| | | 1 | | using Microsoft.Extensions.AI; |
| | | 2 | | |
| | | 3 | | using NexusLabs.Needlr.AgentFramework.Budget; |
| | | 4 | | using NexusLabs.Needlr.AgentFramework.Progress; |
| | | 5 | | |
| | | 6 | | namespace NexusLabs.Needlr.AgentFramework.Workflows.Budget; |
| | | 7 | | |
/// <summary>
/// <see cref="DelegatingChatClient"/> that enforces token budget limits by aborting
/// when <see cref="ITokenBudgetTracker"/> thresholds are reached. Depends on
/// <see cref="TokenUsageRecordingMiddleware"/> (wired as an inner middleware) to
/// keep the tracker's token counts up to date.
/// </summary>
/// <remarks>
/// <para>
/// This middleware does NOT record token usage — that is handled by
/// <see cref="TokenUsageRecordingMiddleware"/>, which sits inside this middleware
/// in the pipeline and has therefore already updated the tracker by the time the
/// inner call returns here. Use <c>UsingTokenBudget()</c> to wire both correctly.
/// </para>
/// <para>
/// A limit counts as exhausted once the corresponding counter is greater than or
/// equal to it (total, input and output limits are each checked independently).
/// </para>
/// <para>
/// Budget enforcement uses two mechanisms:
/// <list type="number">
///   <item>
///     <see cref="OperationCanceledException"/> wrapping <see cref="TokenBudgetExceededException"/>
///     thrown from the middleware (works for direct agent runs).
///   </item>
///   <item>
///     <see cref="ITokenBudgetTracker.BudgetCancellationToken"/> cancelled when tokens are recorded
///     past the limit (works for MAF workflow runs — pass this token to the workflow).
///   </item>
/// </list>
/// </para>
/// <para>
/// Only <see cref="GetResponseAsync"/> is overridden by this class; streaming calls are
/// not intercepted here.
/// </para>
/// </remarks>
public sealed class TokenBudgetChatMiddleware : DelegatingChatClient
{
    private readonly ITokenBudgetTracker _tracker;
    private readonly IProgressReporterAccessor _progressAccessor;

    /// <summary>
    /// Creates a budget-enforcing wrapper around <paramref name="innerClient"/>.
    /// </summary>
    /// <param name="innerClient">The inner chat client to delegate to.</param>
    /// <param name="tracker">The token budget tracker scoped to the current pipeline run.</param>
    /// <param name="progressAccessor">Progress reporter accessor for emitting budget events.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tracker"/> or <paramref name="progressAccessor"/> is <see langword="null"/>.
    /// </exception>
    public TokenBudgetChatMiddleware(
        IChatClient innerClient,
        ITokenBudgetTracker tracker,
        IProgressReporterAccessor progressAccessor)
        : base(innerClient)
    {
        ArgumentNullException.ThrowIfNull(tracker);
        ArgumentNullException.ThrowIfNull(progressAccessor);
        _tracker = tracker;
        _progressAccessor = progressAccessor;
    }

    /// <summary>
    /// Delegates to the inner client, aborting before the call if a budget is already
    /// exhausted and after the call if the recorded usage has reached a limit.
    /// </summary>
    /// <param name="messages">The chat messages forwarded to the inner client.</param>
    /// <param name="options">Optional chat options forwarded to the inner client.</param>
    /// <param name="cancellationToken">Token forwarded to the inner client.</param>
    /// <returns>The inner client's response when no budget limit has been reached.</returns>
    /// <exception cref="OperationCanceledException">
    /// A budget limit has been reached; the inner exception is a
    /// <see cref="TokenBudgetExceededException"/> describing the violated limit.
    /// </exception>
    public override async Task<ChatResponse> GetResponseAsync(
        IEnumerable<ChatMessage> messages,
        ChatOptions? options,
        CancellationToken cancellationToken)
    {
        // Pre-call gate: abort if any budget is already exhausted.
        if (IsBudgetExceeded())
        {
            ReportAndThrowBudgetExceeded();
        }

        var response = await base.GetResponseAsync(messages, options, cancellationToken)
            .ConfigureAwait(false);

        // Post-call check: the recording middleware (inner) has already updated
        // the tracker, so the counters now include this call's usage.
        if (IsBudgetExceeded())
        {
            ReportAndThrowBudgetExceeded();
        }

        // Only emit progress updates when at least one limit is configured.
        if (HasAnyLimit())
        {
            EmitBudgetUpdatedEvent();
        }

        return response;
    }

    /// <summary>Returns <see langword="true"/> when at least one budget limit is configured.</summary>
    private bool HasAnyLimit() =>
        _tracker.MaxTokens.HasValue ||
        _tracker.MaxInputTokens.HasValue ||
        _tracker.MaxOutputTokens.HasValue;

    /// <summary>
    /// Returns <see langword="true"/> when any configured limit (total, input or output)
    /// has been reached or passed.
    /// </summary>
    private bool IsBudgetExceeded() =>
        (_tracker.MaxTokens.HasValue && _tracker.CurrentTokens >= _tracker.MaxTokens.Value) ||
        (_tracker.MaxInputTokens.HasValue && _tracker.CurrentInputTokens >= _tracker.MaxInputTokens.Value) ||
        (_tracker.MaxOutputTokens.HasValue && _tracker.CurrentOutputTokens >= _tracker.MaxOutputTokens.Value);

    /// <summary>Reports a <see cref="BudgetUpdatedEvent"/> with the tracker's current counters and limits.</summary>
    private void EmitBudgetUpdatedEvent()
    {
        // Capture the reporter once so every field of the event (including the
        // sequence number) comes from the same reporter instance.
        var reporter = _progressAccessor.Current;
        reporter.Report(new BudgetUpdatedEvent(
            Timestamp: DateTimeOffset.UtcNow,
            WorkflowId: reporter.WorkflowId,
            AgentId: reporter.AgentId,
            ParentAgentId: null,
            Depth: reporter.Depth,
            SequenceNumber: reporter.NextSequence(),
            CurrentInputTokens: _tracker.CurrentInputTokens,
            CurrentOutputTokens: _tracker.CurrentOutputTokens,
            CurrentTotalTokens: _tracker.CurrentTokens,
            MaxInputTokens: _tracker.MaxInputTokens,
            MaxOutputTokens: _tracker.MaxOutputTokens,
            MaxTotalTokens: _tracker.MaxTokens));
    }

    /// <summary>
    /// Reports a <see cref="BudgetExceededEvent"/> for the violated limit and then throws an
    /// <see cref="OperationCanceledException"/> wrapping a <see cref="TokenBudgetExceededException"/>
    /// that carries the same current/max values as the event.
    /// </summary>
    /// <remarks>
    /// The violated limit is resolved in the order input, output, total. Resolving it once
    /// keeps the event and the exception consistent; previously the exception always carried
    /// the total counters even when an input or output limit was the one that tripped.
    /// </remarks>
    /// <exception cref="OperationCanceledException">Always thrown.</exception>
    [System.Diagnostics.CodeAnalysis.DoesNotReturn]
    private void ReportAndThrowBudgetExceeded()
    {
        var reporter = _progressAccessor.Current;

        var (limitType, current, max) =
            _tracker.MaxInputTokens.HasValue && _tracker.CurrentInputTokens >= _tracker.MaxInputTokens.Value
                ? ("input", _tracker.CurrentInputTokens, _tracker.MaxInputTokens.Value)
                : _tracker.MaxOutputTokens.HasValue && _tracker.CurrentOutputTokens >= _tracker.MaxOutputTokens.Value
                    ? ("output", _tracker.CurrentOutputTokens, _tracker.MaxOutputTokens.Value)
                    : ("total", _tracker.CurrentTokens, _tracker.MaxTokens ?? 0);

        reporter.Report(new BudgetExceededEvent(
            Timestamp: DateTimeOffset.UtcNow,
            WorkflowId: reporter.WorkflowId,
            AgentId: reporter.AgentId,
            ParentAgentId: null,
            Depth: reporter.Depth,
            SequenceNumber: reporter.NextSequence(),
            LimitType: limitType,
            CurrentValue: current,
            MaxValue: max));

        // Surface the budget violation as a cancellation so callers that already
        // handle OperationCanceledException unwind cleanly; the budget details
        // remain available through InnerException.
        var budgetException = new TokenBudgetExceededException(current, max);
        throw new OperationCanceledException(
            budgetException.Message,
            budgetException,
            _tracker.BudgetCancellationToken);
    }
}