| | | 1 | | namespace NexusLabs.Needlr.AgentFramework.Budget; |
| | | 2 | | |
/// <summary>
/// Scoped accounting of token consumption. A scope can cap total, input, and/or output
/// tokens independently, which makes pipeline-level token limits possible.
/// </summary>
/// <remarks>
/// <para>
/// A budget window is opened in the current async context by calling
/// <see cref="BeginScope(long)"/> or <see cref="BeginScope(long?, long?, long?)"/>.
/// Token counts are kept per async flow via <see cref="System.Threading.AsyncLocal{T}"/>,
/// so pipeline runs that execute concurrently each see their own independent totals.
/// </para>
/// <para>
/// <c>UsingAgentFramework()</c> registers <see cref="ITokenBudgetTracker"/> in DI
/// automatically. To wire the chat-level middleware, call <c>UsingTokenBudget()</c>
/// on <see cref="AgentFrameworkSyringe"/>.
/// </para>
/// </remarks>
public interface ITokenBudgetTracker
{
    // ---- Opening scopes -------------------------------------------------------------

    /// <summary>
    /// Starts a token-budget scope that is capped only by a total token count.
    /// </summary>
    /// <param name="maxTokens">Upper bound on total tokens for the scope.</param>
    /// <returns>A handle whose disposal ends the scope.</returns>
    IDisposable BeginScope(long maxTokens);

    /// <summary>
    /// Starts a token-budget scope with separate, optional caps on input, output,
    /// and/or total tokens. Callers must supply at least one of the three limits.
    /// </summary>
    /// <param name="maxInputTokens">Cap on input tokens; <see langword="null"/> means no input limit.</param>
    /// <param name="maxOutputTokens">Cap on output tokens; <see langword="null"/> means no output limit.</param>
    /// <param name="maxTotalTokens">Cap on total tokens; <see langword="null"/> means no total limit.</param>
    /// <returns>A handle whose disposal ends the scope.</returns>
    /// <exception cref="ArgumentException">
    /// Every parameter is <see langword="null"/>, i.e. no limit was specified.
    /// </exception>
    IDisposable BeginScope(long? maxInputTokens = null, long? maxOutputTokens = null, long? maxTotalTokens = null);

    /// <summary>
    /// Starts a nested scope that has its own budget and also counts against its parent.
    /// Tokens used in the child roll up to the parent in real-time. Going over the
    /// child's limit cancels the child's token, and cancelling the parent scope
    /// cancels every child that is still active.
    /// </summary>
    /// <param name="name">Readable label used for diagnostics (for example, a stage name).</param>
    /// <param name="maxTokens">
    /// Total-token cap for the child, or <see langword="null"/> for an unlimited child
    /// (its usage still counts against the parent).
    /// </param>
    /// <returns>A handle whose disposal restores the parent scope.</returns>
    /// <exception cref="InvalidOperationException">There is no active parent scope.</exception>
    IDisposable BeginChildScope(string name, long? maxTokens = null);

    /// <summary>
    /// Starts a tracking-only scope that carries no budget limits. Usage reported through
    /// <see cref="Record(long)"/> and <see cref="Record(long,long)"/> is accumulated, but
    /// nothing is cancelled or enforced. Intended for observing token usage without
    /// restricting it.
    /// </summary>
    /// <returns>A handle whose disposal ends the tracking scope.</returns>
    IDisposable BeginTrackingScope();

    // ---- Recording usage ------------------------------------------------------------

    /// <summary>
    /// Adds <paramref name="tokenCount"/> to the active scope's budget as total tokens.
    /// <c>TokenBudgetChatMiddleware</c> calls this automatically after each LLM response.
    /// </summary>
    void Record(long tokenCount);

    /// <summary>
    /// Adds input and output token counts, tracked separately, to the active scope's budget.
    /// <c>TokenUsageRecordingMiddleware</c> calls this automatically after each LLM response.
    /// </summary>
    void Record(long inputTokens, long outputTokens);

    // ---- Configured limits of the active scope --------------------------------------

    /// <summary>Gets the active scope's limit on total tokens.</summary>
    /// <value><see langword="null"/> when no scope is active or the scope has no total limit.</value>
    long? MaxTokens { get; }

    /// <summary>Gets the active scope's limit on input tokens.</summary>
    /// <value><see langword="null"/> when no scope is active or the scope has no input limit.</value>
    long? MaxInputTokens { get; }

    /// <summary>Gets the active scope's limit on output tokens.</summary>
    /// <value><see langword="null"/> when no scope is active or the scope has no output limit.</value>
    long? MaxOutputTokens { get; }

    // ---- Running totals of the active scope -----------------------------------------

    /// <summary>Gets how many total tokens the active scope has accumulated so far.</summary>
    /// <value>0 when no scope is active.</value>
    long CurrentTokens { get; }

    /// <summary>Gets how many input tokens the active scope has accumulated so far.</summary>
    /// <value>0 when no scope is active.</value>
    long CurrentInputTokens { get; }

    /// <summary>Gets how many output tokens the active scope has accumulated so far.</summary>
    /// <value>0 when no scope is active.</value>
    long CurrentOutputTokens { get; }

    // ---- Cancellation ---------------------------------------------------------------

    /// <summary>
    /// Gets the active scope's <see cref="CancellationToken"/>, which is cancelled as soon
    /// as any budget limit is exceeded.
    /// </summary>
    /// <value><see cref="CancellationToken.None"/> when no scope is active.</value>
    CancellationToken BudgetCancellationToken { get; }
}
| | | 105 | | |
/// <summary>
/// Signals that a pipeline consumed more tokens than its budget allows.
/// </summary>
public sealed class TokenBudgetExceededException : Exception
{
    /// <summary>
    /// Creates the exception and records both the usage and the limit that was exceeded.
    /// </summary>
    /// <param name="currentTokens">Token count accumulated so far.</param>
    /// <param name="maxTokens">Budget limit that was exceeded.</param>
    public TokenBudgetExceededException(long currentTokens, long maxTokens)
        : base(ComposeMessage(currentTokens, maxTokens))
    {
        MaxTokens = maxTokens;
        CurrentTokens = currentTokens;
    }

    /// <summary>Gets the token count that had accumulated when the budget was exceeded.</summary>
    public long CurrentTokens { get; }

    /// <summary>Gets the maximum token budget that was exceeded.</summary>
    public long MaxTokens { get; }

    // Builds the exception text handed to the base Exception constructor.
    private static string ComposeMessage(long currentTokens, long maxTokens)
    {
        return $"Token budget exceeded: used {currentTokens} of {maxTokens} tokens.";
    }
}