| | | 1 | | using System.Globalization; |
| | | 2 | | using System.Runtime.CompilerServices; |
| | | 3 | | using System.Security.Cryptography; |
| | | 4 | | using System.Text; |
| | | 5 | | using System.Text.Json; |
| | | 6 | | |
| | | 7 | | using Microsoft.Extensions.AI; |
| | | 8 | | |
| | | 9 | | namespace NexusLabs.Needlr.AgentFramework.Evaluation; |
| | | 10 | | |
/// <summary>
/// <see cref="DelegatingChatClient"/> that persists every LLM request/response pair
/// to an <see cref="IEvaluationCaptureStore"/> and replays cached responses on
/// subsequent calls with an identical request. Intended to make evaluator runs
/// deterministic and cheap to re-execute.
/// </summary>
/// <remarks>
/// <para>
/// Cache keys are a SHA-256 hash over, for each request message, its role and text
/// plus any function-call, function-result, reasoning, and web-search contents,
/// followed by a small subset of <see cref="ChatOptions"/> that affect output —
/// currently <c>ModelId</c>, <c>Temperature</c>, <c>TopP</c>, and
/// <c>MaxOutputTokens</c>. Requests that differ only in non-captured content types
/// or options will collide; callers that rely on those producing distinct responses
/// must not use this middleware.
/// </para>
/// <para>
/// On a cache hit, streaming calls replay the cached response as one
/// <see cref="ChatResponseUpdate"/> per content item (or a single text update for a
/// message without contents). On a cache miss, updates are forwarded to the caller
/// as they arrive while also being buffered; the combined response is persisted only
/// after the inner stream has completed, so a stream that fails or is abandoned
/// part-way is not captured.
/// </para>
/// </remarks>
public sealed class EvaluationCaptureChatClient : DelegatingChatClient
{
    private readonly IEvaluationCaptureStore _store;

    /// <summary>
    /// Creates a capturing client that wraps <paramref name="innerClient"/>.
    /// </summary>
    /// <param name="innerClient">The inner chat client to delegate to.</param>
    /// <param name="store">Backing store used for capture and replay.</param>
    /// <exception cref="ArgumentNullException"><paramref name="store"/> is <see langword="null"/>.</exception>
    public EvaluationCaptureChatClient(
        IChatClient innerClient,
        IEvaluationCaptureStore store)
        : base(innerClient)
    {
        ArgumentNullException.ThrowIfNull(store);
        _store = store;
    }

    /// <inheritdoc />
    public override async Task<ChatResponse> GetResponseAsync(
        IEnumerable<ChatMessage> messages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(messages);

        // Enumerate the request once: it is hashed here and may be forwarded below.
        var materialized = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();
        var key = ComputeKey(materialized, options);

        var cached = await _store.TryGetAsync(key, cancellationToken).ConfigureAwait(false);
        if (cached is not null)
        {
            return cached;
        }

        var response = await base
            .GetResponseAsync(materialized, options, cancellationToken)
            .ConfigureAwait(false);

        await _store.SaveAsync(key, response, cancellationToken).ConfigureAwait(false);
        return response;
    }

    /// <inheritdoc />
    public override async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
        IEnumerable<ChatMessage> messages,
        ChatOptions? options = null,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(messages);

        // Enumerate the request once: it is hashed here and may be forwarded below.
        var materialized = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();
        var key = ComputeKey(materialized, options);

        var cached = await _store.TryGetAsync(key, cancellationToken).ConfigureAwait(false);
        if (cached is not null)
        {
            foreach (var update in ToUpdates(cached))
            {
                yield return update;
            }

            yield break;
        }

        // Cache miss: pass each update through immediately and keep a copy so the
        // complete response can be stored once the inner stream ends.
        var buffered = new List<ChatResponseUpdate>();
        await foreach (var update in base
            .GetStreamingResponseAsync(materialized, options, cancellationToken)
            .ConfigureAwait(false))
        {
            buffered.Add(update);
            yield return update;
        }

        // Only reached when the caller consumed the stream to the end without error.
        var combined = buffered.ToChatResponse();
        await _store.SaveAsync(key, combined, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Computes the cache key for a request.
    /// </summary>
    /// <param name="messages">Request messages, hashed in order.</param>
    /// <param name="options">
    /// Request options; only <c>ModelId</c>, <c>Temperature</c>, <c>TopP</c>, and
    /// <c>MaxOutputTokens</c> contribute. <see langword="null"/> hashes the same as
    /// options with all four unset.
    /// </param>
    /// <returns>Lower-case hexadecimal SHA-256 digest of the serialized request.</returns>
    internal static string ComputeKey(
        IReadOnlyList<ChatMessage> messages,
        ChatOptions? options)
    {
        // NOTE(review): fields are joined with plain delimiters and are not escaped,
        // so text containing ':' / '|' / '\n' could in principle make two different
        // requests serialize identically. Left as-is to keep existing keys stable.
        var sb = new StringBuilder();
        foreach (var message in messages)
        {
            sb.Append(message.Role.Value);
            sb.Append(':');
            sb.Append(message.Text);

            foreach (var content in message.Contents)
            {
                switch (content)
                {
                    case FunctionCallContent fc:
                        sb.Append("|fc:");
                        sb.Append(fc.CallId);
                        sb.Append(':');
                        sb.Append(fc.Name);
                        if (fc.Arguments is not null)
                        {
                            sb.Append(':');

                            // Ordinal key ordering keeps the hash independent of
                            // dictionary enumeration order.
                            foreach (var kvp in fc.Arguments.OrderBy(k => k.Key, StringComparer.Ordinal))
                            {
                                sb.Append(kvp.Key).Append('=');
                                AppendInvariant(sb, kvp.Value);
                                sb.Append(';');
                            }
                        }

                        break;
                    case FunctionResultContent fr:
                        sb.Append("|fr:");
                        sb.Append(fr.CallId);
                        sb.Append(':');
                        AppendInvariant(sb, fr.Result);
                        break;
                    case TextReasoningContent reasoning:
                        sb.Append("|reason:");
                        sb.Append(reasoning.Text);
                        break;
#pragma warning disable MEAI001 // WebSearchToolCallContent is experimental
                    case WebSearchToolCallContent ws:
                        sb.Append("|ws:");
                        if (ws.Queries is not null)
                        {
                            foreach (var query in ws.Queries)
                            {
                                sb.Append(query).Append(';');
                            }
                        }

                        break;
#pragma warning restore MEAI001
                }
            }

            sb.Append('\n');
        }

        sb.Append("---\n");
        sb.Append("model:").Append(options?.ModelId ?? string.Empty).Append('\n');
        sb.Append("temp:").Append(FormatNullable(options?.Temperature)).Append('\n');
        sb.Append("topp:").Append(FormatNullable(options?.TopP)).Append('\n');
        sb.Append("max:").Append(options?.MaxOutputTokens?.ToString(CultureInfo.InvariantCulture) ?? string.Empty).Append('\n');

        var bytes = Encoding.UTF8.GetBytes(sb.ToString());
        var hash = SHA256.HashData(bytes);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }

    /// <summary>
    /// Appends <paramref name="value"/> to <paramref name="sb"/> without depending on
    /// the current culture, so boxed numbers and dates hash identically on every machine.
    /// </summary>
    /// <param name="sb">Builder receiving the text.</param>
    /// <param name="value">Value to append; <see langword="null"/> appends nothing.</param>
    private static void AppendInvariant(StringBuilder sb, object? value)
    {
        if (value is IFormattable formattable)
        {
            sb.Append(formattable.ToString(null, CultureInfo.InvariantCulture));
        }
        else
        {
            sb.Append(value);
        }
    }

    /// <summary>
    /// Formats an optional <see cref="float"/> with the round-trip specifier and the
    /// invariant culture; returns an empty string when no value is present.
    /// </summary>
    private static string FormatNullable(float? value) =>
        value.HasValue
            ? value.Value.ToString("R", CultureInfo.InvariantCulture)
            : string.Empty;

    /// <summary>
    /// Converts a cached response into streaming updates: one update per content item,
    /// or a single text update for a message that has no contents.
    /// </summary>
    /// <param name="response">The cached response to replay.</param>
    /// <returns>Updates carrying the response- and message-level metadata of the source.</returns>
    private static IEnumerable<ChatResponseUpdate> ToUpdates(ChatResponse response)
    {
        foreach (var message in response.Messages)
        {
            if (message.Contents.Count > 0)
            {
                foreach (var content in message.Contents)
                {
                    yield return WithMetadata(
                        new ChatResponseUpdate(message.Role, [content]),
                        response,
                        message);
                }
            }
            else
            {
                yield return WithMetadata(
                    new ChatResponseUpdate(message.Role, message.Text),
                    response,
                    message);
            }
        }
    }

    /// <summary>
    /// Copies identifying metadata from the cached response and message onto a replayed
    /// update, so that re-assembling the updates yields the same message identity,
    /// author, conversation, timestamp, and finish reason as the captured response.
    /// </summary>
    private static ChatResponseUpdate WithMetadata(
        ChatResponseUpdate update,
        ChatResponse response,
        ChatMessage message)
    {
        update.ResponseId = response.ResponseId;
        update.ModelId = response.ModelId;
        update.ConversationId = response.ConversationId;
        update.CreatedAt = response.CreatedAt;
        update.FinishReason = response.FinishReason;
        update.MessageId = message.MessageId;
        update.AuthorName = message.AuthorName;
        return update;
    }
}