| | | 1 | | using Microsoft.Extensions.AI; |
| | | 2 | | using Microsoft.Extensions.AI.Evaluation; |
| | | 3 | | |
| | | 4 | | using NexusLabs.Needlr.AgentFramework.Diagnostics; |
| | | 5 | | |
| | | 6 | | namespace NexusLabs.Needlr.AgentFramework.Evaluation; |
| | | 7 | | |
/// <summary>
/// An <see cref="EvaluationContext"/> that wraps an <see cref="IAgentRunDiagnostics"/>
/// snapshot so it can travel through the <c>Microsoft.Extensions.AI.Evaluation</c>
/// evaluator pipeline. This lets Needlr-native deterministic evaluators score
/// execution-mode, tool-call trajectory, and termination behaviour without being
/// re-invoked against the LLM.
/// </summary>
/// <remarks>
/// <para>
/// Evaluators that need the raw diagnostics snapshot (for example,
/// <see cref="ToolCallTrajectoryEvaluator"/>) locate the single instance of this
/// context in the <c>additionalContext</c> collection supplied to
/// <see cref="IEvaluator.EvaluateAsync"/>.
/// </para>
/// <para>
/// <see cref="EvaluationContext.Contents"/> holds exactly one <see cref="TextContent"/>:
/// a one-line summary made up of the agent name, execution mode, success flag,
/// chat-completion count, tool-call count, and total duration in whole milliseconds.
/// Reporting pipelines that only serialise <see cref="EvaluationContext.Contents"/>
/// therefore still record meaningful information, while consumers that need the full
/// snapshot read <see cref="Diagnostics"/> directly.
/// </para>
/// </remarks>
public sealed class AgentRunDiagnosticsContext : EvaluationContext
{
    /// <summary>
    /// The fixed name under which this context is registered. Evaluators can find the
    /// context by comparing <see cref="EvaluationContext.Name"/> with this value.
    /// </summary>
    public const string ContextName = "Needlr Agent Run Diagnostics";

    /// <summary>
    /// Initializes a new instance of the <see cref="AgentRunDiagnosticsContext"/> class.
    /// </summary>
    /// <param name="diagnostics">The captured agent-run diagnostics to expose to evaluators.</param>
    /// <exception cref="ArgumentNullException"><paramref name="diagnostics"/> is <see langword="null"/>.</exception>
    public AgentRunDiagnosticsContext(IAgentRunDiagnostics diagnostics)
        // BuildContents performs the null check; it runs as part of the base-constructor
        // argument, i.e. before the assignment below.
        : base(ContextName, BuildContents(diagnostics))
        => Diagnostics = diagnostics;

    /// <summary>Gets the captured diagnostics snapshot.</summary>
    public IAgentRunDiagnostics Diagnostics { get; }

    /// <summary>
    /// Produces the content list handed to the base <see cref="EvaluationContext"/>:
    /// a single <see cref="TextContent"/> carrying a one-line summary of the snapshot.
    /// </summary>
    /// <param name="diagnostics">The snapshot to summarise.</param>
    /// <returns>A one-element array containing the summary text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="diagnostics"/> is <see langword="null"/>.</exception>
    private static AIContent[] BuildContents(IAgentRunDiagnostics diagnostics)
    {
        ArgumentNullException.ThrowIfNull(diagnostics);

        // One "Key=value" token per field, listed in the order they appear in the summary.
        string[] fields =
        [
            $"Agent='{diagnostics.AgentName}'",
            // A missing execution mode is rendered as the literal "(unknown)".
            $"ExecutionMode='{diagnostics.ExecutionMode ?? "(unknown)"}'",
            $"Succeeded={diagnostics.Succeeded}",
            $"ChatCompletions={diagnostics.ChatCompletions.Count}",
            $"ToolCalls={diagnostics.ToolCalls.Count}",
            // The cast drops the fractional part of the millisecond value.
            $"DurationMs={(long)diagnostics.TotalDuration.TotalMilliseconds}",
        ];

        // Tokens are separated by exactly one space, with no trailing separator.
        return [new TextContent(string.Join(' ', fields))];
    }
}