| | | 1 | | using Microsoft.Extensions.AI; |
| | | 2 | | using Microsoft.Extensions.AI.Evaluation; |
| | | 3 | | |
| | | 4 | | using NexusLabs.Needlr.AgentFramework.Diagnostics; |
| | | 5 | | |
| | | 6 | | namespace NexusLabs.Needlr.AgentFramework.Evaluation; |
| | | 7 | | |
/// <summary>
/// Carries an <see cref="IPipelineRunResult"/> through the
/// <c>Microsoft.Extensions.AI.Evaluation</c> evaluator pipeline so that pipeline-aware
/// evaluators can score per-stage and aggregate metrics without re-invoking the model.
/// </summary>
/// <remarks>
/// <para>
/// Evaluators that require the full pipeline result look up the single instance of this
/// context in the <c>additionalContext</c> collection passed to
/// <see cref="IEvaluator.EvaluateAsync"/>.
/// </para>
/// <para>
/// <see cref="EvaluationContext.Contents"/> contains a single <see cref="TextContent"/>
/// summarising the pipeline run (success flag, stage count, total duration in
/// milliseconds and total token count) so that reporting pipelines which only serialise
/// <see cref="EvaluationContext.Contents"/> still record meaningful information.
/// Consumers that need the full snapshot read <see cref="PipelineResult"/> directly.
/// </para>
/// <para>
/// Use <see cref="ForStage"/> to create a per-stage
/// <see cref="AgentRunDiagnosticsContext"/> for evaluators that operate on individual
/// agent runs within the pipeline.
/// </para>
/// </remarks>
public sealed class PipelineEvaluationContext : EvaluationContext
{
    /// <summary>
    /// The stable name used for this context. Evaluators can locate the context by
    /// matching <see cref="EvaluationContext.Name"/> against this value.
    /// </summary>
    public const string ContextName = "Needlr Pipeline Run Result";

    /// <summary>
    /// Initializes a new instance of the <see cref="PipelineEvaluationContext"/> class.
    /// </summary>
    /// <param name="pipelineResult">The captured pipeline run result to expose to evaluators.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pipelineResult"/> is <see langword="null"/>.</exception>
    public PipelineEvaluationContext(IPipelineRunResult pipelineResult)
        // BuildContents runs before the constructor body, so it performs the null check.
        : base(ContextName, BuildContents(pipelineResult))
    {
        PipelineResult = pipelineResult;
    }

    /// <summary>Gets the pipeline run result.</summary>
    public IPipelineRunResult PipelineResult { get; }

    /// <summary>
    /// Creates an <see cref="AgentRunDiagnosticsContext"/> for a single stage within a
    /// pipeline, or <see langword="null"/> when the stage has no captured diagnostics.
    /// </summary>
    /// <param name="stage">The stage result to convert.</param>
    /// <returns>
    /// An <see cref="AgentRunDiagnosticsContext"/> wrapping the stage's diagnostics, or
    /// <see langword="null"/> if <see cref="IAgentStageResult.Diagnostics"/> is
    /// <see langword="null"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stage"/> is <see langword="null"/>.</exception>
    public static AgentRunDiagnosticsContext? ForStage(IAgentStageResult stage)
    {
        // Fail with a descriptive argument exception instead of a NullReferenceException.
        ArgumentNullException.ThrowIfNull(stage);

        // Read Diagnostics once so the null test and the wrapped value cannot disagree.
        return stage.Diagnostics is { } diagnostics
            ? new AgentRunDiagnosticsContext(diagnostics)
            : null;
    }

    /// <summary>
    /// Creates a pipeline-level context from a full pipeline result.
    /// </summary>
    /// <param name="result">The pipeline run result.</param>
    /// <returns>A new <see cref="PipelineEvaluationContext"/> wrapping the result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is <see langword="null"/>.</exception>
    public static PipelineEvaluationContext ForPipeline(IPipelineRunResult result) =>
        new(result);

    /// <summary>
    /// Builds the single-element content array handed to the base
    /// <see cref="EvaluationContext"/>: one <see cref="TextContent"/> holding a one-line
    /// summary of the run.
    /// </summary>
    /// <param name="result">The pipeline run result to summarise.</param>
    /// <returns>An array containing exactly one <see cref="TextContent"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is <see langword="null"/>.</exception>
    private static AIContent[] BuildContents(IPipelineRunResult result)
    {
        ArgumentNullException.ThrowIfNull(result);

        var succeeded = result.Succeeded;
        var stageCount = result.Stages.Count;
        var durationMs = result.TotalDuration.TotalMilliseconds;

        // A run without aggregate usage is reported as zero tokens.
        var totalTokens = result.AggregateTokenUsage?.TotalTokens ?? 0;

        // The summary is recorded in reports, so format it culture-invariantly to keep
        // the text identical regardless of the machine's locale.
        var summary = string.Create(
            System.Globalization.CultureInfo.InvariantCulture,
            $"Pipeline Succeeded={succeeded} Stages={stageCount} Duration={durationMs:F0}ms TotalTokens={totalTokens}");

        return [new TextContent(summary)];
    }
}