< Summary

Information
Class: NexusLabs.Needlr.AgentFramework.Evaluation.PipelineStageEvaluator
Assembly: NexusLabs.Needlr.AgentFramework.Evaluation
File(s): /home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/PipelineStageEvaluator.cs
Line coverage
98%
Covered lines: 63
Uncovered lines: 1
Coverable lines: 64
Total lines: 141
Line coverage: 98.4%
Branch coverage
95%
Covered branches: 19
Total branches: 20
Branch coverage: 95%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

Method | Branch coverage | Crap Score | Cyclomatic complexity | Line coverage
get_EvaluationMetricNames() | 100% | 2 | 1 | 0%
.ctor() | 100% | 1 | 1 | 100%
EvaluateAsync(...) | 95% | 20 | 20 | 100%

File(s)

/home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/PipelineStageEvaluator.cs

# | Line | Line coverage
 1using Microsoft.Extensions.AI;
 2using Microsoft.Extensions.AI.Evaluation;
 3
 4using NexusLabs.Needlr.AgentFramework.Diagnostics;
 5
 6namespace NexusLabs.Needlr.AgentFramework.Evaluation;
 7
 8/// <summary>
 9/// Deterministic evaluator that scores per-stage success/failure and overall pipeline
 10/// health from the captured <see cref="IPipelineRunResult"/> snapshot carried in a
 11/// <see cref="PipelineEvaluationContext"/>.
 12/// </summary>
 13/// <remarks>
 14/// <para>
 15/// This evaluator never contacts a language model. It reads the
 16/// <see cref="IPipelineRunResult"/> to produce:
 17/// </para>
 18/// <list type="bullet">
 19///   <item><description><c>pipeline.succeeded</c> — whether the pipeline succeeded.</description></item>
 20///   <item><description><c>pipeline.total_stages</c> — total number of stages.</description></item>
 21///   <item><description><c>pipeline.completed_stages</c> — stages with non-null diagnostics.</description></item>
 22///   <item><description><c>pipeline.skipped_stages</c> — stages with null diagnostics AND null response.</description></item>
 23///   <item><description><c>pipeline.total_duration_ms</c> — total pipeline duration in milliseconds.</description></item>
 24///   <item><description><c>pipeline.error_message</c> — error message if the pipeline failed (nullable).</description></item>
 25/// </list>
 26/// <para>
 27/// When no <see cref="PipelineEvaluationContext"/> is present in the
 28/// <c>additionalContext</c> collection, the evaluator returns an empty
 29/// <see cref="EvaluationResult"/> — callers should treat that as "not applicable".
 30/// </para>
 31/// </remarks>
 32public sealed class PipelineStageEvaluator : IEvaluator
 33{
 34    /// <summary>Metric name for whether the pipeline succeeded.</summary>
 35    public const string SucceededMetricName = "pipeline.succeeded";
 36
 37    /// <summary>Metric name for the total number of stages.</summary>
 38    public const string TotalStagesMetricName = "pipeline.total_stages";
 39
 40    /// <summary>Metric name for the number of completed stages (those with diagnostics).</summary>
 41    public const string CompletedStagesMetricName = "pipeline.completed_stages";
 42
 43    /// <summary>Metric name for the number of skipped stages (null diagnostics AND null response).</summary>
 44    public const string SkippedStagesMetricName = "pipeline.skipped_stages";
 45
 46    /// <summary>Metric name for the total pipeline duration in milliseconds.</summary>
 47    public const string TotalDurationMsMetricName = "pipeline.total_duration_ms";
 48
 49    /// <summary>Metric name for the error message if the pipeline failed.</summary>
 50    public const string ErrorMessageMetricName = "pipeline.error_message";
 51
 52    /// <inheritdoc />
 053    public IReadOnlyCollection<string> EvaluationMetricNames { get; } =
 654    [
 655        SucceededMetricName,
 656        TotalStagesMetricName,
 657        CompletedStagesMetricName,
 658        SkippedStagesMetricName,
 659        TotalDurationMsMetricName,
 660        ErrorMessageMetricName,
 661    ];
 62
 63    /// <inheritdoc />
 64    public ValueTask<EvaluationResult> EvaluateAsync(
 65        IEnumerable<ChatMessage> messages,
 66        ChatResponse modelResponse,
 67        ChatConfiguration? chatConfiguration = null,
 68        IEnumerable<EvaluationContext>? additionalContext = null,
 69        CancellationToken cancellationToken = default)
 70    {
 671        var pipelineResult = additionalContext?
 672            .OfType<PipelineEvaluationContext>()
 673            .FirstOrDefault()?
 674            .PipelineResult;
 75
 676        if (pipelineResult is null)
 77        {
 178            return new ValueTask<EvaluationResult>(new EvaluationResult());
 79        }
 80
 581        var stages = pipelineResult.Stages;
 582        var totalStages = pipelineResult.PlannedStageCount;
 583        var completedStages = 0;
 584        var skippedStages = 0;
 85
 2886        for (var i = 0; i < stages.Count; i++)
 87        {
 988            var stage = stages[i];
 989            if (stage.Diagnostics is not null)
 90            {
 791                completedStages++;
 92            }
 293            else if (stage.FinalResponse is null)
 94            {
 295                skippedStages++;
 96            }
 97        }
 98
 599        var durationMs = pipelineResult.TotalDuration.TotalMilliseconds;
 5100        var succeeded = pipelineResult.Succeeded;
 5101        var errorMessage = pipelineResult.ErrorMessage;
 102
 5103        var metrics = new List<EvaluationMetric>
 5104        {
 5105            new BooleanMetric(
 5106                SucceededMetricName,
 5107                value: succeeded,
 5108                reason: succeeded
 5109                    ? "Pipeline completed successfully."
 5110                    : "Pipeline did not complete successfully."),
 5111            new NumericMetric(
 5112                TotalStagesMetricName,
 5113                value: totalStages,
 5114                reason: $"Pipeline has {totalStages} stage(s)."),
 5115            new NumericMetric(
 5116                CompletedStagesMetricName,
 5117                value: completedStages,
 5118                reason: completedStages == totalStages
 5119                    ? "All stages completed with diagnostics."
 5120                    : $"{completedStages} of {totalStages} stage(s) completed with diagnostics."),
 5121            new NumericMetric(
 5122                SkippedStagesMetricName,
 5123                value: skippedStages,
 5124                reason: skippedStages == 0
 5125                    ? "No stages were skipped."
 5126                    : $"{skippedStages} stage(s) were skipped (no diagnostics and no response)."),
 5127            new NumericMetric(
 5128                TotalDurationMsMetricName,
 5129                value: durationMs,
 5130                reason: $"Pipeline ran for {durationMs:F0}ms."),
 5131            new StringMetric(
 5132                ErrorMessageMetricName,
 5133                value: errorMessage,
 5134                reason: errorMessage is not null
 5135                    ? $"Pipeline error: {errorMessage}"
 5136                    : "No error occurred."),
 5137        };
 138
 5139        return new ValueTask<EvaluationResult>(new EvaluationResult(metrics.ToArray()));
 140    }
 141}