< Summary

Information
Class: NexusLabs.Needlr.AgentFramework.Evaluation.PipelineCostEvaluator
Assembly: NexusLabs.Needlr.AgentFramework.Evaluation
File(s): /home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/PipelineCostEvaluator.cs
Line coverage
98%
Covered lines: 72
Uncovered lines: 1
Coverable lines: 73
Total lines: 158
Line coverage: 98.6%
Branch coverage
95%
Covered branches: 21
Total branches: 22
Branch coverage: 95.4%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

Method | Branch coverage | Crap Score | Cyclomatic complexity | Line coverage
get_EvaluationMetricNames() | 100% | 2 | 1 | 0%
.ctor() | 100% | 1 | 1 | 100%
EvaluateAsync(...) | 95.45% | 22 | 22 | 100%

File(s)

/home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/PipelineCostEvaluator.cs

# Line Line coverage
 1using Microsoft.Extensions.AI;
 2using Microsoft.Extensions.AI.Evaluation;
 3
 4using NexusLabs.Needlr.AgentFramework.Diagnostics;
 5
 6namespace NexusLabs.Needlr.AgentFramework.Evaluation;
 7
 8/// <summary>
 9/// Deterministic evaluator that scores token usage and cost breakdown per stage of a
 10/// pipeline run from the captured <see cref="IPipelineRunResult"/> snapshot carried in a
 11/// <see cref="PipelineEvaluationContext"/>.
 12/// </summary>
 13/// <remarks>
 14/// <para>
 15/// This evaluator never contacts a language model. It reads
 16/// <see cref="IPipelineRunResult.AggregateTokenUsage"/> and per-stage
 17/// <see cref="IAgentRunDiagnostics.AggregateTokenUsage"/> to produce:
 18/// </para>
 19/// <list type="bullet">
 20///   <item><description><c>pipeline.total_tokens</c> — sum of all stage tokens.</description></item>
 21///   <item><description><c>pipeline.total_input_tokens</c> — aggregate input tokens.</description></item>
 22///   <item><description><c>pipeline.total_output_tokens</c> — aggregate output tokens.</description></item>
 23///   <item><description><c>pipeline.stage_count</c> — number of stages in the pipeline.</description></item>
 24///   <item><description><c>pipeline.stages_with_diagnostics</c> — count of stages that have non-null diagnostics.</description></item>
 25///   <item><description><c>pipeline.most_expensive_stage</c> — name of the stage with the most tokens.</description></item>
 26///   <item><description><c>pipeline.most_expensive_stage_pct</c> — percentage of total tokens used by the most expensive stage.</description></item>
 27/// </list>
 28/// <para>
 29/// When no <see cref="PipelineEvaluationContext"/> is present in the
 30/// <c>additionalContext</c> collection, the evaluator returns an empty
 31/// <see cref="EvaluationResult"/> — callers should treat that as "not applicable".
 32/// </para>
 33/// </remarks>
 34public sealed class PipelineCostEvaluator : IEvaluator
 35{
 36    /// <summary>Metric name for the total token count across all stages.</summary>
 37    public const string TotalTokensMetricName = "pipeline.total_tokens";
 38
 39    /// <summary>Metric name for the total input token count.</summary>
 40    public const string TotalInputTokensMetricName = "pipeline.total_input_tokens";
 41
 42    /// <summary>Metric name for the total output token count.</summary>
 43    public const string TotalOutputTokensMetricName = "pipeline.total_output_tokens";
 44
 45    /// <summary>Metric name for the number of stages in the pipeline.</summary>
 46    public const string StageCountMetricName = "pipeline.stage_count";
 47
 48    /// <summary>Metric name for the count of stages that have diagnostics.</summary>
 49    public const string StagesWithDiagnosticsMetricName = "pipeline.stages_with_diagnostics";
 50
 51    /// <summary>Metric name for the name of the most expensive stage by token count.</summary>
 52    public const string MostExpensiveStageMetricName = "pipeline.most_expensive_stage";
 53
 54    /// <summary>Metric name for the percentage of total tokens used by the most expensive stage.</summary>
 55    public const string MostExpensiveStagePctMetricName = "pipeline.most_expensive_stage_pct";
 56
 57    /// <inheritdoc />
 058    public IReadOnlyCollection<string> EvaluationMetricNames { get; } =
 559    [
 560        TotalTokensMetricName,
 561        TotalInputTokensMetricName,
 562        TotalOutputTokensMetricName,
 563        StageCountMetricName,
 564        StagesWithDiagnosticsMetricName,
 565        MostExpensiveStageMetricName,
 566        MostExpensiveStagePctMetricName,
 567    ];
 68
 69    /// <inheritdoc />
 70    public ValueTask<EvaluationResult> EvaluateAsync(
 71        IEnumerable<ChatMessage> messages,
 72        ChatResponse modelResponse,
 73        ChatConfiguration? chatConfiguration = null,
 74        IEnumerable<EvaluationContext>? additionalContext = null,
 75        CancellationToken cancellationToken = default)
 76    {
 577        var pipelineResult = additionalContext?
 578            .OfType<PipelineEvaluationContext>()
 579            .FirstOrDefault()?
 580            .PipelineResult;
 81
 582        if (pipelineResult is null)
 83        {
 184            return new ValueTask<EvaluationResult>(new EvaluationResult());
 85        }
 86
 487        var stages = pipelineResult.Stages;
 488        var stageCount = stages.Count;
 89
 490        long totalTokens = 0;
 491        long totalInputTokens = 0;
 492        long totalOutputTokens = 0;
 493        var stagesWithDiagnostics = 0;
 494        string? mostExpensiveStageName = null;
 495        long mostExpensiveStageTokens = 0;
 96
 2897        for (var i = 0; i < stages.Count; i++)
 98        {
 1099            var stage = stages[i];
 10100            if (stage.Diagnostics is null)
 101            {
 102                continue;
 103            }
 104
 7105            stagesWithDiagnostics++;
 7106            var usage = stage.Diagnostics.AggregateTokenUsage;
 7107            totalTokens += usage.TotalTokens;
 7108            totalInputTokens += usage.InputTokens;
 7109            totalOutputTokens += usage.OutputTokens;
 110
 7111            if (usage.TotalTokens > mostExpensiveStageTokens)
 112            {
 6113                mostExpensiveStageTokens = usage.TotalTokens;
 6114                mostExpensiveStageName = stage.AgentName;
 115            }
 116        }
 117
 4118        var mostExpensivePct = totalTokens > 0
 4119            ? (double)mostExpensiveStageTokens / totalTokens * 100.0
 4120            : 0;
 121
 4122        return new ValueTask<EvaluationResult>(new EvaluationResult(
 4123            new NumericMetric(
 4124                TotalTokensMetricName,
 4125                value: totalTokens,
 4126                reason: $"{totalTokens:N0} total tokens consumed across {stagesWithDiagnostics} stage(s) with diagnostic
 4127            new NumericMetric(
 4128                TotalInputTokensMetricName,
 4129                value: totalInputTokens,
 4130                reason: $"{totalInputTokens:N0} input tokens consumed."),
 4131            new NumericMetric(
 4132                TotalOutputTokensMetricName,
 4133                value: totalOutputTokens,
 4134                reason: $"{totalOutputTokens:N0} output tokens consumed."),
 4135            new NumericMetric(
 4136                StageCountMetricName,
 4137                value: stageCount,
 4138                reason: $"Pipeline has {stageCount} stage(s)."),
 4139            new NumericMetric(
 4140                StagesWithDiagnosticsMetricName,
 4141                value: stagesWithDiagnostics,
 4142                reason: stagesWithDiagnostics == stageCount
 4143                    ? "All stages have diagnostics."
 4144                    : $"{stagesWithDiagnostics} of {stageCount} stage(s) have diagnostics."),
 4145            new StringMetric(
 4146                MostExpensiveStageMetricName,
 4147                value: mostExpensiveStageName ?? string.Empty,
 4148                reason: mostExpensiveStageName is not null
 4149                    ? $"Stage '{mostExpensiveStageName}' used the most tokens ({mostExpensiveStageTokens:N0})."
 4150                    : "No stages have diagnostics to determine the most expensive stage."),
 4151            new NumericMetric(
 4152                MostExpensiveStagePctMetricName,
 4153                value: mostExpensivePct,
 4154                reason: mostExpensiveStageName is not null
 4155                    ? $"Stage '{mostExpensiveStageName}' consumed {mostExpensivePct:F1}% of total tokens."
 4156                    : "No stages have diagnostics to compute percentage.")));
 157    }
 158}