< Summary

Information
Class: NexusLabs.Needlr.AgentFramework.Evaluation.IterationCoherenceEvaluator
Assembly: NexusLabs.Needlr.AgentFramework.Evaluation
File(s): /home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/IterationCoherenceEvaluator.cs
Line coverage
97%
Covered lines: 111
Uncovered lines: 3
Coverable lines: 114
Total lines: 251
Line coverage: 97.3%
Branch coverage
93%
Covered branches: 58
Total branches: 62
Branch coverage: 93.5%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

| Method | Branch coverage | Crap Score | Cyclomatic complexity | Line coverage |
|---|---|---|---|---|
| .ctor(...) | 100% | 2 | 2 | 100% |
| get_EvaluationMetricNames() | 100% | 2 | 1 | 0% |
| EvaluateAsync(...) | 97.22% | 36 | 36 | 100% |
| DetectDegenerateLoop(...) | 100% | 14 | 14 | 100% |
| GetAggregateText(...) | 50% | 4 | 4 | 75% |
| BuildIncoherentReason(...) | 83.33% | 6 | 6 | 85.71% |

File(s)

/home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/IterationCoherenceEvaluator.cs

# | Line | Line coverage
 1using Microsoft.Extensions.AI;
 2using Microsoft.Extensions.AI.Evaluation;
 3
 4using NexusLabs.Needlr.AgentFramework.Diagnostics;
 5
 6namespace NexusLabs.Needlr.AgentFramework.Evaluation;
 7
 8/// <summary>
 9/// Deterministic evaluator that scores the iteration coherence of an iterative-loop
 10/// agent run from the captured <see cref="IAgentRunDiagnostics"/> snapshot carried in
 11/// an <see cref="AgentRunDiagnosticsContext"/>.
 12/// </summary>
 13/// <remarks>
 14/// <para>
 15/// This evaluator only produces metrics when
 16/// <see cref="IAgentRunDiagnostics.ExecutionMode"/> is <c>"IterativeLoop"</c>. For any
 17/// other execution mode (or when the context is missing) the evaluator returns an
 18/// empty <see cref="EvaluationResult"/>, which callers should treat as "not applicable".
 19/// </para>
 20/// <para>
 21/// When applicable, the evaluator emits:
 22/// </para>
 23/// <list type="bullet">
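 24///   <item><description><c>Iteration Count</c> — number of LLM iterations, derived from <see cref="IAgentRunDiagnostics.ChatCompletions"/>.</description></item>
 25///   <item><description><c>Iteration Empty Outputs</c> — number of iterations whose <see cref="ChatCompletionDiagnostics.ResponseCharCount"/> is not greater than zero.</description></item>
 26///   <item><description><c>Terminated Coherently</c> — boolean rollup. <see langword="true"/> when the run succeeded, produced at least one iteration, and the final iteration produced text output.</description></item>
 27///   <item><description><c>Iteration Efficiency Ratio</c> — ratio of useful iterations (produced text output or triggered tool calls) to total iterations; 0 when no iterations were recorded.</description></item>
 28///   <item><description><c>Degenerate Loop Detected</c> — boolean. <see langword="true"/> when two or more consecutive iterations produced identical text output.</description></item>
 29///   <item><description><c>Max Iterations Hit</c> — boolean. <see langword="true"/> when the iteration count reached or exceeded the configured limit; emitted only when a limit was supplied.</description></item>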
 30/// </list>
 31/// </remarks>
 32public sealed class IterationCoherenceEvaluator : IEvaluator
 33{
 34    /// <summary>The execution mode value that gates this evaluator.</summary>
 35    public const string IterativeLoopExecutionMode = "IterativeLoop";
 36
 37    /// <summary>Metric name for the iteration count.</summary>
 38    public const string IterationCountMetricName = "Iteration Count";
 39
 40    /// <summary>Metric name for the count of iterations with empty output.</summary>
 41    public const string EmptyOutputsMetricName = "Iteration Empty Outputs";
 42
 43    /// <summary>Metric name for the boolean rollup indicating coherent termination.</summary>
 44    public const string TerminatedCoherentlyMetricName = "Terminated Coherently";
 45
 46    /// <summary>Metric name for the ratio of useful iterations to total iterations.</summary>
 47    public const string EfficiencyRatioMetricName = "Iteration Efficiency Ratio";
 48
 49    /// <summary>Metric name for the boolean indicating a degenerate (repeated-output) loop.</summary>
 50    public const string DegenerateLoopMetricName = "Degenerate Loop Detected";
 51
 52    /// <summary>Metric name for the boolean indicating the iteration count reached maxIterations.</summary>
 53    public const string MaxIterationsHitMetricName = "Max Iterations Hit";
 54
 55    private readonly int? _maxIterations;
 56
 57    /// <summary>
 58    /// Creates a new <see cref="IterationCoherenceEvaluator"/>.
 59    /// </summary>
 60    /// <param name="maxIterations">
 61    /// Optional expected iteration limit. When provided, the evaluator emits the
 62    /// <see cref="MaxIterationsHitMetricName"/> metric. When <see langword="null"/>,
 63    /// the metric is omitted.
 64    /// </param>
 1865    public IterationCoherenceEvaluator(int? maxIterations = null)
 66    {
            // Stored as-is; no range validation is performed on the supplied value here.
 1867        _maxIterations = maxIterations;
 68
            // The advertised metric names are fixed at construction time:
            // five names are always present, in this order.
 1869        var names = new List<string>
 1870        {
 1871            IterationCountMetricName,
 1872            EmptyOutputsMetricName,
 1873            TerminatedCoherentlyMetricName,
 1874            EfficiencyRatioMetricName,
 1875            DegenerateLoopMetricName,
 1876        };
            // The sixth name is advertised only when a limit was supplied.
 1877        if (maxIterations.HasValue)
 78        {
 379            names.Add(MaxIterationsHitMetricName);
 80        }
            // The List<string> instance itself is exposed through the read-only interface.
 1881        EvaluationMetricNames = names;
 1882    }
 83
 84    /// <inheritdoc />
       // Assigned once in the constructor. Contains MaxIterationsHitMetricName only when a
       // maxIterations value was passed to the constructor.
 085    public IReadOnlyCollection<string> EvaluationMetricNames { get; }
 86
 87    /// <inheritdoc />
       // Only additionalContext is read by this body; messages, modelResponse,
       // chatConfiguration and cancellationToken are not referenced. The method does no
       // awaiting: every return wraps an already-built EvaluationResult in a ValueTask.
 88    public ValueTask<EvaluationResult> EvaluateAsync(
 89        IEnumerable<ChatMessage> messages,
 90        ChatResponse modelResponse,
 91        ChatConfiguration? chatConfiguration = null,
 92        IEnumerable<EvaluationContext>? additionalContext = null,
 93        CancellationToken cancellationToken = default)
 94    {
            // Use the diagnostics carried by the first AgentRunDiagnosticsContext, if any.
 1895        var diagnostics = additionalContext?
 1896            .OfType<AgentRunDiagnosticsContext>()
 1897            .FirstOrDefault()?
 1898            .Diagnostics;
 99
            // Not applicable: no diagnostics, or the execution mode is not exactly
            // "IterativeLoop" (ordinal, case-sensitive). An empty result is returned.
 18100        if (diagnostics is null ||
 18101            !string.Equals(diagnostics.ExecutionMode, IterativeLoopExecutionMode, StringComparison.Ordinal))
 102        {
 3103            return new ValueTask<EvaluationResult>(new EvaluationResult());
 104        }
 105
            // Each recorded chat completion is counted as one iteration.
 15106        var completions = diagnostics.ChatCompletions;
 15107        var iterationCount = completions.Count;
 15108        var emptyOutputs = 0;
 15109        var usefulIterations = 0;
            // Single pass: tally iterations with no text, and iterations that were "useful"
            // (text output, or at least one FunctionCallContent in any response message).
 96110        for (var i = 0; i < completions.Count; i++)
 111        {
 33112            var hasTextOutput = completions[i].ResponseCharCount > 0;
                // A null Response is treated as "no function calls".
 33113            var hasFunctionCalls = completions[i].Response?.Messages
 38114                .Any(m => m.Contents.OfType<FunctionCallContent>().Any()) ?? false;
 115
                // Note: an iteration with no text but with a function call is counted both
                // as an empty output and as a useful iteration.
 33116            if (!hasTextOutput)
 117            {
 3118                emptyOutputs++;
 119            }
 120
 33121            if (hasTextOutput || hasFunctionCalls)
 122            {
 31123                usefulIterations++;
 124            }
 125        }
 126
            // Only text counts here: a final iteration that produced no text (even if it
            // made function calls) is treated as having produced no output.
 15127        var finalIterationProducedOutput =
 15128            iterationCount > 0 && completions[iterationCount - 1].ResponseCharCount > 0;
 15129        var terminatedCoherently =
 15130            diagnostics.Succeeded &&
 15131            iterationCount > 0 &&
 15132            finalIterationProducedOutput;
            // Zero iterations yields a ratio of 0 instead of dividing by zero.
 15133        var efficiencyRatio = iterationCount > 0
 15134            ? (double)usefulIterations / iterationCount
 15135            : 0;
 15136        var degenerateLoop = DetectDegenerateLoop(completions);
 137
            // The five always-present metrics, each with a human-readable reason.
 15138        var metrics = new List<EvaluationMetric>
 15139        {
 15140            new NumericMetric(
 15141                IterationCountMetricName,
 15142                value: iterationCount,
 15143                reason: iterationCount == 0
 15144                    ? "No iterations were recorded."
 15145                    : $"{iterationCount} iteration(s) were recorded."),
 15146
 15147            new NumericMetric(
 15148                EmptyOutputsMetricName,
 15149                value: emptyOutputs,
 15150                reason: emptyOutputs == 0
 15151                    ? "Every iteration produced non-empty output."
 15152                    : $"{emptyOutputs} of {iterationCount} iteration(s) produced empty output."),
 15153
 15154            new BooleanMetric(
 15155                TerminatedCoherentlyMetricName,
 15156                value: terminatedCoherently,
 15157                reason: terminatedCoherently
 15158                    ? "The iterative loop succeeded and the final iteration produced output."
 15159                    : BuildIncoherentReason(diagnostics, iterationCount, finalIterationProducedOutput)),
 15160
 15161            new NumericMetric(
 15162                EfficiencyRatioMetricName,
 15163                value: efficiencyRatio,
 15164                reason: iterationCount == 0
 15165                    ? "No iterations to compute efficiency."
                    // NOTE(review): the next line is cut off in this listing; consult the
                    // original source file for the complete string and closing tokens.
 15166                    : $"{usefulIterations} of {iterationCount} iteration(s) were useful (produced text or triggered tool
 15167
 15168            new BooleanMetric(
 15169                DegenerateLoopMetricName,
 15170                value: degenerateLoop,
 15171                reason: degenerateLoop
 15172                    ? "Two or more consecutive iterations produced identical text output."
 15173                    : "No consecutive duplicate outputs detected."),
 15174        };
 175
            // The limit metric is emitted only when a limit was passed to the constructor.
 15176        if (_maxIterations.HasValue)
 177        {
                // Reaching the limit exactly counts as a hit (>=).
 3178            var hit = iterationCount >= _maxIterations.Value;
 3179            metrics.Add(new BooleanMetric(
 3180                MaxIterationsHitMetricName,
 3181                value: hit,
 3182                reason: hit
                    // NOTE(review): the next line is cut off in this listing; consult the
                    // original source file for the complete string.
 3183                    ? $"Iteration count ({iterationCount}) reached or exceeded the configured limit ({_maxIterations.Val
 3184                    : $"Iteration count ({iterationCount}) is below the configured limit ({_maxIterations.Value})."));
 185        }
 186
 15187        return new ValueTask<EvaluationResult>(new EvaluationResult(metrics.ToArray()));
 188    }
 189
 190    private static bool DetectDegenerateLoop(IReadOnlyList<ChatCompletionDiagnostics> completions)
 191    {
            // Returns true as soon as two adjacent completions both have a non-null Response
            // whose text (as extracted by GetAggregateText) is non-null and ordinally equal.
            // Returns false otherwise.

            // Fewer than two completions cannot contain a consecutive pair.
 15192        if (completions.Count < 2)
 193        {
 5194            return false;
 195        }
 196
            // Compare each completion with its immediate predecessor.
 58197        for (var i = 1; i < completions.Count; i++)
 198        {
 20199            var prevResponse = completions[i - 1].Response;
 20200            var currResponse = completions[i].Response;
 201
                // A pair with a missing Response is skipped, not treated as a match.
 20202            if (prevResponse is null || currResponse is null)
 203            {
 204                continue;
 205            }
 206
 2207            var prevText = GetAggregateText(prevResponse);
 2208            var currText = GetAggregateText(currResponse);
 209
                // Null text (no messages, or empty text) never counts as a duplicate, so two
                // consecutive text-less responses do not flag a degenerate loop.
 2210            if (prevText is not null &&
 2211                currText is not null &&
 2212                string.Equals(prevText, currText, StringComparison.Ordinal))
 213            {
 1214                return true;
 215            }
 216        }
 217
 9218        return false;
 219    }
 220
 221    private static string? GetAggregateText(ChatResponse response)
 222    {
            // Despite the name, this does not combine messages: it returns the Text of the
            // LAST message only. Returns null when the response has no messages or when that
            // text is null or empty.
 4223        if (response.Messages.Count == 0)
 224        {
 0225            return null;
 226        }
 227
 4228        var text = response.Messages[response.Messages.Count - 1].Text;
            // Normalize empty text to null so callers can use a single null check.
 4229        return string.IsNullOrEmpty(text) ? null : text;
 230    }
 231
 232    private static string BuildIncoherentReason(
 233        IAgentRunDiagnostics diagnostics,
 234        int iterationCount,
 235        bool finalIterationProducedOutput)
 236    {
            // Builds the explanation text for a false "Terminated Coherently" metric.
            // Checks run in priority order, so only the first failing condition is reported:
            // failed run, then zero iterations, then an empty final iteration.
 5237        if (!diagnostics.Succeeded)
 238        {
 1239            return "The agent run did not complete successfully.";
 240        }
 4241        if (iterationCount == 0)
 242        {
 2243            return "The agent run succeeded but recorded zero iterations.";
 244        }
 2245        if (!finalIterationProducedOutput)
 246        {
 2247            return "The final iteration produced no output.";
 248        }
            // Defensive fallback: the caller visible in this file invokes this method only
            // when one of the conditions above holds, so this line is not expected to run.
 0249        return "Iterative-loop termination is incoherent.";
 250    }
 251}