< Summary

Information
Class: NexusLabs.Needlr.AgentFramework.Evaluation.TerminationAppropriatenessEvaluator
Assembly: NexusLabs.Needlr.AgentFramework.Evaluation
File(s): /home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/TerminationAppropriatenessEvaluator.cs
Line coverage
97%
Covered lines: 39
Uncovered lines: 1
Coverable lines: 40
Total lines: 101
Line coverage: 97.5%
Branch coverage
93%
Covered branches: 15
Total branches: 16
Branch coverage: 93.7%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

Method | Branch coverage | Crap Score | Cyclomatic complexity | Line coverage
get_EvaluationMetricNames() | 100% | 2 | 1 | 0%
.ctor() | 100% | 1 | 1 | 100%
EvaluateAsync(...) | 93.75% | 16 | 16 | 100%

File(s)

/home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/TerminationAppropriatenessEvaluator.cs

# | Line | Line coverage
 1using Microsoft.Extensions.AI;
 2using Microsoft.Extensions.AI.Evaluation;
 3
 4using NexusLabs.Needlr.AgentFramework.Diagnostics;
 5
 6namespace NexusLabs.Needlr.AgentFramework.Evaluation;
 7
/// <summary>
/// Deterministic evaluator that scores whether an agent run terminated appropriately,
/// using the captured <see cref="IAgentRunDiagnostics"/> snapshot carried in an
/// <see cref="AgentRunDiagnosticsContext"/>.
/// </summary>
/// <remarks>
/// <para>
/// When the <see cref="AgentRunDiagnosticsContext"/> is present, the evaluator emits:
/// </para>
/// <list type="bullet">
///   <item><description><c>Run Succeeded</c> — boolean; mirrors <see cref="IAgentRunDiagnostics.Succeeded"/>.</description></item>
///   <item><description><c>Termination Consistent</c> — boolean; <see langword="true"/> when <c>Succeeded</c> is consistent with the presence or absence of an error message (success without a message, or failure with one).</description></item>
///   <item><description><c>Execution Mode</c> — string; mirrors <see cref="IAgentRunDiagnostics.ExecutionMode"/>, or <c>Unknown</c> when that value is null or empty.</description></item>
/// </list>
/// <para>
/// When no <see cref="AgentRunDiagnosticsContext"/> is present, the evaluator returns
/// an empty <see cref="EvaluationResult"/>.
/// </para>
/// </remarks>
public sealed class TerminationAppropriatenessEvaluator : IEvaluator
{
    /// <summary>Metric name for the success rollup.</summary>
    public const string RunSucceededMetricName = "Run Succeeded";

    /// <summary>Metric name for the success/error consistency check.</summary>
    public const string TerminationConsistentMetricName = "Termination Consistent";

    /// <summary>Metric name for the captured execution mode string.</summary>
    public const string ExecutionModeMetricName = "Execution Mode";

    /// <summary>Execution mode string emitted when the diagnostics do not carry one.</summary>
    public const string UnknownExecutionMode = "Unknown";

    /// <summary>Fallback text used in the failure reason when no error message was captured.</summary>
    private const string NoErrorMessageCaptured = "no error message captured";

    /// <inheritdoc />
    public IReadOnlyCollection<string> EvaluationMetricNames { get; } =
    [
        RunSucceededMetricName,
        TerminationConsistentMetricName,
        ExecutionModeMetricName,
    ];

    /// <summary>
    /// Builds the three termination metrics from the first
    /// <see cref="AgentRunDiagnosticsContext"/> found in <paramref name="additionalContext"/>.
    /// </summary>
    /// <param name="messages">Not read by this evaluator.</param>
    /// <param name="modelResponse">Not read by this evaluator.</param>
    /// <param name="chatConfiguration">Not read by this evaluator.</param>
    /// <param name="additionalContext">
    /// Contexts to search; only the first <see cref="AgentRunDiagnosticsContext"/> is used.
    /// </param>
    /// <param name="cancellationToken">Not observed; the method completes synchronously.</param>
    /// <returns>
    /// A completed task holding the three metrics, or an empty <see cref="EvaluationResult"/>
    /// when <paramref name="additionalContext"/> is <see langword="null"/>, holds no
    /// <see cref="AgentRunDiagnosticsContext"/>, or that context carries no diagnostics.
    /// </returns>
    public ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        var diagnostics = additionalContext?
            .OfType<AgentRunDiagnosticsContext>()
            .FirstOrDefault()?
            .Diagnostics;

        if (diagnostics is null)
        {
            return new ValueTask<EvaluationResult>(new EvaluationResult());
        }

        var runSucceeded = diagnostics.Succeeded;
        var errorMessage = diagnostics.ErrorMessage;
        var hasErrorMessage = !string.IsNullOrEmpty(errorMessage);

        // Consistent means exactly one of the two holds: success without an error
        // message, or failure with one.
        var terminationConsistent = runSucceeded != hasErrorMessage;

        // Reuse the null-or-empty check so an empty message gets the fallback text
        // instead of rendering as "The agent run failed: .".
        var failureDetail = hasErrorMessage ? errorMessage : NoErrorMessageCaptured;

        var runSucceededMetric = new BooleanMetric(
            RunSucceededMetricName,
            value: runSucceeded,
            reason: runSucceeded
                ? "The agent run reported success."
                : $"The agent run failed: {failureDetail}.");

        var terminationConsistentMetric = new BooleanMetric(
            TerminationConsistentMetricName,
            value: terminationConsistent,
            reason: terminationConsistent
                ? "Success flag is consistent with the presence/absence of an error message."
                : runSucceeded
                    ? "The run reported success but an error message was also captured."
                    : "The run reported failure but no error message was captured.");

        var capturedMode = diagnostics.ExecutionMode;
        var executionMode = string.IsNullOrEmpty(capturedMode)
            ? UnknownExecutionMode
            : capturedMode!;

        var executionModeMetric = new StringMetric(
            ExecutionModeMetricName,
            value: executionMode,
            reason: $"The captured execution mode was '{executionMode}'.");

        return new ValueTask<EvaluationResult>(new EvaluationResult(
            runSucceededMetric,
            terminationConsistentMetric,
            executionModeMetric));
    }
}