< Summary

Information

Class:	NexusLabs.Needlr.AgentFramework.Evaluation.TaskCompletionEvaluator
Assembly:	NexusLabs.Needlr.AgentFramework.Evaluation
File(s):	File 1: /_/src/NexusLabs.Needlr.AgentFramework.Evaluation/obj/Release/net10.0/System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs File 2: /home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/TaskCompletionEvaluator.cs

Line coverage

96%

Covered lines:	82
Uncovered lines:	3
Coverable lines:	85
Total lines:	321
Line coverage:	96.4%

Branch coverage

88%

Covered branches:	30
Total branches:	34
Branch coverage:	88.2%

Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

Method	Branch coverage	Crap Score	Cyclomatic complexity	Line coverage
File 1: ScorePattern()	100%	1	1	100%
File 1: CompletedPattern()	100%	1	1	100%
File 1: ReasoningPattern()	100%	1	1	100%
File 2: get_EvaluationMetricNames()	100%	2	1	0%
File 2: .ctor()	100%	1	1	100%
File 2: EvaluateAsync()	78.57%	14	14	100%
File 2: ExtractUserPrompt(...)	83.33%	6	6	80%
File 2: BuildEvaluationPrompt(...)	100%	6	6	94.44%
File 2: ParseJudgeResponse(...)	100%	8	8	100%

File(s)

/_/src/NexusLabs.Needlr.AgentFramework.Evaluation/obj/Release/net10.0/System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs

File '/_/src/NexusLabs.Needlr.AgentFramework.Evaluation/obj/Release/net10.0/System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs' does not exist (any more).

/home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/TaskCompletionEvaluator.cs

#	Line	Line coverage
	`1`	`using System.Globalization;`
	`2`	`using System.Text.Json;`
	`3`	`using System.Text.RegularExpressions;`
	`4`
	`5`	`using Microsoft.Extensions.AI;`
	`6`	`using Microsoft.Extensions.AI.Evaluation;`
	`7`
	`8`	`using NexusLabs.Needlr.AgentFramework.Diagnostics;`
	`9`
	`10`	`namespace NexusLabs.Needlr.AgentFramework.Evaluation;`
	`11`
	/// <summary>
	/// LLM-judged evaluator that assesses whether an agent actually accomplished
	/// the task it was given. Unlike MEAI's <c>TaskAdherenceEvaluator</c> (which
	/// checks instruction following), this evaluator checks <em>task success</em>:
	/// did the agent produce output that satisfies the original request?
	/// </summary>
	/// <remarks>
	/// <para>
	/// This evaluator requires a <see cref="ChatConfiguration"/> with a judge
	/// <see cref="IChatClient"/>. It sends the original prompt and agent output
	/// to the judge with a structured evaluation prompt and parses the response.
	/// </para>
	/// <para>
	/// When no judge is configured (<c>chatConfiguration</c> is null
	/// or has no <see cref="ChatConfiguration.ChatClient"/>), the evaluator
	/// returns an empty <see cref="EvaluationResult"/>.
	/// </para>
	/// <para>
	/// Metrics produced:
	/// </para>
	/// <list type="bullet">
	/// <item><description><c>Task Completed</c> — boolean. <see langword="true"/> when the
	/// judge determines the agent accomplished the requested task. If the judge's reply has
	/// no <c>COMPLETED:</c> line, the flag is derived from the score and
	/// <see cref="CompletionThreshold"/>.</description></item>
	/// <item><description><c>Task Completion Score</c> — numeric (1–5). How completely and
	/// correctly the agent fulfilled the request. 5 = fully complete, 1 = not started or
	/// completely wrong.</description></item>
	/// <item><description><c>Task Completion Reasoning</c> — string. The judge's
	/// explanation for the score.</description></item>
	/// </list>
	/// </remarks>
	public sealed partial class TaskCompletionEvaluator : IEvaluator
	{
	    /// <summary>Metric name for the boolean task-completed flag.</summary>
	    public const string TaskCompletedMetricName = "Task Completed";

	    /// <summary>Metric name for the numeric 1–5 completion score.</summary>
	    public const string TaskCompletionScoreMetricName = "Task Completion Score";

	    /// <summary>Metric name for the judge's reasoning.</summary>
	    public const string TaskCompletionReasoningMetricName = "Task Completion Reasoning";

	    /// <summary>
	    /// Score threshold at or above which the task is considered completed.
	    /// Only consulted as a fallback when the judge's response contains no
	    /// <c>COMPLETED:</c> line; an explicit YES/NO from the judge takes precedence.
	    /// </summary>
	    public const int CompletionThreshold = 3;

	    // Instructions sent to the judge as the system message. The response format
	    // requested here is what ScorePattern/CompletedPattern/ReasoningPattern parse.
	    private const string SystemPrompt = """
	        You are an evaluation judge. Your job is to assess whether an AI agent
	        completed a task it was given.

	        You will be given:
	        1. The original task/request (USER PROMPT)
	        2. The agent's final output (AGENT OUTPUT)

	        Rate the agent's task completion on a scale of 1-5:
	        - 5: Task fully completed with correct, comprehensive output
	        - 4: Task mostly completed with minor gaps or issues
	        - 3: Task partially completed — core intent addressed but significant gaps
	        - 2: Task barely started — some relevant content but far from complete
	        - 1: Task not completed — output is wrong, empty, or irrelevant

	        Respond with EXACTLY this format (no markdown, no extra text):
	        SCORE: <number 1-5>
	        COMPLETED: <YES or NO>
	        REASONING: <one paragraph explaining your assessment>
	        """;

	    /// <inheritdoc />
	    public IReadOnlyCollection<string> EvaluationMetricNames { get; } =
	    [
	        TaskCompletedMetricName,
	        TaskCompletionScoreMetricName,
	        TaskCompletionReasoningMetricName,
	    ];

	    /// <inheritdoc />
	    public async ValueTask<EvaluationResult> EvaluateAsync(
	        IEnumerable<ChatMessage> messages,
	        ChatResponse modelResponse,
	        ChatConfiguration? chatConfiguration = null,
	        IEnumerable<EvaluationContext>? additionalContext = null,
	        CancellationToken cancellationToken = default)
	    {
	        // Without a judge client there is nothing to ask; report no metrics.
	        if (chatConfiguration?.ChatClient is null)
	        {
	            return new EvaluationResult();
	        }

	        var userPrompt = ExtractUserPrompt(messages);
	        var agentOutput = modelResponse.Text ?? string.Empty;

	        // Optional run diagnostics supplied by the caller; the first matching
	        // context wins and its details are appended to the judge prompt.
	        var diagnosticsContext = additionalContext?
	            .OfType<AgentRunDiagnosticsContext>()
	            .FirstOrDefault()?
	            .Diagnostics;

	        var evaluationPrompt = BuildEvaluationPrompt(userPrompt, agentOutput, diagnosticsContext);

	        var judgeMessages = new ChatMessage[]
	        {
	            new(ChatRole.System, SystemPrompt),
	            new(ChatRole.User, evaluationPrompt),
	        };

	        var judgeResponse = await chatConfiguration.ChatClient
	            .GetResponseAsync(judgeMessages, cancellationToken: cancellationToken)
	            .ConfigureAwait(false);

	        var judgeText = judgeResponse.Text ?? string.Empty;
	        var (score, completed, reasoning) = ParseJudgeResponse(judgeText);

	        return new EvaluationResult(
	            new BooleanMetric(
	                TaskCompletedMetricName,
	                value: completed,
	                reason: completed
	                    ? "The judge determined the agent accomplished the task."
	                    : "The judge determined the agent did not accomplish the task."),
	            new NumericMetric(
	                TaskCompletionScoreMetricName,
	                value: score,
	                reason: $"Score {score}/5 (threshold for completion: {CompletionThreshold})."),
	            new StringMetric(
	                TaskCompletionReasoningMetricName,
	                value: reasoning,
	                reason: "The judge's explanation for the task completion assessment."));
	    }

	    /// <summary>
	    /// Returns the text of the first user-role message that has non-whitespace
	    /// text, or a placeholder string when no such message exists.
	    /// </summary>
	    private static string ExtractUserPrompt(IEnumerable<ChatMessage> messages)
	    {
	        foreach (var msg in messages)
	        {
	            if (msg.Role == ChatRole.User && !string.IsNullOrWhiteSpace(msg.Text))
	            {
	                return msg.Text;
	            }
	        }

	        return "(no user prompt provided)";
	    }

	    /// <summary>
	    /// Builds the user message sent to the judge: the original prompt, the agent
	    /// output (or a placeholder when it is blank) and, when
	    /// <paramref name="diagnostics"/> is provided, an ADDITIONAL CONTEXT section
	    /// with tool-call counts, execution mode and the agent-reported success flag.
	    /// </summary>
	    private static string BuildEvaluationPrompt(
	        string userPrompt,
	        string agentOutput,
	        IAgentRunDiagnostics? diagnostics)
	    {
	        var prompt = $"""
	            USER PROMPT:
	            {userPrompt}

	            AGENT OUTPUT:
	            {(string.IsNullOrWhiteSpace(agentOutput) ? "(empty — the agent produced no text output)" : agentOutput)}
	            """;

	        if (diagnostics is not null)
	        {
	            // The two leading blank lines inside the literal separate this
	            // section from the agent output by one empty line.
	            prompt += $"""


	            ADDITIONAL CONTEXT:
	            - Tool calls made: {diagnostics.ToolCalls.Count}
	            - Tool calls failed: {diagnostics.ToolCalls.Count(t => !t.Succeeded)}
	            - Execution mode: {diagnostics.ExecutionMode ?? "unknown"}
	            - Agent reported success: {diagnostics.Succeeded}
	            """;
	        }

	        return prompt;
	    }

	    /// <summary>
	    /// Parses the judge's free-text reply into a score, a completed flag and the
	    /// reasoning text.
	    /// </summary>
	    /// <param name="response">Raw text returned by the judge.</param>
	    /// <returns>
	    /// <list type="bullet">
	    /// <item><description><c>Score</c>: the integer after <c>SCORE:</c>, clamped to 1–5;
	    /// 1 when no score can be parsed.</description></item>
	    /// <item><description><c>Completed</c>: <see langword="true"/> when the reply says
	    /// <c>COMPLETED: YES</c>, <see langword="false"/> for <c>NO</c>; when the line is
	    /// missing, <c>Score &gt;= CompletionThreshold</c>.</description></item>
	    /// <item><description><c>Reasoning</c>: everything after <c>REASONING:</c>, trimmed;
	    /// <c>"Unable to parse judge response."</c> when the marker is absent.</description></item>
	    /// </list>
	    /// </returns>
	    internal static (int Score, bool Completed, string Reasoning) ParseJudgeResponse(string response)
	    {
	        var score = 1;
	        var completed = false;
	        var reasoning = "Unable to parse judge response.";

	        var scoreMatch = ScorePattern().Match(response);
	        if (scoreMatch.Success &&
	            int.TryParse(scoreMatch.Groups[1].Value, CultureInfo.InvariantCulture, out var parsedScore))
	        {
	            // Out-of-range answers (e.g. 0 or 10) are pulled back into the 1–5 scale.
	            score = Math.Clamp(parsedScore, 1, 5);
	        }

	        var completedMatch = CompletedPattern().Match(response);
	        if (completedMatch.Success)
	        {
	            completed = string.Equals(
	                completedMatch.Groups[1].Value.Trim(),
	                "YES",
	                StringComparison.OrdinalIgnoreCase);
	        }
	        else
	        {
	            // No explicit verdict from the judge: infer it from the score.
	            completed = score >= CompletionThreshold;
	        }

	        var reasoningMatch = ReasoningPattern().Match(response);
	        if (reasoningMatch.Success)
	        {
	            reasoning = reasoningMatch.Groups[1].Value.Trim();
	        }

	        return (score, completed, reasoning);
	    }

	    // Captures the whole run of digits so that a multi-digit answer such as
	    // "SCORE: 10" is parsed as 10 (and then clamped) instead of being read as 1.
	    [GeneratedRegex(@"SCORE:\s*(\d+)", RegexOptions.IgnoreCase)]
	    private static partial Regex ScorePattern();

	    [GeneratedRegex(@"COMPLETED:\s*(YES|NO)", RegexOptions.IgnoreCase)]
	    private static partial Regex CompletedPattern();

	    // Singleline lets '.' cross newlines, so multi-line reasoning is captured in full.
	    [GeneratedRegex(@"REASONING:\s*(.+)", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
	    private static partial Regex ReasoningPattern();
	}

< Summary

Metrics

File(s)

/_/src/NexusLabs.Needlr.AgentFramework.Evaluation/obj/Release/net10.0/System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs

/home/runner/work/needlr/needlr/src/NexusLabs.Needlr.AgentFramework.Evaluation/TaskCompletionEvaluator.cs

Methods/Properties