namespace NexusLabs.Needlr.AgentFramework.Langfuse;

/// <summary>
/// Standard <see cref="ILangfuseExperimentRun"/> implementation. Each call to
/// <see cref="BeginItemAsync"/> creates a scenario for one dataset item and, when that scenario
/// exposes a non-empty trace id, attaches the trace to this run through
/// <c>POST /api/public/dataset-run-items</c>.
/// </summary>
/// <remarks>
/// A <see cref="LangfuseException"/> raised while linking is reported to the optional diagnostics
/// callback and is not rethrown, so the scenario is still handed back to the caller. Any other
/// exception type propagates out of <see cref="BeginItemAsync"/>.
/// </remarks>
internal sealed class LangfuseExperimentRun : ILangfuseExperimentRun
{
    /// <summary>Relative endpoint used to attach a trace to a dataset run.</summary>
    private const string DatasetRunItemsPath = "api/public/dataset-run-items";

    private readonly LangfuseApiClient _apiClient;
    private readonly LangfuseScoreRecorder _recorder;
    private readonly string? _runDescription;
    private readonly Action<string>? _diagnostics;

    /// <inheritdoc />
    public string DatasetName { get; }

    /// <inheritdoc />
    public string RunName { get; }

    /// <summary>
    /// Creates a run bound to a dataset name and a run name.
    /// </summary>
    /// <param name="apiClient">Client used to post dataset run items. Must not be <see langword="null"/>.</param>
    /// <param name="recorder">Recorder passed to every scenario created by this run. Must not be <see langword="null"/>.</param>
    /// <param name="datasetName">Dataset name; must not be null, empty, or whitespace.</param>
    /// <param name="runName">Run name; must not be null, empty, or whitespace.</param>
    /// <param name="runDescription">Optional description sent with every run item link request.</param>
    /// <param name="diagnostics">Optional callback that receives a message when a link is skipped or fails.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="apiClient"/>, <paramref name="recorder"/>, <paramref name="datasetName"/>, or
    /// <paramref name="runName"/> is <see langword="null"/>.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="datasetName"/> or <paramref name="runName"/> is empty or whitespace.
    /// </exception>
    public LangfuseExperimentRun(
        LangfuseApiClient apiClient,
        LangfuseScoreRecorder recorder,
        string datasetName,
        string runName,
        string? runDescription,
        Action<string>? diagnostics)
    {
        // Validation order is deliberate: it decides which argument is reported first.
        ArgumentNullException.ThrowIfNull(apiClient);
        ArgumentNullException.ThrowIfNull(recorder);
        ArgumentException.ThrowIfNullOrWhiteSpace(datasetName);
        ArgumentException.ThrowIfNullOrWhiteSpace(runName);

        DatasetName = datasetName;
        RunName = runName;
        _runDescription = runDescription;
        _diagnostics = diagnostics;
        _recorder = recorder;
        _apiClient = apiClient;
    }

    /// <inheritdoc />
    public async Task<ILangfuseScenario> BeginItemAsync(
        string datasetItemId,
        string? scenarioName = null,
        IEnumerable<string>? tags = null,
        IReadOnlyDictionary<string, string>? metadata = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(datasetItemId);

        // Fall back to "<dataset>: <item>" when the caller gave no usable scenario name.
        string displayName;
        if (string.IsNullOrWhiteSpace(scenarioName))
        {
            displayName = $"{DatasetName}: {datasetItemId}";
        }
        else
        {
            displayName = scenarioName;
        }

        var scenario = new LangfuseScenario(
            _recorder,
            displayName,
            sessionId: null,
            userId: null,
            tags,
            metadata);

        // Without a non-empty trace id there is nothing to link; report it and return the scenario as-is.
        if (scenario.TraceId is not { Length: > 0 } traceId)
        {
            _diagnostics?.Invoke(
                $"Langfuse dataset run item skipped for item '{datasetItemId}' in run '{RunName}': " +
                "no sampled trace was available to link.");
            return scenario;
        }

        // NOTE(review): if this call throws something other than LangfuseException (e.g. on
        // cancellation), the scenario created above never reaches the caller — confirm whether
        // it needs any cleanup in that case.
        await TryLinkTraceToRunAsync(datasetItemId, traceId, cancellationToken).ConfigureAwait(false);
        return scenario;
    }

    /// <summary>
    /// Posts a dataset run item that ties <paramref name="traceId"/> to <paramref name="datasetItemId"/>
    /// under this run. A <see cref="LangfuseException"/> is forwarded to the diagnostics callback
    /// (when present) instead of being rethrown.
    /// </summary>
    /// <param name="datasetItemId">Identifier of the dataset item being linked.</param>
    /// <param name="traceId">Trace identifier taken from the scenario.</param>
    /// <param name="cancellationToken">Token passed through to the API call.</param>
    private async Task TryLinkTraceToRunAsync(string datasetItemId, string traceId, CancellationToken cancellationToken)
    {
        try
        {
            var payload = new LangfuseCreateDatasetRunItemRequest
            {
                RunName = RunName,
                RunDescription = _runDescription,
                DatasetItemId = datasetItemId,
                TraceId = traceId,
            };

            await _apiClient
                .PostAsync(DatasetRunItemsPath, payload, cancellationToken)
                .ConfigureAwait(false);
        }
        catch (LangfuseException linkFailure)
        {
            _diagnostics?.Invoke(
                $"Langfuse dataset run item link failed for item '{datasetItemId}' in run '{RunName}': {linkFailure.Message}");
        }
    }
}