diff --git a/packages/sdk/src/qualifire.ts b/packages/sdk/src/qualifire.ts
new file mode 100644
index 00000000..9cd4f6b6
--- /dev/null
+++ b/packages/sdk/src/qualifire.ts
@@ -0,0 +1,127 @@
+import { EvaluationResult, ReportSummaryRequest, StructuredSummary } from "./types";
+
+/** Wire format of the JSON body POSTed to the Qualifire summary endpoint. */
+interface QualifireReportPayload {
+  job_id: string;
+  evaluations: EvaluationResult;
+  structured: StructuredSummary | null;
+  deep_test: boolean;
+  start_time: string;
+  judge_model: string | null;
+}
+
+/** Optional hooks for {@link QualifireClient} calls. */
+export interface QualifireClientOptions {
+  /** Receives progress messages; nothing is logged when omitted. */
+  logger?: (message: string) => void;
+}
+
+/** Static helper that submits evaluation summaries to the Qualifire HTTP API. */
+export class QualifireClient {
+  /** Base URL used when the request does not specify `qualifireUrl`. */
+  private static readonly DEFAULT_BASE_URL = "https://api.qualifire.com";
+
+  /** Milliseconds to wait for the report request before aborting it. */
+  private static readonly REQUEST_TIMEOUT_MS = 30000;
+
+  /**
+   * Maps the SDK-side request onto the snake_case payload sent to the API.
+   *
+   * Absent optional fields are normalised: `structured` and `judge_model`
+   * become `null`, `deep_test` becomes `false`, and `start_time` defaults to
+   * the current time as an ISO-8601 string.
+   */
+  private static convertWithStructuredSummary(
+    evaluationResults: EvaluationResult,
+    request: ReportSummaryRequest
+  ): QualifireReportPayload {
+    return {
+      job_id: request.job_id,
+      evaluations: evaluationResults,
+      // `||` (not `??`) is kept for these two so that any falsy value, such as
+      // an empty model name, is still reported as null.
+      structured: request.structuredSummary || null,
+      deep_test: request.deepTest ?? false,
+      start_time: request.startTime ?? new Date().toISOString(),
+      judge_model: request.judgeModel || null,
+    };
+  }
+
+  /**
+   * Reports evaluation summary to Qualifire.
+   *
+   * Sends a JSON POST to `{qualifireUrl}/llm/summary` with the API key in the
+   * `X-Qualifire-API-Key` header, and aborts the request after 30 seconds.
+   *
+   * @param evaluationResults - The evaluation results to report
+   * @param request - Configuration including Qualifire URL, API key, and metadata
+   * @param options - Optional settings; `logger` receives a progress message
+   * @throws {Error} If `qualifireApiKey` is missing, `qualifireUrl` is invalid,
+   *   the request times out, or the API returns a non-2xx status
+   * @returns A promise that resolves when the report is successfully submitted
+   */
+  public static async reportSummaryToQualifire(
+    evaluationResults: EvaluationResult,
+    request: ReportSummaryRequest,
+    options?: QualifireClientOptions
+  ): Promise<void> {
+    options?.logger?.("Reporting summary to Qualifire");
+
+    const apiKey = request.qualifireApiKey;
+    if (!apiKey) {
+      throw new Error("qualifireApiKey is required but was undefined");
+    }
+
+    const baseUrl = request.qualifireUrl ?? QualifireClient.DEFAULT_BASE_URL;
+    // NOTE(review): the literal "undefined" presumably catches a URL built by
+    // stringifying a missing value upstream - confirm against callers.
+    if (!baseUrl || baseUrl === "undefined") {
+      throw new Error("Invalid qualifireUrl provided");
+    }
+    // Drop trailing slashes so "https://host/" does not yield
+    // "https://host//llm/summary".
+    const endpoint = `${baseUrl.replace(/\/+$/, "")}/llm/summary`;
+
+    // Referenced by class name rather than `this`, so the method still works
+    // when it is detached from the class (e.g. passed as a callback).
+    const payload = QualifireClient.convertWithStructuredSummary(
+      evaluationResults,
+      request
+    );
+
+    const controller = new AbortController();
+    const timeoutId = setTimeout(
+      () => controller.abort(),
+      QualifireClient.REQUEST_TIMEOUT_MS
+    );
+
+    try {
+      const response = await fetch(endpoint, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "X-Qualifire-API-Key": apiKey,
+        },
+        body: JSON.stringify(payload),
+        signal: controller.signal,
+      });
+
+      if (!response.ok) {
+        const errText = await response.text();
+        throw new Error(
+          `Qualifire report failed: ${response.status} ${response.statusText} - ${errText}`
+        );
+      }
+    } catch (error) {
+      if (error instanceof Error && error.name === "AbortError") {
+        const seconds = QualifireClient.REQUEST_TIMEOUT_MS / 1000;
+        throw new Error(`Qualifire report timed out after ${seconds} seconds`);
+      }
+      throw error;
+    } finally {
+      // Runs on success, HTTP failure and timeout alike, so the timer never
+      // outlives the call.
+      clearTimeout(timeoutId);
+    }
+  }
+}
diff --git a/packages/sdk/src/types.ts b/packages/sdk/src/types.ts
index ccc1beaa..c2f6f1e1 100644
--- a/packages/sdk/src/types.ts
+++ b/packages/sdk/src/types.ts
@@ -118,6 +118,23 @@ export interface RogueClientConfig {
   retries?: number;
 }
 
+export interface StructuredSummary {
+  overall_summary: string;
+  key_findings: string[];
+  recommendations: string[];
+  detailed_breakdown: object[];
+}
+
+export interface ReportSummaryRequest {
+  job_id: string;
+  qualifireUrl?: string;
+  qualifireApiKey?: string;
+  structuredSummary?: StructuredSummary;
+  deepTest?: boolean;
+  startTime?: string;
+  judgeModel?: string;
+}
+
 // Event Types for WebSocket
 export type WebSocketEventType =
   | "job_update"
diff --git a/packages/tui/internal/screens/evaluations/form_view.go b/packages/tui/internal/screens/evaluations/form_view.go
index 46c03d8d..4062163a 100644
---
a/packages/tui/internal/screens/evaluations/form_view.go +++ b/packages/tui/internal/screens/evaluations/form_view.go @@ -161,8 +161,8 @@ func RenderForm(state *FormState) string { // Prepare field values agent := state.AgentURL - protocol := state.Protocol - transport := state.Transport + protocol := string(state.Protocol) + transport := string(state.Transport) judge := state.JudgeModel deep := "❌" if state.DeepTest { diff --git a/rogue/evaluator_agent/base_evaluator_agent.py b/rogue/evaluator_agent/base_evaluator_agent.py index 59a9e347..866972c0 100644 --- a/rogue/evaluator_agent/base_evaluator_agent.py +++ b/rogue/evaluator_agent/base_evaluator_agent.py @@ -366,6 +366,7 @@ def _log_evaluation( context_id: str, evaluation_passed: bool, reason: str, + scenario_type: Optional[str], **kwargs, ) -> None: """