Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions packages/sdk/src/qualifire.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import { EvaluationResult, ReportSummaryRequest, StructuredSummary } from "./types";

/**
 * Wire format of the report body that `QualifireClient` serialises to JSON
 * and POSTs to the Qualifire `/llm/summary` endpoint.
 *
 * Field names are snake_case; the values are built from an
 * `EvaluationResult` plus a `ReportSummaryRequest` by
 * `QualifireClient.convertWithStructuredSummary`.
 */
interface QualifireReportPayload {
/** Copied from `ReportSummaryRequest.job_id`. */
job_id: string;
/** The evaluation results being reported. */
evaluations: EvaluationResult;
/** The request's structured summary, or `null` when none was supplied. */
structured: StructuredSummary | null;
/** The request's `deepTest` flag; the converter defaults it to `false`. */
deep_test: boolean;
/** The request's `startTime`; the converter defaults it to the current time as an ISO string. */
start_time: string;
/** The request's `judgeModel`, or `null` when missing or empty. */
judge_model: string | null;
}
Comment on lines +3 to +10
Copy link
Contributor

@coderabbitai coderabbitai bot Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Type mismatch: deep_test and start_time should be nullable.

The QualifireReportPayload interface defines deep_test: boolean and start_time: string as required fields. However, the source fields in ReportSummaryRequest are optional (deepTest?: boolean, startTime?: string). This creates a type mismatch where undefined values can be assigned to non-nullable fields in the payload, violating the type contract and potentially causing API errors.

Apply this diff to make the fields nullable:

 interface QualifireReportPayload {
   job_id: string;
   evaluations: EvaluationResult;
   structured: StructuredSummary | null;
-  deep_test: boolean;
-  start_time: string;
+  deep_test: boolean | null;
+  start_time: string | null;
   judge_model: string | null;
 }

Then update the mapping in convertWithStructuredSummary:

   return {
     job_id: request.job_id,
     evaluations: evaluationResults,
     structured: request.structuredSummary || null,
-    deep_test: request.deepTest,
-    start_time: request.startTime,
+    deep_test: request.deepTest ?? null,
+    start_time: request.startTime ?? null,
     judge_model: request.judgeModel || null,
   };

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In packages/sdk/src/qualifire.ts around lines 3 to 10, the
QualifireReportPayload currently declares deep_test: boolean and start_time:
string but the source ReportSummaryRequest has deepTest?: boolean and
startTime?: string, causing a type mismatch; change the interface to make
deep_test: boolean | null and start_time: string | null, and then update
convertWithStructuredSummary to map optional fields defensively (deep_test:
source.deepTest ?? null and start_time: source.startTime ?? null) so undefined
from the source becomes null in the payload.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ignorant05 I think this comment is correct

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!


/**
 * Optional behaviour hooks accepted by `QualifireClient` methods.
 */
export interface QualifireClientOptions {
/**
 * Optional sink for human-readable progress messages. When provided,
 * `reportSummaryToQualifire` calls it once before sending the report.
 */
logger?: (message: string) => void;
}

/**
 * Static helper for submitting evaluation summaries to the Qualifire API.
 */
export class QualifireClient {
  /** API origin used when the request does not supply `qualifireUrl`. */
  private static readonly DEFAULT_BASE_URL = "https://api.qualifire.com";

  /** How long the HTTP request may run before it is aborted, in milliseconds. */
  private static readonly REQUEST_TIMEOUT_MS = 30000;

  /**
   * Builds the snake_case report payload from the evaluation results and the
   * caller-supplied request metadata.
   *
   * @param evaluationResults - The evaluation results to embed in the payload
   * @param request - Request metadata (job id, structured summary, flags)
   * @returns The payload object that is serialised as the POST body
   */
  private static convertWithStructuredSummary(
    evaluationResults: EvaluationResult,
    request: ReportSummaryRequest
  ): QualifireReportPayload {
    return {
      job_id: request.job_id,
      evaluations: evaluationResults,
      structured: request.structuredSummary ?? null,
      deep_test: request.deepTest ?? false,
      // Missing start time falls back to "now" so the payload field is never undefined.
      start_time: request.startTime ?? new Date().toISOString(),
      // Keeps `||` so that an empty model name is also sent as null.
      judge_model: request.judgeModel || null,
    };
  }

  /**
   * Reports evaluation summary to Qualifire.
   *
   * The report is POSTed as JSON to `<qualifireUrl>/llm/summary` (trailing
   * slashes on the base URL are ignored) with the API key in the
   * `X-Qualifire-API-Key` header. The request is aborted after 30 seconds.
   *
   * @param evaluationResults - The evaluation results to report
   * @param request - Configuration including Qualifire URL, API key, and metadata
   * @param options - Optional hooks; `logger` receives a progress message
   * @throws {Error} If the API key is missing, the URL is invalid, the request
   *   times out, or the API returns a non-2xx status
   * @returns A promise that resolves when the report is successfully submitted
   */
  public static async reportSummaryToQualifire(
    evaluationResults: EvaluationResult,
    request: ReportSummaryRequest,
    options?: QualifireClientOptions
  ): Promise<void> {
    options?.logger?.("Reporting summary to Qualifire");

    const apiKey = request.qualifireApiKey;
    if (!apiKey) {
      throw new Error("qualifireApiKey is required but was undefined");
    }

    const baseUrl = request.qualifireUrl ?? QualifireClient.DEFAULT_BASE_URL;
    // The literal string "undefined" guards against callers that interpolated
    // an unset value into the URL.
    if (!baseUrl || baseUrl === "undefined") {
      throw new Error("Invalid qualifireUrl provided");
    }

    // Strip trailing slashes so "https://host/" does not yield "//llm/summary".
    const endpoint = `${baseUrl.replace(/\/+$/, "")}/llm/summary`;

    // Reference the class by name rather than `this` so the method still works
    // when it is passed around detached from the class.
    const payload = QualifireClient.convertWithStructuredSummary(
      evaluationResults,
      request
    );

    const controller = new AbortController();
    const timeoutId = setTimeout(
      () => controller.abort(),
      QualifireClient.REQUEST_TIMEOUT_MS
    );

    try {
      const response = await fetch(endpoint, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "X-Qualifire-API-Key": apiKey,
        },
        body: JSON.stringify(payload),
        signal: controller.signal,
      });

      if (!response.ok) {
        const errText = await response.text();
        throw new Error(
          `Qualifire report failed: ${response.status} ${response.statusText} - ${errText}`
        );
      }
    } catch (error) {
      // An abort triggered by the timer surfaces as an error named "AbortError".
      if (error instanceof Error && error.name === "AbortError") {
        throw new Error(
          `Qualifire report timed out after ${QualifireClient.REQUEST_TIMEOUT_MS / 1000} seconds`
        );
      }
      throw error;
    } finally {
      // Always release the timer, on success and on every failure path.
      clearTimeout(timeoutId);
    }
  }
}
17 changes: 17 additions & 0 deletions packages/sdk/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,23 @@ export interface RogueClientConfig {
retries?: number;
}

/**
 * Structured form of an evaluation summary.
 *
 * Carried on `ReportSummaryRequest.structuredSummary` and forwarded as the
 * `structured` field of the Qualifire report payload, hence the snake_case
 * member names.
 */
export interface StructuredSummary {
  /** Overall summary text. */
  overall_summary: string;
  /** List of key findings. */
  key_findings: string[];
  /** List of recommendations. */
  recommendations: string[];
  /** Per-item breakdown entries; the entry shape is not constrained here. */
  detailed_breakdown: object[];
}

/**
 * Input for reporting an evaluation summary to Qualifire: connection settings
 * plus the metadata that accompanies the evaluation results.
 */
export interface ReportSummaryRequest {
  /** Identifier of the job being reported; sent as `job_id`. */
  job_id: string;
  /** Base URL of the Qualifire API; the client falls back to its default when omitted. */
  qualifireUrl?: string;
  /** API key sent with the report; the client throws when it is missing. */
  qualifireApiKey?: string;
  /** Structured summary to include; reported as `null` when omitted. */
  structuredSummary?: StructuredSummary;
  /** Deep-test flag; reported as `false` when omitted. */
  deepTest?: boolean;
  /** Start time string; the client substitutes the current time when omitted. */
  startTime?: string;
  /** Judge model name; reported as `null` when omitted or empty. */
  judgeModel?: string;
}

// Event Types for WebSocket
export type WebSocketEventType =
| "job_update"
Expand Down
86 changes: 81 additions & 5 deletions packages/tui/internal/screens/evaluations/form_view.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,19 +159,95 @@ func RenderForm(state *FormState) string {
return fieldContainer.Render(fieldContent)
}

// renderDropdownField renders one label/value form row for a dropdown-style
// field. When fieldIndex is the currently selected field, the label and value
// are highlighted in the primary colour and the value is wrapped in
// left/right arrow indicators.
renderDropdownField := func(fieldIndex int, label, value string) string {
	active := m.evalState.currentField == fieldIndex

	// Fixed-width, right-aligned label column.
	labelStyle := lipgloss.NewStyle().
		Foreground(t.TextMuted()).
		Background(t.Background()).
		Width(20).
		Align(lipgloss.Right)

	valueStyle := lipgloss.NewStyle().
		Foreground(t.Text()).
		Background(t.Background()).
		Padding(0, 1)

	if active {
		labelStyle = labelStyle.Foreground(t.Primary()).Bold(true)
		valueStyle = valueStyle.
			Foreground(t.Primary()).
			Background(t.Background()).
			Bold(true)
		// Add dropdown indicators (the glyphs were previously mis-encoded).
		value = "◀ " + value + " ▶"
	}

	// Create a full-width container for the field
	fieldContainer := lipgloss.NewStyle().
		Width(m.width-4).
		Background(t.Background()).
		Padding(0, 2)

	fieldContent := lipgloss.JoinHorizontal(lipgloss.Left,
		labelStyle.Render(label),
		valueStyle.Render(value),
	)

	return fieldContainer.Render(fieldContent)
}

// renderToggleField renders one label/value form row for a toggle field.
// The row is highlighted in the primary colour when fieldIndex matches the
// currently selected field; the value text itself is rendered unchanged.
renderToggleField := func(fieldIndex int, label, value string) string {
	// Base look: muted, right-aligned label in a 20-column gutter.
	lbl := lipgloss.NewStyle().
		Foreground(t.TextMuted()).
		Background(t.Background()).
		Width(20).
		Align(lipgloss.Right)

	// Base look for the value: normal text with one cell of horizontal padding.
	val := lipgloss.NewStyle().
		Foreground(t.Text()).
		Background(t.Background()).
		Padding(0, 1)

	// Emphasise both parts when this row has focus.
	if m.evalState.currentField == fieldIndex {
		lbl = lbl.Foreground(t.Primary()).Bold(true)
		val = val.
			Foreground(t.Primary()).
			Background(t.Background()).
			Bold(true)
	}

	row := lipgloss.JoinHorizontal(lipgloss.Left,
		lbl.Render(label),
		val.Render(value),
	)

	// Wrap the row in a full-width container so the background fills the line.
	return lipgloss.NewStyle().
		Width(m.width-4).
		Background(t.Background()).
		Padding(0, 2).
		Render(row)
}

// Prepare field values
agent := state.AgentURL
protocol := state.Protocol
transport := state.Transport
judge := state.JudgeModel
agent := m.evalState.AgentURL
protocol := string(m.evalState.AgentProtocol)
transport := string(m.evalState.AgentTransport)
judge := m.evalState.JudgeModel
// Show the deep-test flag as a cross or check-mark glyph
// (the check mark was previously mis-encoded).
deep := "❌"
if state.DeepTest {
	deep = "✅"
}

// Helper function to render the start button
renderStartButton := func() string {
active := state.CurrentField == 5
active := m.evalState.currentField == 5
var buttonText string

if state.EvalSpinnerActive {
Expand Down
5 changes: 5 additions & 0 deletions packages/tui/internal/tui/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ type AgentConfig struct {
InterviewMode bool `json:"interview_mode"`
DeepTestMode bool `json:"deep_test_mode"`
ParallelRuns int `json:"parallel_runs"`

// EvalScenario describes a single evaluation scenario as it is encoded in JSON.
type EvalScenario struct {
// Scenario is encoded under the "scenario" key.
Scenario string `json:"scenario"`
// ScenarioType is encoded under the "scenario_type" key; the ScenarioType
// type is declared outside this view.
ScenarioType ScenarioType `json:"scenario_type"`
// ExpectedOutcome is encoded under "expected_outcome" and omitted when empty.
ExpectedOutcome string `json:"expected_outcome,omitempty"`
}

type EvaluationRequest struct {
Expand Down
1 change: 1 addition & 0 deletions rogue/evaluator_agent/base_evaluator_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ def _log_evaluation(
context_id: str,
evaluation_passed: bool,
reason: str,
scenario_type: Optional[str],
**kwargs,
) -> None:
"""
Expand Down