Skip to content

Commit c1a2208

Browse files
committed
Showed cache hits in the UI
1 parent 3ce0ac4 commit c1a2208

File tree

10 files changed

+103
-41
lines changed

10 files changed

+103
-41
lines changed

apps/evalite-ui/app/constants.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
export const BASE_URL = import.meta.env.DEV
2+
? "http://localhost:3006"
3+
: window.location.origin;

apps/evalite-ui/app/data/use-subscribe-to-socket.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import type { QueryClient } from "@tanstack/react-query";
33
import { getServerStateQueryOptions } from "./queries";
44
import type { Evalite } from "evalite/types";
55
import { isStaticMode } from "~/sdk";
6+
import { BASE_URL } from "~/constants";
67

78
export const useSubscribeToSocket = (queryClient: QueryClient) => {
89
useEffect(() => {
@@ -11,9 +12,7 @@ export const useSubscribeToSocket = (queryClient: QueryClient) => {
1112
return;
1213
}
1314

14-
const socket = new WebSocket(
15-
`${window.location.origin}/api/socket`
16-
);
15+
const socket = new WebSocket(`${BASE_URL}/api/socket`);
1716

1817
socket.onmessage = async (event) => {
1918
const newState: Evalite.ServerState = JSON.parse(event.data);

apps/evalite-ui/app/routes/suite.$name.tsx

Lines changed: 59 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { createFileRoute } from "@tanstack/react-router";
22
import { z } from "zod";
33
import { zodValidator } from "@tanstack/zod-adapter";
44
import { Link, Outlet, useMatches } from "@tanstack/react-router";
5-
import { XCircleIcon } from "lucide-react";
5+
import { XCircleIcon, Zap } from "lucide-react";
66
import type * as React from "react";
77

88
import { DisplayInput } from "~/components/display-input";
@@ -19,6 +19,11 @@ import {
1919
TableHeader,
2020
TableRow,
2121
} from "~/components/ui/table";
22+
import {
23+
Tooltip,
24+
TooltipTrigger,
25+
TooltipContent,
26+
} from "~/components/ui/tooltip";
2227
import { cn } from "~/lib/utils";
2328
import { formatTime, isArrayOfRenderedColumns } from "~/utils";
2429
import { useServerStateUtils } from "~/hooks/use-server-state-utils";
@@ -65,6 +70,7 @@ type EvalTableRowProps = {
6570
isRunningEval: boolean;
6671
hasScores: boolean;
6772
prevSuite: Evalite.SDK.GetSuiteByNameResult["prevSuite"];
73+
cacheHitCount: number;
6874
trialConfig?: {
6975
isFirstTrial: boolean;
7076
rowSpan: number;
@@ -104,17 +110,31 @@ function EvalTableRow({
104110
isRunningEval,
105111
hasScores,
106112
prevSuite: prevEvaluation,
113+
cacheHitCount,
107114
trialConfig,
108115
}: EvalTableRowProps) {
109116
const Wrapper = useMemo(
110117
() => makeWrapper({ evalIndex, timestamp, name }),
111118
[evalIndex, timestamp, name]
112119
);
120+
113121
return (
114122
<TableRow className={cn("has-[.active]:bg-foreground/20!")}>
123+
{cacheHitCount > 0 && (
124+
<TableCell className="pt-4 pl-4">
125+
<Tooltip>
126+
<TooltipTrigger asChild>
127+
<Zap className="size-4 text-accent-foreground" />
128+
</TooltipTrigger>
129+
<TooltipContent>
130+
{cacheHitCount} cache {cacheHitCount === 1 ? "hit" : "hits"}
131+
</TooltipContent>
132+
</Tooltip>
133+
</TableCell>
134+
)}
115135
{isArrayOfRenderedColumns(_eval.rendered_columns) ? (
116136
<>
117-
{_eval.rendered_columns.map((column) => (
137+
{_eval.rendered_columns.map((column, index) => (
118138
<TableCell>
119139
<DisplayInput
120140
className={cn(
@@ -185,15 +205,17 @@ function EvalTableRow({
185205
return (
186206
<TableCell key={scorer.id} className={cn(index === 0 && "border-l")}>
187207
<Wrapper>
188-
<Score
189-
hasScores={hasScores}
190-
score={scorer.score}
191-
state={getScoreState({
192-
score: scorer.score,
193-
prevScore: scoreInPreviousEvaluation?.score,
194-
status: _eval.status,
195-
})}
196-
/>
208+
<div className="flex items-center gap-2">
209+
<Score
210+
hasScores={hasScores}
211+
score={scorer.score}
212+
state={getScoreState({
213+
score: scorer.score,
214+
prevScore: scoreInPreviousEvaluation?.score,
215+
status: _eval.status,
216+
})}
217+
/>
218+
</div>
197219
</Wrapper>
198220
</TableCell>
199221
);
@@ -326,6 +348,10 @@ function SuiteComponent() {
326348
const hasScores =
327349
possiblyRunningSuite.evals.some((r) => r.scores.length > 0) ?? true;
328350

351+
const doAnyEvalsHaveCacheHits = Object.values(
352+
serverState.cacheHitsByEval
353+
).some((hits) => hits > 0);
354+
329355
return (
330356
<>
331357
<title>{`${name} | Evalite`}</title>
@@ -474,6 +500,7 @@ function SuiteComponent() {
474500
<Table>
475501
<TableHeader>
476502
<TableRow>
503+
{doAnyEvalsHaveCacheHits && <TableHead></TableHead>}
477504
{isArrayOfRenderedColumns(
478505
evaluationWithoutLayoutShift.evals[0]?.rendered_columns
479506
) ? (
@@ -512,6 +539,8 @@ function SuiteComponent() {
512539
group.evals.map((_eval, trialIndex) => {
513540
const evalIndex =
514541
evaluationWithoutLayoutShift!.evals.indexOf(_eval);
542+
const cacheHitCount =
543+
serverState.cacheHitsByEval[_eval.id] ?? 0;
515544
return (
516545
<EvalTableRow
517546
key={`${JSON.stringify(_eval.input)}-${_eval.trial_index}`}
@@ -523,6 +552,7 @@ function SuiteComponent() {
523552
isRunningEval={isRunningEval}
524553
hasScores={hasScores}
525554
prevSuite={prevSuite}
555+
cacheHitCount={cacheHitCount}
526556
trialConfig={{
527557
isFirstTrial: trialIndex === 0,
528558
rowSpan: group.evals.length,
@@ -533,19 +563,24 @@ function SuiteComponent() {
533563
})
534564
)
535565
: // Original rendering for non-trial results
536-
evaluationWithoutLayoutShift.evals.map((_eval, index) => (
537-
<EvalTableRow
538-
key={JSON.stringify(_eval.input)}
539-
eval={_eval}
540-
evalIndex={index}
541-
name={name}
542-
timestamp={timestamp}
543-
showExpectedColumn={showExpectedColumn}
544-
isRunningEval={isRunningEval}
545-
hasScores={hasScores}
546-
prevSuite={prevSuite}
547-
/>
548-
))}
566+
evaluationWithoutLayoutShift.evals.map((_eval, index) => {
567+
const cacheHitCount =
568+
serverState.cacheHitsByEval[_eval.id] ?? 0;
569+
return (
570+
<EvalTableRow
571+
key={JSON.stringify(_eval.input)}
572+
eval={_eval}
573+
evalIndex={index}
574+
name={name}
575+
timestamp={timestamp}
576+
showExpectedColumn={showExpectedColumn}
577+
isRunningEval={isRunningEval}
578+
hasScores={hasScores}
579+
prevSuite={prevSuite}
580+
cacheHitCount={cacheHitCount}
581+
/>
582+
);
583+
})}
549584
</TableBody>
550585
</Table>
551586
</>

apps/evalite-ui/app/sdk.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { notFound } from "@tanstack/react-router";
22
import type { Evalite } from "evalite/types";
3-
4-
const BASE_URL = window.location.origin;
3+
import { BASE_URL } from "./constants";
54

65
declare global {
76
interface Window {

packages/evalite/src/evalite.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
208208
output: datasetResult.error,
209209
scores: [],
210210
traces: [],
211+
cacheHits: [],
211212
renderedColumns: [],
212213
},
213214
})
@@ -362,6 +363,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
362363
output: serializableOutput,
363364
scores,
364365
traces: tracesWithFiles,
366+
cacheHits,
365367
status: "success",
366368
renderedColumns,
367369
variantName: vitestOpts.variantName,
@@ -397,6 +399,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
397399
output: serializedError,
398400
scores: [],
399401
traces: await handleFilesInTraces(rootDir, traces),
402+
cacheHits,
400403
status: "fail",
401404
renderedColumns: [],
402405
variantName: vitestOpts.variantName,

packages/evalite/src/export-static.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ export const exportStaticUI = async (
232232
// Generate server-state.json
233233
const serverState: Evalite.ServerState = {
234234
type: "idle",
235+
cacheHitsByEval: {},
235236
};
236237
await fs.writeFile(
237238
path.join(dataDir, "server-state.json"),

packages/evalite/src/reporter.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ export default class EvaliteReporter implements Reporter {
368368
output: null,
369369
scores: [],
370370
traces: [],
371+
cacheHits: [],
371372
status: "fail",
372373
renderedColumns: [],
373374
variantName: data.initialEval.variantName,

packages/evalite/src/reporter/EvaliteRunner.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export interface EvaliteRunnerOptions {
1010

1111
export class EvaliteRunner {
1212
private opts: EvaliteRunnerOptions;
13-
private state: Evalite.ServerState = { type: "idle" };
13+
private state: Evalite.ServerState = { type: "idle", cacheHitsByEval: {} };
1414
private didLastRunFailThreshold: "yes" | "no" | "unknown" = "unknown";
1515
private collectedResults: Map<string, Evalite.Eval> = new Map();
1616
private eventQueue: Promise<void> = Promise.resolve();
@@ -98,7 +98,10 @@ export class EvaliteRunner {
9898
case "running":
9999
switch (event.type) {
100100
case "RUN_ENDED":
101-
this.updateState({ type: "idle" });
101+
this.updateState({
102+
type: "idle",
103+
cacheHitsByEval: this.state.cacheHitsByEval,
104+
});
102105
break;
103106
case "EVAL_STARTED":
104107
{
@@ -247,6 +250,14 @@ export class EvaliteRunner {
247250
});
248251
}
249252

253+
// Count cache hits for this eval
254+
const cacheHitCount = event.eval.cacheHits.filter(
255+
(hit) => hit.hit
256+
).length;
257+
if (cacheHitCount > 0) {
258+
this.state.cacheHitsByEval[evalId] = cacheHitCount;
259+
}
260+
250261
const allEvals = await this.opts.storage.evals.getMany({
251262
suiteIds: [suite.id],
252263
});
@@ -295,6 +306,7 @@ export class EvaliteRunner {
295306
case "RUN_BEGUN":
296307
// Clear collected results for new run
297308
this.collectedResults.clear();
309+
this.state.cacheHitsByEval = {};
298310

299311
this.updateState({
300312
filepaths: event.filepaths,
@@ -303,6 +315,7 @@ export class EvaliteRunner {
303315
runId: undefined, // Run is created lazily
304316
suiteNamesRunning: [],
305317
evalIdsRunning: [],
318+
cacheHitsByEval: this.state.cacheHitsByEval,
306319
});
307320
break;
308321
}

packages/evalite/src/server.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,18 @@ export type Server = ReturnType<typeof createServer>;
1111

1212
const THROTTLE_TIME = 100;
1313

14+
const INITIAL_STATE: Evalite.IdleServerState = {
15+
type: "idle",
16+
cacheHitsByEval: {},
17+
};
18+
1419
export const handleWebsockets = (server: fastify.FastifyInstance) => {
1520
const websocketListeners = new Map<
1621
string,
1722
(event: Evalite.ServerState) => void
1823
>();
1924

20-
let currentState: Evalite.ServerState = {
21-
type: "idle",
22-
};
25+
let currentState: Evalite.ServerState = INITIAL_STATE;
2326

2427
let timeout: NodeJS.Timeout | undefined;
2528

packages/evalite/src/types.ts

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,20 +165,24 @@ export declare namespace Evalite {
165165

166166
export type RunType = "full" | "partial";
167167

168-
export type RunningServerState = {
168+
export interface SharedServerState {
169+
cacheHitsByEval: Record<number, number>;
170+
}
171+
172+
export interface RunningServerState extends SharedServerState {
169173
type: "running";
170174
runType: RunType;
171175
filepaths: string[];
172176
runId: number | bigint | undefined;
173177
suiteNamesRunning: string[];
174178
evalIdsRunning: (number | bigint)[];
175-
};
179+
}
176180

177-
export type ServerState =
178-
| RunningServerState
179-
| {
180-
type: "idle";
181-
};
181+
export interface IdleServerState extends SharedServerState {
182+
type: "idle";
183+
}
184+
185+
export type ServerState = RunningServerState | IdleServerState;
182186

183187
export type MaybePromise<T> = T | Promise<T>;
184188

@@ -220,6 +224,7 @@ export declare namespace Evalite {
220224
scores: Score[];
221225
duration: number;
222226
traces: Trace[];
227+
cacheHits: Array<{ keyHash: string; hit: boolean; savedDuration: number }>;
223228
renderedColumns: RenderedColumn[];
224229
}
225230

0 commit comments

Comments (0)