Commit 3de2525: "Updates"

1 parent 28f4ad1

File tree

11 files changed: +228 -45 lines
Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+---
+"evalite": patch
+---
+
+Added a cache debug mode via the cacheDebug option in runEvalite to debug cache hits and misses.
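For reference, this is how the new flag reaches the runner when it is invoked programmatically. A minimal sketch, not the package's documented API: the import path and the surrounding options are assumptions; only cacheDebug itself comes from this commit.

import { runEvalite } from "evalite/runner"; // hypothetical import path

await runEvalite({
  cwd: process.cwd(),
  mode: "run-once-and-exit",
  cacheDebug: true, // print [CACHE] hit/miss lines for tasks and scorers
});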

apps/evalite-ui/app/routes/suite.$name.tsx

Lines changed: 2 additions & 1 deletion
@@ -129,7 +129,8 @@ function EvalTableRow({
             <Zap className="size-4 text-accent-foreground" />
           </TooltipTrigger>
           <TooltipContent>
-            {cacheHitCount} cache {cacheHitCount === 1 ? "hit" : "hits"}
+            {cacheHitCount} LLM{" "}
+            {cacheHitCount === 1 ? "call was cached" : "calls were cached"}
           </TooltipContent>
         </Tooltip>
       </TableCell>
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+import { expect, it } from "vitest";
+import { loadFixture } from "./test-utils.js";
+
+it("Should cache AI SDK in the task and scorers", async () => {
+  await using fixture = await loadFixture("ai-sdk-caching");
+
+  // First run - should log cache misses
+  await fixture.run({
+    mode: "run-once-and-exit",
+    cacheDebug: true,
+  });
+
+  // Second run - should log cache hits
+  await fixture.run({
+    mode: "run-once-and-exit",
+    cacheDebug: true,
+  });
+
+  const output = fixture.getOutput();
+
+  const storage = fixture.storage;
+
+  const runs = await storage.runs.getMany();
+
+  expect(runs).toHaveLength(2);
+
+  const allLogs = fixture.getOutput().split("\n");
+
+  const cachelogs = allLogs.filter((log) => log.includes("[CACHE]"));
+  expect(cachelogs.length).toBeGreaterThan(0);
+  expect(cachelogs.some((log) => log.includes("Task cache HIT"))).toBe(true);
+  expect(cachelogs.some((log) => log.includes("Scorer cache HIT"))).toBe(true);
+  expect(cachelogs.some((log) => log.includes("saved"))).toBe(true);
+});
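On the second run, the assertions above expect the captured output to contain lines in this shape (format taken from the console.log templates in packages/evalite/src/evalite.ts below; the durations are illustrative):

[CACHE] Task cache HIT (saved 142ms)
[CACHE] Scorer cache HIT (saved 38ms)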
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+import { generateText } from "ai";
+import { MockLanguageModelV2 } from "ai/test";
+import { wrapAISDKModel } from "evalite/ai-sdk";
+import { evalite } from "evalite";
+
+const model = new MockLanguageModelV2({
+  doGenerate: async (options) => ({
+    rawCall: { rawPrompt: null, rawSettings: {} },
+    finishReason: "stop",
+    usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 },
+    content: [{ type: "text", text: `Response for task` }],
+    warnings: [],
+    providerMetadata: undefined,
+    request: undefined,
+    response: undefined,
+  }),
+});
+
+const scorerModel = new MockLanguageModelV2({
+  doGenerate: async (options) => ({
+    rawCall: { rawPrompt: null, rawSettings: {} },
+    finishReason: "stop",
+    usage: { inputTokens: 5, outputTokens: 10, totalTokens: 15 },
+    content: [{ type: "text", text: `1` }],
+    warnings: [],
+    providerMetadata: undefined,
+    request: undefined,
+    response: undefined,
+  }),
+});
+
+const tracedModel = wrapAISDKModel(model);
+const tracedScorerModel = wrapAISDKModel(scorerModel);
+
+evalite("AI SDK Caching", {
+  data: () => {
+    return [
+      {
+        input: "test input 1",
+        expected: "expected output 1",
+      },
+      {
+        input: "test input 2",
+        expected: "expected output 2",
+      },
+    ];
+  },
+  task: async (input) => {
+    const result = await generateText({
+      model: tracedModel,
+      prompt: input,
+    });
+    return result.text;
+  },
+  scorers: [
+    {
+      name: "AI Scorer",
+      scorer: async ({ input, output, expected }) => {
+        const result = await generateText({
+          model: tracedScorerModel,
+          prompt: `Score this: ${output}`,
+        });
+        return { score: 1 };
+      },
+    },
+  ],
+});
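The fixture wraps both mock models with the default options. Per the wrapper changes below, tracing and caching can each be opted out and both default to true; a short sketch (option names taken from the options?.tracing and options?.caching reads in ai-sdk.ts):

const cachedOnly = wrapAISDKModel(model, { tracing: false });
const tracedOnly = wrapAISDKModel(model, { caching: false });
// With both disabled the wrapper returns the original model untouched:
const plain = wrapAISDKModel(model, { tracing: false, caching: false });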

packages/evalite-tests/tests/test-utils.ts

Lines changed: 2 additions & 0 deletions
@@ -61,13 +61,15 @@ export const loadFixture = async (
      * Set this to true if your test needs the server running (e.g., for cache functionality).
      */
     enableServer?: boolean;
+    cacheDebug?: boolean;
   }) => {
     const result = await runEvalite({
       ...opts,
       cwd: dirPath,
       storage,
       testOutputWritable: captured.writable,
       disableServer: !opts.enableServer,
+      cacheDebug: opts.cacheDebug ?? false,
     });
     vitestInstance = result.vitest;
     return vitestInstance;

packages/evalite/src/ai-sdk.ts

Lines changed: 30 additions & 30 deletions
@@ -4,7 +4,7 @@ import type {
   LanguageModelV2StreamPart,
 } from "@ai-sdk/provider";
 import { wrapLanguageModel } from "ai";
-import { reportTrace, shouldReportTrace } from "./traces.js";
+import { reportTraceLocalStorage } from "./traces.js";
 import { getCacheContext, generateCacheKey } from "./cache.js";
@@ -95,14 +95,8 @@
   const enableTracing = options?.tracing ?? true;
   const enableCaching = options?.caching ?? true;

-  const context = getCacheContext();
-  const cachingAvailable =
-    enableCaching && (context?.cacheEnabled ?? true) && context?.serverPort;
-
-  const tracingAvailable = enableTracing && shouldReportTrace();
-
   // If neither is enabled/available, return original model
-  if (!cachingAvailable && !tracingAvailable) {
+  if (!enableCaching && !enableTracing) {
     return model;
   }

@@ -112,10 +106,10 @@
     wrapGenerate: async (opts) => {
       const start = performance.now();
       let result: Awaited<ReturnType<typeof opts.doGenerate>> | undefined;
+      const cacheContext = getCacheContext();

       // Try cache if enabled
-      if (cachingAvailable) {
-        const context = getCacheContext()!;
+      if (cacheContext) {
         const keyHash = generateCacheKey({
           model: model.modelId,
           params: opts.params,
@@ -125,7 +119,7 @@

         try {
           const cacheResponse = await fetch(
-            `http://localhost:${context.serverPort}/api/cache/${keyHash}`
+            `http://localhost:${cacheContext.serverPort}/api/cache/${keyHash}`
           );

           if (cacheResponse.ok) {
@@ -134,7 +128,7 @@
               duration: number;
             };
             if (cached?.value) {
-              context.reportCacheHit({
+              cacheContext.reportCacheHit({
                 keyHash,
                 hit: true,
                 savedDuration: cached.duration,
@@ -161,8 +155,7 @@
       const duration = performance.now() - start;

       // Store in cache if caching enabled
-      if (cachingAvailable) {
-        const context = getCacheContext()!;
+      if (cacheContext) {
         const keyHash = generateCacheKey({
           model: model.modelId,
           params: opts.params,
@@ -172,7 +165,7 @@

         try {
           await fetch(
-            `http://localhost:${context.serverPort}/api/cache/${keyHash}`,
+            `http://localhost:${cacheContext.serverPort}/api/cache/${keyHash}`,
             {
               method: "POST",
               headers: { "Content-Type": "application/json" },
@@ -183,12 +176,18 @@
           console.warn("Cache write failed:", error);
         }

-        context.reportCacheHit({ keyHash, hit: false, savedDuration: 0 });
+        cacheContext.reportCacheHit({
+          keyHash,
+          hit: false,
+          savedDuration: 0,
+        });
       }

+      const reportTraceFromContext = reportTraceLocalStorage.getStore();
+
       // Report trace if enabled
-      if (tracingAvailable) {
+      if (reportTraceFromContext) {
         const end = performance.now();
         const textContent = result.content
           .filter((c) => c.type === "text")
@@ -208,7 +207,7 @@
           )
           .filter(Boolean);

-        reportTrace({
+        reportTraceFromContext({
           output: {
             text: textContent,
             toolCalls,
@@ -230,9 +229,11 @@
       const start = performance.now();
       let cachedParts: LanguageModelV2StreamPart[] | undefined;

+      const cacheContext = getCacheContext();
+      const reportTraceFromContext = reportTraceLocalStorage.getStore();
+
       // Try cache if enabled
-      if (cachingAvailable) {
-        const context = getCacheContext()!;
+      if (cacheContext) {
         const keyHash = generateCacheKey({
           model: model.modelId,
           params: params,
@@ -242,7 +243,7 @@

         try {
           const cacheResponse = await fetch(
-            `http://localhost:${context.serverPort}/api/cache/${keyHash}`
+            `http://localhost:${cacheContext.serverPort}/api/cache/${keyHash}`
           );

           if (cacheResponse.ok) {
@@ -251,7 +252,7 @@
               duration: number;
             };
             if (cached?.value) {
-              context.reportCacheHit({
+              cacheContext.reportCacheHit({
                 keyHash,
                 hit: true,
                 savedDuration: cached.duration,
@@ -260,12 +261,12 @@
              cachedParts = cached.value as LanguageModelV2StreamPart[];

              // If tracing enabled, report trace for cached stream
-              if (tracingAvailable) {
+              if (reportTraceFromContext) {
                const usage = cachedParts.find(
                  (part) => part.type === "finish"
                )?.usage;

-                reportTrace({
+                reportTraceFromContext({
                  start,
                  end: performance.now(),
                  input: processPromptForTracing(params.prompt),
@@ -319,8 +320,7 @@
       const duration = performance.now() - start;

       // Store in cache if enabled
-      if (cachingAvailable) {
-        const context = getCacheContext()!;
+      if (cacheContext) {
         const keyHash = generateCacheKey({
           model: model.modelId,
           params: params,
@@ -330,7 +330,7 @@

         try {
           await fetch(
-            `http://localhost:${context.serverPort}/api/cache/${keyHash}`,
+            `http://localhost:${cacheContext.serverPort}/api/cache/${keyHash}`,
             {
               method: "POST",
               headers: { "Content-Type": "application/json" },
@@ -344,20 +344,20 @@
           console.warn("Cache write failed:", error);
         }

-        context.reportCacheHit({
+        cacheContext.reportCacheHit({
           keyHash,
           hit: false,
           savedDuration: 0,
         });
       }

       // Report trace if enabled
-      if (tracingAvailable) {
+      if (reportTraceFromContext) {
         const usage = fullResponse.find(
           (part) => part.type === "finish"
         )?.usage;

-        reportTrace({
+        reportTraceFromContext({
           start,
           end: performance.now(),
           input: processPromptForTracing(params.prompt),
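The pattern repeated across these hunks: availability is no longer computed once at wrap time (the old cachingAvailable / tracingAvailable flags) but read from AsyncLocalStorage on every call, so a model wrapped at module scope still sees the context the runner enters later. A minimal sketch of that pattern, with the trace payload simplified (the traces.js internals are not shown in this commit):

import { AsyncLocalStorage } from "node:async_hooks";

type Trace = { start: number; end: number };

const reportTraceLocalStorage = new AsyncLocalStorage<(trace: Trace) => void>();

const traces: Trace[] = [];

// The runner enters a reporter for the current async scope...
reportTraceLocalStorage.enterWith((trace) => traces.push(trace));

// ...and the wrapped model looks it up lazily, per call:
const reportTrace = reportTraceLocalStorage.getStore();
reportTrace?.({ start: 0, end: performance.now() });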

packages/evalite/src/cache.ts

Lines changed: 0 additions & 1 deletion
@@ -5,7 +5,6 @@ export type CacheContextConfig = {
   trialCount: number | undefined;
   evalName: string;
   serverPort: number;
-  cacheEnabled: boolean;
 };

 export interface CacheContext extends CacheContextConfig {
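With cacheEnabled gone, the on/off switch is simply whether a cache context exists in the current async scope (the if (cacheContext) checks above). For reference, the resulting shape; the reportCacheHit signature is inferred from its call sites rather than shown verbatim in this commit:

export type CacheContextConfig = {
  trialCount: number | undefined;
  evalName: string;
  serverPort: number;
};

export interface CacheContext extends CacheContextConfig {
  reportCacheHit: (hit: {
    keyHash: string;
    hit: boolean;
    savedDuration: number;
  }) => void;
}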

packages/evalite/src/evalite.ts

Lines changed: 17 additions & 2 deletions
@@ -60,6 +60,7 @@ const runTask = async <TInput, TOutput, TExpected, TVariant = undefined>(
     variant: TVariant;
     traces: Evalite.Trace[];
     cacheContext: CacheContextConfig;
+    cacheDebug: boolean;
   } & Omit<Evalite.RunnerOpts<TInput, TOutput, TExpected, TVariant>, "data">
 ) => {
   const start = performance.now();
@@ -81,6 +82,11 @@
       ...opts.cacheContext,
       reportCacheHit: (hit) => {
         scorerCacheHits.push(hit);
+        if (opts.cacheDebug) {
+          console.log(
+            `[CACHE] Scorer cache ${hit.hit ? "HIT" : "MISS"}${hit.hit ? ` (saved ${hit.savedDuration.toFixed(0)}ms)` : ""}`
+          );
+        }
       },
     },
     async (): Promise<Evalite.ScoreWithCacheHits> => {
@@ -347,12 +353,20 @@ function registerEvalite<TInput, TOutput, TExpected>(
     trialCount: inject("trialCount"),
     evalName: evalName,
     serverPort: inject("serverPort"),
-    cacheEnabled: inject("cacheEnabled"),
   };

+  const cacheDebug = inject("cacheDebug");
+
   cacheContextLocalStorage.enterWith({
     ...cacheContext,
-    reportCacheHit: (hit) => taskCacheHits.push(hit),
+    reportCacheHit: (hit) => {
+      taskCacheHits.push(hit);
+      if (cacheDebug) {
+        console.log(
+          `[CACHE] Task cache HIT (saved ${hit.savedDuration.toFixed(0)}ms)`
+        );
+      }
+    },
   });

@@ -375,6 +389,7 @@
     columns: opts.columns,
     traces,
     cacheContext,
+    cacheDebug,
   });

   const [outputWithFiles, tracesWithFiles, renderedColumns] =
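The inject("cacheDebug") call needs a matching provide on the runner side, which lives in one of the files this view does not expand. A hypothetical sketch of that side, assuming Vitest's standard globalSetup provide/inject mechanism:

import type { GlobalSetupContext } from "vitest/node";

export default function globalSetup({ provide }: GlobalSetupContext) {
  // Hypothetical: mirrors how serverPort and trialCount would be provided.
  provide("cacheDebug", true);
}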
