Skip to content

Commit 0061595

Browse files
committed
Fixed bugs and added tests
1 parent 3de2525 commit 0061595

File tree

9 files changed

+223
-33
lines changed

9 files changed

+223
-33
lines changed

packages/evalite-tests/tests/ai-sdk-caching.test.ts

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,6 @@ it("Should cache AI SDK in the task and scorers", async () => {
1616
cacheDebug: true,
1717
});
1818

19-
const output = fixture.getOutput();
20-
21-
const storage = fixture.storage;
22-
23-
const runs = await storage.runs.getMany();
24-
25-
expect(runs).toHaveLength(2);
26-
2719
const allLogs = fixture.getOutput().split("\n");
2820

2921
const cachelogs = allLogs.filter((log) => log.includes("[CACHE]"));
@@ -32,3 +24,65 @@ it("Should cache AI SDK in the task and scorers", async () => {
3224
expect(cachelogs.some((log) => log.includes("Scorer cache HIT"))).toBe(true);
3325
expect(cachelogs.some((log) => log.includes("saved"))).toBe(true);
3426
});
27+
28+
/**
 * Runs the "ai-sdk-caching" fixture twice with `cacheEnabled: false` and
 * asserts that no "[CACHE]" line shows up in the captured output.
 */
it("Should disable cache when cacheEnabled is false", async () => {
  await using fixture = await loadFixture("ai-sdk-caching");

  // `enableServer` is documented in test-utils as required for cache
  // functionality. Without it the assertion below could hold even if
  // `cacheEnabled` were ignored, so the server is switched on here.
  const runOpts = {
    mode: "run-once-and-exit",
    enableServer: true,
    cacheDebug: true,
    cacheEnabled: false,
  } as const;

  // Two identical runs: a working cache would be populated by the first run
  // and hit by the second one.
  await fixture.run(runOpts);
  await fixture.run(runOpts);

  const cacheLogs = fixture
    .getOutput()
    .split("\n")
    .filter((log) => log.includes("[CACHE]"));

  // toHaveLength prints the offending log lines on failure.
  expect(cacheLogs).toHaveLength(0);
});
47+
48+
/**
 * Runs the "ai-sdk-caching-config-disabled" fixture twice without passing
 * `cacheEnabled`, so the fixture's own config decides whether the cache is on,
 * and asserts that no "[CACHE]" line shows up in the captured output.
 */
it("Should respect cacheEnabled: false in config", async () => {
  await using fixture = await loadFixture("ai-sdk-caching-config-disabled");

  // `enableServer` is documented in test-utils as required for cache
  // functionality. Without it the assertion below could hold even if the
  // config value were ignored, so the server is switched on here.
  // `cacheEnabled` is deliberately left out: the config must be the only
  // source of the setting.
  const runOpts = {
    mode: "run-once-and-exit",
    enableServer: true,
    cacheDebug: true,
  } as const;

  // First run.
  await fixture.run(runOpts);

  // Second run - should still not cache because the config disables it.
  await fixture.run(runOpts);

  const cacheLogs = fixture
    .getOutput()
    .split("\n")
    .filter((log) => log.includes("[CACHE]"));

  // toHaveLength prints the offending log lines on failure.
  expect(cacheLogs).toHaveLength(0);
});
67+
68+
/**
 * Runs the "ai-sdk-caching-config-precedence" fixture twice with an explicit
 * `cacheEnabled: false` and asserts that no "[CACHE]" line shows up in the
 * captured output, i.e. the run option wins over the fixture's config.
 */
it("Should let runEvalite cacheEnabled override config cacheEnabled", async () => {
  await using fixture = await loadFixture("ai-sdk-caching-config-precedence");

  // The fixture's config is expected to set `cache: true`; the explicit
  // `cacheEnabled: false` passed here must take precedence over it.
  // `enableServer` is documented in test-utils as required for cache
  // functionality. Without it the assertion below could hold even if the
  // override were ignored, so the server is switched on here.
  const runOpts = {
    mode: "run-once-and-exit",
    enableServer: true,
    cacheDebug: true,
    cacheEnabled: false,
  } as const;

  // Two identical runs: a working cache would be populated by the first run
  // and hit by the second one.
  await fixture.run(runOpts);
  await fixture.run(runOpts);

  const cacheLogs = fixture
    .getOutput()
    .split("\n")
    .filter((log) => log.includes("[CACHE]"));

  // Should have no cache logs because runEvalite overrides the config.
  expect(cacheLogs).toHaveLength(0);
});
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { generateText } from "ai";
2+
import { MockLanguageModelV2 } from "ai/test";
3+
import { wrapAISDKModel } from "evalite/ai-sdk";
4+
import { evalite } from "evalite";
5+
6+
/** Token usage figures returned by a mocked `doGenerate` call. */
type MockUsage = {
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
};

/**
 * Builds a mock language model whose `doGenerate` ignores its arguments and
 * always resolves to a single text part.
 *
 * @param text - Text placed in the single `content` part of the response.
 * @param usage - Token usage reported alongside the response.
 */
const createMockModel = (text: string, usage: MockUsage) =>
  new MockLanguageModelV2({
    doGenerate: async () => ({
      rawCall: { rawPrompt: null, rawSettings: {} },
      finishReason: "stop",
      usage,
      content: [{ type: "text", text }],
      warnings: [],
      providerMetadata: undefined,
      request: undefined,
      response: undefined,
    }),
  });

// Both mocks are wrapped with wrapAISDKModel before use; the task and the
// scorer each get their own wrapped model.
const tracedModel = wrapAISDKModel(
  createMockModel("Response for task", {
    inputTokens: 10,
    outputTokens: 20,
    totalTokens: 30,
  })
);
const tracedScorerModel = wrapAISDKModel(
  createMockModel("1", { inputTokens: 5, outputTokens: 10, totalTokens: 15 })
);

evalite("AI SDK Caching Config Disabled", {
  data: () => [
    {
      input: "test input 1",
      expected: "expected output 1",
    },
  ],
  task: async (input) => {
    const { text } = await generateText({
      model: tracedModel,
      prompt: input,
    });
    return text;
  },
  scorers: [
    {
      name: "AI Scorer",
      scorer: async ({ output }) => {
        // The generated text is discarded on purpose: the call only exists to
        // send a request through the wrapped scorer model.
        await generateText({
          model: tracedScorerModel,
          prompt: `Score this: ${output}`,
        });
        return { score: 1 };
      },
    },
  ],
});
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import { defineConfig } from "evalite/config";
2+
3+
// Fixture config: `cache: false` turns the AI SDK output cache off unless a
// run explicitly passes its own `cacheEnabled` value.
const config = defineConfig({
  cache: false,
});

export default config;
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { generateText } from "ai";
2+
import { MockLanguageModelV2 } from "ai/test";
3+
import { wrapAISDKModel } from "evalite/ai-sdk";
4+
import { evalite } from "evalite";
5+
6+
/** Token usage figures returned by a mocked `doGenerate` call. */
type MockUsage = {
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
};

/**
 * Builds a mock language model whose `doGenerate` ignores its arguments and
 * always resolves to a single text part.
 *
 * @param text - Text placed in the single `content` part of the response.
 * @param usage - Token usage reported alongside the response.
 */
const createMockModel = (text: string, usage: MockUsage) =>
  new MockLanguageModelV2({
    doGenerate: async () => ({
      rawCall: { rawPrompt: null, rawSettings: {} },
      finishReason: "stop",
      usage,
      content: [{ type: "text", text }],
      warnings: [],
      providerMetadata: undefined,
      request: undefined,
      response: undefined,
    }),
  });

// Both mocks are wrapped with wrapAISDKModel before use; the task and the
// scorer each get their own wrapped model.
const tracedModel = wrapAISDKModel(
  createMockModel("Response for task", {
    inputTokens: 10,
    outputTokens: 20,
    totalTokens: 30,
  })
);
const tracedScorerModel = wrapAISDKModel(
  createMockModel("1", { inputTokens: 5, outputTokens: 10, totalTokens: 15 })
);

evalite("AI SDK Caching Config Precedence", {
  data: () => [
    {
      input: "test input 1",
      expected: "expected output 1",
    },
  ],
  task: async (input) => {
    const { text } = await generateText({
      model: tracedModel,
      prompt: input,
    });
    return text;
  },
  scorers: [
    {
      name: "AI Scorer",
      scorer: async ({ output }) => {
        // The generated text is discarded on purpose: the call only exists to
        // send a request through the wrapped scorer model.
        await generateText({
          model: tracedScorerModel,
          prompt: `Score this: ${output}`,
        });
        return { score: 1 };
      },
    },
  ],
});
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import { defineConfig } from "evalite/config";
2+
3+
// Fixture config: `cache: true` switches the AI SDK output cache on at the
// config level; a run can still override it by passing `cacheEnabled`.
const config = defineConfig({
  cache: true,
});

export default config;

packages/evalite-tests/tests/test-utils.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,14 @@ export const loadFixture = async (
6161
* Set this to true if your test needs the server running (e.g., for cache functionality).
6262
*/
6363
enableServer?: boolean;
64+
/**
65+
* Enable cache debug mode to log cache hits/misses.
66+
*/
6467
cacheDebug?: boolean;
68+
/**
69+
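* Enable or disable the cache for AI SDK model outputs.
* When omitted, runEvalite falls back to the config's `cache` value and then to `true`.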
70+
*/
71+
cacheEnabled?: boolean;
6572
}) => {
6673
const result = await runEvalite({
6774
...opts,
@@ -70,6 +77,7 @@ export const loadFixture = async (
7077
testOutputWritable: captured.writable,
7178
disableServer: !opts.enableServer,
7279
cacheDebug: opts.cacheDebug ?? false,
80+
cacheEnabled: opts.cacheEnabled,
7381
});
7482
vitestInstance = result.vitest;
7583
return vitestInstance;

packages/evalite/src/evalite.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ const runTask = async <TInput, TOutput, TExpected, TVariant = undefined>(
6161
traces: Evalite.Trace[];
6262
cacheContext: CacheContextConfig;
6363
cacheDebug: boolean;
64+
cacheEnabled: boolean;
6465
} & Omit<Evalite.RunnerOpts<TInput, TOutput, TExpected, TVariant>, "data">
6566
) => {
6667
const start = performance.now();
@@ -81,6 +82,9 @@ const runTask = async <TInput, TOutput, TExpected, TVariant = undefined>(
8182
{
8283
...opts.cacheContext,
8384
reportCacheHit: (hit) => {
85+
if (!opts.cacheEnabled) {
86+
return;
87+
}
8488
scorerCacheHits.push(hit);
8589
if (opts.cacheDebug) {
8690
console.log(
@@ -356,10 +360,14 @@ function registerEvalite<TInput, TOutput, TExpected>(
356360
};
357361

358362
const cacheDebug = inject("cacheDebug");
363+
const cacheEnabled = inject("cacheEnabled");
359364

360365
cacheContextLocalStorage.enterWith({
361366
...cacheContext,
362367
reportCacheHit: (hit) => {
368+
if (!cacheEnabled) {
369+
return;
370+
}
363371
taskCacheHits.push(hit);
364372
if (cacheDebug) {
365373
console.log(
@@ -390,6 +398,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
390398
traces,
391399
cacheContext,
392400
cacheDebug,
401+
cacheEnabled,
393402
});
394403

395404
const [outputWithFiles, tracesWithFiles, renderedColumns] =

packages/evalite/src/run-evalite.ts

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ declare module "vitest" {
3131
* Whether to log cache operations to the console.
3232
*/
3333
cacheDebug: boolean;
34+
/**
35+
* Whether to enable cache for AI SDK model outputs.
36+
*/
37+
cacheEnabled: boolean;
3438
}
3539
}
3640

@@ -246,15 +250,7 @@ export const runEvalite = async (opts: {
246250
const maxConcurrency = config?.maxConcurrency;
247251

248252
// Resolve whether the cache is enabled. Precedence: opts.cacheEnabled > config.cache > default (true)
249-
let cacheEnabled = true;
250-
if (opts.cacheEnabled !== undefined) {
251-
cacheEnabled = opts.cacheEnabled;
252-
} else if (config?.cache !== undefined) {
253-
cacheEnabled =
254-
typeof config.cache === "boolean"
255-
? config.cache
256-
: (config.cache.enabled ?? true);
257-
}
253+
const cacheEnabled = opts.cacheEnabled ?? config?.cache ?? true;
258254

259255
// Merge setupFiles:
260256
// 1. Always include env-setup-file first to load .env files
@@ -355,6 +351,7 @@ export const runEvalite = async (opts: {
355351
vitest.provide("trialCount", config?.trialCount);
356352
vitest.provide("serverPort", actualServerPort);
357353
vitest.provide("cacheDebug", opts.cacheDebug ?? false);
354+
vitest.provide("cacheEnabled", cacheEnabled);
358355

359356
await vitest.start(filters);
360357

packages/evalite/src/types.ts

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -109,29 +109,15 @@ export declare namespace Evalite {
109109

110110
/**
111111
* Cache configuration for AI SDK model outputs
112-
* @default { enabled: true, ttlDays: 30 }
112+
* @default true
113113
* @example
114114
* ```ts
115115
* export default defineConfig({
116116
* cache: false // Disable cache entirely
117117
* })
118118
* ```
119-
* @example
120-
* ```ts
121-
* export default defineConfig({
122-
* cache: {
123-
* enabled: true,
124-
* ttlDays: 7 // Cache for 7 days
125-
* }
126-
* })
127-
* ```
128119
*/
129-
cache?:
130-
| boolean
131-
| {
132-
enabled?: boolean;
133-
ttlDays?: number;
134-
};
120+
cache?: boolean;
135121

136122
/**
137123
* Pass-through Vite/Vitest configuration options.

0 commit comments

Comments
 (0)