@@ -8,10 +8,7 @@ import type { Evalite } from "./types.js";
88import { FILES_LOCATION } from "./backend-only-constants.js" ;
99import { createScorer } from "./index.js" ;
1010import { serializeAnnotation } from "./reporter/events.js" ;
11- import {
12- cacheContextLocalStorage ,
13- reportCacheHitLocalStorage ,
14- } from "./cache.js" ;
11+ import { cacheContextLocalStorage , type CacheContextConfig } from "./cache.js" ;
1512
1613const makeSerializable = ( obj : unknown ) : unknown => {
1714 try {
@@ -62,27 +59,53 @@ const runTask = async <TInput, TOutput, TExpected, TVariant = undefined>(
6259 expected : TExpected | undefined ;
6360 variant : TVariant ;
6461 traces : Evalite . Trace [ ] ;
62+ cacheContext : CacheContextConfig ;
6563 } & Omit < Evalite . RunnerOpts < TInput , TOutput , TExpected , TVariant > , "data" >
6664) => {
6765 const start = performance . now ( ) ;
6866 const output = await executeTask ( opts . task , opts . input , opts . variant ) ;
6967 const duration = Math . round ( performance . now ( ) - start ) ;
7068
7169 const scores = await Promise . all (
72- ( opts . scorers || [ ] ) . map ( async ( scorerOrOpts ) => {
73- if ( typeof scorerOrOpts === "function" ) {
74- return scorerOrOpts ( {
75- input : opts . input ,
76- output,
77- expected : opts . expected as TExpected ,
78- } ) ;
79- } else {
80- return createScorer ( scorerOrOpts ) ( {
81- input : opts . input ,
82- output,
83- expected : opts . expected as TExpected ,
84- } ) ;
85- }
70+ ( opts . scorers || [ ] ) . map ( async ( scorerOrOpts , index ) => {
71+ // Isolate scorer traces: LLM calls made inside a scorer still report traces,
72+ // but the reporter installed here is a no-op, so they are discarded instead of being collected by the parent eval
73+ return reportTraceLocalStorage . run (
74+ ( ) => {
75+ // no-op: discard traces
76+ } ,
77+ ( ) => {
78+ const scorerCacheHits : Array < Evalite . CacheHit > = [ ] ;
79+ return cacheContextLocalStorage . run (
80+ {
81+ ...opts . cacheContext ,
82+ reportCacheHit : ( hit ) => {
83+ scorerCacheHits . push ( hit ) ;
84+ } ,
85+ } ,
86+ async ( ) : Promise < Evalite . ScoreWithCacheHits > => {
87+ const score =
88+ typeof scorerOrOpts === "function"
89+ ? await scorerOrOpts ( {
90+ input : opts . input ,
91+ output,
92+ expected : opts . expected as TExpected ,
93+ } )
94+ : await createScorer ( scorerOrOpts ) ( {
95+ input : opts . input ,
96+ output,
97+ expected : opts . expected as TExpected ,
98+ } ) ;
99+
100+ // Always attach this scorer's cache hits to the score (an empty array when none were reported)
101+ return {
102+ ...score ,
103+ cacheHits : scorerCacheHits ,
104+ } ;
105+ }
106+ ) ;
107+ }
108+ ) ;
86109 } )
87110 ) ;
88111
@@ -224,7 +247,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
224247 output : datasetResult . error ,
225248 scores : [ ] ,
226249 traces : [ ] ,
227- cacheHits : [ ] ,
250+ taskCacheHits : [ ] ,
228251 renderedColumns : [ ] ,
229252 } ,
230253 } )
@@ -318,18 +341,18 @@ function registerEvalite<TInput, TOutput, TExpected>(
318341 const traces : Evalite . Trace [ ] = [ ] ;
319342 reportTraceLocalStorage . enterWith ( ( trace ) => traces . push ( trace ) ) ;
320343
321- const cacheHits : Array < {
322- keyHash : string ;
323- hit : boolean ;
324- savedDuration : number ;
325- } > = [ ] ;
326- reportCacheHitLocalStorage . enterWith ( ( hit ) => cacheHits . push ( hit ) ) ;
344+ const taskCacheHits : Array < Evalite . CacheHit > = [ ] ;
327345
328- cacheContextLocalStorage . enterWith ( {
346+ const cacheContext : CacheContextConfig = {
329347 trialCount : inject ( "trialCount" ) ,
330348 evalName : evalName ,
331349 serverPort : inject ( "serverPort" ) ,
332350 cacheEnabled : inject ( "cacheEnabled" ) ,
351+ } ;
352+
353+ cacheContextLocalStorage . enterWith ( {
354+ ...cacheContext ,
355+ reportCacheHit : ( hit ) => taskCacheHits . push ( hit ) ,
333356 } ) ;
334357
335358 const [ inputForMeta , expectedForMeta ] = await Promise . all ( [
@@ -351,6 +374,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
351374 task : opts . task ,
352375 columns : opts . columns ,
353376 traces,
377+ cacheContext,
354378 } ) ;
355379
356380 const [ outputWithFiles , tracesWithFiles , renderedColumns ] =
@@ -379,7 +403,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
379403 output : serializableOutput ,
380404 scores,
381405 traces : tracesWithFiles ,
382- cacheHits ,
406+ taskCacheHits : taskCacheHits ,
383407 status : "success" ,
384408 renderedColumns,
385409 variantName : vitestOpts . variantName ,
@@ -415,7 +439,7 @@ function registerEvalite<TInput, TOutput, TExpected>(
415439 output : serializedError ,
416440 scores : [ ] ,
417441 traces : await handleFilesInTraces ( rootDir , traces ) ,
418- cacheHits ,
442+ taskCacheHits : taskCacheHits ,
419443 status : "fail" ,
420444 renderedColumns : [ ] ,
421445 variantName : vitestOpts . variantName ,
0 commit comments