Skip to content

Commit 6033c8c

Browse files
committed
Added Levenshtein distance scorer for fuzzy string matching
1 parent 059edfd commit 6033c8c

File tree

10 files changed

+201
-6
lines changed

10 files changed

+201
-6
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"evalite": patch
3+
---
4+
5+
Added Levenshtein distance scorer for fuzzy string matching

apps/evalite-docs/astro.config.mts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,10 @@ export default defineConfig({
203203
label: "faithfulness",
204204
slug: "api/scorers/faithfulness",
205205
},
206+
{
207+
label: "levenshtein",
208+
slug: "api/scorers/levenshtein",
209+
},
206210
{
207211
label: "noiseSensitivity",
208212
slug: "api/scorers/noise-sensitivity",

apps/evalite-docs/src/content/docs/api/scorers/index.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Simple deterministic scorers for text matching. No AI SDK required.
1616

1717
- [**exactMatch**](/api/scorers/exact-match) - Exact string comparison
1818
- [**contains**](/api/scorers/contains) - Substring matching
19+
- [**levenshtein**](/api/scorers/levenshtein) - Fuzzy string matching with edit distance
1920

2021
## RAG Scorers
2122

@@ -40,6 +41,7 @@ Specialized scorers for specific use cases.
4041
| ----------------- | ---------------------- | ----------------------------- |
4142
| exactMatch | No | Exact string matching |
4243
| contains | No | Substring matching |
44+
| levenshtein | No | Fuzzy string matching |
4345
| faithfulness | Yes (LLM) | RAG hallucination detection |
4446
| answerSimilarity | Yes (Embeddings) | Semantic similarity |
4547
| answerCorrectness | Yes (LLM + Embeddings) | Comprehensive evaluation |
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
---
2+
title: levenshtein
3+
---
4+
5+
Measures string similarity using Levenshtein distance (edit distance), normalized to a score from 0 to 1, where 1 means identical strings and 0 means completely different.
6+
7+
**When to use**: For fuzzy string matching when you want to tolerate small typos, spelling variations, or minor differences. Useful for testing outputs that should be close but not necessarily exact.
8+
9+
**When NOT to use**: When exact matches are required (use exactMatch) or when you need semantic similarity that understands meaning (use answerSimilarity).
10+
11+
## Example
12+
13+
```ts
14+
import { evalite } from "evalite";
15+
import { levenshtein } from "evalite/scorers";
16+
17+
evalite("Levenshtein", {
18+
data: [
19+
{
20+
input: "What is the capital of France?",
21+
expected: {
22+
reference: "Paris",
23+
},
24+
},
25+
],
26+
task: async (input) => {
27+
return "Pari"; // Typo - missing 's'
28+
},
29+
scorers: [
30+
{
31+
scorer: ({ output, expected }) =>
32+
levenshtein({
33+
actual: output,
34+
expected: expected.reference,
35+
}),
36+
},
37+
],
38+
});
39+
```
40+
41+
In this example, the output "Pari" compared to expected "Paris" scores 0.8: the edit distance is 1 (inserting the missing "s") and the maximum length is 5, so the score is 1 - 1/5 = 0.8.
42+
43+
## Signature
44+
45+
```ts
46+
async function levenshtein(opts: {
47+
actual: string;
48+
expected: string;
49+
}): Promise<{
50+
name: string;
51+
description: string;
52+
score: number;
53+
}>;
54+
```
55+
56+
## Parameters
57+
58+
### actual
59+
60+
**Type:** `string`
61+
62+
The actual output to check.
63+
64+
### expected
65+
66+
**Type:** `string`
67+
68+
The expected string to compare against.
69+
70+
## How it works
71+
72+
The score is calculated as:
73+
74+
```
75+
score = 1 - (edit_distance / max_length)
76+
```
77+
78+
Where:
79+
80+
- `edit_distance` is the minimum number of single-character edits (insertions, deletions, substitutions) needed to change one string into the other
81+
- `max_length` is the length of the longer string (if both strings are empty, the score is 1)
82+
83+
## See Also
84+
85+
- [createScorer()](/api/create-scorer)
86+
- [exactMatch](/api/scorers/exact-match)
87+
- [contains](/api/scorers/contains)
88+
- [answerSimilarity](/api/scorers/answer-similarity)

packages/evalite/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
"fastify": "^5.6.1",
6262
"file-type": "^19.6.0",
6363
"jiti": "^2.6.1",
64+
"js-levenshtein": "^1.1.6",
6465
"table": "^6.9.0",
6566
"tinyrainbow": "^3.0.3"
6667
},
@@ -75,6 +76,7 @@
7576
"devDependencies": {
7677
"@ai-sdk/provider": "^2.0.0",
7778
"@types/better-sqlite3": "^7.6.13",
79+
"@types/js-levenshtein": "^1.1.3",
7880
"@types/ws": "^8.18.1",
7981
"ai": "^5.0.59",
8082
"better-sqlite3": "^11.6.0",

packages/evalite/src/scorers/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ export { answerRelevancy } from "./answer-relevancy.js";
55
export { contextRecall } from "./context-recall.js";
66
export { toolCallAccuracy } from "./tool-call-accuracy.js";
77
export { noiseSensitivity } from "./noise-sensitivity.js";
8-
export { exactMatch, contains } from "./string.js";
8+
export { exactMatch, contains, levenshtein } from "./string.js";

packages/evalite/src/scorers/string.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { Evalite } from "../types.js";
2+
import levenshteinDistance from "js-levenshtein";
23

34
/**
45
* Checks if your AI's output exactly matches the
@@ -57,3 +58,43 @@ export async function contains(opts: Evalite.Scorers.ContainsOpts) {
5758
score: opts.actual.includes(opts.expected) ? 1 : 0,
5859
};
5960
}
61+
62+
/**
63+
* Measures string similarity using Levenshtein distance
64+
* (edit distance), normalized to a 0-1 score.
65+
*
66+
* Returns a score from 0 to 1, where 1 means identical
67+
* strings and 0 means completely different.
68+
*
69+
* **When to use**: For fuzzy string matching when you
70+
* want to tolerate small typos, spelling variations,
71+
* or minor differences. Useful for testing outputs
72+
* that should be close but not necessarily exact.
73+
*
74+
* **When NOT to use**: When exact matches are required
75+
* (use exactMatch) or when you need semantic similarity
76+
* that understands meaning (use answerSimilarity).
77+
*
78+
* @param opts.actual - The actual output to check
79+
* @param opts.expected - The expected string to compare against
80+
*/
81+
export async function levenshtein(opts: Evalite.Scorers.LevenshteinOpts) {
82+
if (typeof opts.actual !== "string" || typeof opts.expected !== "string") {
83+
throw new Error("Both actual and expected must be strings");
84+
}
85+
86+
const maxLen = Math.max(opts.actual.length, opts.expected.length);
87+
88+
let score = 1;
89+
if (maxLen > 0) {
90+
const distance = levenshteinDistance(opts.actual, opts.expected);
91+
score = 1 - distance / maxLen;
92+
}
93+
94+
return {
95+
name: "Levenshtein",
96+
description:
97+
"Measures string similarity using edit distance (0 = different, 1 = identical).",
98+
score,
99+
};
100+
}

packages/evalite/src/types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,14 @@ export declare namespace Evalite {
892892
expected: string;
893893
};
894894

895+
/**
896+
* Options for the Levenshtein distance scorer.
897+
*/
898+
export type LevenshteinOpts = {
899+
actual: string;
900+
expected: string;
901+
};
902+
895903
/**
896904
* Classification result for a single statement in context recall scoring.
897905
*/

packages/example/src/string-scorers.eval.ts

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { evalite } from "evalite";
2-
import { contains, exactMatch } from "evalite/scorers";
2+
import { contains, exactMatch, levenshtein } from "evalite/scorers";
33

44
evalite("Exact Match", {
55
data: [
@@ -15,8 +15,6 @@ evalite("Exact Match", {
1515
},
1616
scorers: [
1717
{
18-
name: "Exact Match",
19-
description: "Checks exact match",
2018
scorer: ({ output, expected }) =>
2119
exactMatch({
2220
actual: output,
@@ -40,8 +38,6 @@ evalite("Contains", {
4038
},
4139
scorers: [
4240
{
43-
name: "Contains",
44-
description: "Checks if output contains substring",
4541
scorer: ({ output, expected }) =>
4642
contains({
4743
actual: output,
@@ -50,3 +46,35 @@ evalite("Contains", {
5046
},
5147
],
5248
});
49+
50+
evalite("Levenshtein", {
51+
data: [
52+
{
53+
input: "What is the capital of France?",
54+
expected: {
55+
reference: "Paris",
56+
},
57+
},
58+
{
59+
input: "What is 2+2?",
60+
expected: {
61+
reference: "4",
62+
},
63+
},
64+
],
65+
task: async (input) => {
66+
if (input.includes("France")) {
67+
return "Pari"; // Typo - missing 's', should score 0.8
68+
}
69+
return "Four"; // Word instead of digit - edit distance to "4" is 4 out of max length 4, should score 0.0
70+
},
71+
scorers: [
72+
{
73+
scorer: ({ output, expected }) =>
74+
levenshtein({
75+
actual: output,
76+
expected: expected.reference,
77+
}),
78+
},
79+
],
80+
});

pnpm-lock.yaml

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)