Added Levenshtein distance scorer for fuzzy string matching

mattpocock · mattpocock · commit 6033c8c59993 · 2025-11-10T17:31:28.000Z
diff --git a/.changeset/0000-levenshtein-scorer.md b/.changeset/0000-levenshtein-scorer.md
@@ -0,0 +1,5 @@
+---
+"evalite": patch
+---
+
+Added Levenshtein distance scorer for fuzzy string matching
diff --git a/apps/evalite-docs/astro.config.mts b/apps/evalite-docs/astro.config.mts
@@ -203,6 +203,10 @@ export default defineConfig({
               label: "faithfulness",
               slug: "api/scorers/faithfulness",
             },
+            {
+              label: "levenshtein",
+              slug: "api/scorers/levenshtein",
+            },
             {
               label: "noiseSensitivity",
               slug: "api/scorers/noise-sensitivity",
diff --git a/apps/evalite-docs/src/content/docs/api/scorers/index.mdx b/apps/evalite-docs/src/content/docs/api/scorers/index.mdx
@@ -16,6 +16,7 @@ Simple deterministic scorers for text matching. No AI SDK required.
 
 - [**exactMatch**](/api/scorers/exact-match) - Exact string comparison
 - [**contains**](/api/scorers/contains) - Substring matching
+- [**levenshtein**](/api/scorers/levenshtein) - Fuzzy string matching with edit distance
 
 ## RAG Scorers
 
@@ -40,6 +41,7 @@ Specialized scorers for specific use cases.
 | ----------------- | ---------------------- | ----------------------------- |
 | exactMatch        | No                     | Exact string matching         |
 | contains          | No                     | Substring matching            |
+| levenshtein       | No                     | Fuzzy string matching         |
 | faithfulness      | Yes (LLM)              | RAG hallucination detection   |
 | answerSimilarity  | Yes (Embeddings)       | Semantic similarity           |
 | answerCorrectness | Yes (LLM + Embeddings) | Comprehensive evaluation      |
diff --git a/apps/evalite-docs/src/content/docs/api/scorers/levenshtein.mdx b/apps/evalite-docs/src/content/docs/api/scorers/levenshtein.mdx
@@ -0,0 +1,88 @@
+---
+title: levenshtein
+---
+
+Measures string similarity using Levenshtein distance (edit distance), normalized to a 0-1 score. Returns a score from 0 to 1, where 1 means identical strings and 0 means completely different.
+
+**When to use**: For fuzzy string matching when you want to tolerate small typos, spelling variations, or minor differences. Useful for testing outputs that should be close but not necessarily exact.
+
+**When NOT to use**: When exact matches are required (use exactMatch) or when you need semantic similarity that understands meaning (use answerSimilarity).
+
+## Example
+
+```ts
+import { evalite } from "evalite";
+import { levenshtein } from "evalite/scorers";
+
+evalite("Levenshtein", {
+  data: [
+    {
+      input: "What is the capital of France?",
+      expected: {
+        reference: "Paris",
+      },
+    },
+  ],
+  task: async (input) => {
+    return "Pari"; // Typo - missing 's'
+  },
+  scorers: [
+    {
+      scorer: ({ output, expected }) =>
+        levenshtein({
+          actual: output,
+          expected: expected.reference,
+        }),
+    },
+  ],
+});
+```
+
+In this example, the output "Pari" is one edit away from the expected "Paris" (one missing character), and the longer string has 5 characters, so the score is 1 - 1/5 = 0.8.
+
+## Signature
+
+```ts
+async function levenshtein(opts: {
+  actual: string;
+  expected: string;
+}): Promise<{
+  name: string;
+  description: string;
+  score: number;
+}>;
+```
+
+## Parameters
+
+### actual
+
+**Type:** `string`
+
+The actual output to check.
+
+### expected
+
+**Type:** `string`
+
+The expected string to compare against.
+
+## How it works
+
+The score is calculated as:
+
+```
+score = 1 - (edit_distance / max_length)
+```
+
+Where:
+
+- `edit_distance` is the minimum number of single-character edits (insertions, deletions, substitutions) needed to change one string into the other
+- `max_length` is the length of the longer string; if both strings are empty, the score is 1
+
+## See Also
+
+- [createScorer()](/api/create-scorer)
+- [exactMatch](/api/scorers/exact-match)
+- [contains](/api/scorers/contains)
+- [answerSimilarity](/api/scorers/answer-similarity)
diff --git a/packages/evalite/package.json b/packages/evalite/package.json
@@ -61,6 +61,7 @@
     "fastify": "^5.6.1",
     "file-type": "^19.6.0",
     "jiti": "^2.6.1",
+    "js-levenshtein": "^1.1.6",
     "table": "^6.9.0",
     "tinyrainbow": "^3.0.3"
   },
@@ -75,6 +76,7 @@
   "devDependencies": {
     "@ai-sdk/provider": "^2.0.0",
     "@types/better-sqlite3": "^7.6.13",
+    "@types/js-levenshtein": "^1.1.3",
     "@types/ws": "^8.18.1",
     "ai": "^5.0.59",
     "better-sqlite3": "^11.6.0",
diff --git a/packages/evalite/src/scorers/index.ts b/packages/evalite/src/scorers/index.ts
@@ -5,4 +5,4 @@ export { answerRelevancy } from "./answer-relevancy.js";
 export { contextRecall } from "./context-recall.js";
 export { toolCallAccuracy } from "./tool-call-accuracy.js";
 export { noiseSensitivity } from "./noise-sensitivity.js";
-export { exactMatch, contains } from "./string.js";
+export { exactMatch, contains, levenshtein } from "./string.js";
diff --git a/packages/evalite/src/scorers/string.ts b/packages/evalite/src/scorers/string.ts
@@ -1,4 +1,5 @@
 import type { Evalite } from "../types.js";
+import levenshteinDistance from "js-levenshtein";
 
 /**
  * Checks if your AI's output exactly matches the
@@ -57,3 +58,43 @@ export async function contains(opts: Evalite.Scorers.ContainsOpts) {
     score: opts.actual.includes(opts.expected) ? 1 : 0,
   };
 }
+
+/**
+ * Scores string similarity as `1 - distance / maxLength`, where
+ * `distance` is the Levenshtein (edit) distance and `maxLength` is
+ * the length of the longer string. 1 means identical strings (two
+ * empty strings also score 1); 0 means the distance equals maxLength.
+ *
+ * **When to use**: For fuzzy string matching when you want to
+ * tolerate small typos, spelling variations, or minor differences.
+ * Useful for outputs that should be close but not necessarily exact.
+ *
+ * **When NOT to use**: When exact matches are required
+ * (use exactMatch) or when you need semantic similarity
+ * that understands meaning (use answerSimilarity).
+ *
+ * @param opts.actual - The actual output to check
+ * @param opts.expected - The expected string to compare against
+ * @returns An object with the scorer `name`, `description`, and `score`
+ * @throws Error when `actual` or `expected` is not a string
+ */
+export async function levenshtein(opts: Evalite.Scorers.LevenshteinOpts) {
+  if (typeof opts.actual !== "string" || typeof opts.expected !== "string") {
+    throw new Error("Both actual and expected must be strings");
+  }
+
+  const maxLen = Math.max(opts.actual.length, opts.expected.length);
+
+  let score = 1;
+  if (maxLen > 0) {
+    const distance = levenshteinDistance(opts.actual, opts.expected);
+    score = 1 - distance / maxLen;
+  }
+
+  return {
+    name: "Levenshtein",
+    description:
+      "Measures string similarity using edit distance (0 = different, 1 = identical).",
+    score,
+  };
+}
diff --git a/packages/evalite/src/types.ts b/packages/evalite/src/types.ts
@@ -892,6 +892,14 @@ export declare namespace Evalite {
       expected: string;
     };
 
+    /**
+     * Options for the Levenshtein distance scorer.
+     */
+    export type LevenshteinOpts = {
+      actual: string;
+      expected: string;
+    };
+
     /**
      * Classification result for a single statement in context recall scoring.
      */
diff --git a/packages/example/src/string-scorers.eval.ts b/packages/example/src/string-scorers.eval.ts
@@ -1,5 +1,5 @@
 import { evalite } from "evalite";
-import { contains, exactMatch } from "evalite/scorers";
+import { contains, exactMatch, levenshtein } from "evalite/scorers";
 
 evalite("Exact Match", {
   data: [
@@ -15,8 +15,6 @@ evalite("Exact Match", {
   },
   scorers: [
     {
-      name: "Exact Match",
-      description: "Checks exact match",
       scorer: ({ output, expected }) =>
         exactMatch({
           actual: output,
@@ -40,8 +38,6 @@ evalite("Contains", {
   },
   scorers: [
     {
-      name: "Contains",
-      description: "Checks if output contains substring",
       scorer: ({ output, expected }) =>
         contains({
           actual: output,
@@ -50,3 +46,35 @@ evalite("Contains", {
     },
   ],
 });
+
+evalite("Levenshtein", {
+  data: [
+    {
+      input: "What is the capital of France?",
+      expected: {
+        reference: "Paris",
+      },
+    },
+    {
+      input: "What is 2+2?",
+      expected: {
+        reference: "4",
+      },
+    },
+  ],
+  task: async (input) => {
+    if (input.includes("France")) {
+      return "Pari"; // Typo - missing 's', should score 0.8
+    }
+    return "Four"; // Same meaning as "4" but no characters in common (distance 4 / length 4), should score 0.0
+  },
+  scorers: [
+    {
+      scorer: ({ output, expected }) =>
+        levenshtein({
+          actual: output,
+          expected: expected.reference,
+        }),
+    },
+  ],
+});
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"evalite": patch
 +---
++
 +Added Levenshtein distance scorer for fuzzy string matching