Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions src/store.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import {
parseVirtualPath,
normalizeDocid,
isDocid,
buildFTS5Query,
type Store,
type DocumentResult,
type SearchResult,
Expand Down Expand Up @@ -935,6 +936,123 @@ describe("FTS Search", () => {
});
});

// =============================================================================
// buildFTS5Query Unit Tests
// =============================================================================

describe("buildFTS5Query", () => {
  // Input with nothing searchable in it yields null rather than a query string.
  test("returns null for empty query", () => {
    for (const blank of ["", " "]) {
      expect(buildFTS5Query(blank)).toBeNull();
    }
  });

  // Each row: [test name, raw user query, expected FTS5 expression].
  // Quoted phrases are emitted before prefix terms in the expected output.
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ["single term becomes prefix match", "hello", '"hello"*'],
    ["multiple terms become prefix matches with AND", "hello world", '"hello"* AND "world"*'],
    ["quoted phrase becomes exact phrase match", '"hello world"', '"hello world"'],
    ["mixed quoted and unquoted terms", 'meeting "Q1 planning"', '"q1 planning" AND "meeting"*'],
    ["multiple quoted phrases", '"hello world" "foo bar"', '"hello world" AND "foo bar"'],
    [
      "quoted phrase with unquoted terms on both sides",
      'before "exact phrase" after',
      '"exact phrase" AND "before"* AND "after"*',
    ],
    ["sanitizes special characters in terms", "foo(bar)", '"foobar"*'],
    ["sanitizes special characters in phrases", '"foo(bar) baz"', '"foo bar baz"'],
    ["handles unicode in terms", "日本語", '"日本語"*'],
    ["handles unicode in phrases", '"日本語 テスト"', '"日本語 テスト"'],
    ["normalizes case in terms", "Hello WORLD", '"hello"* AND "world"*'],
    ["normalizes case in phrases", '"Hello WORLD"', '"hello world"'],
    ["empty quoted phrase is ignored", '"" hello', '"hello"*'],
    ["phrase with only special chars is ignored", '"()" hello', '"hello"*'],
  ];

  for (const [name, input, expected] of cases) {
    test(name, () => {
      expect(buildFTS5Query(input)).toBe(expected);
    });
  }
});

describe("FTS Search with exact phrases", () => {
  test("searchFTS finds exact phrase match", async () => {
    const store = await createTestStore();
    try {
      const collectionName = await createTestCollection();

      await insertTestDocument(store.db, collectionName, {
        name: "doc1",
        body: "The quick brown fox jumps over the lazy dog",
        displayPath: "test/doc1.md",
      });

      // Same two words as doc1 but in the opposite order, so the phrase must not match.
      await insertTestDocument(store.db, collectionName, {
        name: "doc2",
        body: "The brown quick fox is here",
        displayPath: "test/doc2.md",
      });

      // Exact phrase "quick brown" should only match doc1
      const results = store.searchFTS('"quick brown"', 10);
      expect(results).toHaveLength(1);
      expect(results[0]!.displayPath).toBe(`${collectionName}/test/doc1.md`);
    } finally {
      // Run cleanup even when an assertion above throws, so a failing test
      // does not leave its database behind.
      await cleanupTestDb(store);
    }
  });

  test("searchFTS combines exact phrase with prefix term", async () => {
    const store = await createTestStore();
    try {
      const collectionName = await createTestCollection();

      await insertTestDocument(store.db, collectionName, {
        name: "doc1",
        body: "Meeting notes about project timeline discussion",
        displayPath: "test/doc1.md",
      });

      // Contains the phrase "project timeline" but not the term "meeting".
      await insertTestDocument(store.db, collectionName, {
        name: "doc2",
        body: "The project timeline is important",
        displayPath: "test/doc2.md",
      });

      // Should match doc1 which has both "meeting" and "project timeline"
      const results = store.searchFTS('meeting "project timeline"', 10);
      expect(results).toHaveLength(1);
      expect(results[0]!.displayPath).toBe(`${collectionName}/test/doc1.md`);
    } finally {
      // Run cleanup even when an assertion above throws, so a failing test
      // does not leave its database behind.
      await cleanupTestDb(store);
    }
  });
});

// =============================================================================
// Document Retrieval Tests
// =============================================================================
Expand Down
48 changes: 43 additions & 5 deletions src/store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1834,13 +1834,51 @@ function sanitizeFTS5Term(term: string): string {
return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase();
}

function buildFTS5Query(query: string): string | null {
const terms = query.split(/\s+/)
/**
 * Normalize the inner text of a quoted phrase for use in an FTS5 phrase query.
 *
 * Every character that is not a Unicode letter, a Unicode number, whitespace,
 * or an apostrophe is replaced by a space. Whitespace runs are then collapsed
 * to single spaces, leading/trailing whitespace is dropped, and the result is
 * lowercased.
 *
 * @param phrase Text found between a pair of double quotes in the user query.
 * @returns The cleaned phrase; an empty string when nothing searchable remains.
 */
function sanitizeFTS5Phrase(phrase: string): string {
  // Anything outside letters, digits, whitespace and apostrophes becomes a word break.
  const disallowed = /[^\p{L}\p{N}\s']/gu;
  const words = phrase
    .replace(disallowed, ' ')
    .split(/\s+/)
    .filter(word => word.length > 0);
  // Lowercase last, after the character filter has been applied.
  return words.join(' ').toLowerCase();
}

/**
 * Build an FTS5 query string from free-form user input.
 *
 * Supported input forms:
 * - Quoted phrases: `"hello world"` → exact phrase match `"hello world"`
 * - Unquoted terms: `hello world` → prefix matches joined with AND,
 *   `"hello"* AND "world"*`
 * - Mixed: `meeting "Q1 planning"` → `"q1 planning" AND "meeting"*`
 *
 * Quoted phrases are always emitted before the unquoted terms, regardless of
 * where they appear in the input. Phrases are cleaned by `sanitizeFTS5Phrase`
 * and terms by `sanitizeFTS5Term`; pieces that are empty after cleaning are
 * dropped.
 *
 * @param query Raw search text as entered by the user.
 * @returns The FTS5 query string, or `null` when no searchable phrase or term remains.
 */
export function buildFTS5Query(query: string): string | null {
  const parts: string[] = [];

  // Extract quoted phrases first. The replacer callback runs once per match,
  // which lets us collect each phrase and blank it out of the text in one pass.
  const remaining = query.replace(/"([^"]+)"/g, (_quoted: string, inner: string) => {
    const phrase = sanitizeFTS5Phrase(inner);
    if (phrase.length > 0) {
      // Exact phrase match (no asterisk)
      parts.push(`"${phrase}"`);
    }
    return ' ';
  });

  // Whatever is left is treated as individual terms, each a prefix match.
  for (const raw of remaining.split(/\s+/)) {
    const term = sanitizeFTS5Term(raw);
    if (term.length > 0) {
      parts.push(`"${term}"*`);
    }
  }

  // A single part joins to itself, so no special case is needed for length 1.
  return parts.length > 0 ? parts.join(' AND ') : null;
}

export function searchFTS(db: Database, query: string, limit: number = 20, collectionId?: number): SearchResult[] {
Expand Down