Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,41 @@ llm_cache -- Cached LLM responses (query expansion, rerank scores)
| Variable | Default | Description |
|----------|---------|-------------|
| `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
| `QMD_PROVIDER` | `local` | Embedding provider: `local`, `voyage`, or `openai` |
| `VOYAGE_API_KEY` | - | API key for Voyage AI (required when `QMD_PROVIDER=voyage`) |
| `VOYAGE_EMBED_MODEL` | `voyage-4-lite` | Voyage embedding model |
| `VOYAGE_RERANK_MODEL` | `rerank-2` | Voyage reranking model |
| `OPENAI_API_KEY` | - | API key for OpenAI (required when `QMD_PROVIDER=openai`) |
| `OPENAI_EMBED_MODEL` | `text-embedding-3-small` | OpenAI embedding model |
| `OPENAI_API_BASE` | `https://api.openai.com/v1` | Base URL (for OpenAI-compatible APIs) |

### Remote Embedding Providers

QMD supports remote embedding APIs as an alternative to local models. This is useful when:
- You want faster embeddings without GPU
- You need higher quality embeddings (e.g., Voyage AI)
- You're running on a machine without enough resources for local models

**Voyage AI** (recommended for quality):
```bash
export QMD_PROVIDER=voyage
export VOYAGE_API_KEY=your-api-key

qmd embed # Uses Voyage for embeddings
qmd vsearch "query" # Uses Voyage for query embedding
qmd query "query" # Uses Voyage for embeddings + reranking
```

**OpenAI-compatible APIs**:
```bash
export QMD_PROVIDER=openai
export OPENAI_API_KEY=your-api-key

# Or use a local OpenAI-compatible server (Ollama, vLLM, etc.)
export OPENAI_API_BASE=http://localhost:11434/v1
```

**Note:** Query expansion always uses local models (LlamaCpp) regardless of the provider setting.

## How It Works

Expand Down
66 changes: 66 additions & 0 deletions src/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -841,3 +841,69 @@ export async function disposeDefaultLlamaCpp(): Promise<void> {
}
}

// =============================================================================
// Provider Selection (Local vs Remote)
// =============================================================================

// Lazily-created singleton returned by getDefaultLLM(); reset to null by disposeDefaultLLM().
let defaultLLM: LLM | null = null;

/**
 * Get the default LLM instance based on QMD_PROVIDER env var.
 * - "voyage" or "openai" → RemoteLLM (API-based)
 * - unset or "local" (or any other value) → LlamaCpp (local models)
 *
 * The instance is cached for the lifetime of the process: QMD_PROVIDER is read
 * only on the first call, and later changes have no effect until
 * disposeDefaultLLM() clears the cache.
 *
 * NOTE(review): two concurrent first calls can both pass the cache check and
 * construct separate instances (the last one wins the cache slot) — confirm
 * callers only invoke this from a single startup path.
 *
 * @returns The shared LLM instance for the configured provider.
 */
export async function getDefaultLLM(): Promise<LLM> {
  if (defaultLLM) return defaultLLM;

  const provider = process.env.QMD_PROVIDER?.toLowerCase();

  if (provider === "voyage" || provider === "openai") {
    // Dynamic import to avoid loading remote.ts when not needed.
    const { RemoteLLM } = await import("./remote.js");
    // `provider` is already narrowed to "voyage" | "openai" by the checks
    // above, so the previous `as "voyage" | "openai"` assertion was redundant.
    defaultLLM = new RemoteLLM({ provider });
  } else {
    defaultLLM = getDefaultLlamaCpp();
  }

  return defaultLLM;
}

/**
 * Describe the active provider configuration for status display.
 *
 * Resolution mirrors getDefaultLLM: QMD_PROVIDER selects the backend
 * ("voyage", "openai", anything else → "local"), and per-provider env vars
 * override the model-name defaults.
 *
 * @returns Provider name plus the embedding and reranking model identifiers.
 */
export function getProviderInfo(): { provider: string; embedModel: string; rerankModel: string } {
  const env = process.env;

  switch (env.QMD_PROVIDER?.toLowerCase() || "local") {
    case "voyage":
      return {
        provider: "voyage",
        embedModel: env.VOYAGE_EMBED_MODEL || "voyage-4-lite",
        rerankModel: env.VOYAGE_RERANK_MODEL || "rerank-2",
      };
    case "openai":
      return {
        provider: "openai",
        embedModel: env.OPENAI_EMBED_MODEL || "text-embedding-3-small",
        // No rerank model is configured for the OpenAI-compatible path.
        rerankModel: "(none)",
      };
    default:
      // Unset, "local", or any unrecognized value falls back to local models.
      return {
        provider: "local",
        embedModel: env.QMD_EMBED_MODEL || "embeddinggemma",
        rerankModel: env.QMD_RERANK_MODEL || "qwen3-reranker",
      };
  }
}

/**
 * Dispose the default LLM instance and reset the cache so the next
 * getDefaultLLM() call re-reads QMD_PROVIDER.
 */
export async function disposeDefaultLLM(): Promise<void> {
  if (defaultLLM !== null) {
    await defaultLLM.dispose();
    defaultLLM = null;
  }
  // The local backend keeps its own singleton; tear it down as well.
  // NOTE(review): when the provider is "local", defaultLLM IS that singleton,
  // so its dispose() may effectively run twice — presumably
  // disposeDefaultLlamaCpp() is idempotent; verify.
  await disposeDefaultLlamaCpp();
}

16 changes: 15 additions & 1 deletion src/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import {
DEFAULT_MULTI_GET_MAX_BYTES,
createStore,
getDefaultDbPath,
getProviderInfo,
} from "./store.js";
import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, type RerankDocument, type Queryable, type QueryType } from "./llm.js";
import type { SearchResult, RankedResult } from "./store.js";
Expand Down Expand Up @@ -295,9 +296,13 @@ function showStatus(): void {
// Most recent update across all collections
const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };

// Provider info
const providerInfo = getProviderInfo();

console.log(`${c.bold}QMD Status${c.reset}\n`);
console.log(`Index: ${dbPath}`);
console.log(`Size: ${formatBytes(indexSize)}\n`);
console.log(`Size: ${formatBytes(indexSize)}`);
console.log(`Provider: ${c.cyan}${providerInfo.provider}${c.reset} (${providerInfo.embedModel})\n`);

console.log(`${c.bold}Documents${c.reset}`);
console.log(` Total: ${totalDocs.count} files indexed`);
Expand Down Expand Up @@ -2398,6 +2403,15 @@ function showHelp(): void {
console.log(" Reranking: qwen3-reranker-0.6b-q8_0");
console.log(" Generation: Qwen3-0.6B-Q8_0");
console.log("");
console.log("Environment variables (remote providers):");
console.log(" QMD_PROVIDER - Provider: local (default), voyage, openai");
console.log(" VOYAGE_API_KEY - Voyage AI API key");
console.log(" VOYAGE_EMBED_MODEL - Voyage model (default: voyage-4-lite)");
console.log(" VOYAGE_RERANK_MODEL - Voyage rerank model (default: rerank-2)");
console.log(" OPENAI_API_KEY - OpenAI API key");
console.log(" OPENAI_EMBED_MODEL - OpenAI model (default: text-embedding-3-small)");
console.log(" OPENAI_API_BASE - Base URL for OpenAI-compatible APIs");
console.log("");
console.log(`Index: ${getDbPath()}`);
}

Expand Down
178 changes: 178 additions & 0 deletions src/remote.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/**
* Tests for remote embedding providers (Voyage AI, OpenAI-compatible)
*/

import { describe, it, expect, beforeAll, afterAll } from "bun:test";
import { RemoteLLM, getDefaultRemote } from "./remote";

// Skip tests if no API key is available
const VOYAGE_API_KEY = process.env.VOYAGE_API_KEY;
// Gate for the live-API suite: runs the tests only when a Voyage key is set,
// otherwise the whole describe block is registered as skipped.
const describeWithVoyage = VOYAGE_API_KEY ? describe : describe.skip;

// Live integration tests against the real Voyage API; the whole suite is
// skipped when VOYAGE_API_KEY is unset (see the describeWithVoyage gate).
describeWithVoyage("RemoteLLM (Voyage)", () => {
  let llm: RemoteLLM;

  beforeAll(() => {
    // One shared client for the suite; model config comes from env vars.
    llm = new RemoteLLM({ provider: "voyage" });
  });

  afterAll(async () => {
    await llm.dispose();
  });

  describe("embed", () => {
    it("generates embeddings for text", async () => {
      const result = await llm.embed("Hello world");
      expect(result).not.toBeNull();
      expect(result!.embedding).toBeArray();
      expect(result!.embedding.length).toBeGreaterThan(0);
      expect(result!.model).toContain("voyage");
    });

    it("generates embeddings with query input type", async () => {
      const result = await llm.embed("What is QMD?", { isQuery: true });
      expect(result).not.toBeNull();
      expect(result!.embedding.length).toBeGreaterThan(0);
    });

    it("generates embeddings with document input type", async () => {
      const result = await llm.embed("QMD is a markdown search tool", { isQuery: false });
      expect(result).not.toBeNull();
      expect(result!.embedding.length).toBeGreaterThan(0);
    });
  });

  describe("embedBatch", () => {
    it("embeds multiple texts efficiently", async () => {
      const texts = ["First document", "Second document", "Third document"];
      const results = await llm.embedBatch(texts);

      // One result per input, in order.
      expect(results.length).toBe(3);
      for (const result of results) {
        expect(result).not.toBeNull();
        expect(result!.embedding.length).toBeGreaterThan(0);
      }
    });

    it("handles empty input", async () => {
      const results = await llm.embedBatch([]);
      expect(results).toEqual([]);
    });
  });

  describe("rerank", () => {
    it("reranks documents by relevance", async () => {
      const query = "What is drone training?";
      const documents = [
        { file: "doc1.md", text: "Acme Corp offers professional software development courses" },
        { file: "doc2.md", text: "The weather forecast shows rain tomorrow" },
        { file: "doc3.md", text: "Learn to fly drones with our CAA-certified training program" },
      ];

      const result = await llm.rerank(query, documents);

      expect(result.results.length).toBe(3);
      expect(result.model).toContain("rerank");

      // The drone-related documents should rank higher
      // NOTE(review): doc1 is about software courses, not drones — presumably
      // included to tolerate model variance; confirm the intended top set.
      const topResult = result.results[0]!;
      expect(["doc1.md", "doc3.md"]).toContain(topResult.file);
      expect(topResult.score).toBeGreaterThan(0);
    });

    it("respects top_k parameter", async () => {
      const documents = [
        { file: "a.md", text: "Document A" },
        { file: "b.md", text: "Document B" },
        { file: "c.md", text: "Document C" },
      ];

      const result = await llm.rerank("test", documents, { topK: 2 });
      expect(result.results.length).toBe(2);
    });
  });

  describe("modelExists", () => {
    it("returns info for valid Voyage models", async () => {
      const info = await llm.modelExists("voyage-4-lite");
      expect(info.name).toBe("voyage-4-lite");
      expect(info.exists).toBe(true);
    });

    it("returns exists true for any model (provider-side validation)", async () => {
      // RemoteLLM doesn't validate models client-side - the API will reject invalid ones
      const info = await llm.modelExists("any-model-name");
      expect(info.exists).toBe(true);
    });
  });

  describe("expandQuery", () => {
    // Remote providers do no real expansion: the query is passed through
    // unchanged as both lexical and vector variants.
    it("returns lex and vec query types", async () => {
      const results = await llm.expandQuery("test query");
      expect(results.length).toBe(2);
      expect(results.map(r => r.type)).toContain("lex");
      expect(results.map(r => r.type)).toContain("vec");
      expect(results.every(r => r.text === "test query")).toBe(true);
    });

    it("can exclude lexical queries", async () => {
      const results = await llm.expandQuery("test query", { includeLexical: false });
      expect(results.length).toBe(1);
      expect(results[0]!.type).toBe("vec");
    });
  });
});

// Configuration tests that run without a live API (keys are faked/removed).
describe("RemoteLLM Configuration", () => {
  // Restore an env var to its prior value, deleting it if it was unset.
  // Assigning `undefined` directly (as the previous code did) would coerce
  // it to the literal string "undefined" — Node stringifies env values —
  // leaving VOYAGE_API_KEY "set" for every later test.
  const restoreEnv = (name: string, value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = value;
    }
  };

  it("throws when Voyage API key is missing", () => {
    const originalKey = process.env.VOYAGE_API_KEY;
    delete process.env.VOYAGE_API_KEY;

    // try/finally so a failing expectation can't leave the key deleted.
    try {
      expect(() => new RemoteLLM({ provider: "voyage" })).toThrow("Voyage API key required");
    } finally {
      restoreEnv("VOYAGE_API_KEY", originalKey);
    }
  });

  it("uses custom base URL", () => {
    const originalKey = process.env.VOYAGE_API_KEY;
    process.env.VOYAGE_API_KEY = "test-key";

    try {
      const llm = new RemoteLLM({
        provider: "voyage",
        baseUrl: "https://custom.api.com/v1"
      });

      // Can't easily verify the URL was set, but at least it doesn't throw
      expect(llm).toBeDefined();
    } finally {
      restoreEnv("VOYAGE_API_KEY", originalKey);
    }
  });

  it("uses environment variables for model config", () => {
    const originalKey = process.env.VOYAGE_API_KEY;
    const originalModel = process.env.VOYAGE_EMBED_MODEL;

    process.env.VOYAGE_API_KEY = "test-key";
    process.env.VOYAGE_EMBED_MODEL = "voyage-3-large";

    try {
      const llm = new RemoteLLM({ provider: "voyage" });
      expect(llm).toBeDefined();
    } finally {
      restoreEnv("VOYAGE_API_KEY", originalKey);
      restoreEnv("VOYAGE_EMBED_MODEL", originalModel);
    }
  });
});

describe("getDefaultRemote", () => {
  it("returns singleton instance", () => {
    // Needs a live key; bail out in-test (mirrors the describeWithVoyage gate).
    if (!VOYAGE_API_KEY) {
      console.log("Skipping getDefaultRemote test - no API key");
      return;
    }

    // Two calls must yield the exact same object reference.
    const first = getDefaultRemote();
    const second = getDefaultRemote();
    expect(second).toBe(first);
  });
});
Loading