Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,41 @@ llm_cache -- Cached LLM responses (query expansion, rerank scores)
| Variable | Default | Description |
|----------|---------|-------------|
| `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
| `QMD_PROVIDER` | `local` | Embedding provider: `local`, `voyage`, or `openai` |
| `VOYAGE_API_KEY` | - | API key for Voyage AI (required when `QMD_PROVIDER=voyage`) |
| `VOYAGE_EMBED_MODEL` | `voyage-4-lite` | Voyage embedding model |
| `VOYAGE_RERANK_MODEL` | `rerank-2` | Voyage reranking model |
| `OPENAI_API_KEY` | - | API key for OpenAI (required when `QMD_PROVIDER=openai`) |
| `OPENAI_EMBED_MODEL` | `text-embedding-3-small` | OpenAI embedding model |
| `OPENAI_API_BASE` | `https://api.openai.com/v1` | Base URL (for OpenAI-compatible APIs) |

### Remote Embedding Providers

QMD supports remote embedding APIs as an alternative to local models. This is useful when:
- You want faster embeddings without GPU
- You need higher quality embeddings (e.g., Voyage AI)
- You're running on a machine without enough resources for local models

**Voyage AI** (recommended for quality):
```bash
export QMD_PROVIDER=voyage
export VOYAGE_API_KEY=your-api-key

qmd embed # Uses Voyage for embeddings
qmd vsearch "query" # Uses Voyage for query embedding
qmd query "query" # Uses Voyage for embeddings + reranking
```

**OpenAI-compatible APIs**:
```bash
export QMD_PROVIDER=openai
export OPENAI_API_KEY=your-api-key

# Or use a local OpenAI-compatible server (Ollama, vLLM, etc.)
export OPENAI_API_BASE=http://localhost:11434/v1
```

**Note:** Query expansion always uses local models (LlamaCpp) regardless of the provider setting.

## How It Works

Expand Down
66 changes: 66 additions & 0 deletions src/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -841,3 +841,69 @@ export async function disposeDefaultLlamaCpp(): Promise<void> {
}
}

// =============================================================================
// Provider Selection (Local vs Remote)
// =============================================================================

// Lazily-created singleton returned by getDefaultLLM(); reset to null by disposeDefaultLLM().
let defaultLLM: LLM | null = null;

/**
 * Get the default LLM instance based on QMD_PROVIDER env var.
 * - "voyage" or "openai" → RemoteLLM (API-based)
 * - unset or "local" (or any other value) → LlamaCpp (local models)
 *
 * The instance is cached for the lifetime of the process: QMD_PROVIDER is read
 * only on the first call, and later changes have no effect until
 * disposeDefaultLLM() clears the cache.
 *
 * NOTE(review): two concurrent first calls can both pass the cache check and
 * construct separate instances (the last one wins the cache slot) — confirm
 * callers only invoke this from a single startup path.
 *
 * @returns The shared LLM instance for the configured provider.
 */
export async function getDefaultLLM(): Promise<LLM> {
  if (defaultLLM) return defaultLLM;

  const provider = process.env.QMD_PROVIDER?.toLowerCase();

  if (provider === "voyage" || provider === "openai") {
    // Dynamic import to avoid loading remote.ts when not needed.
    const { RemoteLLM } = await import("./remote.js");
    // `provider` is already narrowed to "voyage" | "openai" by the checks
    // above, so the previous `as "voyage" | "openai"` assertion was redundant.
    defaultLLM = new RemoteLLM({ provider });
  } else {
    defaultLLM = getDefaultLlamaCpp();
  }

  return defaultLLM;
}

/**
 * Describe the active provider configuration for status display.
 *
 * Resolution mirrors getDefaultLLM: QMD_PROVIDER selects the backend
 * ("voyage", "openai", anything else → "local"), and per-provider env vars
 * override the model-name defaults.
 *
 * @returns Provider name plus the embedding and reranking model identifiers.
 */
export function getProviderInfo(): { provider: string; embedModel: string; rerankModel: string } {
  const env = process.env;

  switch (env.QMD_PROVIDER?.toLowerCase() || "local") {
    case "voyage":
      return {
        provider: "voyage",
        embedModel: env.VOYAGE_EMBED_MODEL || "voyage-4-lite",
        rerankModel: env.VOYAGE_RERANK_MODEL || "rerank-2",
      };
    case "openai":
      return {
        provider: "openai",
        embedModel: env.OPENAI_EMBED_MODEL || "text-embedding-3-small",
        // No rerank model is configured for the OpenAI-compatible path.
        rerankModel: "(none)",
      };
    default:
      // Unset, "local", or any unrecognized value falls back to local models.
      return {
        provider: "local",
        embedModel: env.QMD_EMBED_MODEL || "embeddinggemma",
        rerankModel: env.QMD_RERANK_MODEL || "qwen3-reranker",
      };
  }
}

/**
 * Dispose the default LLM instance and reset the cache so the next
 * getDefaultLLM() call re-reads QMD_PROVIDER.
 */
export async function disposeDefaultLLM(): Promise<void> {
  if (defaultLLM !== null) {
    await defaultLLM.dispose();
    defaultLLM = null;
  }
  // The local backend keeps its own singleton; tear it down as well.
  // NOTE(review): when the provider is "local", defaultLLM IS that singleton,
  // so its dispose() may effectively run twice — presumably
  // disposeDefaultLlamaCpp() is idempotent; verify.
  await disposeDefaultLlamaCpp();
}

16 changes: 15 additions & 1 deletion src/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import {
DEFAULT_MULTI_GET_MAX_BYTES,
createStore,
getDefaultDbPath,
getProviderInfo,
} from "./store.js";
import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, type RerankDocument, type Queryable, type QueryType } from "./llm.js";
import type { SearchResult, RankedResult } from "./store.js";
Expand Down Expand Up @@ -295,9 +296,13 @@ function showStatus(): void {
// Most recent update across all collections
const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };

// Provider info
const providerInfo = getProviderInfo();

console.log(`${c.bold}QMD Status${c.reset}\n`);
console.log(`Index: ${dbPath}`);
console.log(`Size: ${formatBytes(indexSize)}\n`);
console.log(`Size: ${formatBytes(indexSize)}`);
console.log(`Provider: ${c.cyan}${providerInfo.provider}${c.reset} (${providerInfo.embedModel})\n`);

console.log(`${c.bold}Documents${c.reset}`);
console.log(` Total: ${totalDocs.count} files indexed`);
Expand Down Expand Up @@ -2398,6 +2403,15 @@ function showHelp(): void {
console.log(" Reranking: qwen3-reranker-0.6b-q8_0");
console.log(" Generation: Qwen3-0.6B-Q8_0");
console.log("");
console.log("Environment variables (remote providers):");
console.log(" QMD_PROVIDER - Provider: local (default), voyage, openai");
console.log(" VOYAGE_API_KEY - Voyage AI API key");
console.log(" VOYAGE_EMBED_MODEL - Voyage model (default: voyage-4-lite)");
console.log(" VOYAGE_RERANK_MODEL - Voyage rerank model (default: rerank-2)");
console.log(" OPENAI_API_KEY - OpenAI API key");
console.log(" OPENAI_EMBED_MODEL - OpenAI model (default: text-embedding-3-small)");
console.log(" OPENAI_API_BASE - Base URL for OpenAI-compatible APIs");
console.log("");
console.log(`Index: ${getDbPath()}`);
}

Expand Down
178 changes: 178 additions & 0 deletions src/remote.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/**
* Tests for remote embedding providers (Voyage AI, OpenAI-compatible)
*/

import { describe, it, expect, beforeAll, afterAll } from "bun:test";
import { RemoteLLM, getDefaultRemote } from "./remote";

// Skip tests if no API key is available
const VOYAGE_API_KEY = process.env.VOYAGE_API_KEY;
// Gate for the live-API suite: runs the tests only when a Voyage key is set,
// otherwise the whole describe block is registered as skipped.
const describeWithVoyage = VOYAGE_API_KEY ? describe : describe.skip;

// Live integration tests against the real Voyage API; the whole suite is
// skipped when VOYAGE_API_KEY is unset (see the describeWithVoyage gate).
describeWithVoyage("RemoteLLM (Voyage)", () => {
  let llm: RemoteLLM;

  beforeAll(() => {
    // One shared client for the suite; model config comes from env vars.
    llm = new RemoteLLM({ provider: "voyage" });
  });

  afterAll(async () => {
    await llm.dispose();
  });

  describe("embed", () => {
    it("generates embeddings for text", async () => {
      const result = await llm.embed("Hello world");
      expect(result).not.toBeNull();
      expect(result!.embedding).toBeArray();
      expect(result!.embedding.length).toBeGreaterThan(0);
      expect(result!.model).toContain("voyage");
    });

    it("generates embeddings with query input type", async () => {
      const result = await llm.embed("What is QMD?", { isQuery: true });
      expect(result).not.toBeNull();
      expect(result!.embedding.length).toBeGreaterThan(0);
    });

    it("generates embeddings with document input type", async () => {
      const result = await llm.embed("QMD is a markdown search tool", { isQuery: false });
      expect(result).not.toBeNull();
      expect(result!.embedding.length).toBeGreaterThan(0);
    });
  });

  describe("embedBatch", () => {
    it("embeds multiple texts efficiently", async () => {
      const texts = ["First document", "Second document", "Third document"];
      const results = await llm.embedBatch(texts);

      // One result per input, in order.
      expect(results.length).toBe(3);
      for (const result of results) {
        expect(result).not.toBeNull();
        expect(result!.embedding.length).toBeGreaterThan(0);
      }
    });

    it("handles empty input", async () => {
      const results = await llm.embedBatch([]);
      expect(results).toEqual([]);
    });
  });

  describe("rerank", () => {
    it("reranks documents by relevance", async () => {
      const query = "What is drone training?";
      const documents = [
        { file: "doc1.md", text: "Acme Corp offers professional software development courses" },
        { file: "doc2.md", text: "The weather forecast shows rain tomorrow" },
        { file: "doc3.md", text: "Learn to fly drones with our CAA-certified training program" },
      ];

      const result = await llm.rerank(query, documents);

      expect(result.results.length).toBe(3);
      expect(result.model).toContain("rerank");

      // The drone-related documents should rank higher
      // NOTE(review): doc1 is about software courses, not drones — presumably
      // included to tolerate model variance; confirm the intended top set.
      const topResult = result.results[0]!;
      expect(["doc1.md", "doc3.md"]).toContain(topResult.file);
      expect(topResult.score).toBeGreaterThan(0);
    });

    it("respects top_k parameter", async () => {
      const documents = [
        { file: "a.md", text: "Document A" },
        { file: "b.md", text: "Document B" },
        { file: "c.md", text: "Document C" },
      ];

      const result = await llm.rerank("test", documents, { topK: 2 });
      expect(result.results.length).toBe(2);
    });
  });

  describe("modelExists", () => {
    it("returns info for valid Voyage models", async () => {
      const info = await llm.modelExists("voyage-4-lite");
      expect(info.name).toBe("voyage-4-lite");
      expect(info.exists).toBe(true);
    });

    it("returns exists true for any model (provider-side validation)", async () => {
      // RemoteLLM doesn't validate models client-side - the API will reject invalid ones
      const info = await llm.modelExists("any-model-name");
      expect(info.exists).toBe(true);
    });
  });

  describe("expandQuery", () => {
    // Remote providers do no real expansion: the query is passed through
    // unchanged as both lexical and vector variants.
    it("returns lex and vec query types", async () => {
      const results = await llm.expandQuery("test query");
      expect(results.length).toBe(2);
      expect(results.map(r => r.type)).toContain("lex");
      expect(results.map(r => r.type)).toContain("vec");
      expect(results.every(r => r.text === "test query")).toBe(true);
    });

    it("can exclude lexical queries", async () => {
      const results = await llm.expandQuery("test query", { includeLexical: false });
      expect(results.length).toBe(1);
      expect(results[0]!.type).toBe("vec");
    });
  });
});

// Configuration tests that run without a live API (keys are faked/removed).
describe("RemoteLLM Configuration", () => {
  // Restore an env var to its prior value, deleting it if it was unset.
  // Assigning `undefined` directly (as the previous code did) would coerce
  // it to the literal string "undefined" — Node stringifies env values —
  // leaving VOYAGE_API_KEY "set" for every later test.
  const restoreEnv = (name: string, value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = value;
    }
  };

  it("throws when Voyage API key is missing", () => {
    const originalKey = process.env.VOYAGE_API_KEY;
    delete process.env.VOYAGE_API_KEY;

    // try/finally so a failing expectation can't leave the key deleted.
    try {
      expect(() => new RemoteLLM({ provider: "voyage" })).toThrow("Voyage API key required");
    } finally {
      restoreEnv("VOYAGE_API_KEY", originalKey);
    }
  });

  it("uses custom base URL", () => {
    const originalKey = process.env.VOYAGE_API_KEY;
    process.env.VOYAGE_API_KEY = "test-key";

    try {
      const llm = new RemoteLLM({
        provider: "voyage",
        baseUrl: "https://custom.api.com/v1"
      });

      // Can't easily verify the URL was set, but at least it doesn't throw
      expect(llm).toBeDefined();
    } finally {
      restoreEnv("VOYAGE_API_KEY", originalKey);
    }
  });

  it("uses environment variables for model config", () => {
    const originalKey = process.env.VOYAGE_API_KEY;
    const originalModel = process.env.VOYAGE_EMBED_MODEL;

    process.env.VOYAGE_API_KEY = "test-key";
    process.env.VOYAGE_EMBED_MODEL = "voyage-3-large";

    try {
      const llm = new RemoteLLM({ provider: "voyage" });
      expect(llm).toBeDefined();
    } finally {
      restoreEnv("VOYAGE_API_KEY", originalKey);
      restoreEnv("VOYAGE_EMBED_MODEL", originalModel);
    }
  });
});

describe("getDefaultRemote", () => {
  it("returns singleton instance", () => {
    // Needs a live key; bail out in-test (mirrors the describeWithVoyage gate).
    if (!VOYAGE_API_KEY) {
      console.log("Skipping getDefaultRemote test - no API key");
      return;
    }

    // Two calls must yield the exact same object reference.
    const first = getDefaultRemote();
    const second = getDefaultRemote();
    expect(second).toBe(first);
  });
});
Loading