Skip to content

Commit 030df8a

Browse files
authored
feat: add support for free azure search endpoint and github models (#8)
1 parent 139873a commit 030df8a

File tree

6 files changed

+227
-58
lines changed

6 files changed

+227
-58
lines changed

.env.example

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,8 @@ AZURE_AI_SEARCH_METADATA_FIELD="metadata"
3434
AZURE_AI_SEARCH_EMBEDDING_DIMENSIONALITY="1536"
3535

3636
# Optional: Set the log level for the Azure SDKs.
37-
AZURE_LOG_LEVEL=info
37+
AZURE_LOG_LEVEL=info
38+
39+
# For local development, you must provide a personal access token for using GitHub Models and Azure AI Search.
40+
# Make sure to keep these keys secret and never expose them in public repositories.
41+
GITHUB_TOKEN=<your-personal-access-token>

README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ This template, the application code and configuration it contains, has been buil
4747
- [Deploying again](#deploying-again)
4848
- [Running the development server](#running-the-development-server)
4949
- [Using Docker (optional)](#using-docker-optional)
50+
- [Running application at no cost](#running-application-at-no-cost)
5051
- [Using the app](#using-the-app)
5152
- [Clean up](#clean-up)
5253
- [Guidance](#guidance)
@@ -81,6 +82,8 @@ However, you can try the [Azure pricing calculator](https://azure.com/e/a87a169b
8182

8283
To reduce costs, you can switch to free SKUs for various services, but those SKUs have limitations.
8384

85+
To try out the example at no cost, refer to [Running application at no cost](#running-application-at-no-cost).
86+
8487
To avoid unnecessary costs, remember to take down your app if it's no longer in use,
8588
either by deleting the resource group in the Portal or running `azd down`.
8689

@@ -227,6 +230,37 @@ npm run dev
227230

228231
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
229232

233+
234+
## Running application at no cost
235+
This approach uses the free GitHub Models endpoint to access GPT and embedding models, and the free Azure AI Search endpoint for data indexing and retrieval.
236+
237+
First, install the project dependencies:
238+
239+
```
240+
npm install
241+
```
242+
243+
Create a GitHub personal access token (refer to [Managing your personal access tokens](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) for instructions on creating tokens). In the root of the project, create a `.env` file and provide values for the environment variables below (refer to `.env.example`):
244+
245+
```
246+
GITHUB_TOKEN=
247+
LLAMAINDEX_STORAGE_CACHE_DIR=
248+
```
249+
250+
Next, generate the embeddings of the documents in the [./data](./data) directory:
251+
252+
```
253+
npm run generate
254+
```
255+
256+
Finally, run the development server:
257+
258+
```
259+
npm run dev
260+
```
261+
262+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
263+
230264
## Using the app
231265

232266
- In Azure: navigate to the Azure app deployed by `azd`. The URL is printed out when `azd` completes (as "Endpoint"), or you can find it in the Azure portal.

app/api/chat/engine/chat.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ import { getDataSource } from "./index";
1010
import { generateFilters } from "./queryFilter";
1111
import { createTools } from "./tools";
1212

13+
const createRetrieverOptions = () => {
14+
return process.env.GITHUB_TOKEN
15+
? { mode: "hybrid" as any }
16+
: { mode: "semantic_hybrid" as any, similarityTopK: 5 };
17+
};
18+
1319
export async function createChatEngine(documentIds?: string[], params?: any) {
1420
const tools: BaseToolWithCall[] = [];
1521

@@ -20,12 +26,7 @@ export async function createChatEngine(documentIds?: string[], params?: any) {
2026
tools.push(
2127
new QueryEngineTool({
2228
queryEngine: index.asQueryEngine({
23-
retriever: index.asRetriever({
24-
// FIXME: Cannot read properties of undefined (reading 'SEMANTIC_HYBRID')
25-
// mode: VectorStoreQueryMode.SEMANTIC_HYBRID,
26-
mode: "semantic_hybrid" as any,
27-
similarityTopK: 5,
28-
}),
29+
retriever: index.asRetriever(createRetrieverOptions()),
2930
preFilters: generateFilters(documentIds || [])
3031
}),
3132
metadata: {

app/api/chat/engine/createIndex.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import { MODELS_ENDPOINT } from "./settings";
2+
3+
let requestHeaders: {
4+
"Authorization": string;
5+
"Content-Type": string;
6+
"X-Auth-Provider": string;
7+
};
8+
9+
async function getSearchEndpointDetails() {
10+
try {
11+
const response = await fetch(`${MODELS_ENDPOINT}/freeazuresearch/endpoint/`, {
12+
headers: requestHeaders,
13+
});
14+
const jsonResponse = await response.json();
15+
const searchServiceEndpoint = jsonResponse.endpoint;
16+
const searchIndexName = jsonResponse.indexName;
17+
console.log(`Your Azure AI Search Endpoint: ${searchServiceEndpoint}; Index Name: ${searchIndexName}`);
18+
return { endpoint: searchServiceEndpoint, indexName: searchIndexName };
19+
} catch (error) {
20+
console.error("Error while retrieving search service details", error);
21+
}
22+
}
23+
24+
async function createUploadSession() {
25+
try {
26+
const response = await fetch(`${MODELS_ENDPOINT}/freeazuresearch/files/createUploadSession`, {
27+
method: "POST",
28+
headers: requestHeaders,
29+
});
30+
31+
const jsonResponse = await response.json();
32+
const uploadSessionId = jsonResponse.id;
33+
console.log(`Created upload session ${uploadSessionId}.`);
34+
return uploadSessionId;
35+
} catch (error) {
36+
console.error("Error while creating upload session", error);
37+
}
38+
}
39+
40+
export async function createSearchService() {
41+
const githubToken = process.env.GITHUB_TOKEN;
42+
requestHeaders = {
43+
"Authorization": `Bearer ${githubToken}`,
44+
"Content-Type": "application/json",
45+
"X-Auth-Provider": "github",
46+
};
47+
48+
await createUploadSession();
49+
return getSearchEndpointDetails();
50+
}

app/api/chat/engine/settings.ts

Lines changed: 124 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
ManagedIdentityCredential,
55
} from "@azure/identity";
66
import {
7+
AzureKeyCredential,
78
KnownAnalyzerNames,
89
KnownVectorSearchAlgorithmKind,
910
} from "@azure/search-documents";
@@ -14,24 +15,134 @@ import { OpenAI, OpenAIEmbedding, Settings } from "llamaindex";
1415
import {
1516
AzureAISearchVectorStore,
1617
IndexManagement,
17-
} from "llamaindex/vector-store/azure/AzureAISearchVectorStore";
18+
} from "llamaindex/vector-store/AzureAISearchVectorStore"
19+
20+
import { createSearchService } from "./createIndex";
1821

1922
const CHUNK_SIZE = 512;
2023
const CHUNK_OVERLAP = 20;
2124
const AZURE_COGNITIVE_SERVICES_SCOPE =
2225
"https://cognitiveservices.azure.com/.default";
26+
export const MODELS_ENDPOINT = "https://models.inference.ai.azure.com";
2327

24-
export const initSettings = async () => {
25-
if (
26-
!process.env.AZURE_OPENAI_CHAT_DEPLOYMENT ||
27-
!process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT
28-
) {
28+
async function createAzureAISearchOptions(
29+
azureAiSearchVectorStoreAuth: {
30+
key?: string;
31+
credential?: DefaultAzureCredential | ManagedIdentityCredential;
32+
},
33+
githubToken?: string
34+
) {
35+
const commonOptions = {
36+
serviceApiVersion: "2024-09-01-preview",
37+
indexManagement: IndexManagement.CREATE_IF_NOT_EXISTS,
38+
languageAnalyzer: KnownAnalyzerNames.EnLucene,
39+
vectorAlgorithmType: KnownVectorSearchAlgorithmKind.ExhaustiveKnn,
40+
};
41+
42+
if (!githubToken) {
43+
return {
44+
...azureAiSearchVectorStoreAuth,
45+
endpoint: process.env.AZURE_AI_SEARCH_ENDPOINT,
46+
indexName: process.env.AZURE_AI_SEARCH_INDEX ?? "llamaindex-vector-search",
47+
idFieldKey: process.env.AZURE_AI_SEARCH_ID_FIELD ?? "id",
48+
chunkFieldKey: process.env.AZURE_AI_SEARCH_CHUNK_FIELD ?? "chunk",
49+
embeddingFieldKey: process.env.AZURE_AI_SEARCH_EMBEDDING_FIELD ?? "embedding",
50+
metadataStringFieldKey: process.env.AZURE_AI_SEARCH_METADATA_FIELD ?? "metadata",
51+
docIdFieldKey: process.env.AZURE_AI_SEARCH_DOC_ID_FIELD ?? "doc_id",
52+
embeddingDimensionality: Number(process.env.AZURE_AI_SEARCH_EMBEDDING_DIMENSIONALITY) ?? 1536,
53+
...commonOptions,
54+
};
55+
}
56+
57+
const searchService = await createSearchService();
58+
if (!searchService) {
59+
throw new Error("Failed to retrieve search service details.");
60+
}
61+
62+
return {
63+
credential: new AzureKeyCredential(githubToken),
64+
endpoint: searchService.endpoint,
65+
indexName: searchService.indexName,
66+
idFieldKey: "chunk_id",
67+
chunkFieldKey: "chunk",
68+
embeddingFieldKey: "text_vector",
69+
metadataStringFieldKey: "parent_id",
70+
docIdFieldKey: "chunk_id",
71+
embeddingDimensionality: 1536,
72+
...commonOptions,
73+
};
74+
}
75+
76+
function createEmbeddingParams(
77+
openAiConfig: {
78+
apiKey?: string;
79+
deployment?: string;
80+
model?: string;
81+
azure?: Record<string, string | CallableFunction>;
82+
},
83+
githubToken?: string
84+
) {
85+
if (!githubToken) {
86+
return {
87+
...openAiConfig,
88+
model: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
89+
azure: {
90+
...openAiConfig.azure,
91+
deployment: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
92+
},
93+
};
94+
}
95+
96+
return {
97+
model: "text-embedding-3-small",
98+
apiKey: githubToken,
99+
additionalSessionOptions: {
100+
baseURL: MODELS_ENDPOINT,
101+
},
102+
};
103+
}
104+
105+
function createOpenAiParams(
106+
openAiConfig: {
107+
apiKey?: string;
108+
deployment?: string;
109+
model?: string;
110+
azure?: Record<string, string | CallableFunction>;
111+
},
112+
githubToken?: string
113+
) {
114+
if (!githubToken) {
115+
return {
116+
...openAiConfig,
117+
model: process.env.AZURE_OPENAI_CHAT_DEPLOYMENT,
118+
};
119+
}
120+
121+
return {
122+
apiKey: githubToken,
123+
additionalSessionOptions: {
124+
baseURL: MODELS_ENDPOINT
125+
},
126+
model: "gpt-4o",
127+
}
128+
}
129+
130+
function validateEnvironmentVariables() {
131+
const areOpenAiChatAndEmbeddingDeploymentConfigured =
132+
process.env.AZURE_OPENAI_CHAT_DEPLOYMENT && process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT;
133+
const isGithubTokenConfigured = process.env.GITHUB_TOKEN;
134+
if (!areOpenAiChatAndEmbeddingDeploymentConfigured && !isGithubTokenConfigured) {
29135
throw new Error(
30-
"'AZURE_OPENAI_CHAT_DEPLOYMENT' and 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT' env variables must be set.",
136+
"Environment variables 'AZURE_OPENAI_CHAT_DEPLOYMENT' and 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT' must be set, or a valid GITHUB_TOKEN must be provided."
31137
);
32138
}
139+
}
33140

141+
export const initSettings = async () => {
142+
validateEnvironmentVariables();
143+
34144
let credential;
145+
const githubToken = process.env.GITHUB_TOKEN;
35146
const azureAiSearchVectorStoreAuth: {
36147
key?: string;
37148
credential?: DefaultAzureCredential | ManagedIdentityCredential;
@@ -72,7 +183,9 @@ export const initSettings = async () => {
72183
);
73184
openAiConfig.azure = {
74185
azureADTokenProvider,
186+
...(process.env.AZURE_OPENAI_CHAT_DEPLOYMENT && {
75187
deployment: process.env.AZURE_OPENAI_CHAT_DEPLOYMENT,
188+
}),
76189
};
77190

78191
azureAiSearchVectorStoreAuth.credential = credential;
@@ -86,59 +199,20 @@ export const initSettings = async () => {
86199
}
87200

88201
// configure LLM model
89-
Settings.llm = new OpenAI({
90-
...openAiConfig,
91-
model: process.env.AZURE_OPENAI_CHAT_DEPLOYMENT,
92-
});
202+
Settings.llm = new OpenAI(createOpenAiParams(openAiConfig, githubToken));
93203
console.log({ openAiConfig });
94204

95205
// configure embedding model
96-
Settings.embedModel = new OpenAIEmbedding({
97-
...openAiConfig,
98-
model: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
99-
azure: {
100-
...openAiConfig.azure,
101-
deployment: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
102-
}
103-
});
206+
Settings.embedModel = new OpenAIEmbedding(createEmbeddingParams(openAiConfig, githubToken));
104207

105208
Settings.chunkSize = CHUNK_SIZE;
106209
Settings.chunkOverlap = CHUNK_OVERLAP;
107210

108211
// FIXME: find an elegant way to share the same instance across the ingestion and
109212
// generation pipelines
110213

111-
const endpoint = process.env.AZURE_AI_SEARCH_ENDPOINT;
112-
const indexName =
113-
process.env.AZURE_AI_SEARCH_INDEX ?? "llamaindex-vector-search";
114-
const idFieldKey = process.env.AZURE_AI_SEARCH_ID_FIELD ?? "id";
115-
const chunkFieldKey = process.env.AZURE_AI_SEARCH_CHUNK_FIELD ?? "chunk";
116-
const embeddingFieldKey =
117-
process.env.AZURE_AI_SEARCH_EMBEDDING_FIELD ?? "embedding";
118-
const metadataStringFieldKey =
119-
process.env.AZURE_AI_SEARCH_METADATA_FIELD ?? "metadata";
120-
const docIdFieldKey = process.env.AZURE_AI_SEARCH_DOC_ID_FIELD ?? "doc_id";
121-
214+
const azureAiSearchOptions = await createAzureAISearchOptions(azureAiSearchVectorStoreAuth, githubToken);
122215
console.log("Initializing Azure AI Search Vector Store");
123216

124-
(Settings as any).__AzureAISearchVectorStoreInstance__ =
125-
new AzureAISearchVectorStore({
126-
// Use either a key or a credential based on the environment
127-
...azureAiSearchVectorStoreAuth,
128-
endpoint,
129-
indexName,
130-
idFieldKey,
131-
chunkFieldKey,
132-
embeddingFieldKey,
133-
metadataStringFieldKey,
134-
docIdFieldKey,
135-
serviceApiVersion: "2024-09-01-preview",
136-
// FIXME: import IndexManagement.CREATE_IF_NOT_EXISTS from 'llamaindex'
137-
// indexManagement: IndexManagement.CREATE_IF_NOT_EXISTS,
138-
indexManagement: "CreateIfNotExists" as IndexManagement,
139-
embeddingDimensionality: Number(process.env.AZURE_AI_SEARCH_EMBEDDING_DIMENSIONALITY) ?? 1536,
140-
languageAnalyzer: KnownAnalyzerNames.EnLucene,
141-
// store vectors on disk
142-
vectorAlgorithmType: KnownVectorSearchAlgorithmKind.ExhaustiveKnn,
143-
});
217+
(Settings as any).__AzureAISearchVectorStoreInstance__ = new AzureAISearchVectorStore(azureAiSearchOptions);
144218
};

app/api/chat/route.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,22 @@ import { createCallbackManager } from "./llamaindex/streaming/events";
1313
import { generateNextQuestions } from "./llamaindex/streaming/suggestion";
1414

1515
initObservability();
16-
initSettings();
1716

1817
export const runtime = "nodejs";
1918
export const dynamic = "force-dynamic";
2019

20+
async function setVectorStoreInstance() {
21+
if (!(Settings as any).__AzureAISearchVectorStoreInstance__) {
22+
await initSettings();
23+
}
24+
}
25+
2126
export async function POST(request: NextRequest) {
2227
// Init Vercel AI StreamData and timeout
2328
const vercelStreamData = new StreamData();
2429

2530
try {
31+
await setVectorStoreInstance();
2632
const body = await request.json();
2733
const { messages, data }: { messages: Message[]; data?: any } = body;
2834
if (!isValidMessages(messages)) {

0 commit comments

Comments
 (0)