Skip to content

Commit 030df8a

Browse files
authored
feat: add support for free azure search endpoint and github models (#8)
1 parent 139873a commit 030df8a

File tree

6 files changed

+227
-58
lines changed

6 files changed

+227
-58
lines changed

.env.example

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,8 @@ AZURE_AI_SEARCH_METADATA_FIELD="metadata"
3434
AZURE_AI_SEARCH_EMBEDDING_DIMENSIONALITY="1536"
3535

3636
# Optional: Set the log level for the Azure SDKs.
37-
AZURE_LOG_LEVEL=info
37+
AZURE_LOG_LEVEL=info
38+
39+
# For local development, you must provide a personal access token for using GitHub Models and Azure AI Search.
40+
# Make sure to keep these keys secret and never expose them in public repositories.
41+
GITHUB_TOKEN=<your-personal-access-token>

README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ This template, the application code and configuration it contains, has been buil
4747
- [Deploying again](#deploying-again)
4848
- [Running the development server](#running-the-development-server)
4949
- [Using Docker (optional)](#using-docker-optional)
50+
- [Running application at no cost](#running-application-at-no-cost)
5051
- [Using the app](#using-the-app)
5152
- [Clean up](#clean-up)
5253
- [Guidance](#guidance)
@@ -81,6 +82,8 @@ However, you can try the [Azure pricing calculator](https://azure.com/e/a87a169b
8182

8283
To reduce costs, you can switch to free SKUs for various services, but those SKUs have limitations.
8384

85+
To try out the example at no cost, refer to [Running application at no cost](#running-application-at-no-cost).
86+
8487
To avoid unnecessary costs, remember to take down your app if it's no longer in use,
8588
either by deleting the resource group in the Portal or running `azd down`.
8689

@@ -227,6 +230,37 @@ npm run dev
227230

228231
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
229232

233+
234+
## Running application at no cost
235+
This approach uses the free GitHub Models endpoint to access GPT and embedding models, and the free Azure AI Search endpoint for data indexing and retrieval.
236+
237+
First, install the project dependencies:
238+
239+
```
240+
npm install
241+
```
242+
243+
Create a GitHub personal access token (refer to [Managing your personal access tokens](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) for instructions on creating tokens). In the root of the project, create a `.env` file and provide values for the environment variables below (refer to `.env.example`):
244+
245+
```
246+
GITHUB_TOKEN=
247+
LLAMAINDEX_STORAGE_CACHE_DIR=
248+
```
249+
250+
Next, generate the embeddings of the documents in the [./data](./data) directory:
251+
252+
```
253+
npm run generate
254+
```
255+
256+
Finally, run the development server:
257+
258+
```
259+
npm run dev
260+
```
261+
262+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
263+
230264
## Using the app
231265

232266
- In Azure: navigate to the Azure app deployed by `azd`. The URL is printed out when `azd` completes (as "Endpoint"), or you can find it in the Azure portal.

app/api/chat/engine/chat.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ import { getDataSource } from "./index";
1010
import { generateFilters } from "./queryFilter";
1111
import { createTools } from "./tools";
1212

13+
const createRetrieverOptions = () => {
14+
return process.env.GITHUB_TOKEN
15+
? { mode: "hybrid" as any }
16+
: { mode: "semantic_hybrid" as any, similarityTopK: 5 };
17+
};
18+
1319
export async function createChatEngine(documentIds?: string[], params?: any) {
1420
const tools: BaseToolWithCall[] = [];
1521

@@ -20,12 +26,7 @@ export async function createChatEngine(documentIds?: string[], params?: any) {
2026
tools.push(
2127
new QueryEngineTool({
2228
queryEngine: index.asQueryEngine({
23-
retriever: index.asRetriever({
24-
// FIXME: Cannot read properties of undefined (reading 'SEMANTIC_HYBRID')
25-
// mode: VectorStoreQueryMode.SEMANTIC_HYBRID,
26-
mode: "semantic_hybrid" as any,
27-
similarityTopK: 5,
28-
}),
29+
retriever: index.asRetriever(createRetrieverOptions()),
2930
preFilters: generateFilters(documentIds || [])
3031
}),
3132
metadata: {

app/api/chat/engine/createIndex.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import { MODELS_ENDPOINT } from "./settings";
2+
3+
let requestHeaders: {
4+
"Authorization": string;
5+
"Content-Type": string;
6+
"X-Auth-Provider": string;
7+
};
8+
9+
async function getSearchEndpointDetails() {
10+
try {
11+
const response = await fetch(`${MODELS_ENDPOINT}/freeazuresearch/endpoint/`, {
12+
headers: requestHeaders,
13+
});
14+
const jsonResponse = await response.json();
15+
const searchServiceEndpoint = jsonResponse.endpoint;
16+
const searchIndexName = jsonResponse.indexName;
17+
console.log(`Your Azure AI Search Endpoint: ${searchServiceEndpoint}; Index Name: ${searchIndexName}`);
18+
return { endpoint: searchServiceEndpoint, indexName: searchIndexName };
19+
} catch (error) {
20+
console.error("Error while retrieving search service details", error);
21+
}
22+
}
23+
24+
async function createUploadSession() {
25+
try {
26+
const response = await fetch(`${MODELS_ENDPOINT}/freeazuresearch/files/createUploadSession`, {
27+
method: "POST",
28+
headers: requestHeaders,
29+
});
30+
31+
const jsonResponse = await response.json();
32+
const uploadSessionId = jsonResponse.id;
33+
console.log(`Created upload session ${uploadSessionId}.`);
34+
return uploadSessionId;
35+
} catch (error) {
36+
console.error("Error while creating upload session", error);
37+
}
38+
}
39+
40+
export async function createSearchService() {
41+
const githubToken = process.env.GITHUB_TOKEN;
42+
requestHeaders = {
43+
"Authorization": `Bearer ${githubToken}`,
44+
"Content-Type": "application/json",
45+
"X-Auth-Provider": "github",
46+
};
47+
48+
await createUploadSession();
49+
return getSearchEndpointDetails();
50+
}

app/api/chat/engine/settings.ts

Lines changed: 124 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
ManagedIdentityCredential,
55
} from "@azure/identity";
66
import {
7+
AzureKeyCredential,
78
KnownAnalyzerNames,
89
KnownVectorSearchAlgorithmKind,
910
} from "@azure/search-documents";
@@ -14,24 +15,134 @@ import { OpenAI, OpenAIEmbedding, Settings } from "llamaindex";
1415
import {
1516
AzureAISearchVectorStore,
1617
IndexManagement,
17-
} from "llamaindex/vector-store/azure/AzureAISearchVectorStore";
18+
} from "llamaindex/vector-store/AzureAISearchVectorStore"
19+
20+
import { createSearchService } from "./createIndex";
1821

1922
const CHUNK_SIZE = 512;
2023
const CHUNK_OVERLAP = 20;
2124
const AZURE_COGNITIVE_SERVICES_SCOPE =
2225
"https://cognitiveservices.azure.com/.default";
26+
export const MODELS_ENDPOINT = "https://models.inference.ai.azure.com";
2327

24-
export const initSettings = async () => {
25-
if (
26-
!process.env.AZURE_OPENAI_CHAT_DEPLOYMENT ||
27-
!process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT
28-
) {
28+
async function createAzureAISearchOptions(
29+
azureAiSearchVectorStoreAuth: {
30+
key?: string;
31+
credential?: DefaultAzureCredential | ManagedIdentityCredential;
32+
},
33+
githubToken?: string
34+
) {
35+
const commonOptions = {
36+
serviceApiVersion: "2024-09-01-preview",
37+
indexManagement: IndexManagement.CREATE_IF_NOT_EXISTS,
38+
languageAnalyzer: KnownAnalyzerNames.EnLucene,
39+
vectorAlgorithmType: KnownVectorSearchAlgorithmKind.ExhaustiveKnn,
40+
};
41+
42+
if (!githubToken) {
43+
return {
44+
...azureAiSearchVectorStoreAuth,
45+
endpoint: process.env.AZURE_AI_SEARCH_ENDPOINT,
46+
indexName: process.env.AZURE_AI_SEARCH_INDEX ?? "llamaindex-vector-search",
47+
idFieldKey: process.env.AZURE_AI_SEARCH_ID_FIELD ?? "id",
48+
chunkFieldKey: process.env.AZURE_AI_SEARCH_CHUNK_FIELD ?? "chunk",
49+
embeddingFieldKey: process.env.AZURE_AI_SEARCH_EMBEDDING_FIELD ?? "embedding",
50+
metadataStringFieldKey: process.env.AZURE_AI_SEARCH_METADATA_FIELD ?? "metadata",
51+
docIdFieldKey: process.env.AZURE_AI_SEARCH_DOC_ID_FIELD ?? "doc_id",
52+
embeddingDimensionality: Number(process.env.AZURE_AI_SEARCH_EMBEDDING_DIMENSIONALITY) ?? 1536,
53+
...commonOptions,
54+
};
55+
}
56+
57+
const searchService = await createSearchService();
58+
if (!searchService) {
59+
throw new Error("Failed to retrieve search service details.");
60+
}
61+
62+
return {
63+
credential: new AzureKeyCredential(githubToken),
64+
endpoint: searchService.endpoint,
65+
indexName: searchService.indexName,
66+
idFieldKey: "chunk_id",
67+
chunkFieldKey: "chunk",
68+
embeddingFieldKey: "text_vector",
69+
metadataStringFieldKey: "parent_id",
70+
docIdFieldKey: "chunk_id",
71+
embeddingDimensionality: 1536,
72+
...commonOptions,
73+
};
74+
}
75+
76+
function createEmbeddingParams(
77+
openAiConfig: {
78+
apiKey?: string;
79+
deployment?: string;
80+
model?: string;
81+
azure?: Record<string, string | CallableFunction>;
82+
},
83+
githubToken?: string
84+
) {
85+
if (!githubToken) {
86+
return {
87+
...openAiConfig,
88+
model: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
89+
azure: {
90+
...openAiConfig.azure,
91+
deployment: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
92+
},
93+
};
94+
}
95+
96+
return {
97+
model: "text-embedding-3-small",
98+
apiKey: githubToken,
99+
additionalSessionOptions: {
100+
baseURL: MODELS_ENDPOINT,
101+
},
102+
};
103+
}
104+
105+
function createOpenAiParams(
106+
openAiConfig: {
107+
apiKey?: string;
108+
deployment?: string;
109+
model?: string;
110+
azure?: Record<string, string | CallableFunction>;
111+
},
112+
githubToken?: string
113+
) {
114+
if (!githubToken) {
115+
return {
116+
...openAiConfig,
117+
model: process.env.AZURE_OPENAI_CHAT_DEPLOYMENT,
118+
};
119+
}
120+
121+
return {
122+
apiKey: githubToken,
123+
additionalSessionOptions: {
124+
baseURL: MODELS_ENDPOINT
125+
},
126+
model: "gpt-4o",
127+
}
128+
}
129+
130+
function validateEnvironmentVariables() {
131+
const areOpenAiChatAndEmbeddingDeploymentConfigured =
132+
process.env.AZURE_OPENAI_CHAT_DEPLOYMENT && process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT;
133+
const isGithubTokenConfigured = process.env.GITHUB_TOKEN;
134+
if (!areOpenAiChatAndEmbeddingDeploymentConfigured && !isGithubTokenConfigured) {
29135
throw new Error(
30-
"'AZURE_OPENAI_CHAT_DEPLOYMENT' and 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT' env variables must be set.",
136+
"Environment variables 'AZURE_OPENAI_CHAT_DEPLOYMENT' and 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT' must be set, or a valid GITHUB_TOKEN must be provided."
31137
);
32138
}
139+
}
33140

141+
export const initSettings = async () => {
142+
validateEnvironmentVariables();
143+
34144
let credential;
145+
const githubToken = process.env.GITHUB_TOKEN;
35146
const azureAiSearchVectorStoreAuth: {
36147
key?: string;
37148
credential?: DefaultAzureCredential | ManagedIdentityCredential;
@@ -72,7 +183,9 @@ export const initSettings = async () => {
72183
);
73184
openAiConfig.azure = {
74185
azureADTokenProvider,
186+
...(process.env.AZURE_OPENAI_CHAT_DEPLOYMENT && {
75187
deployment: process.env.AZURE_OPENAI_CHAT_DEPLOYMENT,
188+
}),
76189
};
77190

78191
azureAiSearchVectorStoreAuth.credential = credential;
@@ -86,59 +199,20 @@ export const initSettings = async () => {
86199
}
87200

88201
// configure LLM model
89-
Settings.llm = new OpenAI({
90-
...openAiConfig,
91-
model: process.env.AZURE_OPENAI_CHAT_DEPLOYMENT,
92-
});
202+
Settings.llm = new OpenAI(createOpenAiParams(openAiConfig, githubToken));
93203
console.log({ openAiConfig });
94204

95205
// configure embedding model
96-
Settings.embedModel = new OpenAIEmbedding({
97-
...openAiConfig,
98-
model: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
99-
azure: {
100-
...openAiConfig.azure,
101-
deployment: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
102-
}
103-
});
206+
Settings.embedModel = new OpenAIEmbedding(createEmbeddingParams(openAiConfig, githubToken));
104207

105208
Settings.chunkSize = CHUNK_SIZE;
106209
Settings.chunkOverlap = CHUNK_OVERLAP;
107210

108211
// FIXME: find an elegant way to share the same instance across the ingestion and
109212
// generation pipelines
110213

111-
const endpoint = process.env.AZURE_AI_SEARCH_ENDPOINT;
112-
const indexName =
113-
process.env.AZURE_AI_SEARCH_INDEX ?? "llamaindex-vector-search";
114-
const idFieldKey = process.env.AZURE_AI_SEARCH_ID_FIELD ?? "id";
115-
const chunkFieldKey = process.env.AZURE_AI_SEARCH_CHUNK_FIELD ?? "chunk";
116-
const embeddingFieldKey =
117-
process.env.AZURE_AI_SEARCH_EMBEDDING_FIELD ?? "embedding";
118-
const metadataStringFieldKey =
119-
process.env.AZURE_AI_SEARCH_METADATA_FIELD ?? "metadata";
120-
const docIdFieldKey = process.env.AZURE_AI_SEARCH_DOC_ID_FIELD ?? "doc_id";
121-
214+
const azureAiSearchOptions = await createAzureAISearchOptions(azureAiSearchVectorStoreAuth, githubToken);
122215
console.log("Initializing Azure AI Search Vector Store");
123216

124-
(Settings as any).__AzureAISearchVectorStoreInstance__ =
125-
new AzureAISearchVectorStore({
126-
// Use either a key or a credential based on the environment
127-
...azureAiSearchVectorStoreAuth,
128-
endpoint,
129-
indexName,
130-
idFieldKey,
131-
chunkFieldKey,
132-
embeddingFieldKey,
133-
metadataStringFieldKey,
134-
docIdFieldKey,
135-
serviceApiVersion: "2024-09-01-preview",
136-
// FIXME: import IndexManagement.CREATE_IF_NOT_EXISTS from 'llamaindex'
137-
// indexManagement: IndexManagement.CREATE_IF_NOT_EXISTS,
138-
indexManagement: "CreateIfNotExists" as IndexManagement,
139-
embeddingDimensionality: Number(process.env.AZURE_AI_SEARCH_EMBEDDING_DIMENSIONALITY) ?? 1536,
140-
languageAnalyzer: KnownAnalyzerNames.EnLucene,
141-
// store vectors on disk
142-
vectorAlgorithmType: KnownVectorSearchAlgorithmKind.ExhaustiveKnn,
143-
});
217+
(Settings as any).__AzureAISearchVectorStoreInstance__ = new AzureAISearchVectorStore(azureAiSearchOptions);
144218
};

app/api/chat/route.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,22 @@ import { createCallbackManager } from "./llamaindex/streaming/events";
1313
import { generateNextQuestions } from "./llamaindex/streaming/suggestion";
1414

1515
initObservability();
16-
initSettings();
1716

1817
export const runtime = "nodejs";
1918
export const dynamic = "force-dynamic";
2019

20+
async function setVectorStoreInstance() {
21+
if (!(Settings as any).__AzureAISearchVectorStoreInstance__) {
22+
await initSettings();
23+
}
24+
}
25+
2126
export async function POST(request: NextRequest) {
2227
// Init Vercel AI StreamData and timeout
2328
const vercelStreamData = new StreamData();
2429

2530
try {
31+
await setVectorStoreInstance();
2632
const body = await request.json();
2733
const { messages, data }: { messages: Message[]; data?: any } = body;
2834
if (!isValidMessages(messages)) {

0 commit comments

Comments
 (0)