Skip to content

Commit 4bf1924

Browse files
authored
[ENH] Add Schema to js client (#5621)
## Description of changes _Summarize the changes made by this PR._ - Improvements & Bug fixes - This PR adds schema support to the JS client, along with tests. It also adds logic to embed sparse vectors using embedding functions (EFs) and to use the schema for dense vectors, plus tests to ensure that serialization and deserialization work. - New functionality - ... ## Test plan _How are these changes tested?_ Added schema unit tests matching the Python ones. - [x] Tests pass locally with `pytest` for Python, `yarn test` for JS, `cargo test` for Rust ## Migration plan _Are there any migrations, or any forwards/backwards compatibility changes needed in order to make sure this change deploys reliably?_ ## Observability plan _What is the plan to instrument and monitor this change?_ ## Documentation Changes _Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
1 parent 4135947 commit 4bf1924

File tree

7 files changed

+2680
-60
lines changed

7 files changed

+2680
-60
lines changed

clients/new-js/packages/chromadb/src/chroma-client.ts

Lines changed: 80 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,23 @@ import {
2222
CreateCollectionConfiguration,
2323
processCreateCollectionConfig,
2424
} from "./collection-configuration";
25+
import { EMBEDDING_KEY, Schema } from "./schema";
26+
27+
/**
 * Picks the embedding function declared inside a collection schema, if any.
 *
 * Lookup order:
 *  1. the float-list vector index configured on the `EMBEDDING_KEY` entry of
 *     `schema.keys` (a key-level override);
 *  2. the float-list vector index configured on `schema.defaults`.
 *
 * @param schema - The schema to inspect; may be `undefined`.
 * @returns The first embedding function found following the order above, or
 *   `undefined` when no schema is given or neither location provides one.
 */
const resolveSchemaEmbeddingFunction = (
  schema: Schema | undefined,
): EmbeddingFunction | undefined => {
  if (!schema) {
    return undefined;
  }

  // A key-level override takes precedence over the schema-wide default.
  const keyLevelFunction =
    schema.keys[EMBEDDING_KEY]?.floatList?.vectorIndex?.config
      .embeddingFunction;
  if (keyLevelFunction) {
    return keyLevelFunction;
  }

  // Only consult the defaults when no usable override was found.
  const defaultFunction =
    schema.defaults.floatList?.vectorIndex?.config.embeddingFunction;
  // Normalise a `null` entry to `undefined` so callers see a single "absent" value.
  return defaultFunction ?? undefined;
};
2542

2643
/**
2744
* Configuration options for the ChromaClient.
@@ -217,22 +234,27 @@ export class ChromaClient {
217234
});
218235

219236
return Promise.all(
220-
data.map(
221-
async (collection) =>
222-
new CollectionImpl({
223-
chromaClient: this,
224-
apiClient: this.apiClient,
225-
name: collection.name,
226-
id: collection.id,
227-
embeddingFunction: await getEmbeddingFunction(
228-
collection.name,
229-
collection.configuration_json.embedding_function ?? undefined,
230-
),
231-
configuration: collection.configuration_json,
232-
metadata:
233-
deserializeMetadata(collection.metadata ?? undefined) ?? undefined,
234-
}),
235-
),
237+
data.map(async (collection) => {
238+
const schema = Schema.deserializeFromJSON(collection.schema ?? undefined);
239+
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(schema);
240+
const resolvedEmbeddingFunction =
241+
getEmbeddingFunction(
242+
collection.name,
243+
collection.configuration_json.embedding_function ?? undefined,
244+
) ?? schemaEmbeddingFunction;
245+
246+
return new CollectionImpl({
247+
chromaClient: this,
248+
apiClient: this.apiClient,
249+
name: collection.name,
250+
id: collection.id,
251+
embeddingFunction: resolvedEmbeddingFunction,
252+
configuration: collection.configuration_json,
253+
metadata:
254+
deserializeMetadata(collection.metadata ?? undefined) ?? undefined,
255+
schema,
256+
});
257+
}),
236258
);
237259
}
238260

@@ -264,11 +286,13 @@ export class ChromaClient {
264286
configuration,
265287
metadata,
266288
embeddingFunction,
289+
schema,
267290
}: {
268291
name: string;
269292
configuration?: CreateCollectionConfiguration;
270293
metadata?: CollectionMetadata;
271294
embeddingFunction?: EmbeddingFunction | null;
295+
schema?: Schema;
272296
}): Promise<Collection> {
273297
const collectionConfig = await processCreateCollectionConfig({
274298
configuration,
@@ -284,22 +308,29 @@ export class ChromaClient {
284308
configuration: collectionConfig,
285309
metadata: serializeMetadata(metadata),
286310
get_or_create: false,
311+
schema: schema ? schema.serializeToJSON() : undefined,
287312
},
288313
});
289314

315+
const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
316+
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(serverSchema);
317+
const resolvedEmbeddingFunction =
318+
embeddingFunction ??
319+
getEmbeddingFunction(
320+
data.name,
321+
data.configuration_json.embedding_function ?? undefined,
322+
) ??
323+
schemaEmbeddingFunction;
324+
290325
return new CollectionImpl({
291326
chromaClient: this,
292327
apiClient: this.apiClient,
293328
name,
294329
configuration: data.configuration_json,
295330
metadata: deserializeMetadata(data.metadata ?? undefined) ?? undefined,
296-
embeddingFunction:
297-
embeddingFunction ??
298-
(await getEmbeddingFunction(
299-
data.name,
300-
data.configuration_json.embedding_function ?? undefined,
301-
)),
331+
embeddingFunction: resolvedEmbeddingFunction,
302332
id: data.id,
333+
schema: serverSchema,
303334
});
304335
}
305336

@@ -323,19 +354,25 @@ export class ChromaClient {
323354
path: { ...(await this._path()), collection_id: name },
324355
});
325356

357+
const schema = Schema.deserializeFromJSON(data.schema ?? undefined);
358+
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(schema);
359+
const resolvedEmbeddingFunction =
360+
embeddingFunction ??
361+
getEmbeddingFunction(
362+
data.name,
363+
data.configuration_json.embedding_function ?? undefined,
364+
) ??
365+
schemaEmbeddingFunction;
366+
326367
return new CollectionImpl({
327368
chromaClient: this,
328369
apiClient: this.apiClient,
329370
name,
330371
configuration: data.configuration_json,
331372
metadata: deserializeMetadata(data.metadata ?? undefined) ?? undefined,
332-
embeddingFunction: embeddingFunction
333-
? embeddingFunction
334-
: await getEmbeddingFunction(
335-
data.name,
336-
data.configuration_json.embedding_function ?? undefined,
337-
),
373+
embeddingFunction: resolvedEmbeddingFunction,
338374
id: data.id,
375+
schema,
339376
});
340377
}
341378

@@ -382,11 +419,13 @@ export class ChromaClient {
382419
configuration,
383420
metadata,
384421
embeddingFunction,
422+
schema,
385423
}: {
386424
name: string;
387425
configuration?: CreateCollectionConfiguration;
388426
metadata?: CollectionMetadata;
389427
embeddingFunction?: EmbeddingFunction | null;
428+
schema?: Schema;
390429
}): Promise<Collection> {
391430
const collectionConfig = await processCreateCollectionConfig({
392431
configuration,
@@ -402,22 +441,29 @@ export class ChromaClient {
402441
configuration: collectionConfig,
403442
metadata: serializeMetadata(metadata),
404443
get_or_create: true,
444+
schema: schema ? schema.serializeToJSON() : undefined,
405445
},
406446
});
407447

448+
const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
449+
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(serverSchema);
450+
const resolvedEmbeddingFunction =
451+
embeddingFunction ??
452+
getEmbeddingFunction(
453+
name,
454+
data.configuration_json.embedding_function ?? undefined,
455+
) ??
456+
schemaEmbeddingFunction;
457+
408458
return new CollectionImpl({
409459
chromaClient: this,
410460
apiClient: this.apiClient,
411461
name,
412462
configuration: data.configuration_json,
413463
metadata: deserializeMetadata(data.metadata ?? undefined) ?? undefined,
414-
embeddingFunction:
415-
embeddingFunction ??
416-
(await getEmbeddingFunction(
417-
name,
418-
data.configuration_json.embedding_function ?? undefined,
419-
)),
464+
embeddingFunction: resolvedEmbeddingFunction,
420465
id: data.id,
466+
schema: serverSchema,
421467
});
422468
}
423469

clients/new-js/packages/chromadb/src/collection-configuration.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,10 @@ export const processUpdateCollectionConfig = async ({
164164

165165
const embeddingFunction =
166166
currentEmbeddingFunction ||
167-
(await getEmbeddingFunction(
167+
getEmbeddingFunction(
168168
collectionName,
169169
currentConfiguration.embeddingFunction ?? undefined,
170-
));
170+
);
171171

172172
const newEmbeddingFunction = newConfiguration.embeddingFunction;
173173

0 commit comments

Comments
 (0)