Skip to content

Commit 7d4aeb3

Browse files
committed
[ENH] Add Schema to js client
1 parent 06ef7de commit 7d4aeb3

File tree

6 files changed

+2131
-25
lines changed

6 files changed

+2131
-25
lines changed

clients/new-js/packages/chromadb/src/chroma-client.ts

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import {
2222
CreateCollectionConfiguration,
2323
processCreateCollectionConfig,
2424
} from "./collection-configuration";
25+
import { Schema } from "./schema";
2526

2627
/**
2728
* Configuration options for the ChromaClient.
@@ -217,21 +218,21 @@ export class ChromaClient {
217218
});
218219

219220
return Promise.all(
220-
data.map(
221-
async (collection) =>
222-
new CollectionImpl({
223-
chromaClient: this,
224-
apiClient: this.apiClient,
225-
name: collection.name,
226-
id: collection.id,
227-
embeddingFunction: await getEmbeddingFunction(
228-
collection.name,
229-
collection.configuration_json.embedding_function ?? undefined,
230-
),
231-
configuration: collection.configuration_json,
232-
metadata:
233-
deserializeMetadata(collection.metadata ?? undefined) ?? undefined,
234-
}),
221+
data.map(async (collection) =>
222+
new CollectionImpl({
223+
chromaClient: this,
224+
apiClient: this.apiClient,
225+
name: collection.name,
226+
id: collection.id,
227+
embeddingFunction: await getEmbeddingFunction(
228+
collection.name,
229+
collection.configuration_json.embedding_function ?? undefined,
230+
),
231+
configuration: collection.configuration_json,
232+
metadata:
233+
deserializeMetadata(collection.metadata ?? undefined) ?? undefined,
234+
schema: Schema.deserializeFromJSON(collection.schema ?? undefined),
235+
}),
235236
),
236237
);
237238
}
@@ -264,11 +265,13 @@ export class ChromaClient {
264265
configuration,
265266
metadata,
266267
embeddingFunction,
268+
schema,
267269
}: {
268270
name: string;
269271
configuration?: CreateCollectionConfiguration;
270272
metadata?: CollectionMetadata;
271273
embeddingFunction?: EmbeddingFunction | null;
274+
schema?: Schema;
272275
}): Promise<Collection> {
273276
const collectionConfig = await processCreateCollectionConfig({
274277
configuration,
@@ -284,9 +287,12 @@ export class ChromaClient {
284287
configuration: collectionConfig,
285288
metadata: serializeMetadata(metadata),
286289
get_or_create: false,
290+
schema: schema ? schema.serializeToJSON() : undefined,
287291
},
288292
});
289293

294+
const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
295+
290296
return new CollectionImpl({
291297
chromaClient: this,
292298
apiClient: this.apiClient,
@@ -300,6 +306,7 @@ export class ChromaClient {
300306
data.configuration_json.embedding_function ?? undefined,
301307
)),
302308
id: data.id,
309+
schema: serverSchema,
303310
});
304311
}
305312

@@ -323,6 +330,8 @@ export class ChromaClient {
323330
path: { ...(await this._path()), collection_id: name },
324331
});
325332

333+
const schema = Schema.deserializeFromJSON(data.schema ?? undefined);
334+
326335
return new CollectionImpl({
327336
chromaClient: this,
328337
apiClient: this.apiClient,
@@ -336,6 +345,7 @@ export class ChromaClient {
336345
data.configuration_json.embedding_function ?? undefined,
337346
),
338347
id: data.id,
348+
schema,
339349
});
340350
}
341351

@@ -382,11 +392,13 @@ export class ChromaClient {
382392
configuration,
383393
metadata,
384394
embeddingFunction,
395+
schema,
385396
}: {
386397
name: string;
387398
configuration?: CreateCollectionConfiguration;
388399
metadata?: CollectionMetadata;
389400
embeddingFunction?: EmbeddingFunction | null;
401+
schema?: Schema;
390402
}): Promise<Collection> {
391403
const collectionConfig = await processCreateCollectionConfig({
392404
configuration,
@@ -402,9 +414,12 @@ export class ChromaClient {
402414
configuration: collectionConfig,
403415
metadata: serializeMetadata(metadata),
404416
get_or_create: true,
417+
schema: schema ? schema.serializeToJSON() : undefined,
405418
},
406419
});
407420

421+
const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
422+
408423
return new CollectionImpl({
409424
chromaClient: this,
410425
apiClient: this.apiClient,
@@ -418,6 +433,7 @@ export class ChromaClient {
418433
data.configuration_json.embedding_function ?? undefined,
419434
)),
420435
id: data.id,
436+
schema: serverSchema,
421437
});
422438
}
423439

clients/new-js/packages/chromadb/src/collection.ts

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import {
4040
UpdateCollectionConfiguration,
4141
} from "./collection-configuration";
4242
import { SearchLike, SearchResult, toSearch } from "./execution/expression";
43+
import { Schema, EMBEDDING_KEY } from "./schema";
4344

4445
/**
4546
* Interface for collection operations using collection ID.
@@ -56,6 +57,8 @@ export interface Collection {
5657
configuration: CollectionConfiguration;
5758
/** Optional embedding function. Must match the one used to create the collection. */
5859
embeddingFunction?: EmbeddingFunction;
60+
/** Collection schema describing index configuration */
61+
schema?: Schema;
5962
/** Gets the total number of records in the collection */
6063
count(): Promise<number>;
6164
/**
@@ -212,6 +215,8 @@ export interface CollectionArgs {
212215
configuration: CollectionConfiguration;
213216
/** Optional collection metadata */
214217
metadata?: CollectionMetadata;
218+
/** Optional schema returned by the server */
219+
schema?: Schema;
215220
}
216221

217222
/**
@@ -226,6 +231,7 @@ export class CollectionImpl implements Collection {
226231
private _metadata: CollectionMetadata | undefined;
227232
private _configuration: CollectionConfiguration;
228233
protected _embeddingFunction: EmbeddingFunction | undefined;
234+
protected _schema: Schema | undefined;
229235

230236
/**
231237
* Creates a new CollectionImpl instance.
@@ -239,6 +245,7 @@ export class CollectionImpl implements Collection {
239245
metadata,
240246
configuration,
241247
embeddingFunction,
248+
schema,
242249
}: CollectionArgs) {
243250
this.chromaClient = chromaClient;
244251
this.apiClient = apiClient;
@@ -247,6 +254,7 @@ export class CollectionImpl implements Collection {
247254
this._metadata = metadata;
248255
this._configuration = configuration;
249256
this._embeddingFunction = embeddingFunction;
257+
this._schema = schema;
250258
}
251259

252260
public get name(): string {
@@ -283,6 +291,14 @@ export class CollectionImpl implements Collection {
283291
this._embeddingFunction = embeddingFunction;
284292
}
285293

294+
/** Returns the schema stored on this collection, or undefined when none has been set. */
public get schema(): Schema | undefined {
295+
return this._schema;
296+
}
297+
298+
/** Replaces the stored schema; declared protected, so it is not assignable from outside the class hierarchy. */
protected set schema(schema: Schema | undefined) {
299+
this._schema = schema;
300+
}
301+
286302
protected async path(): Promise<{
287303
tenant: string;
288304
database: string;
@@ -296,17 +312,36 @@ export class CollectionImpl implements Collection {
296312
}
297313

298314
/**
 * Embeds the given inputs.
 *
 * This description covers the added (`+`) lines of the diff: the embedding
 * function is taken from `this._embeddingFunction`, falling back to
 * `this.getSchemaEmbeddingFunction()` when that is null or undefined.
 *
 * @param inputs - Strings to embed.
 * @param isQuery - When true and the embedding function provides
 *   `generateForQueries`, that method is used instead of `generate`.
 * @returns The result of `generateForQueries(inputs)` or `generate(inputs)`.
 * @throws ChromaValueError when neither source yields an embedding function.
 */
private async embed(inputs: string[], isQuery: boolean): Promise<number[][]> {
299-
if (!this._embeddingFunction) {
315+
const embeddingFunction =
316+
this._embeddingFunction ?? this.getSchemaEmbeddingFunction();
317+
318+
if (!embeddingFunction) {
300319
throw new ChromaValueError(
301320
"Embedding function must be defined for operations requiring embeddings.",
302321
);
303322
}
304323

305-
if (this._embeddingFunction.generateForQueries && isQuery) {
306-
return await this._embeddingFunction.generateForQueries(inputs);
307-
} else {
308-
return await this._embeddingFunction.generate(inputs);
309-
}
324+
if (isQuery && embeddingFunction.generateForQueries) {
325+
return await embeddingFunction.generateForQueries(inputs);
326+
}
327+
328+
return await embeddingFunction.generate(inputs);
329+
}
330+
331+
/**
 * Looks up an embedding function in the collection's schema.
 *
 * Lookup order: the entry under `schema.keys[EMBEDDING_KEY]` is consulted
 * first; if it yields no (truthy) embedding function, `schema.defaults` is
 * used instead.
 *
 * @returns The embedding function found in the schema, or undefined when the
 *   collection has no schema or neither location provides one.
 */
private getSchemaEmbeddingFunction(): EmbeddingFunction | undefined {
332+
const schema = this._schema;
333+
if (!schema) return undefined;
334+
335+
const schemaOverride = schema.keys[EMBEDDING_KEY];
336+
// NOTE(review): `.config` is dereferenced without `?.`; this assumes `config`
// is always present whenever `vectorIndex` is — confirm against the Schema types.
const overrideFunction = schemaOverride?.floatList?.vectorIndex?.config
337+
.embeddingFunction;
338+
if (overrideFunction) {
339+
return overrideFunction;
340+
}
341+
342+
const defaultFunction = schema.defaults.floatList?.vectorIndex?.config
343+
.embeddingFunction;
344+
// `?? undefined` turns a null value into undefined so the declared return type holds.
return defaultFunction ?? undefined;
310345
}
311346

312347
private async prepareRecords<T extends boolean = false>({

clients/new-js/packages/chromadb/src/embedding-function.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ export const knownSparseEmbeddingFunctions = new Map<
130130
SparseEmbeddingFunctionClass
131131
>();
132132

133+
/**
134+
* Union type covering both dense and sparse embedding functions.
135+
*/
136+
export type AnyEmbeddingFunction = EmbeddingFunction | SparseEmbeddingFunction;
137+
133138
/**
134139
* Registers an embedding function in the global registry.
135140
* @param name - Unique name for the embedding function
@@ -170,9 +175,9 @@ export const registerSparseEmbeddingFunction = (
170175
* Retrieves and instantiates an embedding function from configuration.
171176
* @param collectionName - Name of the collection (for error messages)
172177
* @param efConfig - Configuration for the embedding function
173-
* @returns Promise resolving to an EmbeddingFunction instance
178+
* @returns EmbeddingFunction instance or undefined if it cannot be constructed
174179
*/
175-
export const getEmbeddingFunction = async (
180+
export const getEmbeddingFunction = (
176181
collectionName: string,
177182
efConfig?: EmbeddingFunctionConfiguration,
178183
) => {
@@ -235,9 +240,9 @@ export const getEmbeddingFunction = async (
235240
* Retrieves and instantiates a sparse embedding function from configuration.
236241
* @param collectionName - Name of the collection (for error messages)
237242
* @param efConfig - Configuration for the sparse embedding function
238-
* @returns Promise resolving to a SparseEmbeddingFunction instance
243+
* @returns SparseEmbeddingFunction instance or undefined if it cannot be constructed
239244
*/
240-
export const getSparseEmbeddingFunction = async (
245+
export const getSparseEmbeddingFunction = (
241246
collectionName: string,
242247
efConfig?: EmbeddingFunctionConfiguration,
243248
) => {

clients/new-js/packages/chromadb/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ export * from "./cloud-client";
1616
export * from "./errors";
1717
export * from "./collection-configuration";
1818
export * from "./execution";
19+
export * from "./schema";

0 commit comments

Comments
 (0)