Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 80 additions & 34 deletions clients/new-js/packages/chromadb/src/chroma-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,23 @@ import {
CreateCollectionConfiguration,
processCreateCollectionConfig,
} from "./collection-configuration";
import { EMBEDDING_KEY, Schema } from "./schema";

const resolveSchemaEmbeddingFunction = (
schema: Schema | undefined,
): EmbeddingFunction | undefined => {
if (!schema) {
return undefined;
}

const embeddingOverride =
schema.keys[EMBEDDING_KEY]?.floatList?.vectorIndex?.config.embeddingFunction ?? undefined;
if (embeddingOverride) {
return embeddingOverride;
}

return schema.defaults.floatList?.vectorIndex?.config.embeddingFunction ?? undefined;
};
Comment on lines +27 to +41
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[BestPractice]

The logic in resolveSchemaEmbeddingFunction is duplicated in clients/new-js/packages/chromadb/src/collection.ts (lines 473-487) as getSchemaEmbeddingFunction. To adhere to the DRY principle, this logic should be extracted into a single, shared function. A good location for this would be the new schema.ts file, as it's directly related to schema interpretation and follows ChromaDB's TypeScript architectural patterns.

Additionally, the use of ?? undefined is redundant since optional chaining (?.) already results in undefined if any part of the chain is nullish.

Here's a suggested implementation for a shared function in schema.ts:

// in clients/new-js/packages/chromadb/src/schema.ts
export const resolveSchemaEmbeddingFunction = (
  schema: Schema | undefined,
): EmbeddingFunction | undefined => {
  if (!schema) {
    return undefined;
  }

  return (
    schema.keys[EMBEDDING_KEY]?.floatList?.vectorIndex?.config.embeddingFunction ??
    schema.defaults.floatList?.vectorIndex?.config.embeddingFunction
  );
};

This refactoring aligns with ChromaDB's TypeScript patterns where embedding functions are modular and reusable components. The new function can then be imported and used in both chroma-client.ts and collection.ts, removing the duplicated implementations and improving maintainability.

Context for Agents
[**BestPractice**]

The logic in `resolveSchemaEmbeddingFunction` is duplicated in `clients/new-js/packages/chromadb/src/collection.ts` (lines 473-487) as `getSchemaEmbeddingFunction`. To adhere to the DRY principle, this logic should be extracted into a single, shared function. A good location for this would be the new `schema.ts` file, as it's directly related to schema interpretation and follows ChromaDB's TypeScript architectural patterns.

Additionally, the use of `?? undefined` is redundant since optional chaining (`?.`) already results in `undefined` if any part of the chain is nullish.

Here's a suggested implementation for a shared function in `schema.ts`:

```typescript
// in clients/new-js/packages/chromadb/src/schema.ts
export const resolveSchemaEmbeddingFunction = (
  schema: Schema | undefined,
): EmbeddingFunction | undefined => {
  if (!schema) {
    return undefined;
  }

  return (
    schema.keys[EMBEDDING_KEY]?.floatList?.vectorIndex?.config.embeddingFunction ??
    schema.defaults.floatList?.vectorIndex?.config.embeddingFunction
  );
};
```

This refactoring aligns with ChromaDB's TypeScript patterns where embedding functions are modular and reusable components. The new function can then be imported and used in both `chroma-client.ts` and `collection.ts`, removing the duplicated implementations and improving maintainability.

File: clients/new-js/packages/chromadb/src/chroma-client.ts
Line: 41


/**
* Configuration options for the ChromaClient.
Expand Down Expand Up @@ -217,22 +234,27 @@ export class ChromaClient {
});

return Promise.all(
data.map(
async (collection) =>
new CollectionImpl({
chromaClient: this,
apiClient: this.apiClient,
name: collection.name,
id: collection.id,
embeddingFunction: await getEmbeddingFunction(
collection.name,
collection.configuration_json.embedding_function ?? undefined,
),
configuration: collection.configuration_json,
metadata:
deserializeMetadata(collection.metadata ?? undefined) ?? undefined,
}),
),
data.map(async (collection) => {
const schema = Schema.deserializeFromJSON(collection.schema ?? undefined);
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(schema);
const resolvedEmbeddingFunction =
getEmbeddingFunction(
collection.name,
collection.configuration_json.embedding_function ?? undefined,
) ?? schemaEmbeddingFunction;

return new CollectionImpl({
chromaClient: this,
apiClient: this.apiClient,
name: collection.name,
id: collection.id,
embeddingFunction: resolvedEmbeddingFunction,
configuration: collection.configuration_json,
metadata:
deserializeMetadata(collection.metadata ?? undefined) ?? undefined,
schema,
});
}),
);
}

Expand Down Expand Up @@ -264,11 +286,13 @@ export class ChromaClient {
configuration,
metadata,
embeddingFunction,
schema,
}: {
name: string;
configuration?: CreateCollectionConfiguration;
metadata?: CollectionMetadata;
embeddingFunction?: EmbeddingFunction | null;
schema?: Schema;
}): Promise<Collection> {
const collectionConfig = await processCreateCollectionConfig({
configuration,
Expand All @@ -284,22 +308,29 @@ export class ChromaClient {
configuration: collectionConfig,
metadata: serializeMetadata(metadata),
get_or_create: false,
schema: schema ? schema.serializeToJSON() : undefined,
},
});

const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(serverSchema);
const resolvedEmbeddingFunction =
embeddingFunction ??
getEmbeddingFunction(
data.name,
data.configuration_json.embedding_function ?? undefined,
) ??
schemaEmbeddingFunction;

return new CollectionImpl({
chromaClient: this,
apiClient: this.apiClient,
name,
configuration: data.configuration_json,
metadata: deserializeMetadata(data.metadata ?? undefined) ?? undefined,
embeddingFunction:
embeddingFunction ??
(await getEmbeddingFunction(
data.name,
data.configuration_json.embedding_function ?? undefined,
)),
embeddingFunction: resolvedEmbeddingFunction,
id: data.id,
schema: serverSchema,
});
}

Expand All @@ -323,19 +354,25 @@ export class ChromaClient {
path: { ...(await this._path()), collection_id: name },
});

const schema = Schema.deserializeFromJSON(data.schema ?? undefined);
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(schema);
const resolvedEmbeddingFunction =
embeddingFunction ??
getEmbeddingFunction(
data.name,
data.configuration_json.embedding_function ?? undefined,
) ??
schemaEmbeddingFunction;

return new CollectionImpl({
chromaClient: this,
apiClient: this.apiClient,
name,
configuration: data.configuration_json,
metadata: deserializeMetadata(data.metadata ?? undefined) ?? undefined,
embeddingFunction: embeddingFunction
? embeddingFunction
: await getEmbeddingFunction(
data.name,
data.configuration_json.embedding_function ?? undefined,
),
embeddingFunction: resolvedEmbeddingFunction,
id: data.id,
schema,
});
}

Expand Down Expand Up @@ -382,11 +419,13 @@ export class ChromaClient {
configuration,
metadata,
embeddingFunction,
schema,
}: {
name: string;
configuration?: CreateCollectionConfiguration;
metadata?: CollectionMetadata;
embeddingFunction?: EmbeddingFunction | null;
schema?: Schema;
}): Promise<Collection> {
const collectionConfig = await processCreateCollectionConfig({
configuration,
Expand All @@ -402,22 +441,29 @@ export class ChromaClient {
configuration: collectionConfig,
metadata: serializeMetadata(metadata),
get_or_create: true,
schema: schema ? schema.serializeToJSON() : undefined,
},
});

const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(serverSchema);
const resolvedEmbeddingFunction =
embeddingFunction ??
getEmbeddingFunction(
name,
data.configuration_json.embedding_function ?? undefined,
) ??
schemaEmbeddingFunction;

return new CollectionImpl({
chromaClient: this,
apiClient: this.apiClient,
name,
configuration: data.configuration_json,
metadata: deserializeMetadata(data.metadata ?? undefined) ?? undefined,
embeddingFunction:
embeddingFunction ??
(await getEmbeddingFunction(
name,
data.configuration_json.embedding_function ?? undefined,
)),
embeddingFunction: resolvedEmbeddingFunction,
id: data.id,
schema: serverSchema,
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,10 @@ export const processUpdateCollectionConfig = async ({

const embeddingFunction =
currentEmbeddingFunction ||
(await getEmbeddingFunction(
getEmbeddingFunction(
collectionName,
currentConfiguration.embeddingFunction ?? undefined,
));
);

const newEmbeddingFunction = newConfiguration.embeddingFunction;

Expand Down
Loading