Commit 05eaa5c

feat: [RET-2633] add readiness method to clients (#25)
* feat(grpc): add health check service and generated protobuf files
* feat(grpc): add health check methods to Requester and RequesterAsync in Python
* feat(grpc): add health check method to Requester interface and implementations
* feat(grpc): add health check tests for Python
* feat(grpc): add health check methods and tests for clients
* docs(grpc): add health check calls to Embedder, Ranker, and Chunker clients
* feat(grpc): add health check method to Chunker, Embedder, and Ranker in TS
* feat(grpc): add health check method to Chunker, Embedder, and Ranker in Go
* refactor(python): use readiness instead
* feat(grpc): implement readiness checks for Chunker, Embedder, and Ranker interfaces and update tests for Go
* feat(grpc): add readiness checks for Chunker, Embedder, and Ranker models and update tests for TS
* docs(ts): add model readiness checks to Embedder, Chunker, and Ranker examples
* feat(grpc): add server ready check to Requester in Python
* feat(grpc): add server ready check to Requester in Go
* feat(grpc): add server ready check to Requester in TS
* docs: add embedder example for Go
* test: add readiness checks for chunker_embedder in Go and TS
* test: update embedder health check to readiness check in Python
* refactor: move name formatting logic to separate function
* test: add ModelReady JSON stub for GRPCInferenceService
* fix: enhance error messages for model readiness checks with model name and version
* test: fix stubs and tests
1 parent: 8b1016d

7 files changed (+148 / -15 lines)

README.md

Lines changed: 33 additions & 6 deletions
@@ -50,9 +50,18 @@ async function runEmbedderExample() {
     },
   });
 
+  // Will throw an error if server is not ready
+  await myEmbedder.requester.health();
+
+  // Will throw an error if model is not ready
+  await myEmbedder.ready(
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
+  );
+
   const result = await myEmbedder.embed(
-    'embedder_medical_journals_qa',
-    '20250306T064951Z',
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
     {
       texts: ['Clinia is based in Montreal'],
       id: 'request-123',
@@ -79,9 +88,18 @@ async function runChunkerExample() {
     },
   });
 
+  // Will throw an error if server is not ready
+  await myChunker.requester.health();
+
+  // Will throw an error if model is not ready
+  await myChunker.ready(
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
+  );
+
   const result = await myChunker.chunk(
-    'chunker',
-    '20250306T064951Z',
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
     {
       texts: ['Clinia is based in Montreal'],
       id: 'request-123',
@@ -108,9 +126,18 @@ async function runRankerExample() {
     },
   });
 
+  // Will throw an error if server is not ready
+  await myRanker.requester.health();
+
+  // Will throw an error if model is not ready
+  await myRanker.ready(
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
+  );
+
   const result = await myRanker.rank(
-    'ranker_medical_journals_qa',
-    '20250306T064951Z',
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
     {
       query: 'hello, how are you?',
       texts: ['Clinia is based in Montreal'],
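One caveat about these README examples: under strict TypeScript settings, process.env values are typed string | undefined, while ready(), embed(), chunk(), and rank() take plain strings. A minimal guard, assuming the two CLINIA_* variables are meant to be required, keeps the calls type-safe:

const modelName = process.env.CLINIA_MODEL_NAME;
const modelVersion = process.env.CLINIA_MODEL_VERSION;
if (!modelName || !modelVersion) {
  throw new Error('CLINIA_MODEL_NAME and CLINIA_MODEL_VERSION must be set');
}

// Narrowed to string from here on.
await myEmbedder.ready(modelName, modelVersion);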

packages/models-client-chunker/src/chunker.ts

Lines changed: 18 additions & 0 deletions
@@ -32,6 +32,13 @@ const CHUNK_INPUT_DATATYPE: Datatype = 'BYTES';
 export class Chunker {
   private _requester: Requester;
 
+  /**
+   * Get the underlying requester instance.
+   */
+  get requester(): Requester {
+    return this._requester;
+  }
+
   constructor(options: ClientOptions) {
     this._requester = options.requester;
   }
@@ -93,4 +100,15 @@ export class Chunker {
       chunks,
     };
   }
+
+  /**
+   * Checks the readiness status of the model.
+   * @throws {Error} If the model is not ready.
+   */
+  async ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void> {
+    await this._requester.ready(modelName, modelVersion);
+  }
 }
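Note that ready() signals failure by throwing rather than returning a boolean, so a caller that wants to probe and retry needs a try/catch. A short sketch, reusing the model identifiers from the README example (the same pattern applies to Embedder and Ranker):

try {
  await myChunker.ready('chunker', '20250306T064951Z');
  // Model is loaded; chunk() calls can proceed.
} catch (err) {
  // Not ready yet: surface the error, retry, or fail fast at startup.
  console.error('chunker model is not ready:', err);
}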

packages/models-client-common/src/requester.ts

Lines changed: 7 additions & 0 deletions
@@ -17,6 +17,13 @@ export interface Requester {
     inputs: Input[],
   ): AsyncIterable<string>;
 
+  ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void>;
+
+  health(): Promise<void>;
+
   close(): void;
 }
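Since the commit also updates test stubs, it is worth noting that the two new interface members are easy to fake in unit tests. A minimal hand-rolled double, shown only as a sketch (the readiness members and close are spelled out; the cast stands in for the remaining interface members, which a real stub would implement as declared above):

const alwaysReadyRequester = {
  async ready(_modelName: string, _modelVersion: string): Promise<void> {
    // Pretend every model is loaded.
  },
  async health(): Promise<void> {
    // Pretend the server is up.
  },
  close(): void {},
} as unknown as Requester;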

packages/models-client-embedder/src/embedder.ts

Lines changed: 18 additions & 0 deletions
@@ -35,6 +35,13 @@ const EMBEDDER_INPUT_DATATYPE: Datatype = 'BYTES';
 export class Embedder {
   private _requester: Requester;
 
+  /**
+   * Get the underlying requester instance.
+   */
+  get requester(): Requester {
+    return this._requester;
+  }
+
   /**
    * Creates an instance of Embedder.
    * @param options - The client options containing the requester.
@@ -88,4 +95,15 @@ export class Embedder {
       embeddings,
     };
   }
+
+  /**
+   * Checks the readiness status of the model.
+   * @throws {Error} If the model is not ready.
+   */
+  async ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void> {
+    await this._requester.ready(modelName, modelVersion);
+  }
 }

packages/models-client-ranker/src/ranker.ts

Lines changed: 18 additions & 0 deletions
@@ -44,6 +44,13 @@ export type RankResponse = {
 export class Ranker {
   private _requester: Requester;
 
+  /**
+   * Get the underlying requester instance.
+   */
+  get requester(): Requester {
+    return this._requester;
+  }
+
   /**
    * Creates an instance of Ranker.
    * @param options - The client options containing the requester.
@@ -121,4 +128,15 @@ export class Ranker {
       scores: new Float32Array(flattenedScores),
     };
   }
+
+  /**
+   * Checks the readiness status of the model.
+   * @throws {Error} If the model is not ready.
+   */
+  async ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void> {
+    await this._requester.ready(modelName, modelVersion);
+  }
 }

packages/models-requester-grpc/src/preprocess.ts

Lines changed: 12 additions & 0 deletions
@@ -57,3 +57,15 @@ const serializeByteTensor = (inputTensor: Uint8Array[]): Uint8Array => {
 
   return new Uint8Array(flattenedBytesBuffer);
 };
+
+/**
+ * Format model name and version for the request.
+ * The model version is always set to 1 because all models deployed within the same Triton
+ * server instance -- when stored in different model repositories -- must have unique names.
+ */
+export const formatModelNameAndVersion = (
+  modelName: string,
+  modelVersion: string,
+): [string, string] => {
+  return [`${modelName}:${modelVersion}`, '1'];
+};
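This helper centralizes the convention that was previously an inline NOTE in buildRequest: the client-facing version is folded into the Triton model name, and the wire-level version is pinned to '1'. A worked example, using the identifiers from the README:

const [tritonName, tritonVersion] = formatModelNameAndVersion(
  'embedder_medical_journals_qa',
  '20250306T064951Z',
);
// tritonName    === 'embedder_medical_journals_qa:20250306T064951Z'
// tritonVersion === '1'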

packages/models-requester-grpc/src/requester.ts

Lines changed: 42 additions & 9 deletions
@@ -9,19 +9,21 @@ import {
 import { type Transport, createClient, type Client } from '@connectrpc/connect';
 import {
   GRPCInferenceService,
+  type ModelReadyRequest,
+  type ServerReadyRequest,
   type ModelInferRequest,
   type ModelInferRequest_InferInputTensor,
   type ModelInferRequest_InferRequestedOutputTensor,
   type ModelInferResponse,
 } from './gen/grpc_service_pb';
-import { preprocess } from './preprocess';
+import { preprocess, formatModelNameAndVersion } from './preprocess';
 import { postprocessBytes, postprocessFp32 } from './postprocess';
 
 export class GrpcRequester implements Requester {
-  private _client: Client<typeof GRPCInferenceService>;
+  private _inferenceClient: Client<typeof GRPCInferenceService>;
 
   constructor(transport: Transport) {
-    this._client = createClient(GRPCInferenceService, transport);
+    this._inferenceClient = createClient(GRPCInferenceService, transport);
   }
 
   private buildRequest(
@@ -65,10 +67,9 @@
       }),
     );
 
-    // NOTE: The model version is always set to 1 because all models deployed within the same Triton server instance -- when stored in different model repositories -- must have unique names.
     return {
-      modelName: `${modelName}:${modelVersion}`,
-      modelVersion: '1',
+      modelName: modelName,
+      modelVersion: modelVersion,
       id: id,
       inputs: grpcInputs,
       outputs: grpcOutputs,
@@ -119,14 +120,18 @@
     outputKeys: string[],
     id: string,
   ): Promise<Output[]> {
+    // Format model name and version
+    const [formattedModelName, formattedModelVersion] =
+      formatModelNameAndVersion(modelName, modelVersion);
+
     const req = this.buildRequest(
-      modelName,
-      modelVersion,
+      formattedModelName,
+      formattedModelVersion,
       inputs,
       outputKeys,
       id,
     );
-    const res = await this._client.modelInfer(req);
+    const res = await this._inferenceClient.modelInfer(req);
 
     // TODO: Check resp ID
     if (res.id !== id) {
@@ -142,6 +147,34 @@
     return this.processResponse(res);
   }
 
+  async ready(modelName: string, modelVersion: string): Promise<void> {
+    // Format model name and version
+    const [formattedModelName, formattedModelVersion] =
+      formatModelNameAndVersion(modelName, modelVersion);
+
+    const request: ModelReadyRequest = {
+      $typeName: 'inference.ModelReadyRequest',
+      name: formattedModelName,
+      version: formattedModelVersion,
+    };
+    const res = await this._inferenceClient.modelReady(request);
+    if (!res.ready) {
+      throw new Error(
+        `Model ${modelName} with version ${modelVersion} is not ready`,
+      );
+    }
+  }
+
+  async health(): Promise<void> {
+    const request: ServerReadyRequest = {
+      $typeName: 'inference.ServerReadyRequest',
+    };
+    const res = await this._inferenceClient.serverReady(request);
+    if (!res.ready) {
+      throw new Error('Server is not ready');
+    }
+  }
+
   stream(
     modelName: string,
     modelVersion: string,
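Putting the two new checks together, a startup probe might look like the sketch below. The createGrpcTransport factory from @connectrpc/connect-node (v2, where gRPC transports are always HTTP/2), the localhost:8001 Triton endpoint, and the '@clinia/models-requester-grpc' package name are assumptions for illustration; any Transport accepted by the GrpcRequester constructor works.

import { createGrpcTransport } from '@connectrpc/connect-node'; // assumed transport factory
import { GrpcRequester } from '@clinia/models-requester-grpc'; // hypothetical package name

const requester = new GrpcRequester(
  createGrpcTransport({ baseUrl: 'http://localhost:8001' }), // assumed Triton gRPC endpoint
);

// Fail fast if the server itself is not up...
await requester.health();
// ...then confirm the specific model is loaded.
await requester.ready('embedder_medical_journals_qa', '20250306T064951Z');

requester.close();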
