Commit 05eaa5c

feat: [RET-2633] add readiness method to clients (#25)
* feat(grpc): add health check service and generated protobuf files
* feat(grpc): add health check methods to Requester and RequesterAsync in Python
* feat(grpc): add health check method to Requester interface and implementations
* feat(grpc): add health check tests for Python
* feat(grpc): add health check methods and tests for clients
* docs(grpc): add health check calls to Embedder, Ranker, and Chunker clients
* feat(grpc): add health check method to Chunker, Embedder, and Ranker in TS
* feat(grpc): add health check method to Chunker, Embedder, and Ranker in Go
* refactor(python): use readiness instead
* feat(grpc): implement readiness checks for Chunker, Embedder, and Ranker interfaces and update tests for Go
* feat(grpc): add readiness checks for Chunker, Embedder, and Ranker models and update tests for TS
* docs(ts): add model readiness checks to Embedder, Chunker, and Ranker examples
* feat(grpc): add server ready check to Requester in Python
* feat(grpc): add server ready check to Requester in Go
* feat(grpc): add server ready check to Requester in TS
* docs: add embedder example for Go
* test: add readiness checks for chunker_embedder in Go and TS
* test: update embedder health check to readiness check in Python
* refactor: move name formatting logic to separate function
* test: add ModelReady JSON stub for GRPCInferenceService
* fix: enhance error messages for model readiness checks with model name and version
* test: fix stubs and tests
1 parent: 8b1016d

7 files changed (+148 / -15 lines)

README.md

Lines changed: 33 additions & 6 deletions
@@ -50,9 +50,18 @@ async function runEmbedderExample() {
     },
   });
 
+  // Will throw an error if server is not ready
+  await myEmbedder.requester.health();
+
+  // Will throw an error if model is not ready
+  await myEmbedder.ready(
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
+  );
+
   const result = await myEmbedder.embed(
-    'embedder_medical_journals_qa',
-    '20250306T064951Z',
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
     {
       texts: ['Clinia is based in Montreal'],
       id: 'request-123',
@@ -79,9 +88,18 @@ async function runChunkerExample() {
     },
   });
 
+  // Will throw an error if server is not ready
+  await myChunker.requester.health();
+
+  // Will throw an error if model is not ready
+  await myChunker.ready(
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
+  );
+
   const result = await myChunker.chunk(
-    'chunker',
-    '20250306T064951Z',
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
     {
       texts: ['Clinia is based in Montreal'],
       id: 'request-123',
@@ -108,9 +126,18 @@ async function runRankerExample() {
     },
   });
 
+  // Will throw an error if server is not ready
+  await myRanker.requester.health();
+
+  // Will throw an error if model is not ready
+  await myRanker.ready(
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
+  );
+
   const result = await myRanker.rank(
-    'ranker_medical_journals_qa',
-    '20250306T064951Z',
+    process.env.CLINIA_MODEL_NAME,
+    process.env.CLINIA_MODEL_VERSION,
     {
       query: 'hello, how are you?',
       texts: ['Clinia is based in Montreal'],
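One caveat about these README examples: under strict TypeScript settings, process.env values are typed string | undefined, while ready(), embed(), chunk(), and rank() take plain strings. A minimal guard, assuming the two CLINIA_* variables are meant to be required, keeps the calls type-safe:

const modelName = process.env.CLINIA_MODEL_NAME;
const modelVersion = process.env.CLINIA_MODEL_VERSION;
if (!modelName || !modelVersion) {
  throw new Error('CLINIA_MODEL_NAME and CLINIA_MODEL_VERSION must be set');
}

// Narrowed to string from here on.
await myEmbedder.ready(modelName, modelVersion);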

packages/models-client-chunker/src/chunker.ts

Lines changed: 18 additions & 0 deletions
@@ -32,6 +32,13 @@ const CHUNK_INPUT_DATATYPE: Datatype = 'BYTES';
 export class Chunker {
   private _requester: Requester;
 
+  /**
+   * Get the underlying requester instance.
+   */
+  get requester(): Requester {
+    return this._requester;
+  }
+
   constructor(options: ClientOptions) {
     this._requester = options.requester;
   }
@@ -93,4 +100,15 @@ export class Chunker {
       chunks,
     };
   }
+
+  /**
+   * Checks the readiness status of the model.
+   * @throws {Error} If the model is not ready.
+   */
+  async ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void> {
+    await this._requester.ready(modelName, modelVersion);
+  }
 }
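Note that ready() signals failure by throwing rather than returning a boolean, so a caller that wants to probe and retry needs a try/catch. A short sketch, reusing the model identifiers from the README example (the same pattern applies to Embedder and Ranker):

try {
  await myChunker.ready('chunker', '20250306T064951Z');
  // Model is loaded; chunk() calls can proceed.
} catch (err) {
  // Not ready yet: surface the error, retry, or fail fast at startup.
  console.error('chunker model is not ready:', err);
}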

packages/models-client-common/src/requester.ts

Lines changed: 7 additions & 0 deletions
@@ -17,6 +17,13 @@ export interface Requester {
     inputs: Input[],
   ): AsyncIterable<string>;
 
+  ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void>;
+
+  health(): Promise<void>;
+
   close(): void;
 }
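Since the commit also updates test stubs, it is worth noting that the two new interface members are easy to fake in unit tests. A minimal hand-rolled double, shown only as a sketch (the readiness members and close are spelled out; the cast stands in for the remaining interface members, which a real stub would implement as declared above):

const alwaysReadyRequester = {
  async ready(_modelName: string, _modelVersion: string): Promise<void> {
    // Pretend every model is loaded.
  },
  async health(): Promise<void> {
    // Pretend the server is up.
  },
  close(): void {},
} as unknown as Requester;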

packages/models-client-embedder/src/embedder.ts

Lines changed: 18 additions & 0 deletions
@@ -35,6 +35,13 @@ const EMBEDDER_INPUT_DATATYPE: Datatype = 'BYTES';
 export class Embedder {
   private _requester: Requester;
 
+  /**
+   * Get the underlying requester instance.
+   */
+  get requester(): Requester {
+    return this._requester;
+  }
+
   /**
    * Creates an instance of Embedder.
    * @param options - The client options containing the requester.
@@ -88,4 +95,15 @@ export class Embedder {
       embeddings,
     };
   }
+
+  /**
+   * Checks the readiness status of the model.
+   * @throws {Error} If the model is not ready.
+   */
+  async ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void> {
+    await this._requester.ready(modelName, modelVersion);
+  }
 }

packages/models-client-ranker/src/ranker.ts

Lines changed: 18 additions & 0 deletions
@@ -44,6 +44,13 @@ export type RankResponse = {
 export class Ranker {
   private _requester: Requester;
 
+  /**
+   * Get the underlying requester instance.
+   */
+  get requester(): Requester {
+    return this._requester;
+  }
+
   /**
    * Creates an instance of Ranker.
    * @param options - The client options containing the requester.
@@ -121,4 +128,15 @@ export class Ranker {
       scores: new Float32Array(flattenedScores),
     };
   }
+
+  /**
+   * Checks the readiness status of the model.
+   * @throws {Error} If the model is not ready.
+   */
+  async ready(
+    modelName: string,
+    modelVersion: string,
+  ): Promise<void> {
+    await this._requester.ready(modelName, modelVersion);
+  }
 }

packages/models-requester-grpc/src/preprocess.ts

Lines changed: 12 additions & 0 deletions
@@ -57,3 +57,15 @@ const serializeByteTensor = (inputTensor: Uint8Array[]): Uint8Array => {
 
   return new Uint8Array(flattenedBytesBuffer);
 };
+
+/**
+ * Format model name and version for the request.
+ * The model version is always set to 1 because all models deployed within the same Triton
+ * server instance -- when stored in different model repositories -- must have unique names.
+ */
+export const formatModelNameAndVersion = (
+  modelName: string,
+  modelVersion: string,
+): [string, string] => {
+  return [`${modelName}:${modelVersion}`, '1'];
+};
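This helper centralizes the convention that was previously an inline NOTE in buildRequest: the client-facing version is folded into the Triton model name, and the wire-level version is pinned to '1'. A worked example, using the identifiers from the README:

const [tritonName, tritonVersion] = formatModelNameAndVersion(
  'embedder_medical_journals_qa',
  '20250306T064951Z',
);
// tritonName    === 'embedder_medical_journals_qa:20250306T064951Z'
// tritonVersion === '1'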

packages/models-requester-grpc/src/requester.ts

Lines changed: 42 additions & 9 deletions
@@ -9,19 +9,21 @@ import {
 import { type Transport, createClient, type Client } from '@connectrpc/connect';
 import {
   GRPCInferenceService,
+  type ModelReadyRequest,
+  type ServerReadyRequest,
   type ModelInferRequest,
   type ModelInferRequest_InferInputTensor,
   type ModelInferRequest_InferRequestedOutputTensor,
   type ModelInferResponse,
 } from './gen/grpc_service_pb';
-import { preprocess } from './preprocess';
+import { preprocess, formatModelNameAndVersion } from './preprocess';
 import { postprocessBytes, postprocessFp32 } from './postprocess';
 
 export class GrpcRequester implements Requester {
-  private _client: Client<typeof GRPCInferenceService>;
+  private _inferenceClient: Client<typeof GRPCInferenceService>;
 
   constructor(transport: Transport) {
-    this._client = createClient(GRPCInferenceService, transport);
+    this._inferenceClient = createClient(GRPCInferenceService, transport);
   }
 
   private buildRequest(
@@ -65,10 +67,9 @@
       }),
     );
 
-    // NOTE: The model version is always set to 1 because all models deployed within the same Triton server instance -- when stored in different model repositories -- must have unique names.
     return {
-      modelName: `${modelName}:${modelVersion}`,
-      modelVersion: '1',
+      modelName: modelName,
+      modelVersion: modelVersion,
       id: id,
       inputs: grpcInputs,
       outputs: grpcOutputs,
@@ -119,14 +120,18 @@
     outputKeys: string[],
     id: string,
   ): Promise<Output[]> {
+    // Format model name and version
+    const [formattedModelName, formattedModelVersion] =
+      formatModelNameAndVersion(modelName, modelVersion);
+
     const req = this.buildRequest(
-      modelName,
-      modelVersion,
+      formattedModelName,
+      formattedModelVersion,
       inputs,
       outputKeys,
       id,
     );
-    const res = await this._client.modelInfer(req);
+    const res = await this._inferenceClient.modelInfer(req);
 
     // TODO: Check resp ID
     if (res.id !== id) {
@@ -142,6 +147,34 @@
     return this.processResponse(res);
   }
 
+  async ready(modelName: string, modelVersion: string): Promise<void> {
+    // Format model name and version
+    const [formattedModelName, formattedModelVersion] =
+      formatModelNameAndVersion(modelName, modelVersion);
+
+    const request: ModelReadyRequest = {
+      $typeName: 'inference.ModelReadyRequest',
+      name: formattedModelName,
+      version: formattedModelVersion,
+    };
+    const res = await this._inferenceClient.modelReady(request);
+    if (!res.ready) {
+      throw new Error(
+        `Model ${modelName} with version ${modelVersion} is not ready`,
+      );
+    }
+  }
+
+  async health(): Promise<void> {
+    const request: ServerReadyRequest = {
+      $typeName: 'inference.ServerReadyRequest',
+    };
+    const res = await this._inferenceClient.serverReady(request);
+    if (!res.ready) {
+      throw new Error('Server is not ready');
+    }
+  }
+
   stream(
     modelName: string,
     modelVersion: string,
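Putting the two new checks together, a startup probe might look like the sketch below. The createGrpcTransport factory from @connectrpc/connect-node (v2, where gRPC transports are always HTTP/2), the localhost:8001 Triton endpoint, and the '@clinia/models-requester-grpc' package name are assumptions for illustration; any Transport accepted by the GrpcRequester constructor works.

import { createGrpcTransport } from '@connectrpc/connect-node'; // assumed transport factory
import { GrpcRequester } from '@clinia/models-requester-grpc'; // hypothetical package name

const requester = new GrpcRequester(
  createGrpcTransport({ baseUrl: 'http://localhost:8001' }), // assumed Triton gRPC endpoint
);

// Fail fast if the server itself is not up...
await requester.health();
// ...then confirm the specific model is loaded.
await requester.ready('embedder_medical_journals_qa', '20250306T064951Z');

requester.close();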
