Skip to content

Commit 35ae3ee

Browse files
committed
Update create cserve example
1 parent 275b511 commit 35ae3ee

File tree

1 file changed

+43
-31
lines changed

1 file changed

+43
-31
lines changed

examples/sdk/create_cserve.py

Lines changed: 43 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,46 @@
11
import time
22
import centml
33
from centml.sdk.api import get_centml_client
4-
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest
5-
6-
with get_centml_client() as cclient:
7-
# Get fastest recipe for the Qwen model
8-
fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest
9-
10-
# Modify the recipe if necessary
11-
fastest.recipe.additional_properties["max_num_seqs"] = 512
12-
13-
# Create CServeV2 deployment
14-
request = CreateCServeV2DeploymentRequest(
15-
name="qwen-fastest",
16-
cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
17-
hardware_instance_id=fastest.hardware_instance_id,
18-
recipe=fastest.recipe,
19-
min_scale=1,
20-
max_scale=1,
21-
env_vars={},
22-
)
23-
response = cclient.create_cserve(request)
24-
print("Create deployment response: ", response)
25-
26-
# Get deployment details
27-
deployment = cclient.get_cserve(response.id)
28-
print("Deployment details: ", deployment)
29-
30-
# Pause the deployment
31-
cclient.pause(deployment.id)
32-
33-
# Delete the deployment
34-
cclient.delete(deployment.id)
4+
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe
5+
6+
def get_fastest_cserve_config(model, cclient=None):
    """Return the ``fastest`` entry of the first CServe recipe for ``model``.

    Args:
        model: Model identifier forwarded to ``get_cserve_recipe``.
        cclient: An already-open CentML client. When omitted, a client is
            opened with ``get_centml_client()`` for the duration of the
            lookup only.

    Returns:
        The ``fastest`` attribute of the first item returned by
        ``cclient.get_cserve_recipe(model=model)``. ``main`` reads
        ``.recipe`` and ``.hardware_instance_id`` from it.
    """
    if cclient is None:
        # Standalone use: no client was supplied, so open one just for this
        # query instead of relying on a global that does not exist.
        with get_centml_client() as own_client:
            return get_fastest_cserve_config(model, cclient=own_client)
    return cclient.get_cserve_recipe(model=model)[0].fastest


def get_default_cserve_config(model):
    """Return a ``CServeV2Recipe`` constructed with only ``model`` set.

    NOTE(review): this returns a bare recipe, whereas ``main`` reads
    ``.recipe`` and ``.hardware_instance_id`` from the value produced by
    ``get_fastest_cserve_config`` -- presumably the two are not
    interchangeable as-is; confirm before swapping one for the other.
    """
    return CServeV2Recipe(model=model)


def main():
    """Create a CServeV2 deployment for Qwen2-VL and print its details."""
    with get_centml_client() as cclient:
        # Get fastest recipe for the Qwen model, reusing the open client.
        qwen_config = get_fastest_cserve_config(
            model="Qwen/Qwen2-VL-7B-Instruct", cclient=cclient
        )

        # Modify the recipe if necessary
        qwen_config.recipe.additional_properties["max_num_seqs"] = 512

        # Create CServeV2 deployment
        request = CreateCServeV2DeploymentRequest(
            name="qwen-fastest",
            cluster_id=cclient.get_cluster_id(qwen_config.hardware_instance_id),
            hardware_instance_id=qwen_config.hardware_instance_id,
            recipe=qwen_config.recipe,
            min_scale=1,
            max_scale=1,
            env_vars={},
        )
        response = cclient.create_cserve(request)
        print("Create deployment response: ", response)

        # Get deployment details
        deployment = cclient.get_cserve(response.id)
        print("Deployment details: ", deployment)

        # Cleanup is intentionally disabled so the deployment stays up after
        # the example runs. Uncomment to pause and/or delete it:
        #
        # # Pause the deployment
        # cclient.pause(deployment.id)
        #
        # # Delete the deployment
        # cclient.delete(deployment.id)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)