|
1 | 1 | import time |
2 | 2 | import centml |
3 | 3 | from centml.sdk.api import get_centml_client |
4 | | -from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest |
5 | | - |
6 | | -with get_centml_client() as cclient: |
7 | | - # Get fastest recipe for the Qwen model |
8 | | - fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest |
9 | | - |
10 | | - # Modify the recipe if necessary |
11 | | - fastest.recipe.additional_properties["max_num_seqs"] = 512 |
12 | | - |
13 | | - # Create CServeV2 deployment |
14 | | - request = CreateCServeV2DeploymentRequest( |
15 | | - name="qwen-fastest", |
16 | | - cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id), |
17 | | - hardware_instance_id=fastest.hardware_instance_id, |
18 | | - recipe=fastest.recipe, |
19 | | - min_scale=1, |
20 | | - max_scale=1, |
21 | | - env_vars={}, |
22 | | - ) |
23 | | - response = cclient.create_cserve(request) |
24 | | - print("Create deployment response: ", response) |
25 | | - |
26 | | - # Get deployment details |
27 | | - deployment = cclient.get_cserve(response.id) |
28 | | - print("Deployment details: ", deployment) |
29 | | - |
30 | | - # Pause the deployment |
31 | | - cclient.pause(deployment.id) |
32 | | - |
33 | | - # Delete the deployment |
34 | | - cclient.delete(deployment.id) |
| 4 | +from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe |
| 5 | + |
def get_fastest_cserve_config(model, cclient=None):
    """Return the fastest CServe recipe configuration for ``model``.

    Args:
        model: Model identifier forwarded to ``get_cserve_recipe``, e.g.
            ``"Qwen/Qwen2-VL-7B-Instruct"``.
        cclient: Optional, already-open CentML client to query. When omitted,
            a client is opened with ``get_centml_client()`` for this lookup
            only and released before returning.

    Returns:
        The ``fastest`` attribute of the first entry returned by
        ``get_cserve_recipe(model=model)``.

    Raises:
        IndexError: If the lookup yields an empty list (assumes the result
            is list-like -- confirm against the SDK).
    """
    if cclient is not None:
        return cclient.get_cserve_recipe(model=model)[0].fastest

    # No module-level client exists in this script, so a bare reference to
    # ``cclient`` would raise NameError. Open a short-lived client instead.
    with get_centml_client() as own_client:
        return own_client.get_cserve_recipe(model=model)[0].fastest
| 8 | + |
def get_default_cserve_config(model):
    """Build a ``CServeV2Recipe`` for ``model`` with no other options set.

    Args:
        model: Model identifier passed as the recipe's ``model`` argument.

    Returns:
        A new ``CServeV2Recipe`` constructed with only ``model`` supplied.
    """
    default_recipe = CServeV2Recipe(model=model)
    return default_recipe
| 11 | + |
def main():
    """Create a CServeV2 deployment for Qwen2-VL from its fastest recipe.

    Opens a CentML client, looks up the fastest recipe for
    ``Qwen/Qwen2-VL-7B-Instruct``, overrides ``max_num_seqs``, submits the
    create request, and prints both the create response and the deployment
    details fetched afterwards.
    """
    with get_centml_client() as cclient:
        # Get fastest recipe for the Qwen model.
        # The helper is named get_fastest_cserve_config; the previous call to
        # get_fastest_config referenced a name that is not defined.
        qwen_config = get_fastest_cserve_config(model="Qwen/Qwen2-VL-7B-Instruct")

        # Modify the recipe if necessary
        qwen_config.recipe.additional_properties["max_num_seqs"] = 512

        # Create CServeV2 deployment
        request = CreateCServeV2DeploymentRequest(
            name="qwen-fastest",
            cluster_id=cclient.get_cluster_id(qwen_config.hardware_instance_id),
            hardware_instance_id=qwen_config.hardware_instance_id,
            recipe=qwen_config.recipe,
            min_scale=1,
            max_scale=1,
            env_vars={},
        )
        response = cclient.create_cserve(request)
        print("Create deployment response: ", response)

        # Get deployment details
        deployment = cclient.get_cserve(response.id)
        print("Deployment details: ", deployment)

        # Cleanup is intentionally disabled so the deployment stays up after
        # the script exits. Uncomment to pause and then delete it.
        #
        # # Pause the deployment
        # cclient.pause(deployment.id)
        #
        # # Delete the deployment
        # cclient.delete(deployment.id)


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments