Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions centml/cli/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,13 @@ def get(type, id):
click.echo(
tabulate(
[
("Hugging face model", deployment.model),
("Hugging face model", deployment.recipe.model),
(
"Parallelism",
{"tensor": deployment.tensor_parallel_size, "pipeline": deployment.pipeline_parallel_size},
{
"tensor": deployment.recipe.additional_properties['tensor_parallel_size'],
"pipeline": deployment.recipe.additional_properties['pipeline_parallel_size']
},
),
("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
("Max concurrency", deployment.concurrency or "None"),
Expand Down
19 changes: 13 additions & 6 deletions centml/sdk/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
DeploymentStatus,
CreateInferenceDeploymentRequest,
CreateComputeDeploymentRequest,
CreateCServeDeploymentRequest,
CreateCServeV2DeploymentRequest,
)

from centml.sdk import auth
Expand All @@ -32,16 +32,16 @@ def get_compute(self, id):
return self._api.get_compute_deployment_deployments_compute_deployment_id_get(id)

def get_cserve(self, id):
return self._api.get_cserve_deployment_deployments_cserve_deployment_id_get(id)
return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id)

def create_inference(self, request: CreateInferenceDeploymentRequest):
    """Create an inference deployment from *request*.

    The request object is passed straight to the underlying API client's
    inference-deployment POST call, and that call's result is returned as-is.
    """
    api = self._api
    created = api.create_inference_deployment_deployments_inference_post(request)
    return created

def create_compute(self, request: CreateComputeDeploymentRequest):
    """Create a compute deployment from *request*.

    The request object is passed straight to the underlying API client's
    compute-deployment POST call, and that call's result is returned as-is.
    """
    api = self._api
    created = api.create_compute_deployment_deployments_compute_post(request)
    return created

def create_cserve(self, request: CreateCServeDeploymentRequest):
return self._api.create_cserve_deployment_deployments_cserve_post(request)
def create_cserve(self, request: CreateCServeV2DeploymentRequest):
return self._api.create_cserve_v2_deployment_deployments_cserve_v2_post(request)

def _update_status(self, id, new_status):
status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status)
Expand All @@ -67,8 +67,15 @@ def get_hardware_instances(self, cluster_id=None):
def get_prebuilt_images(self, depl_type: DeploymentType):
    """Query the API client's prebuilt-images endpoint for *depl_type*.

    *depl_type* is forwarded as the ``type`` keyword argument; the response
    of the underlying call is returned unchanged.
    """
    images = self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
    return images

def get_cserve_recipe(self):
return self._api.get_cserve_recipe_deployments_cserve_recipes_get().results
def get_cserve_recipe(self, model=None, hf_token=None):
return self._api.get_cserve_recipe_deployments_cserve_recipes_get(model=model, hf_token=hf_token).results

def get_cluster_id(self, hardware_instance_id):
    """Return the ``cluster_id`` of the hardware instance with the given id.

    Scans ``self.get_hardware_instances()`` and uses the first entry whose
    ``id`` equals *hardware_instance_id*.

    Args:
        hardware_instance_id: id to look up among the hardware instances.

    Returns:
        The ``cluster_id`` attribute of the matching hardware instance.

    Raises:
        Exception: if no hardware instance has the requested id.
    """
    # Indexing an empty filtered list would raise a bare IndexError before
    # the intended error message could be produced; next() with a default
    # lets the "not found" case reach the descriptive exception below.
    hardware_instance = next(
        (h for h in self.get_hardware_instances() if h.id == hardware_instance_id),
        None,
    )
    if hardware_instance is None:
        raise Exception(f"Invalid hardware instance id {hardware_instance_id}")
    return hardware_instance.cluster_id


@contextmanager
Expand Down
34 changes: 34 additions & 0 deletions examples/sdk/create_cserve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import time
import centml
from centml.sdk.api import get_centml_client
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest

with get_centml_client() as cclient:
    # Look up recipes for the Qwen model and keep the "fastest" entry of the first result
    recipes = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")
    fastest = recipes[0].fastest

    # Optionally adjust recipe properties before deploying
    fastest.recipe.additional_properties["max_num_seqs"] = 512

    # Assemble the CServeV2 deployment request; the cluster id is derived
    # from the hardware instance the recipe points at
    hardware_instance_id = fastest.hardware_instance_id
    request = CreateCServeV2DeploymentRequest(
        name="qwen-fastest",
        cluster_id=cclient.get_cluster_id(hardware_instance_id),
        hardware_instance_id=hardware_instance_id,
        recipe=fastest.recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )

    # Submit the request
    response = cclient.create_cserve(request)
    print("Create deployment response: ", response)

    # Read the deployment back by the id returned from creation
    deployment = cclient.get_cserve(response.id)
    print("Deployment details: ", deployment)

    # Pause first, then delete the deployment
    cclient.pause(deployment.id)
    cclient.delete(deployment.id)
Loading