@@ -181,6 +181,120 @@ def get(name):
181181 )
182182
183183
def _parse_env_vars(env_vars_str):
    """Parse a comma-separated ``KEY=VALUE`` string into a dict.

    Only the first ``=`` of each entry separates the key from the value, so
    values may themselves contain ``=`` (tokens, URLs with query strings,
    connection strings). Empty entries, such as the one produced by a
    trailing comma, are ignored. Values containing commas cannot be
    expressed in this format.

    Args:
        env_vars_str: Raw text entered by the user; may be empty or blank.

    Returns:
        A dict mapping variable names to values; empty if nothing was given.

    Raises:
        ValueError: If an entry has no ``=`` or has an empty key.
    """
    env_vars = {}
    for entry in env_vars_str.split(","):
        entry = entry.strip()
        if not entry:
            # Tolerate "A=1," and "A=1,,B=2" instead of failing on unpacking.
            continue
        key, sep, value = entry.partition("=")
        key = key.strip()
        if not sep or not key:
            raise ValueError(
                f"Invalid environment variable {entry!r}: expected KEY=VALUE format"
            )
        env_vars[key] = value
    return env_vars


@click.command(help="Create a new deployment")
@handle_exception
def create():
    """Interactively create an inference, compute, or CServe deployment.

    Prompts for the deployment name and type, then for a cluster and a
    hardware instance chosen from what the client reports as available,
    then for scaling/concurrency settings, and finally for the fields
    specific to the selected deployment type. The matching create request
    is sent through the client and the new deployment's ID is echoed.

    Returns early, after echoing a message, when no clusters or no hardware
    instances are available.

    Raises:
        ValueError: If the concurrency answer is not an integer or an
            environment variable entry is malformed. How this surfaces to
            the user depends on ``handle_exception`` (defined elsewhere).
    """
    with get_centml_client() as cclient:
        # Prompt for general fields
        name = click.prompt("Enter a name for the deployment")
        dtype_str = click.prompt(
            "Select a deployment type",
            type=click.Choice(list(depl_name_to_type_map.keys())),
            show_choices=True,
        )
        depl_type = depl_name_to_type_map[dtype_str]

        # Select cluster
        clusters = cclient.get_clusters().results
        if not clusters:
            click.echo("No clusters available. Please ensure you have a cluster setup.")
            return
        cluster_names = [c.name for c in clusters]
        cluster_name = click.prompt(
            "Select a cluster",
            type=click.Choice(cluster_names),
            show_choices=True,
        )
        # click.Choice only accepts one of cluster_names, so a match exists.
        cluster_id = next(c.id for c in clusters if c.name == cluster_name)

        # Hardware selection
        hw_resp = cclient.get_hardware_instances(cluster_id)
        if not hw_resp:
            click.echo("No hardware instances available for this cluster.")
            return
        hw_names = [h.name for h in hw_resp]
        hw_name = click.prompt(
            "Select a hardware instance",
            type=click.Choice(hw_names),
            show_choices=True,
        )
        hw_id = next(h.id for h in hw_resp if h.name == hw_name)

        # Common fields
        min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
        max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
        concurrency_str = click.prompt(
            "Max concurrency (or leave blank)", default="", show_default=False
        ).strip()
        # A blank (or whitespace-only) answer means "no concurrency limit given".
        concurrency = int(concurrency_str) if concurrency_str else None

        # Depending on type:
        if depl_type == DeploymentType.INFERENCE_V2:
            image = click.prompt("Enter the image URL")
            container_port = click.prompt("Enter the container port", default=8080, type=int)
            healthcheck = click.prompt(
                "Enter healthcheck endpoint (default '/')", default="/", show_default=True
            )
            env_vars_str = click.prompt(
                "Enter environment variables in KEY=VALUE format (comma separated) or leave blank",
                default="",
                show_default=False,
            )
            env_vars = _parse_env_vars(env_vars_str)

            # Construct the inference request
            from platform_api_python_client import CreateInferenceDeploymentRequest

            req = CreateInferenceDeploymentRequest(
                name=name,
                cluster_id=cluster_id,
                hardware_instance_id=hw_id,
                image_url=image,
                container_port=container_port,
                healthcheck=healthcheck,
                min_scale=min_scale,
                max_scale=max_scale,
                concurrency=concurrency,
                # Send None rather than an empty mapping when nothing was entered.
                env_vars=env_vars if env_vars else None,
            )
            created = cclient.create_inference(req)
            click.echo(f"Inference deployment created with ID: {created.id}")

        elif depl_type == DeploymentType.COMPUTE_V2:
            # For compute deployments, we might ask for a public SSH key
            ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)

            from platform_api_python_client import CreateComputeDeploymentRequest

            req = CreateComputeDeploymentRequest(
                name=name,
                cluster_id=cluster_id,
                hardware_instance_id=hw_id,
                ssh_public_key=ssh_key if ssh_key.strip() else None,
            )
            created = cclient.create_compute(req)
            click.echo(f"Compute deployment created with ID: {created.id}")

        elif depl_type == DeploymentType.CSERVE:
            # For cserve deployments, ask for model and parallelism
            model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b")
            tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int)
            pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int)
            # concurrency asked above

            from platform_api_python_client import CreateCServeDeploymentRequest

            req = CreateCServeDeploymentRequest(
                name=name,
                cluster_id=cluster_id,
                hardware_instance_id=hw_id,
                model=model,
                tensor_parallel_size=tensor_parallel_size,
                pipeline_parallel_size=pipeline_parallel_size,
                min_scale=min_scale,
                max_scale=max_scale,
                concurrency=concurrency,
            )
            created = cclient.create_cserve(req)
            click.echo(f"CServe deployment created with ID: {created.id}")

        else:
            click.echo("Unknown deployment type.")
296+
297+
184298@click .command (help = "Delete a deployment" )
185299@click .argument ("id" , type = int )
186300@handle_exception
0 commit comments