@@ -33,10 +33,10 @@ def __init__(self,
33
33
StatusCode .TRITON_SERVER_ERR : ModelStatusCode .SERVER_ERR ,
34
34
StatusCode .TRITON_SESSION_CLOSED : ModelStatusCode .SESSION_CLOSED ,
35
35
StatusCode .TRITON_STREAM_ING : ModelStatusCode .STREAM_ING ,
36
- StatusCode .TRITON_SESSION_OUT_OF_LIMIT : ModelStatusCode .
37
- SESSION_OUT_OF_LIMIT ,
38
- StatusCode .TRITON_SESSION_INVALID_ARG : ModelStatusCode .
39
- SESSION_INVALID_ARG ,
36
+ StatusCode .TRITON_SESSION_OUT_OF_LIMIT :
37
+ ModelStatusCode . SESSION_OUT_OF_LIMIT ,
38
+ StatusCode .TRITON_SESSION_INVALID_ARG :
39
+ ModelStatusCode . SESSION_INVALID_ARG ,
40
40
StatusCode .TRITON_SESSION_READY : ModelStatusCode .SESSION_READY
41
41
}
42
42
self .chatbot = Chatbot (
@@ -232,9 +232,12 @@ def __init__(self,
232
232
** kwargs ):
233
233
234
234
super ().__init__ (path = path , ** kwargs )
235
- from lmdeploy import pipeline
235
+ from lmdeploy import TurbomindEngineConfig , pipeline
236
236
self .model = pipeline (
237
- model_path = self .path , model_name = model_name , tp = tp , ** pipeline_cfg )
237
+ model_path = self .path ,
238
+ model_name = model_name ,
239
+ backend_config = TurbomindEngineConfig (tp = tp ),
240
+ ** pipeline_cfg )
238
241
239
242
def generate (self ,
240
243
inputs : Union [str , List [str ]],
0 commit comments