
Commit 40700f4

Fix codegen and some small updates (#395)
* fix openapi yaml and runner.gen.go
* fix streaming
* cancel context if borrow container fails
* fix error message
1 parent 1ede01e commit 40700f4

7 files changed (+116, -100 lines)


runner/app/pipelines/llm.py (+1, -1)

@@ -204,7 +204,7 @@ async def generate(
         input_tokens = len(tokenizer.encode(full_prompt))
         if input_tokens > self.engine_args.max_model_len:
             raise ValueError(
-                f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine.engine_args.max_model_len})")
+                f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine_args.max_model_len})")
 
         total_tokens = 0
         current_response = ""

runner/app/routes/llm.py (+3, -1)

@@ -101,7 +101,9 @@ async def llm(
         logger.error(f"LLM processing error: {str(e)}")
         return JSONResponse(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={"detail": "Internal server error during LLM processing."}
+            content=http_error(
+                "Internal server error during LLM processing."
+            )
         )

runner/gateway.openapi.yaml (+8, -4)

@@ -525,7 +525,8 @@ components:
     AudioResponse:
       properties:
         audio:
-          $ref: '#/components/schemas/MediaURL'
+          allOf:
+          - $ref: '#/components/schemas/MediaURL'
           description: The generated audio.
       type: object
       required:
@@ -826,7 +827,8 @@ components:
     HTTPError:
       properties:
         detail:
-          $ref: '#/components/schemas/APIError'
+          allOf:
+          - $ref: '#/components/schemas/APIError'
           description: Detailed error information.
       type: object
       required:
@@ -876,9 +878,11 @@ components:
           title: Finish Reason
           default: ''
         delta:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
         message:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
       type: object
       required:
       - index
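
Why the allOf wrappers: in OpenAPI 3.0, keys placed beside a $ref (here, description) are ignored by the spec, so the regenerated schema wraps each reference in a single-element allOf to keep the description attached to the field. Nothing changes on the wire. A minimal Go sketch, assuming a hypothetical msg field for APIError (the field name is not taken from this diff), shows a client decoding the error body identically under either schema form:

package main

import (
	"encoding/json"
	"fmt"
)

// Hypothetical mirror of the APIError/HTTPError schemas; the field names
// are illustrative assumptions.
type APIError struct {
	Msg string `json:"msg"`
}

type HTTPError struct {
	// "$ref" and "allOf: [$ref]" describe the same nested object, so the
	// decoding target does not change.
	Detail APIError `json:"detail"`
}

func main() {
	body := []byte(`{"detail":{"msg":"Internal server error during LLM processing."}}`)
	var e HTTPError
	if err := json.Unmarshal(body, &e); err != nil {
		panic(err)
	}
	fmt.Println(e.Detail.Msg)
}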

runner/gen_openapi.py (+1)

@@ -123,6 +123,7 @@ def write_openapi(fname: str, entrypoint: str = "runner"):
         description="An application to run AI pipelines",
         routes=app.routes,
         servers=SERVERS,
+        separate_input_output_schemas=False
     )
 
     # Translate OpenAPI schema to 'gateway' side entrypoint if requested.
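
The new separate_input_output_schemas=False argument tells FastAPI's get_openapi not to emit separate "-Input"/"-Output" variants of a Pydantic v2 model that appears in both requests and responses, so the spec keeps a single schema per model and the generated Go client keeps one type name. A hedged before/after sketch (type and field names are assumptions, not read from runner.gen.go):

package main

import "fmt"

// What a generator can produce when the schemas are split into variants:
type LLMMessageInput struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type LLMMessageOutput struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// With separate_input_output_schemas=False there is one schema, hence one
// shared type for both request and response code:
type LLMMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

func main() {
	fmt.Println(LLMMessage{Role: "user", Content: "hi"})
}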

runner/openapi.yaml (+8, -4)

@@ -558,7 +558,8 @@ components:
     AudioResponse:
       properties:
         audio:
-          $ref: '#/components/schemas/MediaURL'
+          allOf:
+          - $ref: '#/components/schemas/MediaURL'
           description: The generated audio.
       type: object
       required:
@@ -918,7 +919,8 @@ components:
     HTTPError:
       properties:
         detail:
-          $ref: '#/components/schemas/APIError'
+          allOf:
+          - $ref: '#/components/schemas/APIError'
           description: Detailed error information.
       type: object
       required:
@@ -1022,9 +1024,11 @@ components:
           title: Finish Reason
           default: ''
         delta:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
         message:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
       type: object
       required:
       - index

worker/runner.gen.go (+85, -85)

(Generated file; the diff is not rendered.)

worker/worker.go (+10, -5)

@@ -399,12 +399,15 @@ func (w *Worker) LLM(ctx context.Context, req GenLLMJSONRequestBody) (interface{
 	ctx, cancel := context.WithCancel(ctx)
 	c, err := w.borrowContainer(ctx, "llm", *req.Model)
 	if err != nil {
+		cancel()
 		return nil, err
 	}
 	if c == nil {
+		cancel()
 		return nil, errors.New("borrowed container is nil")
 	}
 	if c.Client == nil {
+		cancel()
 		return nil, errors.New("container client is nil")
 	}

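The first hunk plugs a context leak: context.WithCancel hands back a cancel function that must eventually be called, and the error paths previously returned without calling it, leaving the derived context alive until the parent expired. A minimal sketch of the pattern, with a hypothetical borrow stand-in for w.borrowContainer:

package main

import (
	"context"
	"errors"
	"fmt"
)

// borrow is a hypothetical stand-in for w.borrowContainer; it always fails
// here so the sketch exercises the early-return path.
func borrow(ctx context.Context) (interface{}, error) {
	return nil, errors.New("borrow container failed")
}

func run(parent context.Context) error {
	ctx, cancel := context.WithCancel(parent)
	c, err := borrow(ctx)
	if err != nil {
		cancel() // without this, the derived ctx leaks on the early return
		return err
	}
	if c == nil {
		cancel()
		return errors.New("borrowed container is nil")
	}
	defer cancel() // sketch only: the real code keeps ctx alive for streaming
	return nil
}

func main() {
	fmt.Println(run(context.Background()))
}
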
@@ -781,19 +784,21 @@ func (w *Worker) handleStreamingResponse(ctx context.Context, c *RunnerContainer
 		default:
 			line := scanner.Text()
 			data := strings.TrimPrefix(line, "data: ")
-
+			if data == "" {
+				continue
+			}
 			if data == "[DONE]" {
 				break
 			}
 
-			var llmRes *LLMResponse
-			if err := json.Unmarshal([]byte(data), llmRes); err != nil {
-				slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()))
+			var llmRes LLMResponse
+			if err := json.Unmarshal([]byte(data), &llmRes); err != nil {
+				slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()), slog.String("json", data))
 				continue
 			}
 
 			select {
-			case outputChan <- llmRes:
+			case outputChan <- &llmRes:
 			case <-ctx.Done():
 				return
 			}