
Commit 40700f4

Fix codegen and some small updates (#395)
* fix openapi yaml and runner.gen.go
* fix streaming
* cancel context if borrow container fails
* fix error message
1 parent 1ede01e commit 40700f4

7 files changed (+116, -100 lines)


runner/app/pipelines/llm.py (+1, -1)

@@ -204,7 +204,7 @@ async def generate(
         input_tokens = len(tokenizer.encode(full_prompt))
         if input_tokens > self.engine_args.max_model_len:
             raise ValueError(
-                f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine.engine_args.max_model_len})")
+                f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine_args.max_model_len})")
 
         total_tokens = 0
         current_response = ""

runner/app/routes/llm.py (+3, -1)

@@ -101,7 +101,9 @@ async def llm(
         logger.error(f"LLM processing error: {str(e)}")
         return JSONResponse(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={"detail": "Internal server error during LLM processing."}
+            content=http_error(
+                "Internal server error during LLM processing."
+            )
         )

runner/gateway.openapi.yaml (+8, -4)

@@ -525,7 +525,8 @@ components:
     AudioResponse:
       properties:
         audio:
-          $ref: '#/components/schemas/MediaURL'
+          allOf:
+          - $ref: '#/components/schemas/MediaURL'
           description: The generated audio.
       type: object
       required:
@@ -826,7 +827,8 @@ components:
     HTTPError:
       properties:
         detail:
-          $ref: '#/components/schemas/APIError'
+          allOf:
+          - $ref: '#/components/schemas/APIError'
           description: Detailed error information.
       type: object
       required:
@@ -876,9 +878,11 @@ components:
           title: Finish Reason
           default: ''
         delta:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
         message:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
       type: object
       required:
       - index
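
Why the allOf wrappers: in OpenAPI 3.0, keys placed beside a $ref (here, description) are ignored by the spec, so the regenerated schema wraps each reference in a single-element allOf to keep the description attached to the field. Nothing changes on the wire. A minimal Go sketch, assuming a hypothetical msg field for APIError (the field name is not taken from this diff), shows a client decoding the error body identically under either schema form:

package main

import (
	"encoding/json"
	"fmt"
)

// Hypothetical mirror of the APIError/HTTPError schemas; the field names
// are illustrative assumptions.
type APIError struct {
	Msg string `json:"msg"`
}

type HTTPError struct {
	// "$ref" and "allOf: [$ref]" describe the same nested object, so the
	// decoding target does not change.
	Detail APIError `json:"detail"`
}

func main() {
	body := []byte(`{"detail":{"msg":"Internal server error during LLM processing."}}`)
	var e HTTPError
	if err := json.Unmarshal(body, &e); err != nil {
		panic(err)
	}
	fmt.Println(e.Detail.Msg)
}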

runner/gen_openapi.py (+1)

@@ -123,6 +123,7 @@ def write_openapi(fname: str, entrypoint: str = "runner"):
         description="An application to run AI pipelines",
         routes=app.routes,
         servers=SERVERS,
+        separate_input_output_schemas=False
     )
 
     # Translate OpenAPI schema to 'gateway' side entrypoint if requested.
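
The new separate_input_output_schemas=False argument tells FastAPI's get_openapi not to emit separate "-Input"/"-Output" variants of a Pydantic v2 model that appears in both requests and responses, so the spec keeps a single schema per model and the generated Go client keeps one type name. A hedged before/after sketch (type and field names are assumptions, not read from runner.gen.go):

package main

import "fmt"

// What a generator can produce when the schemas are split into variants:
type LLMMessageInput struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type LLMMessageOutput struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// With separate_input_output_schemas=False there is one schema, hence one
// shared type for both request and response code:
type LLMMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

func main() {
	fmt.Println(LLMMessage{Role: "user", Content: "hi"})
}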

runner/openapi.yaml (+8, -4)

@@ -558,7 +558,8 @@ components:
     AudioResponse:
       properties:
         audio:
-          $ref: '#/components/schemas/MediaURL'
+          allOf:
+          - $ref: '#/components/schemas/MediaURL'
           description: The generated audio.
       type: object
       required:
@@ -918,7 +919,8 @@ components:
     HTTPError:
       properties:
         detail:
-          $ref: '#/components/schemas/APIError'
+          allOf:
+          - $ref: '#/components/schemas/APIError'
           description: Detailed error information.
       type: object
       required:
@@ -1022,9 +1024,11 @@ components:
           title: Finish Reason
           default: ''
         delta:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
         message:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
       type: object
       required:
       - index

worker/runner.gen.go (+85, -85)

(Generated file; the diff is not rendered.)

worker/worker.go (+10, -5)

@@ -399,12 +399,15 @@ func (w *Worker) LLM(ctx context.Context, req GenLLMJSONRequestBody) (interface{
 	ctx, cancel := context.WithCancel(ctx)
 	c, err := w.borrowContainer(ctx, "llm", *req.Model)
 	if err != nil {
+		cancel()
 		return nil, err
 	}
 	if c == nil {
+		cancel()
 		return nil, errors.New("borrowed container is nil")
 	}
 	if c.Client == nil {
+		cancel()
 		return nil, errors.New("container client is nil")
 	}

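The first hunk plugs a context leak: context.WithCancel hands back a cancel function that must eventually be called, and the error paths previously returned without calling it, leaving the derived context alive until the parent expired. A minimal sketch of the pattern, with a hypothetical borrow stand-in for w.borrowContainer:

package main

import (
	"context"
	"errors"
	"fmt"
)

// borrow is a hypothetical stand-in for w.borrowContainer; it always fails
// here so the sketch exercises the early-return path.
func borrow(ctx context.Context) (interface{}, error) {
	return nil, errors.New("borrow container failed")
}

func run(parent context.Context) error {
	ctx, cancel := context.WithCancel(parent)
	c, err := borrow(ctx)
	if err != nil {
		cancel() // without this, the derived ctx leaks on the early return
		return err
	}
	if c == nil {
		cancel()
		return errors.New("borrowed container is nil")
	}
	defer cancel() // sketch only: the real code keeps ctx alive for streaming
	return nil
}

func main() {
	fmt.Println(run(context.Background()))
}
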
@@ -781,19 +784,21 @@ func (w *Worker) handleStreamingResponse(ctx context.Context, c *RunnerContainer
 		default:
 			line := scanner.Text()
 			data := strings.TrimPrefix(line, "data: ")
-
+			if data == "" {
+				continue
+			}
 			if data == "[DONE]" {
 				break
 			}
 
-			var llmRes *LLMResponse
-			if err := json.Unmarshal([]byte(data), llmRes); err != nil {
-				slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()))
+			var llmRes LLMResponse
+			if err := json.Unmarshal([]byte(data), &llmRes); err != nil {
+				slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()), slog.String("json", data))
 				continue
 			}
 
 			select {
-			case outputChan <- llmRes:
+			case outputChan <- &llmRes:
 			case <-ctx.Done():
 				return
 			}