1414Note: you can also point `--model` at a downloaded or converted MLX model on local storage.
1515'''
1616
17+ import os
1718import json
1819import time
19- import os
2020from contextlib import asynccontextmanager
2121import warnings
2222
2323from fastapi import FastAPI , Request , status
2424from fastapi .responses import FileResponse , JSONResponse , StreamingResponse
2525from fastapi .exceptions import RequestValidationError
26+ from fastapi .middleware .cors import CORSMiddleware
2627import click
2728import uvicorn
2829
2930from llm_structured_output .util .output import info , warning , debug
3031
3132from toolio .schema_helper import Model
3233from toolio .llm_helper import enrich_chat_for_tools , DEFAULT_FLAGS , FLAGS_LOOKUP
33- from toolio .http_schematics import V1ChatCompletionsRequest , V1ChatMessage , V1ResponseFormatType
34+ from toolio .http_schematics import V1ChatCompletionsRequest , V1ResponseFormatType
3435from toolio .responder import (ToolCallStreamingResponder , ToolCallResponder ,
3536 ChatCompletionResponder , ChatCompletionStreamingResponder )
3637
3738
# os.cpu_count() returns None when the core count can't be determined;
# fall back to 1 rather than raising TypeError at import time
NUM_CPUS = os.cpu_count() or 1
# Server-wide settings; filled in by main() from the command line options & read back during app startup
app_params = {}
3941
4042# Context manager for the FastAPI app's lifespan: https://fastapi.tiangolo.com/advanced/events/
@@ -57,6 +59,9 @@ async def lifespan(app: FastAPI):
5759 # print(app.state.model.model.__class__, app.state.model.model.model_type)
5860 info (f'Model loaded in { (tdone - tstart )/ 1000000000.0 :.3f} s. Type: { app .state .model .model .model_type } ' )
5961 app .state .model_flags = FLAGS_LOOKUP .get (app .state .model .model .model_type , DEFAULT_FLAGS )
62+ # Look into exposing control over methods & headers as well
63+ app .add_middleware (CORSMiddleware , allow_origins = app_params ['cors_origins' ], allow_credentials = True ,
64+ allow_methods = ["*" ], allow_headers = ["*" ])
6065 yield
6166 # Shutdown code here, if any
6267
@@ -199,7 +204,29 @@ async def post_v1_chat_completions_impl(req_data: V1ChatCompletionsRequest):
199204 help = 'Path to JSON schema to be used if not provided via API call.'
200205 'Interpolated into {jsonschema} placeholder in prompts' )
201206@click .option ('--llmtemp' , default = '0.1' , type = float , help = 'LLM sampling temperature' )
202- def main (host , port , model , default_schema , default_schema_file , llmtemp ):
207+ @click .option ('--workers' , type = int , default = 0 ,
208+ help = 'Number of workers processes to spawn (each utilizes one CPU core).'
209+ 'Defaults to $WEB_CONCURRENCY environment variable if available, or 1' )
210+ @click .option ('--cors_origin' , multiple = True ,
211+ help = 'Origin to be permitted for CORS https://fastapi.tiangolo.com/tutorial/cors/' )
212+ def main (host , port , model , default_schema , default_schema_file , llmtemp , workers , cors_origin ):
203213 app_params .update (model = model , default_schema = default_schema , default_schema_fpath = default_schema_file ,
204- llmtemp = llmtemp )
205- uvicorn .run ('toolio.cli.server:app' , host = host , port = port , reload = False )
214+ llmtemp = llmtemp , cors_origins = list (cors_origin ))
215+ workers = workers or None
216+ # logger.info(f'Host has {NUM_CPUS} CPU cores')
217+ uvicorn .run ('toolio.cli.server:app' , host = host , port = port , reload = False , workers = workers )
218+
219+
220+ # TODO: Implement log config; the helper below is a not-yet-used starting point
def UNUSED_log_setup(config):
    '''
    Configure Python logging from the `log` section of `config` & set the module-level `logger`

    config - mapping which may have a `log` key. Its value is handed to logging.config.dictConfig(),
        & its optional `level` entry is applied to the root logger (INFO if absent).
        If there is no `log` section, dictConfig() is skipped & the root logger is set to INFO.
    '''
    # Must import the submodule explicitly; a plain `import logging` does not load `logging.config`
    import logging.config
    global logger  # noqa: PLW0603

    log_section = config.get('log')
    if log_section:
        main_loglevel = log_section.get('level', 'INFO')
        logging.config.dictConfig(log_section)
    else:
        # No logging section provided: nothing to pass to dictConfig(), just use the default level
        main_loglevel = 'INFO'

    # Set the level on the root logger, so all other loggers in this process space will inherit it.
    # Seems redundant, but is necessary. Python logging is quirky
    logging.getLogger().setLevel(main_loglevel)
    logger = logging.getLogger(__name__)
0 commit comments