Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit 7485a9c

Browse files
authored
fix/LLM API: return helpful error message on missing model (#64474)
Previously, the `/.api/llm/chat/completions` API returned an error message saying a model was unsupported if it used the wrong syntax. Now, we only validate that the request is using the new modelref syntax and otherwise delegate to the underlying `/.api/completions/stream` endpoint to return the error message. This error message at least mentions the default model, which I've found very helpful when debugging why things are not working as expected. ``` unsupported chat model "openai::2024-02-01::gpt-4o" (default "anthropic::unknown::claude-3-sonnet-20240229" ``` <!-- PR description tips: https://www.notion.so/sourcegraph/Write-a-good-pull-request-description-610a7fd3e613496eb76f450db5a49b6e --> ## Test plan See updated unit test. Also manually tested locally ``` ❯ curl 'https://sourcegraph.test:3443/.api/llm/chat/completions' \ -H 'Content-Type: application/json' \ -H "Authorization: token $HURL_token" \ --data-raw '{ "maxTokensToSample": 4000, "messages": [ { "role": "user", "content": "Respond with \"no\" and nothing else." } ], "model": "openai::2024-02-01::gpt-4o", "temperature": 0, "topK": -1, "topP": -1, "stream": false }' failed to forward request to apiHandler: handler returned unexpected status code: got 400 want 200, response body: unsupported chat model "openai::2024-02-01::gpt-4o" (default "anthropic::unknown::claude-3-sonnet-20240229") ``` <!-- REQUIRED; info at https://docs-legacy.sourcegraph.com/dev/background-information/testing_principles --> ## Changelog <!-- OPTIONAL; info at https://www.notion.so/sourcegraph/Writing-a-changelog-entry-dd997f411d524caabf0d8d38a24a878c -->
1 parent 68e0fb0 commit 7485a9c

File tree

5 files changed

+53
-34
lines changed

5 files changed

+53
-34
lines changed

cmd/frontend/internal/llmapi/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ go_library(
1313
visibility = ["//cmd/frontend:__subpackages__"],
1414
deps = [
1515
"//internal/completions/types",
16+
"//internal/modelconfig",
1617
"//internal/modelconfig/types",
1718
"//internal/openapi/goapi",
1819
"//lib/errors",

cmd/frontend/internal/llmapi/golly-recordings/TestChatCompletionsHandler.recording.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,29 @@ recordings:
160160
headers:
161161
- key: Content-Type
162162
value: text/plain; charset=utf-8
163+
- hash: 94b132c1d5c4a93c9d8f71a01b7b01b2118f16e67747fb028b71a46887f78c4a
164+
request:
165+
recording_date: "2024-08-14T23:16:24+02:00"
166+
url: https://sourcegraph.com/.api/completions/stream?api-version=1&client-name=openai-rest-api&client-version=6.0.0
167+
method: POST
168+
headers:
169+
- key: Content-Type
170+
value: application/json
171+
- key: Authorization
172+
value: token REDACTED_51c20f884ac371e6e12fe635336ac83af942ddc816c96d99dd3e49b3f8dfeb26
173+
body: |
174+
Fast: false
175+
logprobs: null
176+
maxTokensToSample: 16
177+
messages:
178+
- speaker: human
179+
text: Hello
180+
model: anthropic::unknown::claude-gpt
181+
stream: false
182+
response:
183+
status_code: 400
184+
body: |
185+
the requested chat model is not available ("anthropic::unknown::claude-gpt", onProTier=true)
186+
headers:
187+
- key: Content-Type
188+
value: text/plain; charset=utf-8

cmd/frontend/internal/llmapi/handler_chat_completions.go

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/sourcegraph/sourcegraph/lib/errors"
1717

1818
completions "github.com/sourcegraph/sourcegraph/internal/completions/types"
19+
"github.com/sourcegraph/sourcegraph/internal/modelconfig"
1920
types "github.com/sourcegraph/sourcegraph/internal/modelconfig/types"
2021
"github.com/sourcegraph/sourcegraph/internal/openapi/goapi"
2122
)
@@ -30,8 +31,6 @@ type chatCompletionsHandler struct {
3031
// would have an in-house service we can use instead of going via HTTP but using HTTP
3132
// simplifies a lot of things (including testing).
3233
apiHandler http.Handler
33-
34-
GetModelConfig GetModelConfigurationFunc
3534
}
3635

3736
var _ http.Handler = (*chatCompletionsHandler)(nil)
@@ -46,12 +45,6 @@ func (h *chatCompletionsHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
4645

4746
decoder := json.NewDecoder(io.NopCloser(bytes.NewBuffer(body)))
4847

49-
currentModelConfig, err := h.GetModelConfig()
50-
if err != nil {
51-
http.Error(w, fmt.Sprintf("modelConfigSvc.Get: %v", err), http.StatusInternalServerError)
52-
return
53-
}
54-
5548
if err := decoder.Decode(&chatCompletionRequest); err != nil {
5649
http.Error(w, fmt.Sprintf("decoder.Decode: %v", err), http.StatusInternalServerError)
5750
return
@@ -62,7 +55,7 @@ func (h *chatCompletionsHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
6255
return
6356
}
6457

65-
if errorMsg := validateRequestedModel(chatCompletionRequest, currentModelConfig); errorMsg != "" {
58+
if errorMsg := validateRequestedModel(chatCompletionRequest); errorMsg != "" {
6659
http.Error(w, errorMsg, http.StatusBadRequest)
6760
return
6861
}
@@ -79,27 +72,18 @@ func (h *chatCompletionsHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
7972
serveJSON(w, r, h.logger, chatCompletionResponse)
8073
}
8174

82-
// validateRequestedModel checks that we only use the modelref syntax
83-
// (${ProviderID}::${APIVersionID}::${ModelID}). If the user passes the old
84-
// syntax `${ProviderID}/${ModelID}`, then we try to return a helpful error
85-
// message suggesting to use the new modelref syntax.
86-
func validateRequestedModel(chatCompletionRequest goapi.CreateChatCompletionRequest, modelConfig *types.ModelConfiguration) string {
87-
closestModelRef := ""
88-
for _, model := range modelConfig.Models {
89-
if string(model.ModelRef) == chatCompletionRequest.Model {
90-
return ""
91-
}
92-
if model.DisplayName == chatCompletionRequest.Model || model.ModelName == chatCompletionRequest.Model {
93-
closestModelRef = string(model.ModelRef)
94-
} else if chatCompletionRequest.Model == fmt.Sprintf("%s/%s", model.ModelRef.ProviderID(), model.ModelRef.ModelID()) {
95-
closestModelRef = string(model.ModelRef)
96-
}
75+
// Require client to use the new modelref syntax
76+
// (${ProviderID}::${APIVersionID}::${ModelID}). We don't validate that the
77+
// actual model exists because the underlying `/.api/completions/stream`
78+
// endpoint already does this validation and reports helpful error messages. We
79+
// just want to reject requests for models using the old non-modelref syntax
80+
// (example: anthropic/claude-3-haiku).
81+
func validateRequestedModel(chatCompletionRequest goapi.CreateChatCompletionRequest) string {
82+
maybeMRef := types.ModelRef(chatCompletionRequest.Model)
83+
if err := modelconfig.ValidateModelRef(maybeMRef); err != nil {
84+
return fmt.Sprintf("requested model '%s' failed validation: %s. Expected format '${ProviderID}::${APIVersionID}::${ModelID}'. To fix this problem, send a request to `GET /.api/llm/models` to see the list of supported models.", chatCompletionRequest.Model, err)
9785
}
98-
didYouMean := ""
99-
if closestModelRef != "" {
100-
didYouMean = fmt.Sprintf(" (similar to %s)", closestModelRef)
101-
}
102-
return fmt.Sprintf("model %s is not supported%s", chatCompletionRequest.Model, didYouMean)
86+
return ""
10387
}
10488

10589
func validateChatCompletionRequest(chatCompletionRequest goapi.CreateChatCompletionRequest) string {

cmd/frontend/internal/llmapi/handler_chat_completions_test.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,17 @@ func TestChatCompletionsHandler(t *testing.T) {
5757
// For now, we reject requests when the model is not using the new ModelRef format.
5858
assert.Equal(t, http.StatusBadRequest, rr.Code)
5959

60-
// Assert that we give a helpful error message nudging the user to use modelref instead of the old syntax.
61-
assert.Equal(t, "model anthropic/claude-3-haiku-20240307 is not supported (similar to anthropic::unknown::claude-3-haiku-20240307)\n", rr.Body.String())
60+
assert.Equal(t, "requested model 'anthropic/claude-3-haiku-20240307' failed validation: modelRef syntax error. Expected format '${ProviderID}::${APIVersionID}::${ModelID}'. To fix this problem, send a request to `GET /.api/llm/models` to see the list of supported models.\n", rr.Body.String())
61+
})
62+
63+
t.Run("/.api/llm/chat/completions (400 model is invalid model)", func(t *testing.T) {
64+
rr := c.chatCompletions(t, `{
65+
"model": "anthropic::unknown::claude-gpt",
66+
"messages": [{"role": "user", "content": "Hello"}]
67+
}`)
68+
// The model uses valid ModelRef syntax but does not exist; validation is
// delegated to the underlying `/.api/completions/stream` endpoint, which rejects it.
69+
assert.Equal(t, http.StatusInternalServerError, rr.Code) // Should be 400 Bad Request, see CODY-3318
70+
assert.Equal(t, "failed to forward request to apiHandler: handler returned unexpected status code: got 400 want 200, response body: the requested chat model is not available (\"anthropic::unknown::claude-gpt\", onProTier=true)\n", rr.Body.String())
6271
})
6372

6473
t.Run("/.api/llm/chat/completions (200 OK)", func(t *testing.T) {

cmd/frontend/internal/llmapi/httpapi.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ func RegisterHandlers(m *mux.Router, apiHandler http.Handler, getModelConfigFunc
1818
logger := sglog.Scoped("llmapi")
1919

2020
m.Path("/chat/completions").Methods("POST").Handler(&chatCompletionsHandler{
21-
logger: logger,
22-
apiHandler: apiHandler,
23-
GetModelConfig: getModelConfigFunc,
21+
logger: logger,
22+
apiHandler: apiHandler,
2423
})
2524
m.Path("/models").Methods("GET").Handler(&modelsHandler{
2625
logger: logger,

0 commit comments

Comments
 (0)