diff --git a/plugins/wasm-go/extensions/ai-proxy/VERSION b/plugins/wasm-go/extensions/ai-proxy/VERSION new file mode 100644 index 0000000000..dadcca1e02 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-proxy/VERSION @@ -0,0 +1 @@ +1.0.0-alpha diff --git a/test/e2e/conformance/base/llm-mock.yaml b/test/e2e/conformance/base/llm-mock.yaml index 22b89272b4..f515a12b52 100644 --- a/test/e2e/conformance/base/llm-mock.yaml +++ b/test/e2e/conformance/base/llm-mock.yaml @@ -29,7 +29,7 @@ metadata: spec: containers: - name: llm-mock - image: registry.cn-hangzhou.aliyuncs.com/hxt/llm-mock:latest + image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/llm-mock:latest ports: - containerPort: 3000 --- diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.go b/test/e2e/conformance/tests/go-wasm-ai-proxy.go index 882b0b0816..97a001f6f0 100644 --- a/test/e2e/conformance/tests/go-wasm-ai-proxy.go +++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.go @@ -37,6 +37,126 @@ var WasmPluginsAiProxy = suite.ConformanceTest{ Manifests: []string{"tests/go-wasm-ai-proxy.yaml"}, Test: func(t *testing.T, suite *suite.ConformanceTestSuite) { testcases := []http.Assertion{ + { + Meta: http.AssertionMeta{ + TestCaseName: "ai360 case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.360.cn", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"360gpt-turbo","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "ai360 case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.360.cn", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"360gpt-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"360gpt-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"360gpt-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"360gpt-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"360gpt-turbo","object":"chat.completion.chunk","usage":{}} + +data: 
{"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"360gpt-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"360gpt-turbo","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "baichuan case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.baichuan-ai.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "baichuan case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.baichuan-ai.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"baichuan2-13b-chat-v1","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, { Meta: http.AssertionMeta{ TestCaseName: "baidu case 1: non-streaming request", @@ -93,6 +213,66 @@ data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}} data: [DONE] +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "deepseek case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.deepseek.com", + Path: 
"/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"deepseek-reasoner","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "deepseek case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.deepseek.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"deepseek-reasoner","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"deepseek-reasoner","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"deepseek-reasoner","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"deepseek-reasoner","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"deepseek-reasoner","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"deepseek-reasoner","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"deepseek-reasoner","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + `), }, }, @@ -153,6 +333,126 @@ data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}} data: [DONE] +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "github case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "models.inference.ai.azure.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "github case 2: streaming request", + 
CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "models.inference.ai.azure.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"cohere-command-r-08-2024","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "groq case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.groq.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"llama3-8b-8192","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "groq case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.groq.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"llama3-8b-8192","object":"chat.completion.chunk","usage":{}} + +data: 
{"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"llama3-8b-8192","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"llama3-8b-8192","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"llama3-8b-8192","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"llama3-8b-8192","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"llama3-8b-8192","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"llama3-8b-8192","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + `), }, }, @@ -273,6 +573,66 @@ data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistan data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}} +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "mistral case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.mistral.ai", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"mistral-tiny","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "mistral case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.mistral.ai", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"mistral-tiny","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"mistral-tiny","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"mistral-tiny","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"mistral-tiny","object":"chat.completion.chunk","usage":{}} + +data: 
{"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"mistral-tiny","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"mistral-tiny","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"mistral-tiny","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + `), }, }, @@ -361,6 +721,246 @@ data: [DONE] }, }, }, + { + Meta: http.AssertionMeta{ + TestCaseName: "stepfun case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.stepfun.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"step-1-8k","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "stepfun case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.stepfun.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"step-1-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"step-1-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"step-1-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"step-1-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"step-1-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"step-1-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"step-1-8k","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "together-ai case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.together.xyz", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: 
[]byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "together-ai case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.together.xyz", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"meta-llama/Meta-Llama-3-8B-Instruct-Turbo","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "yi case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.lingyiwanwu.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"Yi-Medium","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "yi case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: 
http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.lingyiwanwu.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"Yi-Medium","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"Yi-Medium","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"Yi-Medium","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"Yi-Medium","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"Yi-Medium","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"Yi-Medium","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"Yi-Medium","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "zhipuai case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "open.bigmodel.cn", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"glm-4-plus","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "zhipuai case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "open.bigmodel.cn", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"glm-4-plus","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"glm-4-plus","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"glm-4-plus","object":"chat.completion.chunk","usage":{}} + 
+data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"glm-4-plus","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"glm-4-plus","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"glm-4-plus","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"glm-4-plus","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, } t.Run("WasmPlugins ai-proxy", func(t *testing.T) { for _, testcase := range testcases { diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml index 79853c1a99..f64ec77285 100644 --- a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml +++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml @@ -13,6 +13,44 @@ # limitations under the License. apiVersion: networking.k8s.io/v1 kind: Ingress +metadata: + name: wasmplugin-ai-proxy-ai360 + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.360.cn" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-baichuan + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.baichuan-ai.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress metadata: name: wasmplugin-ai-proxy-baidu namespace: higress-conformance-ai-backend @@ -32,6 +70,25 @@ spec: --- apiVersion: networking.k8s.io/v1 kind: Ingress +metadata: + name: wasmplugin-ai-proxy-deepseek + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.deepseek.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress metadata: name: wasmplugin-ai-proxy-doubao namespace: higress-conformance-ai-backend @@ -51,6 +108,44 @@ spec: --- apiVersion: networking.k8s.io/v1 kind: Ingress +metadata: + name: wasmplugin-ai-proxy-github + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "models.inference.ai.azure.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-groq + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.groq.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress metadata: name: wasmplugin-ai-proxy-minimax-v2-api namespace: higress-conformance-ai-backend @@ -89,6 +184,25 @@ spec: --- apiVersion: networking.k8s.io/v1 kind: Ingress +metadata: + name: wasmplugin-ai-proxy-mistral + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.mistral.ai" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + 
name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress metadata: name: wasmplugin-ai-proxy-qwen-compatible-mode namespace: higress-conformance-ai-backend @@ -125,6 +239,82 @@ spec: port: number: 3000 --- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-stepfun + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.stepfun.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-together-ai + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.together.xyz" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-yi + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.lingyiwanwu.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-zhipuai + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "open.bigmodel.cn" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- apiVersion: extensions.higress.io/v1alpha1 kind: WasmPlugin metadata: @@ -135,6 +325,26 @@ spec: phase: UNSPECIFIED_PHASE priority: 100 matchRules: + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': 360gpt-turbo + '*': 360gpt-pro + type: ai360 + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-ai360 + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': baichuan2-13b-chat-v1 + '*': baichuan-7b-v1 + type: baichuan + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-baichuan - config: provider: apiTokens: @@ -145,6 +355,16 @@ spec: type: baidu ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-baidu + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': deepseek-reasoner + '*': deepseek-chat + type: deepseek + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-deepseek - config: provider: apiTokens: @@ -154,6 +374,26 @@ spec: type: doubao ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-doubao + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': cohere-command-r-08-2024 + '*': Phi-3.5-MoE-instruct + type: github + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-github + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': llama3-8b-8192 + '*': llama-3.1-8b-instant + type: groq + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-groq - config: provider: apiTokens: @@ -178,6 +418,16 @@ spec: minimaxGroupId: 1 ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-minimax-pro-api + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': mistral-tiny + '*': mistral-large-latest + type: mistral + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-mistral - config: provider: apiTokens: @@ -203,4 +453,44 @@ spec: type: qwen ingress: - 
higress-conformance-ai-backend/wasmplugin-ai-proxy-qwen - url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0 \ No newline at end of file + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': step-1-8k + '*': step-1-32k + type: stepfun + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-stepfun + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': meta-llama/Meta-Llama-3-8B-Instruct-Turbo + '*': meta-llama/Llama-3-8b-chat-hf + type: together-ai + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-together-ai + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': Yi-Medium + '*': Yi-Large + type: yi + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-yi + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': glm-4-plus + '*': glm-4-long + type: zhipuai + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-zhipuai + url: file:///opt/plugins/wasm-go/extensions/ai-proxy/plugin.wasm \ No newline at end of file diff --git a/tools/hack/build-wasm-plugins.sh b/tools/hack/build-wasm-plugins.sh index d8b5adf454..7e4e4c3879 100755 --- a/tools/hack/build-wasm-plugins.sh +++ b/tools/hack/build-wasm-plugins.sh @@ -71,7 +71,19 @@ else version=$(cat "$version_file") if [[ "$version" =~ -alpha$ ]]; then echo "🚀 Build Go WasmPlugin: $name (version $version)" - PLUGIN_NAME=${name} make build + # Load .buildrc file + buildrc_file="$EXTENSIONS_DIR$file/.buildrc" + if [ -f "$buildrc_file" ]; then + echo "Found .buildrc file, sourcing it..." + . "$buildrc_file" + else + echo ".buildrc file not found" + fi + echo "EXTRA_TAGS=${EXTRA_TAGS:-}" + # Build plugin + PLUGIN_NAME=${name} EXTRA_TAGS=${EXTRA_TAGS:-} make build + # Clean up EXTRA_TAGS environment variable + unset EXTRA_TAGS else echo "Plugin version $version not ends with '-alpha', skipping compilation for $name." fi
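
For reference, the build-script change above only exports EXTRA_TAGS when a plugin ships an optional .buildrc next to its VERSION file; otherwise the build runs with the default settings. A minimal sketch of such a file for plugins/wasm-go/extensions/ai-proxy/.buildrc is shown below. The concrete tag value is an assumption for illustration and is not taken from this patch; set whatever build tags the plugin's Makefile actually expects.

    # .buildrc: sourced by tools/hack/build-wasm-plugins.sh before "make build".
    # Variables set here become visible to the build step; EXTRA_TAGS is passed
    # through to "make build" (presumably consumed as extra Go build tags) and
    # is unset again once the plugin has been built.
    EXTRA_TAGS=proxy_wasm_version_0_2_100   # assumed example value, replace as needed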