diff --git a/docs/guides/chat.md b/docs/guides/chat.md index 10ca78304..e365e0fa9 100644 --- a/docs/guides/chat.md +++ b/docs/guides/chat.md @@ -246,6 +246,19 @@ puts response2.content You can set the temperature using `with_temperature`, which returns the `Chat` instance for chaining. +## Custom Request Parameters + +You can configure additional provider-specific features by adding custom fields to each API request. Use the `with_params` method. + +```ruby +# response_format parameter is supported by :openai, :ollama, :deepseek +chat = RubyLLM.chat.with_params(response_format: { type: 'json_object' }) +response = chat.ask "What is the square root of 64? Answer with a JSON object with the key `result`." +puts JSON.parse(response.content) +``` + +Allowed parameters vary widely by provider and model. + ## Tracking Token Usage Understanding token usage is important for managing costs and staying within context limits. Each `RubyLLM::Message` returned by `ask` includes token counts. @@ -311,4 +324,4 @@ This guide covered the core `Chat` interface. Now you might want to explore: * [Using Tools]({% link guides/tools.md %}): Enable the AI to call your Ruby code. * [Streaming Responses]({% link guides/streaming.md %}): Get real-time feedback from the AI. * [Rails Integration]({% link guides/rails.md %}): Persist your chat conversations easily. -* [Error Handling]({% link guides/error-handling.md %}): Build robust applications that handle API issues. \ No newline at end of file +* [Error Handling]({% link guides/error-handling.md %}): Build robust applications that handle API issues. diff --git a/lib/ruby_llm/active_record/acts_as.rb b/lib/ruby_llm/active_record/acts_as.rb index 5b915391c..25abfefa9 100644 --- a/lib/ruby_llm/active_record/acts_as.rb +++ b/lib/ruby_llm/active_record/acts_as.rb @@ -130,6 +130,11 @@ def with_context(...) self end + def with_params(...) + to_llm.with_params(...) + self + end + def on_new_message(...) to_llm.on_new_message(...) 
self diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index dc7cf84e0..f547887a7 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -11,7 +11,7 @@ module RubyLLM class Chat include Enumerable - attr_reader :model, :messages, :tools + attr_reader :model, :messages, :tools, :params def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil) if assume_model_exists && !provider @@ -25,6 +25,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @temperature = 0.7 @messages = [] @tools = {} + @params = {} @on = { new_message: nil, end_message: nil @@ -78,6 +79,11 @@ def with_context(context) self end + def with_params(**params) + @params = params + self + end + def on_new_message(&block) @on[:new_message] = block self @@ -99,6 +105,7 @@ def complete(&) temperature: @temperature, model: @model.id, connection: @connection, + params: @params, &wrap_streaming_block(&) ) diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index 11b52f1a4..8899e43db 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -10,14 +10,19 @@ module Provider module Methods extend Streaming - def complete(messages, tools:, temperature:, model:, connection:, &) + def complete(messages, tools:, temperature:, model:, connection:, params: {}, &) # rubocop:disable Metrics/ParameterLists normalized_temperature = maybe_normalize_temperature(temperature, model) - payload = render_payload(messages, - tools: tools, - temperature: normalized_temperature, - model: model, - stream: block_given?) + payload = deep_merge( + params, + render_payload( + messages, + tools: tools, + temperature: normalized_temperature, + model: model, + stream: block_given? + ) + ) if block_given? 
stream_response connection, payload, & @@ -26,6 +31,16 @@ def complete(messages, tools:, temperature:, model:, connection:, &) end end + def deep_merge(params, payload) + params.merge(payload) do |_key, params_value, payload_value| + if params_value.is_a?(Hash) && payload_value.is_a?(Hash) + deep_merge(params_value, payload_value) + else + payload_value + end + end + end + def list_models(connection:) response = connection.get models_url parse_list_models_response response, slug, capabilities diff --git a/spec/fixtures/vcr_cassettes/chat_with_params_anthropic_claude-3-5-haiku-20241022_supports_service_tier_param.yml b/spec/fixtures/vcr_cassettes/chat_with_params_anthropic_claude-3-5-haiku-20241022_supports_service_tier_param.yml new file mode 100644 index 000000000..1d421fe8d --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_with_params_anthropic_claude-3-5-haiku-20241022_supports_service_tier_param.yml @@ -0,0 +1,81 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"service_tier":"standard_only","model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"What + is the square root of 64? 
Answer with a JSON object with the key `result`."}]},{"role":"assistant","content":[{"type":"text","text":"{"}]}],"temperature":0.7,"stream":false,"max_tokens":8192}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Sat, 19 Jul 2025 19:53:47 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '25000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '25000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-19T19:53:46Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '5000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '5000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-19T19:53:47Z' + Anthropic-Ratelimit-Requests-Limit: + - '5' + Anthropic-Ratelimit-Requests-Remaining: + - '4' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-19T19:53:58Z' + Anthropic-Ratelimit-Tokens-Limit: + - '30000' + Anthropic-Ratelimit-Tokens-Remaining: + - '30000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-19T19:53:46Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01Y9cx1icSo5LmER4hv3mX6M","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"\n \"result\": + 8\n}"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":28,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":12,"service_tier":"standard"}}' + recorded_at: Sat, 19 Jul 2025 19:53:47 GMT +recorded_with: VCR 6.3.1 diff 
--git a/spec/fixtures/vcr_cassettes/chat_with_params_bedrock_anthropic_claude-3-5-haiku-20241022-v1_0_supports_top_k_param.yml b/spec/fixtures/vcr_cassettes/chat_with_params_bedrock_anthropic_claude-3-5-haiku-20241022-v1_0_supports_top_k_param.yml new file mode 100644 index 000000000..f30d1378d --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_with_params_bedrock_anthropic_claude-3-5-haiku-20241022-v1_0_supports_top_k_param.yml @@ -0,0 +1,54 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"top_k":5,"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"What + is the square root of 64? Answer with a JSON object with the key `result`."}]},{"role":"assistant","content":[{"type":"text","text":"{"}]}],"temperature":0.7,"max_tokens":4096}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250719T195347Z + X-Amz-Content-Sha256: + - 6e66b0df343074bb4e2b11bf211a10441f15ad68ce5af9849ddf69242979563a + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250719//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=b0b4bc8e31635379223d6df2e1f4f6fe8973c802fcd7414a50f16651a7779941 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Sat, 19 Jul 2025 19:53:47 GMT + Content-Type: + - application/json + Content-Length: + - '267' + Connection: + - keep-alive + X-Amzn-Requestid: + - 0fa198e8-bb3d-47a4-9845-6ab084dcb4a7 + X-Amzn-Bedrock-Invocation-Latency: + - '577' + X-Amzn-Bedrock-Output-Token-Count: + - '12' + X-Amzn-Bedrock-Input-Token-Count: + - '28' + body: + encoding: UTF-8 + string: 
'{"id":"msg_bdrk_01LtafMjvqzvDCrnHXmW3rRB","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"\n \"result\": + 8\n}"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":28,"output_tokens":12}}' + recorded_at: Sat, 19 Jul 2025 19:53:47 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_with_params_deepseek_deepseek-chat_supports_response_format_param.yml b/spec/fixtures/vcr_cassettes/chat_with_params_deepseek_deepseek-chat_supports_response_format_param.yml new file mode 100644 index 000000000..05b3bece9 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_with_params_deepseek_deepseek-chat_supports_response_format_param.yml @@ -0,0 +1,57 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.deepseek.com/chat/completions + body: + encoding: UTF-8 + string: '{"response_format":{"type":"json_object"},"model":"deepseek-chat","messages":[{"role":"user","content":"What + is the square root of 64? 
Answer with a JSON object with the key `result`."}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Sat, 19 Jul 2025 19:53:26 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Vary: + - origin, access-control-request-method, access-control-request-headers + Access-Control-Allow-Credentials: + - 'true' + X-Ds-Trace-Id: + - 735400e014bd761346ad96d9d347e5cd + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"70050189-c6ad-4123-a660-9e558cf8d030","object":"chat.completion","created":1752954806,"model":"deepseek-chat","choices":[{"index":0,"message":{"role":"assistant","content":"{\n \"result\": + 8\n}"},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":39,"completion_tokens":13,"total_tokens":52,"prompt_tokens_details":{"cached_tokens":0},"prompt_cache_hit_tokens":0,"prompt_cache_miss_tokens":39},"system_fingerprint":"fp_8802369eaa_prod0623_fp8_kvcache"}' + recorded_at: Sat, 19 Jul 2025 19:53:30 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_with_params_gemini_gemini-2_0-flash_supports_responseschema_param.yml b/spec/fixtures/vcr_cassettes/chat_with_params_gemini_gemini-2_0-flash_supports_responseschema_param.yml new file mode 100644 index 000000000..bc7746cec --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_with_params_gemini_gemini-2_0-flash_supports_responseschema_param.yml @@ -0,0 +1,87 @@ +--- +http_interactions: +- request: + method: post + uri: 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent + body: + encoding: UTF-8 + string: '{"generationConfig":{"responseMimeType":"application/json","responseSchema":{"type":"OBJECT","properties":{"result":{"type":"NUMBER"}}},"temperature":0.7},"contents":[{"role":"user","parts":[{"text":"What + is the square root of 64? Answer with a JSON object with the key `result`."}]}]}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Goog-Api-Key: + - "" + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - application/json; charset=UTF-8 + Vary: + - Origin + - Referer + - X-Origin + Date: + - Sat, 19 Jul 2025 19:53:46 GMT + Server: + - scaffolding on HTTPServer2 + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + X-Content-Type-Options: + - nosniff + Server-Timing: + - gfet4t7; dur=565 + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Transfer-Encoding: + - chunked + body: + encoding: ASCII-8BIT + string: | + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": "{\n \"result\": 8\n}" + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -0.0243309885263443 + } + ], + "usageMetadata": { + "promptTokenCount": 24, + "candidatesTokenCount": 10, + "totalTokenCount": 34, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 24 + } + ], + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 10 + } + ] + }, + "modelVersion": "gemini-2.0-flash", + "responseId": "yfd7aPOxOe6r1dkPw4Wy-QM" + } + recorded_at: Sat, 19 Jul 2025 19:53:46 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_with_params_ollama_qwen3_supports_response_format_param.yml b/spec/fixtures/vcr_cassettes/chat_with_params_ollama_qwen3_supports_response_format_param.yml new file mode 100644 index 000000000..f1a8219ac --- 
/dev/null +++ b/spec/fixtures/vcr_cassettes/chat_with_params_ollama_qwen3_supports_response_format_param.yml @@ -0,0 +1,37 @@ +--- +http_interactions: +- request: + method: post + uri: "/chat/completions" + body: + encoding: UTF-8 + string: '{"response_format":{"type":"json_object"},"model":"qwen3","messages":[{"role":"user","content":"What + is the square root of 64? Answer with a JSON object with the key `result`."}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - application/json + Date: + - Sat, 19 Jul 2025 19:53:45 GMT + Content-Length: + - '301' + body: + encoding: UTF-8 + string: '{"id":"chatcmpl-176","object":"chat.completion","created":1752954825,"model":"qwen3","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"{\n \"result\": + 8\n}"},"finish_reason":"stop"}],"usage":{"prompt_tokens":29,"completion_tokens":10,"total_tokens":39}} + + ' + recorded_at: Sat, 19 Jul 2025 19:53:45 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_with_params_openai_gpt-4_1-nano_supports_response_format_param.yml b/spec/fixtures/vcr_cassettes/chat_with_params_openai_gpt-4_1-nano_supports_response_format_param.yml new file mode 100644 index 000000000..dbda29ece --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_with_params_openai_gpt-4_1-nano_supports_response_format_param.yml @@ -0,0 +1,115 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"response_format":{"type":"json_object"},"model":"gpt-4.1-nano","messages":[{"role":"user","content":"What + is the square root of 64? 
Answer with a JSON object with the key `result`."}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Sat, 19 Jul 2025 19:53:31 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '148' + Openai-Project: + - proj_Cgr91oaB3FCyMSthpusv7eJc + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '150' + X-Ratelimit-Limit-Requests: + - '500' + X-Ratelimit-Limit-Tokens: + - '200000' + X-Ratelimit-Remaining-Requests: + - '499' + X-Ratelimit-Remaining-Tokens: + - '199977' + X-Ratelimit-Reset-Requests: + - 120ms + X-Ratelimit-Reset-Tokens: + - 6ms + X-Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-Bv7ud7hg2U3FxalKcWA4qjZTTRsfk", + "object": "chat.completion", + "created": 1752954811, + "model": "gpt-4.1-nano-2025-04-14", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\n \"result\": 8\n}", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 27, + "completion_tokens": 9, + "total_tokens": 36, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + 
"system_fingerprint": null + } + recorded_at: Sat, 19 Jul 2025 19:53:31 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_with_params_openrouter_anthropic_claude-3_5-haiku_supports_top_k_param.yml b/spec/fixtures/vcr_cassettes/chat_with_params_openrouter_anthropic_claude-3_5-haiku_supports_top_k_param.yml new file mode 100644 index 000000000..3a0de14da --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_with_params_openrouter_anthropic_claude-3_5-haiku_supports_top_k_param.yml @@ -0,0 +1,54 @@ +--- +http_interactions: +- request: + method: post + uri: https://openrouter.ai/api/v1/chat/completions + body: + encoding: UTF-8 + string: '{"top_k":5,"model":"anthropic/claude-3.5-haiku","messages":[{"role":"user","content":"What + is the square root of 64? Answer with a JSON object with the key `result`."},{"role":"assistant","content":"{"}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Sat, 19 Jul 2025 19:53:48 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Allow-Origin: + - "*" + Vary: + - Accept-Encoding + Permissions-Policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" + "https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com") + Referrer-Policy: + - no-referrer, strict-origin-when-cross-origin + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: "\n 
\n{\"id\":\"gen-1752954827-41LuhLq98dKxehFNZkEa\",\"provider\":\"Anthropic\",\"model\":\"anthropic/claude-3.5-haiku\",\"object\":\"chat.completion\",\"created\":1752954827,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"stop\",\"native_finish_reason\":\"stop\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"\\n + \ \\\"result\\\": 8\\n}\",\"refusal\":null,\"reasoning\":null}}],\"usage\":{\"prompt_tokens\":28,\"completion_tokens\":12,\"total_tokens\":40}}" + recorded_at: Sat, 19 Jul 2025 19:53:48 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/chat_request_options_spec.rb b/spec/ruby_llm/chat_request_options_spec.rb new file mode 100644 index 000000000..07a3f87b2 --- /dev/null +++ b/spec/ruby_llm/chat_request_options_spec.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Chat do + include_context 'with configured RubyLLM' + + describe 'with params' do + # Supported params vary by provider, and to lesser degree, by model. + + # Providers [:openai, :ollama, :deepseek] support {response_format: {type: 'json_object'}} + # to guarantee a JSON object is returned. + # (Note that :openrouter may accept the parameter but silently ignore it.) + CHAT_MODELS.select { |model_info| %i[openai ollama deepseek].include?(model_info[:provider]) }.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] + it "#{provider}/#{model} supports response_format param" do # rubocop:disable RSpec/ExampleLength + chat = RubyLLM + .chat(model: model, provider: provider) + .with_params(response_format: { type: 'json_object' }) + + response = chat.ask('What is the square root of 64? 
Answer with a JSON object with the key `result`.') + + json_response = JSON.parse(response.content) + expect(json_response).to eq({ 'result' => 8 }) + end + end + + # Provider [:gemini] supports a {generationConfig: {responseMimeType: ..., responseSchema: ...} } param, + # which can specify a JSON schema, requiring a deep_merge of params into the payload. + CHAT_MODELS.select { |model_info| model_info[:provider] == :gemini }.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] + it "#{provider}/#{model} supports responseSchema param" do # rubocop:disable RSpec/ExampleLength + chat = RubyLLM + .chat(model: model, provider: provider) + .with_params( + generationConfig: { + responseMimeType: 'application/json', + responseSchema: { + type: 'OBJECT', + properties: { result: { type: 'NUMBER' } } + } + } + ) + + response = chat.ask('What is the square root of 64? Answer with a JSON object with the key `result`.') + + json_response = JSON.parse(response.content) + expect(json_response).to eq({ 'result' => 8 }) + end + end + + # Provider [:anthropic] supports a service_tier param. + CHAT_MODELS.select { |model_info| model_info[:provider] == :anthropic }.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] + it "#{provider}/#{model} supports service_tier param" do # rubocop:disable RSpec/ExampleLength + chat = RubyLLM + .chat(model: model, provider: provider) + .with_params(service_tier: 'standard_only') + + chat.add_message( + role: :user, + content: 'What is the square root of 64? Answer with a JSON object with the key `result`.' + ) + + # :anthropic does not support {response_format: {type: 'json_object'}}, + # but can be steered this way by adding a leading '{' as assistant. + # (This leading '{' must be prepended to response.content before parsing.) 
+ chat.add_message( + role: :assistant, + content: '{' + ) + + response = chat.complete + + json_response = JSON.parse('{' + response.content) # rubocop:disable Style/StringConcatenation + expect(json_response).to eq({ 'result' => 8 }) + end + end + + # Providers [:openrouter, :bedrock] support a {top_k: ...} param to remove low-probability next tokens. + CHAT_MODELS.select { |model_info| %i[openrouter bedrock].include?(model_info[:provider]) }.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] + it "#{provider}/#{model} supports top_k param" do # rubocop:disable RSpec/ExampleLength + chat = RubyLLM + .chat(model: model, provider: provider) + .with_params(top_k: 5) + + chat.add_message( + role: :user, + content: 'What is the square root of 64? Answer with a JSON object with the key `result`.' + ) + + # See comment on :anthropic example above for explanation of steering the model toward a JSON object response. + chat.add_message( + role: :assistant, + content: '{' + ) + + response = chat.complete + + json_response = JSON.parse('{' + response.content) # rubocop:disable Style/StringConcatenation + expect(json_response).to eq({ 'result' => 8 }) + end + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 61ec54f29..5001b0437 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -76,6 +76,9 @@ config.filter_sensitive_data('') do |interaction| interaction.response.headers['Openai-Organization']&.first end + config.filter_sensitive_data('') do |interaction| + interaction.response.headers['Anthropic-Organization-Id']&.first + end config.filter_sensitive_data('') { |interaction| interaction.response.headers['X-Request-Id']&.first } config.filter_sensitive_data('') { |interaction| interaction.response.headers['Request-Id']&.first } config.filter_sensitive_data('') { |interaction| interaction.response.headers['Cf-Ray']&.first }