diff --git a/docs/batch_requests.md b/docs/batch_requests.md
new file mode 100644
index 00000000..1c9d7c29
--- /dev/null
+++ b/docs/batch_requests.md
@@ -0,0 +1,90 @@
+# Batch Request Feature
+
+The batch request feature allows you to generate API request payloads without actually making API calls. This is useful for:
+
+1. **Batch Processing**: Generate multiple request payloads and send them to provider batch endpoints
+2. **Testing**: Verify request payload structure without making API calls
+3. **Debugging**: Inspect the exact payload that would be sent to the provider
+
+## Basic Usage
+
+```ruby
+# Enable batch request mode
+chat = RubyLLM.chat.for_batch_request
+chat.add_message(role: :user, content: "What's 2 + 2?")
+
+# Returns the request payload instead of making an API call
+payload = chat.complete
+# => {:custom_id=>"...", :method=>"POST", :url=>"/v1/chat/completions", :body=>{...}}
+```
+
+## Generating Multiple Batch Requests
+
+```ruby
+requests = []
+
+3.times do |i|
+  chat = RubyLLM.chat.for_batch_request
+  chat.add_message(role: :user, content: "Question #{i + 1}")
+
+  requests << chat.complete
+end
+
+# Now you have an array of request payloads
+# You can format them as JSONL and send to provider batch endpoints
+```
+
+## Provider Support
+
+Currently, only OpenAI supports batch requests. Other providers will raise `NotImplementedError`:
+
+```ruby
+# OpenAI (supported)
+chat = RubyLLM.chat(provider: :openai).for_batch_request
+chat.add_message(role: :user, content: "Hello")
+payload = chat.complete
+# => {
+#   :custom_id=>"request-abc123",
+#   :method=>"POST",
+#   :url=>"/v1/chat/completions",
+#   :body=>{:model=>"gpt-4", :messages=>[...]}
+# }
+
+# Other providers (not supported)
+chat = RubyLLM.chat(provider: :anthropic).for_batch_request
+chat.add_message(role: :user, content: "Hello")
+chat.complete # Raises NotImplementedError
+```
+
+## Usage with Other Methods
+
+The `for_batch_request` method chains with other configuration methods:
+
+```ruby
+chat = RubyLLM.chat
+  .with_model('gpt-4')
+  .with_temperature(0.7)
+  .with_tool(MyTool)
+  .for_batch_request
+
+chat.add_message(role: :user, content: "Process this")
+payload = chat.complete # Returns batch request payload
+```
+
+## Notes
+
+- Streaming is not supported in batch request mode
+- The batch request payload includes all configured parameters (tools, schema, temperature, etc.)
+- No messages are added to the chat history when generating batch request payloads
+- Providers must explicitly implement `render_payload_for_batch_request` to support this feature
+
+## Future Enhancements
+
+The remaining steps for full batch processing (to be implemented by users) are:
+
+1. Combine multiple request payloads (typically in JSONL format)
+2. Submit to the provider's batch endpoint
+3. Poll for batch completion status
+4. Process the batch results
+
+These steps are provider-specific and can be implemented based on your needs.
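+
+## Example: Submitting a Batch to OpenAI
+
+As a minimal sketch of steps 1-4 for OpenAI, the code below writes the payloads from the earlier `requests` array to a JSONL file and drives OpenAI's Batch API (https://platform.openai.com/docs/guides/batch) directly with `net/http`. Only the payload generation comes from RubyLLM; the file name, polling interval, and `OPENAI_API_KEY` environment variable are illustrative assumptions, so adapt the error handling and file management to your needs.
+
+```ruby
+require 'json'
+require 'net/http'
+require 'uri'
+
+API_KEY = ENV.fetch('OPENAI_API_KEY')
+
+# Small helper: send a request to api.openai.com and parse the JSON response
+def openai(request)
+  request['Authorization'] = "Bearer #{API_KEY}"
+  Net::HTTP.start('api.openai.com', 443, use_ssl: true) do |http|
+    JSON.parse(http.request(request).body)
+  end
+end
+
+# 1. Combine the payloads returned by `chat.complete` into a JSONL file
+File.write('batch.jsonl', requests.map(&:to_json).join("\n"))
+
+# 2a. Upload the JSONL file with purpose "batch"
+upload = Net::HTTP::Post.new(URI('https://api.openai.com/v1/files'))
+upload.set_form([['purpose', 'batch'], ['file', File.open('batch.jsonl')]],
+                'multipart/form-data')
+input_file = openai(upload)
+
+# 2b. Create the batch job against the chat completions endpoint
+create = Net::HTTP::Post.new(URI('https://api.openai.com/v1/batches'))
+create['Content-Type'] = 'application/json'
+create.body = { input_file_id: input_file['id'],
+                endpoint: '/v1/chat/completions',
+                completion_window: '24h' }.to_json
+batch = openai(create)
+
+# 3. Poll until the batch reaches a terminal status
+until %w[completed failed expired cancelled].include?(batch['status'])
+  sleep 30
+  batch = openai(Net::HTTP::Get.new(URI("https://api.openai.com/v1/batches/#{batch['id']}")))
+end
+
+# 4. Download the JSONL results; each line echoes the request's custom_id
+if batch['status'] == 'completed'
+  output = Net::HTTP::Get.new(URI("https://api.openai.com/v1/files/#{batch['output_file_id']}/content"))
+  output['Authorization'] = "Bearer #{API_KEY}"
+  Net::HTTP.start('api.openai.com', 443, use_ssl: true) do |http|
+    http.request(output).body.each_line do |line|
+      result = JSON.parse(line)
+      puts "#{result['custom_id']}: #{result.dig('response', 'body', 'choices', 0, 'message', 'content')}"
+    end
+  end
+end
+```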
\ No newline at end of file diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index a5f70117..587d1295 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -28,6 +28,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @params = {} @headers = {} @schema = nil + @batch_request = false @on = { new_message: nil, end_message: nil, @@ -111,6 +112,11 @@ def with_schema(schema, force: false) self end + def for_batch_request + @batch_request = true + self + end + def on_new_message(&block) @on[:new_message] = block self @@ -136,6 +142,20 @@ def each(&) end def complete(&) # rubocop:disable Metrics/PerceivedComplexity + # If batch_request mode is enabled, render and return the payload + if @batch_request + raise ArgumentError, 'Streaming is not supported for batch requests' if block_given? + + return @provider.render_payload_for_batch_request( + messages, + tools: @tools, + temperature: @temperature, + model: @model.id, + params: @params, + schema: @schema + ) + end + response = @provider.complete( messages, tools: @tools, diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index ed0c2ac8..7323b36f 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -62,6 +62,11 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc end end + def render_payload_for_batch_request(_messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + raise NotImplementedError, "#{self.class.name} does not support batch requests. " \ + 'Provider must implement render_payload_for_batch_request to enable batch request generation.' + end + def list_models response = @connection.get models_url parse_list_models_response response, slug, capabilities diff --git a/lib/ruby_llm/providers/deepseek.rb b/lib/ruby_llm/providers/deepseek.rb index 41e07bfc..d12467d4 100644 --- a/lib/ruby_llm/providers/deepseek.rb +++ b/lib/ruby_llm/providers/deepseek.rb @@ -16,6 +16,12 @@ def headers } end + # DeepSeek doesn't support batch requests yet + def render_payload_for_batch_request(_messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + raise NotImplementedError, 'DeepSeek does not support batch requests. ' \ + 'Batch request generation is not available for this provider.' + end + class << self def capabilities DeepSeek::Capabilities diff --git a/lib/ruby_llm/providers/gpustack.rb b/lib/ruby_llm/providers/gpustack.rb index 14ab3016..9dc43f84 100644 --- a/lib/ruby_llm/providers/gpustack.rb +++ b/lib/ruby_llm/providers/gpustack.rb @@ -19,6 +19,12 @@ def headers } end + # GPUStack doesn't support batch requests yet + def render_payload_for_batch_request(_messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + raise NotImplementedError, 'GPUStack does not support batch requests. ' \ + 'Batch request generation is not available for this provider.' + end + class << self def local? true diff --git a/lib/ruby_llm/providers/mistral.rb b/lib/ruby_llm/providers/mistral.rb index 18ddc266..ac8e6634 100644 --- a/lib/ruby_llm/providers/mistral.rb +++ b/lib/ruby_llm/providers/mistral.rb @@ -18,6 +18,12 @@ def headers } end + # Mistral doesn't support batch requests yet + def render_payload_for_batch_request(_messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + raise NotImplementedError, 'Mistral does not support batch requests. 
' \ + 'Batch request generation is not available for this provider.' + end + class << self def capabilities Mistral::Capabilities diff --git a/lib/ruby_llm/providers/ollama.rb b/lib/ruby_llm/providers/ollama.rb index 32fc7f84..48413609 100644 --- a/lib/ruby_llm/providers/ollama.rb +++ b/lib/ruby_llm/providers/ollama.rb @@ -15,6 +15,12 @@ def headers {} end + # Ollama doesn't support batch requests yet + def render_payload_for_batch_request(_messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + raise NotImplementedError, 'Ollama does not support batch requests. ' \ + 'Batch request generation is not available for this provider.' + end + class << self def configuration_requirements %i[ollama_api_base] diff --git a/lib/ruby_llm/providers/openai.rb b/lib/ruby_llm/providers/openai.rb index cca0b2cf..2d509d9e 100644 --- a/lib/ruby_llm/providers/openai.rb +++ b/lib/ruby_llm/providers/openai.rb @@ -30,6 +30,32 @@ def maybe_normalize_temperature(temperature, model_id) OpenAI::Capabilities.normalize_temperature(temperature, model_id) end + # Override to format payload according to OpenAI's batch request API + # https://platform.openai.com/docs/guides/batch + def render_payload_for_batch_request(messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + normalized_temperature = maybe_normalize_temperature(temperature, model) + + payload = Utils.deep_merge( + params, + render_payload( + messages, + tools: tools, + temperature: normalized_temperature, + model: model, + stream: false, + schema: schema + ) + ) + + # Format according to OpenAI's batch request API + { + custom_id: "request-#{SecureRandom.uuid}", + method: 'POST', + url: '/v1/chat/completions', + body: payload + } + end + class << self def capabilities OpenAI::Capabilities diff --git a/lib/ruby_llm/providers/openrouter.rb b/lib/ruby_llm/providers/openrouter.rb index e8056b02..9ae0808d 100644 --- a/lib/ruby_llm/providers/openrouter.rb +++ b/lib/ruby_llm/providers/openrouter.rb @@ -16,6 +16,12 @@ def headers } end + # OpenRouter doesn't support batch requests yet + def render_payload_for_batch_request(_messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + raise NotImplementedError, 'OpenRouter does not support batch requests. ' \ + 'Batch request generation is not available for this provider.' + end + class << self def configuration_requirements %i[openrouter_api_key] diff --git a/lib/ruby_llm/providers/perplexity.rb b/lib/ruby_llm/providers/perplexity.rb index 24a24ef9..41b34658 100644 --- a/lib/ruby_llm/providers/perplexity.rb +++ b/lib/ruby_llm/providers/perplexity.rb @@ -18,6 +18,12 @@ def headers } end + # Perplexity doesn't support batch requests yet + def render_payload_for_batch_request(_messages, tools:, temperature:, model:, params: {}, schema: nil) # rubocop:disable Metrics/ParameterLists + raise NotImplementedError, 'Perplexity does not support batch requests. ' \ + 'Batch request generation is not available for this provider.' 
+ end + class << self def capabilities Perplexity::Capabilities diff --git a/spec/fixtures/vcr_cassettes/chat_batch_request_functionality_batch_request_workflow_example_can_switch_between_batch_request_and_normal_response_modes.yml b/spec/fixtures/vcr_cassettes/chat_batch_request_functionality_batch_request_workflow_example_can_switch_between_batch_request_and_normal_response_modes.yml new file mode 100644 index 00000000..6499154a --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_batch_request_functionality_batch_request_workflow_example_can_switch_between_batch_request_and_normal_response_modes.yml @@ -0,0 +1,121 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"What''s + 2 + 2?"}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.4 + Authorization: + - Bearer test + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 401 + message: Unauthorized + headers: + Date: + - Tue, 12 Aug 2025 18:03:14 GMT + Content-Type: + - application/json; charset=utf-8 + Content-Length: + - '254' + Connection: + - keep-alive + Vary: + - Origin + X-Request-Id: + - "" + X-Envoy-Upstream-Service-Time: + - '2' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: | + { + "error": { + "message": "Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + recorded_at: Tue, 12 Aug 2025 18:03:14 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 2 + 2?"}]}],"stream":false,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.4 + X-Api-Key: + - test + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 401 + message: Unauthorized + headers: + Date: + - Tue, 12 Aug 2025 18:04:47 GMT + Content-Type: + - application/json + Content-Length: + - '86' + Connection: + - keep-alive + Cf-Ray: + - "" + X-Should-Retry: + - 'false' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + body: + encoding: UTF-8 + string: '{"type":"error","error":{"type":"authentication_error","message":"invalid + x-api-key"}}' + recorded_at: Tue, 12 Aug 2025 18:04:47 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/chat_batch_request_spec.rb b/spec/ruby_llm/chat_batch_request_spec.rb new file mode 100644 index 00000000..146e0e38 --- /dev/null +++ b/spec/ruby_llm/chat_batch_request_spec.rb @@ -0,0 +1,216 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Chat do + include_context 'with configured RubyLLM' + + class WeatherTool < RubyLLM::Tool # rubocop:disable 
Lint/ConstantDefinitionInBlock,RSpec/LeakyConstantDeclaration + description 'Gets current weather for a location' + param :location, desc: 'City name or location' + + def execute(location:) + "Current weather in #{location}: 15°C, partly cloudy" + end + end + + describe 'batch request functionality' do + describe '#for_batch_request' do + it 'enables batch request mode' do + chat = RubyLLM.chat + result = chat.for_batch_request + expect(result).to eq(chat) # returns self for chaining + end + + it 'changes behavior of complete method' do + chat = RubyLLM.chat.for_batch_request + chat.add_message(role: :user, content: 'test') + + # Should return a hash payload instead of making API call + result = chat.complete + expect(result).to be_a(Hash) + end + end + + describe '#complete with batch_request format' do + CHAT_MODELS.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] + + context "with #{provider}/#{model}" do + it 'returns a batch request payload instead of making an API call' do + chat = RubyLLM.chat(model: model, provider: provider) + .for_batch_request + + chat.add_message(role: :user, content: "What's 2 + 2?") + + # Only OpenAI currently supports batch requests + if provider == :openai + payload = chat.complete + + expect(payload).to be_a(Hash) + expect(payload[:custom_id]).to be_present + expect(payload[:method]).to eq('POST') + expect(payload[:url]).to eq('/v1/chat/completions') + expect(payload[:body]).to be_present + expect(payload[:body][:model]).to eq(model) + + # Verify no message was added (since no actual response was received) + expect(chat.messages.count).to eq(1) + expect(chat.messages.last.role).to eq(:user) + else + # Other providers should raise NotImplementedError + expect { chat.complete }.to raise_error( + NotImplementedError, + /does not support batch requests/ + ) + end + end + + it 'includes tools in the batch request payload when tools are configured' do + # Skip for non-OpenAI providers since they don't support batch requests + skip "#{provider} doesn't support batch requests" unless provider == :openai + + # Skip if provider doesn't support functions or model is not in registry + begin + model_info = RubyLLM::Models.find(model) + skip "#{provider} doesn't support functions" unless model_info.supports_functions? + rescue RubyLLM::ModelNotFoundError + skip "Model #{model} not found in registry" + end + + chat = RubyLLM.chat(model: model, provider: provider) + .for_batch_request + .with_tool(WeatherTool) + + chat.add_message(role: :user, content: "What's the weather in Tokyo?") + + payload = chat.complete + + expect(payload).to be_a(Hash) + expect(payload[:body]).to be_present + + # The body should contain tool definitions for OpenAI + body = payload[:body] + expect(body[:tools] || body[:functions]).to be_present + end + + it 'includes schema in the batch request payload when schema is configured' do + # Skip for non-OpenAI providers since they don't support batch requests + skip "#{provider} doesn't support batch requests" unless provider == :openai + + # Skip if provider doesn't support structured output or model is not in registry + begin + model_info = RubyLLM::Models.find(model) + skip "#{provider} doesn't support structured output" unless model_info.structured_output? 
+ rescue RubyLLM::ModelNotFoundError + skip "Model #{model} not found in registry" + end + + schema = { + type: 'object', + properties: { + answer: { type: 'integer' }, + explanation: { type: 'string' } + }, + required: %w[answer explanation] + } + + chat = RubyLLM.chat(model: model, provider: provider) + .for_batch_request + .with_schema(schema) + + chat.add_message(role: :user, content: "What's 2 + 2?") + + payload = chat.complete + + expect(payload).to be_a(Hash) + expect(payload[:body]).to be_present + + # The body should contain schema information for OpenAI + body = payload[:body] + expect(body[:response_format]).to be_present + end + + it 'raises error when attempting to stream with batch_request format' do + chat = RubyLLM.chat(model: model, provider: provider) + .for_batch_request + + chat.add_message(role: :user, content: 'Test message') + + expect { chat.complete { |chunk| puts chunk } }.to raise_error( + ArgumentError, + 'Streaming is not supported for batch requests' + ) + end + + it 'includes custom parameters in the batch request payload' do + # Skip for non-OpenAI providers since they don't support batch requests + skip "#{provider} doesn't support batch requests" unless provider == :openai + + chat = RubyLLM.chat(model: model, provider: provider) + .for_batch_request + .with_params(max_tokens: 100, top_p: 0.9) + .with_temperature(0.5) + + chat.add_message(role: :user, content: 'Test message') + + payload = chat.complete + + expect(payload).to be_a(Hash) + body = payload[:body] + + # Check that parameters are present in the payload + expect(body).to be_present + expect(body[:messages]).to be_present + expect(body[:max_tokens]).to eq(100) + expect(body[:top_p]).to eq(0.9) + expect(body[:temperature]).to eq(0.5) + end + end + end + end + + describe 'batch request workflow example' do + it 'demonstrates generating multiple batch requests' do + requests = [] + + # Generate multiple batch request payloads + 3.times do |i| + chat = RubyLLM.chat.for_batch_request + chat.add_message(role: :user, content: "Question #{i + 1}: What's #{i + 1} + #{i + 1}?") + + payload = chat.complete + requests << payload + end + + expect(requests).to be_an(Array) + expect(requests.length).to eq(3) + + requests.each_with_index do |request, i| + expect(request).to be_a(Hash) + expect(request[:body]).to be_present + + # Verify each request has the correct question + messages = request[:body][:messages] + expect(messages).to be_present + expect(messages.last[:content]).to include("Question #{i + 1}") + end + end + + it 'demonstrates batch request mode' do + # Create a chat in batch request mode + chat = RubyLLM.chat.for_batch_request + chat.add_message(role: :user, content: "What's 2 + 2?") + + # Generate batch request payload + payload = chat.complete + expect(payload).to be_a(Hash) + expect(chat.messages.count).to eq(1) # Only user message (no response added) + + # NOTE: In production, you'd use this payload with the provider's batch API + # For normal API calls, create a new chat without for_batch_request + end + end + end +end