diff --git a/docs/_core_features/chat.md b/docs/_core_features/chat.md
index 16fb3442..bee16717 100644
--- a/docs/_core_features/chat.md
+++ b/docs/_core_features/chat.md
@@ -497,6 +497,30 @@ puts "Total Conversation Tokens: #{total_conversation_tokens}"
 
 Refer to the [Working with Models Guide]({% link _advanced/models.md %}) for details on accessing model-specific pricing.
 
+## Prompt Caching
+
+### Enabling
+For Anthropic models, you can opt in to prompt caching, which is documented more fully in the [Anthropic API docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).
+
+Enable prompt caching using the `cache_prompts` method on your chat instance:
+
+```ruby
+chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022')
+
+# Enable caching for different types of content
+chat.cache_prompts(
+  system: true, # Cache system instructions
+  user: true,   # Cache user messages
+  tools: true   # Cache tool definitions
+)
+```
+
+### Checking cached token counts
+For Anthropic, OpenAI, and Gemini, you can see the number of tokens read from the cache by checking the `cached_tokens` property on the messages returned by the model.
+
+For Anthropic, you can also see the number of tokens written to the cache via the `cache_creation_tokens` property.
+
+
 ## Chat Event Handlers
 
 You can register blocks to be called when certain events occur during the chat lifecycle. This is particularly useful for UI updates, logging, analytics, or building real-time chat interfaces.
diff --git a/lib/ruby_llm/active_record/acts_as.rb b/lib/ruby_llm/active_record/acts_as.rb
index ee919672..fe888d18 100644
--- a/lib/ruby_llm/active_record/acts_as.rb
+++ b/lib/ruby_llm/active_record/acts_as.rb
@@ -148,6 +148,11 @@ def with_schema(...)
         self
       end
 
+      def cache_prompts(...)
+        to_llm.cache_prompts(...)
+        self
+      end
+
       def on_new_message(&block)
         to_llm
diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb
index 7131aeb7..01cfc15a 100644
--- a/lib/ruby_llm/chat.rb
+++ b/lib/ruby_llm/chat.rb
@@ -25,6 +25,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
       @temperature = 0.7
       @messages = []
       @tools = {}
+      @cache_prompts = { system: false, user: false, tools: false }
       @params = {}
       @headers = {}
       @schema = nil
@@ -127,12 +128,18 @@ def each(&)
       messages.each(&)
     end
 
+    def cache_prompts(system: false, user: false, tools: false)
+      @cache_prompts = { system: system, user: user, tools: tools }
+      self
+    end
+
     def complete(&) # rubocop:disable Metrics/PerceivedComplexity
       response = @provider.complete(
         messages,
         tools: @tools,
         temperature: @temperature,
         model: @model.id,
+        cache_prompts: @cache_prompts.dup,
         params: @params,
         headers: @headers,
         schema: @schema,
diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb
index 3e9fcfd4..3c7fbbdb 100644
--- a/lib/ruby_llm/message.rb
+++ b/lib/ruby_llm/message.rb
@@ -7,7 +7,8 @@ module RubyLLM
   class Message
     ROLES = %i[system user assistant tool].freeze
 
-    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw
+    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw,
+                :cached_tokens, :cache_creation_tokens
     attr_writer :content
 
     def initialize(options = {})
@@ -18,6 +19,8 @@ def initialize(options = {})
       @output_tokens = options[:output_tokens]
       @model_id = options[:model_id]
       @tool_call_id = options[:tool_call_id]
+      @cached_tokens = options[:cached_tokens]
+      @cache_creation_tokens = options[:cache_creation_tokens]
       @raw = options[:raw]
 
       ensure_valid_role
@@ -51,7 +54,9 @@ def to_h
         tool_call_id: tool_call_id,
input_tokens: input_tokens, output_tokens: output_tokens, - model_id: model_id + model_id: model_id, + cache_creation_tokens: cache_creation_tokens, + cached_tokens: cached_tokens }.compact end diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index ed0c2ac8..cad215f6 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -40,7 +40,8 @@ def configuration_requirements self.class.configuration_requirements end - def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists + def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, # rubocop:disable Metrics/ParameterLists + cache_prompts: { system: false, user: false, tools: false }, &) normalized_temperature = maybe_normalize_temperature(temperature, model) payload = Utils.deep_merge( @@ -50,6 +51,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc tools: tools, temperature: normalized_temperature, model: model, + cache_prompts: cache_prompts, stream: block_given?, schema: schema ) diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 64cd764b..407416bf 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,12 +11,14 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument + cache_prompts: { system: false, user: false, tools: false }) system_messages, chat_messages = separate_messages(messages) - system_content = build_system_content(system_messages) + system_content = build_system_content(system_messages, cache: cache_prompts[:system]) - build_base_payload(chat_messages, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:, temperature:) + build_base_payload(chat_messages, model, stream, cache: cache_prompts[:user]).tap do |payload| + add_optional_fields(payload, system_content:, tools:, temperature:, + cache_tools: cache_prompts[:tools]) end end @@ -24,28 +26,34 @@ def separate_messages(messages) messages.partition { |msg| msg.role == :system } end - def build_system_content(system_messages) - if system_messages.length > 1 - RubyLLM.logger.warn( - "Anthropic's Claude implementation only supports a single system message. " \ - 'Multiple system messages will be combined into one.' 
- ) + def build_system_content(system_messages, cache: false) + system_messages.flat_map.with_index do |msg, idx| + message_cache = cache if idx == system_messages.size - 1 + format_system_message(msg, cache: message_cache) end - - system_messages.map(&:content).join("\n\n") end - def build_base_payload(chat_messages, model, stream) + def build_base_payload(chat_messages, model, stream, cache: false) + messages = chat_messages.map.with_index do |msg, idx| + message_cache = cache if idx == chat_messages.size - 1 + format_message(msg, cache: message_cache) + end + { model: model, - messages: chat_messages.map { |msg| format_message(msg) }, + messages:, stream: stream, max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 } end - def add_optional_fields(payload, system_content:, tools:, temperature:) - payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? + def add_optional_fields(payload, system_content:, tools:, temperature:, cache_tools: false) + if tools.any? + tool_definitions = tools.values.map { |t| Tools.function_for(t) } + tool_definitions[-1][:cache_control] = { type: 'ephemeral' } if cache_tools + payload[:tools] = tool_definitions + end + payload[:system] = system_content unless system_content.empty? payload[:temperature] = temperature unless temperature.nil? end @@ -73,24 +81,30 @@ def build_message(data, content, tool_use_blocks, response) input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), model_id: data['model'], + cache_creation_tokens: data.dig('usage', 'cache_creation_input_tokens'), + cached_tokens: data.dig('usage', 'cache_read_input_tokens'), raw: response ) end - def format_message(msg) + def format_message(msg, cache: false) if msg.tool_call? Tools.format_tool_call(msg) elsif msg.tool_result? Tools.format_tool_result(msg) else - format_basic_message(msg) + format_basic_message(msg, cache:) end end - def format_basic_message(msg) + def format_system_message(msg, cache: false) + Media.format_content(msg.content, cache:) + end + + def format_basic_message(msg, cache: false) { role: convert_role(msg.role), - content: Media.format_content(msg.content) + content: Media.format_content(msg.content, cache:) } end diff --git a/lib/ruby_llm/providers/anthropic/media.rb b/lib/ruby_llm/providers/anthropic/media.rb index d4a35324..7789f4b2 100644 --- a/lib/ruby_llm/providers/anthropic/media.rb +++ b/lib/ruby_llm/providers/anthropic/media.rb @@ -7,13 +7,13 @@ class Anthropic module Media module_function - def format_content(content) + def format_content(content, cache: false) # Convert Hash/Array back to JSON string for API - return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array) - return [format_text(content)] unless content.is_a?(Content) + return [format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array) + return [format_text(content, cache:)] unless content.is_a?(Content) parts = [] - parts << format_text(content.text) if content.text + parts << format_text(content.text, cache:) if content.text content.attachments.each do |attachment| case attachment.type @@ -31,60 +31,84 @@ def format_content(content) parts end - def format_text(text) - { - type: 'text', - text: text - } + def format_text(text, cache: false) + with_cache_control( + { + type: 'text', + text: text + }, + cache: + ) end - def format_image(image) + def format_image(image, cache: false) if image.url? 
- { - type: 'image', - source: { - type: 'url', - url: image.source - } - } + with_cache_control( + { + type: 'image', + source: { + type: 'url', + url: image.source + } + }, + cache: + ) else - { - type: 'image', - source: { - type: 'base64', - media_type: image.mime_type, - data: image.encoded - } - } + with_cache_control( + { + type: 'image', + source: { + type: 'base64', + media_type: image.mime_type, + data: image.encoded + } + }, + cache: + ) end end - def format_pdf(pdf) + def format_pdf(pdf, cache: false) if pdf.url? - { - type: 'document', - source: { - type: 'url', - url: pdf.source - } - } + with_cache_control( + { + type: 'document', + source: { + type: 'url', + url: pdf.source + } + }, + cache: + ) else - { - type: 'document', - source: { - type: 'base64', - media_type: pdf.mime_type, - data: pdf.encoded - } - } + with_cache_control( + { + type: 'document', + source: { + type: 'base64', + media_type: pdf.mime_type, + data: pdf.encoded + } + }, + cache: + ) end end - def format_text_file(text_file) - { - type: 'text', - text: Utils.format_text_file_for_llm(text_file) - } + def format_text_file(text_file, cache: false) + with_cache_control( + { + type: 'text', + text: Utils.format_text_file_for_llm(text_file) + }, + cache: + ) + end + + def with_cache_control(hash, cache: false) + return hash unless cache + + hash.merge(cache_control: { type: 'ephemeral' }) end end end diff --git a/lib/ruby_llm/providers/anthropic/models.rb b/lib/ruby_llm/providers/anthropic/models.rb index 31066cd7..b3481851 100644 --- a/lib/ruby_llm/providers/anthropic/models.rb +++ b/lib/ruby_llm/providers/anthropic/models.rb @@ -42,6 +42,14 @@ def extract_input_tokens(data) def extract_output_tokens(data) data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens') end + + def extract_cached_tokens(data) + data.dig('message', 'usage', 'cache_read_input_tokens') + end + + def extract_cache_creation_tokens(data) + data.dig('message', 'usage', 'cache_creation_input_tokens') + end end end end diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index 93b6fdfa..5fed1471 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -18,6 +18,8 @@ def build_chunk(data) content: data.dig('delta', 'text'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), + cached_tokens: extract_cached_tokens(data), + cache_creation_tokens: extract_cache_creation_tokens(data), tool_calls: extract_tool_calls(data) ) end diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index 94655d11..f8e83156 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -17,20 +17,20 @@ def sync_response(connection, payload, additional_headers = {}) Anthropic::Chat.parse_completion_response response end - def format_message(msg) + def format_message(msg, cache: false) if msg.tool_call? Anthropic::Tools.format_tool_call(msg) elsif msg.tool_result? 
Anthropic::Tools.format_tool_result(msg) else - format_basic_message(msg) + format_basic_message(msg, cache:) end end - def format_basic_message(msg) + def format_basic_message(msg, cache: false) { role: Anthropic::Chat.convert_role(msg.role), - content: Media.format_content(msg.content) + content: Media.format_content(msg.content, cache:) } end @@ -40,22 +40,33 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + cache_prompts: { system: false, user: false, tools: false }) # Hold model_id in instance variable for use in completion_url and stream_url @model_id = model system_messages, chat_messages = Anthropic::Chat.separate_messages(messages) - system_content = Anthropic::Chat.build_system_content(system_messages) + system_content = Anthropic::Chat.build_system_content(system_messages, cache: cache_prompts[:system]) - build_base_payload(chat_messages, model).tap do |payload| - Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:) + build_base_payload(chat_messages, model, cache: cache_prompts[:user]).tap do |payload| + Anthropic::Chat.add_optional_fields( + payload, + system_content:, + tools:, + temperature:, + cache_tools: cache_prompts[:tools] + ) end end - def build_base_payload(chat_messages, model) + def build_base_payload(chat_messages, model, cache: false) + messages = chat_messages.map.with_index do |msg, idx| + message_cache = cache if idx == chat_messages.size - 1 + format_message(msg, cache: message_cache) + end { anthropic_version: 'bedrock-2023-05-31', - messages: chat_messages.map { |msg| format_message(msg) }, + messages: messages, max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 } end diff --git a/lib/ruby_llm/providers/bedrock/media.rb b/lib/ruby_llm/providers/bedrock/media.rb index cef5c8e4..d911ada9 100644 --- a/lib/ruby_llm/providers/bedrock/media.rb +++ b/lib/ruby_llm/providers/bedrock/media.rb @@ -10,22 +10,22 @@ module Media module_function - def format_content(content) + def format_content(content, cache: false) # Convert Hash/Array back to JSON string for API - return [Anthropic::Media.format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array) - return [Anthropic::Media.format_text(content)] unless content.is_a?(Content) + return [Anthropic::Media.format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array) + return [Anthropic::Media.format_text(content, cache:)] unless content.is_a?(Content) parts = [] - parts << Anthropic::Media.format_text(content.text) if content.text + parts << Anthropic::Media.format_text(content.text, cache:) if content.text content.attachments.each do |attachment| case attachment.type when :image - parts << format_image(attachment) + parts << format_image(attachment, cache:) when :pdf - parts << format_pdf(attachment) + parts << format_pdf(attachment, cache:) when :text - parts << Anthropic::Media.format_text_file(attachment) + parts << Anthropic::Media.format_text_file(attachment, cache:) else raise UnsupportedAttachmentError, attachment.type end @@ -34,26 +34,38 @@ def format_content(content) parts end - def format_image(image) - { - type: 'image', - source: { - type: 'base64', - media_type: image.mime_type, - data: image.encoded - } - } + def 
format_image(image, cache: false) + with_cache_control( + { + type: 'image', + source: { + type: 'base64', + media_type: image.mime_type, + data: image.encoded + } + }, + cache: + ) end - def format_pdf(pdf) - { - type: 'document', - source: { - type: 'base64', - media_type: pdf.mime_type, - data: pdf.encoded - } - } + def format_pdf(pdf, cache: false) + with_cache_control( + { + type: 'document', + source: { + type: 'base64', + media_type: pdf.mime_type, + data: pdf.encoded + } + }, + cache: + ) + end + + def with_cache_control(hash, cache: false) + return hash unless cache + + hash.merge(cache_control: { type: 'ephemeral' }) end end end diff --git a/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb b/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb index 77a15a77..59ec14e3 100644 --- a/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +++ b/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb @@ -39,6 +39,14 @@ def extract_output_tokens(data) data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens') end + def extract_cached_tokens(data) + data.dig('message', 'usage', 'cache_read_input_tokens') + end + + def extract_cache_creation_tokens(data) + data.dig('message', 'usage', 'cache_creation_input_tokens') + end + private def extract_content_by_type(data) diff --git a/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb b/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb index fb901435..ba168b11 100644 --- a/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +++ b/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb @@ -71,6 +71,8 @@ def extract_chunk_attributes(data) content: extract_streaming_content(data), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), + cached_tokens: extract_cached_tokens(data), + cache_creation_tokens: extract_cache_creation_tokens(data), tool_calls: extract_tool_calls(data) } end diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index 5254722a..1e28d98c 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), @@ -81,6 +81,7 @@ def parse_completion_response(response) tool_calls: tool_calls, input_tokens: data.dig('usageMetadata', 'promptTokenCount'), output_tokens: calculate_output_tokens(data), + cached_tokens: data.dig('usageMetadata', 'cacheTokensDetails', 0, 'tokenCount') || 0, model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0], raw: response ) diff --git a/lib/ruby_llm/providers/gemini/streaming.rb b/lib/ruby_llm/providers/gemini/streaming.rb index 8aa630b2..6d41a6f0 100644 --- a/lib/ruby_llm/providers/gemini/streaming.rb +++ b/lib/ruby_llm/providers/gemini/streaming.rb @@ -16,6 +16,7 @@ def build_chunk(data) content: extract_content(data), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), + cached_tokens: extract_cached_tokens(data), tool_calls: 
extract_tool_calls(data) ) end @@ -48,6 +49,10 @@ def extract_output_tokens(data) total.positive? ? total : nil end + def extract_cached_tokens(data) + data.dig('usageMetadata', 'cachedContentTokenCount') + end + def parse_streaming_error(data) error_data = JSON.parse(data) [error_data['error']['code'], error_data['error']['message']] diff --git a/lib/ruby_llm/providers/mistral/chat.rb b/lib/ruby_llm/providers/mistral/chat.rb index 74d508d1..520c7510 100644 --- a/lib/ruby_llm/providers/mistral/chat.rb +++ b/lib/ruby_llm/providers/mistral/chat.rb @@ -13,7 +13,7 @@ def format_role(role) end # rubocop:disable Metrics/ParameterLists - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists payload = super # Mistral doesn't support stream_options payload.delete(:stream_options) diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index 9ed7e170..fc6bf633 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Lint/UnusedMethodArgument, Metrics/ParameterLists payload = { model: model, messages: format_messages(messages), @@ -56,6 +56,7 @@ def parse_completion_response(response) tool_calls: parse_tool_calls(message_data['tool_calls']), input_tokens: data['usage']['prompt_tokens'], output_tokens: data['usage']['completion_tokens'], + cached_tokens: data.dig('usage', 'prompt_tokens_details', 'cached_tokens'), model_id: data['model'], raw: response ) diff --git a/lib/ruby_llm/providers/openai/streaming.rb b/lib/ruby_llm/providers/openai/streaming.rb index c3932ec3..d77dbe63 100644 --- a/lib/ruby_llm/providers/openai/streaming.rb +++ b/lib/ruby_llm/providers/openai/streaming.rb @@ -18,7 +18,8 @@ def build_chunk(data) content: data.dig('choices', 0, 'delta', 'content'), tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false), input_tokens: data.dig('usage', 'prompt_tokens'), - output_tokens: data.dig('usage', 'completion_tokens') + output_tokens: data.dig('usage', 'completion_tokens'), + cached_tokens: data.dig('usage', 'cached_tokens') ) end diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 6d2715a5..01b47fa5 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -12,6 +12,8 @@ def initialize @tool_calls = {} @input_tokens = 0 @output_tokens = 0 + @cached_tokens = 0 + @cache_creation_tokens = 0 @latest_tool_call_id = nil end @@ -37,6 +39,8 @@ def to_message(response) tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? @input_tokens : nil, output_tokens: @output_tokens.positive? ? @output_tokens : nil, + cached_tokens: @cached_tokens.positive? ? @cached_tokens : nil, + cache_creation_tokens: @cache_creation_tokens.positive? ? 
@cache_creation_tokens : nil, raw: response ) end @@ -92,6 +96,8 @@ def find_tool_call(tool_call_id) def count_tokens(chunk) @input_tokens = chunk.input_tokens if chunk.input_tokens @output_tokens = chunk.output_tokens if chunk.output_tokens + @cached_tokens = chunk.cached_tokens if chunk.cached_tokens + @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens end end end diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_caching_allows_prompt_caching.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_caching_allows_prompt_caching.yml new file mode 100644 index 00000000..0c18aa85 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_caching_allows_prompt_caching.yml @@ -0,0 +1,80 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Hello","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 15 Aug 2025 21:07:10 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-15T21:07:10Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-15T21:07:10Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-15T21:07:10Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '480000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-15T21:07:10Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01GEK45t8NmPRgvVjrHWCmwU","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Hi + there! How are you doing today? 
Is there anything I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":8,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":21,"service_tier":"standard"}}' + recorded_at: Fri, 15 Aug 2025 21:07:10 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml index 950ece1f..6087db48 100644 --- a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml +++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml @@ -35,27 +35,27 @@ http_interactions: Connection: - keep-alive Anthropic-Ratelimit-Input-Tokens-Limit: - - '100000' + - '50000' Anthropic-Ratelimit-Input-Tokens-Remaining: - - '100000' + - '50000' Anthropic-Ratelimit-Input-Tokens-Reset: - '2025-08-10T13:31:30Z' Anthropic-Ratelimit-Output-Tokens-Limit: - - '20000' + - '10000' Anthropic-Ratelimit-Output-Tokens-Remaining: - - '20000' + - '10000' Anthropic-Ratelimit-Output-Tokens-Reset: - '2025-08-10T13:31:33Z' Anthropic-Ratelimit-Requests-Limit: - - '1000' + - '50' Anthropic-Ratelimit-Requests-Remaining: - - '999' + - '49' Anthropic-Ratelimit-Requests-Reset: - '2025-08-10T13:31:30Z' Anthropic-Ratelimit-Tokens-Limit: - - '120000' + - '60000' Anthropic-Ratelimit-Tokens-Remaining: - - '120000' + - '60000' Anthropic-Ratelimit-Tokens-Reset: - '2025-08-10T13:31:30Z' Request-Id: diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_multiple_caching_types_handles_multiple_caching_types_together.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_multiple_caching_types_handles_multiple_caching_types_together.yml new file mode 100644 index 00000000..6a8eb59b --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_multiple_caching_types_handles_multiple_caching_types_together.yml @@ -0,0 +1,175 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:09 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + 
Anthropic-Ratelimit-Input-Tokens-Remaining: + - '398000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:08Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:09Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:07Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '478000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:08Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01BDTerm8ELpf9Lh3a8v8bh1","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available tool."},{"type":"tool_use","id":"toolu_01NSq4nB43eJJUAMzoB9DQWv","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":8514,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":8514,"ephemeral_1h_input_tokens":0},"output_tokens":54,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:09 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available tool."},{"type":"tool_use","id":"toolu_01NSq4nB43eJJUAMzoB9DQWv","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01NSq4nB43eJJUAMzoB9DQWv","content":"Ruby + is a great language for building web applications."}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:15 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '395000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:11Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:15Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:09Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + 
Anthropic-Ratelimit-Tokens-Remaining: + - '475000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:11Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01JfsgKwnpUj5jL89fSETwdE","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Ruby + is a dynamic, object-oriented programming language known for its simplicity + and readability. Here are some key points about Ruby:\n\n1. Created by Yukihiro + Matsumoto (often called \"Matz\") in 1995\n2. Designed to be programmer-friendly + with a focus on human-readable syntax\n3. Particularly popular for web development, + especially with the Ruby on Rails framework\n4. Supports multiple programming + paradigms, including object-oriented, functional, and imperative programming\n5. + Known for its elegant and concise code\n6. Has a strong standard library and + a vibrant ecosystem of gems (libraries)\n7. Commonly used for:\n - Web development\n - + Scripting\n - Automation\n - Prototyping\n - DevOps tools\n\nSome popular + frameworks and tools in the Ruby ecosystem include:\n- Ruby on Rails (web + application framework)\n- Sinatra (lightweight web framework)\n- RSpec (testing + framework)\n- Bundler (dependency management)\n- Rake (build automation)\n\nRuby + emphasizes the principle of \"developer happiness\" and follows the philosophy + that there should be more than one way to do something, giving developers + flexibility in their coding approach."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":2953,"cache_creation_input_tokens":0,"cache_read_input_tokens":5639,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":271,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:15 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_streaming_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_streaming_reports_cached_tokens.yml new file mode 100644 index 00000000..e75f07e9 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_streaming_reports_cached_tokens.yml @@ -0,0 +1,236 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3","cache_control":{"type":"ephemeral"}}]}],"stream":true,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:19 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - "" + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + 
Anthropic-Ratelimit-Input-Tokens-Remaining: + - '398000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:24:19Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:18Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:24:18Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '478000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:24:18Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + body: + encoding: UTF-8 + string: |+ + event: message_start + data: {"type":"message_start","message":{"id":"msg_01ESjJKDSNtD5bt6jcqvxp9e","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2744,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2744,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" counting"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" from 1 to"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n2\n3"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20} } + + event: message_stop + data: {"type":"message_stop" } + + recorded_at: Thu, 14 Aug 2025 16:24:19 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3"}]},{"role":"assistant","content":[{"type":"text","text":"Here''s + counting from 1 to 3:\n\n1\n2\n3"}]},{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3","cache_control":{"type":"ephemeral"}}]}],"stream":true,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:21 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - 
keep-alive + Cf-Ray: + - "" + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '396000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:24:20Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:19Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:24:20Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '476000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:24:19Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + body: + encoding: UTF-8 + string: |+ + event: message_start + data: {"type":"message_start","message":{"id":"msg_01AgYaKnVSNp9SbL9xW9g4c3","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2764,"cache_read_input_tokens":2744,"cache_creation":{"ephemeral_5m_input_tokens":2764,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" counting"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" from 1 to"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n2\n3"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20}} + + event: message_stop + data: {"type":"message_stop" } + + recorded_at: Thu, 14 Aug 2025 16:24:21 GMT +recorded_with: VCR 6.3.1 +... 
diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml new file mode 100644 index 00000000..0adde294 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml @@ -0,0 +1,174 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"stream":false,"max_tokens":8192,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:56 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:54Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:56Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:53Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:54Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01Fd5rjy7p2WRgRtdjyxnVHG","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + want to be direct and transparent with you. My key principles include being + helpful, honest, avoiding harm, protecting individual privacy, and being respectful. + I aim to give accurate information, acknowledge when I''m uncertain, and not + pretend to have capabilities I don''t. 
I won''t help with anything illegal + or dangerous, and I try to provide balanced, nuanced perspectives."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":2732,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2732,"ephemeral_1h_input_tokens":0},"output_tokens":79,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:56 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]},{"role":"assistant","content":[{"type":"text","text":"I + want to be direct and transparent with you. My key principles include being + helpful, honest, avoiding harm, protecting individual privacy, and being respectful. + I aim to give accurate information, acknowledge when I''m uncertain, and not + pretend to have capabilities I don''t. I won''t help with anything illegal + or dangerous, and I try to provide balanced, nuanced perspectives."}]},{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"stream":false,"max_tokens":8192,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:58 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:57Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:58Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:56Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:57Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01W75cVDRb1BVmcCYgaRMi3H","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + aim to be helpful, honest, and ethical. My key principles include:\n\n1. Being + truthful and transparent\n2. Protecting individual privacy \n3. Avoiding potential + harm\n4. Respecting human rights\n5. Providing balanced, factual information\n6. + Acknowledging the limits of my knowledge\n7. Maintaining appropriate boundaries\n8. 
+ Declining requests that could be unethical or dangerous\n\nI strive to be + a responsible AI assistant that supports human wellbeing."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":105,"cache_creation_input_tokens":0,"cache_read_input_tokens":2732,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":109,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:58 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml new file mode 100644 index 00000000..7f405820 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml @@ -0,0 +1,180 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:47:14 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:47:14Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:47:14Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:47:13Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:47:14Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01LVwUdFoKF4qiMoy8zpbt6k","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby for 
you."},{"type":"tool_use","id":"toolu_01J9F7Xi77jPzDXuGwkQEv5Q","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":145,"cache_creation_input_tokens":2902,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2902,"ephemeral_1h_input_tokens":0},"output_tokens":57,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:47:14 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby for you."},{"type":"tool_use","id":"toolu_01J9F7Xi77jPzDXuGwkQEv5Q","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01J9F7Xi77jPzDXuGwkQEv5Q","content":"Ruby + is a great language for building web applications."}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:47:22 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:47:15Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:47:22Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:47:15Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:47:15Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01PDSjZj6jjLSLWwRrU9gX2i","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Let + me provide you with more comprehensive information about Ruby:\n\nRuby is + a dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan during the mid-1990s. Here are some key characteristics:\n\n1. + Design Philosophy:\n- Designed to be programmer-friendly and productive\n- + Follows the principle of \"developer happiness\" and \"least surprise\"\n- + Emphasizes natural and readable syntax\n\n2. Key Features:\n- Fully object-oriented: + Everything in Ruby is an object\n- Dynamic typing\n- Supports functional programming + paradigms\n- Automatic memory management (garbage collection)\n- Extensive + standard library\n- Strong metaprogramming capabilities\n\n3. 
Popular Use + Cases:\n- Web development (especially with Ruby on Rails framework)\n- Scripting + and automation\n- DevOps tools\n- Prototyping\n- Backend web services\n\n4. + Notable Frameworks and Tools:\n- Ruby on Rails (web application framework)\n- + Sinatra (lightweight web framework)\n- RSpec (testing framework)\n- Bundler + (dependency management)\n\n5. Community and Ecosystem:\n- Active and supportive + open-source community\n- Extensive collection of libraries (called \"gems\")\n- + Regular language updates and improvements\n\nRuby is particularly loved by + developers for its elegant syntax, flexibility, and focus on developer productivity. + While it may not be as widely used as some other languages, it remains a powerful + and enjoyable language to work with, especially in web development and scripting + domains."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":222,"cache_creation_input_tokens":0,"cache_read_input_tokens":2902,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":338,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:47:22 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml new file mode 100644 index 00000000..97469375 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml @@ -0,0 +1,199 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:03 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:59Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:03Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:59Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:59Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + 
Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_018Kg2eT1LZZbyPTHmzomDEH","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" and does not contain any information about Ruby. If you would like to + know about Ruby, I can provide some information:\n\nRuby is a dynamic, object-oriented + programming language created by Yukihiro Matsumoto (often called \"Matz\") + in Japan during the mid-1990s. Some key characteristics of Ruby include:\n\n1. + Designed for programmer productivity and happiness\n2. Supports multiple programming + paradigms (object-oriented, functional, imperative)\n3. Known for its clean + and readable syntax\n4. Widely used for web development, particularly with + the Ruby on Rails framework\n5. Open-source programming language\n6. Interpreted + language\n7. Features like automatic memory management and dynamic typing\n8. + Strong support for metaprogramming\n9. Cross-platform compatibility\n\nWould + you like me to elaborate on any of these points about Ruby?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2745,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2745,"ephemeral_1h_input_tokens":0},"output_tokens":207,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:24:03 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" and does not contain any information about Ruby. If you would like to + know about Ruby, I can provide some information:\n\nRuby is a dynamic, object-oriented + programming language created by Yukihiro Matsumoto (often called \"Matz\") + in Japan during the mid-1990s. Some key characteristics of Ruby include:\n\n1. + Designed for programmer productivity and happiness\n2. Supports multiple programming + paradigms (object-oriented, functional, imperative)\n3. Known for its clean + and readable syntax\n4. Widely used for web development, particularly with + the Ruby on Rails framework\n5. Open-source programming language\n6. Interpreted + language\n7. Features like automatic memory management and dynamic typing\n8. + Strong support for metaprogramming\n9. 
Cross-platform compatibility\n\nWould + you like me to elaborate on any of these points about Ruby?"}]},{"role":"user","content":[{"type":"text","text":"Tell + me more about Ruby","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:10 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:24:04Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:10Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:24:03Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:24:04Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_015rq7u1xDiESviQgHnN16LD","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Here''s + a more detailed overview of Ruby:\n\nLanguage Design:\n- Created by Yukihiro + Matsumoto in 1995\n- Philosophy: \"Principle of least surprise\"\n- Designed + to make programming more enjoyable and productive\n- Fully object-oriented + language (everything is an object)\n\nKey Technical Features:\n- Dynamic typing\n- + Automatic memory management\n- Supports multiple programming paradigms\n- + Interpreted language\n- Uses garbage collection\n- Strong metaprogramming + capabilities\n\nSyntax Characteristics:\n- Clean, readable, and concise\n- + Uses indentation for readability\n- Supports functional programming concepts\n- + Uses snake_case for method and variable names\n- Uses CamelCase for class + and module names\n\nPopular Use Cases:\n- Web development (Ruby on Rails framework)\n- + Scripting\n- Automation\n- DevOps tools\n- Prototyping\n- Backend web services\n\nEcosystem:\n- + RubyGems package manager\n- Large open-source community\n- Extensive library + of pre-built modules\n- Strong testing frameworks\n\nPerformance:\n- Generally + slower than compiled languages\n- Improved performance with JIT compilation + in recent versions\n- Good for rapid development and scripting\n\nMajor Companies + Using Ruby:\n- GitHub\n- Airbnb\n- Shopify\n- Twitch\n- SoundCloud\n\nWould + you like me to elaborate on any of these aspects of Ruby?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":215,"cache_read_input_tokens":2745,"cache_creation":{"ephemeral_5m_input_tokens":215,"ephemeral_1h_input_tokens":0},"output_tokens":315,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:24:10 GMT +recorded_with: VCR 6.3.1 diff --git 
a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_multiple_caching_types_handles_multiple_caching_types_together.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_multiple_caching_types_handles_multiple_caching_types_together.yml new file mode 100644 index 00000000..9690ba72 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_multiple_caching_types_handles_multiple_caching_types_together.yml @@ -0,0 +1,133 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163033Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 6c0e81fe3d33cff0f11ffa88ea3dfeaa4417e7c89c536f0633fd312f9aa886ff + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=3cb53bf7294051dbd7f988c48ff0d4ba0cd41a1a08b33175e476d4f83a43c41b + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:36 GMT + Content-Type: + - application/json + Content-Length: + - '492' + Connection: + - keep-alive + X-Amzn-Requestid: + - d7e1a435-0cd0-4274-9a58-9899c1f01431 + X-Amzn-Bedrock-Invocation-Latency: + - '2340' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '5612' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2902' + X-Amzn-Bedrock-Output-Token-Count: + - '54' + X-Amzn-Bedrock-Input-Token-Count: + - '4' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01RQ1dXhJCRzFJymM59YyfLe","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available tool."},{"type":"tool_use","id":"toolu_bdrk_01GdWT6NQcLBHd2uhMfiZWXH","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":5612,"cache_read_input_tokens":2902,"output_tokens":54}}' + recorded_at: Thu, 14 Aug 2025 16:30:36 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available 
tool."},{"type":"tool_use","id":"toolu_bdrk_01GdWT6NQcLBHd2uhMfiZWXH","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_bdrk_01GdWT6NQcLBHd2uhMfiZWXH","content":"Ruby + is a great language for building web applications."}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163036Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 89c2f0e94fa48d99647af0b714fcef903c5a1613003cb736e9dfd55838470744 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=98251464b577738328efa3d551a410b2507df5c91294ff6f9945f63c14661e8b + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:41 GMT + Content-Type: + - application/json + Content-Length: + - '1448' + Connection: + - keep-alive + X-Amzn-Requestid: + - c728c6d9-2860-4e9b-a32f-0087597ad0d0 + X-Amzn-Bedrock-Invocation-Latency: + - '5178' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '5639' + X-Amzn-Bedrock-Output-Token-Count: + - '263' + X-Amzn-Bedrock-Input-Token-Count: + - '2953' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01E39r7wzTxKySur92U4XSPp","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Ruby + is a dynamic, object-oriented programming language known for its simplicity + and readability. Here are some key points about Ruby:\n\n1. Language Characteristics:\n- + Created by Yukihiro Matsumoto (Matz) in 1995\n- Designed to be programmer-friendly + and productive\n- Follows the principle of \"developer happiness\"\n\n2. Web + Development:\n- Particularly popular for web development through the Ruby + on Rails framework\n- Rails follows the Model-View-Controller (MVC) architectural + pattern\n- Known for rapid application development and convention over configuration\n\n3. + Key Features:\n- Dynamic typing\n- Garbage collection\n- Support for functional + programming paradigms\n- Extensive standard library\n- Strong metaprogramming + capabilities\n\n4. Common Use Cases:\n- Web applications\n- Scripting\n- Automation\n- + Prototyping\n- DevOps tools\n\n5. 
Popular Frameworks and Tools:\n- Ruby on + Rails\n- Sinatra (lightweight web framework)\n- RSpec (testing)\n- Bundler + (dependency management)\n\nRuby''s elegant syntax and powerful ecosystem make + it a favorite among developers who value clean, maintainable code and rapid + development."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":2953,"cache_creation_input_tokens":0,"cache_read_input_tokens":5639,"output_tokens":263}}' + recorded_at: Thu, 14 Aug 2025 16:30:41 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_streaming_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_streaming_reports_cached_tokens.yml new file mode 100644 index 00000000..22ef4bcd --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_streaming_reports_cached_tokens.yml @@ -0,0 +1,102 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke-with-response-stream + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163041Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 4fc51a216af64f791658e86560c4177bc076f4d7d40a9c9842840568e6ad3975 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=6e9b4dec74c5d99de161fbd9b4fb416074b769c8c23e189fdcb949d849f8180c + Content-Type: + - application/json + Accept: + - application/vnd.amazon.eventstream + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:42 GMT + Content-Type: + - application/vnd.amazon.eventstream + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Amzn-Requestid: + - 41b1aea6-739b-4232-9450-deb5cde5c285 + X-Amzn-Bedrock-Content-Type: + - application/json + body: + encoding: ASCII-8BIT + string: !binary |- + 
AAACPgAAAEvX9eCdCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5emRHRnlkQ0lzSW0xbGMzTmhaMlVpT25zaWFXUWlPaUp0YzJkZlltUnlhMTh3TVZGdGJqWmpaRmhXT0c1d2NYUlFNMFZMYUVkeFVsb2lMQ0owZVhCbElqb2liV1Z6YzJGblpTSXNJbkp2YkdVaU9pSmhjM05wYzNSaGJuUWlMQ0p0YjJSbGJDSTZJbU5zWVhWa1pTMHpMVFV0YUdGcGEzVXRNakF5TkRFd01qSWlMQ0pqYjI1MFpXNTBJanBiWFN3aWMzUnZjRjl5WldGemIyNGlPbTUxYkd3c0luTjBiM0JmYzJWeGRXVnVZMlVpT201MWJHd3NJblZ6WVdkbElqcDdJbWx1Y0hWMFgzUnZhMlZ1Y3lJNk5Dd2lZMkZqYUdWZlkzSmxZWFJwYjI1ZmFXNXdkWFJmZEc5clpXNXpJam95TnpRMExDSmpZV05vWlY5eVpXRmtYMmx1Y0hWMFgzUnZhMlZ1Y3lJNk1Dd2liM1YwY0hWMFgzUnZhMlZ1Y3lJNk1YMTlmUT09IiwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVViJ9UFYdEwAAARUAAABL53CMJgs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2lZMjl1ZEdWdWRGOWliRzlqYTE5emRHRnlkQ0lzSW1sdVpHVjRJam93TENKamIyNTBaVzUwWDJKc2IyTnJJanA3SW5SNWNHVWlPaUowWlhoMElpd2lkR1Y0ZENJNklpSjlmUT09IiwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIiffGwR+QAAADmAAAASyO4dQ4LOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOWtaV3gwWVNJc0ltbHVaR1Y0SWpvd0xDSmtaV3gwWVNJNmV5SjBlWEJsSWpvaWRHVjRkRjlrWld4MFlTSXNJblJsZUhRaU9pSXhYRzR5SW4xOSIsInAiOiJhYmNkZWZnaCJ9H2bm+gAAAQ8AAABLzSADBQs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2lZMjl1ZEdWdWRGOWliRzlqYTE5a1pXeDBZU0lzSW1sdVpHVjRJam93TENKa1pXeDBZU0k2ZXlKMGVYQmxJam9pZEdWNGRGOWtaV3gwWVNJc0luUmxlSFFpT2lKY2JqTWlmWDA9IiwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVlcifdzMmQ4AAACwAAAAS5TrT2ULOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOXpkRzl3SWl3aWFXNWtaWGdpT2pCOSIsInAiOiJhYmNkZWZnaGlqa2xtbiJ9aUr6HgAAASwAAABLS0FW0Qs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2liV1Z6YzJGblpWOWtaV3gwWVNJc0ltUmxiSFJoSWpwN0luTjBiM0JmY21WaGMyOXVJam9pWlc1a1gzUjFjbTRpTENKemRHOXdYM05sY1hWbGJtTmxJanB1ZFd4c2ZTd2lkWE5oWjJVaU9uc2liM1YwY0hWMFgzUnZhMlZ1Y3lJNk9YMTkiLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVCJ9SVplJwAAAbQAAABLqjc6AAs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2liV1Z6YzJGblpWOXpkRzl3SWl3aVlXMWhlbTl1TFdKbFpISnZZMnN0YVc1MmIyTmhkR2x2YmsxbGRISnBZM01pT25zaWFXNXdkWFJVYjJ0bGJrTnZkVzUwSWpvMExDSnZkWFJ3ZFhSVWIydGxia052ZFc1MElqbzVMQ0pwYm5adlkyRjBhVzl1VEdGMFpXNWplU0k2TVRJd09Dd2labWx5YzNSQ2VYUmxUR0YwWlc1amVTSTZNVEl3Tml3aVkyRmphR1ZTWldGa1NXNXdkWFJVYjJ0bGJrTnZkVzUwSWpvd0xDSmpZV05vWlZkeWFYUmxTVzV3ZFhSVWIydGxia052ZFc1MElqb3lOelEwZlgwPSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1AifZsqRpI= + recorded_at: Thu, 14 Aug 2025 16:30:43 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke-with-response-stream + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3"}]},{"role":"assistant","content":[{"type":"text","text":"1\n2\n3"}]},{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 
3","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163043Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 050bbdfaa51d7829d07b5d3ccbd5826da2a52574004a35e999b54a15a2ee4357 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=44a676dbab70c7b3d98e297b7d46b7ef443c5ee515f219ad2eebf87c04e49b5a + Content-Type: + - application/json + Accept: + - application/vnd.amazon.eventstream + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:45 GMT + Content-Type: + - application/vnd.amazon.eventstream + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Amzn-Requestid: + - e55f4a17-7d39-45bb-90d9-da017c79be85 + X-Amzn-Bedrock-Content-Type: + - application/json + body: + encoding: ASCII-8BIT + string: !binary |- + AAACGwAAAEve1EDpCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5emRHRnlkQ0lzSW0xbGMzTmhaMlVpT25zaWFXUWlPaUp0YzJkZlltUnlhMTh3TVZabFkzVjFaMjB6VFdoT09WWkthMk5DVEU1bk5tUWlMQ0owZVhCbElqb2liV1Z6YzJGblpTSXNJbkp2YkdVaU9pSmhjM05wYzNSaGJuUWlMQ0p0YjJSbGJDSTZJbU5zWVhWa1pTMHpMVFV0YUdGcGEzVXRNakF5TkRFd01qSWlMQ0pqYjI1MFpXNTBJanBiWFN3aWMzUnZjRjl5WldGemIyNGlPbTUxYkd3c0luTjBiM0JmYzJWeGRXVnVZMlVpT201MWJHd3NJblZ6WVdkbElqcDdJbWx1Y0hWMFgzUnZhMlZ1Y3lJNk5Dd2lZMkZqYUdWZlkzSmxZWFJwYjI1ZmFXNXdkWFJmZEc5clpXNXpJam95TnpVekxDSmpZV05vWlY5eVpXRmtYMmx1Y0hWMFgzUnZhMlZ1Y3lJNk1qYzBOQ3dpYjNWMGNIVjBYM1J2YTJWdWN5STZNWDE5ZlE9PSIsInAiOiJhYmNkZWZnaGkiffKXxhcAAAEPAAAAS80gAwULOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOXpkR0Z5ZENJc0ltbHVaR1Y0SWpvd0xDSmpiMjUwWlc1MFgySnNiMk5ySWpwN0luUjVjR1VpT2lKMFpYaDBJaXdpZEdWNGRDSTZJaUo5ZlE9PSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXIn0hUuprAAAA/wAAAEtOSID9CzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaVkyOXVkR1Z1ZEY5aWJHOWphMTlrWld4MFlTSXNJbWx1WkdWNElqb3dMQ0prWld4MFlTSTZleUowZVhCbElqb2lkR1Y0ZEY5a1pXeDBZU0lzSW5SbGVIUWlPaUl4WEc0eUluMTkiLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHIn37nPuKAAAA/gAAAEtzKKlNCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaVkyOXVkR1Z1ZEY5aWJHOWphMTlrWld4MFlTSXNJbWx1WkdWNElqb3dMQ0prWld4MFlTSTZleUowZVhCbElqb2lkR1Y0ZEY5a1pXeDBZU0lzSW5SbGVIUWlPaUpjYmpNaWZYMD0iLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUYifRiiJ98AAADKAAAASyeJtwsLOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOXpkRzl3SWl3aWFXNWtaWGdpT2pCOSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OIn1nPrWkAAABEQAAAEsS8CrmCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5a1pXeDBZU0lzSW1SbGJIUmhJanA3SW5OMGIzQmZjbVZoYzI5dUlqb2laVzVrWDNSMWNtNGlMQ0p6ZEc5d1gzTmxjWFZsYm1ObElqcHVkV3hzZlN3aWRYTmhaMlVpT25zaWIzVjBjSFYwWDNSdmEyVnVjeUk2T1gxOSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzIn07vfzYAAABvQAAAEunJ1hxCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFw
cGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5emRHOXdJaXdpWVcxaGVtOXVMV0psWkhKdlkyc3RhVzUyYjJOaGRHbHZiazFsZEhKcFkzTWlPbnNpYVc1d2RYUlViMnRsYmtOdmRXNTBJam8wTENKdmRYUndkWFJVYjJ0bGJrTnZkVzUwSWpvNUxDSnBiblp2WTJGMGFXOXVUR0YwWlc1amVTSTZNVGd5T0N3aVptbHljM1JDZVhSbFRHRjBaVzVqZVNJNk1UZ3lOU3dpWTJGamFHVlNaV0ZrU1c1d2RYUlViMnRsYmtOdmRXNTBJam95TnpRMExDSmpZV05vWlZkeWFYUmxTVzV3ZFhSVWIydGxia052ZFc1MElqb3lOelV6ZlgwPSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVSJ99PzsnA== + recorded_at: Thu, 14 Aug 2025 16:30:45 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml new file mode 100644 index 00000000..d6c31ce2 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml @@ -0,0 +1,134 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"max_tokens":4096,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163008Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 965bbb410cd806c839d6d17bc77733a814091b20d819e2363f76aa404c7e40b7 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=66bc9900487e201394c7b44faa2df13d13012e2b052cdfe750549417bc8ebdb0 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:11 GMT + Content-Type: + - application/json + Content-Length: + - '736' + Connection: + - keep-alive + X-Amzn-Requestid: + - 8e32664d-1e16-4efd-8a97-ad76b4fd082e + X-Amzn-Bedrock-Invocation-Latency: + - '2771' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '2732' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Output-Token-Count: + - '91' + X-Amzn-Bedrock-Input-Token-Count: + - '15' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01AVm5QS4yCyB45uXUDGHYUB","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + want to be direct and transparent. My key principles include:\n\n1. Being + helpful while avoiding harm\n2. Being honest about my capabilities and limitations + \n3. Protecting individual privacy\n4. Respecting ethical boundaries\n5. Providing + accurate information\n6. Being objective and balanced\n7. 
Maintaining user + confidentiality\n\nI aim to be a supportive tool that assists humans while + maintaining clear ethical standards."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":2732,"cache_read_input_tokens":0,"output_tokens":91}}' + recorded_at: Thu, 14 Aug 2025 16:30:11 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]},{"role":"assistant","content":[{"type":"text","text":"I + want to be direct and transparent. My key principles include:\n\n1. Being + helpful while avoiding harm\n2. Being honest about my capabilities and limitations + \n3. Protecting individual privacy\n4. Respecting ethical boundaries\n5. Providing + accurate information\n6. Being objective and balanced\n7. Maintaining user + confidentiality\n\nI aim to be a supportive tool that assists humans while + maintaining clear ethical standards."}]},{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"max_tokens":4096,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163011Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 1d77e990628c90735a3bfaf8a70a59a82d78d1baa8d76c016890743da433a58c + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=b8bd67a1f6e725a12099851544c233e8e3739713fdb313d510bb4c911234be20 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:13 GMT + Content-Type: + - application/json + Content-Length: + - '786' + Connection: + - keep-alive + X-Amzn-Requestid: + - 80933c39-82bc-43e7-a827-82f86a6e150f + X-Amzn-Bedrock-Invocation-Latency: + - '2074' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2732' + X-Amzn-Bedrock-Output-Token-Count: + - '108' + X-Amzn-Bedrock-Input-Token-Count: + - '117' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_0128v6SkYpy2RHfJAHKLTAyc","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + aim to be helpful while following key ethical principles:\n\n1. Be honest + and direct\n2. Protect individual privacy\n3. Avoid potential harm\n4. Provide + accurate information\n5. Respect ethical boundaries\n6. Be objective and balanced\n7. + Maintain user confidentiality\n8. Acknowledge my limitations\n9. Prioritize + human wellbeing\n10. 
Refuse inappropriate requests\n\nI strive to be a responsible + AI assistant that supports humans while maintaining strong moral guidelines."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":117,"cache_creation_input_tokens":0,"cache_read_input_tokens":2732,"output_tokens":108}}' + recorded_at: Thu, 14 Aug 2025 16:30:13 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml new file mode 100644 index 00000000..a2d4245e --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml @@ -0,0 +1,135 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163025Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 66d69cc12e95a1abcd338456d7ede496a815bdd198660e9d21b3e9606fa4db5a + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=6182b63f783094de2554de58a126beee0dd8f6e2f2acfa858895b4643308c085 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:26 GMT + Content-Type: + - application/json + Content-Length: + - '480' + Connection: + - keep-alive + X-Amzn-Requestid: + - 712b2d45-e506-4968-aa87-965bc58983a5 + X-Amzn-Bedrock-Invocation-Latency: + - '1565' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '2902' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Output-Token-Count: + - '55' + X-Amzn-Bedrock-Input-Token-Count: + - '145' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01Unm4o5kEo9PyDRXtnwzYQe","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby."},{"type":"tool_use","id":"toolu_bdrk_01HogbJT5garLoQbRin6Yi71","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":145,"cache_creation_input_tokens":2902,"cache_read_input_tokens":0,"output_tokens":55}}' + recorded_at: Thu, 14 Aug 2025 16:30:27 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: 
'{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby."},{"type":"tool_use","id":"toolu_bdrk_01HogbJT5garLoQbRin6Yi71","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_bdrk_01HogbJT5garLoQbRin6Yi71","content":"Ruby + is a great language for building web applications."}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163027Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 5fac1dda31038ff29ab45910ff27f5b62d41efd66a9ecc55c1c16dba749ed7de + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=62a82c52f27e503a4ccb759ca4e591cd43babf12ad9e5b8ffdbb3807abc42fc5 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:33 GMT + Content-Type: + - application/json + Content-Length: + - '1618' + Connection: + - keep-alive + X-Amzn-Requestid: + - 007070a2-fa7c-41a0-9fad-58ed8723f848 + X-Amzn-Bedrock-Invocation-Latency: + - '6420' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2902' + X-Amzn-Bedrock-Output-Token-Count: + - '302' + X-Amzn-Bedrock-Input-Token-Count: + - '220' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_015ca9q3c8ZiNZSBdQu3bbYD","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Let + me provide you with more comprehensive information about Ruby:\n\nRuby is + a dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan during the mid-1990s. Here are some key characteristics:\n\n1. + Design Philosophy:\n- Designed to be programmer-friendly and productive\n- + Follows the principle of \"developer happiness\" and \"least surprise\"\n- + Emphasizes natural, readable syntax\n\n2. Key Features:\n- Fully object-oriented: + Everything in Ruby is an object\n- Dynamic typing\n- Supports functional programming + paradigms\n- Automatic memory management (garbage collection)\n- Strong metaprogramming + capabilities\n\n3. Popular Use Cases:\n- Web development (especially with + Ruby on Rails framework)\n- Scripting and automation\n- DevOps tools\n- Prototyping\n- + Data processing\n\n4. Strengths:\n- Elegant and concise syntax\n- Highly productive\n- + Extensive libraries and gems\n- Strong community support\n- Cross-platform + compatibility\n\n5. 
Notable Frameworks and Tools:\n- Ruby on Rails (web application + framework)\n- Sinatra (lightweight web framework)\n- RSpec (testing framework)\n- + Bundler (dependency management)\n\nRuby continues to be a popular language, + particularly in web development and among developers who value clean, expressive + code."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":220,"cache_creation_input_tokens":0,"cache_read_input_tokens":2902,"output_tokens":302}}' + recorded_at: Thu, 14 Aug 2025 16:30:33 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml new file mode 100644 index 00000000..3ca5390b --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml @@ -0,0 +1,156 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163013Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 7369a818a412d229a72fcc73315b2c66fa39b4517242db86bcab3c2c7a6a9b3a + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=b1abc025aff6fbe749e7ebbb11c5c7fb2035c33d4bb85af92baa5738fccf5fb8 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:18 GMT + Content-Type: + - application/json + Content-Length: + - '1172' + Connection: + - keep-alive + X-Amzn-Requestid: + - eadc536f-27c0-4a67-89e2-13a1b1a47ee2 + X-Amzn-Bedrock-Invocation-Latency: + - '4680' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '2745' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Output-Token-Count: + - '195' + X-Amzn-Bedrock-Input-Token-Count: + - '4' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01DnvM7rCLxyNBRhW4MniqQs","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" which doesn''t contain any meaningful information about Ruby. If you''re + looking to learn about Ruby, I can provide some information:\n\nRuby is a + dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan in the mid-1990s. Some key characteristics + of Ruby include:\n\n1. Designed for programmer productivity and happiness\n2. 
+ Supports multiple programming paradigms (object-oriented, functional, imperative)\n3. + Known for its clean, readable syntax\n4. Widely used for web development (especially + with Ruby on Rails framework)\n5. Open-source programming language\n6. Interpreted + language\n7. Supports automatic memory management\n8. Strong and dynamic typing\n\nWould + you like me to elaborate on any of these points about Ruby?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2745,"cache_read_input_tokens":0,"output_tokens":195}}' + recorded_at: Thu, 14 Aug 2025 16:30:18 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" which doesn''t contain any meaningful information about Ruby. If you''re + looking to learn about Ruby, I can provide some information:\n\nRuby is a + dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan in the mid-1990s. Some key characteristics + of Ruby include:\n\n1. Designed for programmer productivity and happiness\n2. + Supports multiple programming paradigms (object-oriented, functional, imperative)\n3. + Known for its clean, readable syntax\n4. Widely used for web development (especially + with Ruby on Rails framework)\n5. Open-source programming language\n6. Interpreted + language\n7. Supports automatic memory management\n8. 
Strong and dynamic typing\n\nWould + you like me to elaborate on any of these points about Ruby?"}]},{"role":"user","content":[{"type":"text","text":"Tell + me more about Ruby","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163018Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 4a6501317564f06251210000c287d721c5e7ffc6fcc43e78c536c83e7f05c59d + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=ad565dd91729662df099cafef1da0ac46caa51c4898cd052c96437ffd123dbff + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:25 GMT + Content-Type: + - application/json + Content-Length: + - '1729' + Connection: + - keep-alive + X-Amzn-Requestid: + - a2de6c3a-edb9-4053-ab79-9090b851a001 + X-Amzn-Bedrock-Invocation-Latency: + - '6605' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '203' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2745' + X-Amzn-Bedrock-Output-Token-Count: + - '327' + X-Amzn-Bedrock-Input-Token-Count: + - '4' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01QBRJLbWgYrLtbtwmcqjhpN","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Here''s + a more detailed overview of Ruby:\n\nLanguage Design and Philosophy:\n- Created + by Yukihiro Matsumoto in 1995\n- Designed with the principle of \"developer + happiness\"\n- Follows the principle of \"least surprise\" - code should behave + intuitively\n- Emphasizes human-readable code and programmer productivity\n\nKey + Technical Features:\n- Fully object-oriented: everything is an object\n- Dynamic + typing\n- Automatic memory management (garbage collection)\n- Support for + functional programming paradigms\n- Built-in support for metaprogramming\n- + Supports multiple inheritance through modules\n\nPopular Use Cases:\n1. Web + Development\n- Ruby on Rails framework is extremely popular\n- Used by companies + like Airbnb, GitHub, Shopify\n\n2. Scripting and Automation\n- Great for writing + quick scripts\n- Used in system administration\n- Supports cross-platform + scripting\n\n3. 
DevOps and Infrastructure\n- Used in configuration management\n- + Tools like Chef and Puppet are written in Ruby\n\nSyntax Characteristics:\n- + Clean and expressive syntax\n- Uses snake_case for method and variable names\n- + Optional parentheses in method calls\n- Blocks and lambdas are first-class + citizens\n\nCommunity and Ecosystem:\n- Strong, supportive open-source community\n- + RubyGems package manager\n- Regular language updates\n- Annual RubyConf and + regional conferences\n\nWould you like me to elaborate on any of these aspects?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":203,"cache_read_input_tokens":2745,"output_tokens":327}}' + recorded_at: Thu, 14 Aug 2025 16:30:25 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_gemini_provider_gemini-2_5-flash_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_gemini_provider_gemini-2_5-flash_reports_cached_tokens.yml new file mode 100644 index 00000000..b99386a8 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_gemini_provider_gemini-2_5-flash_reports_cached_tokens.yml @@ -0,0 +1,167 @@ +--- +http_interactions: +- request: + method: post + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + body: + encoding: UTF-8 + string: '{"contents":[{"role":"user","parts":[{"text":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased + on the above, tell me about Ruby"}]}],"generationConfig":{"temperature":0.7}}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Goog-Api-Key: + - "" + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - application/json; charset=UTF-8 + Vary: + - Origin + - Referer + - X-Origin + Date: + - Thu, 14 Aug 2025 16:24:24 GMT + Server: + - scaffolding on HTTPServer2 + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + X-Content-Type-Options: + - nosniff + Server-Timing: + - gfet4t7; dur=1217 + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Transfer-Encoding: + - chunked + body: + encoding: ASCII-8BIT + string: | + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": "You've provided a long string of \"b\" characters. 
This doesn't contain any information about \"Ruby\".\n\nIf you'd like to know about Ruby, please provide some text or context that actually mentions her, or ask a question directly about her. For example, you could ask:\n\n* \"Tell me about the programming language Ruby.\"\n* \"Who is Ruby Bridges?\"\n* \"What is the gemstone Ruby known for?\"\n\nWithout any relevant information, I can't tell you anything about her." + } + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0 + } + ], + "usageMetadata": { + "promptTokenCount": 2572, + "candidatesTokenCount": 107, + "totalTokenCount": 2679, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2572 + } + ] + }, + "modelVersion": "gemini-2.5-flash", + "responseId": "uA2eaMWqFt-Hz7IPke-Z2Aw" + } + recorded_at: Thu, 14 Aug 2025 16:24:24 GMT +- request: + method: post + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + body: + encoding: UTF-8 + string: '{"contents":[{"role":"user","parts":[{"text":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased + on the above, tell me about Ruby"}]}],"generationConfig":{"temperature":0.7}}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Goog-Api-Key: + - "" + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - application/json; charset=UTF-8 + Vary: + - Origin + - Referer + - X-Origin + Date: + - Thu, 14 Aug 2025 16:24:27 GMT + Server: + - scaffolding on HTTPServer2 + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + X-Content-Type-Options: + - nosniff + Server-Timing: + - gfet4t7; dur=3019 + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Transfer-Encoding: + - chunked + body: + encoding: ASCII-8BIT + string: | + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": "Based on the text you provided, which consists solely of repeated 'a' and 'b' characters, I cannot tell you anything about \"Ruby.\"\n\nThe provided text does not contain any information related to the Ruby programming language, gemstones, or any other topic that \"Ruby\" might refer to.\n\nIf you'd like to know about Ruby, please provide some relevant text or ask me a general question about it!" 
+ } + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0 + } + ], + "usageMetadata": { + "promptTokenCount": 2572, + "candidatesTokenCount": 85, + "totalTokenCount": 3080, + "cachedContentTokenCount": 2030, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2572 + } + ], + "cacheTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2030 + } + ], + "thoughtsTokenCount": 423 + }, + "modelVersion": "gemini-2.5-flash", + "responseId": "uw2eaOjOG7itz7IPo7Ot0A8" + } + recorded_at: Thu, 14 Aug 2025 16:24:27 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_openai_provider_gpt-4_1-nano_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_openai_provider_gpt-4_1-nano_reports_cached_tokens.yml new file mode 100644 index 00000000..35116cca --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_openai_provider_gpt-4_1-nano_reports_cached_tokens.yml @@ -0,0 +1,227 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased + on the above, tell me about Ruby"}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:30 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '2368' + Openai-Project: + - proj_j3YWwie2yjmMHTGYtUxoTOJ7 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '2742' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149995637' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 1ms + X-Request-Id: + - "" + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + 
string: | + { + "id": "chatcmpl-C4V2auul1KMB2q1MI602aVLBEYb82", + "object": "chat.completion", + "created": 1755188668, + "model": "gpt-4.1-nano-2025-04-14", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The provided text is an extremely long string composed mostly of the letters \"a\" and \"b\" repeated many times. It does not contain any specific information, words, or data related to \"Ruby\" or any other subject. Therefore, I cannot extract or infer any details about Ruby from this text.\n\nIf you have a specific question about Ruby (the programming language, the gemstone, or any other context), please provide more details or clarify your request!", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 2323, + "completion_tokens": 91, + "total_tokens": 2414, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_f12167b370" + } + recorded_at: Thu, 14 Aug 2025 16:24:30 GMT +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased + on the above, tell me about Ruby"}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:33 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '2485' + Openai-Project: + - proj_j3YWwie2yjmMHTGYtUxoTOJ7 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '2664' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149995637' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 1ms + X-Request-Id: + - "" + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; 
includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-C4V2dbv3q1PATuk3Hd2iN2BgaCrtE", + "object": "chat.completion", + "created": 1755188671, + "model": "gpt-4.1-nano-2025-04-14", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The provided text is an extremely long string consisting primarily of repeated 'a's followed by repeated 'b's, with no meaningful information or context related to Ruby. Therefore, I do not have any specific details about Ruby from this data.\n\nIf you are referring to Ruby as a programming language, I can tell you that:\n- Ruby is a dynamic, open-source programming language focused on simplicity and productivity.\n- It was created by Yukihiro \"Matz\" Matsumoto in the mid-1990s.\n- Ruby is known for its elegant syntax that is natural to read and write.\n- It is widely used for web development, particularly with the Ruby on Rails framework.\n- Ruby supports multiple programming paradigms, including procedural, object-oriented, and functional programming.\n\nPlease let me know if you want more specific information about Ruby or if there's a particular aspect you're interested in!", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 2323, + "completion_tokens": 176, + "total_tokens": 2499, + "prompt_tokens_details": { + "cached_tokens": 2176, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_f12167b370" + } + recorded_at: Thu, 14 Aug 2025 16:24:33 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/active_record/acts_as_spec.rb b/spec/ruby_llm/active_record/acts_as_spec.rb index 45832676..471d1881 100644 --- a/spec/ruby_llm/active_record/acts_as_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_spec.rb @@ -570,4 +570,16 @@ def uploaded_file(path, type) expect(messages_after_last_user).to be_empty end end + + describe 'prompt caching' do + let(:model) { 'claude-3-5-haiku-20241022' } + + it 'allows prompt caching' do + chat = Chat.create!(model_id: model) + chat.cache_prompts(system: true, tools: true, user: true) + + response = chat.ask('Hello') + expect(response.raw.env.request_body).to include('"cache_control":{"type":"ephemeral"}') + end + end end diff --git a/spec/ruby_llm/chat_complete_with_prompt_caching_spec.rb b/spec/ruby_llm/chat_complete_with_prompt_caching_spec.rb new file mode 100644 index 00000000..313b358b --- /dev/null +++ b/spec/ruby_llm/chat_complete_with_prompt_caching_spec.rb @@ -0,0 +1,113 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Chat, '.complete with prompt caching' do + include_context 'with configured RubyLLM' + + class DescribeRubyDev < RubyLLM::Tool # rubocop:disable Lint/ConstantDefinitionInBlock,RSpec/LeakyConstantDeclaration + description MASSIVE_TEXT_FOR_PROMPT_CACHING + + def execute + 'Ruby is a great language for building web applications.' 
+    end
+  end
+
+  CACHING_MODELS.each do |model_info|
+    provider = model_info[:provider]
+    model = model_info[:model]
+
+    describe "with #{provider} provider (#{model})" do
+      let(:chat) { RubyLLM.chat(model: model, provider: provider).with_temperature(0.7) }
+
+      context 'with system message caching' do
+        it 'adds cache_control to the last system message when system caching is requested' do
+          chat.with_instructions(MASSIVE_TEXT_FOR_PROMPT_CACHING)
+          chat.cache_prompts(system: true)
+
+          response = chat.ask('What are the key principles you follow?')
+
+          expect(response.cache_creation_tokens).to be_positive
+
+          response = chat.ask('What are the key principles you follow?')
+
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with user message caching' do
+        it 'adds cache_control to user messages when user caching is requested' do
+          chat.cache_prompts(user: true)
+          response = chat.ask("#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nBased on the above, tell me about Ruby")
+
+          expect(response.cache_creation_tokens).to be_positive
+
+          response = chat.ask('Tell me more about Ruby')
+
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with tool definition caching' do
+        it 'adds cache_control to tool definitions when tools caching is requested' do
+          chat.with_tools(DescribeRubyDev)
+          chat.cache_prompts(tools: true)
+
+          response = chat.ask('Tell me about Ruby')
+
+          expect(chat.messages[1].cache_creation_tokens).to be_positive
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with multiple caching types' do
+        it 'handles multiple caching types together' do
+          chat.with_tools(DescribeRubyDev)
+          chat.with_instructions(MASSIVE_TEXT_FOR_PROMPT_CACHING)
+          chat.cache_prompts(system: true, tools: true, user: true)
+
+          response = chat.ask("#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nBased on the above, tell me about Ruby")
+
+          expect(chat.messages[2].cache_creation_tokens).to be_positive
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with streaming' do
+        it 'reports cached tokens' do
+          chat.cache_prompts(user: true)
+          response = chat.ask("#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nCount from 1 to 3") do |chunk|
+            # do nothing
+          end
+
+          expect(response.cache_creation_tokens).to be_positive
+
+          response = chat.ask("#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nCount from 1 to 3") do |chunk|
+            # do nothing
+          end
+
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+    end
+  end
+
+  CACHED_MODELS.each do |model_info|
+    provider = model_info[:provider]
+    model = model_info[:model]
+
+    describe "with #{provider} provider (#{model})" do
+      let(:chat_first) { RubyLLM.chat(model: model, provider: provider).with_temperature(0.7) }
+      let(:chat_second) { RubyLLM.chat(model: model, provider: provider).with_temperature(0.7) }
+
+      it 'reports cached tokens' do
+        question = "#{MASSIVE_TEXT_FOR_PROMPT_CACHE_REPORTING}\n\nBased on the above, tell me about Ruby"
+        response_first = chat_first.ask question
+        response_second = chat_second.ask question
+
+        expect(response_first.cached_tokens).to be_zero
+        expect(response_second.cached_tokens).to be_positive
+      end
+    end
+  end
+end
diff --git a/spec/ruby_llm/chat_error_spec.rb b/spec/ruby_llm/chat_error_spec.rb
index 31d82067..cb00f6eb 100644
--- a/spec/ruby_llm/chat_error_spec.rb
+++ b/spec/ruby_llm/chat_error_spec.rb
@@ -76,7 +76,7 @@
       Psych::Parser.code_point_limit = 20_000_000 if Psych::Parser.respond_to?(:code_point_limit=)

       # Create a huge conversation (matching in spec_helper)
-      massive_text = 'a' * 1_000_000
+      massive_text = MASSIVE_TEXT

       # Create a few copies in the conversation
       5.times do
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 3ce35494..3a4f9402 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -44,6 +44,10 @@
 require 'webmock/rspec'
 require_relative 'support/streaming_error_helpers'

+MASSIVE_TEXT = 'a' * 1_000_000
+MASSIVE_TEXT_FOR_PROMPT_CACHING = 'a' * (2048 * 4)
+MASSIVE_TEXT_FOR_PROMPT_CACHE_REPORTING = (MASSIVE_TEXT_FOR_PROMPT_CACHING * 2) + ('b' * 1024)
+
 # VCR Configuration
 VCR.configure do |config|
   config.cassette_library_dir = 'spec/fixtures/vcr_cassettes'
@@ -90,7 +94,11 @@
   config.filter_sensitive_data('') { |interaction| interaction.response.headers['Cf-Ray']&.first }

   # Filter large strings used to test "context length exceeded" error handling
-  config.filter_sensitive_data('') { 'a' * 1_000_000 }
+  config.filter_sensitive_data('') { MASSIVE_TEXT }
+
+  # Filter large strings used to test prompt caching
+  config.filter_sensitive_data('') { MASSIVE_TEXT_FOR_PROMPT_CACHING }
+  config.filter_sensitive_data('') { MASSIVE_TEXT_FOR_PROMPT_CACHE_REPORTING }

   # Filter cookies
   config.before_record do |interaction|
@@ -187,3 +195,15 @@
   { provider: :openai, model: 'text-embedding-3-small' },
   { provider: :mistral, model: 'mistral-embed' }
 ].freeze
+
+# Models that require prompt caching configuration
+CACHING_MODELS = [
+  { provider: :anthropic, model: 'claude-3-5-haiku-20241022' },
+  { provider: :bedrock, model: 'anthropic.claude-3-5-haiku-20241022-v1:0' }
+].freeze
+
+# Models that report cached tokens
+CACHED_MODELS = [
+  { provider: :gemini, model: 'gemini-2.5-flash' },
+  { provider: :openai, model: 'gpt-4.1-nano' }
+].freeze