From c81af10af29655e200d9835c397103ae96bf059b Mon Sep 17 00:00:00 2001 From: Daniel Friis Date: Wed, 2 Apr 2025 10:11:05 +0100 Subject: [PATCH 1/4] Add structured output class and tests for the class --- lib/ruby_llm/chat.rb | 11 ++ lib/ruby_llm/model_info.rb | 5 +- lib/ruby_llm/structured_output.rb | 205 ++++++++++++++++++++++++ spec/ruby_llm/chat_functions_spec.rb | 15 ++ spec/ruby_llm/structured_output_spec.rb | 129 +++++++++++++++ 5 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 lib/ruby_llm/structured_output.rb create mode 100644 spec/ruby_llm/structured_output_spec.rb diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index 9c6d29419..bf50a4582 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -19,6 +19,7 @@ def initialize(model: nil, provider: nil) @temperature = 0.7 @messages = [] @tools = {} + @structured_output_schema = nil @on = { new_message: nil, end_message: nil @@ -47,6 +48,16 @@ def with_tools(*tools) self end + def with_structured_output(schema) + unless @model.supports_structured_output + raise UnsupportedStructuredOutputError, "Model #{@model.id} doesn't support structured output" + end + + structured_output_schema_instance = schema.is_a?(Class) ? schema.new : schema + @structured_output_schema = structured_output_schema_instance.to_hash + self + end + def with_model(model_id, provider: nil) @model = Models.find model_id, provider @provider = Provider.providers[@model.provider.to_sym] || raise(Error, "Unknown provider: #{@model.provider}") diff --git a/lib/ruby_llm/model_info.rb b/lib/ruby_llm/model_info.rb index 31b2e8b1f..e43ff1e4b 100644 --- a/lib/ruby_llm/model_info.rb +++ b/lib/ruby_llm/model_info.rb @@ -15,7 +15,8 @@ module RubyLLM class ModelInfo attr_reader :id, :created_at, :display_name, :provider, :metadata, :context_window, :max_tokens, :supports_vision, :supports_functions, - :supports_json_mode, :input_price_per_million, :output_price_per_million, :type, :family + :supports_structured_output, :supports_json_mode, :input_price_per_million, + :output_price_per_million, :type, :family def initialize(data) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength @id = data[:id] @@ -28,6 +29,7 @@ def initialize(data) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength @family = data[:family] @supports_vision = data[:supports_vision] @supports_functions = data[:supports_functions] + @supports_structured_output = data[:supports_structured_output] @supports_json_mode = data[:supports_json_mode] @input_price_per_million = data[:input_price_per_million] @output_price_per_million = data[:output_price_per_million] @@ -46,6 +48,7 @@ def to_h # rubocop:disable Metrics/MethodLength family: family, supports_vision: supports_vision, supports_functions: supports_functions, + supports_structured_output: supports_structured_output, supports_json_mode: supports_json_mode, input_price_per_million: input_price_per_million, output_price_per_million: output_price_per_million, diff --git a/lib/ruby_llm/structured_output.rb b/lib/ruby_llm/structured_output.rb new file mode 100644 index 000000000..71ea97279 --- /dev/null +++ b/lib/ruby_llm/structured_output.rb @@ -0,0 +1,205 @@ +# frozen_string_literal: true + +module RubyLLM + module StructuredOutput + class Schema + MAX_OBJECT_PROPERTIES = 100 + MAX_NESTING_DEPTH = 5 + + class << self + def string(name = nil, enum: nil, description: nil) + schema = { type: 'string', enum: enum, description: description }.compact + name ? add_property(name, schema) : schema + end + + def number(name = nil, description: nil) + schema = { type: 'number', description: description }.compact + name ? add_property(name, schema) : schema + end + + def boolean(name = nil, description: nil) + schema = { type: 'boolean', description: description }.compact + name ? add_property(name, schema) : schema + end + + def null(name = nil, description: nil) + schema = { type: 'null', description: description }.compact + name ? add_property(name, schema) : schema + end + + def object(name = nil, description: nil, &block) + sub_schema = Class.new(Schema) + sub_schema.class_eval(&block) + + schema = { + type: 'object', + properties: sub_schema.properties, + required: sub_schema.required, + additionalProperties: false, + description: description + }.compact + + name ? add_property(name, schema) : schema + end + + def array(name, type = nil, description: nil, &block) + items = if block_given? + collector = SchemaCollector.new + collector.instance_eval(&block) + collector.schemas.first + elsif type.is_a?(Symbol) + case type + when :string, :number, :boolean, :null + send(type) + else + ref(type) + end + else + raise ArgumentError, "Invalid array type: #{type}" + end + + add_property(name, { + type: 'array', + description: description, + items: items + }.compact) + end + + def any_of(name, description: nil, &block) + collector = SchemaCollector.new + collector.instance_eval(&block) + + add_property(name, { + description: description, + anyOf: collector.schemas + }.compact) + end + + def ref(schema_name) + { '$ref' => "#/$defs/#{schema_name}" } + end + + def properties + @properties ||= {} + end + + def required + @required ||= [] + end + + def definitions + @definitions ||= {} + end + + def define(name, &) + sub_schema = Class.new(Schema) + sub_schema.class_eval(&) + + definitions[name] = { + type: 'object', + properties: sub_schema.properties, + required: sub_schema.required + } + end + + private + + def add_property(name, definition) + properties[name.to_sym] = definition + required << name.to_sym + end + end + + # Simple collector that just stores schemas + class SchemaCollector + attr_reader :schemas + + def initialize + @schemas = [] + end + + def method_missing(method_name, ...) + if Schema.respond_to?(method_name) + @schemas << Schema.send(method_name, ...) + else + super + end + end + + def respond_to_missing?(method_name, include_private = false) + Schema.respond_to?(method_name) || super + end + end + + def initialize(name = nil) + @name = name || self.class.name + validate_schema + end + + def to_hash + { + name: @name, + description: 'Schema for the structured response', + schema: { + type: 'object', + properties: self.class.properties, + required: self.class.required, + additionalProperties: false, + strict: true, + '$defs' => self.class.definitions + } + } + end + + private + + # Validate the schema against defined limits + def validate_schema + properties_count = count_properties(self.class.properties) + raise 'Exceeded maximum number of object properties' if properties_count > MAX_OBJECT_PROPERTIES + + max_depth = calculate_max_depth(self.class.properties) + raise 'Exceeded maximum nesting depth' if max_depth > MAX_NESTING_DEPTH + end + + # Count the total number of properties in the schema + def count_properties(schema) + return 0 unless schema.is_a?(Hash) && schema[:properties] + + count = schema[:properties].size + schema[:properties].each_value do |prop| + count += count_properties(prop) + end + count + end + + # Calculate the maximum nesting depth of the schema + def calculate_max_depth(schema, current_depth = 1) + return current_depth unless schema.is_a?(Hash) + + if schema[:type] == 'object' && schema[:properties] + child_depths = schema[:properties].values.map do |prop| + calculate_max_depth(prop, current_depth + 1) + end + [current_depth, child_depths.max].compact.max + elsif schema[:items] # For arrays + calculate_max_depth(schema[:items], current_depth + 1) + else + current_depth + end + end + + def method_missing(method_name, ...) + if respond_to_missing?(method_name) + send(method_name, ...) + else + super + end + end + + def respond_to_missing?(method_name, include_private = false) + %i[string number boolean array object any_of null].include?(method_name) || super + end + end + end +end diff --git a/spec/ruby_llm/chat_functions_spec.rb b/spec/ruby_llm/chat_functions_spec.rb index 7ee926cb4..b59e34e5d 100644 --- a/spec/ruby_llm/chat_functions_spec.rb +++ b/spec/ruby_llm/chat_functions_spec.rb @@ -41,6 +41,21 @@ def name = 'tool2' end end + describe '#with_structured_output' do + it 'adds a structured output schema' do # rubocop:disable RSpec/ExampleLength + chat = described_class.new + + schema = Class.new(RubyLLM::StructuredOutput::Schema) do + string :name + number :age + end + + chat.with_structured_output(schema) + + expect(chat.structured_output_schema).to eq(schema) + end + end + describe '#with_model' do it 'changes the model and returns self' do # rubocop:disable RSpec/MultipleExpectations chat = described_class.new(model: 'gpt-4o-mini') diff --git a/spec/ruby_llm/structured_output_spec.rb b/spec/ruby_llm/structured_output_spec.rb new file mode 100644 index 000000000..6235f3ae5 --- /dev/null +++ b/spec/ruby_llm/structured_output_spec.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::StructuredOutput::Schema do + describe 'schema definition' do + subject { schema.to_hash } + + let(:schema_class) do + Class.new(described_class) do + string :name, description: "User's name" + number :age + boolean :active + + object :address do + string :street + string :city + end + + array :tags, :string, description: 'User tags' + + array :contacts do + object do + string :email + string :phone + end + end + + any_of :status do + string enum: %w[active pending] + null + end + + define :location do + string :latitude + string :longitude + end + + array :locations, :location + end + end + + let(:schema) { schema_class.new } + + it 'generates the correct JSON schema' do + expect(subject).to include( + name: schema_class.name, + description: 'Schema for the structured response' + ) + + properties = subject[:schema][:properties] + + # Test basic types + expect(properties[:name]).to eq({ type: 'string', description: "User's name" }) + expect(properties[:age]).to eq({ type: 'number' }) + expect(properties[:active]).to eq({ type: 'boolean' }) + + # Test nested object + expect(properties[:address]).to include( + type: 'object', + properties: { + street: { type: 'string' }, + city: { type: 'string' } + }, + required: %i[street city], + additionalProperties: false + ) + + # Test arrays + expect(properties[:tags]).to eq({ + type: 'array', + description: 'User tags', + items: { type: 'string' } + }) + + expect(properties[:contacts]).to include( + type: 'array', + items: { + type: 'object', + properties: { + email: { type: 'string' }, + phone: { type: 'string' } + }, + required: %i[email phone], + additionalProperties: false + } + ) + + # Test any_of + expect(properties[:status]).to include( + anyOf: [ + { type: 'string', enum: %w[active pending] }, + { type: 'null' } + ] + ) + + # Test references + expect(properties[:locations]).to eq({ + type: 'array', + items: { '$ref' => '#/$defs/location' } + }) + + # Test definitions + expect(subject[:schema]['$defs']).to include( + location: { + type: 'object', + properties: { + latitude: { type: 'string' }, + longitude: { type: 'string' } + }, + required: %i[latitude longitude] + } + ) + end + + it 'includes all properties in required array' do + expect(subject[:schema][:required]).to contain_exactly( + :name, :age, :active, :address, :tags, :contacts, :status, :locations + ) + end + + it 'enforces schema constraints' do + expect(subject[:schema]).to include( + additionalProperties: false, + strict: true + ) + end + end +end From 5f670ea1f80dfbc44a26e85412b483c5e816167e Mon Sep 17 00:00:00 2001 From: Daniel Friis Date: Sat, 19 Apr 2025 15:31:03 +0100 Subject: [PATCH 2/4] Focus on schema building only --- lib/ruby_llm/chat.rb | 11 ----------- lib/ruby_llm/model_info.rb | 5 +---- lib/ruby_llm/structured_output.rb | 2 +- spec/ruby_llm/chat_functions_spec.rb | 15 --------------- 4 files changed, 2 insertions(+), 31 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index bf50a4582..9c6d29419 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -19,7 +19,6 @@ def initialize(model: nil, provider: nil) @temperature = 0.7 @messages = [] @tools = {} - @structured_output_schema = nil @on = { new_message: nil, end_message: nil @@ -48,16 +47,6 @@ def with_tools(*tools) self end - def with_structured_output(schema) - unless @model.supports_structured_output - raise UnsupportedStructuredOutputError, "Model #{@model.id} doesn't support structured output" - end - - structured_output_schema_instance = schema.is_a?(Class) ? schema.new : schema - @structured_output_schema = structured_output_schema_instance.to_hash - self - end - def with_model(model_id, provider: nil) @model = Models.find model_id, provider @provider = Provider.providers[@model.provider.to_sym] || raise(Error, "Unknown provider: #{@model.provider}") diff --git a/lib/ruby_llm/model_info.rb b/lib/ruby_llm/model_info.rb index e43ff1e4b..31b2e8b1f 100644 --- a/lib/ruby_llm/model_info.rb +++ b/lib/ruby_llm/model_info.rb @@ -15,8 +15,7 @@ module RubyLLM class ModelInfo attr_reader :id, :created_at, :display_name, :provider, :metadata, :context_window, :max_tokens, :supports_vision, :supports_functions, - :supports_structured_output, :supports_json_mode, :input_price_per_million, - :output_price_per_million, :type, :family + :supports_json_mode, :input_price_per_million, :output_price_per_million, :type, :family def initialize(data) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength @id = data[:id] @@ -29,7 +28,6 @@ def initialize(data) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength @family = data[:family] @supports_vision = data[:supports_vision] @supports_functions = data[:supports_functions] - @supports_structured_output = data[:supports_structured_output] @supports_json_mode = data[:supports_json_mode] @input_price_per_million = data[:input_price_per_million] @output_price_per_million = data[:output_price_per_million] @@ -48,7 +46,6 @@ def to_h # rubocop:disable Metrics/MethodLength family: family, supports_vision: supports_vision, supports_functions: supports_functions, - supports_structured_output: supports_structured_output, supports_json_mode: supports_json_mode, input_price_per_million: input_price_per_million, output_price_per_million: output_price_per_million, diff --git a/lib/ruby_llm/structured_output.rb b/lib/ruby_llm/structured_output.rb index 71ea97279..2f3e0cd31 100644 --- a/lib/ruby_llm/structured_output.rb +++ b/lib/ruby_llm/structured_output.rb @@ -136,7 +136,7 @@ def initialize(name = nil) validate_schema end - def to_hash + def json_schema { name: @name, description: 'Schema for the structured response', diff --git a/spec/ruby_llm/chat_functions_spec.rb b/spec/ruby_llm/chat_functions_spec.rb index b59e34e5d..7ee926cb4 100644 --- a/spec/ruby_llm/chat_functions_spec.rb +++ b/spec/ruby_llm/chat_functions_spec.rb @@ -41,21 +41,6 @@ def name = 'tool2' end end - describe '#with_structured_output' do - it 'adds a structured output schema' do # rubocop:disable RSpec/ExampleLength - chat = described_class.new - - schema = Class.new(RubyLLM::StructuredOutput::Schema) do - string :name - number :age - end - - chat.with_structured_output(schema) - - expect(chat.structured_output_schema).to eq(schema) - end - end - describe '#with_model' do it 'changes the model and returns self' do # rubocop:disable RSpec/MultipleExpectations chat = described_class.new(model: 'gpt-4o-mini') From 3802eb5fa1fb076cf869619effc183e4687d52b4 Mon Sep 17 00:00:00 2001 From: Daniel Friis Date: Sat, 19 Apr 2025 15:32:14 +0100 Subject: [PATCH 3/4] Use .json_schema instead of .to_hash --- spec/ruby_llm/structured_output_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/ruby_llm/structured_output_spec.rb b/spec/ruby_llm/structured_output_spec.rb index 6235f3ae5..48cf02508 100644 --- a/spec/ruby_llm/structured_output_spec.rb +++ b/spec/ruby_llm/structured_output_spec.rb @@ -4,7 +4,7 @@ RSpec.describe RubyLLM::StructuredOutput::Schema do describe 'schema definition' do - subject { schema.to_hash } + subject { schema.json_schema } let(:schema_class) do Class.new(described_class) do From a1353899c4ac2fc6017a272db852e443474aba56 Mon Sep 17 00:00:00 2001 From: Daniel Friis Date: Sat, 19 Apr 2025 15:39:15 +0100 Subject: [PATCH 4/4] Clean up specs --- spec/ruby_llm/structured_output_spec.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spec/ruby_llm/structured_output_spec.rb b/spec/ruby_llm/structured_output_spec.rb index 48cf02508..5d8db3f29 100644 --- a/spec/ruby_llm/structured_output_spec.rb +++ b/spec/ruby_llm/structured_output_spec.rb @@ -2,9 +2,9 @@ require 'spec_helper' -RSpec.describe RubyLLM::StructuredOutput::Schema do +RSpec.describe RubyLLM::StructuredOutput::Schema do # rubocop:disable RSpec/SpecFilePathFormat describe 'schema definition' do - subject { schema.json_schema } + json_output { schema.json_schema } let(:schema_class) do Class.new(described_class) do @@ -42,13 +42,13 @@ let(:schema) { schema_class.new } - it 'generates the correct JSON schema' do - expect(subject).to include( + it 'generates the correct JSON schema' do # rubocop:disable RSpec/ExampleLength,RSpec/MultipleExpectations + expect(json_output).to include( name: schema_class.name, description: 'Schema for the structured response' ) - properties = subject[:schema][:properties] + properties = json_output[:schema][:properties] # Test basic types expect(properties[:name]).to eq({ type: 'string', description: "User's name" }) @@ -101,7 +101,7 @@ }) # Test definitions - expect(subject[:schema]['$defs']).to include( + expect(json_output[:schema]['$defs']).to include( location: { type: 'object', properties: { @@ -114,13 +114,13 @@ end it 'includes all properties in required array' do - expect(subject[:schema][:required]).to contain_exactly( + expect(json_output[:schema][:required]).to contain_exactly( :name, :age, :active, :address, :tags, :contacts, :status, :locations ) end it 'enforces schema constraints' do - expect(subject[:schema]).to include( + expect(json_output[:schema]).to include( additionalProperties: false, strict: true )