Skip to content
Open
6 changes: 6 additions & 0 deletions lib/ruby_llm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
require 'event_stream_parser'
require 'faraday'
require 'faraday/retry'
require 'faraday/multipart'

require 'json'
require 'logger'
require 'securerandom'
Expand Down Expand Up @@ -50,6 +52,10 @@ def paint(...)
Image.paint(...)
end

# Image editing entry point: forwards every argument (and any block) to Image.edit.
def edit(...) = Image.edit(...)

# Returns the shared model registry (Models.instance).
def models = Models.instance
Expand Down
16 changes: 16 additions & 0 deletions lib/ruby_llm/connection_multipart.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module RubyLLM
  # Connection subclass whose POST requests are sent as multipart/form-data.
  class ConnectionMultipart < Connection
    # Posts +payload+ to +url+ through the wrapped Faraday connection.
    # Provider headers are merged in when the provider exposes +headers+, the
    # Content-Type is set to multipart/form-data, and the request is then handed
    # to the caller's block (if one was given) for further customisation.
    def post(url, payload, &block)
      @connection.post(url, payload) do |request|
        request.headers.merge!(@provider.headers(@config)) if @provider.respond_to?(:headers)
        request.headers['Content-Type'] = 'multipart/form-data'
        block&.call(request)
      end
    end

    # Runs the parent's middleware setup, then registers Faraday's :multipart
    # request middleware for the multipart/form-data content type.
    def setup_middleware(faraday)
      super
      faraday.request(:multipart, content_type: 'multipart/form-data')
    end
  end
end
1 change: 1 addition & 0 deletions lib/ruby_llm/error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class ConfigurationError < StandardError; end
class InvalidRoleError < StandardError; end
class ModelNotFoundError < StandardError; end
class UnsupportedFunctionsError < StandardError; end
# NOTE(review): presumably for network-level failures (going by the name); nothing in the visible changes raises it — confirm where it is used.
class NetworkError < StandardError; end

# Error classes for different HTTP status codes
class BadRequestError < Error; end
Expand Down
39 changes: 36 additions & 3 deletions lib/ruby_llm/image.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ module RubyLLM
# Provides an interface to image generation capabilities
# from providers like DALL-E and Gemini's Imagen.
class Image
attr_reader :url, :data, :mime_type, :revised_prompt, :model_id
attr_reader :url, :data, :mime_type, :revised_prompt, :model, :usage

def initialize(url: nil, data: nil, mime_type: nil, revised_prompt: nil, model_id: nil)
def initialize(model:, url: nil, data: nil, mime_type: nil, revised_prompt: nil, usage: {})
@url = url
@data = data
@mime_type = mime_type
@revised_prompt = revised_prompt
@model_id = model_id
@usage = usage
@model = model
end

def base64?
Expand Down Expand Up @@ -51,5 +52,37 @@ def self.paint(prompt, # rubocop:disable Metrics/ParameterLists
connection = context ? context.connection_for(provider) : provider.connection(config)
provider.paint(prompt, model: model_id, size:, connection:)
end

# Edits image(s) according to +prompt+ and returns whatever the provider's +edit+ returns.
#
# Resolution steps:
# * config comes from +context+ when given, otherwise from RubyLLM.config
# * an explicit +model+ is resolved through Models.resolve (which also yields the provider)
# * with no model, config.default_image_model is used and, if no provider was
#   passed in, Provider.for picks one for that model id
#
# +with+ and +options+ are passed through to the provider untouched.
#
# NOTE(review): with a +context+ the connection comes from context.connection_for,
# not provider.connection_multipart — confirm that connection can send multipart bodies.
def self.edit(prompt, # rubocop:disable Metrics/ParameterLists
              model: nil,
              provider: nil,
              assume_model_exists: false,
              context: nil,
              with: {},
              options: {})
  config = context&.config || RubyLLM.config

  if model
    model, provider = Models.resolve(model, provider: provider, assume_exists: assume_model_exists)
  end
  model_id = model&.id || config.default_image_model

  if provider.nil?
    provider = Provider.for(model_id)
  end

  connection = if context
                 context.connection_for(provider)
               else
                 provider.connection_multipart(config)
               end

  provider.edit(prompt, model: model_id, with:, connection:, options:)
end

# Sum of the input-side and output-side cost of this image.
def total_cost
  spent_on_input = input_cost
  spent_on_output = output_cost
  spent_on_input + spent_on_output
end

# Looks up this image's model in the RubyLLM model registry and caches the
# result in @model_info so the lookup is not repeated on later calls.
def model_info
  return @model_info if @model_info

  @model_info = RubyLLM.models.find(model)
end

# Cost of the input tokens: token count times the model's per-million input price.
#
# A missing token count (usage is nil, or has no 'input_tokens' entry — the
# constructor defaults usage to {}) is treated as zero tokens instead of raising.
# The division is done in floating point so Integer prices are not truncated to 0.
def input_cost
  tokens = (usage || {})['input_tokens'] || 0
  tokens * model_info.input_price_per_million / 1_000_000.0
end

# Cost of the output tokens: token count times the model's per-million output price.
#
# A missing token count (usage is nil, or has no 'output_tokens' entry — the
# constructor defaults usage to {}) is treated as zero tokens instead of raising.
# The division is done in floating point so Integer prices are not truncated to 0.
def output_cost
  tokens = (usage || {})['output_tokens'] || 0
  tokens * model_info.output_price_per_million / 1_000_000.0
end
end
end
2 changes: 1 addition & 1 deletion lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -6073,7 +6073,7 @@
"pricing": {
"text_tokens": {
"standard": {
"input_per_million": 5.0,
"input_per_million": 10.0,
"output_per_million": 40.0
}
}
Expand Down
13 changes: 12 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,14 @@ def embed(text, model:, connection:, dimensions:)
def paint(prompt, model:, size:, connection:)
payload = render_image_payload(prompt, model:, size:)
response = connection.post images_url, payload
parse_image_response response
parse_image_response(response, model:)
end

# Runs an image edit: builds the provider-specific payload, POSTs it to the
# provider's edits endpoint over +connection+, and parses the response.
def edit(prompt, model:, with:, options:, connection:)
  response = connection.post(edits_url, render_edit_payload(prompt, model:, with:, options:))
  parse_edit_response(response, model:)
end

def configured?(config = nil)
Expand Down Expand Up @@ -117,6 +124,10 @@ def connection(config)
@connection ||= Connection.new(self, config)
end

# Returns the multipart connection for this provider, building it on first use.
# The instance is cached in @connection_multipart, so +config+ is only used by
# the call that creates it.
def connection_multipart(config)
  return @connection_multipart if @connection_multipart

  @connection_multipart = ConnectionMultipart.new(self, config)
end

class << self
def extended(base)
base.extend(Methods)
Expand Down
5 changes: 3 additions & 2 deletions lib/ruby_llm/providers/gemini/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def render_image_payload(prompt, model:, size:)
}
end

def parse_image_response(response)
def parse_image_response(response, model:)
data = response.body
image_data = data['predictions']&.first

Expand All @@ -38,7 +38,8 @@ def parse_image_response(response)

Image.new(
data: base64_data,
mime_type: mime_type
mime_type: mime_type,
model:
)
end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/providers/openai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module OpenAI
extend OpenAI::Streaming
extend OpenAI::Tools
extend OpenAI::Images
extend OpenAI::Edits
extend OpenAI::Media

def self.extended(base)
Expand All @@ -23,6 +24,7 @@ def self.extended(base)
base.extend(OpenAI::Streaming)
base.extend(OpenAI::Tools)
base.extend(OpenAI::Images)
base.extend(OpenAI::Edits)
base.extend(OpenAI::Media)
end

Expand Down
4 changes: 3 additions & 1 deletion lib/ruby_llm/providers/openai/capabilities.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module Capabilities
MODEL_PATTERNS = {
dall_e: /^dall-e/,
chatgpt4o: /^chatgpt-4o/,
gpt_image: /^gpt-image/,
gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
gpt41_mini: /^gpt-4\.1-mini/,
gpt41_nano: /^gpt-4\.1-nano/,
Expand Down Expand Up @@ -105,6 +106,7 @@ def supports_json_mode?(model_id)
end

PRICES = {
gpt_image_1: { input_text: 5.0, input_image: 10.0, output: 8.0, cached_input: 0.5 },
gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
gpt41_nano: { input: 0.1, output: 0.4 },
Expand Down Expand Up @@ -168,7 +170,7 @@ def model_type(model_id)
when /embedding/ then 'embedding'
when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
when 'moderation' then 'moderation'
when /dall/ then 'image'
when /dall-e|gpt-image/ then 'image'
else 'chat'
end
end
Expand Down
41 changes: 41 additions & 0 deletions lib/ruby_llm/providers/openai/edits.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# frozen_string_literal: true

module RubyLLM
  module Providers
    module OpenAI
      # Image editing methods for the OpenAI API integration
      module Edits
        module_function

        # Path of the image edits endpoint.
        def edits_url
          'images/edits'
        end

        # Builds the request payload for an image edit.
        #
        # The images to edit are read from with[:image] and converted to upload
        # parts by ImageAttachments. Caller-supplied +options+ are merged first, so
        # the model, prompt, image and n entries set here take precedence over
        # options that use the same keys.
        #
        # Options:
        # - size: '1024x1024'
        # - quality: 'low'
        # - user: 'user_123'
        # See https://platform.openai.com/docs/api-reference/images/createEdit
        def render_edit_payload(prompt, model:, with:, options:)
          options.merge({
                          model:,
                          prompt:,
                          image: ImageAttachments.new(with[:image]).format,
                          n: 1
                        })
        end

        # Builds an Image from the first entry of the response's 'data' array.
        #
        # The image bytes are taken from the entry's 'b64_json' field and the MIME
        # type is fixed to image/png. When the response carries no 'usage' object an
        # empty hash is passed instead of nil, matching Image's own default.
        def parse_edit_response(response, model:)
          data = response.body
          image_data = data['data'].first
          Image.new(
            data: image_data['b64_json'],
            mime_type: 'image/png',
            usage: data['usage'] || {},
            model:
          )
        end
      end
    end
  end
end
50 changes: 50 additions & 0 deletions lib/ruby_llm/providers/openai/image_attachments.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# frozen_string_literal: true

require 'open-uri' # Added for fetching URLs

module RubyLLM
  module Providers
    module OpenAI
      # Converts image sources (local file paths or http/https URLs) into
      # Faraday::UploadIO parts for a multipart request.
      class ImageAttachments
        # Only sources with an explicit http:// or https:// scheme are fetched
        # remotely; a local path that merely begins with "http" stays a local path.
        REMOTE_URL_PATTERN = %r{\Ahttps?://}i

        # Name used for a remote image whose URL path has no file name component.
        FALLBACK_FILENAME = 'image'

        # @param sources a single source or a list of sources; nil yields no attachments
        def initialize(sources)
          @sources = Array(sources)
        end

        # @return [Array] one Faraday::UploadIO per source, in the original order
        def format
          @sources.map do |source|
            remote?(source) ? from_remote_url(source) : from_local_file(source)
          end
        end

        private

        # True when +source+ is an http(s) URL rather than a file path.
        def remote?(source)
          source.match?(REMOTE_URL_PATTERN)
        end

        # Derives the MIME type from the file extension; anything other than
        # png, gif or webp is reported as image/jpeg.
        def mime_type_for_image(path)
          ext = File.extname(path).downcase.delete('.')
          case ext
          when 'png' then 'image/png'
          when 'gif' then 'image/gif'
          when 'webp' then 'image/webp'
          else 'image/jpeg'
          end
        end

        # Wraps a local file path; the upload is named after the file's basename.
        def from_local_file(source)
          Faraday::UploadIO.new(source, mime_type_for_image(source), File.basename(source))
        end

        # Downloads the remote image via open-uri and wraps the resulting IO,
        # using the content type reported by the response.
        def from_remote_url(source)
          parsed_uri = URI.parse(source)
          io = parsed_uri.open

          # The IO is deliberately left open: it still has to be read when the
          # request body is built. NOTE(review): confirm who closes it afterwards.
          Faraday::UploadIO.new(io, io.content_type, remote_filename(parsed_uri))
        end

        # File name taken from the URL path, or FALLBACK_FILENAME when the path
        # is empty or just "/".
        def remote_filename(uri)
          name = File.basename(uri.path.to_s)
          (name.empty? || name == '/') ? FALLBACK_FILENAME : name
        end
      end
    end
  end
end
4 changes: 2 additions & 2 deletions lib/ruby_llm/providers/openai/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ def render_image_payload(prompt, model:, size:)
}
end

def parse_image_response(response)
def parse_image_response(response, model:)
data = response.body
image_data = data['data'].first

Image.new(
url: image_data['url'],
mime_type: 'image/png', # DALL-E typically returns PNGs
revised_prompt: image_data['revised_prompt'],
model_id: data['model']
model:
)
end
end
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading