
Commit 85ef34f

Responses API:

- Introduced new parameters in CreateModelResponseSettings: prompt, promptCacheKey, background, maxToolCalls, safetyIdentifier, serviceTier, streamOptions, and topLogprobs.
- Added Prompt and StreamOptions case classes to support new features.
- Updated JSON format handling for CreateModelResponseSettings to accommodate the new fields.
- Enhanced unit tests to validate the new settings and their serialization/deserialization.
1 parent c7890a3 commit 85ef34f

File tree: 6 files changed (+369 / -40 lines)


openai-core/src/main/scala/io/cequence/openaiscala/domain/responsesapi/CreateModelResponseSettings.scala

Lines changed: 147 additions & 2 deletions
@@ -11,9 +11,17 @@ import io.cequence.openaiscala.domain.responsesapi.tools.Tool
   * @param include
   *   Specify additional output data to include in the model response. Currently supported
   *   values are:
+  *   - web_search_call.action.sources: Include the sources of the web search tool call.
+  *   - code_interpreter_call.outputs: Includes the outputs of python code execution in code
+  *     interpreter tool call items.
+  *   - computer_call_output.output.image_url: Include image urls from the computer call output.
   *   - file_search_call.results: Include the search results of the file search tool call.
   *   - message.input_image.image_url: Include image urls from the input message.
-  *   - computer_call_output.output.image_url: Include image urls from the computer call output.
+  *   - message.output_text.logprobs: Include logprobs with assistant messages.
+  *   - reasoning.encrypted_content: Includes an encrypted version of reasoning tokens in
+  *     reasoning item outputs. This enables reasoning items to be used in multi-turn
+  *     conversations when using the Responses API statelessly (like when the store parameter is
+  *     set to false, or when an organization is enrolled in the zero data retention program).
   * @param instructions
   *   Inserts a system (or developer) message as the first item in the model's context.
   * @param maxOutputTokens
@@ -52,6 +60,36 @@ import io.cequence.openaiscala.domain.responsesapi.tools.Tool
   * @param user
   *   A unique identifier representing your end-user, which can help OpenAI to monitor and
   *   detect abuse. Learn more.
+  * @param prompt
+  *   Reference to a prompt template and its variables.
+  * @param promptCacheKey
+  *   Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.
+  *   Replaces the user field.
+  * @param background
+  *   Whether to run the model response in the background. Optional, defaults to false.
+  * @param maxToolCalls
+  *   The maximum number of total calls to built-in tools that can be processed in a response.
+  *   This maximum number applies across all built-in tool calls, not per individual tool. Any
+  *   further attempts to call a tool by the model will be ignored. Optional.
+  * @param safetyIdentifier
+  *   A stable identifier used to help detect users of your application that may be violating
+  *   OpenAI's usage policies. The IDs should be a string that uniquely identifies each user. We
+  *   recommend hashing their username or email address, in order to avoid sending us any
+  *   identifying information Optional.
+  * @param serviceTier
+  *   Specifies the processing type used for serving the request.
+  *   - If set to 'auto', then the request will be processed with the service tier configured in
+  *     the Project settings. Unless otherwise configured, the Project will use 'default'.
+  *   - If set to 'default', then the request will be processed with the standard pricing and
+  *     performance for the selected model.
+  *   - If set to 'flex' or 'priority', then the request will be processed with the
+  *     corresponding service tier.
+  *   - When not set, the default behavior is 'auto'.
+  * @param streamOptions
+  *   Options for streaming responses.
+  * @param topLogprobs
+  *   An integer between 0 and 20 specifying the number of most likely tokens to return at each
+  *   token position, each with an associated log probability. Optional
   */
 final case class CreateModelResponseSettings(
   model: String,
@@ -70,5 +108,112 @@ final case class CreateModelResponseSettings(
   tools: Seq[Tool] = Nil,
   topP: Option[Double] = None,
   truncation: Option[TruncationStrategy] = None,
-  user: Option[String] = None
+  user: Option[String] = None,
+  prompt: Option[Prompt] = None,
+  promptCacheKey: Option[String] = None,
+  background: Option[Boolean] = None,
+  maxToolCalls: Option[Int] = None,
+  safetyIdentifier: Option[String] = None,
+  serviceTier: Option[String] = None,
+  streamOptions: Option[StreamOptions] = None,
+  topLogprobs: Option[Int] = None
+)
+
+object CreateModelResponseSettings {
+
+  def toAuxPart1(x: CreateModelResponseSettings) =
+    CreateModelResponseSettingsAuxPart1(
+      model = x.model,
+      include = x.include,
+      instructions = x.instructions,
+      maxOutputTokens = x.maxOutputTokens,
+      metadata = x.metadata,
+      parallelToolCalls = x.parallelToolCalls,
+      previousResponseId = x.previousResponseId,
+      reasoning = x.reasoning,
+      store = x.store,
+      stream = x.stream,
+      temperature = x.temperature,
+      text = x.text
+    )
+
+  def toAuxPart2(x: CreateModelResponseSettings) =
+    CreateModelResponseSettingsAuxPart2(
+      toolChoice = x.toolChoice,
+      tools = x.tools,
+      topP = x.topP,
+      truncation = x.truncation,
+      user = x.user,
+      prompt = x.prompt,
+      promptCacheKey = x.promptCacheKey,
+      background = x.background,
+      maxToolCalls = x.maxToolCalls,
+      safetyIdentifier = x.safetyIdentifier,
+      serviceTier = x.serviceTier,
+      streamOptions = x.streamOptions,
+      topLogprobs = x.topLogprobs
+    )
+
+  private def fromParts(
+    part1: CreateModelResponseSettingsAuxPart1,
+    part2: CreateModelResponseSettingsAuxPart2
+  ) =
+    CreateModelResponseSettings(
+      model = part1.model,
+      include = part1.include,
+      instructions = part1.instructions,
+      maxOutputTokens = part1.maxOutputTokens,
+      metadata = part1.metadata,
+      parallelToolCalls = part1.parallelToolCalls,
+      previousResponseId = part1.previousResponseId,
+      reasoning = part1.reasoning,
+      store = part1.store,
+      stream = part1.stream,
+      temperature = part1.temperature,
+      text = part1.text,
+      toolChoice = part2.toolChoice,
+      tools = part2.tools,
+      topP = part2.topP,
+      truncation = part2.truncation,
+      user = part2.user,
+      prompt = part2.prompt,
+      promptCacheKey = part2.promptCacheKey,
+      background = part2.background,
+      maxToolCalls = part2.maxToolCalls,
+      safetyIdentifier = part2.safetyIdentifier,
+      serviceTier = part2.serviceTier,
+      streamOptions = part2.streamOptions,
+      topLogprobs = part2.topLogprobs
+    )
+}
+
+final case class CreateModelResponseSettingsAuxPart1(
+  model: String,
+  include: Seq[String],
+  instructions: Option[String],
+  maxOutputTokens: Option[Int],
+  metadata: Option[Map[String, String]],
+  parallelToolCalls: Option[Boolean],
+  previousResponseId: Option[String],
+  reasoning: Option[ReasoningConfig],
+  store: Option[Boolean],
+  stream: Option[Boolean],
+  temperature: Option[Double],
+  text: Option[TextResponseConfig]
+)
+
+final case class CreateModelResponseSettingsAuxPart2(
+  toolChoice: Option[ToolChoice],
+  tools: Seq[Tool],
+  topP: Option[Double],
+  truncation: Option[TruncationStrategy],
+  user: Option[String],
+  prompt: Option[Prompt],
+  promptCacheKey: Option[String],
+  background: Option[Boolean],
+  maxToolCalls: Option[Int],
+  safetyIdentifier: Option[String],
+  serviceTier: Option[String],
+  streamOptions: Option[StreamOptions],
+  topLogprobs: Option[Int]
 )
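For orientation, a minimal usage sketch of the new settings fields added by this commit; the model name and all values below are illustrative placeholders, not part of the commit:

import io.cequence.openaiscala.domain.responsesapi.CreateModelResponseSettings

val settings = CreateModelResponseSettings(
  model = "gpt-4.1",                        // placeholder model name
  promptCacheKey = Some("support-flow-v1"), // cache key for similar requests
  background = Some(false),                 // run in the foreground (the default)
  maxToolCalls = Some(5),                   // cap on total built-in tool calls
  safetyIdentifier = Some("user-7f3a9c"),   // e.g. a hashed user id, not raw PII
  serviceTier = Some("auto"),               // or "default", "flex", "priority"
  topLogprobs = Some(3)                     // 0-20 most likely tokens per position
)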

openai-core/src/main/scala/io/cequence/openaiscala/domain/responsesapi/JsonFormats.scala

Lines changed: 93 additions & 37 deletions
@@ -313,8 +313,18 @@ object JsonFormats {
     }
   }

+  private def writesNonEmpty(fieldName: String) = (jsObject: JsObject) => {
+    val include = (jsObject \ fieldName).as[JsArray].value
+    if (include.nonEmpty) jsObject else jsObject.-(fieldName)
+  }
+
+  implicit lazy val promptFormat: OFormat[Prompt] = Json.format[Prompt]
+
+  implicit lazy val streamOptionsFormat: OFormat[StreamOptions] = Json.format[StreamOptions]
+
   // create model response
-  implicit lazy val createModelResponseSettingsReads: Reads[CreateModelResponseSettings] =
+  private implicit lazy val createModelResponseSettingsAuxPart1Reads
+    : Reads[CreateModelResponseSettingsAuxPart1] =
     (
       (__ \ "model").read[String] and
       (__ \ "include").readWithDefault[Seq[String]](Nil) and
@@ -327,20 +337,11 @@ object JsonFormats {
       (__ \ "store").readNullable[Boolean] and
       (__ \ "stream").readNullable[Boolean] and
       (__ \ "temperature").readNullable[Double] and
-      (__ \ "text").readNullable[TextResponseConfig] and
-      (__ \ "tool_choice").readNullable[ToolChoice] and
-      (__ \ "tools").readWithDefault[Seq[Tool]](Nil) and
-      (__ \ "top_p").readNullable[Double] and
-      (__ \ "truncation").readNullable[TruncationStrategy] and
-      (__ \ "user").readNullable[String]
-    )(CreateModelResponseSettings.apply _)
-
-  private def writesNonEmpty(fieldName: String) = (jsObject: JsObject) => {
-    val include = (jsObject \ fieldName).as[JsArray].value
-    if (include.nonEmpty) jsObject else jsObject.-(fieldName)
-  }
+      (__ \ "text").readNullable[TextResponseConfig]
+    )(CreateModelResponseSettingsAuxPart1.apply _)

-  implicit lazy val createModelResponseSettingsWrites: OWrites[CreateModelResponseSettings] =
+  private implicit lazy val createModelResponseSettingsAuxPart1Writes
+    : OWrites[CreateModelResponseSettingsAuxPart1] =
     (
       (__ \ "model").write[String] and
       (__ \ "include").write[Seq[String]].transform(writesNonEmpty("include")) and
@@ -353,34 +354,89 @@ object JsonFormats {
       (__ \ "store").writeNullable[Boolean] and
       (__ \ "stream").writeNullable[Boolean] and
       (__ \ "temperature").writeNullable[Double] and
-      (__ \ "text").writeNullable[TextResponseConfig] and
-      (__ \ "tool_choice").writeNullable[ToolChoice] and
+      (__ \ "text").writeNullable[TextResponseConfig]
+    )(unlift(CreateModelResponseSettingsAuxPart1.unapply))
+
+  private implicit lazy val createModelResponseSettingsAuxPart2Reads
+    : Reads[CreateModelResponseSettingsAuxPart2] =
+    (
+      (__ \ "tool_choice").readNullable[ToolChoice] and
+      (__ \ "tools").readWithDefault[Seq[Tool]](Nil) and
+      (__ \ "top_p").readNullable[Double] and
+      (__ \ "truncation").readNullable[TruncationStrategy] and
+      (__ \ "user").readNullable[String] and
+      (__ \ "prompt").readNullable[Prompt] and
+      (__ \ "prompt_cache_key").readNullable[String] and
+      (__ \ "background").readNullable[Boolean] and
+      (__ \ "max_tool_calls").readNullable[Int] and
+      (__ \ "safety_identifier").readNullable[String] and
+      (__ \ "service_tier").readNullable[String] and
+      (__ \ "stream_options").readNullable[StreamOptions] and
+      (__ \ "top_logprobs").readNullable[Int]
+    )(CreateModelResponseSettingsAuxPart2.apply _)
+
+  private implicit lazy val createModelResponseSettingsAuxPart2Writes
+    : OWrites[CreateModelResponseSettingsAuxPart2] =
+    (
+      (__ \ "tool_choice").writeNullable[ToolChoice] and
       (__ \ "tools").write[Seq[Tool]].transform(writesNonEmpty("tools")) and
       (__ \ "top_p").writeNullable[Double] and
       (__ \ "truncation").writeNullable[TruncationStrategy] and
-      (__ \ "user").writeNullable[String]
-    )((x: CreateModelResponseSettings) =>
-      (
-        x.model,
-        x.include,
-        x.instructions,
-        x.maxOutputTokens,
-        x.metadata,
-        x.parallelToolCalls,
-        x.previousResponseId,
-        x.reasoning,
-        x.store,
-        x.stream,
-        x.temperature,
-        x.text,
-        x.toolChoice,
-        x.tools,
-        x.topP,
-        x.truncation,
-        x.user
-      )
+      (__ \ "user").writeNullable[String] and
+      (__ \ "prompt").writeNullable[Prompt] and
+      (__ \ "prompt_cache_key").writeNullable[String] and
+      (__ \ "background").writeNullable[Boolean] and
+      (__ \ "max_tool_calls").writeNullable[Int] and
+      (__ \ "safety_identifier").writeNullable[String] and
+      (__ \ "service_tier").writeNullable[String] and
+      (__ \ "stream_options").writeNullable[StreamOptions] and
+      (__ \ "top_logprobs").writeNullable[Int]
+    )(unlift(CreateModelResponseSettingsAuxPart2.unapply))
+
+  // Compose Reads and Writes for CreateModelResponseSettings using the AuxPart1 and AuxPart2
+  implicit lazy val createModelResponseSettingsReads: Reads[CreateModelResponseSettings] =
+    for {
+      part1 <- createModelResponseSettingsAuxPart1Reads
+      part2 <- createModelResponseSettingsAuxPart2Reads
+    } yield CreateModelResponseSettings(
+      model = part1.model,
+      include = part1.include,
+      instructions = part1.instructions,
+      maxOutputTokens = part1.maxOutputTokens,
+      metadata = part1.metadata,
+      parallelToolCalls = part1.parallelToolCalls,
+      previousResponseId = part1.previousResponseId,
+      reasoning = part1.reasoning,
+      store = part1.store,
+      stream = part1.stream,
+      temperature = part1.temperature,
+      text = part1.text,
+      toolChoice = part2.toolChoice,
+      tools = part2.tools,
+      topP = part2.topP,
+      truncation = part2.truncation,
+      user = part2.user,
+      prompt = part2.prompt,
+      promptCacheKey = part2.promptCacheKey,
+      background = part2.background,
+      maxToolCalls = part2.maxToolCalls,
+      safetyIdentifier = part2.safetyIdentifier,
+      serviceTier = part2.serviceTier,
+      streamOptions = part2.streamOptions,
+      topLogprobs = part2.topLogprobs
     )

+  implicit lazy val createModelResponseSettingsWrites: OWrites[CreateModelResponseSettings] =
+    OWrites[CreateModelResponseSettings] { x =>
+      val part1Json = createModelResponseSettingsAuxPart1Writes.writes(
+        CreateModelResponseSettings.toAuxPart1(x)
+      )
+      val part2Json = createModelResponseSettingsAuxPart2Writes.writes(
+        CreateModelResponseSettings.toAuxPart2(x)
+      )
+      part1Json ++ part2Json
+    }
+
   implicit lazy val createModelResponseSettingsFormat: OFormat[CreateModelResponseSettings] =
     OFormat(createModelResponseSettingsReads, createModelResponseSettingsWrites)
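Play JSON's functional builder syntax is limited to 22 fields, which is presumably why the format is now composed from the two aux parts above. A round-trip sketch, assuming the implicits from JsonFormats are brought into scope and using placeholder values:

import io.cequence.openaiscala.domain.responsesapi.{CreateModelResponseSettings, StreamOptions}
import io.cequence.openaiscala.domain.responsesapi.JsonFormats._
import play.api.libs.json.Json

val settings = CreateModelResponseSettings(
  model = "gpt-4.1",                        // placeholder model name
  serviceTier = Some("flex"),
  streamOptions = Some(StreamOptions(includeObfuscation = Some(true))),
  topLogprobs = Some(2)
)

// The combined writes merges the part-1 and part-2 JSON objects into one,
// using snake_case keys such as "service_tier" and "top_logprobs".
val json = Json.toJson(settings)
val roundTripped = json.as[CreateModelResponseSettings]
assert(roundTripped == settings)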

openai-core/src/main/scala/io/cequence/openaiscala/domain/responsesapi/Prompt.scala

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+ package io.cequence.openaiscala.domain.responsesapi
+
+ /**
+  * Reusable prompts are intended to be used with Response API.
+  *
+  * Here's how it works:
+  *   - Create a reusable prompt in the dashboard with placeholders like {{customer_name}}.
+  *   - Use the prompt in your API request with the prompt parameter. The prompt parameter
+  *     object has three properties you can configure: id — Unique identifier of your prompt,
+  *     found in the dashboard version — A specific version of your prompt (defaults to the
+  *     "current" version as specified in the dashboard) variables — A map of values to
+  *     substitute in for variables in your prompt. The substitution values can either be
+  *     strings, or other Response input message types like input_image or input_file. See the
+  *     full API reference.
+  *
+  * @param id
+  *   The unique identifier of the prompt template to use.
+  * @param variables
+  *   Optional map of values to substitute in for variables in your prompt. The substitution
+  *   values can either be strings, or other Response input types like images or files.
+  * @param version
+  *   Optional version of the prompt template.
+  */
+ case class Prompt(
+   id: String,
+   variables: Map[String, String],
+   version: Option[String] = None
+ )
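A sketch of using the new Prompt with a dashboard-defined template; the prompt id, version, variable names, and model name are placeholders:

import io.cequence.openaiscala.domain.responsesapi.{CreateModelResponseSettings, Prompt}

val prompt = Prompt(
  id = "pmpt_abc123",                          // id of a prompt created in the dashboard
  variables = Map("customer_name" -> "Jane"),  // substituted into {{customer_name}}
  version = Some("2")                          // omit to use the "current" version
)

val settings = CreateModelResponseSettings(
  model = "gpt-4.1",                           // placeholder model name
  prompt = Some(prompt)
)

Note that the case class models variables as Map[String, String], so only string substitutions are representable here, even though the scaladoc mentions richer input types such as input_image or input_file.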

openai-core/src/main/scala/io/cequence/openaiscala/domain/responsesapi/ReasoningConfig.scala

Lines changed: 6 additions & 1 deletion
@@ -12,10 +12,15 @@ import io.cequence.wsclient.domain.EnumValue
   * @param generateSummary
   *   A summary of the reasoning performed by the model. This can be useful for debugging and
   *   understanding the model's reasoning process. One of "concise" or "detailed". Optional.
+  * @param summary
+  *   A summary of the reasoning performed by the model. This can be useful for debugging and
+  *   understanding the model's reasoning process. One of auto, concise, or detailed. Optional.
   */
 case class ReasoningConfig(
   effort: Option[ReasoningEffort] = None,
-  generateSummary: Option[String] = None
+  @deprecated("Use summary instead", "1.3.0")
+  generateSummary: Option[String] = None,
+  summary: Option[String] = None
 )

 /**
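A minimal sketch of the new field; generateSummary remains available for backward compatibility but is now deprecated:

import io.cequence.openaiscala.domain.responsesapi.ReasoningConfig

// "auto" lets the API choose the summary detail level; "concise" and "detailed" are also accepted.
val reasoning = ReasoningConfig(summary = Some("auto"))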
openai-core/src/main/scala/io/cequence/openaiscala/domain/responsesapi/StreamOptions.scala

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+ package io.cequence.openaiscala.domain.responsesapi
+
+ /**
+  * Options for streaming responses.
+  *
+  * @param includeObfuscation
+  *   When true, stream obfuscation will be enabled. Stream obfuscation adds random characters
+  *   to an obfuscation field on streaming delta events to normalize payload sizes as a
+  *   mitigation to certain side-channel attacks. These obfuscation fields are included by
+  *   default, but add a small amount of overhead to the data stream. You can set
+  *   include_obfuscation to false to optimize for bandwidth if you trust the network links
+  *   between your application and the OpenAI API.
+  */
+ case class StreamOptions(
+   includeObfuscation: Option[Boolean] = None
+ )
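A sketch combining the existing stream flag with the new StreamOptions; the model name is a placeholder:

import io.cequence.openaiscala.domain.responsesapi.{CreateModelResponseSettings, StreamOptions}

val settings = CreateModelResponseSettings(
  model = "gpt-4.1",      // placeholder model name
  stream = Some(true),
  streamOptions = Some(StreamOptions(includeObfuscation = Some(false)))  // trusted network link
)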
