From 12dddd93e47eb5c816630778aeeaf60d7ddb866f Mon Sep 17 00:00:00 2001
From: gbmarc1 <marcantoine.belanger@shopify.com>
Date: Thu, 5 Jun 2025 11:44:46 -0400
Subject: [PATCH] feat: model as first parameter in body for proxy routing
 efficiency

---
 src/openai/resources/audio/speech.py                 |  4 ++--
 src/openai/resources/audio/transcriptions.py         |  4 ++--
 src/openai/resources/audio/translations.py           |  4 ++--
 src/openai/resources/beta/assistants.py              |  8 ++++----
 src/openai/resources/beta/chat/completions.py        |  4 ++--
 src/openai/resources/beta/realtime/sessions.py       |  4 ++--
 src/openai/resources/beta/threads/runs/runs.py       | 12 ++++++------
 src/openai/resources/beta/threads/threads.py         |  8 ++++----
 src/openai/resources/chat/completions/completions.py |  4 ++--
 src/openai/resources/completions.py                  |  4 ++--
 src/openai/resources/embeddings.py                   |  4 ++--
 src/openai/resources/fine_tuning/jobs/jobs.py        |  4 ++--
 src/openai/resources/images.py                       | 12 ++++++------
 src/openai/resources/moderations.py                  |  4 ++--
 src/openai/resources/responses/responses.py          | 12 ++++++------
 15 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py
index a195d7135e..a36d909e9c 100644
--- a/src/openai/resources/audio/speech.py
+++ b/src/openai/resources/audio/speech.py
@@ -100,8 +100,8 @@ def create(
             "/audio/speech",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "voice": voice,
                     "instructions": instructions,
                     "response_format": response_format,
@@ -191,8 +191,8 @@ async def create(
             "/audio/speech",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "voice": voice,
                     "instructions": instructions,
                     "response_format": response_format,
diff --git a/src/openai/resources/audio/transcriptions.py b/src/openai/resources/audio/transcriptions.py
index 208f6e8b05..3a7b8d0bd2 100644
--- a/src/openai/resources/audio/transcriptions.py
+++ b/src/openai/resources/audio/transcriptions.py
@@ -313,8 +313,8 @@ def create(
     ) -> str | Transcription | TranscriptionVerbose | Stream[TranscriptionStreamEvent]:
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "chunking_strategy": chunking_strategy,
                 "include": include,
                 "language": language,
@@ -692,8 +692,8 @@ async def create(
     ) -> Transcription | TranscriptionVerbose | str | AsyncStream[TranscriptionStreamEvent]:
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "chunking_strategy": chunking_strategy,
                 "include": include,
                 "language": language,
diff --git a/src/openai/resources/audio/translations.py b/src/openai/resources/audio/translations.py
index 28b577ce2e..4997608953 100644
--- a/src/openai/resources/audio/translations.py
+++ b/src/openai/resources/audio/translations.py
@@ -146,8 +146,8 @@ def create(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "prompt": prompt,
                 "response_format": response_format,
                 "temperature": temperature,
@@ -289,8 +289,8 @@ async def create(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "prompt": prompt,
                 "response_format": response_format,
                 "temperature": temperature,
diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py
index 9059d93616..8e03f42c8c 100644
--- a/src/openai/resources/beta/assistants.py
+++ b/src/openai/resources/beta/assistants.py
@@ -156,7 +156,7 @@ def create(
             "/assistants",
             body=maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
@@ -360,10 +360,10 @@ def update(
             f"/assistants/{assistant_id}",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
-                    "model": model,
                     "name": name,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,
@@ -605,7 +605,7 @@ async def create(
             "/assistants",
             body=await async_maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
@@ -809,10 +809,10 @@ async def update(
             f"/assistants/{assistant_id}",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
-                    "model": model,
                     "name": name,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,
diff --git a/src/openai/resources/beta/chat/completions.py b/src/openai/resources/beta/chat/completions.py
index 80e015615f..d6ee142d64 100644
--- a/src/openai/resources/beta/chat/completions.py
+++ b/src/openai/resources/beta/chat/completions.py
@@ -159,8 +159,8 @@ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseForma
             "/chat/completions",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
@@ -438,8 +438,8 @@ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseForma
             "/chat/completions",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
diff --git a/src/openai/resources/beta/realtime/sessions.py b/src/openai/resources/beta/realtime/sessions.py
index 77f1ec9059..51b25db3ce 100644
--- a/src/openai/resources/beta/realtime/sessions.py
+++ b/src/openai/resources/beta/realtime/sessions.py
@@ -179,6 +179,7 @@ def create(
             "/realtime/sessions",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "client_secret": client_secret,
                     "input_audio_format": input_audio_format,
                     "input_audio_noise_reduction": input_audio_noise_reduction,
@@ -186,7 +187,6 @@ def create(
                     "instructions": instructions,
                     "max_response_output_tokens": max_response_output_tokens,
                     "modalities": modalities,
-                    "model": model,
                     "output_audio_format": output_audio_format,
                     "speed": speed,
                     "temperature": temperature,
@@ -364,6 +364,7 @@ async def create(
             "/realtime/sessions",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "client_secret": client_secret,
                     "input_audio_format": input_audio_format,
                     "input_audio_noise_reduction": input_audio_noise_reduction,
@@ -371,7 +372,6 @@ async def create(
                     "instructions": instructions,
                     "max_response_output_tokens": max_response_output_tokens,
                     "modalities": modalities,
-                    "model": model,
                     "output_audio_format": output_audio_format,
                     "speed": speed,
                     "temperature": temperature,
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index 3d9ae9759e..0a96c91d31 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -573,6 +573,7 @@ def create(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
@@ -580,7 +581,6 @@ def create(
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,
@@ -976,6 +976,7 @@ def create_and_stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
@@ -983,7 +984,6 @@ def create_and_stream(
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
@@ -1163,6 +1163,7 @@ def stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
@@ -1170,7 +1171,6 @@ def stream(
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
@@ -2007,6 +2007,7 @@ async def create(
             f"/threads/{thread_id}/runs",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
@@ -2014,7 +2015,6 @@ async def create(
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,
@@ -2409,6 +2409,7 @@ def create_and_stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
@@ -2416,7 +2417,6 @@ def create_and_stream(
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
@@ -2596,6 +2596,7 @@ def stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
@@ -2603,7 +2604,6 @@ def stream(
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index ff2a41155d..76b916d27b 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -707,12 +707,12 @@ def create_and_run(
             "/threads/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "stream": stream,
@@ -888,12 +888,12 @@ def create_and_run_stream(
             "/threads/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "temperature": temperature,
@@ -1565,12 +1565,12 @@ async def create_and_run(
             "/threads/runs",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "stream": stream,
@@ -1750,12 +1750,12 @@ def create_and_run_stream(
             "/threads/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "temperature": temperature,
diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py
index a2a664ac59..90c5da6e9f 100644
--- a/src/openai/resources/chat/completions/completions.py
+++ b/src/openai/resources/chat/completions/completions.py
@@ -926,8 +926,8 @@ def create(
             "/chat/completions",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
@@ -2029,8 +2029,8 @@ async def create(
             "/chat/completions",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py
index 43b923b9b9..49e10af449 100644
--- a/src/openai/resources/completions.py
+++ b/src/openai/resources/completions.py
@@ -542,7 +542,7 @@ def create(
             "/completions",
             body=maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "prompt": prompt,
                     "best_of": best_of,
                     "echo": echo,
@@ -1092,7 +1092,7 @@ async def create(
             "/completions",
             body=await async_maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "prompt": prompt,
                     "best_of": best_of,
                     "echo": echo,
diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py
index 553dacc284..a22a4e0f59 100644
--- a/src/openai/resources/embeddings.py
+++ b/src/openai/resources/embeddings.py
@@ -98,8 +98,8 @@ def create(
           timeout: Override the client-level default timeout for this request, in seconds
         """
         params = {
+            "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
             "input": input,
-            "model": model,
             "user": user,
             "dimensions": dimensions,
             "encoding_format": encoding_format,
@@ -214,8 +214,8 @@ async def create(
           timeout: Override the client-level default timeout for this request, in seconds
         """
         params = {
+            "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
             "input": input,
-            "model": model,
             "user": user,
             "dimensions": dimensions,
             "encoding_format": encoding_format,
diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py
index 5cca219172..a9c7a9f46a 100644
--- a/src/openai/resources/fine_tuning/jobs/jobs.py
+++ b/src/openai/resources/fine_tuning/jobs/jobs.py
@@ -157,7 +157,7 @@ def create(
             "/fine_tuning/jobs",
             body=maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "training_file": training_file,
                     "hyperparameters": hyperparameters,
                     "integrations": integrations,
@@ -535,7 +535,7 @@ async def create(
             "/fine_tuning/jobs",
             body=await async_maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "training_file": training_file,
                     "hyperparameters": hyperparameters,
                     "integrations": integrations,
diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py
index 0f1c9fcb9e..0b8eaf5fe4 100644
--- a/src/openai/resources/images.py
+++ b/src/openai/resources/images.py
@@ -91,8 +91,8 @@ def create_variation(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "image": image,
-                "model": model,
                 "n": n,
                 "response_format": response_format,
                 "size": size,
@@ -198,11 +198,11 @@ def edit(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "image": image,
                 "prompt": prompt,
                 "background": background,
                 "mask": mask,
-                "model": model,
                 "n": n,
                 "quality": quality,
                 "response_format": response_format,
@@ -323,9 +323,9 @@ def generate(
             "/images/generations",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "prompt": prompt,
                     "background": background,
-                    "model": model,
                     "moderation": moderation,
                     "n": n,
                     "output_compression": output_compression,
@@ -415,8 +415,8 @@ async def create_variation(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "image": image,
-                "model": model,
                 "n": n,
                 "response_format": response_format,
                 "size": size,
@@ -522,11 +522,11 @@ async def edit(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "image": image,
                 "prompt": prompt,
                 "background": background,
                 "mask": mask,
-                "model": model,
                 "n": n,
                 "quality": quality,
                 "response_format": response_format,
@@ -647,9 +647,9 @@ async def generate(
             "/images/generations",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "prompt": prompt,
                     "background": background,
-                    "model": model,
                     "moderation": moderation,
                     "n": n,
                     "output_compression": output_compression,
diff --git a/src/openai/resources/moderations.py b/src/openai/resources/moderations.py
index f7a8b52c23..c12fbffa52 100644
--- a/src/openai/resources/moderations.py
+++ b/src/openai/resources/moderations.py
@@ -79,8 +79,8 @@ def create(
             "/moderations",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                 },
                 moderation_create_params.ModerationCreateParams,
             ),
@@ -149,8 +149,8 @@ async def create(
             "/moderations",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                 },
                 moderation_create_params.ModerationCreateParams,
             ),
diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py
index c3bec87153..c4c6237afd 100644
--- a/src/openai/resources/responses/responses.py
+++ b/src/openai/resources/responses/responses.py
@@ -691,8 +691,8 @@ def create(
             "/responses",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "background": background,
                     "include": include,
                     "instructions": instructions,
@@ -802,8 +802,8 @@ def stream(
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ResponseStreamManager[TextFormatT]:
         new_response_args = {
+            "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
             "input": input,
-            "model": model,
             "include": include,
             "instructions": instructions,
             "max_output_tokens": max_output_tokens,
@@ -943,8 +943,8 @@ def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
             "/responses",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "include": include,
                     "instructions": instructions,
                     "max_output_tokens": max_output_tokens,
@@ -1899,8 +1899,8 @@ async def create(
             "/responses",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "background": background,
                     "include": include,
                     "instructions": instructions,
@@ -2010,8 +2010,8 @@ def stream(
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> AsyncResponseStreamManager[TextFormatT]:
         new_response_args = {
+            "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
             "input": input,
-            "model": model,
             "include": include,
             "instructions": instructions,
             "max_output_tokens": max_output_tokens,
@@ -2155,8 +2155,8 @@ def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
             "/responses",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "include": include,
                     "instructions": instructions,
                     "max_output_tokens": max_output_tokens,