diff --git a/docs/api-inference/tasks/audio-classification.md b/docs/api-inference/tasks/audio-classification.md
index 47144b754..a7e5d55d8 100644
--- a/docs/api-inference/tasks/audio-classification.md
+++ b/docs/api-inference/tasks/audio-classification.md
@@ -29,73 +29,13 @@ For more details about the `audio-classification` task, check out its [dedicated
### Recommended models
-- [speechbrain/google_speech_command_xvector](https://huggingface.co/speechbrain/google_speech_command_xvector): An easy-to-use model for command recognition.
-- [ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition](https://huggingface.co/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition): An emotion recognition model.
-- [facebook/mms-lid-126](https://huggingface.co/facebook/mms-lid-126): A language identification model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=audio-classification&sort=trending).
### Using the API
-
-
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector \
- -X POST \
- --data-binary '@sample1.flac' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-
-```py
-import requests
-
-API_URL = "https://router.huggingface.co/hf-inference/v1"
-headers = {"Authorization": "Bearer hf_***"}
-
-def query(filename):
- with open(filename, "rb") as f:
- data = f.read()
- response = requests.post(API_URL, headers=headers, data=data)
- return response.json()
-
-output = query("sample1.flac")
-```
-
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.audio_classification).
-
-
-
-```js
-async function query(filename) {
- const data = fs.readFileSync(filename);
- const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector",
- {
- headers: {
- Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
- },
- method: "POST",
- body: data,
- }
- );
- const result = await response.json();
- return result;
-}
-
-query("sample1.flac").then((response) => {
- console.log(JSON.stringify(response));
-});
-```
-
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#audioclassification).
-
-
-
+No snippet available for this task.
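+
+A minimal sketch with `huggingface_hub`'s `InferenceClient` while generated snippets are unavailable (the model id below is a placeholder, not a recommendation):
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    provider="hf-inference",
+    api_key="hf_***",
+)
+
+# Replace the placeholder with any audio-classification model id.
+output = client.audio_classification("sample1.flac", model="<your-model-id>")
+print(output)
+```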
diff --git a/docs/api-inference/tasks/automatic-speech-recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md
index c28a10f14..cf50cb2c8 100644
--- a/docs/api-inference/tasks/automatic-speech-recognition.md
+++ b/docs/api-inference/tasks/automatic-speech-recognition.md
@@ -29,8 +29,6 @@ For more details about the `automatic-speech-recognition` task, check out its [d
### Recommended models
-- [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3): A powerful ASR model by OpenAI.
-- [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large): An end-to-end model that performs ASR and Speech Translation by MetaAI.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending).
@@ -39,79 +37,204 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3 \
- -X POST \
- --data-binary '@sample1.flac' \
- -H 'Authorization: Bearer hf_***'
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="fal-ai",
+ api_key="hf_***",
+)
+
+output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3")
```
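+
+A note on inputs: `automatic_speech_recognition` also accepts raw audio bytes or a file object, not just a path (a minimal sketch reusing the client above):
+
+```python
+# Passing raw bytes instead of a file path (sketch; same client as above).
+with open("sample1.flac", "rb") as f:
+    output = client.automatic_speech_recognition(f.read(), model="openai/whisper-large-v3")
+```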
-
-
-```py
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.automatic_speech_recognition).
+
+
+
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/fal-ai/fal-ai/whisper"
headers = {"Authorization": "Bearer hf_***"}
def query(filename):
- with open(filename, "rb") as f:
- data = f.read()
- response = requests.post(API_URL, headers=headers, data=data)
- return response.json()
+ with open(filename, "rb") as f:
+ data = f.read()
+ response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data)
+ return response.json()
output = query("sample1.flac")
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.automatic_speech_recognition).
-
+
+
+
+
-
-Using `huggingface.js`:
```js
-import { HfInference } from "@huggingface/inference";
+async function query(filename) {
+    const data = fs.readFileSync(filename);
+ const response = await fetch(
+ "https://router.huggingface.co/fal-ai/fal-ai/whisper",
+ {
+ headers: {
+ Authorization: "Bearer hf_***",
+ "Content-Type": "audio/flac"
+ },
+ method: "POST",
+        body: data,
+ }
+ );
+ const result = await response.json();
+ return result;
+}
-const client = new HfInference("hf_***");
+query("sample1.flac").then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import fs from "fs";
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
const data = fs.readFileSync("sample1.flac");
const output = await client.automaticSpeechRecognition({
data,
model: "openai/whisper-large-v3",
- provider: "hf-inference",
+ provider: "fal-ai",
});
console.log(output);
+```
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#automaticspeechrecognition).
+
+
+
+```sh
+curl https://router.huggingface.co/fal-ai/fal-ai/whisper \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: audio/flac' \
+ --data-binary @"sample1.flac"
```
-Using `fetch`:
+
+
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hf-inference",
+ api_key="hf_***",
+)
+
+output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3-turbo")
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.automatic_speech_recognition).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(filename):
+ with open(filename, "rb") as f:
+ data = f.read()
+ response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data)
+ return response.json()
+
+output = query("sample1.flac")
+```
+
+
+
+
+
+
```js
-async function query(filename) {
- const data = fs.readFileSync(filename);
+async function query(filename) {
+    const data = fs.readFileSync(filename);
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3",
+ "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo",
{
headers: {
Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
+ "Content-Type": "audio/flac"
},
method: "POST",
- body: data,
+        body: data,
}
);
const result = await response.json();
return result;
}
-query("sample1.flac").then((response) => {
- console.log(JSON.stringify(response));
+query("sample1.flac").then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import fs from "fs";
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const data = fs.readFileSync("sample1.flac");
+
+const output = await client.automaticSpeechRecognition({
+ data,
+ model: "openai/whisper-large-v3-turbo",
+ provider: "hf-inference",
});
+
+console.log(output);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#automaticspeechrecognition).
-
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#automaticspeechrecognition).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: audio/flac' \
+ --data-binary @"sample1.flac"
+```
+
+
+
diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md
index e4aac572d..e376a1c09 100644
--- a/docs/api-inference/tasks/chat-completion.md
+++ b/docs/api-inference/tasks/chat-completion.md
@@ -21,16 +21,9 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/api-inferen
#### Conversational Large Language Models (LLMs)
-- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
-- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models.
-- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
-- [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft.
-- [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code.
-- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model.
#### Conversational Vision-Language Models (VLMs)
-- [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct): Strong image-text-to-text model.
### API Playground
@@ -60,147 +53,324 @@ The API supports:
-
-```bash
-curl 'https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/chat/completions' \
--H 'Authorization: Bearer hf_***' \
--H 'Content-Type: application/json' \
---data '{
- "model": "google/gemma-2-2b-it",
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hf-inference",
+ api_key="hf_***",
+)
+
+completion = client.chat.completions.create(
+ model="Qwen/QwQ-32B",
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ max_tokens=500,
+)
+
+print(completion.choices[0].message)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1/chat/completions"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
+response = query({
"messages": [
- {
- "role": "user",
- "content": "What is the capital of France?"
- }
- ],
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
"max_tokens": 500,
- "stream": true
-}'
+ "model": "Qwen/QwQ-32B"
+})
+
+print(response["choices"][0]["message"])
+```
+
+
+
+
+
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1",
+ api_key="hf_***"
+)
+
+completion = client.chat.completions.create(
+ model="Qwen/QwQ-32B",
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ max_tokens=500,
+)
+
+print(completion.choices[0].message)
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const chatCompletion = await client.chatCompletion({
+ provider: "hf-inference",
+ model: "Qwen/QwQ-32B",
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?",
+ },
+ ],
+ max_tokens: 500,
+});
+
+console.log(chatCompletion.choices[0].message);
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#chatcompletion).
+
+
+
+```js
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+ baseURL: "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1",
+ apiKey: "hf_***",
+});
+
+const chatCompletion = await client.chat.completions.create({
+ model: "Qwen/QwQ-32B",
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?",
+ },
+ ],
+ max_tokens: 500,
+});
+
+console.log(chatCompletion.choices[0].message);
+```
+
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1/chat/completions \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ "max_tokens": 500,
+ "model": "Qwen/QwQ-32B",
+ "stream": false
+ }'
```
-
-
-Using `huggingface_hub`:
-```py
+
+
+
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="together",
+ api_key="hf_***",
)
-messages = [
- {
- "role": "user",
- "content": "What is the capital of France?"
- }
-]
-
-stream = client.chat.completions.create(
- model="google/gemma-2-2b-it",
- messages=messages,
- max_tokens=500,
- stream=True
+completion = client.chat.completions.create(
+ model="deepseek-ai/DeepSeek-R1",
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ max_tokens=500,
)
-for chunk in stream:
- print(chunk.choices[0].delta.content, end="")
+print(completion.choices[0].message)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/together/v1/chat/completions"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
+response = query({
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ "max_tokens": 500,
+ "model": "deepseek-ai/DeepSeek-R1"
+})
+
+print(response["choices"][0]["message"])
```
-Using `openai`:
-```py
+
+
+
+
+
+```python
from openai import OpenAI
client = OpenAI(
- base_url="https://router.huggingface.co/hf-inference/v1",
- api_key="hf_***"
+ base_url="https://router.huggingface.co/together/v1",
+ api_key="hf_***"
)
-messages = [
- {
- "role": "user",
- "content": "What is the capital of France?"
- }
-]
-
-stream = client.chat.completions.create(
- model="google/gemma-2-2b-it",
- messages=messages,
- max_tokens=500,
- stream=True
+completion = client.chat.completions.create(
+ model="deepseek-ai/DeepSeek-R1",
+ messages=[
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ max_tokens=500,
)
-for chunk in stream:
- print(chunk.choices[0].delta.content, end="")
+print(completion.choices[0].message)
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
-
+
+
+
+
-
-Using `huggingface.js`:
```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
-
-let out = "";
-
-const stream = client.chatCompletionStream({
- model: "google/gemma-2-2b-it",
- messages: [
- {
- role: "user",
- content: "What is the capital of France?"
- }
- ],
- provider: "hf-inference",
- max_tokens: 500,
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const chatCompletion = await client.chatCompletion({
+ provider: "together",
+ model: "deepseek-ai/DeepSeek-R1",
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?",
+ },
+ ],
+ max_tokens: 500,
});
-for await (const chunk of stream) {
- if (chunk.choices && chunk.choices.length > 0) {
- const newContent = chunk.choices[0].delta.content;
- out += newContent;
- console.log(newContent);
- }
-}
+console.log(chatCompletion.choices[0].message);
```
-Using `openai`:
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#chatcompletion).
+
+
+
```js
import { OpenAI } from "openai";
const client = new OpenAI({
- baseURL: "https://router.huggingface.co/hf-inference/v1",
- apiKey: "hf_***"
+ baseURL: "https://router.huggingface.co/together/v1",
+ apiKey: "hf_***",
});
-let out = "";
-
-const stream = await client.chat.completions.create({
- model: "google/gemma-2-2b-it",
- messages: [
- {
- role: "user",
- content: "What is the capital of France?"
- }
- ],
- max_tokens: 500,
- stream: true,
+const chatCompletion = await client.chat.completions.create({
+ model: "deepseek-ai/DeepSeek-R1",
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?",
+ },
+ ],
+ max_tokens: 500,
});
-for await (const chunk of stream) {
- if (chunk.choices && chunk.choices.length > 0) {
- const newContent = chunk.choices[0].delta.content;
- out += newContent;
- console.log(newContent);
- }
-}
+console.log(chatCompletion.choices[0].message);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion).
-
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/together/v1/chat/completions \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ "max_tokens": 500,
+ "model": "deepseek-ai/DeepSeek-R1",
+ "stream": false
+ }'
+```
+
+
+
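+The snippets above return the full reply in one response. For token-by-token output, the same chat interface supports streaming; a minimal sketch with `huggingface_hub` (pass `stream=True` and iterate the chunks):
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    provider="hf-inference",
+    api_key="hf_***",
+)
+
+# stream=True yields incremental chunks instead of a single completion.
+stream = client.chat.completions.create(
+    model="Qwen/QwQ-32B",
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    max_tokens=500,
+    stream=True,
+)
+
+for chunk in stream:
+    print(chunk.choices[0].delta.content, end="")
+```
+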
@@ -211,202 +381,456 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/
-
-```bash
-curl 'https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions' \
--H 'Authorization: Bearer hf_***' \
--H 'Content-Type: application/json' \
---data '{
- "model": "Qwen/Qwen2.5-VL-7B-Instruct",
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hf-inference",
+ api_key="hf_***",
+)
+
+completion = client.chat.completions.create(
+ model="google/gemma-3-27b-it",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
+)
+
+print(completion.choices[0].message)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
+response = query({
"messages": [
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": "Describe this image in one sentence."
- },
- {
- "type": "image_url",
- "image_url": {
- "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
- }
- }
- ]
- }
- ],
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
"max_tokens": 500,
- "stream": true
-}'
+ "model": "google/gemma-3-27b-it"
+})
+
+print(response["choices"][0]["message"])
+```
+
+
+
+
+
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1",
+ api_key="hf_***"
+)
+
+completion = client.chat.completions.create(
+ model="google/gemma-3-27b-it",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
+)
+
+print(completion.choices[0].message)
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const chatCompletion = await client.chatCompletion({
+ provider: "hf-inference",
+ model: "google/gemma-3-27b-it",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
+});
+
+console.log(chatCompletion.choices[0].message);
```
-
-
-Using `huggingface_hub`:
-```py
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#chatcompletion).
+
+
+
+```js
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+ baseURL: "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1",
+ apiKey: "hf_***",
+});
+
+const chatCompletion = await client.chat.completions.create({
+ model: "google/gemma-3-27b-it",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
+});
+
+console.log(chatCompletion.choices[0].message);
+```
+
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 500,
+ "model": "google/gemma-3-27b-it",
+ "stream": false
+ }'
+```
+
+
+
+
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hyperbolic",
+ api_key="hf_***",
)
-messages = [
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": "Describe this image in one sentence."
- },
- {
- "type": "image_url",
- "image_url": {
- "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
- }
- }
- ]
- }
-]
-
-stream = client.chat.completions.create(
- model="Qwen/Qwen2.5-VL-7B-Instruct",
- messages=messages,
- max_tokens=500,
- stream=True
+completion = client.chat.completions.create(
+ model="Qwen/Qwen2.5-VL-7B-Instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
)
-for chunk in stream:
- print(chunk.choices[0].delta.content, end="")
+print(completion.choices[0].message)
```
-Using `openai`:
-```py
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/hyperbolic/v1/chat/completions"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
+response = query({
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 500,
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct"
+})
+
+print(response["choices"][0]["message"])
+```
+
+
+
+
+
+
+```python
from openai import OpenAI
client = OpenAI(
- base_url="https://router.huggingface.co/hf-inference/v1",
- api_key="hf_***"
+ base_url="https://router.huggingface.co/hyperbolic/v1",
+ api_key="hf_***"
)
-messages = [
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": "Describe this image in one sentence."
- },
- {
- "type": "image_url",
- "image_url": {
- "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
- }
- }
- ]
- }
-]
-
-stream = client.chat.completions.create(
- model="Qwen/Qwen2.5-VL-7B-Instruct",
- messages=messages,
- max_tokens=500,
- stream=True
+completion = client.chat.completions.create(
+ model="Qwen/Qwen2.5-VL-7B-Instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
)
-for chunk in stream:
- print(chunk.choices[0].delta.content, end="")
+print(completion.choices[0].message)
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
+
-const client = new HfInference("hf_***");
-let out = "";
+
-const stream = client.chatCompletionStream({
- model: "Qwen/Qwen2.5-VL-7B-Instruct",
- messages: [
- {
- role: "user",
- content: [
- {
- type: "text",
- text: "Describe this image in one sentence."
- },
- {
- type: "image_url",
- image_url: {
- url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
- }
- }
- ]
- }
- ],
- provider: "hf-inference",
- max_tokens: 500,
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const chatCompletion = await client.chatCompletion({
+ provider: "hyperbolic",
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
});
-for await (const chunk of stream) {
- if (chunk.choices && chunk.choices.length > 0) {
- const newContent = chunk.choices[0].delta.content;
- out += newContent;
- console.log(newContent);
- }
-}
+console.log(chatCompletion.choices[0].message);
```
-Using `openai`:
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#chatcompletion).
+
+
+
```js
import { OpenAI } from "openai";
const client = new OpenAI({
- baseURL: "https://router.huggingface.co/hf-inference/v1",
- apiKey: "hf_***"
+ baseURL: "https://router.huggingface.co/hyperbolic/v1",
+ apiKey: "hf_***",
});
-let out = "";
-
-const stream = await client.chat.completions.create({
+const chatCompletion = await client.chat.completions.create({
model: "Qwen/Qwen2.5-VL-7B-Instruct",
- messages: [
- {
- role: "user",
- content: [
- {
- type: "text",
- text: "Describe this image in one sentence."
- },
- {
- type: "image_url",
- image_url: {
- url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
- }
- }
- ]
- }
- ],
- max_tokens: 500,
- stream: true,
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
});
-for await (const chunk of stream) {
- if (chunk.choices && chunk.choices.length > 0) {
- const newContent = chunk.choices[0].delta.content;
- out += newContent;
- console.log(newContent);
- }
-}
+console.log(chatCompletion.choices[0].message);
+```
+
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hyperbolic/v1/chat/completions \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 500,
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct",
+ "stream": false
+ }'
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion).
-
+
+
diff --git a/docs/api-inference/tasks/feature-extraction.md b/docs/api-inference/tasks/feature-extraction.md
index fc707ad85..b246abe87 100644
--- a/docs/api-inference/tasks/feature-extraction.md
+++ b/docs/api-inference/tasks/feature-extraction.md
@@ -29,7 +29,6 @@ For more details about the `feature-extraction` task, check out its [dedicated p
### Recommended models
-- [thenlper/gte-large](https://huggingface.co/thenlper/gte-large): A powerful feature extraction model for natural language processing tasks.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=feature-extraction&sort=trending).
@@ -38,77 +37,53 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/thenlper/gte-large \
- -X POST \
- -d '{"inputs": "Today is a sunny day and I will get some ice cream."}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
result = client.feature_extraction(
- model="thenlper/gte-large",
- inputs="Today is a sunny day and I will get some ice cream.",
- provider="hf-inference",
+    text="Today is a sunny day and I will get some ice cream.",
+ model="mixedbread-ai/mxbai-embed-large-v1",
)
+```
-print(result)
+
-```
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.feature_extraction).
+
+
-Using `requests`:
-```py
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
output = query({
- "inputs": "Today is a sunny day and I will get some ice cream.",
+ "inputs": "Today is a sunny day and I will get some ice cream.",
})
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.feature_extraction).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
+
-const output = await client.featureExtraction({
- model: "thenlper/gte-large",
- inputs: "Today is a sunny day and I will get some ice cream.",
- provider: "hf-inference",
-});
-console.log(output);
+
-```
-
-Using `fetch`:
```js
async function query(data) {
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/thenlper/gte-large",
+ "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1",
{
headers: {
Authorization: "Bearer hf_***",
@@ -122,13 +97,48 @@ async function query(data) {
return result;
}
-query({"inputs": "Today is a sunny day and I will get some ice cream."}).then((response) => {
- console.log(JSON.stringify(response));
+query({ inputs: "Today is a sunny day and I will get some ice cream." }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const output = await client.featureExtraction({
+ model: "mixedbread-ai/mxbai-embed-large-v1",
+ inputs: "Today is a sunny day and I will get some ice cream.",
+ provider: "hf-inference",
});
+
+console.log(output);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#featureextraction).
-
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#featureextraction).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1 \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+        "inputs": "Today is a sunny day and I will get some ice cream."
+ }'
+```
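+
+The Python client returns the embedding as a NumPy array; depending on the model it is a single sentence vector or one vector per token. A quick sanity check on the `result` from the `InferenceClient` snippet above:
+
+```python
+import numpy as np
+
+# Inspect the embedding returned by client.feature_extraction(...) above.
+embedding = np.asarray(result)
+print(embedding.shape)
+```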
+
+
+
diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md
index 70e7c256c..5eac91ac9 100644
--- a/docs/api-inference/tasks/fill-mask.md
+++ b/docs/api-inference/tasks/fill-mask.md
@@ -24,108 +24,13 @@ For more details about the `fill-mask` task, check out its [dedicated page](http
### Recommended models
-- [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base): A multilingual model trained on 100 languages.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending).
### Using the API
-
-
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base \
- -X POST \
- -d '{"inputs": "The answer to the universe is [MASK]."}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-
-Using `huggingface_hub`:
-```py
-from huggingface_hub import InferenceClient
-
-client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
-)
-
-result = client.fill_mask(
- model="FacebookAI/xlm-roberta-base",
- inputs="The answer to the universe is [MASK].",
- provider="hf-inference",
-)
-
-print(result)
-
-```
-
-Using `requests`:
-```py
-import requests
-
-API_URL = "https://router.huggingface.co/hf-inference/v1"
-headers = {"Authorization": "Bearer hf_***"}
-
-def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
-output = query({
- "inputs": "The answer to the universe is [MASK].",
-})
-```
-
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.fill_mask).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
-
-const output = await client.fillMask({
- model: "FacebookAI/xlm-roberta-base",
- inputs: "The answer to the universe is [MASK].",
- provider: "hf-inference",
-});
-
-console.log(output);
-
-```
-
-Using `fetch`:
-```js
-async function query(data) {
- const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base",
- {
- headers: {
- Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
- },
- method: "POST",
- body: JSON.stringify(data),
- }
- );
- const result = await response.json();
- return result;
-}
-
-query({"inputs": "The answer to the universe is [MASK]."}).then((response) => {
- console.log(JSON.stringify(response));
-});
-```
-
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#fillmask).
-
-
-
+No snippet available for this task.
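+
+A minimal sketch with `huggingface_hub`'s `InferenceClient` while generated snippets are unavailable (the model id below is a placeholder, not a recommendation):
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    provider="hf-inference",
+    api_key="hf_***",
+)
+
+# Replace the placeholder with any fill-mask model id; each returned
+# candidate carries the filled token and its score.
+results = client.fill_mask("The answer to the universe is [MASK].", model="<your-model-id>")
+for r in results:
+    print(r.token_str, r.score)
+```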
diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md
index 0a0eb78c8..c71575230 100644
--- a/docs/api-inference/tasks/image-classification.md
+++ b/docs/api-inference/tasks/image-classification.md
@@ -24,9 +24,6 @@ For more details about the `image-classification` task, check out its [dedicated
### Recommended models
-- [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model.
-- [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224): A robust image classification model.
-- [facebook/convnext-large-224](https://huggingface.co/facebook/convnext-large-224): A strong image classification model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending).
@@ -35,60 +32,105 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224 \
- -X POST \
- --data-binary '@cats.jpg' \
- -H 'Authorization: Bearer hf_***'
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hf-inference",
+ api_key="hf_***",
+)
+
+output = client.image_classification("cats.jpg", model="Falconsai/nsfw_image_detection")
```
-
-
-```py
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_classification).
+
+
+
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection"
headers = {"Authorization": "Bearer hf_***"}
def query(filename):
- with open(filename, "rb") as f:
- data = f.read()
- response = requests.post(API_URL, headers=headers, data=data)
- return response.json()
+ with open(filename, "rb") as f:
+ data = f.read()
+ response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data)
+ return response.json()
output = query("cats.jpg")
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_classification).
-
+
+
+
+
-
```js
-async function query(filename) {
- const data = fs.readFileSync(filename);
+async function query(filename) {
+    const data = fs.readFileSync(filename);
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224",
+ "https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection",
{
headers: {
Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
+ "Content-Type": "image/jpeg"
},
method: "POST",
- body: data,
+        body: data,
}
);
const result = await response.json();
return result;
}
-query("cats.jpg").then((response) => {
- console.log(JSON.stringify(response));
+query("cats.jpg").then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import fs from "fs";
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const data = fs.readFileSync("cats.jpg");
+
+const output = await client.imageClassification({
+ data,
+ model: "Falconsai/nsfw_image_detection",
+ provider: "hf-inference",
});
+
+console.log(output);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imageclassification).
-
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#imageclassification).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: image/jpeg' \
+ --data-binary @"cats.jpg"
+```
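+
+The classification output is a list of label/score predictions; a sketch of picking the top one from the `InferenceClient` call above:
+
+```python
+# Each element of `output` carries .label and .score attributes.
+best = max(output, key=lambda p: p.score)
+print(best.label, best.score)
+```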
+
+
+
diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md
index 459b5037d..59be54262 100644
--- a/docs/api-inference/tasks/image-segmentation.md
+++ b/docs/api-inference/tasks/image-segmentation.md
@@ -24,8 +24,6 @@ For more details about the `image-segmentation` task, check out its [dedicated p
### Recommended models
-- [openmmlab/upernet-convnext-small](https://huggingface.co/openmmlab/upernet-convnext-small): Solid semantic segmentation model trained on ADE20k.
-- [facebook/mask2former-swin-large-coco-panoptic](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic): Panoptic segmentation model trained on the COCO (common objects) dataset.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending).
@@ -34,60 +32,83 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small \
- -X POST \
- --data-binary '@cats.jpg' \
- -H 'Authorization: Bearer hf_***'
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hf-inference",
+ api_key="hf_***",
+)
+
+output = client.image_segmentation("cats.jpg", model="jonathandinu/face-parsing")
```
-
-
-```py
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_segmentation).
+
+
+
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing"
headers = {"Authorization": "Bearer hf_***"}
def query(filename):
- with open(filename, "rb") as f:
- data = f.read()
- response = requests.post(API_URL, headers=headers, data=data)
- return response.json()
+ with open(filename, "rb") as f:
+ data = f.read()
+ response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data)
+ return response.json()
output = query("cats.jpg")
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_segmentation).
-
+
+
+
+
-
```js
-async function query(filename) {
- const data = fs.readFileSync(filename);
+async function query(filename) {
+    const data = fs.readFileSync(filename);
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small",
+ "https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing",
{
headers: {
Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
+ "Content-Type": "image/jpeg"
},
method: "POST",
- body: data,
+        body: data,
}
);
const result = await response.json();
return result;
}
-query("cats.jpg").then((response) => {
- console.log(JSON.stringify(response));
+query("cats.jpg").then((response) => {
+ console.log(JSON.stringify(response));
});
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imagesegmentation).
-
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: image/jpeg' \
+ --data-binary @"cats.jpg"
+```
+
+
+
diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md
index 14903fb01..f4ca691e0 100644
--- a/docs/api-inference/tasks/image-text-to-text.md
+++ b/docs/api-inference/tasks/image-text-to-text.md
@@ -24,7 +24,6 @@ For more details about the `image-text-to-text` task, check out its [dedicated p
### Recommended models
-- [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct): Strong image-text-to-text model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-text-to-text&sort=trending).
@@ -33,89 +32,456 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct \
- -X POST \
- -d '{"inputs": "Can you please let us know more details about your "}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
-messages = "\"Can you please let us know more details about your \""
-
-stream = client.chat.completions.create(
- model="Qwen/Qwen2.5-VL-7B-Instruct",
- messages=messages,
- max_tokens=500,
- stream=True
+completion = client.chat.completions.create(
+ model="google/gemma-3-27b-it",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
)
-for chunk in stream:
- print(chunk.choices[0].delta.content, end="")
+print(completion.choices[0].message)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text_to_text).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
+response = query({
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 500,
+ "model": "google/gemma-3-27b-it"
+})
+
+print(response["choices"][0]["message"])
```
-Using `openai`:
-```py
+
+
+
+
+
+```python
from openai import OpenAI
client = OpenAI(
- base_url="https://router.huggingface.co/hf-inference/v1",
- api_key="hf_***"
+ base_url="https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1",
+ api_key="hf_***"
+)
+
+completion = client.chat.completions.create(
+ model="google/gemma-3-27b-it",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
)
-messages = "\"Can you please let us know more details about your \""
+print(completion.choices[0].message)
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const chatCompletion = await client.chatCompletion({
+ provider: "hf-inference",
+ model: "google/gemma-3-27b-it",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
+});
+
+console.log(chatCompletion.choices[0].message);
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#imagetexttotext).
+
+
+
+```js
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+ baseURL: "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1",
+ apiKey: "hf_***",
+});
+
+const chatCompletion = await client.chat.completions.create({
+ model: "google/gemma-3-27b-it",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
+});
+
+console.log(chatCompletion.choices[0].message);
+```
+
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 500,
+ "model": "google/gemma-3-27b-it",
+ "stream": false
+ }'
+```
-stream = client.chat.completions.create(
- model="Qwen/Qwen2.5-VL-7B-Instruct",
- messages=messages,
- max_tokens=500,
- stream=True
+
+
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hyperbolic",
+ api_key="hf_***",
+)
+
+completion = client.chat.completions.create(
+ model="Qwen/Qwen2.5-VL-7B-Instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
+)
+
+print(completion.choices[0].message)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text_to_text).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/hyperbolic/v1/chat/completions"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
+response = query({
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 500,
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct"
+})
+
+print(response["choices"][0]["message"])
+```
+
+
+
+
+
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://router.huggingface.co/hyperbolic/v1",
+ api_key="hf_***"
+)
+
+completion = client.chat.completions.create(
+ model="Qwen/Qwen2.5-VL-7B-Instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens=500,
)
-for chunk in stream:
- print(chunk.choices[0].delta.content, end="")
+print(completion.choices[0].message)
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text_to_text).
-
+
+
+
+
-
```js
-async function query(data) {
- const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct",
- {
- headers: {
- Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
- },
- method: "POST",
- body: JSON.stringify(data),
- }
- );
- const result = await response.json();
- return result;
-}
-
-query({"inputs": "Can you please let us know more details about your "}).then((response) => {
- console.log(JSON.stringify(response));
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const chatCompletion = await client.chatCompletion({
+ provider: "hyperbolic",
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
});
+
+console.log(chatCompletion.choices[0].message);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imagetexttotext).
-
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#imagetexttotext).
+
+
+
+```js
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+ baseURL: "https://router.huggingface.co/hyperbolic/v1",
+ apiKey: "hf_***",
+});
+
+const chatCompletion = await client.chat.completions.create({
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence.",
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
+ },
+ },
+ ],
+ },
+ ],
+ max_tokens: 500,
+});
+
+console.log(chatCompletion.choices[0].message);
+```
+
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hyperbolic/v1/chat/completions \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ "max_tokens": 500,
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct",
+ "stream": false
+ }'
+```
+
+
+
diff --git a/docs/api-inference/tasks/image-to-image.md b/docs/api-inference/tasks/image-to-image.md
index 810b97e57..9596da927 100644
--- a/docs/api-inference/tasks/image-to-image.md
+++ b/docs/api-inference/tasks/image-to-image.md
@@ -35,7 +35,64 @@ Explore all available models and find the one that suits you best [here](https:/
### Using the API
-No snippet available for this task.
+
+
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hf-inference",
+ api_key="hf_***",
+)
+
+# output is a PIL.Image object
+image = client.image_to_image(
+ "cat.png",
+ prompt="Turn the cat into a tiger.",
+ model="stabilityai/stable-diffusion-xl-refiner-1.0",
+)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_to_image).
+
+
+
+```python
+import base64
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/stabilityai/stable-diffusion-xl-refiner-1.0"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ with open(payload["inputs"], "rb") as f:
+ img = f.read()
+ payload["inputs"] = base64.b64encode(img).decode("utf-8")
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.content
+
+image_bytes = query({
+ "inputs": "cat.png",
+ "parameters": {
+ "prompt": "Turn the cat into a tiger."
+ }
+})
+
+# You can access the image with PIL.Image for example
+import io
+from PIL import Image
+image = Image.open(io.BytesIO(image_bytes))
+```
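+
+The decoded result behaves like any other PIL image, so persisting it is direct (the output filename is just an example):
+
+```python
+image.save("tiger.png")
+```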
+
+
+
+
+
diff --git a/docs/api-inference/tasks/object-detection.md b/docs/api-inference/tasks/object-detection.md
index b8fde8d08..07ae70e02 100644
--- a/docs/api-inference/tasks/object-detection.md
+++ b/docs/api-inference/tasks/object-detection.md
@@ -24,7 +24,6 @@ For more details about the `object-detection` task, check out its [dedicated pag
### Recommended models
-- [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50): Solid object detection model pre-trained on the COCO 2017 dataset.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=object-detection&sort=trending).
@@ -33,60 +32,83 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \
- -X POST \
- --data-binary '@cats.jpg' \
- -H 'Authorization: Bearer hf_***'
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="hf-inference",
+ api_key="hf_***",
+)
+
+output = client.object_detection("cats.jpg", model="facebook/detr-resnet-50")
```
-
-
-```py
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.object_detection).
+
+
+
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50"
headers = {"Authorization": "Bearer hf_***"}
def query(filename):
- with open(filename, "rb") as f:
- data = f.read()
- response = requests.post(API_URL, headers=headers, data=data)
- return response.json()
+ with open(filename, "rb") as f:
+ data = f.read()
+ response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data)
+ return response.json()
output = query("cats.jpg")
```
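+
+The detections come back as a JSON list; a small sketch of reading it, assuming the standard object-detection output shape (`label`, `score`, and a `box` with `xmin`/`ymin`/`xmax`/`ymax`):
+
+```python
+# iterate over the detections returned by query() above
+for detection in output:
+    box = detection["box"]
+    print(f"{detection['label']} ({detection['score']:.2f}): "
+          f"({box['xmin']}, {box['ymin']}) -> ({box['xmax']}, {box['ymax']})")
+```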
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.object_detection).
-
+
+
+
+
-
```js
+import fs from "node:fs";
+
async function query(filename) {
	const data = fs.readFileSync(filename);
const response = await fetch(
"https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50",
{
headers: {
Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
+ "Content-Type": "image/jpeg"
},
method: "POST",
		body: data,
}
);
const result = await response.json();
return result;
}
-query("cats.jpg").then((response) => {
- console.log(JSON.stringify(response));
+query("cats.jpg").then((response) => {
+ console.log(JSON.stringify(response));
});
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#objectdetection).
-
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: image/jpeg' \
+ --data-binary @"cats.jpg"
+```
+
+
+
diff --git a/docs/api-inference/tasks/question-answering.md b/docs/api-inference/tasks/question-answering.md
index 0cca700b3..2ff3fded3 100644
--- a/docs/api-inference/tasks/question-answering.md
+++ b/docs/api-inference/tasks/question-answering.md
@@ -24,9 +24,6 @@ For more details about the `question-answering` task, check out its [dedicated p
### Recommended models
-- [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2): A robust baseline model for most question answering domains.
-- [distilbert/distilbert-base-cased-distilled-squad](https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad): Small yet robust model that can answer questions.
-- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A special model that can answer questions from tables.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending).
@@ -35,86 +32,59 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2 \
- -X POST \
- -d '{"inputs": { "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." }}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
result = client.question_answering(
- model="deepset/roberta-base-squad2",
- inputs={
+ inputs={
"question": "What is my name?",
"context": "My name is Clara and I live in Berkeley."
},
- provider="hf-inference",
+ model="distilbert/distilbert-base-cased-distilled-squad",
)
+```
-print(result)
+
-```
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.question_answering).
+
+
-Using `requests`:
-```py
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
output = query({
- "inputs": {
+ "inputs": {
"question": "What is my name?",
"context": "My name is Clara and I live in Berkeley."
},
})
```
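+
+The response is a single JSON object holding the answer and its character span, so each field can be read directly (keys follow the standard question-answering output):
+
+```python
+# e.g. answer="Clara", plus its confidence and start/end offsets
+print(output["answer"], output["score"], output["start"], output["end"])
+```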
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.question_answering).
-
+
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-const client = new HfInference("hf_***");
+
-const output = await client.questionAnswering({
- model: "deepset/roberta-base-squad2",
- inputs: {
- "question": "What is my name?",
- "context": "My name is Clara and I live in Berkeley."
-},
- provider: "hf-inference",
-});
-
-console.log(output);
-
-```
-
-Using `fetch`:
```js
async function query(data) {
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2",
+ "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad",
{
headers: {
Authorization: "Bearer hf_***",
@@ -128,16 +98,54 @@ async function query(data) {
return result;
}
-query({"inputs": {
+query({ inputs: {
+ "question": "What is my name?",
+ "context": "My name is Clara and I live in Berkeley."
+} }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const output = await client.questionAnswering({
+ model: "distilbert/distilbert-base-cased-distilled-squad",
+ inputs: {
"question": "What is my name?",
"context": "My name is Clara and I live in Berkeley."
-}}).then((response) => {
- console.log(JSON.stringify(response));
+},
+ provider: "hf-inference",
});
+
+console.log(output);
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#questionanswering).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "inputs": "{\n\t\"question\": \"What is my name?\",\n\t\"context\": \"My name is Clara and I live in Berkeley.\"\n}"
+ }'
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#questionanswering).
-
+
+
diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md
index b55dfac70..161fe3ed2 100644
--- a/docs/api-inference/tasks/summarization.md
+++ b/docs/api-inference/tasks/summarization.md
@@ -24,8 +24,6 @@ For more details about the `summarization` task, check out its [dedicated page](
### Recommended models
-- [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn): A strong summarization model trained on English news articles. Excels at generating factual summaries.
-- [Falconsai/medical_summarization](https://huggingface.co/Falconsai/medical_summarization): A summarization model trained on medical articles.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=summarization&sort=trending).
@@ -34,73 +32,49 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \
- -X POST \
- -d '{"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
result = client.summarization(
- model="facebook/bart-large-cnn",
- inputs="The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
- provider="hf-inference",
+ inputs="The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
+ model="facebook/bart-large-cnn",
)
+```
-print(result)
+
-```
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.summarization).
-Using `requests`:
-```py
+
+
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
output = query({
- "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
+ "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
})
```
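+
+The endpoint returns a list with one object per input; the summary itself sits under `summary_text`:
+
+```python
+print(output[0]["summary_text"])
+```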
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.summarization).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
+
-const output = await client.summarization({
- model: "facebook/bart-large-cnn",
- inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
- provider: "hf-inference",
-});
-console.log(output);
-
-```
+
-Using `fetch`:
```js
async function query(data) {
const response = await fetch(
@@ -118,13 +92,48 @@ async function query(data) {
return result;
}
-query({"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."}).then((response) => {
- console.log(JSON.stringify(response));
+query({ inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct." }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const output = await client.summarization({
+ model: "facebook/bart-large-cnn",
+ inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
+ provider: "hf-inference",
});
+
+console.log(output);
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#summarization).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "inputs": "\"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.\""
+ }'
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#summarization).
-
+
+
diff --git a/docs/api-inference/tasks/table-question-answering.md b/docs/api-inference/tasks/table-question-answering.md
index 8004ab9b0..76e68637a 100644
--- a/docs/api-inference/tasks/table-question-answering.md
+++ b/docs/api-inference/tasks/table-question-answering.md
@@ -24,157 +24,13 @@ For more details about the `table-question-answering` task, check out its [dedic
### Recommended models
-- [microsoft/tapex-base](https://huggingface.co/microsoft/tapex-base): A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table.
-- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A robust table question answering model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=table-question-answering&sort=trending).
### Using the API
-
-
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/microsoft/tapex-base \
- -X POST \
- -d '{"inputs": { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": [ "Python", "Python", "Rust, Python and NodeJS" ] } }}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-
-Using `huggingface_hub`:
-```py
-from huggingface_hub import InferenceClient
-
-client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
-)
-
-result = client.table_question_answering(
- model="microsoft/tapex-base",
- inputs={
- "query": "How many stars does the transformers repository have?",
- "table": {
- "Repository": ["Transformers", "Datasets", "Tokenizers"],
- "Stars": ["36542", "4512", "3934"],
- "Contributors": ["651", "77", "34"],
- "Programming language": [
- "Python",
- "Python",
- "Rust, Python and NodeJS"
- ]
- }
-},
- provider="hf-inference",
-)
-
-print(result)
-
-```
-
-Using `requests`:
-```py
-import requests
-
-API_URL = "https://router.huggingface.co/hf-inference/v1"
-headers = {"Authorization": "Bearer hf_***"}
-
-def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
-output = query({
- "inputs": {
- "query": "How many stars does the transformers repository have?",
- "table": {
- "Repository": ["Transformers", "Datasets", "Tokenizers"],
- "Stars": ["36542", "4512", "3934"],
- "Contributors": ["651", "77", "34"],
- "Programming language": [
- "Python",
- "Python",
- "Rust, Python and NodeJS"
- ]
- }
-},
-})
-```
-
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.table_question_answering).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
-
-const output = await client.tableQuestionAnswering({
- model: "microsoft/tapex-base",
- inputs: {
- "query": "How many stars does the transformers repository have?",
- "table": {
- "Repository": ["Transformers", "Datasets", "Tokenizers"],
- "Stars": ["36542", "4512", "3934"],
- "Contributors": ["651", "77", "34"],
- "Programming language": [
- "Python",
- "Python",
- "Rust, Python and NodeJS"
- ]
- }
-},
- provider: "hf-inference",
-});
-
-console.log(output);
-
-```
-
-Using `fetch`:
-```js
-async function query(data) {
- const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/microsoft/tapex-base",
- {
- headers: {
- Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
- },
- method: "POST",
- body: JSON.stringify(data),
- }
- );
- const result = await response.json();
- return result;
-}
-
-query({"inputs": {
- "query": "How many stars does the transformers repository have?",
- "table": {
- "Repository": ["Transformers", "Datasets", "Tokenizers"],
- "Stars": ["36542", "4512", "3934"],
- "Contributors": ["651", "77", "34"],
- "Programming language": [
- "Python",
- "Python",
- "Rust, Python and NodeJS"
- ]
- }
-}}).then((response) => {
- console.log(JSON.stringify(response));
-});
-```
-
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#tablequestionanswering).
-
-
-
+No snippet available for this task.
diff --git a/docs/api-inference/tasks/text-classification.md b/docs/api-inference/tasks/text-classification.md
index ed2458c0c..4aba37365 100644
--- a/docs/api-inference/tasks/text-classification.md
+++ b/docs/api-inference/tasks/text-classification.md
@@ -24,11 +24,6 @@ For more details about the `text-classification` task, check out its [dedicated
### Recommended models
-- [distilbert/distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english): A robust model trained for sentiment analysis.
-- [ProsusAI/finbert](https://huggingface.co/ProsusAI/finbert): A sentiment analysis model specialized in financial sentiment.
-- [cardiffnlp/twitter-roberta-base-sentiment-latest](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest): A sentiment analysis model specialized in analyzing tweets.
-- [papluca/xlm-roberta-base-language-detection](https://huggingface.co/papluca/xlm-roberta-base-language-detection): A model that can classify languages.
-- [meta-llama/Prompt-Guard-86M](https://huggingface.co/meta-llama/Prompt-Guard-86M): A model that can classify text generation attacks.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending).
@@ -37,77 +32,53 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english \
- -X POST \
- -d '{"inputs": "I like you. I love you"}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
result = client.text_classification(
- model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
- inputs="I like you. I love you",
- provider="hf-inference",
+ inputs="I like you. I love you",
+ model="ProsusAI/finbert",
)
+```
-print(result)
+
-```
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_classification).
+
+
-Using `requests`:
-```py
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/ProsusAI/finbert"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
output = query({
- "inputs": "I like you. I love you",
+ "inputs": "I like you. I love you",
})
```
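+
+Depending on the deployment, the scores come back as a list (sometimes nested one level) of `label`/`score` pairs; a hedged sketch that handles both shapes:
+
+```python
+# unwrap the extra nesting some deployments return
+labels = output[0] if output and isinstance(output[0], list) else output
+for item in labels:
+    print(item["label"], round(item["score"], 3))
+```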
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_classification).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
+
-const output = await client.textClassification({
- model: "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
- inputs: "I like you. I love you",
- provider: "hf-inference",
-});
-console.log(output);
+
-```
-
-Using `fetch`:
```js
async function query(data) {
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english",
+ "https://router.huggingface.co/hf-inference/models/ProsusAI/finbert",
{
headers: {
Authorization: "Bearer hf_***",
@@ -121,13 +92,48 @@ async function query(data) {
return result;
}
-query({"inputs": "I like you. I love you"}).then((response) => {
- console.log(JSON.stringify(response));
+query({ inputs: "I like you. I love you" }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const output = await client.textClassification({
+ model: "ProsusAI/finbert",
+ inputs: "I like you. I love you",
+ provider: "hf-inference",
});
+
+console.log(output);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textclassification).
-
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#textclassification).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/ProsusAI/finbert \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "inputs": "\"I like you. I love you\""
+ }'
+```
+
+
+
diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md
index 7cedd2c76..0c5da2f63 100644
--- a/docs/api-inference/tasks/text-generation.md
+++ b/docs/api-inference/tasks/text-generation.md
@@ -26,12 +26,6 @@ For more details about the `text-generation` task, check out its [dedicated page
### Recommended models
-- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
-- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models.
-- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
-- [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft.
-- [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code.
-- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending).
@@ -40,77 +34,155 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it \
- -X POST \
- -d '{"inputs": "Can you please let us know more details about your "}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
result = client.text_generation(
- model="google/gemma-2-2b-it",
- inputs="Can you please let us know more details about your ",
- provider="hf-inference",
+ inputs="Can you please let us know more details about your ",
+ model="Qwen/QwQ-32B",
)
+```
-print(result)
+
-```
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation).
+
+
-Using `requests`:
-```py
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
output = query({
- "inputs": "Can you please let us know more details about your ",
+ "inputs": "Can you please let us know more details about your ",
})
```
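+
+The raw endpoint returns a list with one `generated_text` entry per input, so the completion can be read directly:
+
+```python
+print(output[0]["generated_text"])
+```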
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation).
-
+
+
+
+
-
-Using `huggingface.js`:
```js
-import { HfInference } from "@huggingface/inference";
+async function query(data) {
+ const response = await fetch(
+ "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B",
+ {
+ headers: {
+ Authorization: "Bearer hf_***",
+ "Content-Type": "application/json",
+ },
+ method: "POST",
+ body: JSON.stringify(data),
+ }
+ );
+ const result = await response.json();
+ return result;
+}
+
+query({ inputs: "Can you please let us know more details about your " }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
-const client = new HfInference("hf_***");
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
const output = await client.textGeneration({
- model: "google/gemma-2-2b-it",
+ model: "Qwen/QwQ-32B",
inputs: "Can you please let us know more details about your ",
provider: "hf-inference",
});
console.log(output);
+```
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#textgeneration).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "inputs": "\"Can you please let us know more details about your \""
+ }'
```
-Using `fetch`:
+
+
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="together",
+ api_key="hf_***",
+)
+
+result = client.text_generation(
+ inputs="Can you please let us know more details about your ",
+ model="deepseek-ai/DeepSeek-R1",
+)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation).
+
+
+
+```python
+import requests
+
+API_URL = "https://router.huggingface.co/together/v1/completions"
+headers = {"Authorization": "Bearer hf_***"}
+
+def query(payload):
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
+output = query({
+ "inputs": "Can you please let us know more details about your ",
+})
+```
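+
+Assuming the router's `/v1/completions` route follows the OpenAI-style response shape, the generated text sits under `choices[0].text`:
+
+```python
+# assumed OpenAI-compatible completions response
+print(output["choices"][0]["text"])
+```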
+
+
+
+
+
+
```js
async function query(data) {
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it",
+ "https://router.huggingface.co/together/v1/completions",
{
headers: {
Authorization: "Bearer hf_***",
@@ -124,13 +196,49 @@ async function query(data) {
return result;
}
-query({"inputs": "Can you please let us know more details about your "}).then((response) => {
- console.log(JSON.stringify(response));
+query({ inputs: "Can you please let us know more details about your " }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const output = await client.textGeneration({
+ model: "deepseek-ai/DeepSeek-R1",
+ inputs: "Can you please let us know more details about your ",
+ provider: "together",
});
+
+console.log(output);
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#textgeneration).
+
+
+
+```sh
+curl https://router.huggingface.co/together/v1/completions \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "inputs": "\"Can you please let us know more details about your \"",
+ "model": "deepseek-ai/DeepSeek-R1"
+ }'
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textgeneration).
-
+
+
diff --git a/docs/api-inference/tasks/text-to-image.md b/docs/api-inference/tasks/text-to-image.md
index 71bb3b141..3213c6ed2 100644
--- a/docs/api-inference/tasks/text-to-image.md
+++ b/docs/api-inference/tasks/text-to-image.md
@@ -24,9 +24,6 @@ For more details about the `text-to-image` task, check out its [dedicated page](
### Recommended models
-- [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs.
-- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation.
-- [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending).
@@ -35,46 +32,127 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev \
- -X POST \
- -d '{"inputs": "Astronaut riding a horse"}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="fal-ai",
+ api_key="hf_***",
+)
+
+# output is a PIL.Image object
+image = client.text_to_image(
+ "Astronaut riding a horse",
+ model="black-forest-labs/FLUX.1-dev",
+)
```
-
-
-Using `huggingface_hub`:
-```py
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image).
+
+
+
+```python
+import fal_client
+
+result = fal_client.subscribe(
+ "fal-ai/flux/dev",
+ arguments={
+ "prompt": "Astronaut riding a horse",
+ },
+)
+print(result)
+```
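+
+`fal_client` returns a JSON payload rather than raw image bytes; a hedged sketch of downloading the first image, where the `images[0]["url"]` field is an assumption based on fal's usual response shape:
+
+```python
+import requests
+
+image_url = result["images"][0]["url"]  # assumed response shape
+with open("astronaut.png", "wb") as f:
+    f.write(requests.get(image_url).content)
+```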
+
+
+
+
+
+
+```js
+async function query(data) {
+ const response = await fetch(
+ "https://router.huggingface.co/fal-ai/fal-ai/flux/dev",
+ {
+ headers: {
+ Authorization: "Bearer hf_***",
+ "Content-Type": "application/json",
+ },
+ method: "POST",
+ body: JSON.stringify(data),
+ }
+ );
+ const result = await response.blob();
+ return result;
+}
+
+query({ inputs: "Astronaut riding a horse" }).then((response) => {
+ // Use image
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const image = await client.textToImage({
+ provider: "fal-ai",
+ model: "black-forest-labs/FLUX.1-dev",
+ inputs: "Astronaut riding a horse",
+ parameters: { num_inference_steps: 5 },
+});
+/// Use the generated image (it's a Blob)
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#texttoimage).
+
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
# output is a PIL.Image object
image = client.text_to_image(
- "Astronaut riding a horse",
- model="black-forest-labs/FLUX.1-dev"
+ "Astronaut riding a horse",
+ model="black-forest-labs/FLUX.1-dev",
)
```
-Using `requests`:
-```py
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image).
+
+
+
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.content
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.content
image_bytes = query({
- "inputs": "Astronaut riding a horse",
+ "inputs": "Astronaut riding a horse",
})
# You can access the image with PIL.Image for example
@@ -83,31 +161,151 @@ from PIL import Image
image = Image.open(io.BytesIO(image_bytes))
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image).
-
+
+
+
+
+
+```js
+async function query(data) {
+ const response = await fetch(
+ "https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev",
+ {
+ headers: {
+ Authorization: "Bearer hf_***",
+ "Content-Type": "application/json",
+ },
+ method: "POST",
+ body: JSON.stringify(data),
+ }
+ );
+ const result = await response.blob();
+ return result;
+}
+
+query({ inputs: "Astronaut riding a horse" }).then((response) => {
+ // Use image
+});
+```
+
+
+
+
+
-
-Using `huggingface.js`:
```js
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
-const client = new HfInference("hf_***");
+const client = new InferenceClient("hf_***");
const image = await client.textToImage({
- model: "black-forest-labs/FLUX.1-dev",
+ provider: "hf-inference",
+ model: "black-forest-labs/FLUX.1-dev",
inputs: "Astronaut riding a horse",
parameters: { num_inference_steps: 5 },
- provider: "hf-inference",
});
/// Use the generated image (it's a Blob)
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#texttoimage).
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="nebius",
+ api_key="hf_***",
+)
+# output is a PIL.Image object
+image = client.text_to_image(
+ "Astronaut riding a horse",
+ model="black-forest-labs/FLUX.1-dev",
+)
```
-Using `fetch`:
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image).
+
+
+
```js
async function query(data) {
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev",
+ "https://router.huggingface.co/nebius/v1/images/generations",
+ {
+ headers: {
+ Authorization: "Bearer hf_***",
+ "Content-Type": "application/json",
+ },
+ method: "POST",
+ body: JSON.stringify(data),
+ }
+ );
+ const result = await response.blob();
+ return result;
+}
+
+query({ inputs: "Astronaut riding a horse" }).then((response) => {
+ // Use image
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const image = await client.textToImage({
+ provider: "nebius",
+ model: "black-forest-labs/FLUX.1-dev",
+ inputs: "Astronaut riding a horse",
+ parameters: { num_inference_steps: 5 },
+});
+/// Use the generated image (it's a Blob)
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#texttoimage).
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="replicate",
+ api_key="hf_***",
+)
+
+# output is a PIL.Image object
+image = client.text_to_image(
+ "Astronaut riding a horse",
+ model="black-forest-labs/FLUX.1-dev",
+)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image).
+
+
+
+```js
+async function query(data) {
+ const response = await fetch(
+ "https://router.huggingface.co/replicate/v1/models/black-forest-labs/flux-dev/predictions",
{
headers: {
Authorization: "Bearer hf_***",
@@ -120,13 +318,102 @@ async function query(data) {
const result = await response.blob();
return result;
}
-query({"inputs": "Astronaut riding a horse"}).then((response) => {
- // Use image
+
+query({ inputs: "Astronaut riding a horse" }).then((response) => {
+ // Use image
});
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#texttoimage).
-
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const image = await client.textToImage({
+ provider: "replicate",
+ model: "black-forest-labs/FLUX.1-dev",
+ inputs: "Astronaut riding a horse",
+ parameters: { num_inference_steps: 5 },
+});
+/// Use the generated image (it's a Blob)
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#texttoimage).
+
+
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+ provider="together",
+ api_key="hf_***",
+)
+
+# output is a PIL.Image object
+image = client.text_to_image(
+ "Astronaut riding a horse",
+ model="black-forest-labs/FLUX.1-dev",
+)
+```
+
+
+
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image).
+
+
+
+```js
+async function query(data) {
+ const response = await fetch(
+ "https://router.huggingface.co/together/v1/images/generations",
+ {
+ headers: {
+ Authorization: "Bearer hf_***",
+ "Content-Type": "application/json",
+ },
+ method: "POST",
+ body: JSON.stringify(data),
+ }
+ );
+ const result = await response.blob();
+ return result;
+}
+
+query({ inputs: "Astronaut riding a horse" }).then((response) => {
+ // Use image
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const image = await client.textToImage({
+ provider: "together",
+ model: "black-forest-labs/FLUX.1-dev",
+ inputs: "Astronaut riding a horse",
+ parameters: { num_inference_steps: 5 },
+});
+/// Use the generated image (it's a Blob)
+```
+
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#texttoimage).
diff --git a/docs/api-inference/tasks/token-classification.md b/docs/api-inference/tasks/token-classification.md
index eeecde32c..296c9f139 100644
--- a/docs/api-inference/tasks/token-classification.md
+++ b/docs/api-inference/tasks/token-classification.md
@@ -24,10 +24,6 @@ For more details about the `token-classification` task, check out its [dedicated
### Recommended models
-- [dslim/bert-base-NER](https://huggingface.co/dslim/bert-base-NER): A robust performance model to identify people, locations, organizations and names of miscellaneous entities.
-- [FacebookAI/xlm-roberta-large-finetuned-conll03-english](https://huggingface.co/FacebookAI/xlm-roberta-large-finetuned-conll03-english): A strong model to identify people, locations, organizations and names in multiple languages.
-- [blaze999/Medical-NER](https://huggingface.co/blaze999/Medical-NER): A token classification model specialized on medical entity recognition.
-- [flair/ner-english](https://huggingface.co/flair/ner-english): Flair models are typically the state of the art in named entity recognition tasks.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=token-classification&sort=trending).
@@ -36,77 +32,53 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \
- -X POST \
- -d '{"inputs": "My name is Sarah Jessica Parker but you can call me Jessica"}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
result = client.token_classification(
- model="dslim/bert-base-NER",
- inputs="My name is Sarah Jessica Parker but you can call me Jessica",
- provider="hf-inference",
+ inputs="My name is Sarah Jessica Parker but you can call me Jessica",
+ model="FacebookAI/xlm-roberta-large-finetuned-conll03-english",
)
+```
-print(result)
+
-```
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.token_classification).
+
+
-Using `requests`:
-```py
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
output = query({
- "inputs": "My name is Sarah Jessica Parker but you can call me Jessica",
+ "inputs": "My name is Sarah Jessica Parker but you can call me Jessica",
})
```
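+
+Each recognized entity comes back as an object with its type, surface form, and character span; a short sketch of reading them, assuming the standard aggregated NER output fields:
+
+```python
+for entity in output:
+    print(entity["entity_group"], entity["word"],
+          entity["start"], entity["end"], round(entity["score"], 3))
+```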
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.token_classification).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
-
-const output = await client.tokenClassification({
- model: "dslim/bert-base-NER",
- inputs: "My name is Sarah Jessica Parker but you can call me Jessica",
- provider: "hf-inference",
-});
+
-console.log(output);
-```
+
-Using `fetch`:
```js
async function query(data) {
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER",
+ "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english",
{
headers: {
Authorization: "Bearer hf_***",
@@ -120,13 +92,48 @@ async function query(data) {
return result;
}
-query({"inputs": "My name is Sarah Jessica Parker but you can call me Jessica"}).then((response) => {
- console.log(JSON.stringify(response));
+query({ inputs: "My name is Sarah Jessica Parker but you can call me Jessica" }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const output = await client.tokenClassification({
+ model: "FacebookAI/xlm-roberta-large-finetuned-conll03-english",
+ inputs: "My name is Sarah Jessica Parker but you can call me Jessica",
+ provider: "hf-inference",
});
+
+console.log(output);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#tokenclassification).
-
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#tokenclassification).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "inputs": "\"My name is Sarah Jessica Parker but you can call me Jessica\""
+ }'
+```
+
+
+
@@ -162,9 +169,6 @@ For more information about Inference API headers, check out the parameters [guid
#### Response
-Output type depends on the `stream` input parameter.
-If `stream` is `false` (default), the response will be a JSON object with the following fields:
-
| Body | Type | Description |
| :--- | :--- | :--- |
| **(array)** | _object[]_ | Output is an array of objects. |
@@ -176,8 +180,3 @@ If `stream` is `false` (default), the response will be a JSON object with the fo
| ** end** | _integer_ | The character position in the input where this group ends. |
-If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE).
-For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming).
-
-
-
diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md
index bc4939d7b..ca5572f92 100644
--- a/docs/api-inference/tasks/translation.md
+++ b/docs/api-inference/tasks/translation.md
@@ -24,8 +24,6 @@ For more details about the `translation` task, check out its [dedicated page](ht
### Recommended models
-- [facebook/nllb-200-1.3B](https://huggingface.co/facebook/nllb-200-1.3B): Very powerful model that can translate many languages between each other, especially low-resource languages.
-- [google-t5/t5-base](https://huggingface.co/google-t5/t5-base): A general-purpose Transformer that can be used to translate from English to German, French, or Romanian.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=translation&sort=trending).
@@ -34,77 +32,53 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B \
- -X POST \
- -d '{"inputs": "Меня зовут Вольфганг и я живу в Берлине"}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-Using `huggingface_hub`:
-```py
+
+
+```python
from huggingface_hub import InferenceClient
client = InferenceClient(
- provider="hf-inference",
- api_key="hf_***"
+ provider="hf-inference",
+ api_key="hf_***",
)
result = client.translation(
- model="facebook/nllb-200-1.3B",
- inputs="Меня зовут Вольфганг и я живу в Берлине",
- provider="hf-inference",
+ inputs="Меня зовут Вольфганг и я живу в Берлине",
+ model="facebook/mbart-large-50-many-to-many-mmt",
)
+```
-print(result)
+
-```
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.translation).
+
+
-Using `requests`:
-```py
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
-
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
+
output = query({
- "inputs": "Меня зовут Вольфганг и я живу в Берлине",
+ "inputs": "Меня зовут Вольфганг и я живу в Берлине",
})
```
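+
+The endpoint returns a list with one `translation_text` entry per input:
+
+```python
+print(output[0]["translation_text"])
+```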
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.translation).
-
-
-
-Using `huggingface.js`:
-```js
-import { HfInference } from "@huggingface/inference";
-
-const client = new HfInference("hf_***");
+
-const output = await client.translation({
- model: "facebook/nllb-200-1.3B",
- inputs: "Меня зовут Вольфганг и я живу в Берлине",
- provider: "hf-inference",
-});
-console.log(output);
+
-```
-
-Using `fetch`:
```js
async function query(data) {
const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B",
+ "https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt",
{
headers: {
Authorization: "Bearer hf_***",
@@ -118,13 +92,48 @@ async function query(data) {
return result;
}
-query({"inputs": "Меня зовут Вольфганг и я живу в Берлине"}).then((response) => {
- console.log(JSON.stringify(response));
+query({ inputs: "Меня зовут Вольфганг и я живу в Берлине" }).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```js
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_***");
+
+const output = await client.translation({
+ model: "facebook/mbart-large-50-many-to-many-mmt",
+ inputs: "Меня зовут Вольфганг и я живу в Берлине",
+ provider: "hf-inference",
});
+
+console.log(output);
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#translation).
-
+
+
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#translation).
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt \
+ -X POST \
+ -H 'Authorization: Bearer hf_***' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "inputs": "\"Меня зовут Вольфганг и я живу в Берлине\""
+ }'
+```
+
+
+
diff --git a/docs/api-inference/tasks/zero-shot-classification.md b/docs/api-inference/tasks/zero-shot-classification.md
index 0b0d3a1a0..619ddf3cc 100644
--- a/docs/api-inference/tasks/zero-shot-classification.md
+++ b/docs/api-inference/tasks/zero-shot-classification.md
@@ -24,7 +24,6 @@ For more details about the `zero-shot-classification` task, check out its [dedic
### Recommended models
-- [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli): Powerful zero-shot text classification model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=zero-shot-classification&sort=trending).
@@ -33,26 +32,18 @@ Explore all available models and find the one that suits you best [here](https:/
-
-```bash
-curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \
- -X POST \
- -d '{"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Bearer hf_***'
-```
-
-
-```py
+
+
+```python
import requests
-API_URL = "https://router.huggingface.co/hf-inference/v1"
+API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli"
headers = {"Authorization": "Bearer hf_***"}
def query(payload):
- response = requests.post(API_URL, headers=headers, json=payload)
- return response.json()
+ response = requests.post(API_URL, headers=headers, json=payload)
+ return response.json()
output = query({
"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!",
@@ -60,34 +51,51 @@ output = query({
})
```
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.zero_shot_classification).
-
+
+
+
+
-
```js
async function query(data) {
- const response = await fetch(
- "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli",
- {
- headers: {
- Authorization: "Bearer hf_***",
- "Content-Type": "application/json",
- },
- method: "POST",
- body: JSON.stringify(data),
- }
- );
- const result = await response.json();
- return result;
- }
-
- query({"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => {
- console.log(JSON.stringify(response));
- });
+ const response = await fetch(
+ "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli",
+ {
+ headers: {
+ Authorization: "Bearer hf_***",
+ "Content-Type": "application/json",
+ },
+ method: "POST",
+ body: JSON.stringify(data),
+ }
+ );
+ const result = await response.json();
+ return result;
+}
+
+query({
+ inputs: "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!",
+ parameters: { candidate_labels: ["refund", "legal", "faq"] }
+}).then((response) => {
+ console.log(JSON.stringify(response));
+});
+```
+
+
+
+
+
+
+```sh
+curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \
+ -X POST \
+ -d '{"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \
+ -H 'Content-Type: application/json' \
+ -H 'Authorization: Bearer hf_***'
```
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#zeroshotclassification).
-
+
+
diff --git a/scripts/api-inference/package.json b/scripts/api-inference/package.json
index 49cd7a61f..f23b61183 100644
--- a/scripts/api-inference/package.json
+++ b/scripts/api-inference/package.json
@@ -14,8 +14,8 @@
"author": "",
"license": "ISC",
"dependencies": {
- "@huggingface/inference": "^3.5.0",
- "@huggingface/tasks": "^0.17.4",
+ "@huggingface/inference": "^3.6.1",
+ "@huggingface/tasks": "^0.18.1",
"@types/node": "^22.5.0",
"handlebars": "^4.7.8",
"node": "^20.17.0",
diff --git a/scripts/api-inference/pnpm-lock.yaml b/scripts/api-inference/pnpm-lock.yaml
index 46a86a1bd..26e5e4574 100644
--- a/scripts/api-inference/pnpm-lock.yaml
+++ b/scripts/api-inference/pnpm-lock.yaml
@@ -9,11 +9,11 @@ importers:
.:
dependencies:
'@huggingface/inference':
- specifier: ^3.5.0
- version: 3.5.0
+ specifier: ^3.6.1
+ version: 3.6.1
'@huggingface/tasks':
- specifier: ^0.17.4
- version: 0.17.4
+ specifier: ^0.18.1
+ version: 0.18.1
'@types/node':
specifier: ^22.5.0
version: 22.5.0
@@ -189,12 +189,19 @@ packages:
cpu: [x64]
os: [win32]
- '@huggingface/inference@3.5.0':
- resolution: {integrity: sha512-5IKkI/HJDDWg5aVWyd60kj27L9Kwxyyvu64U1To4/HzsZj13flqv2rJMrT6OB0izvFwTfUN1SDrrA5OH3YbxQQ==}
+ '@huggingface/inference@3.6.1':
+ resolution: {integrity: sha512-EtQlbBqcZycPe+qiTEFI+wNHOMpG0gwNTaZSvYu1juN1p/1dEgqAb2GO31dxLgNev2PzH9d+9nm8GngOsIepJg==}
engines: {node: '>=18'}
- '@huggingface/tasks@0.17.4':
- resolution: {integrity: sha512-LES7+OosthFKdqRL0e+bA2d4jfKmiQWuqahsPrv0+EsSZtdHdaZ3nje0f2g5wq4miHX4xWpBLuWJknjdnBwXsA==}
+ '@huggingface/jinja@0.3.3':
+ resolution: {integrity: sha512-vQQr2JyWvVFba3Lj9es4q9vCl1sAc74fdgnEMoX8qHrXtswap9ge9uO3ONDzQB0cQ0PUyaKY2N6HaVbTBvSXvw==}
+ engines: {node: '>=18'}
+
+ '@huggingface/tasks@0.17.9':
+ resolution: {integrity: sha512-lV6RgCJkqy3p93FFxP9H4SGJmFcHAwr1FO+Zk56q/JWsf7Tdsel1DEo1Xfd3An7ZPWpc2Y9ldRecGo9efDYghg==}
+
+ '@huggingface/tasks@0.18.1':
+ resolution: {integrity: sha512-HK6JTVB/nrgjOnbe77HFSENftfAp67AI4mHMR2x64Os1hvchuTT88M8fKEiyESSvqKFKwW4lQKkHva07p05AXw==}
'@jridgewell/resolve-uri@3.1.2':
resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==}
@@ -411,11 +418,16 @@ snapshots:
'@esbuild/win32-x64@0.23.1':
optional: true
- '@huggingface/inference@3.5.0':
+ '@huggingface/inference@3.6.1':
dependencies:
- '@huggingface/tasks': 0.17.4
+ '@huggingface/jinja': 0.3.3
+ '@huggingface/tasks': 0.17.9
+
+ '@huggingface/jinja@0.3.3': {}
+
+ '@huggingface/tasks@0.17.9': {}
- '@huggingface/tasks@0.17.4': {}
+ '@huggingface/tasks@0.18.1': {}
'@jridgewell/resolve-uri@3.1.2': {}
diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts
index 98254a84e..36c7d7e4d 100644
--- a/scripts/api-inference/scripts/generate.ts
+++ b/scripts/api-inference/scripts/generate.ts
@@ -1,5 +1,13 @@
-import { snippets } from "@huggingface/inference";
-import { PipelineType, InferenceSnippet } from "@huggingface/tasks";
+import {
+ snippets,
+ INFERENCE_PROVIDERS,
+ InferenceProvider,
+} from "@huggingface/inference";
+import {
+ PipelineType,
+ InferenceSnippet,
+ type ModelDataMinimal,
+} from "@huggingface/tasks";
import Handlebars from "handlebars";
import * as fs from "node:fs/promises";
import * as path from "node:path/posix";
@@ -28,8 +36,22 @@ const TASKS: PipelineType[] = [
const TASKS_EXTENDED = [...TASKS, "chat-completion"];
const SPECS_REVISION = "main";
-const inferenceSnippetLanguages = ["python", "js", "curl"] as const;
-type InferenceSnippetLanguage = (typeof inferenceSnippetLanguages)[number];
+const HEADERS = { Authorization: `Bearer ${process.env.HF_TOKEN}` };
+
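+// Fetch a JSON resource, attaching the HF token only for huggingface.co URLs;
+// any network or HTTP error is logged and degrades to an empty object.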
+async function authFetchJson(url: string) {
+ const headers = url.includes("huggingface.co") ? HEADERS : {};
+ try {
+ const res = await fetch(url, { headers: headers });
+ if (!res.ok) {
+ console.warn(`Failed to fetch ${url}: ${await res.text()}`);
+ return {};
+ }
+ return res.json();
+ } catch (e) {
+ console.warn(`Failed to fetch ${url}: ${e}`);
+ return {};
+ }
+}
// Taken from https://stackoverflow.com/a/31632215
Handlebars.registerHelper({
@@ -49,6 +71,39 @@ Handlebars.registerHelper({
console.log("🛠️ Preparing...");
+////////////////////////
+//// Provider utils ////
+////////////////////////
+
+/// PER_TASK_SUPPORTED_PROVIDERS[task] = provider[]
+const PER_TASK_SUPPORTED_PROVIDERS: Record<string, InferenceProvider[]> = {};
+
+await Promise.all(
+ INFERENCE_PROVIDERS.map(async (provider) => {
+    if (provider === "hf-inference") {
+ return; // handled separately
+ }
+ console.log(" ⚡ Fetching supported tasks for provider " + provider);
+ const url = `https://huggingface.co/api/partners/${provider}/models`;
+ const mapping = (await authFetchJson(url)) as Record<
+ string,
+    Record<string, { status: string }>
+ >;
+
+ for (const [task, models] of Object.entries(mapping)) {
+ for (const [modelId, modelMapping] of Object.entries(models)) {
+        if (modelMapping.status === "live") {
+ if (!PER_TASK_SUPPORTED_PROVIDERS[task]) {
+ PER_TASK_SUPPORTED_PROVIDERS[task] = [];
+ }
+ PER_TASK_SUPPORTED_PROVIDERS[task].push(provider);
+ break;
+ }
+ }
+ }
+ })
+);
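+/// e.g. PER_TASK_SUPPORTED_PROVIDERS["text-generation"] = ["fal-ai", "together"] (illustrative values)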
+
////////////////////////
//// Filepath utils ////
////////////////////////
@@ -65,12 +120,12 @@ const TABLE_INDENT = NBSP.repeat(8);
function readTemplate(
templateName: string,
- namespace: string,
+ namespace: string
 ): Promise<string> {
const templatePath = path.join(
TEMPLATE_DIR,
namespace,
- `${templateName}.handlebars`,
+ `${templateName}.handlebars`
);
console.log(` 🔍 Reading ${templateName}.handlebars`);
return fs.readFile(templatePath, { encoding: "utf-8" });
@@ -84,7 +139,7 @@ function writeTaskDoc(templateName: string, content: string): Promise<void> {
return fs
.mkdir(TASKS_DOCS_DIR, { recursive: true })
.then(() =>
- fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" }),
+ fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" })
);
}
@@ -94,81 +149,40 @@ function writeTaskDoc(templateName: string, content: string): Promise {
const TASKS_API_URL = "https://huggingface.co/api/tasks";
console.log(` 🕸️ Fetching ${TASKS_API_URL}`);
-const response = await fetch(TASKS_API_URL);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
-const TASKS_DATA = (await response.json()) as any;
+const TASKS_DATA = (await authFetchJson(TASKS_API_URL)) as any;
///////////////////////
//// Snippet utils ////
///////////////////////
-const formatSnippets = (
- result: InferenceSnippet | InferenceSnippet[],
- defaultClient: string,
- language: string,
-): string => {
- // For single snippet, just wrap with code block
- if (!Array.isArray(result) || result.length === 1) {
- const snippet = Array.isArray(result) ? result[0] : result;
- return `\`\`\`${language}\n${snippet.content}\n\`\`\``;
- }
-
- // For multiple snippets, add description and wrap each one
- return result
- .map((snippet) => {
- const client = snippet.client || defaultClient;
- return `Using \`${client}\`:\n\`\`\`${language}\n${snippet.content}\n\`\`\``;
- })
- .join("\n\n");
-};
-
-const GET_SNIPPET_FN = {
- curl: (modelData: any, token: string) => {
- const result = snippets.curl.getCurlInferenceSnippet(
- modelData,
- token,
- "hf-inference",
- );
- return formatSnippets(result, "curl", "bash");
- },
- js: (modelData: any, token: string) => {
- const result = snippets.js.getJsInferenceSnippet(
- modelData,
- token,
- "hf-inference",
- );
- return formatSnippets(result, "javascript", "js");
- },
- python: (modelData: any, token: string) => {
- const result = snippets.python.getPythonInferenceSnippet(
- modelData,
- token,
- "hf-inference",
- );
- return formatSnippets(result, "python", "py");
- },
-} as const;
-
-export function getInferenceSnippet(
- id: string,
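+// Build the ready-to-render snippets for one (task, model, provider) triple;
+// conversational snippets are only generated for models tagged "conversational".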
+export function getFormattedInferenceSnippet(
pipeline_tag: PipelineType,
- language: InferenceSnippetLanguage,
- config?: JsonObject,
- tags?: string[],
-): string | undefined {
- const modelData = {
- id,
- pipeline_tag,
- mask_token: "[MASK]",
- library_name: "",
- config: config ?? {},
- tags: tags ?? [],
- };
- // @ts-ignore
- const generatedSnippets = GET_SNIPPET_FN[language](modelData, "hf_***");
- if (generatedSnippets) {
- return generatedSnippets;
+ model: {
+ modelId: string;
+ provider: string;
+ providerModelId: string;
+ providerTask: string;
+ tags: string[];
+ },
+ conversational: boolean
+): InferenceSnippet[] {
+ if (conversational && !model.tags.includes("conversational")) {
+ return [];
}
+ return snippets.getInferenceSnippets(
+ {
+ id: model.modelId,
+ pipeline_tag,
+ mask_token: "[MASK]",
+ library_name: "",
+ tags: conversational ? ["conversational"] : [],
+ inference: "",
+ } as ModelDataMinimal,
+ "hf_***",
+ model.provider as InferenceProvider,
+ model.providerModelId
+ );
}
/////////////////////
@@ -178,23 +192,21 @@ export function getInferenceSnippet(
type SpecNameType = "input" | "output" | "stream_output";
const SPECS_URL_TEMPLATE = Handlebars.compile(
- `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`,
+ `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`
);
const COMMON_DEFINITIONS_URL = `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/common-definitions.json`;
async function fetchOneSpec(
task: PipelineType,
- name: SpecNameType,
+ name: SpecNameType
 ): Promise<JsonObject | undefined> {
const url = SPECS_URL_TEMPLATE({ task, name });
console.log(` 🕸️ Fetching ${task} ${name} specs`);
- return fetch(url)
- .then((res) => res.json())
- .catch(() => undefined);
+ return (await authFetchJson(url)) ?? undefined;
}
async function fetchSpecs(
- task: PipelineType,
+ task: PipelineType
): Promise<
Record<"input" | "output" | "stream_output", JsonObject | undefined>
> {
@@ -207,7 +219,7 @@ async function fetchSpecs(
 async function fetchCommonDefinitions(): Promise<JsonObject> {
console.log(` 🕸️ Fetching common definitions`);
- return fetch(COMMON_DEFINITIONS_URL).then((res) => res.json());
+ return await authFetchJson(COMMON_DEFINITIONS_URL);
}
const COMMON_DEFINITIONS = await fetchCommonDefinitions();
@@ -232,7 +244,7 @@ function processPayloadSchema(schema: any): JsonObject[] {
key: string,
value: any,
required: boolean,
- parentPrefix: string,
+ parentPrefix: string
): void {
const isRequired = required;
let type = value.type || "unknown";
@@ -296,9 +308,9 @@ function processPayloadSchema(schema: any): JsonObject[] {
nestedKey,
nestedValue,
nestedRequired,
- parentPrefix + TABLE_INDENT,
+ parentPrefix + TABLE_INDENT
);
- },
+ }
);
} else if (isArray) {
// Process array items
@@ -316,7 +328,7 @@ function processPayloadSchema(schema: any): JsonObject[] {
`${NBSP}(#${index + 1})`,
subSchema,
false,
- parentPrefix + TABLE_INDENT,
+ parentPrefix + TABLE_INDENT
);
});
}
@@ -358,20 +370,20 @@ For more details about the \`{{task}}\` task, check out its [dedicated page](htt
`);
const TIP_LIST_MODELS_LINK_TEMPLATE = Handlebars.compile(
- `Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).`,
+ `Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).`
);
const SPECS_HEADERS = await readTemplate("specs-headers", "common");
const PAGE_HEADER = Handlebars.compile(
- await readTemplate("page-header", "common"),
+ await readTemplate("page-header", "common")
);
const SNIPPETS_TEMPLATE = Handlebars.compile(
- await readTemplate("snippets-template", "common"),
+ await readTemplate("snippets-template", "common")
);
const SPECS_PAYLOAD_TEMPLATE = Handlebars.compile(
- await readTemplate("specs-payload", "common"),
+ await readTemplate("specs-payload", "common")
);
const SPECS_OUTPUT_TEMPLATE = Handlebars.compile(
- await readTemplate("specs-output", "common"),
+ await readTemplate("specs-output", "common")
);
////////////////////
@@ -382,7 +394,25 @@ const DATA: {
constants: {
specsHeaders: string;
};
-  models: Record<string, { id: string; description: string; inference: string | undefined; config: JsonObject | undefined }[]>;
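+  /// Recommended models that are currently warm, keyed by task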
+ recommendedModels: Record<
+ string,
+ {
+ id: string;
+ description: string;
+ inference: string | undefined;
+ tags: string[];
+ }[]
+ >;
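+  /// Most popular warm model per provider, keyed by task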
+ perProviderWarmModels: Record<
+ string,
+ {
+ modelId: string;
+ provider: string;
+ providerModelId: string;
+ providerTask: string;
+ tags: string[];
+ }[]
+ >;
   snippets: Record<string, string>;
specs: Record<
string,
@@ -400,13 +430,14 @@ const DATA: {
constants: {
specsHeaders: SPECS_HEADERS,
},
- models: {},
+ recommendedModels: {},
+ perProviderWarmModels: {},
snippets: {},
specs: {},
tips: { linksToTaskPage: {}, listModelsLink: {} },
};
-// Check for each model if inference status is "warm"
+// For each task, retrieve the inference status of every recommended model
await Promise.all(
TASKS.map(async (task) => {
await Promise.all(
@@ -415,43 +446,101 @@ await Promise.all(
id: string;
description: string;
inference: string | undefined;
- config: JsonObject | undefined;
}) => {
console.log(` ⚡ Checking inference status ${model.id}`);
- let url = `https://huggingface.co/api/models/${model.id}?expand[]=inference`;
- if (task === "text-generation" || task === "image-text-to-text") {
- url += "&expand[]=config";
- }
- const modelData = await fetch(url).then((res) => res.json());
+ let url = `https://huggingface.co/api/models/${model.id}?expand[]=inference&expand[]=tags`;
+ const modelData = await authFetchJson(url);
model.inference = modelData.inference;
- model.config = modelData.config;
- },
- ),
+ }
+ )
);
- }),
+ })
+);
+
+async function fetchWarmModels(task: PipelineType): Promise<
+ {
+ modelId: string;
+ provider: string;
+ providerModelId: string;
+ providerTask: string;
+ tags: string[];
+ }[]
+> {
+ const providers = [
+ "hf-inference",
+ ...(PER_TASK_SUPPORTED_PROVIDERS[task] ?? []),
+ ].sort();
+ return (
+ await Promise.all(
+ providers.map(async (provider) => {
+ console.log(
+ ` ⚡ Fetching most popular warm model for ${task} from ${provider}`
+ );
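+      // Top 5 warm models for this task on this provider, ranked by 30-day likes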
+ const url = `https://huggingface.co/api/models?pipeline_tag=${task}&inference_provider=${provider}&sort=likes30d&expand[]=inferenceProviderMapping&expand[]=tags&limit=5`;
+ const modelsData = (await authFetchJson(url)) as {
+ id: string;
+ likes30d: number;
+        inferenceProviderMapping: Record<string, string>[];
+ tags: string[];
+ }[];
+ if (modelsData.length === 0) {
+ return;
+ }
+
+ /// Little hack: if there are multiple models with the same number of likes (typically 0), we arbitrarily pick the one with the smallest ID to get a deterministic result
+ const topLikes = modelsData[0].likes30d;
+ const topModelData = modelsData
+ .filter((model) => model.likes30d === topLikes)
+ .sort((a, b) => a.id.localeCompare(b.id))[0];
+
+ const providerMapping = topModelData.inferenceProviderMapping as
+        | Record<string, string>[]
+ | undefined;
+ if (!providerMapping) {
+ return;
+ }
+ const providerData = providerMapping.filter(
+ (mapping) => mapping.provider === provider
+ )[0];
+ return {
+ modelId: topModelData.id,
+ provider: provider,
+ providerModelId: providerData.providerId,
+ providerTask: providerData.task,
+ tags: topModelData.tags,
+ };
+ })
+ )
+ ).filter((model) => model !== undefined);
+}
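+/// A typical entry (values illustrative): { modelId: "org/model", provider: "hf-inference",
+///   providerModelId: "org/model", providerTask: "translation", tags: [...] }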
+
+// For each task and for each provider, retrieve the most popular warm model
+await Promise.all(
+ TASKS.map(async (task) => {
+ DATA.perProviderWarmModels[task] = await fetchWarmModels(task);
+ })
);
-// Fetch recommended models
+// Keep only the recommended models that are currently warm
TASKS.forEach((task) => {
- DATA.models[task] = TASKS_DATA[task].models.filter(
- (model: { inference: string }) =>
- ["cold", "loading", "warm"].includes(model.inference),
+ DATA.recommendedModels[task] = TASKS_DATA[task].models.filter(
+ (model: { inference: string }) => model.inference === "warm"
);
});
-// Fetch snippets
-// TODO: render snippets only if they are available
+// Generate snippets
TASKS.forEach((task) => {
- // Let's take as example the first available model that is recommended.
- // Otherwise, fallback to "".
- const mainModel = DATA.models[task][0]?.id ?? "";
- const taskSnippets = {
- curl: getInferenceSnippet(mainModel, task, "curl"),
- python: getInferenceSnippet(mainModel, task, "python"),
- javascript: getInferenceSnippet(mainModel, task, "js"),
- };
+ const inferenceSnippets = DATA.perProviderWarmModels[task].flatMap((model) =>
+ getFormattedInferenceSnippet(task, model, false).map(
+ (inferenceSnippet) => ({
+ ...inferenceSnippet,
+ provider: model.provider,
+ })
+ )
+ );
+
DATA.snippets[task] = SNIPPETS_TEMPLATE({
- taskSnippets,
+ inferenceSnippets,
taskSnakeCase: task.replaceAll("-", "_"),
taskAttached: task.replaceAll("-", ""),
});
@@ -475,7 +564,7 @@ await Promise.all(
})
: undefined,
};
- }),
+ })
);
// Render tips
@@ -488,65 +577,52 @@ TASKS.forEach((task) => {
//// Data for chat-completion special case ////
///////////////////////////////////////////////
-function fetchChatCompletion() {
- const baseName = "chat-completion";
- const conversationalTasks = [
- {
- name: "chat-completion",
- baseName: "text-generation",
- pipelineTag: "text-generation",
- },
- {
- name: "conversational-image-text-to-text",
- baseName: "image-text-to-text",
- pipelineTag: "image-text-to-text",
- },
- ];
-
- conversationalTasks.forEach((task) => {
- // Recommended models based on the base task
- DATA.models[task.name] = DATA.models[task.baseName].filter(
- // @ts-ignore
- (model) => model.config?.tokenizer_config?.chat_template,
- );
+async function fetchChatCompletion() {
+ // Conversational text-generation
+ console.log(
+ " ⚡ Prepare data for chat-completion (conversational text-generation)"
+ );
+ DATA.recommendedModels["chat-completion"] = DATA.recommendedModels[
+ "text-generation"
+ ].filter((model) => model.tags?.includes("conversational"));
+ DATA.snippets["chat-completion"] = SNIPPETS_TEMPLATE({
+ taskSnakeCase: "chat_completion",
+ taskAttached: "chatCompletion",
+ inferenceSnippets: (await fetchWarmModels("text-generation")).flatMap(
+ (model) =>
+ getFormattedInferenceSnippet("text-generation", model, true).map(
+ (inferenceSnippet) => ({
+ ...inferenceSnippet,
+ provider: model.provider,
+ })
+ )
+ ),
+ });
- const mainModel = DATA.models[task.name][0];
-
- const taskSnippets = {
- // @ts-ignore
- curl: getInferenceSnippet(
- mainModel.id,
- task.pipelineTag,
- "curl",
- mainModel.config,
- ["conversational"],
- ),
- // @ts-ignore
- python: getInferenceSnippet(
- mainModel.id,
- task.pipelineTag,
- "python",
- mainModel.config,
- ["conversational"],
- ),
- // @ts-ignore
- javascript: getInferenceSnippet(
- mainModel.id,
- task.pipelineTag,
- "js",
- mainModel.config,
- ["conversational"],
- ),
- };
- DATA.snippets[task.name] = SNIPPETS_TEMPLATE({
- taskSnippets,
- taskSnakeCase: baseName.replaceAll("-", "_"),
- taskAttached: baseName.replaceAll("-", ""),
- });
+ // Conversational image-text-to-text
+ console.log(
+ " ⚡ Prepare data for chat-completion (conversational image-text-to-text)"
+ );
+ DATA.recommendedModels["conversational-image-text-to-text"] =
+ DATA.recommendedModels["image-text-to-text"].filter((model) =>
+ model.tags?.includes("conversational")
+ );
+ DATA.snippets["conversational-image-text-to-text"] = SNIPPETS_TEMPLATE({
+ taskSnakeCase: "chat_completion",
+ taskAttached: "chatCompletion",
+ inferenceSnippets: (await fetchWarmModels("image-text-to-text")).flatMap(
+ (model) =>
+ getFormattedInferenceSnippet("image-text-to-text", model, true).map(
+ (inferenceSnippet) => ({
+ ...inferenceSnippet,
+ provider: model.provider,
+ })
+ )
+ ),
});
}
-fetchChatCompletion();
+await fetchChatCompletion();
/////////////////////////
//// Rendering utils ////
@@ -554,7 +630,7 @@ fetchChatCompletion();
async function renderTemplate(
templateName: string,
- data: JsonObject,
+ data: JsonObject
 ): Promise<string> {
console.log(`🎨 Rendering ${templateName}`);
const template = Handlebars.compile(await readTemplate(templateName, "task"));
@@ -566,7 +642,7 @@ await Promise.all(
// @ts-ignore
const rendered = await renderTemplate(task, DATA);
await writeTaskDoc(task, rendered);
- }),
+ })
);
console.log("✅ All done!");
diff --git a/scripts/api-inference/templates/common/snippets-template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars
index 09202f6ba..2f974071d 100644
--- a/scripts/api-inference/templates/common/snippets-template.handlebars
+++ b/scripts/api-inference/templates/common/snippets-template.handlebars
@@ -1,31 +1,24 @@
-{{#if (or taskSnippets.curl taskSnippets.python taskSnippets.javascript)}}
+{{#if inferenceSnippets.length }}
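+{{!-- Render one fenced code block per snippet, then a client-specific package-reference link --}}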
-{{!-- cURL snippet (if exists) --}}
-{{#if taskSnippets.curl}}
-
-{{{taskSnippets.curl}}}
-
-{{/if}}
+{{#each inferenceSnippets}}
-{{!-- Python snippet (if exists) --}}
-{{#if taskSnippets.python}}
-
-{{{taskSnippets.python}}}
+
-To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}).
-
-{{/if}}
+```{{this.language}}
+{{{this.content}}}
+```
-{{!-- JavaScript snippet (if exists) --}}
-{{#if taskSnippets.javascript}}
-
-{{{taskSnippets.javascript}}}
+
-To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}).
-
+{{#if (eq this.client "huggingface_hub")}}
+To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}).
+{{/if}}
+{{#if (eq this.client "huggingface.js")}}
+To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#{{taskAttached}}).
{{/if}}
+{{/each}}
diff --git a/scripts/api-inference/templates/task/image-text-to-text.handlebars b/scripts/api-inference/templates/task/image-text-to-text.handlebars
index 8aa03f37e..7f2554d78 100644
--- a/scripts/api-inference/templates/task/image-text-to-text.handlebars
+++ b/scripts/api-inference/templates/task/image-text-to-text.handlebars
@@ -6,7 +6,7 @@ Image-text-to-text models take in an image and text prompt and output text. Thes
### Recommended models
-{{#each models.image-text-to-text}}
+{{#each models.conversational-image-text-to-text}}
- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
{{/each}}
@@ -14,7 +14,7 @@ Image-text-to-text models take in an image and text prompt and output text. Thes
### Using the API
-{{{snippets.image-text-to-text}}}
+{{{snippets.conversational-image-text-to-text}}}
### API specification
diff --git a/scripts/api-inference/templates/task/token-classification.handlebars b/scripts/api-inference/templates/task/token-classification.handlebars
index 4a627783f..9045de0ba 100644
--- a/scripts/api-inference/templates/task/token-classification.handlebars
+++ b/scripts/api-inference/templates/task/token-classification.handlebars
@@ -26,13 +26,5 @@ Token classification is a task in which a label is assigned to some tokens in a
#### Response
-Output type depends on the `stream` input parameter.
-If `stream` is `false` (default), the response will be a JSON object with the following fields:
-
{{{specs.token-classification.output}}}
-If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE).
-For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming).
-
-{{{specs.token-classification.stream_output}}}
-