Updating mllama after strftime. #2993


Merged: 14 commits, Feb 7, 2025
16 changes: 3 additions & 13 deletions Dockerfile_intel
@@ -215,16 +215,9 @@
COPY server server
COPY server/Makefile server/Makefile
ENV UV_SYSTEM_PYTHON=1
RUN cd server && \
make gen-server && \
pip install -U pip uv && \
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
. ./.venv/bin/activate && \
make gen-server-raw

RUN cd server && \
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
. ./.venv/bin/activate && \
pwd && \
text-generation-server --help
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
Member:
Not sure if intended, but missing quantized here

Suggested change:
-uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+uv pip install -e ".[accelerate, compressed-tensors, quantized, peft, outlines]" --no-cache-dir

Member:

Should be quantize though :).

Collaborator (author):

Actually, I added it inadvertently in the previous PR; this is what was on main before. I pretty much rolled back the locking changes for Intel.
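For anyone double-checking the extra's spelling, the declared extras can be read straight out of the server's project metadata. A minimal sketch, assuming Python 3.11+ (for the stdlib tomllib) and that the extras sit under [project.optional-dependencies] in server/pyproject.toml:

```python
# List the optional-dependency extras declared by the server package, to
# confirm whether the extra is spelled "quantize" or "quantized".
# Assumes Python 3.11+ (tomllib is stdlib there) and a checkout-root cwd.
import tomllib

with open("server/pyproject.toml", "rb") as f:
    pyproject = tomllib.load(f)

extras = pyproject.get("project", {}).get("optional-dependencies", {})
print(sorted(extras))
```

Worth doing because pip typically only warns, rather than fails, when an extra name matches nothing declared, and that warning is easy to miss in a long Docker build log.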


# Install benchmarker
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -238,8 +231,5 @@
ENV ATTENTION=flashdecoding-ipex
ENV PREFIX_CACHING=1
ENV PREFILL_CHUNKING=1
ENV CUDA_GRAPHS=0
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
RUN chmod +x /tgi-entrypoint.sh

ENTRYPOINT ["/tgi-entrypoint.sh"]
ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"]
@@ -6,19 +6,19 @@
         "index": 0,
         "logprobs": null,
         "message": {
-          "content": "In a bustling city, a chicken named Cluck",
+          "content": "In a small town, a chicken named Cluck",
           "name": null,
           "role": "assistant",
           "tool_calls": null
         },
         "usage": null
       }
     ],
-    "created": 1727773835,
+    "created": 1738753835,
     "id": "",
     "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
     "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
+    "system_fingerprint": "3.1.1-dev0-native",
     "usage": {
       "completion_tokens": 10,
       "prompt_tokens": 50,
@@ -32,71 +32,19 @@
         "index": 0,
         "logprobs": null,
         "message": {
-          "content": "In a world where even chickens could dream big,",
+          "content": "In a small town, a chicken named Cluck",
           "name": null,
           "role": "assistant",
           "tool_calls": null
         },
         "usage": null
       }
     ],
-    "created": 1727773835,
+    "created": 1738753835,
     "id": "",
     "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
     "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
-    "usage": {
-      "completion_tokens": 10,
-      "prompt_tokens": 50,
-      "total_tokens": 60
-    }
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "length",
-        "index": 0,
-        "logprobs": null,
-        "message": {
-          "content": "In a world where even chickens could dream big,",
-          "name": null,
-          "role": "assistant",
-          "tool_calls": null
-        },
-        "usage": null
-      }
-    ],
-    "created": 1727773835,
-    "id": "",
-    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-    "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
-    "usage": {
-      "completion_tokens": 10,
-      "prompt_tokens": 50,
-      "total_tokens": 60
-    }
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "length",
-        "index": 0,
-        "logprobs": null,
-        "message": {
-          "content": "In a world where even chickens could dream big,",
-          "name": null,
-          "role": "assistant",
-          "tool_calls": null
-        },
-        "usage": null
-      }
-    ],
-    "created": 1727773835,
-    "id": "",
-    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-    "object": "chat.completion",
-    "system_fingerprint": "2.4.2-dev0-native",
+    "system_fingerprint": "3.1.1-dev0-native",
     "usage": {
       "completion_tokens": 10,
       "prompt_tokens": 50,
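The snapshot churn above is mostly regeneration metadata. A small sketch (timestamps copied from the diff) decoding the created fields shows the old snapshots date from late September / early October 2024 and the new ones from February 5, 2025, consistent with the system_fingerprint bump from 2.4.2-dev0 to 3.1.1-dev0 and the Feb 7 merge:

```python
# Decode the "created" epoch-second values that changed in the snapshots.
from datetime import datetime, timezone

for ts in (1727556016, 1727773835, 1738753833, 1738753835):
    print(ts, datetime.fromtimestamp(ts, tz=timezone.utc).isoformat())
# 1727556016 / 1727773835 -> 2024-09-28 / 2024-10-01 (old snapshots)
# 1738753833 / 1738753835 -> 2025-02-05 (regenerated snapshots)
```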
@@ -5,19 +5,19 @@
       "index": 0,
       "logprobs": null,
       "message": {
-        "content": "In a bustling city, a chicken named Cluck",
+        "content": "In a small town, a chicken named Cluck",
         "name": null,
         "role": "assistant",
         "tool_calls": null
       },
       "usage": null
     }
   ],
-  "created": 1727556016,
+  "created": 1738753833,
   "id": "",
   "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "2.4.2-dev0-native",
+  "system_fingerprint": "3.1.1-dev0-native",
   "usage": {
     "completion_tokens": 10,
     "prompt_tokens": 50,
17 changes: 8 additions & 9 deletions integration-tests/models/test_mllama.py
@@ -47,8 +47,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
         "total_tokens": 60,
     }
     assert (
-        response.choices[0].message.content
-        == "In a bustling city, a chicken named Cluck"
+        response.choices[0].message.content == "In a small town, a chicken named Cluck"
     )
     assert response == response_snapshot

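The expected string is stable because the harness pins decoding (seeded generation with max_tokens=10, judging by the snapshot's completion_tokens). A rough reproduction of the single-request check against a locally running server, using the OpenAI-compatible /v1 endpoint TGI exposes; the URL, prompt, and seed below are assumptions, and the real test also attaches an image to the message:

```python
# Hypothetical reproduction of the single-request check. Assumes a TGI server
# for meta-llama/Llama-3.2-11B-Vision-Instruct listening on localhost:8080;
# the prompt and seed are stand-ins (the real test also sends an image).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="-")
response = client.chat.completions.create(
    model="tgi",  # TGI serves whichever model it was launched with
    messages=[{"role": "user", "content": "Tell me a story about a chicken."}],
    max_tokens=10,
    seed=42,
)
print(response.choices[0].message.content)
# The snapshot expects: "In a small town, a chicken named Cluck"
```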
@@ -84,12 +83,12 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
     ]
     responses = await asyncio.gather(*futures)

-    _ = [response.choices[0].message.content for response in responses]
+    generated_texts = [response.choices[0].message.content for response in responses]

-    # XXX: TODO: Fix this test.
-    # assert generated_texts[0] == "In a bustling city, a chicken named Cluck"
-    # assert len(generated_texts) == 4
-    # assert generated_texts, all(
-    #     [text == generated_texts[0] for text in generated_texts]
-    # )
-    # assert responses == response_snapshot
+    assert generated_texts[0] == "In a small town, a chicken named Cluck"
+    assert len(generated_texts) == 2
+    assert all(text == generated_texts[0] for text in generated_texts)
+    assert responses == response_snapshot
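The re-enabled assertions encode the load test's contract: two concurrent, identically seeded requests must yield identical outputs. A minimal sketch of that fan-out-and-compare pattern, with make_request as a hypothetical stand-in for the harness's generate_load helper:

```python
import asyncio


async def fan_out_and_compare(make_request, n: int = 2) -> list[str]:
    # Fire n identical requests concurrently, as the load test does.
    responses = await asyncio.gather(*(make_request() for _ in range(n)))
    texts = [r.choices[0].message.content for r in responses]
    # With seeded decoding, every response should agree with the first.
    assert all(t == texts[0] for t in texts), f"responses diverged: {texts}"
    return texts
```

One pitfall the old commented-out code illustrates: assert xs, all(...) only checks that xs is non-empty, because the all(...) expression becomes the assertion's failure message rather than part of the condition.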