microsoft · skottmckay · May 16, 2026 · May 16, 2026 · May 16, 2026 · May 16, 2026
diff --git a/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs b/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs
@@ -100,6 +100,11 @@ public async Task DirectChat_Streaming_Succeeds()
             await Assert.That(message.Role).IsEqualTo("assistant");
             await Assert.That(message.Content).IsNotNull();
             responseMessage.Append(message.Content);
+
+            if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         var fullResponse = responseMessage.ToString();
@@ -124,6 +129,11 @@ public async Task DirectChat_Streaming_Succeeds()
             await Assert.That(message.Role).IsEqualTo("assistant");
             await Assert.That(message.Content).IsNotNull();
             responseMessage.Append(message.Content);
+
+            if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         fullResponse = responseMessage.ToString();
@@ -271,15 +281,21 @@ public async Task DirectTool_Streaming_Succeeds()
             var content = response.Choices[0].Message.Content;
             await Assert.That(content).IsNotNull();
             Console.WriteLine($"Content in streaming: {content}, Finish reason: {response.Choices[0].FinishReason}");
+
             if (!string.IsNullOrEmpty(content))
             {
                 responseMessage.Append(content);
                 numTokens += 1;
             }
+
             if (response.Choices[0].FinishReason == "tool_calls")
             {
                 toolCallResponse = response;
             }
+            else if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         // Check that the full response contains the expected tool call and that the tool call information is correct
@@ -330,6 +346,11 @@ public async Task DirectTool_Streaming_Succeeds()
             {
                 responseMessage.Append(content);
             }
+
+            if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         // Check that the conversation continued

diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs
@@ -1,4 +1,4 @@
-// --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 // <copyright company="Microsoft">
 //   Copyright (c) Microsoft. All rights reserved.
 // </copyright>
@@ -22,10 +22,10 @@ public static async Task Setup()
         // Reduce max_length in the embedding model's genai_config.json to avoid OOM
         // when allocating the KV cache. Embedding models only need a single forward pass
         // so a large max_length is unnecessary.
-        Utils.PatchModelMaxLength("qwen3-0.6b-embedding-generic-cpu-1", "v1");
+        Utils.PatchModelMaxLength("qwen3-embedding-0.6b-generic-cpu-1", "v1");
 
         // Load the specific cached model variant directly
-        var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false);
+        var model = await catalog.GetModelVariantAsync("qwen3-embedding-0.6b-generic-cpu:1").ConfigureAwait(false);
         await Assert.That(model).IsNotNull();
 
         await model!.LoadAsync().ConfigureAwait(false);
@@ -53,7 +53,7 @@ public async Task Embedding_BasicRequest_Succeeds()
                                              .ConfigureAwait(false);
 
         await Assert.That(response).IsNotNull();
-        await Assert.That(response.Model).IsEqualTo("qwen3-0.6b-embedding-generic-cpu:1");
+        await Assert.That(response.Model).IsEqualTo("qwen3-embedding-0.6b-generic-cpu:1");
         await Assert.That(response.Data).IsNotNull().And.IsNotEmpty();
         await Assert.That(response.Data[0].Embedding).IsNotNull();
         await Assert.That(response.Data[0].Embedding.Count).IsEqualTo(1024);
@@ -178,7 +178,7 @@ public async Task Embedding_KnownValues_CapitalOfFrance()
 
         // Use tolerance for float32 model outputs which may vary across hardware
         const double tolerance = 1e-3;
-        await Assert.That(Math.Abs(embedding[0] - (-0.02815740555524826))).IsLessThanOrEqualTo(tolerance);
+        await Assert.That(Math.Abs(embedding[0] - (-0.035993535071611404))).IsLessThanOrEqualTo(tolerance);
         await Assert.That(Math.Abs(embedding[1023] - (-0.00887922290712595))).IsLessThanOrEqualTo(tolerance);
     }
 

diff --git a/sdk/cs/test/FoundryLocal.Tests/Utils.cs b/sdk/cs/test/FoundryLocal.Tests/Utils.cs
@@ -35,8 +35,7 @@ internal TestCatalogInfo(bool includeCuda)
 
     internal static readonly TestCatalogInfo TestCatalog = new(true);
 
-    [Before(Assembly)]
-    public static void AssemblyInit(AssemblyHookContext _)
+    static Utils()
     {
         using var loggerFactory = LoggerFactory.Create(builder =>
         {
@@ -266,7 +265,7 @@ private static List<ModelInfo> BuildTestCatalog(bool includeCuda = true)
                     PromptTemplate = common.PromptTemplate,
                     Publisher = common.Publisher, Task = common.Task,
                     FileSizeMb = common.FileSizeMb - 10,  // smaller so default chosen in test that sorts on this
-                    ModelSettings = common.ModelSettings, 
+                    ModelSettings = common.ModelSettings,
                     SupportsToolCalling = common.SupportsToolCalling,
                     License = common.License,
                     LicenseDescription = common.LicenseDescription,

diff --git a/sdk/js/test/openai/embeddingClient.test.ts b/sdk/js/test/openai/embeddingClient.test.ts
@@ -13,7 +13,7 @@ describe('Embedding Client Tests', () => {
         expect(cachedModels.length).to.be.greaterThan(0);
 
         const cachedVariant = cachedModels.find(m => m.alias === EMBEDDING_MODEL_ALIAS);
-        expect(cachedVariant, 'qwen3-0.6b-embedding-generic-cpu should be cached').to.not.be.undefined;
+        expect(cachedVariant, 'qwen3-embedding-0.6b-generic-cpu should be cached').to.not.be.undefined;
 
         const model = await catalog.getModel(EMBEDDING_MODEL_ALIAS);
         expect(model).to.not.be.undefined;

diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts
@@ -44,7 +44,7 @@ export const TEST_CONFIG: FoundryLocalConfig = {
 };
 
 export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
-export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
+export const EMBEDDING_MODEL_ALIAS = 'qwen3-embedding-0.6b-generic-cpu';
 
 export function getTestManager() {
     return FoundryLocalManager.create(TEST_CONFIG);

diff --git a/sdk/python/test/README.md b/sdk/python/test/README.md
@@ -76,4 +76,5 @@ Tests that require the web service are skipped when either `TF_BUILD=true` (Azur
 | Alias | Use | Variant |
 |---|---|---|
 | `qwen2.5-0.5b` | Chat completions | `qwen2.5-0.5b-instruct-generic-cpu:4` |
-| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:2` |
+| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:4` |
+| `qwen3-embedding-0.6b-generic-cpu` | Embeddings | `qwen3-embedding-0.6b-generic-cpu:1` |
diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py
@@ -26,7 +26,7 @@
 
 TEST_MODEL_ALIAS = "qwen2.5-0.5b"
 AUDIO_MODEL_ALIAS = "whisper-tiny"
-EMBEDDING_MODEL_ALIAS = "qwen3-0.6b-embedding-generic-cpu"
+EMBEDDING_MODEL_ALIAS = "qwen3-embedding-0.6b-generic-cpu"
 
 def get_git_repo_root() -> Path:
     """Walk upward from __file__ until we find a .git directory."""

diff --git a/sdk/rust/tests/integration/common/mod.rs b/sdk/rust/tests/integration/common/mod.rs
@@ -15,7 +15,7 @@ pub const TEST_MODEL_ALIAS: &str = "qwen2.5-0.5b";
 pub const WHISPER_MODEL_ALIAS: &str = "whisper-tiny";
 
 /// Default model alias used for embedding integration tests.
-pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-0.6b-embedding-generic-cpu";
+pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-embedding-0.6b-generic-cpu";
 
 /// Expected transcription text fragment for the shared audio test file.
 pub const EXPECTED_TRANSCRIPTION_TEXT: &str =