microsoft · skottmckay · May 16, 2026 · May 16, 2026 · May 16, 2026 · May 16, 2026
diff --git a/samples/cs/embeddings/Program.cs b/samples/cs/embeddings/Program.cs
@@ -20,7 +20,7 @@
 var catalog = await mgr.GetCatalogAsync();
 
 // Get an embedding model
-var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found");
+var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? throw new Exception("Embedding model not found");
 
 // Download the model (the method skips download if already cached)
 await model.DownloadAsync(progress =>

diff --git a/samples/js/embeddings/app.js b/samples/js/embeddings/app.js
@@ -16,7 +16,7 @@ console.log('✓ SDK initialized successfully');
 
 // <model_setup>
 // Get an embedding model
-const modelAlias = 'qwen3-0.6b-embedding';
+const modelAlias = 'qwen3-embedding-0.6b';
 const model = await manager.catalog.getModel(modelAlias);
 
 // Download the model

diff --git a/samples/python/embeddings/src/app.py b/samples/python/embeddings/src/app.py
@@ -12,7 +12,7 @@ def main():
     manager = FoundryLocalManager.instance
 
     # Select and load an embedding model from the catalog
-    model = manager.catalog.get_model("qwen3-0.6b-embedding")
+    model = manager.catalog.get_model("qwen3-embedding-0.6b")
     model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",

diff --git a/samples/rust/embeddings/src/main.rs b/samples/rust/embeddings/src/main.rs
@@ -6,7 +6,7 @@
 use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
 // </imports>
 
-const ALIAS: &str = "qwen3-0.6b-embedding";
+const ALIAS: &str = "qwen3-embedding-0.6b";
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {

diff --git a/sdk/cs/src/Detail/CoreInterop.NetStandard.cs b/sdk/cs/src/Detail/CoreInterop.NetStandard.cs
@@ -20,26 +20,33 @@ namespace Microsoft.AI.Foundry.Local.Detail;
 
 internal partial class CoreInterop
 {
-    [DllImport(LibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)]
+    // Win32 LoadLibraryEx parses the last dot in a name as the file extension. Because LibraryName
+    // contains dots (e.g. "Microsoft.AI.Foundry.Local.Core"), the loader treats ".Core" as the
+    // extension and does NOT append ".dll", so resolution fails with ERROR_MOD_NOT_FOUND even after
+    // the DLL has been pre-loaded by full path. Including the explicit ".dll" suffix here forces the
+    // marshaller to look up the module under its actual loaded name.
+    private const string DllImportLibraryName = LibraryName + ".dll";
+
+    [DllImport(DllImportLibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)]
     private static unsafe extern void CoreExecuteCommand(RequestBuffer* request, ResponseBuffer* response);
 
-    [DllImport(LibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)]
+    [DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)]
     private static unsafe extern void CoreExecuteCommandWithCallback(RequestBuffer* nativeRequest,
                                                                      ResponseBuffer* nativeResponse,
                                                                      nint callbackPtr,
                                                                      nint userData);
 
-    [DllImport(LibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)]
+    [DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)]
     private static unsafe extern void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest,
                                                                     ResponseBuffer* nativeResponse);
 
-    [DllImport(LibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)]
+    [DllImport(DllImportLibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)]
     private static unsafe extern void CoreAudioStreamStart(RequestBuffer* request, ResponseBuffer* response);
 
-    [DllImport(LibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)]
+    [DllImport(DllImportLibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)]
     private static unsafe extern void CoreAudioStreamPush(StreamingRequestBuffer* request, ResponseBuffer* response);
 
-    [DllImport(LibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)]
+    [DllImport(DllImportLibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)]
     private static unsafe extern void CoreAudioStreamStop(RequestBuffer* request, ResponseBuffer* response);
 
     [DllImport("kernel32", SetLastError = true, CharSet = CharSet.Unicode)]

diff --git a/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs b/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs
@@ -100,6 +100,11 @@ public async Task DirectChat_Streaming_Succeeds()
             await Assert.That(message.Role).IsEqualTo("assistant");
             await Assert.That(message.Content).IsNotNull();
             responseMessage.Append(message.Content);
+
+            if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         var fullResponse = responseMessage.ToString();
@@ -124,6 +129,11 @@ public async Task DirectChat_Streaming_Succeeds()
             await Assert.That(message.Role).IsEqualTo("assistant");
             await Assert.That(message.Content).IsNotNull();
             responseMessage.Append(message.Content);
+
+            if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         fullResponse = responseMessage.ToString();
@@ -271,15 +281,21 @@ public async Task DirectTool_Streaming_Succeeds()
             var content = response.Choices[0].Message.Content;
             await Assert.That(content).IsNotNull();
             Console.WriteLine($"Content in streaming: {content}, Finish reason: {response.Choices[0].FinishReason}");
+
             if (!string.IsNullOrEmpty(content))
             {
                 responseMessage.Append(content);
                 numTokens += 1;
             }
+
             if (response.Choices[0].FinishReason == "tool_calls")
             {
                 toolCallResponse = response;
             }
+            else if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         // Check that the full response contains the expected tool call and that the tool call information is correct
@@ -330,6 +346,11 @@ public async Task DirectTool_Streaming_Succeeds()
             {
                 responseMessage.Append(content);
             }
+
+            if (response.Choices[0].FinishReason == "stop")
+            {
+                break; // test doesn't handle the usage response
+            }
         }
 
         // Check that the conversation continued

diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs
@@ -1,4 +1,4 @@
-// --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 // <copyright company="Microsoft">
 //   Copyright (c) Microsoft. All rights reserved.
 // </copyright>
@@ -22,10 +22,10 @@ public static async Task Setup()
         // Reduce max_length in the embedding model's genai_config.json to avoid OOM
         // when allocating the KV cache. Embedding models only need a single forward pass
         // so a large max_length is unnecessary.
-        Utils.PatchModelMaxLength("qwen3-0.6b-embedding-generic-cpu-1", "v1");
+        Utils.PatchModelMaxLength("qwen3-embedding-0.6b-generic-cpu-1", "v1");
 
         // Load the specific cached model variant directly
-        var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false);
+        var model = await catalog.GetModelVariantAsync("qwen3-embedding-0.6b-generic-cpu:1").ConfigureAwait(false);
         await Assert.That(model).IsNotNull();
 
         await model!.LoadAsync().ConfigureAwait(false);
@@ -53,7 +53,7 @@ public async Task Embedding_BasicRequest_Succeeds()
                                              .ConfigureAwait(false);
 
         await Assert.That(response).IsNotNull();
-        await Assert.That(response.Model).IsEqualTo("qwen3-0.6b-embedding-generic-cpu:1");
+        await Assert.That(response.Model).IsEqualTo("qwen3-embedding-0.6b-generic-cpu:1");
         await Assert.That(response.Data).IsNotNull().And.IsNotEmpty();
         await Assert.That(response.Data[0].Embedding).IsNotNull();
         await Assert.That(response.Data[0].Embedding.Count).IsEqualTo(1024);
@@ -178,7 +178,7 @@ public async Task Embedding_KnownValues_CapitalOfFrance()
 
         // Use tolerance for float32 model outputs which may vary across hardware
         const double tolerance = 1e-3;
-        await Assert.That(Math.Abs(embedding[0] - (-0.02815740555524826))).IsLessThanOrEqualTo(tolerance);
+        await Assert.That(Math.Abs(embedding[0] - (-0.035993535071611404))).IsLessThanOrEqualTo(tolerance);
         await Assert.That(Math.Abs(embedding[1023] - (-0.00887922290712595))).IsLessThanOrEqualTo(tolerance);
     }
 

diff --git a/sdk/cs/test/FoundryLocal.Tests/SkipUnlessIntegrationAttribute.cs b/sdk/cs/test/FoundryLocal.Tests/SkipUnlessIntegrationAttribute.cs
@@ -1,4 +1,4 @@
-// --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 // <copyright company="Microsoft">
 //   Copyright (c) Microsoft. All rights reserved.
 // </copyright>
@@ -15,6 +15,7 @@ public class SkipUnlessIntegrationAttribute()
 {
     public override Task<bool> ShouldSkip(TestRegisteredContext context)
     {
-        return Task.FromResult(!Utils.IntegrationTestsAvailable);
+        var integrationTestsAvailable = Utils.IntegrationTestsAvailable;
+        return Task.FromResult(!integrationTestsAvailable);
     }
 }
diff --git a/sdk/cs/test/FoundryLocal.Tests/Utils.cs b/sdk/cs/test/FoundryLocal.Tests/Utils.cs
@@ -37,12 +37,18 @@ internal TestCatalogInfo(bool includeCuda)
 
     [Before(Assembly)]
     public static void AssemblyInit(AssemblyHookContext _)
+    {
+        // Reading IntegrationTestsAvailable here ensures the Utils static constructor has run.
+        // SkipUnlessIntegrationAttribute also reads it, but only for some tests, not all of them.
+        Console.WriteLine("AssemblyInit: IntegrationTestsAvailable = " + IntegrationTestsAvailable);
+    }
+
+    static Utils()
     {
         using var loggerFactory = LoggerFactory.Create(builder =>
         {
-            builder
-                .AddConsole()
-                .SetMinimumLevel(LogLevel.Debug);
+            builder.AddConsole()
+                   .SetMinimumLevel(LogLevel.Debug);
         });
 
         ILogger logger = loggerFactory.CreateLogger("FoundryLocal.Tests");
@@ -84,6 +90,7 @@ public static void AssemblyInit(AssemblyHookContext _)
             return;
         }
 
+
         try
         {
             var config = new Configuration
@@ -266,7 +273,7 @@ private static List<ModelInfo> BuildTestCatalog(bool includeCuda = true)
                     PromptTemplate = common.PromptTemplate,
                     Publisher = common.Publisher, Task = common.Task,
                     FileSizeMb = common.FileSizeMb - 10,  // smaller so default chosen in test that sorts on this
-                    ModelSettings = common.ModelSettings, 
+                    ModelSettings = common.ModelSettings,
                     SupportsToolCalling = common.SupportsToolCalling,
                     License = common.License,
                     LicenseDescription = common.LicenseDescription,

diff --git a/sdk/js/test/openai/embeddingClient.test.ts b/sdk/js/test/openai/embeddingClient.test.ts
@@ -13,7 +13,7 @@ describe('Embedding Client Tests', () => {
         expect(cachedModels.length).to.be.greaterThan(0);
 
         const cachedVariant = cachedModels.find(m => m.alias === EMBEDDING_MODEL_ALIAS);
-        expect(cachedVariant, 'qwen3-0.6b-embedding-generic-cpu should be cached').to.not.be.undefined;
+        expect(cachedVariant, 'qwen3-embedding-0.6b-generic-cpu should be cached').to.not.be.undefined;
 
         const model = await catalog.getModel(EMBEDDING_MODEL_ALIAS);
         expect(model).to.not.be.undefined;

diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts
@@ -44,7 +44,7 @@ export const TEST_CONFIG: FoundryLocalConfig = {
 };
 
 export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
-export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
+export const EMBEDDING_MODEL_ALIAS = 'qwen3-embedding-0.6b';
 
 export function getTestManager() {
     return FoundryLocalManager.create(TEST_CONFIG);

diff --git a/sdk/python/test/README.md b/sdk/python/test/README.md
@@ -76,4 +76,5 @@ Tests that require the web service are skipped when either `TF_BUILD=true` (Azur
 | Alias | Use | Variant |
 |---|---|---|
 | `qwen2.5-0.5b` | Chat completions | `qwen2.5-0.5b-instruct-generic-cpu:4` |
-| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:2` |
+| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:4` |
+| `qwen3-embedding-0.6b-generic-cpu` | Embeddings | `qwen3-embedding-0.6b-generic-cpu:1` |
diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py
@@ -26,7 +26,7 @@
 
 TEST_MODEL_ALIAS = "qwen2.5-0.5b"
 AUDIO_MODEL_ALIAS = "whisper-tiny"
-EMBEDDING_MODEL_ALIAS = "qwen3-0.6b-embedding-generic-cpu"
+EMBEDDING_MODEL_ALIAS = "qwen3-embedding-0.6b-generic-cpu"
 
 def get_git_repo_root() -> Path:
     """Walk upward from __file__ until we find a .git directory."""

diff --git a/sdk/rust/tests/integration/common/mod.rs b/sdk/rust/tests/integration/common/mod.rs
@@ -15,7 +15,7 @@ pub const TEST_MODEL_ALIAS: &str = "qwen2.5-0.5b";
 pub const WHISPER_MODEL_ALIAS: &str = "whisper-tiny";
 
 /// Default model alias used for embedding integration tests.
-pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-0.6b-embedding-generic-cpu";
+pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-embedding-0.6b-generic-cpu";
 
 /// Expected transcription text fragment for the shared audio test file.
 pub const EXPECTED_TRANSCRIPTION_TEXT: &str =