diff --git a/samples/cs/embeddings/Program.cs b/samples/cs/embeddings/Program.cs index 348bc3461..724ab531e 100644 --- a/samples/cs/embeddings/Program.cs +++ b/samples/cs/embeddings/Program.cs @@ -20,7 +20,7 @@ var catalog = await mgr.GetCatalogAsync(); // Get an embedding model -var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found"); +var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? throw new Exception("Embedding model not found"); // Download the model (the method skips download if already cached) await model.DownloadAsync(progress => diff --git a/samples/js/embeddings/app.js b/samples/js/embeddings/app.js index ea6ff1858..b50a31a82 100644 --- a/samples/js/embeddings/app.js +++ b/samples/js/embeddings/app.js @@ -16,7 +16,7 @@ console.log('✓ SDK initialized successfully'); // // Get an embedding model -const modelAlias = 'qwen3-0.6b-embedding'; +const modelAlias = 'qwen3-embedding-0.6b'; const model = await manager.catalog.getModel(modelAlias); // Download the model diff --git a/samples/python/embeddings/src/app.py b/samples/python/embeddings/src/app.py index 30ade4b20..fcf36d201 100644 --- a/samples/python/embeddings/src/app.py +++ b/samples/python/embeddings/src/app.py @@ -12,7 +12,7 @@ def main(): manager = FoundryLocalManager.instance # Select and load an embedding model from the catalog - model = manager.catalog.get_model("qwen3-0.6b-embedding") + model = manager.catalog.get_model("qwen3-embedding-0.6b") model.download( lambda progress: print( f"\rDownloading model: {progress:.2f}%", diff --git a/samples/rust/embeddings/src/main.rs b/samples/rust/embeddings/src/main.rs index 9b5550f05..b76e53b01 100644 --- a/samples/rust/embeddings/src/main.rs +++ b/samples/rust/embeddings/src/main.rs @@ -6,7 +6,7 @@ use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; // -const ALIAS: &str = "qwen3-0.6b-embedding"; +const ALIAS: &str = "qwen3-embedding-0.6b"; #[tokio::main] async fn 
main() -> Result<(), Box> { diff --git a/sdk/cs/src/Detail/CoreInterop.NetStandard.cs b/sdk/cs/src/Detail/CoreInterop.NetStandard.cs index b96a258b1..547f25148 100644 --- a/sdk/cs/src/Detail/CoreInterop.NetStandard.cs +++ b/sdk/cs/src/Detail/CoreInterop.NetStandard.cs @@ -20,26 +20,33 @@ namespace Microsoft.AI.Foundry.Local.Detail; internal partial class CoreInterop { - [DllImport(LibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)] + // Win32 LoadLibraryEx parses the last dot in a name as the file extension. Because LibraryName + // contains dots (e.g. "Microsoft.AI.Foundry.Local.Core"), the loader treats ".Core" as the + // extension and does NOT append ".dll", so resolution fails with ERROR_MOD_NOT_FOUND even after + // the DLL has been pre-loaded by full path. Including the explicit ".dll" suffix here forces the + // marshaller to look up the module under its actual loaded name. + private const string DllImportLibraryName = LibraryName + ".dll"; + + [DllImport(DllImportLibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)] private static unsafe extern void CoreExecuteCommand(RequestBuffer* request, ResponseBuffer* response); - [DllImport(LibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)] + [DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)] private static unsafe extern void CoreExecuteCommandWithCallback(RequestBuffer* nativeRequest, ResponseBuffer* nativeResponse, nint callbackPtr, nint userData); - [DllImport(LibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)] + [DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)] private static unsafe extern void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest, ResponseBuffer* 
nativeResponse); - [DllImport(LibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)] + [DllImport(DllImportLibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)] private static unsafe extern void CoreAudioStreamStart(RequestBuffer* request, ResponseBuffer* response); - [DllImport(LibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)] + [DllImport(DllImportLibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)] private static unsafe extern void CoreAudioStreamPush(StreamingRequestBuffer* request, ResponseBuffer* response); - [DllImport(LibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)] + [DllImport(DllImportLibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)] private static unsafe extern void CoreAudioStreamStop(RequestBuffer* request, ResponseBuffer* response); [DllImport("kernel32", SetLastError = true, CharSet = CharSet.Unicode)] diff --git a/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs b/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs index 21578147b..13c1f5cfd 100644 --- a/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs @@ -100,6 +100,11 @@ public async Task DirectChat_Streaming_Succeeds() await Assert.That(message.Role).IsEqualTo("assistant"); await Assert.That(message.Content).IsNotNull(); responseMessage.Append(message.Content); + + if (response.Choices[0].FinishReason == "stop") + { + break; // test doesn't handle the usage response + } } var fullResponse = responseMessage.ToString(); @@ -124,6 +129,11 @@ public async Task DirectChat_Streaming_Succeeds() await Assert.That(message.Role).IsEqualTo("assistant"); await Assert.That(message.Content).IsNotNull(); responseMessage.Append(message.Content); + + if (response.Choices[0].FinishReason == 
"stop") + { + break; // test doesn't handle the usage response + } } fullResponse = responseMessage.ToString(); @@ -271,15 +281,21 @@ public async Task DirectTool_Streaming_Succeeds() var content = response.Choices[0].Message.Content; await Assert.That(content).IsNotNull(); Console.WriteLine($"Content in streaming: {content}, Finish reason: {response.Choices[0].FinishReason}"); + if (!string.IsNullOrEmpty(content)) { responseMessage.Append(content); numTokens += 1; } + if (response.Choices[0].FinishReason == "tool_calls") { toolCallResponse = response; } + else if (response.Choices[0].FinishReason == "stop") + { + break; // test doesn't handle the usage response + } } // Check that the full response contains the expected tool call and that the tool call information is correct @@ -330,6 +346,11 @@ public async Task DirectTool_Streaming_Succeeds() { responseMessage.Append(content); } + + if (response.Choices[0].FinishReason == "stop") + { + break; // test doesn't handle the usage response + } } // Check that the conversation continued diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs index bed3a8ea5..f14a4e763 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -1,4 +1,4 @@ -// -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) Microsoft. All rights reserved. // @@ -22,10 +22,10 @@ public static async Task Setup() // Reduce max_length in the embedding model's genai_config.json to avoid OOM // when allocating the KV cache. Embedding models only need a single forward pass // so a large max_length is unnecessary. 
- Utils.PatchModelMaxLength("qwen3-0.6b-embedding-generic-cpu-1", "v1"); + Utils.PatchModelMaxLength("qwen3-embedding-0.6b-generic-cpu-1", "v1"); // Load the specific cached model variant directly - var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false); + var model = await catalog.GetModelVariantAsync("qwen3-embedding-0.6b-generic-cpu:1").ConfigureAwait(false); await Assert.That(model).IsNotNull(); await model!.LoadAsync().ConfigureAwait(false); @@ -53,7 +53,7 @@ public async Task Embedding_BasicRequest_Succeeds() .ConfigureAwait(false); await Assert.That(response).IsNotNull(); - await Assert.That(response.Model).IsEqualTo("qwen3-0.6b-embedding-generic-cpu:1"); + await Assert.That(response.Model).IsEqualTo("qwen3-embedding-0.6b-generic-cpu:1"); await Assert.That(response.Data).IsNotNull().And.IsNotEmpty(); await Assert.That(response.Data[0].Embedding).IsNotNull(); await Assert.That(response.Data[0].Embedding.Count).IsEqualTo(1024); @@ -178,7 +178,7 @@ public async Task Embedding_KnownValues_CapitalOfFrance() // Use tolerance for float32 model outputs which may vary across hardware const double tolerance = 1e-3; - await Assert.That(Math.Abs(embedding[0] - (-0.02815740555524826))).IsLessThanOrEqualTo(tolerance); + await Assert.That(Math.Abs(embedding[0] - (-0.035993535071611404))).IsLessThanOrEqualTo(tolerance); await Assert.That(Math.Abs(embedding[1023] - (-0.00887922290712595))).IsLessThanOrEqualTo(tolerance); } diff --git a/sdk/cs/test/FoundryLocal.Tests/SkipUnlessIntegrationAttribute.cs b/sdk/cs/test/FoundryLocal.Tests/SkipUnlessIntegrationAttribute.cs index 7125c7654..268720668 100644 --- a/sdk/cs/test/FoundryLocal.Tests/SkipUnlessIntegrationAttribute.cs +++ b/sdk/cs/test/FoundryLocal.Tests/SkipUnlessIntegrationAttribute.cs @@ -1,4 +1,4 @@ -// -------------------------------------------------------------------------------------------------------------------- +// 
-------------------------------------------------------------------------------------------------------------------- // // Copyright (c) Microsoft. All rights reserved. // @@ -15,6 +15,7 @@ public class SkipUnlessIntegrationAttribute() { public override Task ShouldSkip(TestRegisteredContext context) { - return Task.FromResult(!Utils.IntegrationTestsAvailable); + var integrationTestsAvailable = Utils.IntegrationTestsAvailable; + return Task.FromResult(!integrationTestsAvailable); } } diff --git a/sdk/cs/test/FoundryLocal.Tests/Utils.cs b/sdk/cs/test/FoundryLocal.Tests/Utils.cs index f89698539..d6e450177 100644 --- a/sdk/cs/test/FoundryLocal.Tests/Utils.cs +++ b/sdk/cs/test/FoundryLocal.Tests/Utils.cs @@ -37,12 +37,18 @@ internal TestCatalogInfo(bool includeCuda) [Before(Assembly)] public static void AssemblyInit(AssemblyHookContext _) + { + // Read IntegrationTestsAvailable here so the static constructor is guaranteed to run. + // SkipUnlessIntegrationAttribute also triggers it, but only for some tests, not all. + Console.WriteLine("AssemblyInit: IntegrationTestsAvailable = " + IntegrationTestsAvailable); + } + + static Utils() { using var loggerFactory = LoggerFactory.Create(builder => { - builder - .AddConsole() - .SetMinimumLevel(LogLevel.Debug); + builder.AddConsole() + .SetMinimumLevel(LogLevel.Debug); }); ILogger logger = loggerFactory.CreateLogger("FoundryLocal.Tests"); @@ -84,6 +90,7 @@ public static void AssemblyInit(AssemblyHookContext _) return; } + try { var config = new Configuration @@ -266,7 +273,7 @@ private static List BuildTestCatalog(bool includeCuda = true) PromptTemplate = common.PromptTemplate, Publisher = common.Publisher, Task = common.Task, FileSizeMb = common.FileSizeMb - 10, // smaller so default chosen in test that sorts on this - ModelSettings = common.ModelSettings, + ModelSettings = common.ModelSettings, SupportsToolCalling = common.SupportsToolCalling, License = common.License, LicenseDescription = common.LicenseDescription, diff --git
a/sdk/js/test/openai/embeddingClient.test.ts b/sdk/js/test/openai/embeddingClient.test.ts index 968249646..9aeae83c2 100644 --- a/sdk/js/test/openai/embeddingClient.test.ts +++ b/sdk/js/test/openai/embeddingClient.test.ts @@ -13,7 +13,7 @@ describe('Embedding Client Tests', () => { expect(cachedModels.length).to.be.greaterThan(0); const cachedVariant = cachedModels.find(m => m.alias === EMBEDDING_MODEL_ALIAS); - expect(cachedVariant, 'qwen3-0.6b-embedding-generic-cpu should be cached').to.not.be.undefined; + expect(cachedVariant, 'qwen3-embedding-0.6b-generic-cpu should be cached').to.not.be.undefined; const model = await catalog.getModel(EMBEDDING_MODEL_ALIAS); expect(model).to.not.be.undefined; diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts index 7cac6b293..066bce4d1 100644 --- a/sdk/js/test/testUtils.ts +++ b/sdk/js/test/testUtils.ts @@ -44,7 +44,7 @@ export const TEST_CONFIG: FoundryLocalConfig = { }; export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b'; -export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu'; +export const EMBEDDING_MODEL_ALIAS = 'qwen3-embedding-0.6b'; export function getTestManager() { return FoundryLocalManager.create(TEST_CONFIG); diff --git a/sdk/python/test/README.md b/sdk/python/test/README.md index 4d60d5571..fd87c5c0c 100644 --- a/sdk/python/test/README.md +++ b/sdk/python/test/README.md @@ -76,4 +76,5 @@ Tests that require the web service are skipped when either `TF_BUILD=true` (Azur | Alias | Use | Variant | |---|---|---| | `qwen2.5-0.5b` | Chat completions | `qwen2.5-0.5b-instruct-generic-cpu:4` | -| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:2` | +| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:4` | +| `qwen3-embedding-0.6b-generic-cpu` | Embeddings | `qwen3-embedding-0.6b-generic-cpu:1` | diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index dc76a2372..1638454fa 100644 --- a/sdk/python/test/conftest.py +++ 
b/sdk/python/test/conftest.py @@ -26,7 +26,7 @@ TEST_MODEL_ALIAS = "qwen2.5-0.5b" AUDIO_MODEL_ALIAS = "whisper-tiny" -EMBEDDING_MODEL_ALIAS = "qwen3-0.6b-embedding-generic-cpu" +EMBEDDING_MODEL_ALIAS = "qwen3-embedding-0.6b-generic-cpu" def get_git_repo_root() -> Path: """Walk upward from __file__ until we find a .git directory.""" diff --git a/sdk/rust/tests/integration/common/mod.rs b/sdk/rust/tests/integration/common/mod.rs index 4e65e4eaf..16faffd87 100644 --- a/sdk/rust/tests/integration/common/mod.rs +++ b/sdk/rust/tests/integration/common/mod.rs @@ -15,7 +15,7 @@ pub const TEST_MODEL_ALIAS: &str = "qwen2.5-0.5b"; pub const WHISPER_MODEL_ALIAS: &str = "whisper-tiny"; /// Default model alias used for embedding integration tests. -pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-0.6b-embedding-generic-cpu"; +pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-embedding-0.6b-generic-cpu"; /// Expected transcription text fragment for the shared audio test file. pub const EXPECTED_TRANSCRIPTION_TEXT: &str =