Skip to content
Open
2 changes: 1 addition & 1 deletion samples/cs/embeddings/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
var catalog = await mgr.GetCatalogAsync();

// Get an embedding model
var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found");
var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? throw new Exception("Embedding model not found");

// Download the model (the method skips download if already cached)
await model.DownloadAsync(progress =>
Expand Down
2 changes: 1 addition & 1 deletion samples/js/embeddings/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ console.log('✓ SDK initialized successfully');

// <model_setup>
// Get an embedding model
const modelAlias = 'qwen3-0.6b-embedding';
const modelAlias = 'qwen3-embedding-0.6b';
const model = await manager.catalog.getModel(modelAlias);

// Download the model
Expand Down
2 changes: 1 addition & 1 deletion samples/python/embeddings/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def main():
manager = FoundryLocalManager.instance

# Select and load an embedding model from the catalog
model = manager.catalog.get_model("qwen3-0.6b-embedding")
model = manager.catalog.get_model("qwen3-embedding-0.6b")
model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
Expand Down
2 changes: 1 addition & 1 deletion samples/rust/embeddings/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
// </imports>

const ALIAS: &str = "qwen3-0.6b-embedding";
const ALIAS: &str = "qwen3-embedding-0.6b";

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
Expand Down
19 changes: 13 additions & 6 deletions sdk/cs/src/Detail/CoreInterop.NetStandard.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,33 @@ namespace Microsoft.AI.Foundry.Local.Detail;

internal partial class CoreInterop
{
[DllImport(LibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)]
// Win32 LoadLibraryEx parses the last dot in a name as the file extension. Because LibraryName
// contains dots (e.g. "Microsoft.AI.Foundry.Local.Core"), the loader treats ".Core" as the
// extension and does NOT append ".dll", so resolution fails with ERROR_MOD_NOT_FOUND even after
// the DLL has been pre-loaded by full path. Including the explicit ".dll" suffix here forces the
// marshaller to look up the module under its actual loaded name.
private const string DllImportLibraryName = LibraryName + ".dll";

[DllImport(DllImportLibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreExecuteCommand(RequestBuffer* request, ResponseBuffer* response);

[DllImport(LibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreExecuteCommandWithCallback(RequestBuffer* nativeRequest,
ResponseBuffer* nativeResponse,
nint callbackPtr,
nint userData);

[DllImport(LibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest,
ResponseBuffer* nativeResponse);

[DllImport(LibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreAudioStreamStart(RequestBuffer* request, ResponseBuffer* response);

[DllImport(LibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreAudioStreamPush(StreamingRequestBuffer* request, ResponseBuffer* response);

[DllImport(LibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreAudioStreamStop(RequestBuffer* request, ResponseBuffer* response);

[DllImport("kernel32", SetLastError = true, CharSet = CharSet.Unicode)]
Expand Down
21 changes: 21 additions & 0 deletions sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ public async Task DirectChat_Streaming_Succeeds()
await Assert.That(message.Role).IsEqualTo("assistant");
await Assert.That(message.Content).IsNotNull();
responseMessage.Append(message.Content);

if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

var fullResponse = responseMessage.ToString();
Expand All @@ -124,6 +129,11 @@ public async Task DirectChat_Streaming_Succeeds()
await Assert.That(message.Role).IsEqualTo("assistant");
await Assert.That(message.Content).IsNotNull();
responseMessage.Append(message.Content);

if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

fullResponse = responseMessage.ToString();
Expand Down Expand Up @@ -271,15 +281,21 @@ public async Task DirectTool_Streaming_Succeeds()
var content = response.Choices[0].Message.Content;
await Assert.That(content).IsNotNull();
Console.WriteLine($"Content in streaming: {content}, Finish reason: {response.Choices[0].FinishReason}");

if (!string.IsNullOrEmpty(content))
{
responseMessage.Append(content);
numTokens += 1;
}

if (response.Choices[0].FinishReason == "tool_calls")
{
toolCallResponse = response;
}
else if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

// Check that the full response contains the expected tool call and that the tool call information is correct
Expand Down Expand Up @@ -330,6 +346,11 @@ public async Task DirectTool_Streaming_Succeeds()
{
responseMessage.Append(content);
}

if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

// Check that the conversation continued
Expand Down
10 changes: 5 additions & 5 deletions sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// --------------------------------------------------------------------------------------------------------------------
// --------------------------------------------------------------------------------------------------------------------
// <copyright company="Microsoft">
// Copyright (c) Microsoft. All rights reserved.
// </copyright>
Expand All @@ -22,10 +22,10 @@ public static async Task Setup()
// Reduce max_length in the embedding model's genai_config.json to avoid OOM
// when allocating the KV cache. Embedding models only need a single forward pass
// so a large max_length is unnecessary.
Utils.PatchModelMaxLength("qwen3-0.6b-embedding-generic-cpu-1", "v1");
Utils.PatchModelMaxLength("qwen3-embedding-0.6b-generic-cpu-1", "v1");

// Load the specific cached model variant directly
var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false);
var model = await catalog.GetModelVariantAsync("qwen3-embedding-0.6b-generic-cpu:1").ConfigureAwait(false);
await Assert.That(model).IsNotNull();

await model!.LoadAsync().ConfigureAwait(false);
Expand Down Expand Up @@ -53,7 +53,7 @@ public async Task Embedding_BasicRequest_Succeeds()
.ConfigureAwait(false);

await Assert.That(response).IsNotNull();
await Assert.That(response.Model).IsEqualTo("qwen3-0.6b-embedding-generic-cpu:1");
await Assert.That(response.Model).IsEqualTo("qwen3-embedding-0.6b-generic-cpu:1");
await Assert.That(response.Data).IsNotNull().And.IsNotEmpty();
await Assert.That(response.Data[0].Embedding).IsNotNull();
await Assert.That(response.Data[0].Embedding.Count).IsEqualTo(1024);
Expand Down Expand Up @@ -178,7 +178,7 @@ public async Task Embedding_KnownValues_CapitalOfFrance()

// Use tolerance for float32 model outputs which may vary across hardware
const double tolerance = 1e-3;
await Assert.That(Math.Abs(embedding[0] - (-0.02815740555524826))).IsLessThanOrEqualTo(tolerance);
await Assert.That(Math.Abs(embedding[0] - (-0.035993535071611404))).IsLessThanOrEqualTo(tolerance);
await Assert.That(Math.Abs(embedding[1023] - (-0.00887922290712595))).IsLessThanOrEqualTo(tolerance);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// --------------------------------------------------------------------------------------------------------------------
// --------------------------------------------------------------------------------------------------------------------
// <copyright company="Microsoft">
// Copyright (c) Microsoft. All rights reserved.
// </copyright>
Expand All @@ -15,6 +15,7 @@ public class SkipUnlessIntegrationAttribute()
{
public override Task<bool> ShouldSkip(TestRegisteredContext context)
{
return Task.FromResult(!Utils.IntegrationTestsAvailable);
var integrationTestsAvailable = Utils.IntegrationTestsAvailable;
return Task.FromResult(!integrationTestsAvailable);
}
}
15 changes: 11 additions & 4 deletions sdk/cs/test/FoundryLocal.Tests/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,18 @@ internal TestCatalogInfo(bool includeCuda)

// Assembly-level setup hook (registered through the [Before(Assembly)] attribute).
// Its only job is to touch Utils' static state so the static constructor runs up front.
[Before(Assembly)]
public static void AssemblyInit(AssemblyHookContext _)
{
// Reading IntegrationTestsAvailable ensures the static constructor is called.
// SkipUnlessIntegrationAttribute also reads it, but that path only initializes it
// for some tests, not all of them.
Console.WriteLine("AssemblyInit: IntegrationTestsAvailable = " + IntegrationTestsAvailable);
}

static Utils()
{
using var loggerFactory = LoggerFactory.Create(builder =>
{
builder
.AddConsole()
.SetMinimumLevel(LogLevel.Debug);
builder.AddConsole()
.SetMinimumLevel(LogLevel.Debug);
});

ILogger logger = loggerFactory.CreateLogger("FoundryLocal.Tests");
Expand Down Expand Up @@ -84,6 +90,7 @@ public static void AssemblyInit(AssemblyHookContext _)
return;
}


try
{
var config = new Configuration
Expand Down Expand Up @@ -266,7 +273,7 @@ private static List<ModelInfo> BuildTestCatalog(bool includeCuda = true)
PromptTemplate = common.PromptTemplate,
Publisher = common.Publisher, Task = common.Task,
FileSizeMb = common.FileSizeMb - 10, // smaller so default chosen in test that sorts on this
ModelSettings = common.ModelSettings,
ModelSettings = common.ModelSettings,
SupportsToolCalling = common.SupportsToolCalling,
License = common.License,
LicenseDescription = common.LicenseDescription,
Expand Down
2 changes: 1 addition & 1 deletion sdk/js/test/openai/embeddingClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ describe('Embedding Client Tests', () => {
expect(cachedModels.length).to.be.greaterThan(0);

const cachedVariant = cachedModels.find(m => m.alias === EMBEDDING_MODEL_ALIAS);
expect(cachedVariant, 'qwen3-0.6b-embedding-generic-cpu should be cached').to.not.be.undefined;
expect(cachedVariant, `${EMBEDDING_MODEL_ALIAS} should be cached`).to.not.be.undefined;

const model = await catalog.getModel(EMBEDDING_MODEL_ALIAS);
expect(model).to.not.be.undefined;
Expand Down
2 changes: 1 addition & 1 deletion sdk/js/test/testUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export const TEST_CONFIG: FoundryLocalConfig = {
};

export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
export const EMBEDDING_MODEL_ALIAS = 'qwen3-embedding-0.6b';

export function getTestManager() {
return FoundryLocalManager.create(TEST_CONFIG);
Expand Down
3 changes: 2 additions & 1 deletion sdk/python/test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,5 @@ Tests that require the web service are skipped when either `TF_BUILD=true` (Azur
| Alias | Use | Variant |
|---|---|---|
| `qwen2.5-0.5b` | Chat completions | `qwen2.5-0.5b-instruct-generic-cpu:4` |
| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:2` |
| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:4` |
Comment thread
skottmckay marked this conversation as resolved.
| `qwen3-embedding-0.6b` | Embeddings | `qwen3-embedding-0.6b-generic-cpu:1` |
2 changes: 1 addition & 1 deletion sdk/python/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

TEST_MODEL_ALIAS = "qwen2.5-0.5b"
AUDIO_MODEL_ALIAS = "whisper-tiny"
EMBEDDING_MODEL_ALIAS = "qwen3-0.6b-embedding-generic-cpu"
EMBEDDING_MODEL_ALIAS = "qwen3-embedding-0.6b"

def get_git_repo_root() -> Path:
"""Walk upward from __file__ until we find a .git directory."""
Expand Down
2 changes: 1 addition & 1 deletion sdk/rust/tests/integration/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub const TEST_MODEL_ALIAS: &str = "qwen2.5-0.5b";
pub const WHISPER_MODEL_ALIAS: &str = "whisper-tiny";

/// Default model alias used for embedding integration tests.
pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-0.6b-embedding-generic-cpu";
pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-embedding-0.6b";

/// Expected transcription text fragment for the shared audio test file.
pub const EXPECTED_TRANSCRIPTION_TEXT: &str =
Expand Down
Loading