Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion samples/cs/embeddings/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
var catalog = await mgr.GetCatalogAsync();

// Get an embedding model
var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found");
var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? throw new Exception("Embedding model not found");

// Download the model (the method skips download if already cached)
await model.DownloadAsync(progress =>
Expand Down
2 changes: 1 addition & 1 deletion samples/js/embeddings/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ console.log('✓ SDK initialized successfully');

// <model_setup>
// Get an embedding model
const modelAlias = 'qwen3-0.6b-embedding';
const modelAlias = 'qwen3-embedding-0.6b';
const model = await manager.catalog.getModel(modelAlias);

// Download the model
Expand Down
2 changes: 1 addition & 1 deletion samples/python/embeddings/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def main():
manager = FoundryLocalManager.instance

# Select and load an embedding model from the catalog
model = manager.catalog.get_model("qwen3-0.6b-embedding")
model = manager.catalog.get_model("qwen3-embedding-0.6b")
model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
Expand Down
2 changes: 1 addition & 1 deletion samples/rust/embeddings/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
// </imports>

const ALIAS: &str = "qwen3-0.6b-embedding";
const ALIAS: &str = "qwen3-embedding-0.6b";

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
Expand Down
19 changes: 13 additions & 6 deletions sdk/cs/src/Detail/CoreInterop.NetStandard.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,33 @@ namespace Microsoft.AI.Foundry.Local.Detail;

internal partial class CoreInterop
{
[DllImport(LibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)]
// Win32 LoadLibraryEx parses the last dot in a name as the file extension. Because LibraryName
// contains dots (e.g. "Microsoft.AI.Foundry.Local.Core"), the loader treats ".Core" as the
// extension and does NOT append ".dll", so resolution fails with ERROR_MOD_NOT_FOUND even after
// the DLL has been pre-loaded by full path. Including the explicit ".dll" suffix here forces the
// marshaller to look up the module under its actual loaded name.
private const string DllImportLibraryName = LibraryName + ".dll";

[DllImport(DllImportLibraryName, EntryPoint = "execute_command", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreExecuteCommand(RequestBuffer* request, ResponseBuffer* response);

[DllImport(LibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_callback", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreExecuteCommandWithCallback(RequestBuffer* nativeRequest,
ResponseBuffer* nativeResponse,
nint callbackPtr,
nint userData);

[DllImport(LibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "execute_command_with_binary", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest,
ResponseBuffer* nativeResponse);

[DllImport(LibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "audio_stream_start", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreAudioStreamStart(RequestBuffer* request, ResponseBuffer* response);

[DllImport(LibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "audio_stream_push", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreAudioStreamPush(StreamingRequestBuffer* request, ResponseBuffer* response);

[DllImport(LibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)]
[DllImport(DllImportLibraryName, EntryPoint = "audio_stream_stop", CallingConvention = CallingConvention.Cdecl)]
private static unsafe extern void CoreAudioStreamStop(RequestBuffer* request, ResponseBuffer* response);

[DllImport("kernel32", SetLastError = true, CharSet = CharSet.Unicode)]
Expand Down
21 changes: 21 additions & 0 deletions sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ public async Task DirectChat_Streaming_Succeeds()
await Assert.That(message.Role).IsEqualTo("assistant");
await Assert.That(message.Content).IsNotNull();
responseMessage.Append(message.Content);

if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

var fullResponse = responseMessage.ToString();
Expand All @@ -124,6 +129,11 @@ public async Task DirectChat_Streaming_Succeeds()
await Assert.That(message.Role).IsEqualTo("assistant");
await Assert.That(message.Content).IsNotNull();
responseMessage.Append(message.Content);

if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

fullResponse = responseMessage.ToString();
Expand Down Expand Up @@ -271,15 +281,21 @@ public async Task DirectTool_Streaming_Succeeds()
var content = response.Choices[0].Message.Content;
await Assert.That(content).IsNotNull();
Console.WriteLine($"Content in streaming: {content}, Finish reason: {response.Choices[0].FinishReason}");

if (!string.IsNullOrEmpty(content))
{
responseMessage.Append(content);
numTokens += 1;
}

if (response.Choices[0].FinishReason == "tool_calls")
{
toolCallResponse = response;
}
else if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

// Check that the full response contains the expected tool call and that the tool call information is correct
Expand Down Expand Up @@ -330,6 +346,11 @@ public async Task DirectTool_Streaming_Succeeds()
{
responseMessage.Append(content);
}

if (response.Choices[0].FinishReason == "stop")
{
break; // test doesn't handle the usage response
}
}

// Check that the conversation continued
Expand Down
10 changes: 5 additions & 5 deletions sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// --------------------------------------------------------------------------------------------------------------------
// --------------------------------------------------------------------------------------------------------------------
// <copyright company="Microsoft">
// Copyright (c) Microsoft. All rights reserved.
// </copyright>
Expand All @@ -22,10 +22,10 @@ public static async Task Setup()
// Reduce max_length in the embedding model's genai_config.json to avoid OOM
// when allocating the KV cache. Embedding models only need a single forward pass
// so a large max_length is unnecessary.
Utils.PatchModelMaxLength("qwen3-0.6b-embedding-generic-cpu-1", "v1");
Utils.PatchModelMaxLength("qwen3-embedding-0.6b-generic-cpu-1", "v1");

// Load the specific cached model variant directly
var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false);
var model = await catalog.GetModelVariantAsync("qwen3-embedding-0.6b-generic-cpu:1").ConfigureAwait(false);
await Assert.That(model).IsNotNull();

await model!.LoadAsync().ConfigureAwait(false);
Expand Down Expand Up @@ -53,7 +53,7 @@ public async Task Embedding_BasicRequest_Succeeds()
.ConfigureAwait(false);

await Assert.That(response).IsNotNull();
await Assert.That(response.Model).IsEqualTo("qwen3-0.6b-embedding-generic-cpu:1");
await Assert.That(response.Model).IsEqualTo("qwen3-embedding-0.6b-generic-cpu:1");
await Assert.That(response.Data).IsNotNull().And.IsNotEmpty();
await Assert.That(response.Data[0].Embedding).IsNotNull();
await Assert.That(response.Data[0].Embedding.Count).IsEqualTo(1024);
Expand Down Expand Up @@ -178,7 +178,7 @@ public async Task Embedding_KnownValues_CapitalOfFrance()

// Use tolerance for float32 model outputs which may vary across hardware
const double tolerance = 1e-3;
await Assert.That(Math.Abs(embedding[0] - (-0.02815740555524826))).IsLessThanOrEqualTo(tolerance);
await Assert.That(Math.Abs(embedding[0] - (-0.035993535071611404))).IsLessThanOrEqualTo(tolerance);
await Assert.That(Math.Abs(embedding[1023] - (-0.00887922290712595))).IsLessThanOrEqualTo(tolerance);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// --------------------------------------------------------------------------------------------------------------------
// --------------------------------------------------------------------------------------------------------------------
// <copyright company="Microsoft">
// Copyright (c) Microsoft. All rights reserved.
// </copyright>
Expand All @@ -15,6 +15,7 @@ public class SkipUnlessIntegrationAttribute()
{
public override Task<bool> ShouldSkip(TestRegisteredContext context)
{
return Task.FromResult(!Utils.IntegrationTestsAvailable);
var integrationTestsAvailable = Utils.IntegrationTestsAvailable;
return Task.FromResult(!integrationTestsAvailable);
}
}
15 changes: 11 additions & 4 deletions sdk/cs/test/FoundryLocal.Tests/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,18 @@ internal TestCatalogInfo(bool includeCuda)

[Before(Assembly)]
public static void AssemblyInit(AssemblyHookContext _)
{
// Reading IntegrationTestsAvailable here ensures the Utils static constructor is called.
// SkipUnlessIntegrationAttribute also triggers it, but that path only covers some tests, not all of them.
Console.WriteLine("AssemblyInit: IntegrationTestsAvailable = " + IntegrationTestsAvailable);
}

static Utils()
{
using var loggerFactory = LoggerFactory.Create(builder =>
{
builder
.AddConsole()
.SetMinimumLevel(LogLevel.Debug);
builder.AddConsole()
.SetMinimumLevel(LogLevel.Debug);
});

ILogger logger = loggerFactory.CreateLogger("FoundryLocal.Tests");
Expand Down Expand Up @@ -84,6 +90,7 @@ public static void AssemblyInit(AssemblyHookContext _)
return;
}


try
{
var config = new Configuration
Expand Down Expand Up @@ -266,7 +273,7 @@ private static List<ModelInfo> BuildTestCatalog(bool includeCuda = true)
PromptTemplate = common.PromptTemplate,
Publisher = common.Publisher, Task = common.Task,
FileSizeMb = common.FileSizeMb - 10, // smaller so default chosen in test that sorts on this
ModelSettings = common.ModelSettings,
ModelSettings = common.ModelSettings,
SupportsToolCalling = common.SupportsToolCalling,
License = common.License,
LicenseDescription = common.LicenseDescription,
Expand Down
2 changes: 1 addition & 1 deletion sdk/js/test/openai/embeddingClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ describe('Embedding Client Tests', () => {
expect(cachedModels.length).to.be.greaterThan(0);

const cachedVariant = cachedModels.find(m => m.alias === EMBEDDING_MODEL_ALIAS);
expect(cachedVariant, 'qwen3-0.6b-embedding-generic-cpu should be cached').to.not.be.undefined;
expect(cachedVariant, 'qwen3-embedding-0.6b-generic-cpu should be cached').to.not.be.undefined;

const model = await catalog.getModel(EMBEDDING_MODEL_ALIAS);
expect(model).to.not.be.undefined;
Expand Down
2 changes: 1 addition & 1 deletion sdk/js/test/testUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export const TEST_CONFIG: FoundryLocalConfig = {
};

export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
export const EMBEDDING_MODEL_ALIAS = 'qwen3-embedding-0.6b';

export function getTestManager() {
return FoundryLocalManager.create(TEST_CONFIG);
Expand Down
3 changes: 2 additions & 1 deletion sdk/python/test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,5 @@ Tests that require the web service are skipped when either `TF_BUILD=true` (Azur
| Alias | Use | Variant |
|---|---|---|
| `qwen2.5-0.5b` | Chat completions | `qwen2.5-0.5b-instruct-generic-cpu:4` |
| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:2` |
| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:4` |
Comment thread
skottmckay marked this conversation as resolved.
| `qwen3-embedding-0.6b-generic-cpu` | Embeddings | `qwen3-embedding-0.6b-generic-cpu:1` |
2 changes: 1 addition & 1 deletion sdk/python/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

TEST_MODEL_ALIAS = "qwen2.5-0.5b"
AUDIO_MODEL_ALIAS = "whisper-tiny"
EMBEDDING_MODEL_ALIAS = "qwen3-0.6b-embedding-generic-cpu"
EMBEDDING_MODEL_ALIAS = "qwen3-embedding-0.6b-generic-cpu"

def get_git_repo_root() -> Path:
"""Walk upward from __file__ until we find a .git directory."""
Expand Down
2 changes: 1 addition & 1 deletion sdk/rust/tests/integration/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub const TEST_MODEL_ALIAS: &str = "qwen2.5-0.5b";
pub const WHISPER_MODEL_ALIAS: &str = "whisper-tiny";

/// Default model alias used for embedding integration tests.
pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-0.6b-embedding-generic-cpu";
pub const EMBEDDING_MODEL_ALIAS: &str = "qwen3-embedding-0.6b-generic-cpu";

/// Expected transcription text fragment for the shared audio test file.
pub const EXPECTED_TRANSCRIPTION_TEXT: &str =
Expand Down
Loading