diff --git a/.pipelines/templates/build-core-steps.yml b/.pipelines/templates/build-core-steps.yml index c21e0b921..e9a5fe4cb 100644 --- a/.pipelines/templates/build-core-steps.yml +++ b/.pipelines/templates/build-core-steps.yml @@ -58,7 +58,7 @@ steps: inputs: command: restore projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' - restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' feedsToUse: config nugetConfigPath: '$(nsRoot)/nuget.config' @@ -67,14 +67,14 @@ steps: inputs: command: build projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' - arguments: '--no-restore -r ${{ parameters.flavor }} -f net9.0-windows10.0.26100.0 /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + arguments: '--no-restore -r ${{ parameters.flavor }} -f net9.0-windows10.0.18362.0 /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' - task: DotNetCoreCLI@2 displayName: 'Publish FLC AOT ${{ parameters.flavor }} (WinML)' inputs: command: publish projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' - arguments: '--no-restore --no-build -r ${{ parameters.flavor }} -f net9.0-windows10.0.26100.0 /p:Platform=${{ parameters.platform }} /p:Configuration=Release /p:PublishAot=true /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + arguments: '--no-restore --no-build -r ${{ parameters.flavor }} -f net9.0-windows10.0.18362.0 /p:Platform=${{ parameters.platform }} /p:Configuration=Release 
/p:PublishAot=true /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' publishWebProjects: false zipAfterPublish: false @@ -84,7 +84,7 @@ steps: inputs: command: restore projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' - restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' feedsToUse: config nugetConfigPath: '$(nsRoot)/nuget.config' @@ -93,7 +93,7 @@ steps: inputs: command: build projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' - arguments: '--no-restore -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + arguments: '--no-restore -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' - task: DotNetCoreCLI@2 displayName: 'Test FLC ${{ parameters.flavor }} (WinML)' @@ -170,18 +170,8 @@ steps: script: | $destDir = "$(Build.ArtifactStagingDirectory)/native" New-Item -ItemType Directory -Path $destDir -Force | Out-Null - # WinML publishes additional files (e.g. WindowsAppRuntime Bootstrapper DLLs) - # beyond Microsoft.AI.Foundry.Local.Core.*. 
- $isWinML = "${{ parameters.isWinML }}" -eq "True" - if ($isWinML) { - Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | - Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" -or $_.Name -eq "Microsoft.WindowsAppRuntime.Bootstrap.dll" } | - Copy-Item -Destination $destDir -Force - } else { - Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | - Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | - Copy-Item -Destination $destDir -Force - } + Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | + Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | + Copy-Item -Destination $destDir -Force Write-Host "Staged binaries:" Get-ChildItem $destDir | ForEach-Object { Write-Host " $($_.Name)" } - diff --git a/.pipelines/templates/package-core-steps.yml b/.pipelines/templates/package-core-steps.yml index e00a63167..61f6e16b0 100644 --- a/.pipelines/templates/package-core-steps.yml +++ b/.pipelines/templates/package-core-steps.yml @@ -39,6 +39,8 @@ steps: '@ $platforms = $platformsJson | ConvertFrom-Json + $isWinML = "${{ parameters.isWinML }}" -eq "True" + foreach ($p in $platforms) { $srcDir = "$(Pipeline.Workspace)/$($p.artifactName)" Write-Host "Looking for artifacts at: $srcDir" @@ -47,22 +49,63 @@ steps: } $destDir = "$unifiedPath/runtimes/$($p.name)/native" New-Item -ItemType Directory -Path $destDir -Force | Out-Null - # WinML artifacts include WindowsAppRuntime Bootstrapper DLLs in addition - # to Microsoft.AI.Foundry.Local.Core.*. 
- $isWinML = "${{ parameters.isWinML }}" -eq "True" - if ($isWinML) { - Get-ChildItem $srcDir -File | - Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" -or $_.Name -eq "Microsoft.WindowsAppRuntime.Bootstrap.dll" } | - Copy-Item -Destination $destDir -Force - } else { - Get-ChildItem $srcDir -File | Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | - Copy-Item -Destination $destDir -Force - } + Get-ChildItem $srcDir -File | Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | + Copy-Item -Destination $destDir -Force Write-Host "Copied $($p.name) binaries to $destDir" } - # Copy build integration files from neutron-server $nsRoot = "$(nsRoot)" + + if ($isWinML) { + [xml]$propsXml = Get-Content "$nsRoot/Directory.Packages.props" + $winMLVer = [string]$propsXml.Project.PropertyGroup.WinMLVersion + if ([string]::IsNullOrWhiteSpace($winMLVer)) { + throw "Directory.Packages.props is missing WinMLVersion." + } + + $runtimePackageDir = "$(Build.ArtifactStagingDirectory)/winml-runtime-package" + New-Item -ItemType Directory -Path $runtimePackageDir -Force | Out-Null + + $nugetArgs = @( + 'install', 'Microsoft.Windows.AI.MachineLearning', + '-Version', $winMLVer, + '-Source', 'https://api.nuget.org/v3/index.json', + '-OutputDirectory', $runtimePackageDir, + '-ExcludeVersion', + '-NonInteractive', + '-DirectDownload' + ) + Write-Host "Running: nuget $($nugetArgs -join ' ')" + & nuget $nugetArgs + if ($LASTEXITCODE -ne 0) { throw "Failed to download Microsoft.Windows.AI.MachineLearning $winMLVer" } + + $runtimePackageRoot = Get-ChildItem $runtimePackageDir -Directory | + Where-Object { $_.Name -like "Microsoft.Windows.AI.MachineLearning*" } | + Select-Object -First 1 + if (-not $runtimePackageRoot) { + throw "nuget install did not produce a Microsoft.Windows.AI.MachineLearning package directory in $runtimePackageDir" + } + + foreach ($p in $platforms) { + if (-not $p.name.StartsWith("win-")) { + continue + } + + $runtimeDll = 
@( + "$($runtimePackageRoot.FullName)/runtimes/$($p.name)/Microsoft.Windows.AI.MachineLearning.dll", + "$($runtimePackageRoot.FullName)/runtimes/$($p.name)/native/Microsoft.Windows.AI.MachineLearning.dll" + ) | Where-Object { Test-Path $_ } | Select-Object -First 1 + if ([string]::IsNullOrWhiteSpace($runtimeDll)) { + throw "Microsoft.Windows.AI.MachineLearning $winMLVer does not contain a Microsoft.Windows.AI.MachineLearning.dll for $($p.name)" + } + + $destDir = "$unifiedPath/runtimes/$($p.name)/native" + Copy-Item $runtimeDll -Destination $destDir -Force + Write-Host "Copied WinML runtime DLL for $($p.name) to $destDir" + } + } + + # Copy build integration files from neutron-server foreach ($dir in @("build", "buildTransitive")) { $src = "$nsRoot/src/FoundryLocalCore/Core/$dir" if (Test-Path $src) { @@ -100,10 +143,14 @@ steps: if ("${{ parameters.isWinML }}" -eq "True") { $nuspec = "$nsRoot/src/FoundryLocalCore/Core/WinMLNuget.nuspec" $id = "Microsoft.AI.Foundry.Local.Core.WinML" - $ortVer = $pg.OnnxRuntimeFoundryVersionForWinML - $genaiVer = $pg.OnnxRuntimeGenAIFoundryVersion - $winAppSdkVer = $pg.WinAppSdkVersion - $props = "id=$id;version=$(flcVersion);commitId=$(Build.SourceVersion);OnnxRuntimeFoundryVersionForWinML=$ortVer;OnnxRuntimeGenAIFoundryVersion=$genaiVer;WinAppSdkVersion=$winAppSdkVer" + $ortVer = [string]$pg.OnnxRuntimeFoundryVersionForWinML + $genaiVer = [string]$pg.OnnxRuntimeGenAIFoundryVersion + $winMLVer = [string]$pg.WinMLVersion + if ([string]::IsNullOrWhiteSpace($ortVer)) { throw "Directory.Packages.props is missing OnnxRuntimeFoundryVersionForWinML." } + if ([string]::IsNullOrWhiteSpace($genaiVer)) { throw "Directory.Packages.props is missing OnnxRuntimeGenAIFoundryVersion." } + if ([string]::IsNullOrWhiteSpace($winMLVer)) { throw "Directory.Packages.props is missing WinMLVersion." 
} + + $props = "id=$id;version=$(flcVersion);commitId=$(Build.SourceVersion);OnnxRuntimeFoundryVersionForWinML=$ortVer;OnnxRuntimeGenAIFoundryVersion=$genaiVer;WinMLVersion=$winMLVer" } else { $nuspec = "$nsRoot/src/FoundryLocalCore/Core/NativeNuget.nuspec" $id = "Microsoft.AI.Foundry.Local.Core" @@ -266,12 +313,13 @@ steps: elseif ($parts.Count -eq 2) { "$($parts[0])$($parts[1])" } else { $parts[0] } - # Both standard and WinML write a deps_versions.json with identical key - # structure. The pipeline produces separate artifacts (deps-versions-standard - # / deps-versions-winml) so SDK stages pick the right one via isWinML. + # The pipeline produces separate dependency version artifacts + # (deps-versions-standard / deps-versions-winml), so SDK stages pick the + # right one via isWinML. if ($isWinML) { $deps = @{ 'foundry-local-core' = @{ nuget = "$(flcVersion)"; python = $pyVer } + 'windows-ai-machinelearning' = @{ version = [string]$pg.WinMLVersion } onnxruntime = @{ version = [string]$pg.OnnxRuntimeFoundryVersionForWinML } 'onnxruntime-genai' = @{ version = [string]$pg.OnnxRuntimeGenAIFoundryVersion } } diff --git a/.pipelines/templates/update-deps-versions-steps.yml b/.pipelines/templates/update-deps-versions-steps.yml index 9d489ab7e..6f0ebcc34 100644 --- a/.pipelines/templates/update-deps-versions-steps.yml +++ b/.pipelines/templates/update-deps-versions-steps.yml @@ -1,6 +1,6 @@ # Shared template to update deps_versions.json / deps_versions_winml.json -# from pipeline artifacts. Both files use identical key structure — the -# isWinML parameter determines which file gets overwritten. +# from pipeline artifacts. The isWinML parameter determines which file gets +# overwritten. 
parameters: - name: repoRoot type: string @@ -39,3 +39,6 @@ steps: Write-Host " FLC Core (Python): $($deps.'foundry-local-core'.python)" Write-Host " OnnxRuntime: $($deps.onnxruntime.version)" Write-Host " GenAI: $($deps.'onnxruntime-genai'.version)" + if ($isWinML -and $deps.'windows-ai-machinelearning') { + Write-Host " Windows AI ML: $($deps.'windows-ai-machinelearning'.version)" + } diff --git a/samples/README.md b/samples/README.md index bed7e41c1..ebd1afb8c 100644 --- a/samples/README.md +++ b/samples/README.md @@ -8,7 +8,8 @@ Explore complete working examples that demonstrate how to use Foundry Local — | Language | Samples | Description | |----------|---------|-------------| -| [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. | -| [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. | -| [**Python**](python/) | 11 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, Responses API, and tutorials. | -| [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. | +| [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, tutorials, and WinML EP verification. Uses WinML on Windows for hardware acceleration. | +| [**JavaScript**](js/) | 15 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, tutorials, and WinML EP verification. 
| +| [**Python**](python/) | 14 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, Responses API, tutorials, and WinML EP verification. | +| [**Rust**](rust/) | 11 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, tutorials, and WinML EP verification. | +| [**C++**](cpp/) | 1 | C++ sample for live audio transcription. | diff --git a/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj b/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj index bd42e38b5..4f048e152 100644 --- a/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj +++ b/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/embeddings/Embeddings.csproj b/samples/cs/embeddings/Embeddings.csproj index 4d948c56b..870c34acd 100644 --- a/samples/cs/embeddings/Embeddings.csproj +++ b/samples/cs/embeddings/Embeddings.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj index fe890be2d..a7c1a3766 100644 --- a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj +++ b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj b/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj index 3d91b6773..1a276b73d 100644 --- a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj +++ 
b/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/model-management-example/ModelManagementExample.csproj b/samples/cs/model-management-example/ModelManagementExample.csproj index 4d948c56b..870c34acd 100644 --- a/samples/cs/model-management-example/ModelManagementExample.csproj +++ b/samples/cs/model-management-example/ModelManagementExample.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/native-chat-completions/NativeChatCompletions.csproj b/samples/cs/native-chat-completions/NativeChatCompletions.csproj index 4d948c56b..870c34acd 100644 --- a/samples/cs/native-chat-completions/NativeChatCompletions.csproj +++ b/samples/cs/native-chat-completions/NativeChatCompletions.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj index 4d948c56b..870c34acd 100644 --- a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj +++ b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj index fe890be2d..a7c1a3766 100644 --- a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj +++ b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None 
false diff --git a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj index a3533047e..f07da7a75 100644 --- a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj +++ b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj index a3533047e..f07da7a75 100644 --- a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj +++ b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj b/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj index a3533047e..f07da7a75 100644 --- a/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj +++ b/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj index a3533047e..f07da7a75 100644 --- a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj +++ b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/verify-winml/Program.cs b/samples/cs/verify-winml/Program.cs new file mode 100644 index 000000000..27a141296 --- /dev/null +++ b/samples/cs/verify-winml/Program.cs @@ -0,0 +1,278 @@ +/// +/// Foundry Local SDK - WinML 2.0 EP Verification (C#) +/// +/// Verifies: +/// 1. 
Execution providers are discovered and registered
+/// 2. Accelerated models appear in catalog after EP registration
+/// 3. Streaming chat completions work on an accelerated model
+///
+
+using Microsoft.AI.Foundry.Local;
+using Microsoft.Extensions.Logging;
+using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
+
+const string PASS = "\x1b[92m[PASS]\x1b[0m";
+const string FAIL = "\x1b[91m[FAIL]\x1b[0m";
+const string INFO = "\x1b[94m[INFO]\x1b[0m";
+const string WARN = "\x1b[93m[WARN]\x1b[0m";
+
+var results = new List<(string Name, bool Passed)>();
+// Prints one PASS/FAIL line (with optional detail) and records the outcome in `results`.
+void LogResult(string testName, bool passed, string detail = "")
+{
+    var status = passed ? PASS : FAIL;
+    var msg = string.IsNullOrEmpty(detail) ? $"{status} {testName}" : $"{status} {testName} - {detail}";
+    Console.WriteLine(msg);
+    results.Add((testName, passed));
+}
+// Prints a section banner: the title framed by two 60-character '=' rules.
+void PrintSeparator(string title)
+{
+    Console.WriteLine($"\n{new string('=', 60)}");
+    Console.WriteLine($" {title}");
+    Console.WriteLine($"{new string('=', 60)}\n");
+}
+// Prints every recorded check and the pass count; flags the process as failed if any check failed.
+void PrintSummary()
+{
+    PrintSeparator("Summary");
+    var passed = results.Count(r => r.Passed);
+    foreach (var (name, p) in results)
+        Console.WriteLine($" {(p ? "✓" : "✗")} {name}");
+
+    Console.WriteLine($"\n {passed}/{results.Count} tests passed");
+    // Every exit path goes through here, so a failed check always yields a non-zero exit code.
+    if (passed != results.Count) Environment.ExitCode = 1;
+}
+// True when the variant's runtime reports a GPU or NPU device (false when runtime info is absent).
+bool IsAcceleratedVariant(IModel model)
+{
+    var runtime = model.Info?.Runtime;
+    return runtime != null && (runtime.DeviceType == DeviceType.GPU || runtime.DeviceType == DeviceType.NPU);
+}
+
+CancellationToken ct = CancellationToken.None;
+
+// ── 0. Initialize FoundryLocalManager ──────────────────────
+PrintSeparator("Initialization");
+var config = new Configuration
+{
+    AppName = "verify_winml",
+    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
+};
+
+using var loggerFactory = LoggerFactory.Create(builder =>
+    builder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information));
+var logger = loggerFactory.CreateLogger<Program>();
+
+await FoundryLocalManager.CreateAsync(config, logger);
+var mgr = FoundryLocalManager.Instance;
+Console.WriteLine($"{INFO} FoundryLocalManager initialized.");
+
+// ── 1. Discover & Register EPs ────────────────────────────
+PrintSeparator("Step 1: Discover & Register Execution Providers");
+EpInfo[] eps = [];
+try
+{
+    eps = mgr.DiscoverEps();
+    Console.WriteLine($"{INFO} Discovered {eps.Length} execution providers:");
+    foreach (var ep in eps)
+    {
+        Console.WriteLine($" - {ep.Name,-40} Registered: {ep.IsRegistered}");
+    }
+
+    LogResult("EP Discovery", true, $"{eps.Length} EP(s) found");
+}
+catch (Exception e)
+{
+    LogResult("EP Discovery", false, e.Message);
+}
+
+if (eps.Length == 0)
+{
+    var detail = "No execution providers discovered on this machine";
+    LogResult("EP Download & Registration", false, detail);
+    Console.WriteLine($"\n{FAIL} {detail}.");
+    PrintSummary();
+    return;
+}
+
+try
+{
+    string? currentProgressEp = null;
+    var currentProgressPercent = -1d;
+
+    var epResult = await mgr.DownloadAndRegisterEpsAsync(
+        new Action<string, double>((epName, percent) =>
+        {
+            if (currentProgressEp != null &&
+                (!epName.Equals(currentProgressEp, StringComparison.OrdinalIgnoreCase) || percent < currentProgressPercent))
+            {
+                Console.WriteLine();
+            }
+
+            currentProgressEp = epName;
+            currentProgressPercent = percent;
+            Console.Write($"\r Downloading {epName}: {percent:F1}%");
+        }),
+        ct);
+
+    if (currentProgressEp != null)
+    {
+        Console.WriteLine();
+    }
+
+    Console.WriteLine($"{INFO} EP registration: success={epResult.Success}, status={epResult.Status}");
+    if (epResult.RegisteredEps?.Any() == true)
+    {
+        Console.WriteLine($" Registered: {string.Join(", ", epResult.RegisteredEps)}");
+    }
+
+    if (epResult.FailedEps?.Any() == true)
+    {
+        Console.WriteLine($" Failed: {string.Join(", ", epResult.FailedEps)}");
+    }
+
+    var downloadOk = epResult.Success;
+    var detail = downloadOk && epResult.RegisteredEps?.Any() == true
+        ? $"{epResult.RegisteredEps.Length} EP(s) registered"
+        : epResult.Status;
+    LogResult("EP Download & Registration", downloadOk, detail);
+    if (!downloadOk)
+    {
+        PrintSummary();
+        return;
+    }
+}
+catch (Exception e)
+{
+    Console.WriteLine();
+    LogResult("EP Download & Registration", false, e.Message);
+    PrintSummary();
+    return;
+}
+
+// ── 2. List Models & Find Accelerated Variants ────────────
+PrintSeparator("Step 2: Model Catalog - Accelerated Models");
+var catalog = await mgr.GetCatalogAsync();
+var models = await catalog.ListModelsAsync();
+Console.WriteLine($"{INFO} Total models in catalog: {models.Count}");
+
+var acceleratedVariants = new List<IModel>();
+foreach (var model in models)
+{
+    foreach (var variant in model.Variants)
+    {
+        if (IsAcceleratedVariant(variant))
+        {
+            acceleratedVariants.Add(variant);
+            var runtime = variant.Info?.Runtime;
+            Console.WriteLine($" - {variant.Id,-50} Device: {runtime?.DeviceType,-3} EP: {runtime?.ExecutionProvider ?? "?"}");
+        }
+    }
+}
+
+LogResult("Catalog - Accelerated models found", acceleratedVariants.Count > 0,
+    acceleratedVariants.Count > 0 ? $"{acceleratedVariants.Count} accelerated variant(s)" : "No accelerated model variants");
+
+if (acceleratedVariants.Count == 0)
+{
+    Console.WriteLine($"\n{FAIL} No accelerated model variants are available.");
+    Console.WriteLine($"{WARN} Ensure the system has a compatible accelerator and matching model variants installed.");
+    PrintSummary();
+    return;
+}
+
+// ── 3. Download & Load Model ──────────────────────────────
+PrintSeparator("Step 3: Download & Load Model");
+IModel? chosen = null;
+Exception? lastLoadError = null;
+var downloadedAny = false;
+
+foreach (var candidate in acceleratedVariants)
+{
+    var ep = candidate.Info?.Runtime?.ExecutionProvider ?? "unknown";
+    Console.WriteLine($"\n{INFO} Trying model: {candidate.Id} (EP: {ep})");
+
+    try
+    {
+        await candidate.DownloadAsync(progress =>
+            Console.Write($"\r Downloading model: {progress:F1}%"));
+        Console.WriteLine();
+        downloadedAny = true;
+    }
+    catch (Exception e)
+    {
+        Console.WriteLine();
+        Console.WriteLine($"{WARN} Skipping {candidate.Id}: download failed: {e.Message}");
+        lastLoadError = e;
+        continue;
+    }
+
+    try
+    {
+        await candidate.LoadAsync();
+        chosen = candidate;
+        break;
+    }
+    catch (Exception e)
+    {
+        Console.WriteLine($"{WARN} Skipping {candidate.Id}: load failed: {e.Message}");
+        lastLoadError = e;
+    }
+}
+
+LogResult("Model Download", downloadedAny,
+    downloadedAny ? "At least one accelerated variant downloaded" : lastLoadError?.Message ?? "No accelerated variant could be downloaded");
+
+if (chosen == null)
+{
+    LogResult("Model Load", false,
+        lastLoadError?.Message ?? "No accelerated variant could be loaded on this machine");
+    PrintSummary();
+    return;
+}
+
+LogResult("Model Load", true, $"Loaded {chosen.Id}");
+
+// ── 4. Streaming Chat Completions (Native SDK) ────────────
+PrintSeparator("Step 4: Streaming Chat Completions (Native)");
+try
+{
+    var chatClient = await chosen.GetChatClientAsync();
+    chatClient.Settings.Temperature = 0;
+    chatClient.Settings.MaxTokens = 16;
+    var messages = new List<ChatMessage>
+    {
+        new() { Role = "system", Content = "You are a helpful assistant." },
+        new() { Role = "user", Content = "What is 2 + 2? Reply with just the number." },
+    };
+
+    var fullResponse = "";
+    var start = DateTime.UtcNow;
+    await foreach (var chunk in chatClient.CompleteChatStreamingAsync(messages, ct))
+    {
+        var content = chunk.Choices?.FirstOrDefault()?.Message?.Content;
+        if (!string.IsNullOrEmpty(content))
+        {
+            Console.Write(content);
+            Console.Out.Flush();
+            fullResponse += content;
+        }
+    }
+
+    var elapsed = (DateTime.UtcNow - start).TotalSeconds;
+    Console.WriteLine();
+    LogResult("Streaming Chat (Native)", fullResponse.Length > 0,
+        $"{fullResponse.Length} chars in {elapsed:F2}s");
+}
+catch (Exception e)
+{
+    LogResult("Streaming Chat (Native)", false, e.Message);
+}
+
+// ── Summary ──────────────────────────────────────────────
+PrintSummary();
+
+await chosen.UnloadAsync();
+Console.WriteLine("Model unloaded. Done!");
diff --git a/samples/cs/verify-winml/README.md b/samples/cs/verify-winml/README.md
new file mode 100644
index 000000000..88540fbc8
--- /dev/null
+++ b/samples/cs/verify-winml/README.md
@@ -0,0 +1,21 @@
+# Verify WinML 2.0 Execution Providers (C#)
+
+This sample verifies that WinML 2.0 execution providers are correctly discovered,
+downloaded, and registered using the Foundry Local C# SDK. It uses registered WinML
+EP-backed model variants and finishes with one native streaming chat check.
+
+## Prerequisites
+
+- Windows with a compatible GPU
+- .NET 9.0 SDK
+
+## Build & Run
+
+This sample uses the `Microsoft.AI.Foundry.Local.WinML` SDK package selected by
+the shared central package versions. 
The SDK package owns its native +`Microsoft.AI.Foundry.Local.Core.WinML` dependency, so it restores the matching +Core package transitively. + +```bash +dotnet run +``` diff --git a/samples/cs/verify-winml/VerifyWinML.csproj b/samples/cs/verify-winml/VerifyWinML.csproj new file mode 100644 index 000000000..151f655f3 --- /dev/null +++ b/samples/cs/verify-winml/VerifyWinML.csproj @@ -0,0 +1,38 @@ + + + + Exe + enable + enable + + + + net9.0-windows10.0.18362.0 + x64;ARM64 + None + false + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + diff --git a/samples/js/verify-winml/README.md b/samples/js/verify-winml/README.md new file mode 100644 index 000000000..a5e52921a --- /dev/null +++ b/samples/js/verify-winml/README.md @@ -0,0 +1,26 @@ +# Verify WinML 2.0 Execution Providers (JavaScript) + +This sample verifies that WinML 2.0 execution providers are correctly discovered, +downloaded, and registered using the Foundry Local JavaScript SDK. It uses registered +WinML EP-backed model variants and finishes with one native streaming chat check. + +## Prerequisites + +- Windows with a compatible GPU +- Node.js 20+ + +## Setup + +`package.json` installs the repo-local `foundry-local-sdk` package and then +runs its WinML installer script, so the sample always uses the current +branch's WinML artifact pins: + +```bash +npm install +``` + +## Run + +```bash +node app.js +``` diff --git a/samples/js/verify-winml/app.js b/samples/js/verify-winml/app.js new file mode 100644 index 000000000..e7f947774 --- /dev/null +++ b/samples/js/verify-winml/app.js @@ -0,0 +1,240 @@ +/** + * Foundry Local SDK - WinML 2.0 EP Verification Script (JavaScript) + * + * Verifies: + * 1. Execution providers are discovered and registered + * 2. Accelerated models appear in catalog after EP registration + * 3. 
Streaming chat completions work on an accelerated model + */ + +import { FoundryLocalManager } from "foundry-local-sdk"; + +const PASS = "\x1b[92m[PASS]\x1b[0m"; +const FAIL = "\x1b[91m[FAIL]\x1b[0m"; +const INFO = "\x1b[94m[INFO]\x1b[0m"; +const WARN = "\x1b[93m[WARN]\x1b[0m"; + +const results = []; + +function logResult(testName, passed, detail = "") { + const status = passed ? PASS : FAIL; + const msg = detail ? `${status} ${testName} - ${detail}` : `${status} ${testName}`; + console.log(msg); + results.push({ testName, passed }); +} + +function printSeparator(title) { + console.log(`\n${"=".repeat(60)}`); + console.log(` ${title}`); + console.log(`${"=".repeat(60)}\n`); +} + +function isAcceleratedVariant(variant) { + const runtime = variant.info?.runtime; + return Boolean(runtime && ["GPU", "NPU"].includes(runtime.deviceType)); +} + +async function main() { + // ── 0. Initialize FoundryLocalManager ────────────────────── + printSeparator("Initialization"); + const manager = FoundryLocalManager.create({ + appName: "verify_winml", + logLevel: "info", + }); + console.log(`${INFO} FoundryLocalManager initialized.`); + + // ── 1. 
Discover & Register EPs ──────────────────────────── + printSeparator("Step 1: Discover & Register Execution Providers"); + let eps = []; + try { + eps = manager.discoverEps(); + console.log(`${INFO} Discovered ${eps.length} execution providers:`); + for (const ep of eps) { + console.log(` - ${ep.name.padEnd(40)} Registered: ${ep.isRegistered}`); + } + logResult("EP Discovery", true, `${eps.length} EP(s) found`); + } catch (e) { + logResult("EP Discovery", false, e.message); + } + + if (!eps.length) { + const detail = "No execution providers discovered on this machine"; + logResult("EP Download & Registration", false, detail); + console.log(`\n${FAIL} ${detail}.`); + printSummary(); + return; + } + + try { + let lastProgressEp = null; + let lastProgressPercent = -1; + const result = await manager.downloadAndRegisterEps((epName, percent) => { + if (lastProgressEp && (lastProgressEp !== epName || percent < lastProgressPercent)) { + process.stdout.write("\n"); + } + lastProgressEp = epName; + lastProgressPercent = percent; + process.stdout.write(`\r Downloading ${epName}: ${percent.toFixed(1)}%`); + }); + if (lastProgressEp) { + console.log(); + } + + console.log(`${INFO} EP registration result: success=${result.success}, status=${result.status}`); + if (result.registeredEps?.length) { + console.log(` Registered: ${result.registeredEps.join(", ")}`); + } + if (result.failedEps?.length) { + console.log(` Failed: ${result.failedEps.join(", ")}`); + } + + const downloadOk = result.success; + const detail = downloadOk && result.registeredEps?.length + ? `${result.registeredEps.length} EP(s) registered` + : result.status; + logResult("EP Download & Registration", downloadOk, detail); + if (!downloadOk) { + printSummary(); + return; + } + } catch (e) { + console.log(); + logResult("EP Download & Registration", false, e.message); + printSummary(); + return; + } + + // ── 2. 
List Models & Find Accelerated Variants ──────────── + printSeparator("Step 2: Model Catalog - Accelerated Models"); + const models = await manager.catalog.getModels(); + console.log(`${INFO} Total models in catalog: ${models.length}`); + + const acceleratedVariants = []; + + for (const model of models) { + for (const variant of model.variants) { + if (isAcceleratedVariant(variant)) { + acceleratedVariants.push(variant); + } + } + } + + console.log(`${INFO} Accelerated model variants: ${acceleratedVariants.length}`); + for (const variant of acceleratedVariants) { + const runtime = variant.info?.runtime; + const ep = runtime?.executionProvider || "?"; + const device = runtime?.deviceType || "?"; + console.log(` - ${variant.id.padEnd(50)} Device: ${String(device).padEnd(3)} EP: ${ep}`); + } + + logResult( + "Catalog - Accelerated models found", + acceleratedVariants.length > 0, + `${acceleratedVariants.length} accelerated variant(s)`, + ); + + if (!acceleratedVariants.length) { + console.log(`\n${FAIL} No accelerated model variants are available.`); + console.log(`${WARN} Ensure the system has a compatible accelerator and matching model variants installed.`); + printSummary(); + process.exit(1); + } + + // ── 3. 
Download & Load Model ────────────────────────────── + printSeparator("Step 3: Download & Load Model"); + + let chosen = null; + let downloadedAny = false; + let lastLoadError = null; + for (const candidate of acceleratedVariants) { + const ep = candidate.info?.runtime?.executionProvider || "unknown"; + console.log(`\n${INFO} Trying model: ${candidate.id} (EP: ${ep})`); + + try { + await candidate.download((percent) => { + process.stdout.write(`\r Downloading model: ${percent.toFixed(1)}%`); + }); + console.log(); + downloadedAny = true; + } catch (e) { + console.log(); + console.log(`${WARN} Skipping ${candidate.id}: download failed: ${e.message}`); + lastLoadError = e; + continue; + } + + try { + await candidate.load(); + chosen = candidate; + break; + } catch (e) { + console.log(`${WARN} Skipping ${candidate.id}: load failed: ${e.message}`); + lastLoadError = e; + } + } + + logResult( + "Model Download", + downloadedAny, + downloadedAny ? "At least one accelerated variant downloaded" : lastLoadError?.message || "No accelerated variant could be downloaded", + ); + + if (!chosen) { + logResult("Model Load", false, lastLoadError?.message || "No accelerated variant could be loaded on this machine"); + printSummary(); + process.exit(1); + } + + logResult("Model Load", true, `Loaded ${chosen.id}`); + + // ── 4. Streaming Chat Completions (Native SDK) ──────────── + printSeparator("Step 4: Streaming Chat Completions (Native)"); + const messages = [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "What is 2 + 2? Reply with just the number." 
}, + ]; + + try { + const client = chosen.createChatClient(); + client.settings.temperature = 0; + client.settings.maxTokens = 16; + let responseText = ""; + const start = Date.now(); + for await (const chunk of client.completeStreamingChat(messages)) { + const content = chunk?.choices?.[0]?.delta?.content; + if (content) { + responseText += content; + process.stdout.write(content); + } + } + const elapsed = ((Date.now() - start) / 1000).toFixed(2); + console.log(); + logResult("Streaming Chat (Native)", responseText.length > 0, `${responseText.length} chars in ${elapsed}s`); + } catch (e) { + logResult("Streaming Chat (Native)", false, e.message); + } + + try { + await chosen.unload(); + console.log(`${INFO} Model unloaded.`); + } catch (e) { + console.warn(`${WARN} Failed to unload model: ${e.message}`); + } + + printSummary(); +} + +function printSummary() { + printSeparator("Summary"); + const passed = results.filter((r) => r.passed).length; + for (const { testName, passed: p } of results) { + console.log(` ${p ? 
"✓" : "✗"} ${testName}`); + } + console.log(`\n ${passed}/${results.length} tests passed`); + if (passed < results.length) process.exit(1); +} + +main().catch((e) => { + console.error(e); + process.exit(1); +}); diff --git a/samples/js/verify-winml/package.json b/samples/js/verify-winml/package.json new file mode 100644 index 000000000..f8ba84ad9 --- /dev/null +++ b/samples/js/verify-winml/package.json @@ -0,0 +1,12 @@ +{ + "name": "verify-winml", + "version": "1.0.0", + "type": "module", + "main": "app.js", + "scripts": { + "postinstall": "node node_modules/foundry-local-sdk/script/install-winml.cjs" + }, + "dependencies": { + "foundry-local-sdk": "file:../../../sdk/js" + } +} diff --git a/samples/python/verify-winml/README.md b/samples/python/verify-winml/README.md new file mode 100644 index 000000000..eabfd7201 --- /dev/null +++ b/samples/python/verify-winml/README.md @@ -0,0 +1,47 @@ +# Verify WinML 2.0 Execution Providers + +This sample verifies that WinML 2.0 execution providers are correctly discovered, +downloaded, and registered. It then runs inference on a model variant backed by a +registered WinML EP. It finishes with one native streaming chat check. + +## Prerequisites + +- Windows with a compatible GPU +- Python 3.11+ + +## Setup + +Use a fresh virtual environment for the cleanest setup. + +If you want to reuse your existing Python environment instead, delete that +environment's `Lib\site-packages\foundry_local_core` directory before +reinstalling so stale native files are not left behind. + +`requirements.txt` installs the WinML SDK variant, which brings the matching +WinML native package transitively. 
Either install path is enough: + +```powershell +python -m venv .venv +.venv\Scripts\Activate.ps1 +pip install --upgrade -r requirements.txt +``` + +Or, after removing `Lib\site-packages\foundry_local_core` from your existing +Python environment: + +```bash +pip install --upgrade -r requirements.txt +``` + +## Run + +```bash +python src/app.py +``` + +## What it tests + +1. **EP Discovery** — Lists all available execution providers +2. **EP Download & Registration** — Downloads only the WinML EPs relevant to the machine +3. **Model Catalog** — Lists model variants backed by the registered WinML EPs +4. **Streaming Chat** — Runs streaming chat completion on a WinML EP-backed model via native SDK diff --git a/samples/python/verify-winml/requirements.txt b/samples/python/verify-winml/requirements.txt new file mode 100644 index 000000000..481d9dc46 --- /dev/null +++ b/samples/python/verify-winml/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk-winml diff --git a/samples/python/verify-winml/src/app.py b/samples/python/verify-winml/src/app.py new file mode 100644 index 000000000..a03417b1b --- /dev/null +++ b/samples/python/verify-winml/src/app.py @@ -0,0 +1,226 @@ +""" +Foundry Local SDK - WinML 2.0 EP Verification Script + +Verifies: + 1. Execution providers are discovered and registered + 2. Accelerated models appear in catalog after EP registration + 3. 
Streaming chat completions work on an accelerated model +""" + +import sys +import time +from foundry_local_sdk import Configuration, FoundryLocalManager + + +PASS = "\033[92m[PASS]\033[0m" +FAIL = "\033[91m[FAIL]\033[0m" +INFO = "\033[94m[INFO]\033[0m" +WARN = "\033[93m[WARN]\033[0m" + +results = [] + + +def log_result(test_name: str, passed: bool, detail: str = ""): + status = PASS if passed else FAIL + msg = f"{status} {test_name}" + if detail: + msg += f" - {detail}" + print(msg) + results.append((test_name, passed)) + + +def print_separator(title: str): + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +def is_accelerated_variant(variant) -> bool: + rt = variant.info.runtime + return rt is not None and rt.device_type in ("GPU", "NPU") + + +def main(): + # ── 0. Initialize FoundryLocalManager ────────────────────── + print_separator("Initialization") + config = Configuration(app_name="verify_winml") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + print(f"{INFO} FoundryLocalManager initialized.") + + # ── 1. 
Discover & Register EPs ──────────────────────────── + print_separator("Step 1: Discover & Register Execution Providers") + eps = [] + try: + eps = manager.discover_eps() + print(f"{INFO} Discovered {len(eps)} execution providers:") + for ep in eps: + print(f" - {ep.name:40s} Registered: {ep.is_registered}") + log_result("EP Discovery", True, f"{len(eps)} EP(s) found") + except Exception as e: + log_result("EP Discovery", False, str(e)) + + if not eps: + detail = "No execution providers discovered on this machine" + log_result("EP Download & Registration", False, detail) + print(f"\n{FAIL} {detail}.") + _print_summary() + return + + try: + progress_state = {"ep": None, "percent": -1.0} + + def ep_progress(ep_name: str, percent: float): + if progress_state["ep"] is not None and ( + progress_state["ep"] != ep_name or percent < progress_state["percent"] + ): + print() + progress_state["ep"] = ep_name + progress_state["percent"] = percent + print(f"\r Downloading {ep_name}: {percent:.1f}%", end="", flush=True) + + result = manager.download_and_register_eps(progress_callback=ep_progress) + if progress_state["ep"] is not None: + print() + + print(f"{INFO} EP registration result: success={result.success}, status={result.status}") + if result.registered_eps: + print(f" Registered: {', '.join(result.registered_eps)}") + if result.failed_eps: + print(f" Failed: {', '.join(result.failed_eps)}") + download_ok = result.success + detail = ( + f"{len(result.registered_eps)} EP(s) registered" + if download_ok and result.registered_eps + else result.status + ) + log_result("EP Download & Registration", download_ok, detail) + if not download_ok: + _print_summary() + return + except Exception as e: + print() + log_result("EP Download & Registration", False, str(e)) + _print_summary() + return + + # ── 2. 
List Models & Find Accelerated Variants ───────────── + print_separator("Step 2: Model Catalog - Accelerated Models") + catalog = manager.catalog + models = catalog.list_models() + print(f"{INFO} Total models in catalog: {len(models)}") + + accelerated_variants = [] + + for model in models: + for variant in model.variants: + if is_accelerated_variant(variant): + accelerated_variants.append(variant) + + print(f"{INFO} Accelerated model variants: {len(accelerated_variants)}") + for v in accelerated_variants: + rt = v.info.runtime + ep = rt.execution_provider if rt else "?" + device = rt.device_type if rt else "?" + print(f" - {v.id:50s} Device: {device:3s} EP: {ep}") + + log_result("Catalog - Accelerated models found", len(accelerated_variants) > 0, + f"{len(accelerated_variants)} accelerated variant(s)") + + if not accelerated_variants: + print(f"\n{FAIL} No accelerated model variants are available.") + print(f"{WARN} Ensure the system has a compatible accelerator and matching model variants installed.") + _print_summary() + return + + # ── 3. 
Download & Load Model ────────────────────────────── + print_separator("Step 3: Download & Load Model") + + chosen = None + downloaded_any = False + last_load_error = None + for candidate in accelerated_variants: + chosen_ep = candidate.info.runtime.execution_provider if candidate.info.runtime else "unknown" + print(f"\n{INFO} Trying model: {candidate.id} (EP: {chosen_ep})") + + try: + def dl_progress(percent): + print(f"\r Downloading model: {percent:.1f}%", end="", flush=True) + + candidate.download(progress_callback=dl_progress) + print() + downloaded_any = True + except Exception as e: + print() + print(f"{WARN} Skipping {candidate.id}: download failed: {e}") + last_load_error = e + continue + + try: + candidate.load() + chosen = candidate + break + except Exception as e: + print(f"{WARN} Skipping {candidate.id}: load failed: {e}") + last_load_error = e + + log_result("Model Download", downloaded_any, + "At least one accelerated variant downloaded" if downloaded_any + else str(last_load_error) if last_load_error else "No accelerated variant could be downloaded") + + if chosen is None: + log_result("Model Load", False, + str(last_load_error) if last_load_error else "No accelerated variant could be loaded on this machine") + _print_summary() + return + + log_result("Model Load", True, f"Loaded {chosen.id}") + + # ── 4. Streaming Chat Completions (Native SDK) ──────────── + print_separator("Step 4: Streaming Chat Completions (Native)") + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is 2 + 2? 
Reply with just the number."}, + ] + + try: + client = chosen.get_chat_client() + client.settings.temperature = 0 + client.settings.max_tokens = 16 + response_text = "" + start = time.time() + for chunk in client.complete_streaming_chat(messages): + choices = getattr(chunk, "choices", None) + content = choices[0].delta.content if choices and len(choices) > 0 else None + if content: + response_text += content + print(content, end="", flush=True) + elapsed = time.time() - start + print() + log_result("Streaming Chat (Native)", len(response_text) > 0, + f"{len(response_text)} chars in {elapsed:.2f}s") + except Exception as e: + log_result("Streaming Chat (Native)", False, str(e)) + + try: + chosen.unload() + print(f"{INFO} Model unloaded.") + except Exception as e: + print(f"{WARN} Failed to unload model: {e}") + + _print_summary() + + +def _print_summary(): + print_separator("Summary") + passed = sum(1 for _, p in results if p) + total = len(results) + for name, p in results: + print(f" {'PASS' if p else 'FAIL'} {name}") + print(f"\n {passed}/{total} tests passed") + if passed < total: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/samples/rust/Cargo.toml b/samples/rust/Cargo.toml index 7be551ea0..37a579a1b 100644 --- a/samples/rust/Cargo.toml +++ b/samples/rust/Cargo.toml @@ -4,10 +4,12 @@ members = [ "tool-calling-foundry-local", "native-chat-completions", "audio-transcription-example", + "live-audio-transcription", "embeddings", "tutorial-chat-assistant", "tutorial-document-summarizer", "tutorial-tool-calling", "tutorial-voice-to-text", + "verify-winml", ] resolver = "2" diff --git a/samples/rust/live-audio-transcription/src/main.rs b/samples/rust/live-audio-transcription/src/main.rs index 12074ae46..633e32062 100644 --- a/samples/rust/live-audio-transcription/src/main.rs +++ b/samples/rust/live-audio-transcription/src/main.rs @@ -12,7 +12,7 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use cpal::traits::{DeviceTrait, 
HostTrait, StreamTrait}; -use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; +use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager, LiveAudioTranscriptionSession}; use tokio_stream::StreamExt; const ALIAS: &str = "nemotron-speech-streaming-en-0.6b"; @@ -44,8 +44,8 @@ async fn main() -> Result<(), Box> { if !model.is_cached().await? { println!("Downloading model..."); model - .download(Some(|progress: &str| { - print!("\r {progress}%"); + .download(Some(|progress: f64| { + print!("\r {progress:.1}%"); io::stdout().flush().ok(); })) .await?; @@ -135,7 +135,7 @@ async fn main() -> Result<(), Box> { /// Try to open the default microphone with CPAL and forward PCM to the session. /// Blocks until Ctrl+C is pressed. async fn try_start_mic( - session: &Arc, + session: &Arc, running: &Arc, ) -> Result<(), Box> { let host = cpal::default_host(); diff --git a/samples/rust/verify-winml/Cargo.toml b/samples/rust/verify-winml/Cargo.toml new file mode 100644 index 000000000..6ca2cf275 --- /dev/null +++ b/samples/rust/verify-winml/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "verify-winml" +version = "1.0.0" +edition = "2021" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] } +tokio = { version = "1", features = ["full"] } +tokio-stream = "0.1" +anyhow = "1" diff --git a/samples/rust/verify-winml/README.md b/samples/rust/verify-winml/README.md new file mode 100644 index 000000000..57eb83ced --- /dev/null +++ b/samples/rust/verify-winml/README.md @@ -0,0 +1,20 @@ +# Verify WinML 2.0 Execution Providers (Rust) + +This sample verifies that WinML 2.0 execution providers are correctly discovered, +downloaded, and registered using the Foundry Local Rust SDK. It uses registered WinML +EP-backed model variants and finishes with one native streaming chat check. 
+ +## Prerequisites + +- Windows with a compatible GPU +- Rust toolchain + +## Build & Run + +This sample enables the Rust SDK's `winml` feature and the SDK build script +downloads the pinned `Microsoft.AI.Foundry.Local.Core.WinML` package from the +configured NuGet feeds during the build. + +```bash +cargo run +``` diff --git a/samples/rust/verify-winml/src/main.rs b/samples/rust/verify-winml/src/main.rs new file mode 100644 index 000000000..bd090130f --- /dev/null +++ b/samples/rust/verify-winml/src/main.rs @@ -0,0 +1,327 @@ +// Foundry Local SDK - WinML 2.0 EP Verification (Rust) +// +// Verifies: +// 1. Execution providers are discovered and registered +// 2. Accelerated models appear in catalog after EP registration +// 3. Streaming chat completions work on an accelerated model + +use foundry_local_sdk::{ + ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage, + ChatCompletionRequestUserMessage, DeviceType, FoundryLocalConfig, + FoundryLocalManager, Model, +}; +use std::io::{self, Write}; +use tokio_stream::StreamExt; + +const PASS: &str = "\x1b[92m[PASS]\x1b[0m"; +const FAIL: &str = "\x1b[91m[FAIL]\x1b[0m"; +const INFO: &str = "\x1b[94m[INFO]\x1b[0m"; +const WARN: &str = "\x1b[93m[WARN]\x1b[0m"; + +fn is_accelerated_variant(model: &Model) -> bool { + model.info() + .runtime + .as_ref() + .map(|rt| matches!(rt.device_type, DeviceType::GPU | DeviceType::NPU)) + .unwrap_or(false) +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let mut results: Vec<(&str, bool)> = Vec::new(); + + // ── 0. Initialize FoundryLocalManager ────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Initialization"); + println!("{}\n", "=".repeat(60)); + + let manager = FoundryLocalManager::create(FoundryLocalConfig::new("verify_winml"))?; + println!("{INFO} FoundryLocalManager initialized."); + + // ── 1. 
Discover & Register EPs ──────────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 1: Discover & Register Execution Providers"); + println!("{}\n", "=".repeat(60)); + + let eps = match manager.discover_eps() { + Ok(eps) => { + println!("{INFO} Discovered {} execution providers:", eps.len()); + for ep in &eps { + println!(" - {:<40} Registered: {}", ep.name, ep.is_registered); + } + + let detail = format!("{} EP(s) found", eps.len()); + println!("{PASS} EP Discovery - {detail}"); + results.push(("EP Discovery", true)); + eps + } + Err(e) => { + println!("{FAIL} EP Discovery - {e}"); + results.push(("EP Discovery", false)); + Vec::new() + } + }; + + if eps.is_empty() { + let detail = "No execution providers discovered on this machine"; + println!("{FAIL} EP Download & Registration - {detail}"); + println!("\n{FAIL} {detail}."); + results.push(("EP Download & Registration", false)); + print_summary(&results); + return Ok(()); + } + + match manager.download_and_register_eps_with_progress(None, { + let mut last_progress_ep: Option<String> = None; + let mut last_progress_percent = -1.0f64; + + move |ep_name: &str, percent: f64| { + if last_progress_ep + .as_ref() + .map(|current| current != ep_name || percent < last_progress_percent) + .unwrap_or(false) + { + println!(); + } + + last_progress_ep = Some(ep_name.to_string()); + last_progress_percent = percent; + print!("\r Downloading {ep_name}: {percent:.1}%"); + io::stdout().flush().ok(); + } + }).await { + Ok(result) => { + println!(); + println!( + "{INFO} EP registration result: success={}, status={}", + result.success, result.status + ); + if !result.registered_eps.is_empty() { + println!(" Registered: {}", result.registered_eps.join(", ")); + } + if !result.failed_eps.is_empty() { + println!(" Failed: {}", result.failed_eps.join(", ")); + } + + let download_ok = result.success; + let status = if download_ok { PASS } else { FAIL }; + let detail = if download_ok && !result.registered_eps.is_empty() { + 
format!("{} EP(s) registered", result.registered_eps.len()) + } else { + result.status.clone() + }; + println!("{status} EP Download & Registration - {detail}"); + results.push(("EP Download & Registration", download_ok)); + + if !download_ok { + print_summary(&results); + return Ok(()); + } + } + Err(e) => { + println!(); + println!("{FAIL} EP Download & Registration - {e}"); + results.push(("EP Download & Registration", false)); + print_summary(&results); + return Ok(()); + } + } + + // ── 2. List Models & Find Accelerated Variants ──────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 2: Model Catalog - Accelerated Models"); + println!("{}\n", "=".repeat(60)); + + let models = manager.catalog().get_models().await?; + println!("{INFO} Total models in catalog: {}", models.len()); + + let mut accelerated_variants = Vec::new(); + for model in &models { + for variant in model.variants() { + if is_accelerated_variant(variant.as_ref()) { + let device = variant + .info() + .runtime + .as_ref() + .map(|rt| format!("{:?}", rt.device_type)) + .unwrap_or_else(|| "?".to_string()); + let ep = variant + .info() + .runtime + .as_ref() + .map(|rt| rt.execution_provider.as_str()) + .unwrap_or("?"); + println!( + " - {:<50} Device: {:<3} EP: {}", + variant.id(), + device, + ep + ); + accelerated_variants.push(variant); + } + } + } + + println!("{INFO} Accelerated model variants: {}", accelerated_variants.len()); + let has_accelerated_models = !accelerated_variants.is_empty(); + let status = if has_accelerated_models { PASS } else { FAIL }; + println!( + "{status} Catalog - Accelerated models found - {} accelerated variant(s)", + accelerated_variants.len() + ); + results.push(("Catalog - Accelerated models found", has_accelerated_models)); + + if accelerated_variants.is_empty() { + println!("\n{FAIL} No accelerated model variants are available."); + println!("{WARN} Ensure the system has a compatible accelerator and matching model variants installed."); + 
print_summary(&results); + return Ok(()); + } + + // ── 3. Download & Load Model ────────────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 3: Download & Load Model"); + println!("{}\n", "=".repeat(60)); + + let mut model = None; + let mut downloaded_any = false; + let mut last_load_error: Option<String> = None; + + for candidate in accelerated_variants { + let candidate_ep = candidate + .info() + .runtime + .as_ref() + .map(|rt| rt.execution_provider.as_str()) + .unwrap_or("unknown"); + println!("\n{INFO} Trying model: {} (EP: {candidate_ep})", candidate.id()); + + if !candidate.is_cached().await? { + match candidate + .download(Some(|progress: f64| { + print!("\r Downloading model: {progress:.1}%"); + io::stdout().flush().ok(); + })) + .await + { + Ok(_) => { + println!(); + downloaded_any = true; + } + Err(e) => { + println!(); + println!("{WARN} Skipping {}: download failed: {e}", candidate.id()); + last_load_error = Some(e.to_string()); + continue; + } + } + } else { + println!("{INFO} Model already cached"); + downloaded_any = true; + } + + match candidate.load().await { + Ok(_) => { + model = Some(candidate); + break; + } + Err(e) => { + println!("{WARN} Skipping {}: load failed: {e}", candidate.id()); + last_load_error = Some(e.to_string()); + } + } + } + + let download_status = if downloaded_any { PASS } else { FAIL }; + let download_detail = if downloaded_any { + "At least one accelerated variant downloaded".to_string() + } else { + last_load_error + .clone() + .unwrap_or_else(|| "No accelerated variant could be downloaded".to_string()) + }; + println!("{download_status} Model Download - {download_detail}"); + results.push(("Model Download", downloaded_any)); + + let Some(model) = model else { + let detail = last_load_error + .unwrap_or_else(|| "No accelerated variant could be loaded on this machine".to_string()); + println!("{FAIL} Model Load - {detail}"); + results.push(("Model Load", false)); + print_summary(&results); + return 
Ok(()); + }; + + println!("{PASS} Model Load - Loaded {}", model.id()); + results.push(("Model Load", true)); + + // ── 4. Streaming Chat Completions ──────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 4: Streaming Chat Completions"); + println!("{}\n", "=".repeat(60)); + + let messages: Vec<ChatCompletionRequestMessage> = vec![ + ChatCompletionRequestSystemMessage::from("You are a helpful assistant.").into(), + ChatCompletionRequestUserMessage::from("What is 2 + 2? Reply with just the number.").into(), + ]; + + let client = model.create_chat_client().temperature(0.0).max_tokens(16); + match client.complete_streaming_chat(&messages, None).await { + Ok(mut stream) => { + let mut full_response = String::new(); + let start = std::time::Instant::now(); + while let Some(chunk) = stream.next().await { + match chunk { + Ok(c) => { + if let Some(text) = c + .choices + .first() + .and_then(|ch| ch.delta.content.as_deref()) + { + print!("{text}"); + io::stdout().flush().ok(); + full_response.push_str(text); + } + } + Err(e) => { + println!("\n{FAIL} Streaming chunk error: {e}"); + break; + } + } + } + let elapsed = start.elapsed().as_secs_f64(); + println!(); + let ok = !full_response.is_empty(); + let status = if ok { PASS } else { FAIL }; + println!( + "{status} Streaming Chat - {} chars in {elapsed:.2}s", + full_response.len() + ); + results.push(("Streaming Chat", ok)); + } + Err(e) => { + println!("{FAIL} Streaming Chat - {e}"); + results.push(("Streaming Chat", false)); + } + } + + if let Err(e) = model.unload().await { + println!("{WARN} Failed to unload model: {e}"); + } else { + println!("{INFO} Model unloaded."); + } + + print_summary(&results); + Ok(()) +} + +fn print_summary(results: &[(&str, bool)]) { + println!("\n{}", "=".repeat(60)); + println!(" Summary"); + println!("{}\n", "=".repeat(60)); + let passed = results.iter().filter(|(_, p)| *p).count(); + for (name, p) in results { + println!(" {} {name}", if *p { "✓" } else { "✗" }); + } + println!("\n {passed}/{} 
tests passed", results.len()); +} diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 017f2fa6f..5959ec95b 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -209,6 +209,7 @@ Download, load, and unload: // Download with progress reporting model->Download([](float progress) { std::cout << "Download: " << progress << "%\n"; + return true; }); // Load into memory diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md index d2df44d3b..7818a9a32 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md @@ -2,7 +2,7 @@ Namespace: Microsoft.AI.Foundry.Local -Describes a discoverable execution provider bootstrapper. +Describes a discoverable execution provider. ```csharp public record EpInfo @@ -12,7 +12,7 @@ public record EpInfo ### **Name** -The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). +The identifier of the execution provider (e.g. "CUDAExecutionProvider"). ```csharp public string Name { get; set; } diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md index 5f1ba50e9..9a324e2bb 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md @@ -143,7 +143,7 @@ Task stopping the web service. ### **DiscoverEps()** -Discovers all available execution provider bootstrappers. +Discovers all available execution providers. Returns metadata about each EP including whether it is already registered. ```csharp @@ -153,7 +153,7 @@ public EpInfo[] DiscoverEps() #### Returns [EpInfo[]](./microsoft.ai.foundry.local.epinfo.md)
-Array of EP bootstrapper info describing available EPs. +Array of EP info describing available EPs. ### **DownloadAndRegisterEpsAsync(Nullable<CancellationToken>)** @@ -189,7 +189,7 @@ public Task DownloadAndRegisterEpsAsync(IEnumerable na #### Parameters `names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
-Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). +Subset of EP names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). `ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
Optional cancellation token. @@ -241,7 +241,7 @@ public Task DownloadAndRegisterEpsAsync(IEnumerable na #### Parameters `names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
-Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). +Subset of EP names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). `progressCallback` [Action<String, Double>](https://docs.microsoft.com/en-us/dotnet/api/system.action-2)
Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. diff --git a/sdk/cs/src/Detail/CoreInterop.WinML.cs b/sdk/cs/src/Detail/CoreInterop.WinML.cs deleted file mode 100644 index 1a9ebd139..000000000 --- a/sdk/cs/src/Detail/CoreInterop.WinML.cs +++ /dev/null @@ -1,24 +0,0 @@ -// -------------------------------------------------------------------------------------------------------------------- -// -// Copyright (c) Microsoft. All rights reserved. -// -// -------------------------------------------------------------------------------------------------------------------- - -// WinML build variant: injects Bootstrap parameter for Windows App Runtime initialization. - -#if IS_WINML - -namespace Microsoft.AI.Foundry.Local.Detail; - -internal partial class CoreInterop -{ - partial void PrepareWinMLBootstrap(CoreInteropRequest request) - { - if (!request.Params.ContainsKey("Bootstrap")) - { - request.Params["Bootstrap"] = "true"; - } - } -} - -#endif diff --git a/sdk/cs/src/Detail/CoreInterop.cs b/sdk/cs/src/Detail/CoreInterop.cs index 138aa9411..86db0fff1 100644 --- a/sdk/cs/src/Detail/CoreInterop.cs +++ b/sdk/cs/src/Detail/CoreInterop.cs @@ -57,8 +57,6 @@ internal CoreInterop(Configuration config, ILogger logger) _logger = logger ?? throw new ArgumentNullException(nameof(logger)); var request = new CoreInteropRequest { Params = config.AsDictionary() }; - PrepareWinMLBootstrap(request); - var response = ExecuteCommand("initialize", request); if (response.Error != null) @@ -80,9 +78,6 @@ internal CoreInterop(ILogger logger) // Implemented in CoreInterop.NetStandard.cs and CoreInterop.Modern.cs. static partial void InitializeNativeLibraryResolver(); - // Implemented in CoreInterop.WinML.cs when IS_WINML is defined; otherwise a no-op. - partial void PrepareWinMLBootstrap(CoreInteropRequest request); - private static string AddLibraryExtension(string name) => IsWindows ? $"{name}.dll" : IsLinux ? 
$"{name}.so" : diff --git a/sdk/cs/src/EpInfo.cs b/sdk/cs/src/EpInfo.cs index d170ac0ed..9a8b022e1 100644 --- a/sdk/cs/src/EpInfo.cs +++ b/sdk/cs/src/EpInfo.cs @@ -9,11 +9,11 @@ namespace Microsoft.AI.Foundry.Local; using System.Text.Json.Serialization; /// -/// Describes a discoverable execution provider bootstrapper. +/// Describes a discoverable execution provider. /// public record EpInfo { - /// The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). + /// The identifier of the execution provider (e.g. "CUDAExecutionProvider"). [JsonPropertyName("Name")] public required string Name { get; init; } diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index 855aed4a2..6f16e6059 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -136,10 +136,10 @@ await Utils.CallWithExceptionHandling(() => StopWebServiceImplAsync(ct), } /// - /// Discovers all available execution provider bootstrappers. + /// Discovers all available execution providers. /// Returns metadata about each EP including whether it is already registered. /// - /// Array of EP bootstrapper info describing available EPs. + /// Array of EP info describing available EPs. public EpInfo[] DiscoverEps() { return Utils.CallWithExceptionHandling(DiscoverEpsImpl, @@ -166,7 +166,7 @@ public async Task DownloadAndRegisterEpsAsync(CancellationToke /// Downloads and registers the specified execution providers. /// /// - /// Subset of EP bootstrapper names to download (as returned by ). + /// Subset of EP names to download (as returned by ). /// /// Optional cancellation token. /// Result describing which EPs succeeded and which failed. @@ -206,7 +206,7 @@ public async Task DownloadAndRegisterEpsAsync(Action /// - /// Subset of EP bootstrapper names to download (as returned by ). + /// Subset of EP names to download (as returned by ). /// /// /// Callback invoked as each EP downloads. 
Parameters are (epName, percentComplete) where percentComplete is 0-100. diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index 2e2131e56..b06a06ec6 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -44,9 +44,9 @@ --> - net8.0-windows10.0.26100.0 + net8.0-windows10.0.18362.0 win-x64;win-arm64 - 10.0.17763.0 + 10.0.18362.0 true $(DefineConstants);IS_WINML diff --git a/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj b/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj index 94e54532d..c5a4badf2 100644 --- a/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj +++ b/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj @@ -24,8 +24,8 @@ - net8.0-windows10.0.26100.0; - 10.0.17763.0 + net8.0-windows10.0.18362.0; + 10.0.18362.0 None $(NETCoreSdkRuntimeIdentifier) diff --git a/sdk/deps_versions_winml.json b/sdk/deps_versions_winml.json index 08bf40de2..e67af58a6 100644 --- a/sdk/deps_versions_winml.json +++ b/sdk/deps_versions_winml.json @@ -3,8 +3,11 @@ "nuget": "1.1.0", "python": "1.1.0" }, + "windows-ai-machinelearning": { + "version": "2.1.1" + }, "onnxruntime": { - "version": "1.23.2.3" + "version": "1.25.1" }, "onnxruntime-genai": { "version": "0.13.2" diff --git a/sdk/js/README.md b/sdk/js/README.md index fad973353..5edc0f2c8 100644 --- a/sdk/js/README.md +++ b/sdk/js/README.md @@ -29,13 +29,15 @@ Importing from `foundry-local-sdk` in a TypeScript project gives you full type i ## WinML: Automatic Hardware Acceleration (Windows) -On Windows, install the WinML package to enable automatic execution provider management. The SDK will automatically discover, download, and register hardware-specific execution providers (e.g., Qualcomm QNN for NPU acceleration) via the Windows App Runtime — no manual driver or EP setup required. 
+On Windows, install the WinML package to enable automatic execution provider management. The SDK can discover, download, and register hardware-specific execution providers (e.g., Qualcomm QNN for NPU acceleration) without manual driver or EP setup. > **Note:** `foundry-local-sdk-winml` is a Windows-only package. Its install script downloads WinML artifacts during installation and may fail on macOS or Linux. ```bash npm install foundry-local-sdk-winml ``` +To use a newer Windows ML runtime DLL, set `FOUNDRY_LOCAL_WINDOWS_AI_MACHINELEARNING_VERSION` before installing or rebuilding `foundry-local-sdk-winml`; the install script downloads `Microsoft.Windows.AI.MachineLearning.dll` from that NuGet version. + When WinML is enabled: - Execution providers like `QNNExecutionProvider`, `OpenVINOExecutionProvider`, etc. are downloaded and registered on the fly, enabling NPU/GPU acceleration without manual configuration - **No code changes needed** — your application code stays the same whether WinML is enabled or not diff --git a/sdk/js/docs/README.md b/sdk/js/docs/README.md index 8be2e1e45..d245428a1 100644 --- a/sdk/js/docs/README.md +++ b/sdk/js/docs/README.md @@ -195,7 +195,7 @@ True if all requested EPs were successfully downloaded and registered. ### EpInfo -Describes a discoverable execution provider bootstrapper. +Describes a discoverable execution provider. #### Properties @@ -213,7 +213,7 @@ True if this EP has already been successfully downloaded and registered. name: string; ``` -The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). +The identifier of the execution provider (e.g. "CUDAExecutionProvider"). 
*** diff --git a/sdk/js/script/install-utils.cjs b/sdk/js/script/install-utils.cjs index 14df434f3..a611bfa2b 100644 --- a/sdk/js/script/install-utils.cjs +++ b/sdk/js/script/install-utils.cjs @@ -101,6 +101,69 @@ async function downloadFile(url, dest) { const serviceIndexCache = new Map(); +function expectedFileForPackage(pkgName) { + const prefix = os.platform() === 'win32' ? '' : 'lib'; + if (pkgName.includes('Foundry.Local.Core')) { + return `Microsoft.AI.Foundry.Local.Core${EXT}`; + } + if (pkgName.includes('Windows.AI.MachineLearning')) { + return `Microsoft.Windows.AI.MachineLearning${EXT}`; + } + if (pkgName.includes('OnnxRuntimeGenAI')) { + return `${prefix}onnxruntime-genai${EXT}`; + } + if (pkgName.includes('OnnxRuntime')) { + return `${prefix}onnxruntime${EXT}`; + } + return undefined; +} + +function entryFileName(entry) { + const normalized = entry.entryName.replace(/\\/g, '/'); + return normalized.slice(normalized.lastIndexOf('/') + 1); +} + +function nativeEntriesForRid(zip, includeFiles) { + const includedNames = includeFiles + ? 
new Set(includeFiles.map(name => name.toLowerCase())) + : null; + const nativePrefix = `runtimes/${RID}/native/`.toLowerCase(); + const runtimePrefix = `runtimes/${RID}/`.toLowerCase(); + return zip.getEntries().filter(e => { + const p = e.entryName.toLowerCase(); + if (!p.endsWith(EXT)) { + return false; + } + + const inNativePath = p.startsWith(nativePrefix); + let inRuntimePath = false; + if (p.startsWith(runtimePrefix)) { + const relativePath = p.slice(runtimePrefix.length); + inRuntimePath = relativePath.length > 0 && !relativePath.includes('/'); + } + + if (!inNativePath && !inRuntimePath) { + return false; + } + + if (includedNames && !includedNames.has(entryFileName(e).toLowerCase())) { + return false; + } + + return true; + }); +} + +function removeFiles(binDir, files) { + for (const file of files || []) { + const filePath = path.join(binDir, file); + if (fs.existsSync(filePath)) { + fs.rmSync(filePath, { force: true }); + console.log(` Removed ${file}`); + } + } +} + async function getBaseAddress(feedUrl) { if (!serviceIndexCache.has(feedUrl)) { serviceIndexCache.set(feedUrl, await downloadJson(feedUrl)); @@ -120,15 +183,7 @@ async function installPackage(artifact, tempDir, binDir, skipIfPresent) { // (e.g. pre-populated by CI from a locally-built artifact). // Callers pass skipIfPresent=false when overriding (e.g. WinML over standard). if (skipIfPresent) { - const prefix = os.platform() === 'win32' ? 
'' : 'lib'; - let expectedFile; - if (pkgName.includes('Foundry.Local.Core')) { - expectedFile = `Microsoft.AI.Foundry.Local.Core${EXT}`; - } else if (pkgName.includes('OnnxRuntimeGenAI')) { - expectedFile = `${prefix}onnxruntime-genai${EXT}`; - } else if (pkgName.includes('OnnxRuntime')) { - expectedFile = `${prefix}onnxruntime${EXT}`; - } + const expectedFile = expectedFileForPackage(pkgName); if (expectedFile && fs.existsSync(path.join(binDir, expectedFile))) { console.log(` ${pkgName}: already present, skipping download.`); return; @@ -152,11 +207,7 @@ async function installPackage(artifact, tempDir, binDir, skipIfPresent) { console.log(` Extracting...`); const zip = new AdmZip(nupkgPath); - const targetPathPrefix = `runtimes/${RID}/native/`.toLowerCase(); - const entries = zip.getEntries().filter(e => { - const p = e.entryName.toLowerCase(); - return p.includes(targetPathPrefix) && p.endsWith(EXT); - }); + const entries = nativeEntriesForRid(zip, artifact.includeFiles); if (entries.length > 0) { entries.forEach(entry => { @@ -167,6 +218,8 @@ async function installPackage(artifact, tempDir, binDir, skipIfPresent) { console.warn(` No files found for RID ${RID} in ${pkgName}.`); } + removeFiles(binDir, artifact.removeFiles); + // Write a metadata package.json with version info for diagnostics if (pkgName.startsWith('Microsoft.AI.Foundry.Local.Core')) { const pkgJsonPath = path.join(binDir, 'package.json'); diff --git a/sdk/js/script/install-winml.cjs b/sdk/js/script/install-winml.cjs index 0de13503f..dbbd615e8 100644 --- a/sdk/js/script/install-winml.cjs +++ b/sdk/js/script/install-winml.cjs @@ -14,24 +14,62 @@ const fs = require('fs'); const path = require('path'); const { runInstall } = require('./install-utils.cjs'); -// WinML uses its own deps_versions_winml.json with the same key structure -// as the standard deps_versions.json — no variant-specific keys needed. // deps_versions_winml.json lives at the package root when published, or at sdk/ in the repo. 
const depsPath = fs.existsSync(path.resolve(__dirname, '..', 'deps_versions_winml.json')) ? path.resolve(__dirname, '..', 'deps_versions_winml.json') : path.resolve(__dirname, '..', '..', 'deps_versions_winml.json'); const deps = require(depsPath); + +function resolveFoundryLocalSdkRoot() { + try { + return path.dirname(require.resolve('foundry-local-sdk/package.json')); + } catch (err) { + const packageRoot = path.resolve(__dirname, '..'); + const packageJson = path.join(packageRoot, 'package.json'); + if (fs.existsSync(packageJson)) { + const pkg = JSON.parse(fs.readFileSync(packageJson, 'utf8')); + if (pkg.name === 'foundry-local-sdk') { + return packageRoot; + } + } + + throw err; + } +} + // Resolve foundry-local-sdk's binary directory -const sdkRoot = path.dirname(require.resolve('foundry-local-sdk/package.json')); +const sdkRoot = resolveFoundryLocalSdkRoot(); const platformKey = `${process.platform}-${process.arch}`; const binDir = path.join(sdkRoot, 'foundry-local-core', platformKey); +function resolveWindowsAiMachineLearningVersion() { + const override = process.env.FOUNDRY_LOCAL_WINDOWS_AI_MACHINELEARNING_VERSION; + if (override) { + return override; + } + + const dep = deps['windows-ai-machinelearning']; + if (!dep || !dep.version) { + throw new Error('deps_versions_winml.json is missing windows-ai-machinelearning.version'); + } + return dep.version; +} + const ARTIFACTS = [ { name: 'Microsoft.AI.Foundry.Local.Core.WinML', version: deps['foundry-local-core']['nuget'] }, { name: 'Microsoft.ML.OnnxRuntime.Foundry', version: deps.onnxruntime.version }, { name: 'Microsoft.ML.OnnxRuntimeGenAI.Foundry', version: deps['onnxruntime-genai']['version'] }, ]; +if (process.platform === 'win32') { + ARTIFACTS.push({ + name: 'Microsoft.Windows.AI.MachineLearning', + version: resolveWindowsAiMachineLearningVersion(), + includeFiles: ['Microsoft.Windows.AI.MachineLearning.dll'], + removeFiles: ['DirectML.dll'], + }); +} + (async () => { try { // Force override into 
foundry-local-sdk's binary directory diff --git a/sdk/js/src/detail/coreInterop.ts b/sdk/js/src/detail/coreInterop.ts index 36098d4ab..006c5bc4a 100644 --- a/sdk/js/src/detail/coreInterop.ts +++ b/sdk/js/src/detail/coreInterop.ts @@ -70,17 +70,6 @@ export class CoreInterop { const corePath = path.join(packageDir, `Microsoft.AI.Foundry.Local.Core${ext}`); if (fs.existsSync(corePath)) { config.params['FoundryLocalCorePath'] = corePath; - - // Auto-detect if WinML Bootstrap is needed by checking for Bootstrap DLL in FoundryLocalCorePath - // Only auto-set if the user hasn't explicitly provided a value - if (!('Bootstrap' in config.params)) { - const bootstrapDllPath = path.join(packageDir, 'Microsoft.WindowsAppRuntime.Bootstrap.dll'); - if (fs.existsSync(bootstrapDllPath)) { - // WinML Bootstrap DLL found, enable bootstrapping - config.params['Bootstrap'] = 'true'; - } - } - return corePath; } diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index 521ae34b4..71f4b5f9b 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -71,9 +71,9 @@ export interface ToolChoice { // Execution Provider Types // ============================================================================ -/** Describes a discoverable execution provider bootstrapper. */ +/** Describes a discoverable execution provider. */ export interface EpInfo { - /** The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). */ + /** The identifier of the execution provider (e.g. "CUDAExecutionProvider"). */ name: string; /** True if this EP has already been successfully downloaded and registered. 
*/ isRegistered: boolean; diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts index 7cac6b293..5ed5ed3d2 100644 --- a/sdk/js/test/testUtils.ts +++ b/sdk/js/test/testUtils.ts @@ -39,8 +39,7 @@ export const TEST_CONFIG: FoundryLocalConfig = { appName: 'FoundryLocalTest', modelCacheDir: getTestDataSharedPath(), logLevel: 'warn', - logsDir: path.join(getGitRepoRoot(), 'sdk', 'js', 'logs'), - additionalSettings: { 'Bootstrap': 'false' } + logsDir: path.join(getGitRepoRoot(), 'sdk', 'js', 'logs') }; export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b'; diff --git a/sdk/python/README.md b/sdk/python/README.md index 55a6f8d17..03f8fec8c 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -30,7 +30,9 @@ pip install foundry-local-sdk pip install foundry-local-sdk-winml ``` -Each package installs the correct native binaries (`foundry-local-core`, `onnxruntime-core`, `onnxruntime-genai-core`) as wheel dependencies. They are mutually exclusive — install only one per environment. WinML is auto-detected at runtime: if the WinML package is installed, the SDK automatically enables the Windows App Runtime Bootstrap. +Each package installs the correct native binaries (`foundry-local-core`, `onnxruntime-core`, `onnxruntime-genai-core`) as wheel dependencies. They are mutually exclusive — install only one per environment. + +The WinML Core wheel also includes `Microsoft.Windows.AI.MachineLearning.dll`. To replace it with a DLL from a newer `Microsoft.Windows.AI.MachineLearning` NuGet package, run `foundry-local-install --winml --winml-runtime-version ` or set `FOUNDRY_LOCAL_WINDOWS_AI_MACHINELEARNING_VERSION` before `foundry-local-install --winml`. 
### Building from source @@ -170,7 +172,6 @@ config = Configuration( app_name="MyApp", model_cache_dir="/path/to/cache", # optional log_level=LogLevel.INFORMATION, # optional (default: Warning) - additional_settings={"Bootstrap": "false"}, # optional ) FoundryLocalManager.initialize(config) manager = FoundryLocalManager.instance diff --git a/sdk/python/src/detail/core_interop.py b/sdk/python/src/detail/core_interop.py index a013f7ba7..b8c4186aa 100644 --- a/sdk/python/src/detail/core_interop.py +++ b/sdk/python/src/detail/core_interop.py @@ -243,21 +243,6 @@ def __init__(self, config: Configuration): config.additional_settings["OrtLibraryPath"] = str(paths.ort) config.additional_settings["OrtGenAILibraryPath"] = str(paths.genai) - # Auto-detect WinML Bootstrap: if the Bootstrap DLL is present - # in the native binaries directory and the user hasn't explicitly - # set the Bootstrap config, enable it automatically. - if sys.platform.startswith("win"): - bootstrap_dll = paths.core_dir / "Microsoft.WindowsAppRuntime.Bootstrap.dll" - if bootstrap_dll.exists(): - # Pre-load so the DLL is already in the process when - # C# P/Invoke resolves it during Bootstrap.Initialize(). 
- ctypes.CDLL(str(bootstrap_dll)) - if config.additional_settings is None: - config.additional_settings = {} - if "Bootstrap" not in config.additional_settings: - logger.info("WinML Bootstrap DLL detected — enabling Bootstrap") - config.additional_settings["Bootstrap"] = "true" - request = InteropRequest(params=config.as_dictionary()) response = self.execute_command("initialize", request) if response.error is not None: diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index dc76a2372..5f01f3a09 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -73,7 +73,6 @@ def get_test_config() -> Configuration: model_cache_dir=get_test_data_shared_path(), log_level=LogLevel.WARNING, logs_dir=str(repo_root / "sdk" / "python" / "logs"), - additional_settings={"Bootstrap": "false"}, ) diff --git a/sdk/rust/README.md b/sdk/rust/README.md index 058b8f721..852d5aba9 100644 --- a/sdk/rust/README.md +++ b/sdk/rust/README.md @@ -61,6 +61,8 @@ foundry-local-sdk = { version = "0.1", features = ["winml"] } > **Note:** The `winml` feature is only relevant on Windows. On macOS and Linux, the standard build is used regardless. No code changes are needed — your application code stays the same. +With `winml` enabled on Windows, the build downloads `Microsoft.Windows.AI.MachineLearning.dll` from the pinned `Microsoft.Windows.AI.MachineLearning` NuGet version. Set `FOUNDRY_LOCAL_WINDOWS_AI_MACHINELEARNING_VERSION` before `cargo build` to use a newer runtime DLL, or set `FOUNDRY_NATIVE_OVERRIDE_DIR` to a directory containing the DLL. + ### Explicit EP Management You can explicitly discover and download execution providers: diff --git a/sdk/rust/build.rs b/sdk/rust/build.rs index 9209032b6..c28d64c72 100644 --- a/sdk/rust/build.rs +++ b/sdk/rust/build.rs @@ -16,10 +16,11 @@ const FEEDS: &[&str] = &[ ]; /// Versions loaded from deps_versions.json (or deps_versions_winml.json). 
-/// Both files share the same key structure — the build script picks the +/// Both files share common keys — the build script picks the /// right file based on the winml cargo feature. struct DepsVersions { core: String, + winml_runtime: Option, ort: String, genai: String, } @@ -29,7 +30,7 @@ fn load_deps_versions() -> DepsVersions { let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default(); let manifest_path = Path::new(&manifest_dir); - // Standard and WinML each have their own file with identical key structure. + // Standard and WinML each have their own versions file. let filename = if winml { "deps_versions_winml.json" } else { @@ -65,10 +66,20 @@ fn load_deps_versions() -> DepsVersions { .to_string() }; let flc = &val["foundry-local-core"]; + let winml_runtime = &val["windows-ai-machinelearning"]; let ort = &val["onnxruntime"]; let genai = &val["onnxruntime-genai"]; DepsVersions { core: s(flc, "nuget"), + winml_runtime: env::var("FOUNDRY_LOCAL_WINDOWS_AI_MACHINELEARNING_VERSION") + .ok() + .filter(|v| !v.trim().is_empty()) + .or_else(|| { + winml_runtime + .get("version") + .and_then(|v| v.as_str()) + .map(|v| v.to_string()) + }), ort: s(ort, "version"), genai: s(genai, "version"), } @@ -77,8 +88,14 @@ fn load_deps_versions() -> DepsVersions { struct NuGetPackage { name: &'static str, version: String, + expected_file: String, + include_files: &'static [&'static str], + always_extract: bool, } +const ALL_NATIVE_FILES: &[&str] = &[]; +const WINML_RUNTIME_FILES: &[&str] = &["Microsoft.Windows.AI.MachineLearning.dll"]; + fn get_rid() -> Option<&'static str> { let os = env::consts::OS; let arch = env::consts::ARCH; @@ -100,10 +117,25 @@ fn native_lib_extension() -> &'static str { } } +fn native_lib_prefix() -> &'static str { + if env::consts::OS == "windows" { + "" + } else { + "lib" + } +} + fn get_packages(rid: &str) -> Vec { let winml = env::var("CARGO_FEATURE_WINML").is_ok(); let is_linux = rid.starts_with("linux"); let deps = 
load_deps_versions(); + let ext = native_lib_extension(); + let prefix = native_lib_prefix(); + + let core_file = format!("Microsoft.AI.Foundry.Local.Core.{ext}"); + let ort_file = format!("{prefix}onnxruntime.{ext}"); + let genai_file = format!("{prefix}onnxruntime-genai.{ext}"); + let winml_runtime_file = "Microsoft.Windows.AI.MachineLearning.dll".to_string(); // Use pinned versions directly — dynamic resolution via resolve_latest_version // is unreliable (feed returns versions in unexpected order, and some old versions @@ -115,36 +147,70 @@ fn get_packages(rid: &str) -> Vec { packages.push(NuGetPackage { name: "Microsoft.AI.Foundry.Local.Core.WinML", version: deps.core.clone(), + expected_file: core_file.clone(), + include_files: ALL_NATIVE_FILES, + always_extract: false, }); packages.push(NuGetPackage { name: "Microsoft.ML.OnnxRuntime.Foundry", version: deps.ort.clone(), + expected_file: ort_file.clone(), + include_files: ALL_NATIVE_FILES, + always_extract: false, }); packages.push(NuGetPackage { name: "Microsoft.ML.OnnxRuntimeGenAI.Foundry", version: deps.genai.clone(), + expected_file: genai_file.clone(), + include_files: ALL_NATIVE_FILES, + always_extract: false, }); + if rid.starts_with("win-") { + let winml_runtime = deps + .winml_runtime + .clone() + .expect("deps_versions_winml.json is missing windows-ai-machinelearning.version"); + packages.push(NuGetPackage { + name: "Microsoft.Windows.AI.MachineLearning", + version: winml_runtime, + expected_file: winml_runtime_file, + include_files: WINML_RUNTIME_FILES, + always_extract: true, + }); + } } else { packages.push(NuGetPackage { name: "Microsoft.AI.Foundry.Local.Core", version: deps.core.clone(), + expected_file: core_file, + include_files: ALL_NATIVE_FILES, + always_extract: false, }); if is_linux { packages.push(NuGetPackage { name: "Microsoft.ML.OnnxRuntime.Gpu.Linux", version: deps.ort.clone(), + expected_file: ort_file.clone(), + include_files: ALL_NATIVE_FILES, + always_extract: false, }); } else { 
packages.push(NuGetPackage { name: "Microsoft.ML.OnnxRuntime.Foundry", version: deps.ort.clone(), + expected_file: ort_file.clone(), + include_files: ALL_NATIVE_FILES, + always_extract: false, }); } packages.push(NuGetPackage { name: "Microsoft.ML.OnnxRuntimeGenAI.Foundry", version: deps.genai.clone(), + expected_file: genai_file, + include_files: ALL_NATIVE_FILES, + always_extract: false, }); } @@ -242,7 +308,8 @@ fn try_download_from_feed( .map_err(|e| format!("Failed to read response body for {}: {e}", pkg.name))?; let ext = native_lib_extension(); - let prefix = format!("runtimes/{rid}/native/"); + let native_prefix = format!("runtimes/{rid}/native/"); + let runtime_prefix = format!("runtimes/{rid}/"); let cursor = io::Cursor::new(&bytes); let mut archive = zip::ZipArchive::new(cursor) @@ -255,10 +322,15 @@ fn try_download_from_feed( .map_err(|e| format!("Failed to read zip entry: {e}"))?; let name = file.name().to_string(); - if !name.starts_with(&prefix) { + if !name.ends_with(&format!(".{ext}")) { continue; } - if !name.ends_with(&format!(".{ext}")) { + + let direct_runtime_file = name + .strip_prefix(&runtime_prefix) + .map(|relative| !relative.is_empty() && !relative.contains('/')) + .unwrap_or(false); + if !name.starts_with(&native_prefix) && !direct_runtime_file { continue; } @@ -271,6 +343,15 @@ fn try_download_from_feed( continue; } + if !pkg.include_files.is_empty() + && !pkg + .include_files + .iter() + .any(|included| file_name.eq_ignore_ascii_case(included)) + { + continue; + } + let dest = out_dir.join(&file_name); let mut out_file = fs::File::create(&dest) .map_err(|e| format!("Failed to create {}: {e}", dest.display()))?; @@ -297,22 +378,7 @@ fn try_download_from_feed( fn download_and_extract(pkg: &NuGetPackage, rid: &str, out_dir: &Path) -> Result<(), String> { // Skip if this package's main native library is already in out_dir // (e.g. pre-populated from FOUNDRY_NATIVE_OVERRIDE_DIR). 
- let ext = native_lib_extension(); - let prefix = if env::consts::OS == "windows" { - "" - } else { - "lib" - }; - let expected_file = if pkg.name.contains("Foundry.Local.Core") { - format!("Microsoft.AI.Foundry.Local.Core.{ext}") - } else if pkg.name.contains("OnnxRuntimeGenAI") { - format!("{prefix}onnxruntime-genai.{ext}") - } else if pkg.name.contains("OnnxRuntime") { - format!("{prefix}onnxruntime.{ext}") - } else { - String::new() - }; - if !expected_file.is_empty() && out_dir.join(&expected_file).exists() { + if !pkg.always_extract && out_dir.join(&pkg.expected_file).exists() { println!( "cargo:warning={} already present, skipping download.", pkg.name @@ -343,24 +409,35 @@ fn download_and_extract(pkg: &NuGetPackage, rid: &str, out_dir: &Path) -> Result } /// Check whether all required native libraries are already present in `out_dir`. -fn libs_already_present(out_dir: &Path) -> bool { - let ext = native_lib_extension(); - let prefix = if env::consts::OS == "windows" { - "" - } else { - "lib" - }; - let required = [ - format!("Microsoft.AI.Foundry.Local.Core.{ext}"), - format!("{prefix}onnxruntime.{ext}"), - format!("{prefix}onnxruntime-genai.{ext}"), - ]; - required.iter().all(|f| out_dir.join(f).exists()) +fn libs_already_present(packages: &[NuGetPackage], out_dir: &Path) -> bool { + packages + .iter() + .all(|pkg| out_dir.join(&pkg.expected_file).exists()) +} + +fn remove_unneeded_winml_runtime_files(out_dir: &Path) { + if env::var("CARGO_FEATURE_WINML").is_err() || env::consts::OS != "windows" { + return; + } + + let directml = out_dir.join("DirectML.dll"); + if directml.exists() { + match fs::remove_file(&directml) { + Ok(()) => println!("cargo:warning=Removed unneeded DirectML.dll from OUT_DIR"), + // Best-effort cleanup: a locked DirectML.dll (common on Windows when a + // previous build's process still holds a handle) shouldn't fail the + // entire build script, since the DLL is unused under WinML 2.0. 
+ Err(e) => println!( + "cargo:warning=Could not remove unneeded DirectML.dll from OUT_DIR: {e}" + ), + } + } } fn main() { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rerun-if-env-changed=FOUNDRY_NATIVE_OVERRIDE_DIR"); + println!("cargo:rerun-if-env-changed=FOUNDRY_LOCAL_WINDOWS_AI_MACHINELEARNING_VERSION"); println!("cargo:rerun-if-env-changed=CARGO_FEATURE_WINML"); let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); @@ -377,7 +454,7 @@ fn main() { } }; - // If FOUNDRY_NATIVE_OVERRIDE_DIR is set (e.g. by CI), copy all native + // If FOUNDRY_NATIVE_OVERRIDE_DIR is set (e.g. by CI), copy native // libraries from that directory into OUT_DIR. This pre-populates FLC Core // binaries that aren't published to a feed yet. The download loop below // will then only fetch packages whose files are still missing (ORT, GenAI). @@ -389,6 +466,18 @@ fn main() { let path = entry.expect("Failed to read dir entry").path(); if path.extension().and_then(|e| e.to_str()) == Some(ext) { let dest = out_dir.join(path.file_name().unwrap()); + if env::var("CARGO_FEATURE_WINML").is_ok() + && env::consts::OS == "windows" + && path + .file_name() + .and_then(|n| n.to_str()) + .map(|n| n.eq_ignore_ascii_case("DirectML.dll")) + .unwrap_or(false) + { + println!("cargo:warning=Skipped unneeded DirectML.dll from override dir"); + continue; + } + fs::copy(&path, &dest).expect("Failed to copy native lib from override dir"); println!( "cargo:warning=Copied {} from override dir", @@ -399,8 +488,12 @@ fn main() { } } - // Skip all downloads if every required library is already present - if libs_already_present(&out_dir) { + let packages = get_packages(rid); + let packages_require_extraction = packages.iter().any(|pkg| pkg.always_extract); + + // Skip all downloads if every required library is already present. + // WinML packages that overwrite stale runtime files still need to run. 
+ if !packages_require_extraction && libs_already_present(&packages, &out_dir) { println!("cargo:warning=Native libraries already present in OUT_DIR, skipping download."); println!("cargo:rustc-link-search=native={}", out_dir.display()); println!("cargo:rustc-env=FOUNDRY_NATIVE_DIR={}", out_dir.display()); @@ -409,8 +502,6 @@ fn main() { return; } - let packages = get_packages(rid); - let mut download_failed = false; for pkg in &packages { if let Err(e) = download_and_extract(pkg, rid, &out_dir) { @@ -419,7 +510,9 @@ fn main() { } } - if download_failed && !libs_already_present(&out_dir) { + remove_unneeded_winml_runtime_files(&out_dir); + + if download_failed && !libs_already_present(&packages, &out_dir) { panic!( "One or more native library downloads failed and required libraries are missing. \ You can manually place native libraries in the output directory: {}", diff --git a/sdk/rust/src/configuration.rs b/sdk/rust/src/configuration.rs index c1ec2964b..fc261d0a9 100644 --- a/sdk/rust/src/configuration.rs +++ b/sdk/rust/src/configuration.rs @@ -227,11 +227,8 @@ mod tests { #[test] fn builder_additional_settings() { - let cfg = FoundryLocalConfig::new("App") - .additional_setting("Bootstrap", "false") - .additional_setting("Foo", "bar"); + let cfg = FoundryLocalConfig::new("App").additional_setting("Foo", "bar"); let (c, _) = Configuration::new(cfg).unwrap(); - assert_eq!(c.params["Bootstrap"], "false"); assert_eq!(c.params["Foo"], "bar"); } } diff --git a/sdk/rust/src/detail/core_interop.rs b/sdk/rust/src/detail/core_interop.rs index 5120fa2ef..bd456f378 100644 --- a/sdk/rust/src/detail/core_interop.rs +++ b/sdk/rust/src/detail/core_interop.rs @@ -318,21 +318,6 @@ impl CoreInterop { pub fn new(config: &mut Configuration) -> Result { let lib_path = Self::resolve_library_path(config)?; - // Auto-detect WinAppSDK Bootstrap DLL next to the core library. 
- // If present, tell the native core to run the bootstrapper during - // initialisation — this is required for WinML execution providers. - #[cfg(target_os = "windows")] - if !config.params.contains_key("Bootstrap") { - if let Some(dir) = lib_path.parent() { - if dir - .join("Microsoft.WindowsAppRuntime.Bootstrap.dll") - .exists() - { - config.params.insert("Bootstrap".into(), "true".into()); - } - } - } - #[cfg(target_os = "windows")] let _dependency_libs = Self::load_windows_dependencies(&lib_path)?; @@ -787,16 +772,6 @@ impl CoreInterop { let mut libs = Vec::new(); - // Load WinML bootstrap if present. - let bootstrap = dir.join("Microsoft.WindowsAppRuntime.Bootstrap.dll"); - if bootstrap.exists() { - // SAFETY: Pre-loading a known dependency DLL from the same trusted - // directory as the core library. - if let Ok(lib) = unsafe { Library::new(&bootstrap) } { - libs.push(lib); - } - } - for dep in &["onnxruntime.dll", "onnxruntime-genai.dll"] { let dep_path = dir.join(dep); if dep_path.exists() { diff --git a/sdk/rust/src/types.rs b/sdk/rust/src/types.rs index 28b37ed24..f39109adf 100644 --- a/sdk/rust/src/types.rs +++ b/sdk/rust/src/types.rs @@ -126,7 +126,7 @@ pub enum ChatToolChoice { Function(String), } -/// Information about an available execution provider bootstrapper. +/// Information about an available execution provider. 
#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "PascalCase")] pub struct EpInfo { diff --git a/sdk/rust/tests/integration/common/mod.rs b/sdk/rust/tests/integration/common/mod.rs index 4e65e4eaf..1f319b4b0 100644 --- a/sdk/rust/tests/integration/common/mod.rs +++ b/sdk/rust/tests/integration/common/mod.rs @@ -79,7 +79,6 @@ pub fn get_audio_file_path() -> PathBuf { /// * `modelCacheDir` → `/../test-data-shared` /// * `logsDir` → `/sdk/rust/logs` /// * `logLevel` → `Warn` -/// * `Bootstrap` → `false` (via additional settings) pub fn test_config() -> FoundryLocalConfig { let repo_root = get_git_repo_root(); let logs_dir = repo_root.join("sdk").join("rust").join("logs"); @@ -88,7 +87,6 @@ pub fn test_config() -> FoundryLocalConfig { .model_cache_dir(get_test_data_shared_path().to_string_lossy().into_owned()) .logs_dir(logs_dir.to_string_lossy().into_owned()) .log_level(LogLevel::Warn) - .additional_setting("Bootstrap", "false") } /// Create (or return the cached) [`FoundryLocalManager`] for tests.