Commit aafc01c

Merge branch 'main' into add-slm-engine-example

Avijit committed Feb 21, 2025
2 parents 361099c + 20f907e
Showing 31 changed files with 206 additions and 478 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/linux-cpu-x64-build.yml
@@ -84,10 +84,6 @@ jobs:
           python3 -m pip install -r test/python/cpu/ort/requirements.txt --user
           python3 -m pip install --user --no-index --no-deps --find-links build/cpu/wheel onnxruntime_genai
-      - name: Use Dummy HuggingFace Token
-        run: |
-          echo "HF_TOKEN=12345" >> $GITHUB_ENV
       - name: Verify Build Artifacts
         if: always()
         continue-on-error: true
4 changes: 0 additions & 4 deletions .github/workflows/linux-cpu-x64-nightly-build.yml
@@ -55,10 +55,6 @@ jobs:
           python3 -m pip install -r test/python/cpu/ort/requirements.txt --user
           python3 -m pip install build/cpu/wheel/onnxruntime_genai*.whl --no-deps
-      - name: Use Dummy HuggingFace Token
-        run: |
-          echo "HF_TOKEN=12345" >> $GITHUB_ENV
       - name: Run the python tests
         run: |
           python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e
4 changes: 0 additions & 4 deletions .github/workflows/linux-gpu-x64-build.yml
@@ -109,10 +109,6 @@ jobs:
bash -c " \
/usr/bin/cmake --build --preset linux_gcc_cuda_release"
- name: Use Dummy HuggingFace Token
run: |
echo "HF_TOKEN=12345" >> $GITHUB_ENV
- name: Install the onnxruntime-genai Python wheel and run python test
run: |
echo "Installing the onnxruntime-genai Python wheel and running the Python tests"
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/win-cpu-x64-build.yml
@@ -91,10 +91,6 @@ jobs:
           python3 -m pip install -r test\python\cpu\ort\requirements.txt --user
           python3 -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps
-      - name: Use Dummy HuggingFace Token
-        run: |
-          Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=12345"
       - name: Run the Python Tests
         run: |
           python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
4 changes: 0 additions & 4 deletions .github/workflows/win-cuda-x64-build.yml
@@ -80,10 +80,6 @@ jobs:
           python -m pip install -r test\python\cuda\ort\requirements.txt
           python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps
-      - name: Use Dummy HuggingFace Token
-        run: |
-          Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=12345"
       - name: Run the Python Tests
         run: |
           python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e
2 changes: 1 addition & 1 deletion VERSION_INFO
@@ -1 +1 @@
-0.6.0-dev
+0.7.0-dev
7 changes: 0 additions & 7 deletions examples/c/README.md
@@ -11,13 +11,6 @@ git clone https://github.com/microsoft/onnxruntime-genai.git
 cd onnxruntime-genai/examples/c
 ```
 
-If they don't already exist, create folders called `include` and `lib`.
-
-```bash
-mkdir include
-mkdir lib
-```
-
 ## Phi-3.5 mini
 
 ### Download model
6 changes: 3 additions & 3 deletions examples/csharp/HelloPhi/HelloPhi.csproj
@@ -10,9 +10,9 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0-dev" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.6.0-dev" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.6.0-dev" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
   </ItemGroup>
 
   <ItemGroup>
128 changes: 92 additions & 36 deletions examples/csharp/HelloPhi/Program.cs
@@ -83,54 +83,109 @@ void PrintUsage()
if (interactive)
{
Console.WriteLine("Please enter option number:");
Console.WriteLine("1. Complete Output");
Console.WriteLine("2. Streaming Output");
Console.WriteLine("1. Complete Q&A");
Console.WriteLine("2. Streaming Q&A");
Console.WriteLine("3. Streaming Chat (not supported for DirectML and QNN currently)");
int.TryParse(Console.ReadLine(), out option);
}

do
int minLength = 50;
int maxLength = 500;

static string GetPrompt(bool interactive)
{
string prompt = "def is_prime(num):"; // Example prompt
if (interactive)
{
Console.WriteLine("Prompt:");
Console.WriteLine("Prompt: (Use quit() to exit)");
prompt = Console.ReadLine();
}
if (string.IsNullOrEmpty(prompt))
{
continue;
}
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");
return prompt;
}

using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", 50);
generatorParams.SetSearchOption("max_length", 200);
if (option == 1) // Complete Output
if (option == 1 || option == 2)
{
do
{
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
var watch = System.Diagnostics.Stopwatch.StartNew();
while (!generator.IsDone())
string prompt = GetPrompt(interactive);
if (string.IsNullOrEmpty(prompt))
{
generator.GenerateNextToken();
continue;
}
if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0)
{
break;
}
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");

var outputSequence = generator.GetSequence(0);
var outputString = tokenizer.Decode(outputSequence);
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
Console.WriteLine("Output:");
Console.WriteLine(outputString);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}
if (option == 1) // Complete Output
{
using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", minLength);
generatorParams.SetSearchOption("max_length", maxLength);
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
var watch = System.Diagnostics.Stopwatch.StartNew();
while (!generator.IsDone())
{
generator.GenerateNextToken();
}

else if (option == 2) //Streaming Output
{
using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
var outputSequence = generator.GetSequence(0);
var outputString = tokenizer.Decode(outputSequence);
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
Console.WriteLine("Output:");
Console.WriteLine(outputString);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}

else if (option == 2) //Streaming Output
{
using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", minLength);
generatorParams.SetSearchOption("max_length", maxLength);
using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
var watch = System.Diagnostics.Stopwatch.StartNew();
while (!generator.IsDone())
{
generator.GenerateNextToken();
Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
}
Console.WriteLine();
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
var outputSequence = generator.GetSequence(0);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Streaming Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}
} while (interactive);
}

if (option == 3) // Streaming Chat
{
using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", minLength);
generatorParams.SetSearchOption("max_length", maxLength);
using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
var prevTotalTokens = 0;
do{
string prompt = GetPrompt(interactive);
if (string.IsNullOrEmpty(prompt))
{
continue;
}
if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0)
{
break;
}
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");
var watch = System.Diagnostics.Stopwatch.StartNew();
generator.AppendTokenSequences(sequences);
while (!generator.IsDone())
{
generator.GenerateNextToken();
@@ -140,7 +195,8 @@ void PrintUsage()
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
var outputSequence = generator.GetSequence(0);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Streaming Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}
} while (interactive);
var totalNewTokens = outputSequence.Length - prevTotalTokens;
prevTotalTokens = totalNewTokens;
Console.WriteLine($"Streaming Tokens: {totalNewTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalNewTokens / runTimeInSeconds:0.00}");
} while (interactive);
}
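
For readers skimming the Program.cs diff above, the core pattern the updated example implements is a prompt → encode → generate → decode loop. Here is a minimal, self-contained sketch of that loop; the model path is hypothetical, and the tokenizer construction is assumed from the surrounding example rather than shown in this hunk:

```csharp
using System;
using Microsoft.ML.OnnxRuntimeGenAI;

using var model = new Model("models/phi-3");   // hypothetical model folder
using var tokenizer = new Tokenizer(model);    // assumed; constructed earlier in the real example
var sequences = tokenizer.Encode("<|user|>What is an ONNX model?<|end|><|assistant|>");

using var generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", 50);
generatorParams.SetSearchOption("max_length", 500);

using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
while (!generator.IsDone())
{
    generator.GenerateNextToken();
    // Decode and print only the newest token, as the streaming options above do.
    Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
}
Console.WriteLine();
```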
2 changes: 1 addition & 1 deletion examples/csharp/HelloPhi/README.md
@@ -5,7 +5,7 @@
You can download a published model from Hugging Face. For example, this is Phi-3.5 mini optimized for CPU and mobile. You can find other models here:

 ```script
-huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir models
+huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4/* --local-dir models
 move models\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4 models\phi-3
 ```
6 changes: 3 additions & 3 deletions examples/csharp/HelloPhi3V/HelloPhi3V.csproj
@@ -9,9 +9,9 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0-dev" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.6.0-dev" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.6.0-dev" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
   </ItemGroup>
 
 </Project>
31 changes: 13 additions & 18 deletions src/csharp/Adapters.cs
@@ -12,39 +12,34 @@ namespace Microsoft.ML.OnnxRuntimeGenAI
public class Adapters : SafeHandle
{
/// <summary>
/// Constructs an Adapters object with the given model.
/// Creates a container for adapters
/// used to load, unload and hold them.
/// Throws on error.
/// </summary>
/// <param name="model">Reference to a loaded model</param>
/// <exception cref="OnnxRuntimeGenAIException">
/// Thrown when the call to the GenAI native API fails.
/// </exception>
/// <returns>new Adapters object</returns>
public Adapters(Model model) : base(IntPtr.Zero, true)
{
Result.VerifySuccess(NativeMethods.OgaCreateAdapters(model.Handle, out handle));
}

/// <summary>
/// Loads the model adapter from the given adapter file path and adapter name.
/// Method that loads adapter data and assigns it a name that
/// it can be referred to. Throws on error.
/// </summary>
/// <param name="adapterPath">The path of the adapter.</param>
/// <param name="adapterName">A unique user supplied adapter identifier.</param>
/// <exception cref="OnnxRuntimeGenAIException">
/// Thrown when the call to the GenAI native API fails.
/// </exception>
public void LoadAdapter(string adapterFilePath, string adapterName)
/// <param name="adapterPath">file path to load</param>
/// <param name="adapterName">adapter name</param>
public void LoadAdapter(string adapterPath, string adapterName)
{
Result.VerifySuccess(NativeMethods.OgaLoadAdapter(handle,
StringUtils.ToUtf8(adapterFilePath), StringUtils.ToUtf8(adapterName)));
StringUtils.ToUtf8(adapterPath), StringUtils.ToUtf8(adapterName)));
}

/// <summary>
/// Unloads the adapter with the given identifier from the previously loaded adapters. If the
/// adapter is not found, or if it cannot be unloaded (when it is in use), an error is returned.
/// Unload the adapter that was loaded by the LoadAdapter method.
/// Throws on error.
/// </summary>
/// <param name="adapterName"></param>
/// <exception cref="OnnxRuntimeGenAIException">
/// Thrown when the call to the GenAI native API fails.
/// </exception>
public void UnloadAdapter(string adapterName)
{
Result.VerifySuccess(NativeMethods.OgaUnloadAdapter(handle, StringUtils.ToUtf8(adapterName)));
@@ -53,7 +48,7 @@ public void UnloadAdapter(string adapterName)
internal IntPtr Handle { get { return handle; } }

/// <summary>
/// Implement SafeHandle override.
/// Implement SafeHandle override
/// </summary>
public override bool IsInvalid => handle == IntPtr.Zero;

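
As a usage note for the Adapters API above, here is a minimal sketch of the load → activate → unload flow. The model and adapter paths are hypothetical, and the Generator.SetActiveAdapter call is an assumption based on the current C# bindings; it does not appear in this diff:

```csharp
using Microsoft.ML.OnnxRuntimeGenAI;

using var model = new Model("models/phi-3");                      // hypothetical path
using var adapters = new Adapters(model);                         // container that loads, holds, and unloads adapters
adapters.LoadAdapter("adapters/travel.onnx_adapter", "travel");   // hypothetical file; the name is caller-chosen

using var generatorParams = new GeneratorParams(model);
{
    using var generator = new Generator(model, generatorParams);
    generator.SetActiveAdapter(adapters, "travel");               // refer to the adapter by its load-time name
    // ... append token sequences and generate as usual ...
}

// Unload only after the generators using the adapter are disposed;
// unloading an adapter that is still in use raises an error.
adapters.UnloadAdapter("travel");
```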
44 changes: 4 additions & 40 deletions src/csharp/Config.cs
@@ -5,65 +5,29 @@

namespace Microsoft.ML.OnnxRuntimeGenAI
{
/// <summary>
/// Use Config to set the ORT execution providers (EPs) and their options. The EPs are applied based on
/// insertion order.
/// </summary>
public class Config : IDisposable
{
private IntPtr _configHandle;
private bool _disposed = false;

/// <summary>
/// Creates a Config from the given configuration directory.
/// </summary>
/// <param name="modelPath">The path to the configuration directory.</param>
/// <exception cref="OnnxRuntimeGenAIException">
/// Thrown when the call to the GenAI native API fails.
/// </exception>
public Config(string modelPath)
{
Result.VerifySuccess(NativeMethods.OgaCreateConfig(StringUtils.ToUtf8(modelPath), out _configHandle));
}

internal IntPtr Handle { get { return _configHandle; } }

/// <summary>
/// Clear the list of providers in the config.
/// </summary>
/// <exception cref="OnnxRuntimeGenAIException">
/// Thrown when the call to the GenAI native API fails.
/// </exception>
public void ClearProviders()
{
Result.VerifySuccess(NativeMethods.OgaConfigClearProviders(_configHandle));
}

/// <summary>
/// Add the provider at the end of the list of providers in the given config if it doesn't already
/// exist. If it already exists, does nothing.
/// </summary>
/// <param name="providerName">Name of the provider</param>
/// <exception cref="OnnxRuntimeGenAIException">
/// Thrown when the call to the GenAI native API fails.
/// </exception>
public void AppendProvider(string providerName)
public void AppendProvider(string provider)
{
Result.VerifySuccess(NativeMethods.OgaConfigAppendProvider(_configHandle, StringUtils.ToUtf8(providerName)));
Result.VerifySuccess(NativeMethods.OgaConfigAppendProvider(_configHandle, StringUtils.ToUtf8(provider)));
}

/// <summary>
/// Set a provider option.
/// </summary>
/// <param name="providerName">Name of the provider</param>
/// <param name="optionKey">Name of the option</param>
/// <param name="optionValue">Value of the option</param>
/// <exception cref="OnnxRuntimeGenAIException">
/// Thrown when the call to the GenAI native API fails.
/// </exception>
public void SetProviderOption(string providerName, string optionKey, string optionValue)
public void SetProviderOption(string provider, string option, string value)
{
Result.VerifySuccess(NativeMethods.OgaConfigSetProviderOption(_configHandle, StringUtils.ToUtf8(providerName), StringUtils.ToUtf8(optionKey), StringUtils.ToUtf8(optionValue)));
Result.VerifySuccess(NativeMethods.OgaConfigSetProviderOption(_configHandle, StringUtils.ToUtf8(provider), StringUtils.ToUtf8(option), StringUtils.ToUtf8(value)));
}

~Config()
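
A minimal sketch of how the Config API above is wired into model creation. The model path, the "cuda" provider name, its "device_id" option, and the Model(Config) constructor are assumptions based on the current bindings, not part of this diff:

```csharp
using Microsoft.ML.OnnxRuntimeGenAI;

// Point the config at the folder that holds the model's genai_config.json (hypothetical path).
using var config = new Config("models/phi-3");

// Providers are applied in insertion order, so start from a clean list.
config.ClearProviders();
config.AppendProvider("cuda");                       // assumed provider name
config.SetProviderOption("cuda", "device_id", "0");  // assumed option key/value

using var model = new Model(config);                 // assumed Model-from-Config constructor
```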
… (diff truncated: the remaining 18 of 31 changed files are not shown)
