Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add chat example for csharp #1266

Merged
merged 3 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 87 additions & 39 deletions examples/csharp/HelloPhi/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,64 +83,112 @@ void PrintUsage()
if (interactive)
{
Console.WriteLine("Please enter option number:");
Console.WriteLine("1. Complete Output");
Console.WriteLine("2. Streaming Output");
Console.WriteLine("1. Complete Q&A");
Console.WriteLine("2. Streaming Q&A");
Console.WriteLine("3. Streaming Chat (not supported for DirectML and QNN currently)");
int.TryParse(Console.ReadLine(), out option);
}

do
int minLength = 50;
int maxLength = 500;

static string GetPrompt(bool interactive)
{
string prompt = "def is_prime(num):"; // Example prompt
if (interactive)
{
Console.WriteLine("Prompt:");
Console.WriteLine("Prompt: (Use quit() to exit)");
prompt = Console.ReadLine();
}
if (string.IsNullOrEmpty(prompt))
{
continue;
}
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");
return prompt;
}

using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", 50);
generatorParams.SetSearchOption("max_length", 200);
if (option == 1) // Complete Output
if (option == 1 || option == 2)
{
do
{
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
var watch = System.Diagnostics.Stopwatch.StartNew();
while (!generator.IsDone())
string prompt = GetPrompt(interactive);
if (string.IsNullOrEmpty(prompt))
{
generator.GenerateNextToken();
continue;
}
if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0)
{
break;
}
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");

var outputSequence = generator.GetSequence(0);
var outputString = tokenizer.Decode(outputSequence);
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
Console.WriteLine("Output:");
Console.WriteLine(outputString);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}
if (option == 1) // Complete Output
{
using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", minLength);
generatorParams.SetSearchOption("max_length", maxLength);
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
var watch = System.Diagnostics.Stopwatch.StartNew();
while (!generator.IsDone())
{
generator.GenerateNextToken();
}

else if (option == 2) //Streaming Output
{
using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
var outputSequence = generator.GetSequence(0);
var outputString = tokenizer.Decode(outputSequence);
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
Console.WriteLine("Output:");
Console.WriteLine(outputString);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}

else if (option == 2) //Streaming Output
{
using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", minLength);
generatorParams.SetSearchOption("max_length", maxLength);
using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);
var watch = System.Diagnostics.Stopwatch.StartNew();
while (!generator.IsDone())
{
generator.GenerateNextToken();
Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
}
Console.WriteLine();
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
var outputSequence = generator.GetSequence(0);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Streaming Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}
} while (interactive);
}

if (option == 3) // Streaming Chat
{
using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", minLength);
generatorParams.SetSearchOption("max_length", maxLength);
using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
do{
string prompt = GetPrompt(interactive);
if (string.IsNullOrEmpty(prompt))
{
continue;
}
if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0)
{
break;
}
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");
generator.AppendTokenSequences(sequences);
var watch = System.Diagnostics.Stopwatch.StartNew();
while (!generator.IsDone())
{
generator.GenerateNextToken();
Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
}
Console.WriteLine();
watch.Stop();
var runTimeInSeconds = watch.Elapsed.TotalSeconds;
var outputSequence = generator.GetSequence(0);
var totalTokens = outputSequence.Length;
Console.WriteLine($"Streaming Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
}
} while (interactive);
} while (interactive);
}
2 changes: 1 addition & 1 deletion examples/csharp/HelloPhi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
You can download a published model from Hugging Face. For example, this is Phi-3.5 mini optimized for CPU and mobile. You can find other models here:

```script
huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir models
huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4/* --local-dir models
move models\cpu_and_mobile\cpu-int4-awq-block-128-acc-level-4 models\phi-3
```

Expand Down
Loading