Skip to content

Commit

Permalink
Add chat example for csharp (#1266)
Browse files Browse the repository at this point in the history
Add chat example for csharp
  • Loading branch information
ajindal1 authored Feb 21, 2025
1 parent 8301e2c commit 20f907e
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 37 deletions.
128 changes: 92 additions & 36 deletions examples/csharp/HelloPhi/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,54 +83,109 @@ void PrintUsage()
if (interactive)
{
Console.WriteLine("Please enter option number:");
Console.WriteLine("1. Complete Output");
Console.WriteLine("2. Streaming Output");
Console.WriteLine("1. Complete Q&A");
Console.WriteLine("2. Streaming Q&A");
Console.WriteLine("3. Streaming Chat (not supported for DirectML and QNN currently)");
int.TryParse(Console.ReadLine(), out option);
}

do
int minLength = 50;
int maxLength = 500;

// Returns the prompt to run: in interactive mode, reads one line from stdin
// (the caller treats "quit()" as the exit sentinel); otherwise returns a
// fixed example prompt so the sample can run unattended.
// NOTE: may return null/empty when stdin is closed — callers must check.
static string GetPrompt(bool interactive)
{
    string prompt = "def is_prime(num):"; // Example prompt for non-interactive runs
    if (interactive)
    {
        Console.WriteLine("Prompt: (Use quit() to exit)");
        prompt = Console.ReadLine();
    }
    return prompt;
}

using GeneratorParams generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("min_length", 50);
generatorParams.SetSearchOption("max_length", 200);
if (option == 1) // Complete Output
if (option == 1 || option == 2) // Q&A modes: each turn is an independent generation
{
    do
    {
        string prompt = GetPrompt(interactive);
        if (string.IsNullOrEmpty(prompt))
        {
            continue; // empty input: re-prompt rather than sending an empty turn
        }
        if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0)
        {
            break; // user-requested exit sentinel
        }
        // Phi chat template: wrap the user turn in the model's control tokens.
        var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");

        if (option == 1) // Complete Output: generate everything, then print once
        {
            // A fresh generator per turn — Q&A mode carries no history between turns.
            using GeneratorParams generatorParams = new GeneratorParams(model);
            generatorParams.SetSearchOption("min_length", minLength);
            generatorParams.SetSearchOption("max_length", maxLength);
            using var generator = new Generator(model, generatorParams);
            generator.AppendTokenSequences(sequences);
            var watch = System.Diagnostics.Stopwatch.StartNew();
            while (!generator.IsDone())
            {
                generator.GenerateNextToken();
            }
            var outputSequence = generator.GetSequence(0);
            var outputString = tokenizer.Decode(outputSequence);
            watch.Stop();
            var runTimeInSeconds = watch.Elapsed.TotalSeconds;
            Console.WriteLine("Output:");
            Console.WriteLine(outputString);
            // NOTE: totalTokens counts the whole sequence (prompt + completion).
            var totalTokens = outputSequence.Length;
            Console.WriteLine($"Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
        }
        else if (option == 2) // Streaming Output: print each token as it is produced
        {
            using GeneratorParams generatorParams = new GeneratorParams(model);
            generatorParams.SetSearchOption("min_length", minLength);
            generatorParams.SetSearchOption("max_length", maxLength);
            using var tokenizerStream = tokenizer.CreateStream();
            using var generator = new Generator(model, generatorParams);
            generator.AppendTokenSequences(sequences);
            var watch = System.Diagnostics.Stopwatch.StartNew();
            while (!generator.IsDone())
            {
                generator.GenerateNextToken();
                // Decode only the newest token (index ^1) through the stream decoder.
                Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
            }
            Console.WriteLine();
            watch.Stop();
            var runTimeInSeconds = watch.Elapsed.TotalSeconds;
            var outputSequence = generator.GetSequence(0);
            var totalTokens = outputSequence.Length;
            Console.WriteLine($"Streaming Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}");
        }
    } while (interactive); // non-interactive mode runs exactly one turn
}

if (option == 3) // Streaming Chat: one generator persists across turns, so the KV cache keeps history
{
    using GeneratorParams generatorParams = new GeneratorParams(model);
    generatorParams.SetSearchOption("min_length", minLength);
    generatorParams.SetSearchOption("max_length", maxLength);
    using var tokenizerStream = tokenizer.CreateStream();
    using var generator = new Generator(model, generatorParams);
    // Cumulative sequence length at the end of the previous turn; used to
    // report only the tokens generated during the current turn.
    var prevTotalTokens = 0;
    do
    {
        string prompt = GetPrompt(interactive);
        if (string.IsNullOrEmpty(prompt))
        {
            continue;
        }
        if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0)
        {
            break;
        }
        var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");
        var watch = System.Diagnostics.Stopwatch.StartNew();
        // Append the new user turn to the existing conversation state.
        generator.AppendTokenSequences(sequences);
        while (!generator.IsDone())
        {
            generator.GenerateNextToken();
            // NOTE(review): this streaming-print body was collapsed in the diff
            // view; reconstructed by analogy with the option-2 block — confirm
            // against the committed file.
            Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
        }
        Console.WriteLine();
        watch.Stop();
        var runTimeInSeconds = watch.Elapsed.TotalSeconds;
        var outputSequence = generator.GetSequence(0);
        var totalNewTokens = outputSequence.Length - prevTotalTokens;
        // BUG FIX: the original assigned prevTotalTokens = totalNewTokens, which
        // stores the per-turn delta instead of the cumulative length, so every
        // turn after the second reports a wrong token count and tokens/sec.
        prevTotalTokens = outputSequence.Length;
        Console.WriteLine($"Streaming Tokens: {totalNewTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalNewTokens / runTimeInSeconds:0.00}");
    } while (interactive);
}
2 changes: 1 addition & 1 deletion examples/csharp/HelloPhi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
You can download a published model from Hugging Face. For example, this is Phi-3.5 mini optimized for CPU and mobile. You can find other models here:

```script
huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir models
huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4/* --local-dir models
move models\cpu_and_mobile\cpu-int4-awq-block-128-acc-level-4 models\phi-3
```

Expand Down

0 comments on commit 20f907e

Please sign in to comment.