From 20f907ee6b940aa0a2ec135e36cc86814ad494d4 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 21 Feb 2025 10:30:33 -0800 Subject: [PATCH] Add chat example for csharp (#1266) Add chat example for csharp --- examples/csharp/HelloPhi/Program.cs | 128 ++++++++++++++++++++-------- examples/csharp/HelloPhi/README.md | 2 +- 2 files changed, 93 insertions(+), 37 deletions(-) diff --git a/examples/csharp/HelloPhi/Program.cs b/examples/csharp/HelloPhi/Program.cs index e1c7381a3..dff1db670 100644 --- a/examples/csharp/HelloPhi/Program.cs +++ b/examples/csharp/HelloPhi/Program.cs @@ -83,54 +83,109 @@ void PrintUsage() if (interactive) { Console.WriteLine("Please enter option number:"); - Console.WriteLine("1. Complete Output"); - Console.WriteLine("2. Streaming Output"); + Console.WriteLine("1. Complete Q&A"); + Console.WriteLine("2. Streaming Q&A"); + Console.WriteLine("3. Streaming Chat (not supported for DirectML and QNN currently)"); int.TryParse(Console.ReadLine(), out option); } -do +int minLength = 50; +int maxLength = 500; + +static string GetPrompt(bool interactive) { string prompt = "def is_prime(num):"; // Example prompt if (interactive) { - Console.WriteLine("Prompt:"); + Console.WriteLine("Prompt: (Use quit() to exit)"); prompt = Console.ReadLine(); } - if (string.IsNullOrEmpty(prompt)) - { - continue; - } - var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>"); + return prompt; +} - using GeneratorParams generatorParams = new GeneratorParams(model); - generatorParams.SetSearchOption("min_length", 50); - generatorParams.SetSearchOption("max_length", 200); - if (option == 1) // Complete Output +if (option == 1 || option == 2) +{ + do { - using var generator = new Generator(model, generatorParams); - generator.AppendTokenSequences(sequences); - var watch = System.Diagnostics.Stopwatch.StartNew(); - while (!generator.IsDone()) + string prompt = GetPrompt(interactive); + if (string.IsNullOrEmpty(prompt)) { - generator.GenerateNextToken(); 
+ continue; + } + if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0) + { + break; } + var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>"); - var outputSequence = generator.GetSequence(0); - var outputString = tokenizer.Decode(outputSequence); - watch.Stop(); - var runTimeInSeconds = watch.Elapsed.TotalSeconds; - Console.WriteLine("Output:"); - Console.WriteLine(outputString); - var totalTokens = outputSequence.Length; - Console.WriteLine($"Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}"); - } + if (option == 1) // Complete Output + { + using GeneratorParams generatorParams = new GeneratorParams(model); + generatorParams.SetSearchOption("min_length", minLength); + generatorParams.SetSearchOption("max_length", maxLength); + using var generator = new Generator(model, generatorParams); + generator.AppendTokenSequences(sequences); + var watch = System.Diagnostics.Stopwatch.StartNew(); + while (!generator.IsDone()) + { + generator.GenerateNextToken(); + } - else if (option == 2) //Streaming Output - { - using var tokenizerStream = tokenizer.CreateStream(); - using var generator = new Generator(model, generatorParams); - generator.AppendTokenSequences(sequences); + var outputSequence = generator.GetSequence(0); + var outputString = tokenizer.Decode(outputSequence); + watch.Stop(); + var runTimeInSeconds = watch.Elapsed.TotalSeconds; + Console.WriteLine("Output:"); + Console.WriteLine(outputString); + var totalTokens = outputSequence.Length; + Console.WriteLine($"Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}"); + } + + else if (option == 2) //Streaming Output + { + using GeneratorParams generatorParams = new GeneratorParams(model); + generatorParams.SetSearchOption("min_length", minLength); + generatorParams.SetSearchOption("max_length", maxLength); + using var tokenizerStream = 
tokenizer.CreateStream(); + using var generator = new Generator(model, generatorParams); + generator.AppendTokenSequences(sequences); + var watch = System.Diagnostics.Stopwatch.StartNew(); + while (!generator.IsDone()) + { + generator.GenerateNextToken(); + Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1])); + } + Console.WriteLine(); + watch.Stop(); + var runTimeInSeconds = watch.Elapsed.TotalSeconds; + var outputSequence = generator.GetSequence(0); + var totalTokens = outputSequence.Length; + Console.WriteLine($"Streaming Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}"); + } + } while (interactive); +} + +if (option == 3) // Streaming Chat +{ + using GeneratorParams generatorParams = new GeneratorParams(model); + generatorParams.SetSearchOption("min_length", minLength); + generatorParams.SetSearchOption("max_length", maxLength); + using var tokenizerStream = tokenizer.CreateStream(); + using var generator = new Generator(model, generatorParams); + var prevTotalTokens = 0; + do{ + string prompt = GetPrompt(interactive); + if (string.IsNullOrEmpty(prompt)) + { + continue; + } + if (string.Compare(prompt, "quit()", StringComparison.OrdinalIgnoreCase) == 0) + { + break; + } + var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>"); var watch = System.Diagnostics.Stopwatch.StartNew(); + generator.AppendTokenSequences(sequences); while (!generator.IsDone()) { generator.GenerateNextToken(); @@ -140,7 +195,8 @@ void PrintUsage() watch.Stop(); var runTimeInSeconds = watch.Elapsed.TotalSeconds; var outputSequence = generator.GetSequence(0); - var totalTokens = outputSequence.Length; - Console.WriteLine($"Streaming Tokens: {totalTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalTokens / runTimeInSeconds:0.00}"); - } -} while (interactive); + var totalNewTokens = outputSequence.Length - prevTotalTokens; + prevTotalTokens = outputSequence.Length; + 
Console.WriteLine($"Streaming Tokens: {totalNewTokens} Time: {runTimeInSeconds:0.00} Tokens per second: {totalNewTokens / runTimeInSeconds:0.00}"); + } while (interactive); +} diff --git a/examples/csharp/HelloPhi/README.md b/examples/csharp/HelloPhi/README.md index 7598a3b87..7d54c0bf0 100644 --- a/examples/csharp/HelloPhi/README.md +++ b/examples/csharp/HelloPhi/README.md @@ -5,7 +5,7 @@ You can download a published model from Hugging Face. For example, this is Phi-3.5 mini optimized for CPU and mobile. You can find other models here: ```script -huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir models +huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4/* --local-dir models -move models\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4 models\phi-3 +move models\cpu_and_mobile\cpu-int4-awq-block-128-acc-level-4 models\phi-3 ```