diff --git a/ClassTranscribeDatabase/Globals.cs b/ClassTranscribeDatabase/Globals.cs index 9480b41..01bad41 100644 --- a/ClassTranscribeDatabase/Globals.cs +++ b/ClassTranscribeDatabase/Globals.cs @@ -82,6 +82,12 @@ public class AppSettings public string DIGEST_CALCULATION_METHOD { get; set; } = ""; + public string LLAVA_PATH { get; set; } = "/llava/llava-v1.5-7b-q4.llamafile"; + public string LLAVA_ARGS { get; set; } = "--threads {cpuCount} -m llava-v1.5-7b-Q4_K.gguf --mmproj llava-v1.5-7b-mmproj-Q4_0.gguf --temp 0.0 --silent-prompt --image \"{imagePath}\" --escape -p \"{prompt}\""; + public string LLAVA_PROMPT { get; set; } = "### User: I am blind and listening to a university lecture video. What is in this image, that has been extracted from the lecture video? Be concise. Do your best to describe only the technical content of the image that is relevant to learning. Do not add opinions about the image.\n### Assistant:"; + + public string LLAVA_LOG_STREAMS { get; set; } = "out,err"; + } /// diff --git a/ClassTranscribeServer.sln b/ClassTranscribeServer.sln index e76893a..aa36b9d 100644 --- a/ClassTranscribeServer.sln +++ b/ClassTranscribeServer.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.28902.138 +# Visual Studio Version 17 +VisualStudioVersion = 17.8.34330.188 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ClassTranscribeServer", "ClassTranscribeServer\ClassTranscribeServer.csproj", "{E4C52518-A6B4-42B0-8A02-DC1BFBE9CD89}" EndProject @@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTests", "UnitTests\Unit EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestAzureCognitiveServices", "TestAzureCognitiveServices\TestAzureCognitiveServices.csproj", "{DA560288-98FC-4233-8CD5-252F8570CBFB}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestRemoteLLM", 
"TestRemoteLLM\TestRemoteLLM.csproj", "{9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -55,6 +57,10 @@ Global {DA560288-98FC-4233-8CD5-252F8570CBFB}.Debug|Any CPU.Build.0 = Debug|Any CPU {DA560288-98FC-4233-8CD5-252F8570CBFB}.Release|Any CPU.ActiveCfg = Release|Any CPU {DA560288-98FC-4233-8CD5-252F8570CBFB}.Release|Any CPU.Build.0 = Release|Any CPU + {9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/TaskEngine/TaskEngine.csproj b/TaskEngine/TaskEngine.csproj index 41c8510..31504a8 100644 --- a/TaskEngine/TaskEngine.csproj +++ b/TaskEngine/TaskEngine.csproj @@ -26,6 +26,7 @@ + diff --git a/TaskEngine/TaskEngine.sln b/TaskEngine/TaskEngine.sln new file mode 100644 index 0000000..bd462d5 --- /dev/null +++ b/TaskEngine/TaskEngine.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.002.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TaskEngine", "TaskEngine.csproj", "{09720F43-452A-43D0-BE7E-AAE2CF822AA0}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Debug|Any CPU.Build.0 = Debug|Any CPU + {09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Release|Any CPU.ActiveCfg = 
Release|Any CPU + {09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {83861AB8-4CDE-41B0-8309-63BD076F3EA6} + EndGlobalSection +EndGlobal diff --git a/TaskEngine/Tasks/DescribeImageTask.cs b/TaskEngine/Tasks/DescribeImageTask.cs index 13b00da..389303b 100644 --- a/TaskEngine/Tasks/DescribeImageTask.cs +++ b/TaskEngine/Tasks/DescribeImageTask.cs @@ -1,8 +1,4 @@ -// using Newtonsoft.Json.Linq; -// using System.Collections.Generic; -using System.Diagnostics.CodeAnalysis; -// using System; -// using System.Linq; +using System.Diagnostics.CodeAnalysis; using System.Threading.Tasks; using Microsoft.Extensions.Logging; @@ -13,6 +9,9 @@ using Newtonsoft.Json.Linq; using System; using System.Text; +// using SkiaSharp; +using System.IO; +using System.Diagnostics; @@ -70,8 +69,9 @@ protected async override Task OnConsume(string id, TaskParameters taskParameters GetLogger().LogInformation($"Describe Image {id}: Caption Text changed or caption missing"); return; } - string result = $"MOCK AI output: An interesting lecture slide ({captionId}) for image {imageFile} and ocr (\"{ocrtext}\")"; - c.Text = result; + // string result = $"MOCK AI output: An interesting lecture slide ({captionId}) for image {imageFile} and ocr (\"{ocrtext}\")"; + string description = await DescribeImage(imageFile, ocrtext); + c.Text = description; _context.Update(c); await _context.SaveChangesAsync(); } @@ -82,5 +82,97 @@ protected async override Task OnConsume(string id, TaskParameters taskParameters } GetLogger().LogInformation($"DescribeImageTask({id}): Complete - end of task"); } + + /*async Task loadImage(string imageFile) + { + string baseDir = Globals.appSettings.DATA_DIRECTORY; + var fullPath = $"{baseDir}/${imageFile}"; + GetLogger().LogInformation($"Opening Image ${fullPath} ..."); + 
var bytes = await File.ReadAllBytesAsync(fullPath);
+            var image = SKBitmap.Decode(bytes);
+            GetLogger().LogInformation($"Image ${imageFile} loaded. Dimensions: ${image.Width} x ${image.Height}");
+            return image;
+        } */
+
+        /// <summary>
+        /// Runs the LLaVA executable configured in LLAVA_PATH on one image and returns what it wrote to standard output.
+        /// The command line is built from the LLAVA_ARGS template by substituting {cpuCount}, {prompt} and {imagePath}.
+        /// </summary>
+        /// <param name="imagePath">Path of the image file to describe.</param>
+        /// <param name="ocrtext">OCR text of the image; accepted from the caller but not used by this method yet.</param>
+        /// <returns>The standard output of the process with surrounding whitespace trimmed, or "" when the image file does not exist.</returns>
+        /// <exception cref="FileNotFoundException">LLAVA_PATH does not point to an existing file.</exception>
+        /// <exception cref="InvalidOperationException">LLAVA_ARGS or LLAVA_PROMPT is missing or malformed.</exception>
+        async Task<string> DescribeImage(string imagePath, string ocrtext)
+        {
+            GetLogger().LogInformation($"DescribeImage Image <{imagePath}> ...");
+            if (!File.Exists(imagePath))
+            {
+                GetLogger().LogError($"DescribeImage. Image file <{imagePath}> does not exist - nothing to do.");
+                return "";
+            }
+
+            var llavaExec = Globals.appSettings.LLAVA_PATH; // "/llava/llava-v1.5-7b-q4.llamafile"
+            var prompt = Globals.appSettings.LLAVA_PROMPT;
+            var llavaArguments = Globals.appSettings.LLAVA_ARGS ?? "";
+            // don't want hyperthreading (we are memory bandwidth bound) - and this may report logical not physical cores
+            // besides we dont want monopolize the server
+            var cpuCount = Math.Max(1, Environment.ProcessorCount / 2);
+
+            if (!File.Exists(llavaExec))
+            {
+                var mesg = $"llava executable: {llavaExec} does not exist - did you install it? Check .env/LLAVA_PATH and taskengine docker mountpoint";
+                throw new FileNotFoundException(mesg, llavaExec);
+            }
+            if (!llavaArguments.Contains("{imagePath}") || !llavaArguments.Contains("{prompt}"))
+            {
+                throw new InvalidOperationException("LLAVA_ARGS MUST have {imagePath} and {prompt} placeholders");
+            }
+            if (String.IsNullOrEmpty(prompt))
+            {
+                throw new InvalidOperationException("LLAVA prompt cannot be empty or missing");
+            }
+            // Validate the template itself, before substitution, so that a brace inside the prompt
+            // or the image path is not mistaken for an unknown placeholder.
+            var templateRemainder = llavaArguments.Replace("{cpuCount}", "").Replace("{prompt}", "").Replace("{imagePath}", "");
+            if (templateRemainder.Contains("{") || templateRemainder.Contains("}"))
+            {
+                throw new InvalidOperationException("Argument still has a curly brace - unprocessed placeholder? Only {cpuCount|prompt|imagePath} are supported." + llavaArguments + ". Check LLAVA_ARGS");
+            }
+
+            var imagePathEscape = imagePath.Replace("\"", "\\\"");
+            // NOTE(review): the second Replace doubles the backslash of a literal backslash-n pair; together with
+            // --escape that presumably yields the two characters "\n" rather than a newline - confirm this is intended.
+            var promptEscape = prompt.Replace("\"", "\\\"").Replace("\\n", "\\\\n");
+            var args = llavaArguments.Replace("{cpuCount}", $"{cpuCount}").Replace("{prompt}", promptEscape).Replace("{imagePath}", imagePathEscape);
+
+            var info = new ProcessStartInfo()
+            { // --escape = Process prompt escapes sequences (\n, \r, \t, \', \", \\)
+                FileName = llavaExec,
+                Arguments = args,
+                RedirectStandardInput = true,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                UseShellExecute = false,
+                CreateNoWindow = false,
+                WindowStyle = ProcessWindowStyle.Hidden
+            };
+            var errorBuilder = new StringBuilder();
+            var outputBuilder = new StringBuilder();
+            var logStreams = Globals.appSettings.LLAVA_LOG_STREAMS ?? "";
+            var logOutput = logStreams.Contains("out");
+            var logError = logStreams.Contains("err");
+
+            // 'using' releases the process handle even when starting or waiting throws.
+            using var p = new Process() { StartInfo = info };
+            p.ErrorDataReceived += (src, e) =>
+            {
+                // A null Data marks the end of the stream; appending it would only add a blank line.
+                if (e.Data == null) { return; }
+                errorBuilder.AppendLine(e.Data);
+                if (logError) { GetLogger().LogInformation($"Describe {imagePath} err:{e.Data}"); }
+            };
+            p.OutputDataReceived += (src, e) =>
+            {
+                if (e.Data == null) { return; }
+                outputBuilder.AppendLine(e.Data);
+                if (logOutput) { GetLogger().LogInformation($"Describe {imagePath} out:{e.Data}"); }
+            };
+
+            var stopwatch = Stopwatch.StartNew();
+            GetLogger().LogInformation($"LLAVA Process starting {DateTime.Now}");
+
+            p.Start();
+            p.BeginErrorReadLine();
+            p.BeginOutputReadLine();
+
+            // Nothing is sent on stdin; closing it lets the child see end-of-input immediately.
+            p.StandardInput.Close();
+            GetLogger().LogInformation("{LlavaArguments}", args);
+
+            await p.WaitForExitAsync();
+            stopwatch.Stop();
+            var output = outputBuilder.ToString().Trim();
+
+            if (p.ExitCode != 0)
+            {
+                // Best effort: report the failure but still return whatever was produced, as before.
+                GetLogger().LogError("DescribeImage. {Executable} exited with code {ExitCode} for <{ImagePath}>. stderr:<<{StdErr}>>", llavaExec, p.ExitCode, imagePath, errorBuilder.ToString().Trim());
+            }
+
+            string processorSeconds;
+            try
+            {
+                processorSeconds = $"{p.TotalProcessorTime.TotalSeconds:F1}";
+            }
+            catch (Exception ex) when (ex is InvalidOperationException || ex is NotSupportedException || ex is System.ComponentModel.Win32Exception)
+            {
+                // NOTE(review): CPU statistics of an exited process may not be readable on every platform - confirm.
+                // A statistics failure must never discard a finished description.
+                processorSeconds = "unavailable";
+            }
+            GetLogger().LogInformation($"Description complete ({output.Length} characters). ProcessorTime: {processorSeconds} seconds for {stopwatch.Elapsed.TotalSeconds:F1} wallclock seconds");
+
+            GetLogger().LogInformation($"{imagePath} - Returning. Description:<<{output}>>");
+            return output;
+        }
     }
 }
diff --git a/TestRemoteLLM/ClassTranscribeStudentsUse2020.png b/TestRemoteLLM/ClassTranscribeStudentsUse2020.png
new file mode 100644
index 0000000..9bc47e6
Binary files /dev/null and b/TestRemoteLLM/ClassTranscribeStudentsUse2020.png differ
diff --git a/TestRemoteLLM/Program.cs b/TestRemoteLLM/Program.cs
new file mode 100644
index 0000000..bbd7d6f
--- /dev/null
+++ b/TestRemoteLLM/Program.cs
@@ -0,0 +1,189 @@
+// See https://aka.ms/new-console-template for more information
+using RestSharp;
+using Newtonsoft.Json;
+using Newtonsoft.Json.Linq;
+using System.Diagnostics;
+using System.Text;
+using Microsoft.AspNetCore.Routing.Constraints;
+
+internal class Program
+{
+    private static async Task<string> DescribeImage(string imagePath)
+    {
+        if (!File.Exists(imagePath)) { Console.WriteLine($"Invalid image path:<{imagePath}>"); return ""; }
+        // Shell examples from https://github.com/Mozilla-Ocho/llamafile
+        // ./llava-v1.5-7b-q4.llamafile --temp 0.2 --image lemurs.jpg -e -p '### User: What do you see? \n### Assistant:'
+
+        /* llamafile --temp 0 --image ~/Pictures/lemurs.jpg -m llava-v1.5-7b-Q4_K.gguf --mmproj llava-v1.5-7b-mmproj-Q4_0.gguf -e -p '### User: What do you see?\n### Assistant: ' \
+            --silent-prompt 2>/dev/null */
+
+        var execFile = "./llava-v1.5-7b-q4.llamafile";
+        var execPath = "E:/downloads/" + execFile;
+
+        if (!File.Exists(execPath)) { Console.WriteLine($"Invalid exec path:<{execPath}>"); return ""; }
+
+        // The first shell example did not explicitly specify the two models; maybe these are the default for llava llamafile?
+        var cpuCount = Math.Max(1, Environment.ProcessorCount / 2); // assume hyperthreading - we want physical count because we are memory bandwidth limited
+
+        var llamaOptions = $"--threads {cpuCount} -m llava-v1.5-7b-Q4_K.gguf --mmproj llava-v1.5-7b-mmproj-Q4_0.gguf --temp 0.0 --silent-prompt";
+
+        var prompt = "### User: What do you see in this image?\n### Assistant:"; // add single quotes and -p
+        // See https://learn.microsoft.com/en-us/dotnet/api/system.diagnostics.processstartinfo.redirectstandardoutput?view=net-8.0
+        // The image path is quoted so that a path containing spaces stays a single argument.
+        var processArgs = $"{llamaOptions} --image \"{imagePath}\" --escape -p \"{prompt}\""; //
+        var info = new ProcessStartInfo()
+        { // --escape = Process prompt escapes sequences (\n, \r, \t, \', \", \\)
+            FileName = execPath,
+            Arguments = processArgs, // "--threads 12 --help", // ",
+            RedirectStandardInput = true,
+            RedirectStandardOutput = true,
+            RedirectStandardError = true,
+            UseShellExecute = false,
+            CreateNoWindow = false,
+            WindowStyle = ProcessWindowStyle.Hidden
+        };
+        var errorBuilder = new StringBuilder();
+        var outputBuilder = new StringBuilder();
+        // 'using' disposes the process on every exit path, including exceptions.
+        using var p = new Process()
+        {
+            StartInfo = info
+        };
+        bool writeData = false;
+        // e.Data is null once a stream reaches end-of-file; skip it so no blank line is appended.
+        p.ErrorDataReceived += new DataReceivedEventHandler((src, e) =>
+            { if (e.Data == null) return; errorBuilder.AppendLine(e.Data); if (writeData) Console.WriteLine("err:" + e.Data); });
+        p.OutputDataReceived += new DataReceivedEventHandler((src, e) =>
+            { if (e.Data == null) return; outputBuilder.AppendLine(e.Data); if (writeData) Console.WriteLine("out:" + e.Data); });
+
+        Console.WriteLine("Starting " + DateTime.Now.ToString());
+
+        p.Start();
+        p.BeginErrorReadLine();
+        p.BeginOutputReadLine();
+
+        p.StandardInput.Close();
+        Console.WriteLine($"{imagePath}\n{prompt}");
+        Console.WriteLine($"{ p.StartInfo.Arguments}");
+
+        await p.WaitForExitAsync();
+        p.WaitForExit(); // already exited; this overload also waits for the redirected-stream handlers to finish
+        // var output = await llamaProcess.StandardOutput.ReadToEndAsync();
+        var output = outputBuilder.ToString();
+        var error = errorBuilder.ToString();
+
+        Console.WriteLine("StandardOutput:");
+        Console.WriteLine(output);
+
+        var processTime = p.TotalProcessorTime;
+        Console.WriteLine($"ProcessorTime: {processTime}");
+
+        // var err = await llamaProcess.StandardError.ReadToEndAsync();
+        Console.WriteLine("StandardError:");
+        Console.WriteLine(error);
+
+        Console.WriteLine("Ending " + DateTime.Now.ToString());
+
+        return output;
+    }
+    static async Task Main(string[] args)
+    {
+        var imageFile = "dieselsubmarine.jpg"; // add --image
+        // An image path given on the command line overrides the developer-machine default below.
+        var imagePath = args.Length > 0 ? args[0] : "E:/proj2/testimages/" + imageFile;
+        string result = await DescribeImage(imagePath);
+        // var result = "";
+        Console.WriteLine("\n\nResult:" + result);
+
+    }
+
+    private static void BadLLamaAPI(string[] args)
+    {
+        // As of Jan 10, Llama API is broken
+        // i) Images are ignored ii) Messing around with stop parameter is required,otherwise the server crashes (and ignores future api requests)
+
+        // System.Environment.GetEnvironmentVariable("AZURE_SUBSCRIPTION_KEYS") ?? defaultKeys
+        string fullPath = "../../../" + "dieselsubmarine.jpg"; // ClassTranscribeStudentsUse2020.png";
+
+        var bytes = File.ReadAllBytes(fullPath);
+        string imageBytesAsBase64 = Convert.ToBase64String(bytes);
+        //string mimetype = "image/png";
+
+        // var image = SKBitmap.Decode(bytes);
+        // Console.WriteLine($"Image ${fullPath} loaded. Dimensions: ${image.Width} x ${image.Height}");
+
+        //var CONTEXT = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n";
+        //var prompt = CONTEXT + "What doe image convey [img-1]?";
+        //var prompt = "USER:[img-12]Describe the image in detail.\nASSISTANT:";
+        var msg = "Describe this image.";
+        var prompt = $"A chat between a curious human and an artificial intelligence assistant.The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]{msg}\nASSISTANT:";
+
+        string model = "llava-v1.5-7b-Q4_K.gguf";
+        // "llava-v1.5-7b-Q4_K.gguf"; /* Verified using unzip -t AND network content*/
+
+
+        var userRole1 = new JObject { { "role", "user" }, { "content", "Write 2 truthful sentences." } };
+        var userRole2 = new JObject { { "role", "user" }, { "content", "tell me history of canada" } };
+        var userRole3 = new JObject { { "role", "user" }, { "content", prompt } };
+
+        // https://github.com/Mozilla-Ocho/llamafile/blob/main/llama.cpp/server/README.md#api-endpoints
+        // An array of objects to hold base64-encoded image data and its ids to be reference in prompt.
+        // You can determine the place of the image in the prompt as in the following: USER:[img-12]Describe the image in detail.\nASSISTANT:
+        // In this case, [img-12] will be replaced by the embeddings of the image id 12 in the following image_data array:
+        // {..., "image_data": [{"data": "", "id": 12}]}.
+
+        JObject image12 = new JObject
+        {
+            {"data", imageBytesAsBase64 }, {"id",10}
+        };
+        JObject requestJson = new JObject
+        {
+            { "model", model}, // "llava-v1.5-7b-Q4_K.GGUF" },
+            // { "stop" , null},
+            { "mode", "instruct" },
+            { "image_data", new JArray { image12 } },
+            { "messages", new JArray {
+                // systemRole,
+                userRole3
+                }
+            }
+};
+
+        string requestJsonAsString = requestJson.ToString();
+        // Console.WriteLine(requestJsonAsString);
+
+
+        string LLMBASE = "http://localhost:8965/";
+        var authKey = "Nokey";
+
+        var clientOptions = new RestClientOptions
+        {
+            BaseUrl = new Uri(LLMBASE)
+        };
+        var client = new RestClient(clientOptions, null, null, true /*Enable simple factory */);
+        var request = new RestRequest("v1/chat/completions", Method.Post);
+        request.AddHeader("Content-Type", "application/json");
+        request.AddHeader("Authorization", $"Bearer {authKey}");
+        request.AddJsonBody(requestJsonAsString);
+
+        // request.AddJsonBody(requestJsonAsString);
+        // Todo: Are these even required?
+        // https://restsharp.dev/usage.html#get-or-post
+        // Put or Post ... Also, the request will be sent as application/x-www-form-urlencoded.
+
+        // In both cases, name and value will automatically be url - encoded.
+        //request.AddHeader("content-type", "application/x-www-form-urlencoded");
+
+        RestResponse response = client.Execute(request); // may throw exception
+        Console.WriteLine($"ResponseStatus: {response.ResponseStatus}");
+        Console.WriteLine($"Status Code: {response.StatusCode}");
+        Console.WriteLine($"Content: {response.Content}");
+        if (!string.IsNullOrEmpty(response.Content))
+        {
+            var responseAsJson = JObject.Parse(response.Content);
+            // SelectToken yields null instead of throwing when an error payload has no choices[0].message.content.
+            var responseContent = responseAsJson.SelectToken("choices[0].message.content");
+            Console.WriteLine(responseContent);
+        }
+    }
+}
\ No newline at end of file
diff --git a/TestRemoteLLM/TestRemoteLLM.csproj b/TestRemoteLLM/TestRemoteLLM.csproj
new file mode 100644
index 0000000..2acfcc4
--- /dev/null
+++ b/TestRemoteLLM/TestRemoteLLM.csproj
@@ -0,0 +1,15 @@
+ + + + Exe + net8.0 + enable + enable + + + + + + + +
diff --git a/TestRemoteLLM/dieselsubmarine.jpg b/TestRemoteLLM/dieselsubmarine.jpg
new file mode 100644
index 0000000..21bf8f5
Binary files /dev/null and b/TestRemoteLLM/dieselsubmarine.jpg differ