Commit 41ebe4e: Add LLava for automatic descriptions
angrave committed Jan 10, 2024
1 parent 20e3eb6 commit 41ebe4e
Showing 9 changed files with 343 additions and 9 deletions.
6 changes: 6 additions & 0 deletions ClassTranscribeDatabase/Globals.cs
@@ -82,6 +82,12 @@ public class AppSettings

public string DIGEST_CALCULATION_METHOD { get; set; } = "";

public string LLAVA_PATH { get; set; } = "/llava/llava-v1.5-7b-q4.llamafile";
public string LLAVA_ARGS { get; set; } = "--threads {cpuCount} -m llava-v1.5-7b-Q4_K.gguf --mmproj llava-v1.5-7b-mmproj-Q4_0.gguf --temp 0.0 --silent-prompt --image \"{imagePath}\" --escape -p \"{prompt}\"";
public string LLAVA_PROMPT { get; set; } = "### User: I am blind and listening to a university lecture video. What is in this image, that has been extracted from the lecture video? Be concise. Do your best to describe only the technical content of the image that is relevant to learning. Do not add opinions about the image.\n### Assistant:";

public string LLAVA_LOG_STREAMS { get; set; } = "out,err";

}

/// <summary>
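A minimal sketch (not part of the commit; values are illustrative) of how the {cpuCount}, {imagePath} and {prompt} placeholders in LLAVA_ARGS above are expected to be expanded before the llamafile is launched (see DescribeImageTask.DescribeImage further down):

var argsTemplate = Globals.appSettings.LLAVA_ARGS;
var expandedArgs = argsTemplate
    .Replace("{cpuCount}", "4")                                   // e.g. half the logical cores
    .Replace("{imagePath}", "/data/images/slide-0042.png")        // hypothetical extracted video frame
    .Replace("{prompt}", "### User: What is in this image?\\n### Assistant:");
// expandedArgs is then passed as ProcessStartInfo.Arguments for the executable at LLAVA_PATH.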
10 changes: 8 additions & 2 deletions ClassTranscribeServer.sln
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.28902.138
# Visual Studio Version 17
VisualStudioVersion = 17.8.34330.188
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ClassTranscribeServer", "ClassTranscribeServer\ClassTranscribeServer.csproj", "{E4C52518-A6B4-42B0-8A02-DC1BFBE9CD89}"
EndProject
@@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTests", "UnitTests\Unit
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestAzureCognitiveServices", "TestAzureCognitiveServices\TestAzureCognitiveServices.csproj", "{DA560288-98FC-4233-8CD5-252F8570CBFB}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestRemoteLLM", "TestRemoteLLM\TestRemoteLLM.csproj", "{9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -55,6 +57,10 @@ Global
{DA560288-98FC-4233-8CD5-252F8570CBFB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DA560288-98FC-4233-8CD5-252F8570CBFB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DA560288-98FC-4233-8CD5-252F8570CBFB}.Release|Any CPU.Build.0 = Release|Any CPU
{9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9D9DB7A1-DBDA-49DF-9D31-D39846BABE30}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
1 change: 1 addition & 0 deletions TaskEngine/TaskEngine.csproj
@@ -26,6 +26,7 @@
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
<PackageReference Include="NEST" Version="7.17.5" />
<PackageReference Include="Elasticsearch.Net" Version="7.17.5" />
<PackageReference Include="RestSharp" Version="110.2.0" />
</ItemGroup>

<ItemGroup>
25 changes: 25 additions & 0 deletions TaskEngine/TaskEngine.sln
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.002.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TaskEngine", "TaskEngine.csproj", "{09720F43-452A-43D0-BE7E-AAE2CF822AA0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{09720F43-452A-43D0-BE7E-AAE2CF822AA0}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {83861AB8-4CDE-41B0-8309-63BD076F3EA6}
EndGlobalSection
EndGlobal
106 changes: 99 additions & 7 deletions TaskEngine/Tasks/DescribeImageTask.cs
@@ -1,8 +1,4 @@
// using Newtonsoft.Json.Linq;
// using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
// using System;
// using System.Linq;
using System.Diagnostics.CodeAnalysis;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;

@@ -13,6 +9,9 @@
using Newtonsoft.Json.Linq;
using System;
using System.Text;
// using SkiaSharp;
using System.IO;
using System.Diagnostics;



@@ -70,8 +69,9 @@ protected async override Task OnConsume(string id, TaskParameters taskParameters
GetLogger().LogInformation($"Describe Image {id}: Caption Text changed or caption missing");
return;
}
string result = $"MOCK AI output: An interesting lecture slide ({captionId}) for image {imageFile} and ocr (\"{ocrtext}\")";
c.Text = result;
// string result = $"MOCK AI output: An interesting lecture slide ({captionId}) for image {imageFile} and ocr (\"{ocrtext}\")";
string description = await DescribeImage(imageFile, ocrtext);
c.Text = description;
_context.Update(c);
await _context.SaveChangesAsync();
}
@@ -82,5 +82,97 @@ protected async override Task OnConsume(string id, TaskParameters taskParameters
}
GetLogger().LogInformation($"DescribeImageTask({id}): Complete - end of task");
}

/*async Task<SKBitmap> loadImage(string imageFile)
{
string baseDir = Globals.appSettings.DATA_DIRECTORY;
var fullPath = $"{baseDir}/${imageFile}";
GetLogger().LogInformation($"Opening Image ${fullPath} ...");
var bytes = await File.ReadAllBytesAsync(fullPath);
var image = SKBitmap.Decode(bytes);
GetLogger().LogInformation($"Image ${imageFile} loaded. Dimensions: ${image.Width} x ${image.Height}");
return image;
} */

async Task<string> DescribeImage(string imagePath, string ocrtext) {
GetLogger().LogInformation($"DescribeImage Image <${imagePath}> ...");
if (!File.Exists(imagePath)) { GetLogger().LogError($"DescribeImage. Image file <{imagePath}> does not exist - nothing to do."); return ""; }
var llavaExec = Globals.appSettings.LLAVA_PATH; // "/llava/llava-v1.5-7b-q4.llamafile"
var prompt = Globals.appSettings.LLAVA_PROMPT;
var cpuCount = Math.Max(1, Environment.ProcessorCount / 2); // avoid hyperthreading (we are memory-bandwidth bound), and ProcessorCount may report logical rather than physical cores;
// besides, we don't want to monopolize the server
var llavaArguments = Globals.appSettings.LLAVA_ARGS;
if (!File.Exists(llavaExec))
{
var mesg = $"llava executable: {llavaExec} does not exist - did you install it? Check .env/LLAVA_PATH and taskengine docker mountpoint";
throw new Exception(mesg);
}
if(! llavaArguments.Contains("{imagePath}") || ! llavaArguments.Contains("{prompt}"))
{
throw new Exception("LLAVA_ARGS MUST have have {imagePath} and {prompt} placeholders");
}
if(String.IsNullOrEmpty(prompt))
{
throw new Exception("LLAVA prompt cannot be empty or missing");
}
var imagePathEscape = imagePath.Replace("\"", "\\\"");
var promptEscape = prompt.Replace("\"", "\\\"").Replace("\\n", "\\\\n");
var args = llavaArguments.Replace("{cpuCount}", $"{cpuCount}").Replace("{prompt}", promptEscape).Replace("{imagePath}", $"{imagePathEscape}");
if (args.Contains("{") || args.Contains("}") ) {
throw new Exception("Argument still has a curly brace - unprocessed placeholder? Only {cpuCount|prompt|imagePath} are supported." + args + ". Check LLAVA_ARGS");
}

var info = new ProcessStartInfo()
{ // --escape = Process prompt escapes sequences (\n, \r, \t, \', \", \\)
FileName = llavaExec,
Arguments = args, // "--threads 12 --help", // ",
RedirectStandardInput = true,
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
CreateNoWindow = false,
WindowStyle = ProcessWindowStyle.Hidden
};
var errorBuilder = new StringBuilder();
var outputBuilder = new StringBuilder();
Process p = new Process()
{
StartInfo = info
};
var logOutput = Globals.appSettings.LLAVA_LOG_STREAMS.Contains("out");
var logError = Globals.appSettings.LLAVA_LOG_STREAMS.Contains("err");

p.ErrorDataReceived += new DataReceivedEventHandler((src, e) => { errorBuilder.AppendLine(e.Data);
if (logError) GetLogger().LogInformation($"Describe {imagePath} err:{e.Data}");
});
p.OutputDataReceived += new DataReceivedEventHandler((src, e) => { outputBuilder.AppendLine(e.Data);
if (logOutput) GetLogger().LogInformation($"Describe {imagePath} out:{e.Data}");
});

var startTime = DateTime.Now;
GetLogger().LogInformation($"LLAVA Process starting {startTime}");


p.Start();
p.BeginErrorReadLine();
p.BeginOutputReadLine();

p.StandardInput.Close();
GetLogger().LogInformation(p.StartInfo.Arguments);

await p.WaitForExitAsync();
var output = outputBuilder.ToString();
var error = errorBuilder.ToString();

var endTime = DateTime.Now;
var processTime = p.TotalProcessorTime;
GetLogger().LogInformation($"Description complete ({output.Length} characters). ProcessorTime: {processTime} seconds for {endTime-startTime} wallclock seconds");


p.Close();
p.Dispose();
GetLogger().LogInformation($"{imagePath} - Returning. Description:<<{output}>>");
return output;
}
}
}
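For reference, a self-contained sketch (not part of the commit) of the stdout/stderr capture pattern DescribeImage uses above: redirect the streams, subscribe to the Data events before Start(), close stdin so the child process cannot block on it, then await exit. The file name and arguments here are placeholders.

using System.Diagnostics;
using System.Text;
using System.Threading.Tasks;

static async Task<string> RunAndCaptureAsync(string fileName, string arguments)
{
    var info = new ProcessStartInfo
    {
        FileName = fileName,               // e.g. the llamafile from LLAVA_PATH
        Arguments = arguments,             // e.g. the expanded LLAVA_ARGS
        RedirectStandardInput = true,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false
    };
    var stdout = new StringBuilder();
    var stderr = new StringBuilder();
    using var p = new Process { StartInfo = info };
    p.OutputDataReceived += (_, e) => { if (e.Data != null) stdout.AppendLine(e.Data); };
    p.ErrorDataReceived += (_, e) => { if (e.Data != null) stderr.AppendLine(e.Data); };
    p.Start();
    p.BeginOutputReadLine();
    p.BeginErrorReadLine();
    p.StandardInput.Close();               // the model reads no stdin; closing avoids a hang
    await p.WaitForExitAsync();
    return p.ExitCode == 0 ? stdout.ToString() : "";
}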
Binary file added TestRemoteLLM/ClassTranscribeStudentsUse2020.png
189 changes: 189 additions & 0 deletions TestRemoteLLM/Program.cs
@@ -0,0 +1,189 @@
// See https://aka.ms/new-console-template for more information
using RestSharp;

[GitHub Actions / Build: check failure on line 2 in TestRemoteLLM/Program.cs: The type or namespace name 'RestSharp' could not be found (are you missing a using directive or an assembly reference?)]
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.Diagnostics;
using System.Text;
using Microsoft.AspNetCore.Routing.Constraints;

internal class Program
{
private static async Task<string> DescribeImage(string imagePath)
{
if (!File.Exists(imagePath)) { Console.WriteLine($"Invalid image path:<{imagePath}>"); return ""; }
// Shell examples from https://github.com/Mozilla-Ocho/llamafile
// ./llava-v1.5-7b-q4.llamafile --temp 0.2 --image lemurs.jpg -e -p '### User: What do you see? \n### Assistant:'

/* llamafile --temp 0 --image ~/Pictures/lemurs.jpg -m llava-v1.5-7b-Q4_K.gguf --mmproj llava-v1.5-7b-mmproj-Q4_0.gguf -e -p '### User: What do you see?\n### Assistant: ' \
--silent-prompt 2>/dev/null */

var execFile = "./llava-v1.5-7b-q4.llamafile";
var execPath = "E:/downloads/" + execFile;

if (!File.Exists(execPath)) { Console.WriteLine($"Invalid exec path:<{execPath}>"); return ""; }

// The first shell example did not explicitly specify the two models; maybe these are the default for llava llamafile?
var cpuCount = Math.Max(1, Environment.ProcessorCount / 2); // assume hyperthreading - we want physical count because we are memory bandwidth limited

var llamaOptions = $"--threads {cpuCount} -m llava-v1.5-7b-Q4_K.gguf --mmproj llava-v1.5-7b-mmproj-Q4_0.gguf --temp 0.0 --silent-prompt";

var prompt = "### User: What do you see in this image?\n### Assistant:"; // add single quotes and -p
// See https://learn.microsoft.com/en-us/dotnet/api/system.diagnostics.processstartinfo.redirectstandardoutput?view=net-8.0
var processArgs = $"{llamaOptions} --image {imagePath} --escape -p \"{prompt}\""; //
var info = new ProcessStartInfo()
{ // --escape = Process prompt escapes sequences (\n, \r, \t, \', \", \\)
FileName = execPath,
Arguments = processArgs, // "--threads 12 --help", // ",
RedirectStandardInput = true,
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
CreateNoWindow = false,
WindowStyle = ProcessWindowStyle.Hidden
};
var errorBuilder = new StringBuilder();
var outputBuilder = new StringBuilder();
Process p = new Process()
{
StartInfo = info
};
bool writeData = false;
p.ErrorDataReceived += new DataReceivedEventHandler((src, e) =>
{ errorBuilder.AppendLine( e.Data); if(writeData) Console.WriteLine("err:" + e.Data); });
p.OutputDataReceived += new DataReceivedEventHandler((src, e) =>
{ outputBuilder.AppendLine(e.Data); if(writeData) Console.WriteLine("out:" + e.Data); });

Console.WriteLine("Starting " + DateTime.Now.ToString());

Process llamaProcess = p;
if (llamaProcess == null) { Console.WriteLine("Could not create process"); return ""; }

p.Start();
p.BeginErrorReadLine();
p.BeginOutputReadLine();

llamaProcess.StandardInput.Close();
Console.WriteLine($"{imagePath}\n{prompt}");
Console.WriteLine($"{ p.StartInfo.Arguments}");

await llamaProcess.WaitForExitAsync();
// var output = await llamaProcess.StandardOutput.ReadToEndAsync();
var output = outputBuilder.ToString();
var error = errorBuilder.ToString();

Console.WriteLine("StandardOutput:");
Console.WriteLine(output);
llamaProcess.WaitForExit();

var processTime = llamaProcess.TotalProcessorTime;
Console.WriteLine($"ProcessorTime: {processTime}");

// var err = await llamaProcess.StandardError.ReadToEndAsync();
Console.WriteLine("StandardError:");
Console.WriteLine(error);

llamaProcess.Close();
llamaProcess.Dispose();
Console.WriteLine("Ending " + DateTime.Now.ToString());

return output;
}
static async Task Main(string[] args)
{
var imageFile = "dieselsubmarine.jpg"; // add --image
var imagePath = "E:/proj2/testimages/" + imageFile;
string result = await DescribeImage(imagePath);
// var result = "";
Console.WriteLine("\n\nResult:" + result);

}

private static void BadLLamaAPI(string[] args)
{
// As of Jan 10, Llama API is broken
// i) Images are ignored ii) Messing around with the stop parameter is required, otherwise the server crashes (and ignores future API requests)

// System.Environment.GetEnvironmentVariable("AZURE_SUBSCRIPTION_KEYS") ?? defaultKeys
string fullPath = "../../../" + "dieselsubmarine.jpg"; // ClassTranscribeStudentsUse2020.png";

var bytes = File.ReadAllBytes(fullPath);
string imageBytesAsBase64 = Convert.ToBase64String(bytes);
//string mimetype = "image/png";

// var image = SKBitmap.Decode(bytes);
// Console.WriteLine($"Image ${fullPath} loaded. Dimensions: ${image.Width} x ${image.Height}");

//var CONTEXT = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n";
//var prompt = CONTEXT + "What doe image convey [img-1]?";
//var prompt = "USER:[img-12]Describe the image in detail.\nASSISTANT:";
var msg = "Describe this image.";
var prompt = $"A chat between a curious human and an artificial intelligence assistant.The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]{msg}\nASSISTANT:";

string model = "llava-v1.5-7b-Q4_K.gguf";
// "llava-v1.5-7b-Q4_K.gguf"; /* Verified using unzip -t AND network content*/


var userRole1 = new JObject { { "role", "user" }, { "content", "Write 2 truthful sentences." } };
var userRole2 = new JObject { { "role", "user" }, { "content", "tell me history of canada" } };
var userRole3 = new JObject { { "role", "user" }, { "content", prompt } };

// https://github.com/Mozilla-Ocho/llamafile/blob/main/llama.cpp/server/README.md#api-endpoints
// An array of objects to hold base64-encoded image data and its ids to be reference in prompt.
// You can determine the place of the image in the prompt as in the following: USER:[img-12]Describe the image in detail.\nASSISTANT:
// In this case, [img-12] will be replaced by the embeddings of the image id 12 in the following image_data array:
// {..., "image_data": [{"data": "<BASE64_STRING>", "id": 12}]}.

JObject image12 = new JObject
{
{"data", imageBytesAsBase64 }, {"id",10}
};
JObject requestJson = new JObject
{
{ "model", model}, // "llava-v1.5-7b-Q4_K.GGUF" },
// { "stop" , null},
{ "mode", "instruct" },
{ "image_data", new JArray { image12 } },
{ "messages", new JArray {
// systemRole,
userRole3
}
}
};

string requestJsonAsString = requestJson.ToString();
// Console.WriteLine(requestJsonAsString);


string LLMBASE = "http://localhost:8965/";
var authKey = "Nokey";

var clientOptions = new RestClientOptions
{
BaseUrl = new Uri(LLMBASE)
};
var client = new RestClient(clientOptions, null, null, true /*Enable simple factory */);
var request = new RestRequest("v1/chat/completions", Method.Post);
request.AddHeader("Content-Type", "application/json");
request.AddHeader("Authorization", $"Bearer {authKey}");
request.AddJsonBody(requestJsonAsString);

// request.AddJsonBody(requestJsonAsString);
// Todo: Are these even required?
// https://restsharp.dev/usage.html#get-or-post
// Put or Post ... Also, the request will be sent as application/x-www-form-urlencoded.

// In both cases, name and value will automatically be url - encoded.
//request.AddHeader("content-type", "application/x-www-form-urlencoded");

RestResponse response = client.Execute(request); // may throw exception
Console.WriteLine($"ResponseStatus: {response.ResponseStatus}");
Console.WriteLine($"Status Code: {response.StatusCode}");
Console.WriteLine($"Content: {response.Content}");
if (response.Content != null)
{
var responseAsJson = JObject.Parse(response.Content);
var responseContent = responseAsJson["choices"][0]["message"]["content"];
Console.WriteLine(responseContent);
}
}
}