From 16d772b143dda68b7751341f9450e6011bc54adf Mon Sep 17 00:00:00 2001 From: Lawrence Angrave Date: Sun, 3 Dec 2023 17:05:41 -0600 Subject: [PATCH 1/5] Initial Video and Image Automatic Description work --- ClassTranscribeDatabase/CaptionQueries.cs | 3 +- ClassTranscribeDatabase/CommonUtils.cs | 6 +- ClassTranscribeDatabase/Models/Caption.cs | 7 + .../Controllers/AdminController.cs | 9 ++ ClassTranscribeServer/Utils/WakeDownloader.cs | 8 + TaskEngine/Tasks/DescribeImageTask.cs | 55 +++++++ TaskEngine/Tasks/DescribeVideoTask.cs | 142 ++++++++++++++++++ TaskEngine/Tasks/QueueAwakerTask.cs | 33 +++- TaskEngine/Tasks/TranscriptionTask.cs | 6 +- 9 files changed, 263 insertions(+), 6 deletions(-) create mode 100644 TaskEngine/Tasks/DescribeImageTask.cs create mode 100644 TaskEngine/Tasks/DescribeVideoTask.cs diff --git a/ClassTranscribeDatabase/CaptionQueries.cs b/ClassTranscribeDatabase/CaptionQueries.cs index 717a67ce..df9d417e 100644 --- a/ClassTranscribeDatabase/CaptionQueries.cs +++ b/ClassTranscribeDatabase/CaptionQueries.cs @@ -25,7 +25,8 @@ public async Task> GetCaptionsAsync(string videoId, string languag { try { - var transcriptionId = _context.Transcriptions.Where(t => t.Language == language && t.VideoId == videoId).First().Id; + var transcriptionId = _context.Transcriptions.Where(t => t.Language == language && t.VideoId == videoId + && t.TranscriptionType == TranscriptionType.Caption).First().Id; return await GetCaptionsAsync(transcriptionId); } catch (System.InvalidOperationException) diff --git a/ClassTranscribeDatabase/CommonUtils.cs b/ClassTranscribeDatabase/CommonUtils.cs index 646aab8e..02e259b0 100644 --- a/ClassTranscribeDatabase/CommonUtils.cs +++ b/ClassTranscribeDatabase/CommonUtils.cs @@ -34,7 +34,11 @@ public enum TaskType BuildElasticIndex = 16, ExampleTask = 17, CleanUpElasticIndex = 18, - PythonCrawler = 19 + PythonCrawler = 19, + + DescribeVideo = 20, + DescribeImage = 21 + } public class Languages diff --git a/ClassTranscribeDatabase/Models/Caption.cs b/ClassTranscribeDatabase/Models/Caption.cs index 0373540f..d5840e60 100644 --- a/ClassTranscribeDatabase/Models/Caption.cs +++ b/ClassTranscribeDatabase/Models/Caption.cs @@ -13,11 +13,18 @@ public enum CaptionType TextCaption = 0, AudioDescription = 1 } + + public static class CaptionConstants { + public const string PlaceHolderText = "...Processing..."; + + } /// /// Each line of caption is stored as a row in the database. /// public class Caption : Entity { + + public bool HasPlaceHolderText() { return this.Text == CaptionConstants.PlaceHolderText; } public int Index { get; set; } public TimeSpan Begin { get; set; } public TimeSpan End { get; set; } diff --git a/ClassTranscribeServer/Controllers/AdminController.cs b/ClassTranscribeServer/Controllers/AdminController.cs index 7475a0f6..b65ef1ac 100644 --- a/ClassTranscribeServer/Controllers/AdminController.cs +++ b/ClassTranscribeServer/Controllers/AdminController.cs @@ -135,6 +135,15 @@ public ActionResult UpdatePlaylist(string playlistId) return Ok(); } + [HttpPost("DescribeVideo")] + [Authorize(Roles = Globals.ROLE_ADMIN)] + public ActionResult DescribeVideo(string playlistMediaVideoId, bool deleteExisting) + { + _wakeDownloader.DescribeVideo(playlistMediaVideoId, deleteExisting); + return Ok(); + } + + /// /// Requests a re-download of missing media /// diff --git a/ClassTranscribeServer/Utils/WakeDownloader.cs b/ClassTranscribeServer/Utils/WakeDownloader.cs index 50ecd0f3..a5d10e14 100644 --- a/ClassTranscribeServer/Utils/WakeDownloader.cs +++ b/ClassTranscribeServer/Utils/WakeDownloader.cs @@ -99,6 +99,14 @@ public virtual void SceneDetection(string videoMediaPlaylistId, bool deleteExist Wake(msg); } + public virtual void DescribeVideo(string videoMediaPlaylistId, bool deleteExisting) { + JObject msg = new JObject(); + msg.Add("Type", TaskType.DescribeVideo.ToString()); + msg.Add("videoMediaPlaylistId", videoMediaPlaylistId); + msg.Add("DeleteExisting", deleteExisting); + Wake(msg); + } + public void UpdateASLVideo(string sourceId) { JObject msg = new JObject(); diff --git a/TaskEngine/Tasks/DescribeImageTask.cs b/TaskEngine/Tasks/DescribeImageTask.cs new file mode 100644 index 00000000..d0f95235 --- /dev/null +++ b/TaskEngine/Tasks/DescribeImageTask.cs @@ -0,0 +1,55 @@ +// using Newtonsoft.Json.Linq; +// using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +// using System; +// using System.Linq; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; + +using ClassTranscribeDatabase; +using ClassTranscribeDatabase.Models; +using ClassTranscribeDatabase.Services; +using static ClassTranscribeDatabase.CommonUtils; + + + +#pragma warning disable CA2007 +// https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007 +// We are okay awaiting on a task in the same thread + +namespace TaskEngine.Tasks +{ + [SuppressMessage("Microsoft.Performance", "CA1812:MarkMembersAsStatic")] // This class is never directly instantiated + class DescribeImageTask : RabbitMQTask + { + private readonly DescribeImageTask _describeImageTask; + + public DescribeImageTask(RabbitMQConnection rabbitMQ, ILogger logger) + : base(rabbitMQ, TaskType.DescribeImage, logger) + { + + } + /// Extracts scene descriptions for a video. + /// Beware: It is possible to start another scene task while the first one is still running + protected async override Task OnConsume(string imagePath, TaskParameters taskParameters, ClientActiveTasks cleanup) + { + registerTask(cleanup, imagePath); // may throw AlreadyInProgress exception + GetLogger().LogInformation($"DescribeImageTask({imagePath}): Consuming Task"); + string captionId = taskParameters.Metadata["captionId"].ToString(); + + using (var _context = CTDbContext.CreateDbContext()) + { + Caption c= await _context.Captions.FindAsync(captionId); + if (c == null || c.HasPlaceHolderText()) { + GetLogger().LogInformation($"Describe Image {imagePath}: Caption Text changed or caption missing"); + return; + } + string result = $"A very interesting lecture slide ({captionId})"; + c.Text = result; + _context.Update(c); + await _context.SaveChangesAsync(); + } + GetLogger().LogInformation($"DescribeImageTask({imagePath}): Complete- end of task"); + } + } +} diff --git a/TaskEngine/Tasks/DescribeVideoTask.cs b/TaskEngine/Tasks/DescribeVideoTask.cs new file mode 100644 index 00000000..b7625e97 --- /dev/null +++ b/TaskEngine/Tasks/DescribeVideoTask.cs @@ -0,0 +1,142 @@ +using ClassTranscribeDatabase; +using ClassTranscribeDatabase.Models; +using ClassTranscribeDatabase.Services;using Microsoft.Extensions.Logging; +using Newtonsoft.Json.Linq; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System; +using System.Linq; +using System.Threading.Tasks; +using static ClassTranscribeDatabase.CommonUtils; + + +#pragma warning disable CA2007 +// https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007 +// We are okay awaiting on a task in the same thread + +namespace TaskEngine.Tasks +{ + [SuppressMessage("Microsoft.Performance", "CA1812:MarkMembersAsStatic")] // This class is never directly instantiated + class DescribeVideoTask : RabbitMQTask + { + private readonly DescribeImageTask _describeImageTask; + + public DescribeVideoTask(RabbitMQConnection rabbitMQ, DescribeImageTask task, RpcClient rpcClient, ILogger logger) + : base(rabbitMQ, TaskType.DescribeVideo, logger) + { + _describeImageTask = task; + } + /// Extracts scene descriptions for a video. + /// Beware: It is possible to start another scene task while the first one is still running + protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) + { + registerTask(cleanup, videoId); // may throw AlreadyInProgress exception + GetLogger().LogInformation($"SceneDescriptionTask({videoId}): Consuming Task"); + + using (var _context = CTDbContext.CreateDbContext()) + { + Video video = await _context.Videos.FindAsync(videoId); + + if(!video.HasSceneObjectData()) { + GetLogger().LogInformation($"Describe Video {videoId}: Early return - no scene data to process"); + return; + } + TextData td = await _context.TextData.FindAsync(video.SceneObjectDataId); + + JObject sceneData = td.getAsJSON() as JObject; + JArray scenes = sceneData["Scenes"] as JArray; + var captions = new List(); + + const string SIR = "ClassTranscribe/Scene-Describe"; // todo move into Model e.g. CaptionConstants + Transcription? transcription = video.Transcriptions.Where(t=>t.SourceInternalRef == SIR).FirstOrDefault(); + if(transcription == null) { + var theLanguage = Languages.ENGLISH_AMERICAN; + + transcription = new Transcription() + { + TranscriptionType = TranscriptionType.TextDescription, + VideoId = video.Id, + Language = Languages.ENGLISH_AMERICAN, + Label = "Description", + SourceLabel = "ClassTranscribe", + SourceInternalRef = SIR + }; + GetLogger().LogInformation($"Describe Video {videoId}: Creating new (empty) Description Entry"); + + _context.Add(transcription); + await _context.SaveChangesAsync(); + } + else { + captions = transcription.Captions.ToList(); + GetLogger().LogInformation($"{videoId}: Reusing Description. Found {captions.Count} captions"); + } + // Step 2 Create Placeholder captions if they don't exist for every scene. + int alreadyDoneCount = 0, taskEnqueueCount = 0;; + + int CaptionIndex = captions != null && captions.Count > 0 ? captions.Select(c=>c.Index).Max() + 1 : 0; + var newCaptions = new List(); + var scenesWithNewCaption = new List(); + var describeScenes = new List(); + int sceneIndex = 0; + foreach (JObject scene in scenes) { + string? captionId = scene["captionId"].ToString(); + var caption = captions.Where(c=>c.Id==captionId).FirstOrDefault(); + if( caption == null) { + var c = new Caption + { + Index = CaptionIndex++, + Begin = TimeSpan.Parse(scene["start"].ToString()), + End = TimeSpan.Parse(scene["end"].ToString()), + CaptionType = CaptionType.AudioDescription, + Text = CaptionConstants.PlaceHolderText, + TranscriptionId = transcription.Id + }; + newCaptions.Add(c); + scenesWithNewCaption.Add(sceneIndex); + describeScenes.Add(sceneIndex); + } else { + if(caption.HasPlaceHolderText()) { + describeScenes.Add(sceneIndex); + } else { + alreadyDoneCount ++; + } + } + + sceneIndex ++; // todo rewrite as map with index? + } + + GetLogger().LogInformation($"Describe Video {videoId}: ${newCaptions.Count} new captions to create"); + if(newCaptions.Any()) { + _context.AddRange(newCaptions); + await _context.SaveChangesAsync(); + // Now we can associate the captionIds with the scene data + foreach(int i in scenesWithNewCaption) { + scenes[i]["captionId"] = newCaptions[i].Id; + } + sceneData.Remove("Scenes"); + sceneData.Add("Scenes", scenes); + td.setFromJSON(sceneData); + _context.Update(td); + await _context.SaveChangesAsync(); + GetLogger().LogInformation($"Describe Video {videoId}: Scene Data {td.Id} Updated with Caption references"); + } + GetLogger().LogInformation($"Describe Video {videoId}: {describeScenes.Count} Description Tasks to enqueue"); + foreach (int i in describeScenes) { + JObject scene = scenes[i] as JObject; + string imageFilePath = scene[""].ToString(); + string captionId = scene["captionId"].ToString(); + + taskEnqueueCount ++; + // Todo Create Parameter imageFilePath, captionId,transcription.Id + var taskParams = new TaskParameters(); + + GetLogger().LogInformation($"Describe Video {videoId}: {imageFilePath} {captionId} {transcription.Id}"); + _describeImageTask.Publish(imageFilePath,taskParams); + + } + GetLogger().LogInformation($"Describe Video {videoId}: AlreadyDone={alreadyDoneCount}.enqueueCount={taskEnqueueCount}"); + GetLogger().LogInformation($"Describe Video {videoId}: Returning."); + } + } + } +} \ No newline at end of file diff --git a/TaskEngine/Tasks/QueueAwakerTask.cs b/TaskEngine/Tasks/QueueAwakerTask.cs index cb03324e..51b56ff0 100644 --- a/TaskEngine/Tasks/QueueAwakerTask.cs +++ b/TaskEngine/Tasks/QueueAwakerTask.cs @@ -33,7 +33,8 @@ class QueueAwakerTask : RabbitMQTask private readonly CleanUpElasticIndexTask _cleanUpElasticIndexTask; private readonly ExampleTask _exampleTask; private readonly SlackLogger _slackLogger; - + private readonly DescribeVideoTask _describeVideoTask; + private readonly DescribeImageTask _describeImageTask; public QueueAwakerTask() { } public QueueAwakerTask(RabbitMQConnection rabbitMQ, DownloadPlaylistInfoTask downloadPlaylistInfoTask, @@ -42,7 +43,7 @@ public QueueAwakerTask(RabbitMQConnection rabbitMQ, DownloadPlaylistInfoTask dow GenerateVTTFileTask generateVTTFileTask, SceneDetectionTask sceneDetectionTask, CreateBoxTokenTask createBoxTokenTask, UpdateBoxTokenTask updateBoxTokenTask, PythonCrawlerTask pythonCrawlerTask, BuildElasticIndexTask buildElasticIndexTask, CleanUpElasticIndexTask cleanUpElasticIndexTask, - ExampleTask exampleTask, + ExampleTask exampleTask,DescribeVideoTask describeVideoTask,DescribeImageTask describeImageTask, ILogger logger, SlackLogger slackLogger) : base(rabbitMQ, TaskType.QueueAwaker, logger) { @@ -58,6 +59,8 @@ public QueueAwakerTask(RabbitMQConnection rabbitMQ, DownloadPlaylistInfoTask dow _updateBoxTokenTask = updateBoxTokenTask; _buildElasticIndexTask = buildElasticIndexTask; _cleanUpElasticIndexTask = cleanUpElasticIndexTask; + _describeVideoTask = describeVideoTask; + _describeImageTask = describeImageTask; _exampleTask = exampleTask; _slackLogger = slackLogger; } @@ -361,6 +364,32 @@ protected async override Task OnConsume(JObject jObject, TaskParameters taskPara } + } else if(type==TaskType.DescribeVideo.ToString()) + { + var id = jObject["videoMediaPlaylistId"].ToString(); + bool deleteExisting = jObject["DeleteExisting"]?.Value() ?? false; + GetLogger().LogInformation($"{type}:{id}"); + var videos = await _context.Videos.Where(v=>v.Id ==id).ToListAsync(); + if (videos.Count == 0) + { + videos = await _context.Medias.Where(m => (m.PlaylistId == id) || (m.Id == id)).Select(m => m.Video).ToListAsync(); + } + foreach (var video in videos) + { + if (deleteExisting) + { + GetLogger().LogInformation($"{id}:Removing Descriptions for video ({video.Id})"); + + var transcriptions = await _context.Transcriptions.Where( t =>(t.VideoId == video.Id && t.SourceInternalRef =="Local-SceneDescription")).ToListAsync(); + foreach(var t in transcriptions) { + _context.RemoveRange(t.Captions); + } + _context.RemoveRange(transcriptions); + await _context.SaveChangesAsync(); + } + _describeVideoTask.Publish(video.Id); + } + } else if (type == TaskType.PythonCrawler.ToString()) { diff --git a/TaskEngine/Tasks/TranscriptionTask.cs b/TaskEngine/Tasks/TranscriptionTask.cs index 27f8e825..ee36cc4d 100644 --- a/TaskEngine/Tasks/TranscriptionTask.cs +++ b/TaskEngine/Tasks/TranscriptionTask.cs @@ -50,7 +50,7 @@ private async void buildMockCaptions(string videoId) foreach (var language in languages) { - var transcription = video.Transcriptions.SingleOrDefault(t => t.Language == language); + var transcription = video.Transcriptions.SingleOrDefault(t => t.Language == language && t.TranscriptionType == TranscriptionType.Caption); // Did we get the default or an existing Transcription entity? if (transcription == null) { @@ -206,11 +206,13 @@ protected async override Task OnConsume(string videoId, TaskParameters taskParam { t = new Transcription() { + TranscriptionType = TranscriptionType.Caption, Captions = theCaptions, Language = theLanguage, VideoId = video.Id, Label = $"{theLanguage} (ClassTranscribe)", - SourceInternalRef = "ClassTranscribe/Azure" + SourceInternalRef = "ClassTranscribe/Azure", + SourceLabel = "ClassTranscribe (Azure" + (phraseHints.Length>0 ?" with phrase hints)" : ")") }; _context.Add(t); } From 12e5a2401500259839192cccb6ab7e78914f4a57 Mon Sep 17 00:00:00 2001 From: Lawrence Angrave Date: Thu, 21 Dec 2023 21:19:56 -0600 Subject: [PATCH 2/5] LocalEnvVariablesFile + Support Altnerative Postgres port --- ClassTranscribeDatabase/CTDbContext.cs | 65 +++++- ClassTranscribeDatabase/Globals.cs | 1 + ClassTranscribeServer.sln | 3 +- LocalEnvironmentVariables.txt | 264 +++++++++++++++++++++++++ 4 files changed, 323 insertions(+), 10 deletions(-) create mode 100644 LocalEnvironmentVariables.txt diff --git a/ClassTranscribeDatabase/CTDbContext.cs b/ClassTranscribeDatabase/CTDbContext.cs index 8e59b4a5..ee96b710 100644 --- a/ClassTranscribeDatabase/CTDbContext.cs +++ b/ClassTranscribeDatabase/CTDbContext.cs @@ -74,12 +74,13 @@ public static string ConnectionStringBuilder() // TODO: Max MaxPoolSize and port should be configurable var configurations = CTDbContext.GetConfigurations(); - return "Server=" + configurations["POSTGRES_SERVER_NAME"] - + ";Port=5432" - + ";Database=" + configurations["POSTGRES_DB"] - + ";User Id=" + configurations["ADMIN_USER_ID"] - + ";Password=" + configurations["ADMIN_PASSWORD"] - + ";MaxPoolSize=1000;"; + string conn = $"Server={configurations["POSTGRES_SERVER_NAME"]};" + + $"Port={configurations["POSTGRES_SERVER_PORT"] ?? "5432"};" + + $"Database={configurations["POSTGRES_DB"]};" + + $"User Id={configurations["ADMIN_USER_ID"]};" + + $"Password={configurations["ADMIN_PASSWORD"]};" + + $"MaxPoolSize={configurations["POSTGRES_CLIENT_MAX_POOL_SIZE"] ?? "1000"};"; + return conn; } /// @@ -112,21 +113,67 @@ public static CTDbContext CreateDbContext() /// The configurations public static IConfiguration GetConfigurations() { + var basedir = System.IO.Directory.GetCurrentDirectory(); + + if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("POSTGRES_DB"))) + { + LoadEnvFileIfExists($"{basedir}/../../../LocalEnvironmentVariables.txt"); + } + var configuration = new ConfigurationBuilder().AddEnvironmentVariables().Build(); if (configuration.GetValue("DEV_ENV", "NULL") != "DOCKER") { - string path = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); string appSettingsFileName = "vs_appsettings.json"; + string path = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); if (File.Exists(path + Path.DirectorySeparatorChar + appSettingsFileName)) { return new ConfigurationBuilder().SetBasePath(path).AddJsonFile(appSettingsFileName).Build(); - } + } + } return configuration; } + public static string DropQuotes(string s) + { + char first = s.Length < 2 ? 'a': s[0]; + if (! "\"'".Contains(first)) + { + return s; + } + char last = s[^1]; + if(first == last) + { + return s[1..^1].Replace($"\\{first}", first.ToString()); + } + return s; + } + public static bool LoadEnvFileIfExists(string filePath) + { + if (!File.Exists(filePath)) { + Console.WriteLine($"Env file {filePath} not found - ignoring"); + return false; + } + var count = 0; + foreach (var line in File.ReadAllLines(filePath)) + { + if (!line.Contains("=") || line.TrimStart().StartsWith("#")) + continue; + + var parts = line.Split( '=', 2); + var key = parts[0].Trim(); + var val = DropQuotes( parts[1].Trim()); + //Console.WriteLine($"{key}:{val.Length} chars"); + + Environment.SetEnvironmentVariable(key,val); + count += 1; + } + Console.WriteLine($"{count} environment variables set using {filePath}"); + return true; + } + protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder) { optionsBuilder.UseNpgsql(ConnectionStringBuilder()); @@ -268,7 +315,7 @@ public override int SaveChanges(bool acceptAllChangesOnSuccess) return base.SaveChanges(acceptAllChangesOnSuccess); } - public override Task SaveChangesAsync(bool acceptAllChangesOnSuccess, CancellationToken cancellationToken = default(CancellationToken)) + public override Task SaveChangesAsync(bool acceptAllChangesOnSuccess, CancellationToken cancellationToken = default) { OnBeforeSaving(); return base.SaveChangesAsync(acceptAllChangesOnSuccess, cancellationToken); diff --git a/ClassTranscribeDatabase/Globals.cs b/ClassTranscribeDatabase/Globals.cs index 78491b07..9480b414 100644 --- a/ClassTranscribeDatabase/Globals.cs +++ b/ClassTranscribeDatabase/Globals.cs @@ -13,6 +13,7 @@ public class AppSettings public string JWT_KEY { get; set; } public string ALLOWED_HOSTS { get; set; } public string POSTGRES_SERVER_NAME { get; set; } + public string POSTGRES_SERVER_PORT { get; set; } = "5432"; public string POSTGRES_DB { get; set; } public string ADMIN_USER_ID { get; set; } diff --git a/ClassTranscribeServer.sln b/ClassTranscribeServer.sln index 4e687865..e76893aa 100644 --- a/ClassTranscribeServer.sln +++ b/ClassTranscribeServer.sln @@ -14,6 +14,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .editorconfig = .editorconfig .gitignore = .gitignore ct.proto = ct.proto + LocalEnvironmentVariables.txt = LocalEnvironmentVariables.txt vs_appsettings.json = vs_appsettings.json world_universities_and_domains.json = world_universities_and_domains.json EndProjectSection @@ -22,7 +23,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DevExperiments", "DevExperi EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTests", "UnitTests\UnitTests.csproj", "{E944DD15-8FC0-481D-8B79-ACC0D092296E}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestAzureCognitiveServices", "TestAzureCognitiveServices\TestAzureCognitiveServices.csproj", "{DA560288-98FC-4233-8CD5-252F8570CBFB}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestAzureCognitiveServices", "TestAzureCognitiveServices\TestAzureCognitiveServices.csproj", "{DA560288-98FC-4233-8CD5-252F8570CBFB}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/LocalEnvironmentVariables.txt b/LocalEnvironmentVariables.txt new file mode 100644 index 00000000..44444f9c --- /dev/null +++ b/LocalEnvironmentVariables.txt @@ -0,0 +1,264 @@ +MOCK_RECOGNITION = MOCK + +# Consider this a guide for the environment variables required to deploy ClassTranscribe +# Make a copy of this file and rename to - ".env" and place in Deployment/ +# ALL VARIABLES HAVE TO BE SET UNLESS MENTIONED OTHERWISE + +TRAEFIK_HTTP_PORT=8000 +TRAEFIK_HTTPS_PORT=8443 + +DATA=../../../../pyapi/devruntime/docker_data + +LTI_SHARED_SECRET=ltisecret +MEDIA_WORKER_SHARED_SECRET=mediasecret + +PERIODIC_CHECK_MINUTES=300 +# ---------------------------------------------------------------------- +# DATA DIRECTORY +# ---------------------------------------------------------------------- +# Description: The path to store all data of classtranscribe +# Instruction: Substitute with path passed an argument to create_directories.sh (For more info, check readme.md) +# Example: +# DATA=/classtranscribe_data +# ---------------------------------------------------------------------- + + +# ---------------------------------------------------------------------- +# HOST_NAME +# ---------------------------------------------------------------------- +# Description: The host name for where the website will be hosted +# Instruction: Subsititute with the host name. +# Example: +# HOST_NAME=my.classtranscribe.com +# ---------------------------------------------------------------------- +HOST_NAME=localhost + + +# ---------------------------------------------------------------------- +# ADMIN CONFIGURATION +# ---------------------------------------------------------------------- +# Description: These are used as the credentials for the following services - portainer, db, pgadmin, rabbitmq, traefik, letsencrypt +# Instruction: Subsititute with a valid email address and a strong password +# Protip: To generate password on bash run, "openssl rand -base64 16" +# Example: +# ADMIN_USER_ID=my_emailid@example.com +# ADMIN_PASSWORD=MyVeryStrongPassword123 +ADMIN_USER_ID=mahipal2@illinois.edu +ADMIN_PASSWORD=Test123 + + +# ---------------------------------------------------------------------- +# TEST ACCOUNT +# ---------------------------------------------------------------------- +# Description: This property is to enable a testuser for local development only. +# CAUTION: DO NOT ENABLE ON A PRODUCTION SERVER. +# Instruction: Subsititute with true or false +# Example: +# TEST_SIGN_IN=false +TEST_SIGN_IN=true + + +# ---------------------------------------------------------------------- +# JWT CONFIGURATION +# ---------------------------------------------------------------------- +# Description: This is used to generate authentication cookies by the api server. +# Instruction: Substitute with a long random string. +# Protip: To generate string on bash run, "openssl rand -base64 32" +# Example: +# JWT_KEY=cl9rdcPxDA6I9ARxJPlNYaZF7Xae7mgS7iqibI48xYQ= +JWT_KEY=hd0TnCmI7C4VUymKlQ6hLovlxN1QcgXM9jfReqXEP+A= + + +# ---------------------------------------------------------------------- +# AUTH0 CONFIGURATION +# ---------------------------------------------------------------------- +# Description: Auth0 is used as the authentication middleware for ClassTranscribe. +# Instructions: +# Step 1. Create a tenant on Auth0 +# Step 2. Set up the Connections on your Auth0 tenant (For more info, search google for "Auth0 Connections") +# Step 3. Create a Single Page Application on the Auth0 tenant.# +# Step 4. On auth0 fill in the following fields (If HOST_NAME used above is my.classtranscribe.com) +# a. Allowed Callback URLs - https://my.classtranscribe.com,https://my.classtranscribe.com/login +# b. Allowed Logout URLs - https://my.classtranscribe.com +# c. Allowed Web Origins - https://my.classtranscribe.com +# d. Allowed Origins(CORS) - https://my.classtranscribe.com +# Step 5. Save Changes +# Step 6. Subsititute with the auth0 domain and client Id generated. +# Example: +# AUTH0_CLIENT_ID=laksjfdsahfjkhfkjhsdhffj +# AUTH0_DOMAIN=something.auth0.com +AUTH0_CLIENT_ID=changeme +AUTH0_DOMAIN=changeme + +# ---------------------------------------------------------------------- +# CILOGON CONFIGURATION +# ---------------------------------------------------------------------- +CILOGON_CLIENT_ID=cilogon:/client_id/0 +CILOGON_CLIENT_SECRET=changeme +CILOGON_DOMAIN=cilogon.org + +# ---------------------------------------------------------------------- +# PORTAINER CONFIGURATION +# ---------------------------------------------------------------------- +# Description: To set the password for the portainer dashboard it must be passed to portainer as a hash. +# Instruction: 1. Run the following command below, (Substitute with the password generated above) +# "docker run --rm httpd:2.4-alpine htpasswd -nbB admin '' | cut -d ":" -f 2" +# 2. Substitute generated string below. +# (For more info, refer https://portainer.readthedocs.io/en/stable/configuration.html) +# Example: If ADMIN_PASSWORD is Test123, generated hash is $2y$05$KRCr38aM/Avz8fXMDPN3aetQSCql.H5fMXzMcdyX7XNw5HauB20ay +# PORTAINER_PASSWORD=$2y$05$KRCr38aM/Avz8fXMDPN3aetQSCql.H5fMXzMcdyX7XNw5HauB20ay +PORTAINER_PASSWORD=$$2y$$05$$KRCr38aM/Avz8fXMDPN3aetQSCql.H5fMXzMcdyX7XNw5HauB20ay + + +# ---------------------------------------------------------------------- +# TRAEFIK CONFIGURATION (Modify if deployed on server) +# ---------------------------------------------------------------------- +# Traefik is the reverse proxy used for routing, more info - https://docs.traefik.io/ +# Description: TRAEFIK_IPFILTER accepts / refuses requests based on the client IP. +# For more info - https://docs.traefik.io/v2.0/middlewares/ipwhitelist/ +# Instruction: Subsititute with required ipwhitelist +# Example: +# TRAEFIK_IPFILTER=123.123.0.0/12, 141.142.0.0/16, 0.0.0.0/0 +TRAEFIK_IPFILTER=123.123.0.0/12, 141.142.0.0/16, 0.0.0.0/0 + +# Description: Traefik could generate SSL certificates for additional host names. +# Instruction: Specify up to 2 additional hosts to generate SSL certificates for. +# Eg. TRAEFIK_ADDITIONAL_HOST1=my.classtranscribe.org +# Eg. TRAEFIK_ADDITIONAL_HOST2=my.classtranscribe.net +TRAEFIK_ADDITIONAL_HOST1= +TRAEFIK_ADDITIONAL_HOST2= + + +# ---------------------------------------------------------------------- +# AZURE APPLICATION INSIGHTS CONFIGURATION +# ---------------------------------------------------------------------- +# Description: Azure Application Insights allows reporting of all exceptions to Azure. +# Instruction: Create an Azure Application Insights service on Azure (For more info, google "Azure Application Insights") +# And, substitute below with the application insights key (aka Instruentation Key) +# Example: +# APPLICATION_INSIGHTS_KEY=6f7a5ee2-0811-4e3a-beaa-498af35d6ea2 +APPLICATION_INSIGHTS_KEY=0 + + +# ---------------------------------------------------------------------- +# SLACK WEBHOOK CONFIGURATION +# ---------------------------------------------------------------------- +# Description: To enable logging and monitoring messages to slack. +# Instruction: Create an incoming webhook on Slack, (For more info, https://api.slack.com/messaging/webhooks) +# Subsititute the url below, also substitute in dem_conf.yml +# Example: +# SLACK_WEBHOOK_URL=https://hooks.slack.com/services/abcdef/ghijkl/lkdsjfldskfksldljf +SLACK_WEBHOOK_URL=https://hooks.slack.com/services/changeme + + +# ---------------------------------------------------------------------- +# Task Engine Configuration +# ---------------------------------------------------------------------- +# Task Engine uses various media providers, their respective API keys need to be configured + +# Youtube +# Description: To enable Youtube Integration +# Instruction: Generate a youtube api key and subsititute below. (For more info, https://developers.google.com/youtube/registering_an_application) +# Eg. YOUTUBE_API_KEY=AIzaSyDKnpdznYOFxm_IRnrclGh4oSdQloZsdfsdo +YOUTUBE_API_KEY=changeme + +# Azure +# Description: ClassTranscribe uses Azure Cognitive Services to produce transcriptions, these require azure subscription keys +# Instruction: Generate and subsititute with subscription keys and regions, (For more info, https://azure.microsoft.com/en-us/services/cognitive-services/speech-to-text/) +# Multiple keys can be used, use a comma as separator between key and region and semi-colon to add multiple. +# Example: +AZURE_SUBSCRIPTION_KEYS=changeme,northcentralus + +# Kaltura +# Description: To enable a Katura/Mediaspace integration +# Instruction: Contact Mediaspace admin to obtain the partner_id, token_id and app_token. +# Example: + +KALTURA_PARTNER_ID=1329972 +KALTURA_TOKEN_ID=changeme +KALTURA_APP_TOKEN=changeme + + +# Box +# Description: To enable a Box integration +# Instruction: Contact Box admin to get client_id and client_secret +# Example: +# BOX_CLIENT_ID=jsdksdfhsdkfhskjdhfskjfhd +# BOX_CLIENT_SECRET=jlksdjfksjlsljslkfjlksjlkdsflk +BOX_CLIENT_ID= +BOX_CLIENT_SECRET= + +# ---------------------------------------------------------------------- +# FRONTEND CONFIGURATION (Optional) +# ---------------------------------------------------------------------- +# Description: Frontend typically points to the same host for the api service, if we need to change this explicitly setting this is required +# Instruction: Subsititute with alternate endpoint +# Example: +# REACT_APP_TESTING_BASE_URL=https://my.classtranscribe2.com +REACT_APP_TESTING_BASE_URL= +# Description: Setting this variable will trigger a hard reload on the uesr's device, whenever a new commit is made to the branch +# Instruction: Subsititute with an endpoint like below +# Example: +# REACT_APP_FRONTEND_COMMIT_ENDPOINT=https://api.github.com/repos/classtranscribe/Frontend/commits/master +REACT_APP_FRONTEND_COMMIT_ENDPOINT=https://api.github.com/repos/classtranscribe/Frontend/commits/master + +# ---------------------------------------------------------------------- +# RABBITMQ CONFIGURATION (Optional) +# ---------------------------------------------------------------------- +# Description: RabbitMQ Prefetch count sets the number of jobs rabbitmq can process in parallel. +# Instruction: Subsititute with an integer, default value is 10 if undefined +# Example: +# RABBITMQ_PREFETCH_COUNT=10 +#no longer used RABBITMQ_PREFETCH_COUNT=10 + +# New +# Max number of threads used by video processing +JOB_MAX_THREADS=1 + +MAX_CONCURRENT_VIDEO_TASKS=1 +MAX_CONCURRENT_SYNC_TASKS=1 +MAX_CONCURRENT_TRANSCRIPTIONS=1 + +#single threaded OCR +OMP_THREAD_LIMIT=1 + +SCENE_DETECT_FPS=0.5 +SCENE_DETECT_USE_FACE=true +SCENE_DETECT_USE_OCR=false + +#-------TRAEFIK----------- +TRAEFIK_HTTPS_OPTIONS=TLS +# Always use https, traffic to http is redirected to https +TRAEFIK_HTTP_REDIRECT=Redirect.EntryPoint:https +TRAEFIK_ACME_ENABLE=false + +#-------API----------- +ALLOWED_HOSTS=* +JWT_EXPIRE_DAYS=30 +ASPNETCORE_URLS=http://+:80 +DEV_ENV=OUTSIDEDOCKER +NODE_RPC_SERVER=localhost:50052 +PYTHON_RPC_SERVER=localhost:50051 +RabbitMQServer=localhost +#-------POSTGRES----------- +# Fixed server name, db name and data storage locations +POSTGRES_SERVER_NAME=localhost +POSTGRES_SERVER_PORT=5433 +PGDATA=/var/lib/postgresql/data/pgdata +POSTGRES_DB=ct2019db +#-------RPCSERVER----------- +DATA_DIRECTORY=/data +#TMPDIR=/data/temp +#-------NGINX----------- +NGINX_HTTP_PORT=80 +NGINX_HTTPS_PORT=443 +#VAR= +#-------FRONTEND----------- +NODE_PATH=src +#-------PGADMIN----------- +SCRIPT_NAME=/pgadmin + +#-------ELASTIC SEARCH------- +ES_CONNECTION_ADDR=http://localhost:9200 +ES_INDEX_TIME_TO_LIVE=2880 + From 5d40ea2ffd7bbaed7ccdb976789f1513654f47a8 Mon Sep 17 00:00:00 2001 From: Lawrence Angrave Date: Thu, 21 Dec 2023 21:23:53 -0600 Subject: [PATCH 3/5] Code Cleanup --- ClassTranscribeDatabase/Models/Caption.cs | 8 +- ClassTranscribeDatabase/Models/Models.cs | 8 +- .../Services/MSTranscription/KeyProvider.cs | 8 +- .../Services/RabbitMQConnection.cs | 112 +++--- .../Controllers/AdminController.cs | 2 +- .../Controllers/EPubsController.cs | 4 +- .../Controllers/PlaylistsController.cs | 20 +- .../Controllers/TaskController.cs | 8 +- ClassTranscribeServer/Utils/WakeDownloader.cs | 114 +++--- TaskEngine/Tasks/BuildElasticIndexTask.cs | 2 +- TaskEngine/Tasks/CleanUpElasticIndexTask.cs | 2 +- TaskEngine/Tasks/ConvertVideoToWavTask.cs | 2 +- TaskEngine/Tasks/CreateBoxTokenTask.cs | 2 +- TaskEngine/Tasks/DownloadMediaTask.cs | 8 +- TaskEngine/Tasks/DownloadPlaylistInfoTask.cs | 2 +- TaskEngine/Tasks/ExampleTask.cs | 2 +- TaskEngine/Tasks/GenerateVTTFileTask.cs | 2 +- TaskEngine/Tasks/ProcessVideoTask.cs | 2 +- TaskEngine/Tasks/QueueAwakerTask.cs | 329 +++++++++--------- TaskEngine/Tasks/RabbitMQTask.cs | 52 +-- TaskEngine/Tasks/SceneDetectionTask.cs | 2 +- TaskEngine/Tasks/TranscriptionTask.cs | 2 +- 22 files changed, 365 insertions(+), 328 deletions(-) diff --git a/ClassTranscribeDatabase/Models/Caption.cs b/ClassTranscribeDatabase/Models/Caption.cs index d5840e60..398ed19e 100644 --- a/ClassTranscribeDatabase/Models/Caption.cs +++ b/ClassTranscribeDatabase/Models/Caption.cs @@ -122,7 +122,7 @@ public static List ToCaptionEntitiesInterpolate(int captionsCount, Time else { caption = tempCaption.Substring(0, index); - tempCaption = tempCaption.Substring(index); + tempCaption = tempCaption[index..]; tempCaption = tempCaption.Trim(); } curEnd = curBegin.Add(new TimeSpan(0, 0, 0, 0, newDuration)); @@ -147,8 +147,8 @@ public static List ToCaptionEntitiesInterpolate(int captionsCount, Time End = curEnd, Text = tempCaption }); - curBegin = curEnd; - curDuration = End.Subtract(curBegin); + // curBegin = curEnd; + // curDuration = End.Subtract(curBegin); } return captions; } @@ -211,7 +211,7 @@ public static string GenerateWebVTTString(List captions, string languag // public static string GenerateDXFPString(List captions, string language) { - string now = DateTime.UtcNow.ToString("o", System.Globalization.CultureInfo.InvariantCulture); + // string now = DateTime.UtcNow.ToString("o", System.Globalization.CultureInfo.InvariantCulture); string header = @" public class KeyProvider { - private AppSettings _appSettings; - private List Keys; - private HashSet CurrentVideoIds; + private readonly AppSettings _appSettings; + private readonly List Keys; + private readonly HashSet CurrentVideoIds; public KeyProvider(AppSettings appSettings) { _appSettings = appSettings; - string subscriptionKeys = _appSettings.AZURE_SUBSCRIPTION_KEYS; + string subscriptionKeys = _appSettings.AZURE_SUBSCRIPTION_KEYS ?? ""; Keys = new List(); CurrentVideoIds = new HashSet(); diff --git a/ClassTranscribeDatabase/Services/RabbitMQConnection.cs b/ClassTranscribeDatabase/Services/RabbitMQConnection.cs index ad19b298..86a9800a 100644 --- a/ClassTranscribeDatabase/Services/RabbitMQConnection.cs +++ b/ClassTranscribeDatabase/Services/RabbitMQConnection.cs @@ -34,21 +34,21 @@ public ClientActiveTasks(ClientActiveTasks source) : base(source) public class RabbitMQConnection : IDisposable { // Created by the first instance, then re-used - private static IConnection _connection; - private static int _connectionRefCount; + private static IConnection Connection; + private static int ConnectionRefCount; - IModel _channel { get; set; } - String _expiration; // milliseconds + IModel Channel { get; set; } + String Expiration; // milliseconds - private readonly ILogger _logger; + private readonly ILogger Logger; public RabbitMQConnection(ILogger logger) { - _logger = logger; + Logger = logger; CreateSharedConnection(); // TODO/TOREVIEW: Check number of threads created // Potentially Model can be shared too - _channel = _connection.CreateModel(); + Channel = Connection.CreateModel(); uint time = Math.Max(1, Convert.ToUInt32(Globals.appSettings.RABBITMQ_TASK_TTL_MINUTES)); SetMessageExpiration(time); @@ -57,12 +57,12 @@ public RabbitMQConnection(ILogger logger) private void CreateSharedConnection() { - if (_connection != null) + if (Connection != null) { - _connectionRefCount++; + ConnectionRefCount++; return; } - _logger.LogInformation("Creating RabbitMQ connection"); + Logger.LogInformation("Creating RabbitMQ connection"); var factory = new ConnectionFactory() { @@ -77,22 +77,22 @@ private void CreateSharedConnection() // In 2021 we can remove support for the old variable if (Globals.appSettings.RabbitMQServer.Length > 0) { - _logger.LogError("*** Mixed case 'RabbitMQServer' environment variable is deprecated. Review your .env or vs_appsettings.json environment settings"); + Logger.LogError("*** Mixed case 'RabbitMQServer' environment variable is deprecated. Review your .env or vs_appsettings.json environment settings"); if (Globals.appSettings.RABBITMQ_SERVER_NAME.Length == 0) { - _logger.LogError("*** Update your environment to use RABBITMQ_SERVER_NAME."); + Logger.LogError("*** Update your environment to use RABBITMQ_SERVER_NAME."); } else if (Globals.appSettings.RABBITMQ_SERVER_NAME != Globals.appSettings.RabbitMQServer) { { - _logger.LogError("*** RABBITMQ_SERVER_NAME and RabbitMQServer are both set and different! Using RABBITMQ_SERVER_NAME"); + Logger.LogError("*** RABBITMQ_SERVER_NAME and RabbitMQServer are both set and different! Using RABBITMQ_SERVER_NAME"); } } } - _logger.LogInformation($"Connecting to RabbitMQ server {factory.HostName} with user {factory.UserName} on port {factory.Port}..."); - _connection = factory.CreateConnection(); - _connectionRefCount = 1; + Logger.LogInformation($"Connecting to RabbitMQ server {factory.HostName} with user {factory.UserName} on port {factory.Port}..."); + Connection = factory.CreateConnection(); + ConnectionRefCount = 1; } @@ -100,8 +100,8 @@ public void SetMessageExpiration(uint ttlMinutes) { uint OneMinuteAsMilliseconds = 1000 * 60; - _expiration = (OneMinuteAsMilliseconds * ttlMinutes).ToString(); - _logger.LogInformation("Using Message TTL {0} minutes", ttlMinutes); + Expiration = (OneMinuteAsMilliseconds * ttlMinutes).ToString(); + Logger.LogInformation("Using Message TTL {0} minutes", ttlMinutes); } /// @@ -118,23 +118,23 @@ public void PublishTask(string queueName, T data, TaskParameters taskParamete // they might publish a task to different queue? // Better? Should we create the queue (in Program.cs) separately from servicing or publishing the queue // Or is always (re-)declaring the queue best practices for RabbitNQ? - lock ((_channel)) + lock ((Channel)) { - _channel.QueueDeclare(queue: queueName, durable: true, exclusive: false, autoDelete: false, arguments: null); + Channel.QueueDeclare(queue: queueName, durable: true, exclusive: false, autoDelete: false, arguments: null); } var taskObject = new TaskObject { Data = data, TaskParameters = taskParameters }; var body = CommonUtils.MessageToBytes(taskObject); - var properties = _channel.CreateBasicProperties(); + var properties = Channel.CreateBasicProperties(); properties.Persistent = true; // Note delivered but unacked messages do not expire - properties.Expiration = _expiration; // milliseconds + properties.Expiration = Expiration; // milliseconds // See https://www.rabbitmq.com/dotnet-api-guide.html#concurrency-channel-sharing // Use a lock to ensure thread safety - lock (_channel) + lock (Channel) { - _channel.BasicPublish(exchange: "", routingKey: queueName, basicProperties: properties, body: body); + Channel.BasicPublish(exchange: "", routingKey: queueName, basicProperties: properties, body: body); } } @@ -148,10 +148,10 @@ public void PublishTask(string queueName, T data, TaskParameters taskParamete public void ConsumeTask(string queueName, Func OnConsume, Func PostConsumeCleanup, ushort concurrency) { // Caution. The queue is also declard inside PublishTask above - _logger.LogInformation("Prefetch concurrency count {0}", concurrency); - lock (_channel) + Logger.LogInformation("Prefetch concurrency count {0}", concurrency); + lock (Channel) { - _channel.QueueDeclare(queue: queueName, + Channel.QueueDeclare(queue: queueName, durable: true, exclusive: false, autoDelete: false, @@ -159,15 +159,15 @@ public void ConsumeTask(string queueName, Func { // This object exists so that we can wrap all OnConsumes with a try-finally here @@ -177,7 +177,7 @@ public void ConsumeTask(string queueName, Func>(ea.Body); - _logger.LogInformation(" [x] {0} Received {1}", queueName, taskObject.ToString()); + Logger.LogInformation(" [x] {0} Received {1}", queueName, taskObject.ToString()); // TODO: Update JobStatus table here (started timestamp) try { @@ -185,23 +185,23 @@ public void ConsumeTask(string queueName, Func(string queueName, Func> UpdateSceneDataSchema(String requestId) } else { JToken olddata = video.SceneData; TextData data = new TextData(); - data.setFromJSON(olddata); + data.SetFromJSON(olddata); _context.TextData.Add(data); video.SceneObjectDataId = data.Id; System.Diagnostics.Trace.Assert(!string.IsNullOrEmpty(data.Id)); diff --git a/ClassTranscribeServer/Controllers/EPubsController.cs b/ClassTranscribeServer/Controllers/EPubsController.cs index 1e3a8323..bdc48e6b 100644 --- a/ClassTranscribeServer/Controllers/EPubsController.cs +++ b/ClassTranscribeServer/Controllers/EPubsController.cs @@ -96,7 +96,7 @@ public async Task>> GetEpubData(string mediaId, } TextData data = await _context.TextData.FindAsync(video.SceneObjectDataId); _logger.LogInformation($"GetEpubData({mediaId},{language}) getting scenedata as JArray"); - JArray sceneArray = data.getAsJSON()["Scenes"] as JArray; + JArray sceneArray = data.GetAsJSON()["Scenes"] as JArray; EPub epub = new EPub { @@ -124,7 +124,7 @@ public async Task> GetGlossaryData(string mediaId) Video video = await _context.Videos.FindAsync(media.VideoId); if (video.HasGlossaryData()) { TextData data = await _context.TextData.FindAsync(video.GlossaryDataId); - return data.getAsJSON(); + return data.GetAsJSON(); } return video.Glossary; diff --git a/ClassTranscribeServer/Controllers/PlaylistsController.cs b/ClassTranscribeServer/Controllers/PlaylistsController.cs index 4c19629d..fbc18c68 100644 --- a/ClassTranscribeServer/Controllers/PlaylistsController.cs +++ b/ClassTranscribeServer/Controllers/PlaylistsController.cs @@ -89,7 +89,7 @@ public async Task>> GetPlaylists(string of var authorizationResult = await _authorizationService.AuthorizeAsync(this.User, offering, Globals.POLICY_READ_OFFERING); if (!authorizationResult.Succeeded) { - return Unauthorized(new { Reason = "Insufficient Permission", AccessType = offering.AccessType }); + return Unauthorized(new { Reason = "Insufficient Permission", offering.AccessType }); } var temp = await _context.Playlists .Where(p => p.OfferingId == offeringId) @@ -126,7 +126,7 @@ public async Task>> GetPlaylists2(string o var authorizationResult = await _authorizationService.AuthorizeAsync(this.User, offering, Globals.POLICY_READ_OFFERING); if (!authorizationResult.Succeeded) { - return Unauthorized(new { Reason = "Insufficient Permission", AccessType = offering.AccessType }); + return Unauthorized(new { Reason = "Insufficient Permission", offering.AccessType }); } var temp = await _context.Playlists .Where(p => p.OfferingId == offeringId) @@ -152,7 +152,7 @@ public async Task>> GetPlaylists2(string o JsonMetadata = m.JsonMetadata, CreatedAt = m.CreatedAt, SceneDetectReady = m.Video.HasSceneObjectData(), - Ready = m.Video == null ? false : "NoError" == m.Video.TranscriptionStatus , + Ready = m.Video != null && "NoError" == m.Video.TranscriptionStatus , SourceType = m.SourceType, Duration = m.Video?.Duration, PublishStatus = m.PublishStatus, @@ -167,8 +167,8 @@ public async Task>> GetPlaylists2(string o Transcriptions = m.Video.Transcriptions.Select(t => new TranscriptionDTO { Id = t.Id, - Path = t.File != null ? t.File.Path : null, - SrtPath = t.SrtFile != null ? t.SrtFile.Path : null, + Path = t.File?.Path, + SrtPath = t.SrtFile?.Path, Language = t.Language, Label = t.Label, SourceLabel = t.SourceLabel, @@ -239,18 +239,18 @@ public async Task> GetPlaylist(string id) Duration = m.Video?.Duration, PublishStatus = m.PublishStatus, SceneDetectReady = m.Video != null && m.Video.HasSceneObjectData(), - Ready = m.Video == null ? false : "NoError" == m.Video.TranscriptionStatus , + Ready = m.Video != null && "NoError" == m.Video.TranscriptionStatus , Video = m.Video == null ? null : new VideoDTO { Id = m.Video.Id, Video1Path = m.Video.Video1?.Path, Video2Path = m.Video.Video2?.Path }, - Transcriptions = m.Video == null ? null : m.Video.Transcriptions.Select(t => new TranscriptionDTO + Transcriptions = m.Video?.Transcriptions.Select(t => new TranscriptionDTO { Id = t.Id, - Path = t.File != null ? t.File.Path : null, - SrtPath = t.SrtFile != null ? t.SrtFile.Path : null, + Path = t.File?.Path, + SrtPath = t.SrtFile?.Path, Language = t.Language }).ToList(), WatchHistory = user != null ? partialWatchHistories.Where(w => w.MediaId == m.Id).FirstOrDefault() :null @@ -433,7 +433,7 @@ public async Task Reorder(string offeringId, List playlist } _context.Playlists.UpdateRange(playlists); await _context.SaveChangesAsync(); - return RedirectToAction("GetPlaylists", new { offeringId = offeringId }); + return RedirectToAction("GetPlaylists", new { offeringId }); } private bool PlaylistExists(string id) diff --git a/ClassTranscribeServer/Controllers/TaskController.cs b/ClassTranscribeServer/Controllers/TaskController.cs index 73441115..15df46aa 100644 --- a/ClassTranscribeServer/Controllers/TaskController.cs +++ b/ClassTranscribeServer/Controllers/TaskController.cs @@ -69,7 +69,7 @@ public async Task UpdateSceneData(string videoId, JObject scene) } private void createDescriptionsIfNone(Video v, TextData scenedata) { - JArray scenes = scenedata.getAsJSON()["Scenes"] as JArray; + JArray scenes = scenedata.GetAsJSON()["Scenes"] as JArray; if (scenes == null || v == null || v.Id == null) { return; @@ -131,7 +131,7 @@ public async Task> GetSceneData(string videoId) { Video video = await _context.Videos.FindAsync(videoId); if(video.HasSceneObjectData()) { TextData data = await _context.TextData.FindAsync(video.SceneObjectDataId); - return data.getAsJSON(); + return data.GetAsJSON(); } // old version - return video.SceneData; @@ -195,7 +195,7 @@ public async Task> GetGlossary(string videoId) { Video video = await _context.Videos.FindAsync(videoId); if(video.HasGlossaryData()) { TextData data = await _context.TextData.FindAsync(video.GlossaryDataId); - return data.getAsJSON(); + return data.GetAsJSON(); } // old version - return video.Glossary; @@ -230,7 +230,7 @@ public async Task> GetGlossaryTimestamp(string videoId) { Video video = await _context.Videos.FindAsync(videoId); if(video.HasGlossaryTimestamp()) { TextData data = await _context.TextData.FindAsync(video.GlossaryTimestampId); - return data.getAsJSON(); + return data.GetAsJSON(); } // old version - return NotFound(); diff --git a/ClassTranscribeServer/Utils/WakeDownloader.cs b/ClassTranscribeServer/Utils/WakeDownloader.cs index a5d10e14..9cff435f 100644 --- a/ClassTranscribeServer/Utils/WakeDownloader.cs +++ b/ClassTranscribeServer/Utils/WakeDownloader.cs @@ -14,104 +14,130 @@ public WakeDownloader(RabbitMQConnection rabbitMQ) //Todo: Fix field capitalization in here and QueueAwakerTask.cs public void UpdateAllPlaylists() { - JObject msg = new JObject(); - msg.Add("Type", TaskType.DownloadAllPlaylists.ToString()); + JObject msg = new JObject + { + { "Type", TaskType.DownloadAllPlaylists.ToString() } + }; Wake(msg); } public virtual void UpdatePlaylist(string playlistId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.DownloadPlaylistInfo.ToString()); - msg.Add("PlaylistId", playlistId); + JObject msg = new JObject + { + { "Type", TaskType.DownloadPlaylistInfo.ToString() }, + { "PlaylistId", playlistId } + }; Wake(msg); } public virtual void UpdateVTTFile(string transcriptionId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.GenerateVTTFile.ToString()); - msg.Add("TranscriptionId", transcriptionId); + JObject msg = new JObject + { + { "Type", TaskType.GenerateVTTFile.ToString() }, + { "TranscriptionId", transcriptionId } + }; Wake(msg); } public void UpdateOffering(string offeringId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.UpdateOffering.ToString()); - msg.Add("offeringId", offeringId); + JObject msg = new JObject + { + { "Type", TaskType.UpdateOffering.ToString() }, + { "offeringId", offeringId } + }; Wake(msg); } public void PeriodicCheck() { - JObject msg = new JObject(); - msg.Add("Type", TaskType.PeriodicCheck.ToString()); + JObject msg = new JObject + { + { "Type", TaskType.PeriodicCheck.ToString() } + }; Wake(msg); } public void GenerateScenes(string mediaId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.SceneDetection.ToString()); - msg.Add("mediaId", mediaId); + JObject msg = new JObject + { + { "Type", TaskType.SceneDetection.ToString() }, + { "mediaId", mediaId } + }; Wake(msg); } public void CreateBoxToken(string authCode) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.CreateBoxToken.ToString()); - msg.Add("authCode", authCode); + JObject msg = new JObject + { + { "Type", TaskType.CreateBoxToken.ToString() }, + { "authCode", authCode } + }; Wake(msg); } public void DownloadMedia(string mediaId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.DownloadMedia.ToString()); - msg.Add("mediaId", mediaId); + JObject msg = new JObject + { + { "Type", TaskType.DownloadMedia.ToString() }, + { "mediaId", mediaId } + }; Wake(msg); } public void ConvertMedia(string videoId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.ConvertMedia.ToString()); - msg.Add("videoId", videoId); + JObject msg = new JObject + { + { "Type", TaskType.ConvertMedia.ToString() }, + { "videoId", videoId } + }; Wake(msg); } public void TranscribeVideo(string videoOrMediaId, bool deleteExisting) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.TranscribeVideo.ToString()); - msg.Add("videoOrMediaId", videoOrMediaId); - msg.Add("DeleteExisting", deleteExisting); + JObject msg = new JObject + { + { "Type", TaskType.TranscribeVideo.ToString() }, + { "videoOrMediaId", videoOrMediaId }, + { "DeleteExisting", deleteExisting } + }; Wake(msg); } public virtual void SceneDetection(string videoMediaPlaylistId, bool deleteExisting) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.SceneDetection.ToString()); - msg.Add("videoMediaPlaylistId", videoMediaPlaylistId); - msg.Add("DeleteExisting", deleteExisting); + JObject msg = new JObject + { + { "Type", TaskType.SceneDetection.ToString() }, + { "videoMediaPlaylistId", videoMediaPlaylistId }, + { "DeleteExisting", deleteExisting } + }; Wake(msg); } public virtual void DescribeVideo(string videoMediaPlaylistId, bool deleteExisting) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.DescribeVideo.ToString()); - msg.Add("videoMediaPlaylistId", videoMediaPlaylistId); - msg.Add("DeleteExisting", deleteExisting); + JObject msg = new JObject + { + { "Type", TaskType.DescribeVideo.ToString() }, + { "videoMediaPlaylistId", videoMediaPlaylistId }, + { "DeleteExisting", deleteExisting } + }; Wake(msg); } public void UpdateASLVideo(string sourceId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.PythonCrawler.ToString()); - msg.Add("SourceId", sourceId); + JObject msg = new JObject + { + { "Type", TaskType.PythonCrawler.ToString() }, + { "SourceId", sourceId } + }; Wake(msg); } @@ -123,9 +149,11 @@ private void Wake(JObject message, TaskParameters taskParameters = null) public void ReTranscribePlaylist(string playlistId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.ReTranscribePlaylist.ToString()); - msg.Add("PlaylistId", playlistId); + JObject msg = new JObject + { + { "Type", TaskType.ReTranscribePlaylist.ToString() }, + { "PlaylistId", playlistId } + }; Wake(msg); } } diff --git a/TaskEngine/Tasks/BuildElasticIndexTask.cs b/TaskEngine/Tasks/BuildElasticIndexTask.cs index 7ad38018..893ba6af 100644 --- a/TaskEngine/Tasks/BuildElasticIndexTask.cs +++ b/TaskEngine/Tasks/BuildElasticIndexTask.cs @@ -38,7 +38,7 @@ public BuildElasticIndexTask(RabbitMQConnection rabbitMQ, } protected async override Task OnConsume(string example, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, "BuildElasticIndexTask"); // may throw AlreadyInProgress exception + RegisterTask(cleanup, "BuildElasticIndexTask"); // may throw AlreadyInProgress exception GetLogger().LogInformation("BuildElasticIndexTask Starting"); diff --git a/TaskEngine/Tasks/CleanUpElasticIndexTask.cs b/TaskEngine/Tasks/CleanUpElasticIndexTask.cs index 610d3c2a..661003f5 100644 --- a/TaskEngine/Tasks/CleanUpElasticIndexTask.cs +++ b/TaskEngine/Tasks/CleanUpElasticIndexTask.cs @@ -38,7 +38,7 @@ public CleanUpElasticIndexTask(RabbitMQConnection rabbitMQ, } protected async override Task OnConsume(string example, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, "CleanUpElasticIndexTask"); // may throw AlreadyInProgress exception + RegisterTask(cleanup, "CleanUpElasticIndexTask"); // may throw AlreadyInProgress exception GetLogger().LogInformation("CleanUpElasticIndexTask Starting"); var result = await _client.Indices.GetAsync(new GetIndexRequest(Indices.All)); diff --git a/TaskEngine/Tasks/ConvertVideoToWavTask.cs b/TaskEngine/Tasks/ConvertVideoToWavTask.cs index 0616a102..a8e2f363 100644 --- a/TaskEngine/Tasks/ConvertVideoToWavTask.cs +++ b/TaskEngine/Tasks/ConvertVideoToWavTask.cs @@ -32,7 +32,7 @@ public ConvertVideoToWavTask(RabbitMQConnection rabbitMQ, RpcClient rpcClient, T protected override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, videoId); + RegisterTask(cleanup, videoId); throw new Exception("ConvertVideoToWavTask No longer used. Videoid= " + videoId); } diff --git a/TaskEngine/Tasks/CreateBoxTokenTask.cs b/TaskEngine/Tasks/CreateBoxTokenTask.cs index 9bba62fb..40d1c6ac 100644 --- a/TaskEngine/Tasks/CreateBoxTokenTask.cs +++ b/TaskEngine/Tasks/CreateBoxTokenTask.cs @@ -22,7 +22,7 @@ public CreateBoxTokenTask(RabbitMQConnection rabbitMQ, BoxAPI box, ILogger DownloadKalturaVideo(string subdir, Media media) public async Task /// - public void registerTask(HashSet cleanup, Object keyId) + public void RegisterTask(HashSet cleanup, Object keyId) { if (cleanup == null || keyId == null) { @@ -121,16 +123,16 @@ public void registerTask(HashSet cleanup, Object keyId) if (cleanup.Contains(keyId)) { // This is a programming error the same message may not register the same key twice - throw new Exception($"Cleanup set may not already contain key ({keyId.ToString()})"); + throw new Exception($"Cleanup set may not already contain key ({keyId})"); } // Now check that globally there is no other task working on the same id // This may happen rarely. The purpose of registerTask is to immediately stop (by throwing an exception) if we discover we are late to the party. - alreadyRunning = !_inProgress[_queueName].Add(keyId); + alreadyRunning = !_inProgress[QueueName].Add(keyId); } if (alreadyRunning) { - _logger.LogError("{0} for {1} Task already running, so skipping this request and throwing exception", _queueName, keyId); - throw new InProgressException($"{ _queueName} for {keyId} Task already running, so skipping this request"); + Logger.LogError("{0} for {1} Task already running, so skipping this request and throwing exception", QueueName, keyId); + throw new InProgressException($"{ QueueName} for {keyId} Task already running, so skipping this request"); } cleanup.Add(keyId); @@ -144,13 +146,13 @@ public ClientActiveTasks GetCurrentTasks() { lock (_inProgress) { - return new ClientActiveTasks(_inProgress[_queueName]); + return new ClientActiveTasks(_inProgress[QueueName]); } } protected ILogger GetLogger() { - return _logger; + return Logger; } } [Serializable] diff --git a/TaskEngine/Tasks/SceneDetectionTask.cs b/TaskEngine/Tasks/SceneDetectionTask.cs index 1df26661..1baf80c0 100644 --- a/TaskEngine/Tasks/SceneDetectionTask.cs +++ b/TaskEngine/Tasks/SceneDetectionTask.cs @@ -31,7 +31,7 @@ public SceneDetectionTask(RabbitMQConnection rabbitMQ,TranscriptionTask transcri /// Beware: It is possible to start another scene task while the first one is still running protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, videoId); // may throw AlreadyInProgress exception + RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception GetLogger().LogInformation($"SceneDetection({videoId}): Consuming Task"); var filepath = ""; diff --git a/TaskEngine/Tasks/TranscriptionTask.cs b/TaskEngine/Tasks/TranscriptionTask.cs index ee36cc4d..8d28538b 100644 --- a/TaskEngine/Tasks/TranscriptionTask.cs +++ b/TaskEngine/Tasks/TranscriptionTask.cs @@ -95,7 +95,7 @@ private async void buildMockCaptions(string videoId) /// protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, videoId); // may throw AlreadyInProgress exception + RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception if (Globals.appSettings.MOCK_RECOGNITION == "MOCK") { buildMockCaptions(videoId); From a7382c90f85b0b4962bb565d2c243a15d477cda2 Mon Sep 17 00:00:00 2001 From: Lawrence Angrave Date: Thu, 21 Dec 2023 21:25:35 -0600 Subject: [PATCH 4/5] Add Video and Image Description Support --- TaskEngine/Program.cs | 25 ++- TaskEngine/Tasks/DescribeImageTask.cs | 57 +++++-- TaskEngine/Tasks/DescribeVideoTask.cs | 212 ++++++++++++++------------ 3 files changed, 177 insertions(+), 117 deletions(-) diff --git a/TaskEngine/Program.cs b/TaskEngine/Program.cs index e7340510..6492220a 100644 --- a/TaskEngine/Program.cs +++ b/TaskEngine/Program.cs @@ -42,7 +42,11 @@ public static void Main() builder.AddConsole(); builder.AddFilter ("", LogLevel.Warning); - builder.AddApplicationInsights(configuration.GetValue("APPLICATION_INSIGHTS_KEY")); + string insightKey = configuration.GetValue("APPLICATION_INSIGHTS_KEY"); + if (!String.IsNullOrEmpty(insightKey) && insightKey.Trim().Length>1) + { + builder.AddApplicationInsights(insightKey); + } }) .AddOptions() .Configure(configuration) @@ -60,6 +64,8 @@ public static void Main() .AddSingleton() .AddSingleton() .AddSingleton() + .AddSingleton() + .AddSingleton() .AddSingleton() .AddSingleton() .AddSingleton() @@ -94,9 +100,11 @@ public static void Main() // Active queues managed by C# (concurrency > 0) are now purged after the queue is created and before messages are processed - ushort concurrent_videotasks = toUInt16(Globals.appSettings.MAX_CONCURRENT_VIDEO_TASKS, NO_CONCURRENCY); - ushort concurrent_synctasks = toUInt16(Globals.appSettings.MAX_CONCURRENT_SYNC_TASKS, MIN_CONCURRENCY); - ushort concurrent_transcriptions = toUInt16(Globals.appSettings.MAX_CONCURRENT_TRANSCRIPTIONS, MIN_CONCURRENCY); + ushort concurrent_videotasks = ToUInt16(Globals.appSettings.MAX_CONCURRENT_VIDEO_TASKS, NO_CONCURRENCY); + ushort concurrent_synctasks = ToUInt16(Globals.appSettings.MAX_CONCURRENT_SYNC_TASKS, MIN_CONCURRENCY); + ushort concurrent_transcriptions = ToUInt16(Globals.appSettings.MAX_CONCURRENT_TRANSCRIPTIONS, MIN_CONCURRENCY); + ushort concurrent_describe_images = 1; + ushort concurrent_describe_videos = 1; // Create and start consuming from all queues. If concurrency >=1 the queues are purged @@ -116,7 +124,12 @@ public static void Main() // Video Processing Related _logger.LogInformation($"Creating ProcessVideoTask consumer. Concurrency={concurrent_videotasks} "); serviceProvider.GetService().Consume(concurrent_videotasks); - + // Descriptions + serviceProvider.GetService().Consume(concurrent_describe_videos); + serviceProvider.GetService().Consume(concurrent_describe_images); + + + // SceneDetection now handled by native Python // See https://github.com/classtranscribe/pyapi serviceProvider.GetService().Consume(DISABLED_TASK); @@ -182,7 +195,7 @@ static void ExceptionHandler(object sender, UnhandledExceptionEventArgs args) _logger.LogError(e, "Unhandled Exception Caught"); } - private static ushort toUInt16(String val, ushort defaultVal) + private static ushort ToUInt16(String val, ushort defaultVal) { // ConvertToUInt16(String, int base) is not the droid you are looking for if (val != null && val.Length > 0) diff --git a/TaskEngine/Tasks/DescribeImageTask.cs b/TaskEngine/Tasks/DescribeImageTask.cs index d0f95235..13b00da8 100644 --- a/TaskEngine/Tasks/DescribeImageTask.cs +++ b/TaskEngine/Tasks/DescribeImageTask.cs @@ -10,10 +10,13 @@ using ClassTranscribeDatabase.Models; using ClassTranscribeDatabase.Services; using static ClassTranscribeDatabase.CommonUtils; +using Newtonsoft.Json.Linq; +using System; +using System.Text; -#pragma warning disable CA2007 +// #pragma warning disable CA2007 // https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007 // We are okay awaiting on a task in the same thread @@ -22,7 +25,7 @@ namespace TaskEngine.Tasks [SuppressMessage("Microsoft.Performance", "CA1812:MarkMembersAsStatic")] // This class is never directly instantiated class DescribeImageTask : RabbitMQTask { - private readonly DescribeImageTask _describeImageTask; + public DescribeImageTask(RabbitMQConnection rabbitMQ, ILogger logger) : base(rabbitMQ, TaskType.DescribeImage, logger) @@ -31,25 +34,53 @@ public DescribeImageTask(RabbitMQConnection rabbitMQ, ILogger } /// Extracts scene descriptions for a video. /// Beware: It is possible to start another scene task while the first one is still running - protected async override Task OnConsume(string imagePath, TaskParameters taskParameters, ClientActiveTasks cleanup) + protected async override Task OnConsume(string id, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, imagePath); // may throw AlreadyInProgress exception - GetLogger().LogInformation($"DescribeImageTask({imagePath}): Consuming Task"); - string captionId = taskParameters.Metadata["captionId"].ToString(); - - using (var _context = CTDbContext.CreateDbContext()) + RegisterTask(cleanup, id); // may throw AlreadyInProgress exception + GetLogger().LogInformation($"DescribeImageTask({id}): Consuming Task"); + JObject meta = taskParameters.Metadata; + string captionId = meta["CaptionId"].ToString(); + string imageFile = meta["ImageFile"].ToString(); + string ocrdata = meta["OCRText"].ToString(); + string ocrtext = ""; + try + { + JObject ocr = JObject.Parse(ocrdata); + JArray texts = ocr["text"] as JArray; + StringBuilder sb = new StringBuilder(); + foreach (var te in texts) { + string t = te.ToString(); + if (string.IsNullOrWhiteSpace(t)) continue; + if (sb.Length > 0) sb.Append(' '); + sb.Append(t); + } + ocrtext = sb.ToString(); + } catch(Exception ex) + { + GetLogger().LogError(ex, ex.Message); + } + GetLogger().LogInformation($"{captionId}: <{imageFile}> <{ocrtext}>"); + try { - Caption c= await _context.Captions.FindAsync(captionId); - if (c == null || c.HasPlaceHolderText()) { - GetLogger().LogInformation($"Describe Image {imagePath}: Caption Text changed or caption missing"); + using var _context = CTDbContext.CreateDbContext(); + Caption c = await _context.Captions.FindAsync(captionId); + + if (c == null || !c.HasPlaceHolderText()) + { + GetLogger().LogInformation($"Describe Image {id}: Caption Text changed or caption missing"); return; } - string result = $"A very interesting lecture slide ({captionId})"; + string result = $"MOCK AI output: An interesting lecture slide ({captionId}) for image {imageFile} and ocr (\"{ocrtext}\")"; c.Text = result; _context.Update(c); await _context.SaveChangesAsync(); } - GetLogger().LogInformation($"DescribeImageTask({imagePath}): Complete- end of task"); + catch (Exception ex) + { + GetLogger().LogError(ex, ex.Message); + throw; + } + GetLogger().LogInformation($"DescribeImageTask({id}): Complete - end of task"); } } } diff --git a/TaskEngine/Tasks/DescribeVideoTask.cs b/TaskEngine/Tasks/DescribeVideoTask.cs index b7625e97..7538a2e0 100644 --- a/TaskEngine/Tasks/DescribeVideoTask.cs +++ b/TaskEngine/Tasks/DescribeVideoTask.cs @@ -10,7 +10,7 @@ using static ClassTranscribeDatabase.CommonUtils; -#pragma warning disable CA2007 +// #pragma warning disable CA2007 // https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007 // We are okay awaiting on a task in the same thread @@ -21,122 +21,138 @@ class DescribeVideoTask : RabbitMQTask { private readonly DescribeImageTask _describeImageTask; - public DescribeVideoTask(RabbitMQConnection rabbitMQ, DescribeImageTask task, RpcClient rpcClient, ILogger logger) + public DescribeVideoTask(RabbitMQConnection rabbitMQ, DescribeImageTask describeImageTask, ILogger logger) : base(rabbitMQ, TaskType.DescribeVideo, logger) { - _describeImageTask = task; + _describeImageTask = describeImageTask; } /// Extracts scene descriptions for a video. /// Beware: It is possible to start another scene task while the first one is still running protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, videoId); // may throw AlreadyInProgress exception - GetLogger().LogInformation($"SceneDescriptionTask({videoId}): Consuming Task"); - - using (var _context = CTDbContext.CreateDbContext()) + RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception + GetLogger().LogInformation($"DescribeVideoTask({videoId}): Consuming Task"); + + using var _context = CTDbContext.CreateDbContext(); + Video video = await _context.Videos.FindAsync(videoId); + + if (!video.HasSceneObjectData()) { - Video video = await _context.Videos.FindAsync(videoId); - - if(!video.HasSceneObjectData()) { - GetLogger().LogInformation($"Describe Video {videoId}: Early return - no scene data to process"); - return; - } - TextData td = await _context.TextData.FindAsync(video.SceneObjectDataId); + GetLogger().LogInformation($"Describe Video {videoId}: Early return - no scene data to process"); + return; + } + TextData td = await _context.TextData.FindAsync(video.SceneObjectDataId); + + JObject sceneData = td.GetAsJSON() as JObject; + JArray scenes = sceneData["Scenes"] as JArray; + var captions = new List(); + + const string SIR = "ClassTranscribe/Scene-Describe"; // todo move into Model e.g. CaptionConstants +#pragma warning disable CS8632 // The annotation for nullable reference types should only be used in code within a '#nullable' annotations context. + Transcription? transcription = video.Transcriptions.Where(t => t.SourceInternalRef == SIR).FirstOrDefault(); +#pragma warning restore CS8632 // The annotation for nullable reference types should only be used in code within a '#nullable' annotations context. + if (transcription == null) + { + var theLanguage = Languages.ENGLISH_AMERICAN; - JObject sceneData = td.getAsJSON() as JObject; - JArray scenes = sceneData["Scenes"] as JArray; - var captions = new List(); + transcription = new Transcription() + { + TranscriptionType = TranscriptionType.TextDescription, + VideoId = video.Id, + Language = Languages.ENGLISH_AMERICAN, + Label = "Description", + SourceLabel = "ClassTranscribe", + SourceInternalRef = SIR + }; + GetLogger().LogInformation($"Describe Video {videoId}: Creating new (empty) Description Entry"); - const string SIR = "ClassTranscribe/Scene-Describe"; // todo move into Model e.g. CaptionConstants - Transcription? transcription = video.Transcriptions.Where(t=>t.SourceInternalRef == SIR).FirstOrDefault(); - if(transcription == null) { - var theLanguage = Languages.ENGLISH_AMERICAN; - - transcription = new Transcription() + _context.Add(transcription); + await _context.SaveChangesAsync(); + } + else + { + captions = transcription.Captions.ToList(); + GetLogger().LogInformation($"{videoId}: Reusing Description. Found {captions.Count} captions"); + } + // Step 2 Create Placeholder captions if they don't exist for every scene. + int alreadyDoneCount = 0, taskEnqueueCount = 0; ; + + int CaptionIndex = captions != null && captions.Count > 0 ? captions.Select(c => c.Index).Max() + 1 : 0; + var newCaptions = new List(); + var scenesWithNewCaption = new List(); + var describeScenes = new List(); + int sceneIndex = 0; + foreach (JObject scene in scenes) + { + var captionId = scene["captionId"]?.ToString(); + var caption = captionId != null ? captions.Where(c => c.Id == captionId).FirstOrDefault() : null; + if (caption == null) + { + var c = new Caption { - TranscriptionType = TranscriptionType.TextDescription, - VideoId = video.Id, - Language = Languages.ENGLISH_AMERICAN, - Label = "Description", - SourceLabel = "ClassTranscribe", - SourceInternalRef = SIR + Index = CaptionIndex++, + Begin = TimeSpan.Parse(scene["start"].ToString()), + End = TimeSpan.Parse(scene["end"].ToString()), + CaptionType = CaptionType.AudioDescription, + Text = CaptionConstants.PlaceHolderText, + TranscriptionId = transcription.Id }; - GetLogger().LogInformation($"Describe Video {videoId}: Creating new (empty) Description Entry"); - - _context.Add(transcription); - await _context.SaveChangesAsync(); - } - else { - captions = transcription.Captions.ToList(); - GetLogger().LogInformation($"{videoId}: Reusing Description. Found {captions.Count} captions"); + newCaptions.Add(c); + scenesWithNewCaption.Add(sceneIndex); + describeScenes.Add(sceneIndex); } - // Step 2 Create Placeholder captions if they don't exist for every scene. - int alreadyDoneCount = 0, taskEnqueueCount = 0;; - - int CaptionIndex = captions != null && captions.Count > 0 ? captions.Select(c=>c.Index).Max() + 1 : 0; - var newCaptions = new List(); - var scenesWithNewCaption = new List(); - var describeScenes = new List(); - int sceneIndex = 0; - foreach (JObject scene in scenes) { - string? captionId = scene["captionId"].ToString(); - var caption = captions.Where(c=>c.Id==captionId).FirstOrDefault(); - if( caption == null) { - var c = new Caption - { - Index = CaptionIndex++, - Begin = TimeSpan.Parse(scene["start"].ToString()), - End = TimeSpan.Parse(scene["end"].ToString()), - CaptionType = CaptionType.AudioDescription, - Text = CaptionConstants.PlaceHolderText, - TranscriptionId = transcription.Id - }; - newCaptions.Add(c); - scenesWithNewCaption.Add(sceneIndex); - describeScenes.Add(sceneIndex); - } else { - if(caption.HasPlaceHolderText()) { - describeScenes.Add(sceneIndex); - } else { - alreadyDoneCount ++; - } + else + { + if (caption.HasPlaceHolderText()) + { + describeScenes.Add(sceneIndex); } - - sceneIndex ++; // todo rewrite as map with index? - } - - GetLogger().LogInformation($"Describe Video {videoId}: ${newCaptions.Count} new captions to create"); - if(newCaptions.Any()) { - _context.AddRange(newCaptions); - await _context.SaveChangesAsync(); - // Now we can associate the captionIds with the scene data - foreach(int i in scenesWithNewCaption) { - scenes[i]["captionId"] = newCaptions[i].Id; + else + { + alreadyDoneCount++; } - sceneData.Remove("Scenes"); - sceneData.Add("Scenes", scenes); - td.setFromJSON(sceneData); - _context.Update(td); - await _context.SaveChangesAsync(); - GetLogger().LogInformation($"Describe Video {videoId}: Scene Data {td.Id} Updated with Caption references"); } - GetLogger().LogInformation($"Describe Video {videoId}: {describeScenes.Count} Description Tasks to enqueue"); - foreach (int i in describeScenes) { - JObject scene = scenes[i] as JObject; - string imageFilePath = scene[""].ToString(); - string captionId = scene["captionId"].ToString(); - taskEnqueueCount ++; - // Todo Create Parameter imageFilePath, captionId,transcription.Id - var taskParams = new TaskParameters(); - - GetLogger().LogInformation($"Describe Video {videoId}: {imageFilePath} {captionId} {transcription.Id}"); - _describeImageTask.Publish(imageFilePath,taskParams); - + sceneIndex++; // todo rewrite as map with index? + } + + GetLogger().LogInformation($"Describe Video {videoId}: ${newCaptions.Count} new captions to create"); + if (newCaptions.Any()) + { + _context.AddRange(newCaptions); + await _context.SaveChangesAsync(); + // Now we can associate the captionIds with the scene data + foreach (int i in scenesWithNewCaption) + { + dynamic scene = scenes[i] as JObject; + scene.captionId = newCaptions[i].Id; } - GetLogger().LogInformation($"Describe Video {videoId}: AlreadyDone={alreadyDoneCount}.enqueueCount={taskEnqueueCount}"); - GetLogger().LogInformation($"Describe Video {videoId}: Returning."); + sceneData.Remove("Scenes"); + sceneData.Add("Scenes", scenes); + td.SetFromJSON(sceneData); + _context.Update(td); + await _context.SaveChangesAsync(); + GetLogger().LogInformation($"Describe Video {videoId}: Scene Data {td.Id} Updated with Caption references"); + } + GetLogger().LogInformation($"Describe Video {videoId}: {describeScenes.Count} Description Tasks to enqueue"); + foreach (int i in describeScenes) + { + JObject scene = scenes[i] as JObject; + dynamic taskMeta = new JObject(); + string imageFile = scene["img_file"].ToString(); + taskMeta.ImageFile = imageFile; + taskMeta.OCRText = scene["raw_text"]?.ToString(); + taskMeta.CaptionId = scene["captionId"].ToString(); + + taskEnqueueCount++; + var taskParams = new TaskParameters(taskMeta); + + GetLogger().LogInformation($"Describe Video {videoId}: {imageFile} {taskMeta.CaptionId} {transcription.Id}"); + _describeImageTask.Publish(imageFile, taskParams); + } + GetLogger().LogInformation($"Describe Video {videoId}: AlreadyDone={alreadyDoneCount}.enqueueCount={taskEnqueueCount}"); + GetLogger().LogInformation($"Describe Video {videoId}: Returning."); } } } \ No newline at end of file From 2dbb466576cc1e69045e24ed51319057d8059f9d Mon Sep 17 00:00:00 2001 From: Lawrence Angrave Date: Fri, 5 Jan 2024 11:18:43 -0600 Subject: [PATCH 5/5] Describe Video Task --- TaskEngine/Tasks/DescribeVideoTask.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TaskEngine/Tasks/DescribeVideoTask.cs b/TaskEngine/Tasks/DescribeVideoTask.cs index 7538a2e0..b23bb983 100644 --- a/TaskEngine/Tasks/DescribeVideoTask.cs +++ b/TaskEngine/Tasks/DescribeVideoTask.cs @@ -49,7 +49,7 @@ protected async override Task OnConsume(string videoId, TaskParameters taskParam const string SIR = "ClassTranscribe/Scene-Describe"; // todo move into Model e.g. CaptionConstants #pragma warning disable CS8632 // The annotation for nullable reference types should only be used in code within a '#nullable' annotations context. - Transcription? transcription = video.Transcriptions.Where(t => t.SourceInternalRef == SIR).FirstOrDefault(); + Transcription? transcription = video.Transcriptions.Where(t => t.SourceInternalRef == SIR).OrderByDescending(t=>t.CreatedAt).FirstOrDefault(); #pragma warning restore CS8632 // The annotation for nullable reference types should only be used in code within a '#nullable' annotations context. if (transcription == null) { @@ -116,7 +116,7 @@ protected async override Task OnConsume(string videoId, TaskParameters taskParam sceneIndex++; // todo rewrite as map with index? } - GetLogger().LogInformation($"Describe Video {videoId}: ${newCaptions.Count} new captions to create"); + GetLogger().LogInformation($"Describe Video {videoId}: {newCaptions.Count} new captions to create"); if (newCaptions.Any()) { _context.AddRange(newCaptions);