diff --git a/ClassTranscribeDatabase/CTDbContext.cs b/ClassTranscribeDatabase/CTDbContext.cs index 8e59b4a5..ee96b710 100644 --- a/ClassTranscribeDatabase/CTDbContext.cs +++ b/ClassTranscribeDatabase/CTDbContext.cs @@ -74,12 +74,13 @@ public static string ConnectionStringBuilder() // TODO: Max MaxPoolSize and port should be configurable var configurations = CTDbContext.GetConfigurations(); - return "Server=" + configurations["POSTGRES_SERVER_NAME"] - + ";Port=5432" - + ";Database=" + configurations["POSTGRES_DB"] - + ";User Id=" + configurations["ADMIN_USER_ID"] - + ";Password=" + configurations["ADMIN_PASSWORD"] - + ";MaxPoolSize=1000;"; + string conn = $"Server={configurations["POSTGRES_SERVER_NAME"]};" + + $"Port={configurations["POSTGRES_SERVER_PORT"] ?? "5432"};" + + $"Database={configurations["POSTGRES_DB"]};" + + $"User Id={configurations["ADMIN_USER_ID"]};" + + $"Password={configurations["ADMIN_PASSWORD"]};" + + $"MaxPoolSize={configurations["POSTGRES_CLIENT_MAX_POOL_SIZE"] ?? "1000"};"; + return conn; } /// @@ -112,21 +113,67 @@ public static CTDbContext CreateDbContext() /// The configurations public static IConfiguration GetConfigurations() { + var basedir = System.IO.Directory.GetCurrentDirectory(); + + if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("POSTGRES_DB"))) + { + LoadEnvFileIfExists($"{basedir}/../../../LocalEnvironmentVariables.txt"); + } + var configuration = new ConfigurationBuilder().AddEnvironmentVariables().Build(); if (configuration.GetValue("DEV_ENV", "NULL") != "DOCKER") { - string path = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); string appSettingsFileName = "vs_appsettings.json"; + string path = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); if (File.Exists(path + Path.DirectorySeparatorChar + appSettingsFileName)) { return new ConfigurationBuilder().SetBasePath(path).AddJsonFile(appSettingsFileName).Build(); - } + } + } return configuration; } + public static string DropQuotes(string s) + { + char first = s.Length < 2 ? 'a': s[0]; + if (! "\"'".Contains(first)) + { + return s; + } + char last = s[^1]; + if(first == last) + { + return s[1..^1].Replace($"\\{first}", first.ToString()); + } + return s; + } + public static bool LoadEnvFileIfExists(string filePath) + { + if (!File.Exists(filePath)) { + Console.WriteLine($"Env file {filePath} not found - ignoring"); + return false; + } + var count = 0; + foreach (var line in File.ReadAllLines(filePath)) + { + if (!line.Contains("=") || line.TrimStart().StartsWith("#")) + continue; + + var parts = line.Split( '=', 2); + var key = parts[0].Trim(); + var val = DropQuotes( parts[1].Trim()); + //Console.WriteLine($"{key}:{val.Length} chars"); + + Environment.SetEnvironmentVariable(key,val); + count += 1; + } + Console.WriteLine($"{count} environment variables set using {filePath}"); + return true; + } + protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder) { optionsBuilder.UseNpgsql(ConnectionStringBuilder()); @@ -268,7 +315,7 @@ public override int SaveChanges(bool acceptAllChangesOnSuccess) return base.SaveChanges(acceptAllChangesOnSuccess); } - public override Task SaveChangesAsync(bool acceptAllChangesOnSuccess, CancellationToken cancellationToken = default(CancellationToken)) + public override Task SaveChangesAsync(bool acceptAllChangesOnSuccess, CancellationToken cancellationToken = default) { OnBeforeSaving(); return base.SaveChangesAsync(acceptAllChangesOnSuccess, cancellationToken); diff --git a/ClassTranscribeDatabase/CaptionQueries.cs b/ClassTranscribeDatabase/CaptionQueries.cs index 717a67ce..df9d417e 100644 --- a/ClassTranscribeDatabase/CaptionQueries.cs +++ b/ClassTranscribeDatabase/CaptionQueries.cs @@ -25,7 +25,8 @@ public async Task> GetCaptionsAsync(string videoId, string languag { try { - var transcriptionId = _context.Transcriptions.Where(t => t.Language == language && t.VideoId == videoId).First().Id; + var transcriptionId = _context.Transcriptions.Where(t => t.Language == language && t.VideoId == videoId + && t.TranscriptionType == TranscriptionType.Caption).First().Id; return await GetCaptionsAsync(transcriptionId); } catch (System.InvalidOperationException) diff --git a/ClassTranscribeDatabase/CommonUtils.cs b/ClassTranscribeDatabase/CommonUtils.cs index 646aab8e..02e259b0 100644 --- a/ClassTranscribeDatabase/CommonUtils.cs +++ b/ClassTranscribeDatabase/CommonUtils.cs @@ -34,7 +34,11 @@ public enum TaskType BuildElasticIndex = 16, ExampleTask = 17, CleanUpElasticIndex = 18, - PythonCrawler = 19 + PythonCrawler = 19, + + DescribeVideo = 20, + DescribeImage = 21 + } public class Languages diff --git a/ClassTranscribeDatabase/Globals.cs b/ClassTranscribeDatabase/Globals.cs index 78491b07..9480b414 100644 --- a/ClassTranscribeDatabase/Globals.cs +++ b/ClassTranscribeDatabase/Globals.cs @@ -13,6 +13,7 @@ public class AppSettings public string JWT_KEY { get; set; } public string ALLOWED_HOSTS { get; set; } public string POSTGRES_SERVER_NAME { get; set; } + public string POSTGRES_SERVER_PORT { get; set; } = "5432"; public string POSTGRES_DB { get; set; } public string ADMIN_USER_ID { get; set; } diff --git a/ClassTranscribeDatabase/Models/Caption.cs b/ClassTranscribeDatabase/Models/Caption.cs index 0373540f..398ed19e 100644 --- a/ClassTranscribeDatabase/Models/Caption.cs +++ b/ClassTranscribeDatabase/Models/Caption.cs @@ -13,11 +13,18 @@ public enum CaptionType TextCaption = 0, AudioDescription = 1 } + + public static class CaptionConstants { + public const string PlaceHolderText = "...Processing..."; + + } /// /// Each line of caption is stored as a row in the database. /// public class Caption : Entity { + + public bool HasPlaceHolderText() { return this.Text == CaptionConstants.PlaceHolderText; } public int Index { get; set; } public TimeSpan Begin { get; set; } public TimeSpan End { get; set; } @@ -115,7 +122,7 @@ public static List ToCaptionEntitiesInterpolate(int captionsCount, Time else { caption = tempCaption.Substring(0, index); - tempCaption = tempCaption.Substring(index); + tempCaption = tempCaption[index..]; tempCaption = tempCaption.Trim(); } curEnd = curBegin.Add(new TimeSpan(0, 0, 0, 0, newDuration)); @@ -140,8 +147,8 @@ public static List ToCaptionEntitiesInterpolate(int captionsCount, Time End = curEnd, Text = tempCaption }); - curBegin = curEnd; - curDuration = End.Subtract(curBegin); + // curBegin = curEnd; + // curDuration = End.Subtract(curBegin); } return captions; } @@ -204,7 +211,7 @@ public static string GenerateWebVTTString(List captions, string languag // public static string GenerateDXFPString(List captions, string language) { - string now = DateTime.UtcNow.ToString("o", System.Globalization.CultureInfo.InvariantCulture); + // string now = DateTime.UtcNow.ToString("o", System.Globalization.CultureInfo.InvariantCulture); string header = @" public class KeyProvider { - private AppSettings _appSettings; - private List Keys; - private HashSet CurrentVideoIds; + private readonly AppSettings _appSettings; + private readonly List Keys; + private readonly HashSet CurrentVideoIds; public KeyProvider(AppSettings appSettings) { _appSettings = appSettings; - string subscriptionKeys = _appSettings.AZURE_SUBSCRIPTION_KEYS; + string subscriptionKeys = _appSettings.AZURE_SUBSCRIPTION_KEYS ?? ""; Keys = new List(); CurrentVideoIds = new HashSet(); diff --git a/ClassTranscribeDatabase/Services/RabbitMQConnection.cs b/ClassTranscribeDatabase/Services/RabbitMQConnection.cs index ad19b298..86a9800a 100644 --- a/ClassTranscribeDatabase/Services/RabbitMQConnection.cs +++ b/ClassTranscribeDatabase/Services/RabbitMQConnection.cs @@ -34,21 +34,21 @@ public ClientActiveTasks(ClientActiveTasks source) : base(source) public class RabbitMQConnection : IDisposable { // Created by the first instance, then re-used - private static IConnection _connection; - private static int _connectionRefCount; + private static IConnection Connection; + private static int ConnectionRefCount; - IModel _channel { get; set; } - String _expiration; // milliseconds + IModel Channel { get; set; } + String Expiration; // milliseconds - private readonly ILogger _logger; + private readonly ILogger Logger; public RabbitMQConnection(ILogger logger) { - _logger = logger; + Logger = logger; CreateSharedConnection(); // TODO/TOREVIEW: Check number of threads created // Potentially Model can be shared too - _channel = _connection.CreateModel(); + Channel = Connection.CreateModel(); uint time = Math.Max(1, Convert.ToUInt32(Globals.appSettings.RABBITMQ_TASK_TTL_MINUTES)); SetMessageExpiration(time); @@ -57,12 +57,12 @@ public RabbitMQConnection(ILogger logger) private void CreateSharedConnection() { - if (_connection != null) + if (Connection != null) { - _connectionRefCount++; + ConnectionRefCount++; return; } - _logger.LogInformation("Creating RabbitMQ connection"); + Logger.LogInformation("Creating RabbitMQ connection"); var factory = new ConnectionFactory() { @@ -77,22 +77,22 @@ private void CreateSharedConnection() // In 2021 we can remove support for the old variable if (Globals.appSettings.RabbitMQServer.Length > 0) { - _logger.LogError("*** Mixed case 'RabbitMQServer' environment variable is deprecated. Review your .env or vs_appsettings.json environment settings"); + Logger.LogError("*** Mixed case 'RabbitMQServer' environment variable is deprecated. Review your .env or vs_appsettings.json environment settings"); if (Globals.appSettings.RABBITMQ_SERVER_NAME.Length == 0) { - _logger.LogError("*** Update your environment to use RABBITMQ_SERVER_NAME."); + Logger.LogError("*** Update your environment to use RABBITMQ_SERVER_NAME."); } else if (Globals.appSettings.RABBITMQ_SERVER_NAME != Globals.appSettings.RabbitMQServer) { { - _logger.LogError("*** RABBITMQ_SERVER_NAME and RabbitMQServer are both set and different! Using RABBITMQ_SERVER_NAME"); + Logger.LogError("*** RABBITMQ_SERVER_NAME and RabbitMQServer are both set and different! Using RABBITMQ_SERVER_NAME"); } } } - _logger.LogInformation($"Connecting to RabbitMQ server {factory.HostName} with user {factory.UserName} on port {factory.Port}..."); - _connection = factory.CreateConnection(); - _connectionRefCount = 1; + Logger.LogInformation($"Connecting to RabbitMQ server {factory.HostName} with user {factory.UserName} on port {factory.Port}..."); + Connection = factory.CreateConnection(); + ConnectionRefCount = 1; } @@ -100,8 +100,8 @@ public void SetMessageExpiration(uint ttlMinutes) { uint OneMinuteAsMilliseconds = 1000 * 60; - _expiration = (OneMinuteAsMilliseconds * ttlMinutes).ToString(); - _logger.LogInformation("Using Message TTL {0} minutes", ttlMinutes); + Expiration = (OneMinuteAsMilliseconds * ttlMinutes).ToString(); + Logger.LogInformation("Using Message TTL {0} minutes", ttlMinutes); } /// @@ -118,23 +118,23 @@ public void PublishTask(string queueName, T data, TaskParameters taskParamete // they might publish a task to different queue? // Better? Should we create the queue (in Program.cs) separately from servicing or publishing the queue // Or is always (re-)declaring the queue best practices for RabbitNQ? - lock ((_channel)) + lock ((Channel)) { - _channel.QueueDeclare(queue: queueName, durable: true, exclusive: false, autoDelete: false, arguments: null); + Channel.QueueDeclare(queue: queueName, durable: true, exclusive: false, autoDelete: false, arguments: null); } var taskObject = new TaskObject { Data = data, TaskParameters = taskParameters }; var body = CommonUtils.MessageToBytes(taskObject); - var properties = _channel.CreateBasicProperties(); + var properties = Channel.CreateBasicProperties(); properties.Persistent = true; // Note delivered but unacked messages do not expire - properties.Expiration = _expiration; // milliseconds + properties.Expiration = Expiration; // milliseconds // See https://www.rabbitmq.com/dotnet-api-guide.html#concurrency-channel-sharing // Use a lock to ensure thread safety - lock (_channel) + lock (Channel) { - _channel.BasicPublish(exchange: "", routingKey: queueName, basicProperties: properties, body: body); + Channel.BasicPublish(exchange: "", routingKey: queueName, basicProperties: properties, body: body); } } @@ -148,10 +148,10 @@ public void PublishTask(string queueName, T data, TaskParameters taskParamete public void ConsumeTask(string queueName, Func OnConsume, Func PostConsumeCleanup, ushort concurrency) { // Caution. The queue is also declard inside PublishTask above - _logger.LogInformation("Prefetch concurrency count {0}", concurrency); - lock (_channel) + Logger.LogInformation("Prefetch concurrency count {0}", concurrency); + lock (Channel) { - _channel.QueueDeclare(queue: queueName, + Channel.QueueDeclare(queue: queueName, durable: true, exclusive: false, autoDelete: false, @@ -159,15 +159,15 @@ public void ConsumeTask(string queueName, Func { // This object exists so that we can wrap all OnConsumes with a try-finally here @@ -177,7 +177,7 @@ public void ConsumeTask(string queueName, Func>(ea.Body); - _logger.LogInformation(" [x] {0} Received {1}", queueName, taskObject.ToString()); + Logger.LogInformation(" [x] {0} Received {1}", queueName, taskObject.ToString()); // TODO: Update JobStatus table here (started timestamp) try { @@ -185,23 +185,23 @@ public void ConsumeTask(string queueName, Func(string queueName, Func /// Requests a re-download of missing media /// @@ -242,7 +251,7 @@ public async Task> UpdateSceneDataSchema(String requestId) } else { JToken olddata = video.SceneData; TextData data = new TextData(); - data.setFromJSON(olddata); + data.SetFromJSON(olddata); _context.TextData.Add(data); video.SceneObjectDataId = data.Id; System.Diagnostics.Trace.Assert(!string.IsNullOrEmpty(data.Id)); diff --git a/ClassTranscribeServer/Controllers/EPubsController.cs b/ClassTranscribeServer/Controllers/EPubsController.cs index 1e3a8323..bdc48e6b 100644 --- a/ClassTranscribeServer/Controllers/EPubsController.cs +++ b/ClassTranscribeServer/Controllers/EPubsController.cs @@ -96,7 +96,7 @@ public async Task>> GetEpubData(string mediaId, } TextData data = await _context.TextData.FindAsync(video.SceneObjectDataId); _logger.LogInformation($"GetEpubData({mediaId},{language}) getting scenedata as JArray"); - JArray sceneArray = data.getAsJSON()["Scenes"] as JArray; + JArray sceneArray = data.GetAsJSON()["Scenes"] as JArray; EPub epub = new EPub { @@ -124,7 +124,7 @@ public async Task> GetGlossaryData(string mediaId) Video video = await _context.Videos.FindAsync(media.VideoId); if (video.HasGlossaryData()) { TextData data = await _context.TextData.FindAsync(video.GlossaryDataId); - return data.getAsJSON(); + return data.GetAsJSON(); } return video.Glossary; diff --git a/ClassTranscribeServer/Controllers/PlaylistsController.cs b/ClassTranscribeServer/Controllers/PlaylistsController.cs index 6ecdffda..083ee78d 100644 --- a/ClassTranscribeServer/Controllers/PlaylistsController.cs +++ b/ClassTranscribeServer/Controllers/PlaylistsController.cs @@ -91,7 +91,7 @@ public async Task>> GetPlaylists(string of var authorizationResult = await _authorizationService.AuthorizeAsync(this.User, offering, Globals.POLICY_READ_OFFERING); if (!authorizationResult.Succeeded) { - return Unauthorized(new { Reason = "Insufficient Permission", AccessType = offering.AccessType }); + return Unauthorized(new { Reason = "Insufficient Permission", offering.AccessType }); } var temp = await _context.Playlists .Where(p => p.OfferingId == offeringId) @@ -129,7 +129,7 @@ public async Task>> GetPlaylists2(string o var authorizationResult = await _authorizationService.AuthorizeAsync(this.User, offering, Globals.POLICY_READ_OFFERING); if (!authorizationResult.Succeeded) { - return Unauthorized(new { Reason = "Insufficient Permission", AccessType = offering.AccessType }); + return Unauthorized(new { Reason = "Insufficient Permission", offering.AccessType }); } var playLists = await _context.Playlists @@ -163,7 +163,7 @@ public async Task>> GetPlaylists2(string o JsonMetadata = m.JsonMetadata, CreatedAt = m.CreatedAt, SceneDetectReady = m.Video.HasSceneObjectData(), - Ready = m.Video == null ? false : "NoError" == m.Video.TranscriptionStatus , + Ready = m.Video != null && "NoError" == m.Video.TranscriptionStatus , SourceType = m.SourceType, Duration = m.Video?.Duration, PublishStatus = m.PublishStatus, @@ -179,8 +179,8 @@ public async Task>> GetPlaylists2(string o Transcriptions = m.Video.Transcriptions.Select(t => new TranscriptionDTO { Id = t.Id, - Path = t.File != null ? t.File.Path : null, - SrtPath = t.SrtFile != null ? t.SrtFile.Path : null, + Path = t.File?.Path, + SrtPath = t.SrtFile?.Path, Language = t.Language, Label = t.Label, SourceLabel = t.SourceLabel, @@ -256,7 +256,7 @@ public async Task> GetPlaylist(string id) PublishStatus = m.PublishStatus, Options = m.getOptionsAsJson(), SceneDetectReady = m.Video != null && m.Video.HasSceneObjectData(), - Ready = m.Video == null ? false : "NoError" == m.Video.TranscriptionStatus , + Ready = m.Video != null && "NoError" == m.Video.TranscriptionStatus , Video = m.Video == null ? null : new VideoDTO { Id = m.Video.Id, @@ -265,11 +265,11 @@ public async Task> GetPlaylist(string id) ASLPath = m.Video.ProcessedASLVideo?.Path != null ? m.Video.ProcessedASLVideo.Path : m.Video.ASLVideo?.Path, TaskLog = m.Video.TaskLog }, - Transcriptions = m.Video == null ? null : m.Video.Transcriptions.Select(t => new TranscriptionDTO + Transcriptions = m.Video?.Transcriptions.Select(t => new TranscriptionDTO { Id = t.Id, - Path = t.File != null ? t.File.Path : null, - SrtPath = t.SrtFile != null ? t.SrtFile.Path : null, + Path = t.File?.Path, + SrtPath = t.SrtFile?.Path, Language = t.Language }).ToList(), WatchHistory = user != null ? partialWatchHistories.Where(w => w.MediaId == m.Id).FirstOrDefault() :null @@ -500,7 +500,7 @@ public async Task Reorder(string offeringId, List playlist } _context.Playlists.UpdateRange(playlists); await _context.SaveChangesAsync(); - return RedirectToAction("GetPlaylists", new { offeringId = offeringId }); + return RedirectToAction("GetPlaylists", new { offeringId }); } private bool PlaylistExists(string id) diff --git a/ClassTranscribeServer/Controllers/TaskController.cs b/ClassTranscribeServer/Controllers/TaskController.cs index 45cb56fe..0ab36ac1 100644 --- a/ClassTranscribeServer/Controllers/TaskController.cs +++ b/ClassTranscribeServer/Controllers/TaskController.cs @@ -69,7 +69,7 @@ public async Task UpdateSceneData(string videoId, JObject scene) } private void createDescriptionsIfNone(Video v, TextData scenedata) { - JArray scenes = scenedata.getAsJSON()["Scenes"] as JArray; + JArray scenes = scenedata.GetAsJSON()["Scenes"] as JArray; if (scenes == null || v == null || v.Id == null) { return; @@ -141,7 +141,7 @@ public async Task> GetSceneData(string videoId) { Video video = await _context.Videos.FindAsync(videoId); if(video.HasSceneObjectData()) { TextData data = await _context.TextData.FindAsync(video.SceneObjectDataId); - return data.getAsJSON(); + return data.GetAsJSON(); } // old version - return video.SceneData; @@ -205,7 +205,7 @@ public async Task> GetGlossary(string videoId) { Video video = await _context.Videos.FindAsync(videoId); if(video.HasGlossaryData()) { TextData data = await _context.TextData.FindAsync(video.GlossaryDataId); - return data.getAsJSON(); + return data.GetAsJSON(); } // old version - return video.Glossary; @@ -240,7 +240,7 @@ public async Task> GetGlossaryTimestamp(string videoId) { Video video = await _context.Videos.FindAsync(videoId); if(video.HasGlossaryTimestamp()) { TextData data = await _context.TextData.FindAsync(video.GlossaryTimestampId); - return data.getAsJSON(); + return data.GetAsJSON(); } // old version - return NotFound(); diff --git a/ClassTranscribeServer/Utils/WakeDownloader.cs b/ClassTranscribeServer/Utils/WakeDownloader.cs index 50ecd0f3..9cff435f 100644 --- a/ClassTranscribeServer/Utils/WakeDownloader.cs +++ b/ClassTranscribeServer/Utils/WakeDownloader.cs @@ -14,96 +14,130 @@ public WakeDownloader(RabbitMQConnection rabbitMQ) //Todo: Fix field capitalization in here and QueueAwakerTask.cs public void UpdateAllPlaylists() { - JObject msg = new JObject(); - msg.Add("Type", TaskType.DownloadAllPlaylists.ToString()); + JObject msg = new JObject + { + { "Type", TaskType.DownloadAllPlaylists.ToString() } + }; Wake(msg); } public virtual void UpdatePlaylist(string playlistId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.DownloadPlaylistInfo.ToString()); - msg.Add("PlaylistId", playlistId); + JObject msg = new JObject + { + { "Type", TaskType.DownloadPlaylistInfo.ToString() }, + { "PlaylistId", playlistId } + }; Wake(msg); } public virtual void UpdateVTTFile(string transcriptionId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.GenerateVTTFile.ToString()); - msg.Add("TranscriptionId", transcriptionId); + JObject msg = new JObject + { + { "Type", TaskType.GenerateVTTFile.ToString() }, + { "TranscriptionId", transcriptionId } + }; Wake(msg); } public void UpdateOffering(string offeringId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.UpdateOffering.ToString()); - msg.Add("offeringId", offeringId); + JObject msg = new JObject + { + { "Type", TaskType.UpdateOffering.ToString() }, + { "offeringId", offeringId } + }; Wake(msg); } public void PeriodicCheck() { - JObject msg = new JObject(); - msg.Add("Type", TaskType.PeriodicCheck.ToString()); + JObject msg = new JObject + { + { "Type", TaskType.PeriodicCheck.ToString() } + }; Wake(msg); } public void GenerateScenes(string mediaId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.SceneDetection.ToString()); - msg.Add("mediaId", mediaId); + JObject msg = new JObject + { + { "Type", TaskType.SceneDetection.ToString() }, + { "mediaId", mediaId } + }; Wake(msg); } public void CreateBoxToken(string authCode) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.CreateBoxToken.ToString()); - msg.Add("authCode", authCode); + JObject msg = new JObject + { + { "Type", TaskType.CreateBoxToken.ToString() }, + { "authCode", authCode } + }; Wake(msg); } public void DownloadMedia(string mediaId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.DownloadMedia.ToString()); - msg.Add("mediaId", mediaId); + JObject msg = new JObject + { + { "Type", TaskType.DownloadMedia.ToString() }, + { "mediaId", mediaId } + }; Wake(msg); } public void ConvertMedia(string videoId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.ConvertMedia.ToString()); - msg.Add("videoId", videoId); + JObject msg = new JObject + { + { "Type", TaskType.ConvertMedia.ToString() }, + { "videoId", videoId } + }; Wake(msg); } public void TranscribeVideo(string videoOrMediaId, bool deleteExisting) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.TranscribeVideo.ToString()); - msg.Add("videoOrMediaId", videoOrMediaId); - msg.Add("DeleteExisting", deleteExisting); + JObject msg = new JObject + { + { "Type", TaskType.TranscribeVideo.ToString() }, + { "videoOrMediaId", videoOrMediaId }, + { "DeleteExisting", deleteExisting } + }; Wake(msg); } public virtual void SceneDetection(string videoMediaPlaylistId, bool deleteExisting) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.SceneDetection.ToString()); - msg.Add("videoMediaPlaylistId", videoMediaPlaylistId); - msg.Add("DeleteExisting", deleteExisting); + JObject msg = new JObject + { + { "Type", TaskType.SceneDetection.ToString() }, + { "videoMediaPlaylistId", videoMediaPlaylistId }, + { "DeleteExisting", deleteExisting } + }; + Wake(msg); + } + + public virtual void DescribeVideo(string videoMediaPlaylistId, bool deleteExisting) { + JObject msg = new JObject + { + { "Type", TaskType.DescribeVideo.ToString() }, + { "videoMediaPlaylistId", videoMediaPlaylistId }, + { "DeleteExisting", deleteExisting } + }; Wake(msg); } public void UpdateASLVideo(string sourceId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.PythonCrawler.ToString()); - msg.Add("SourceId", sourceId); + JObject msg = new JObject + { + { "Type", TaskType.PythonCrawler.ToString() }, + { "SourceId", sourceId } + }; Wake(msg); } @@ -115,9 +149,11 @@ private void Wake(JObject message, TaskParameters taskParameters = null) public void ReTranscribePlaylist(string playlistId) { - JObject msg = new JObject(); - msg.Add("Type", TaskType.ReTranscribePlaylist.ToString()); - msg.Add("PlaylistId", playlistId); + JObject msg = new JObject + { + { "Type", TaskType.ReTranscribePlaylist.ToString() }, + { "PlaylistId", playlistId } + }; Wake(msg); } } diff --git a/LocalEnvironmentVariables.txt b/LocalEnvironmentVariables.txt new file mode 100644 index 00000000..44444f9c --- /dev/null +++ b/LocalEnvironmentVariables.txt @@ -0,0 +1,264 @@ +MOCK_RECOGNITION = MOCK + +# Consider this a guide for the environment variables required to deploy ClassTranscribe +# Make a copy of this file and rename to - ".env" and place in Deployment/ +# ALL VARIABLES HAVE TO BE SET UNLESS MENTIONED OTHERWISE + +TRAEFIK_HTTP_PORT=8000 +TRAEFIK_HTTPS_PORT=8443 + +DATA=../../../../pyapi/devruntime/docker_data + +LTI_SHARED_SECRET=ltisecret +MEDIA_WORKER_SHARED_SECRET=mediasecret + +PERIODIC_CHECK_MINUTES=300 +# ---------------------------------------------------------------------- +# DATA DIRECTORY +# ---------------------------------------------------------------------- +# Description: The path to store all data of classtranscribe +# Instruction: Substitute with path passed an argument to create_directories.sh (For more info, check readme.md) +# Example: +# DATA=/classtranscribe_data +# ---------------------------------------------------------------------- + + +# ---------------------------------------------------------------------- +# HOST_NAME +# ---------------------------------------------------------------------- +# Description: The host name for where the website will be hosted +# Instruction: Subsititute with the host name. +# Example: +# HOST_NAME=my.classtranscribe.com +# ---------------------------------------------------------------------- +HOST_NAME=localhost + + +# ---------------------------------------------------------------------- +# ADMIN CONFIGURATION +# ---------------------------------------------------------------------- +# Description: These are used as the credentials for the following services - portainer, db, pgadmin, rabbitmq, traefik, letsencrypt +# Instruction: Subsititute with a valid email address and a strong password +# Protip: To generate password on bash run, "openssl rand -base64 16" +# Example: +# ADMIN_USER_ID=my_emailid@example.com +# ADMIN_PASSWORD=MyVeryStrongPassword123 +ADMIN_USER_ID=mahipal2@illinois.edu +ADMIN_PASSWORD=Test123 + + +# ---------------------------------------------------------------------- +# TEST ACCOUNT +# ---------------------------------------------------------------------- +# Description: This property is to enable a testuser for local development only. +# CAUTION: DO NOT ENABLE ON A PRODUCTION SERVER. +# Instruction: Subsititute with true or false +# Example: +# TEST_SIGN_IN=false +TEST_SIGN_IN=true + + +# ---------------------------------------------------------------------- +# JWT CONFIGURATION +# ---------------------------------------------------------------------- +# Description: This is used to generate authentication cookies by the api server. +# Instruction: Substitute with a long random string. +# Protip: To generate string on bash run, "openssl rand -base64 32" +# Example: +# JWT_KEY=cl9rdcPxDA6I9ARxJPlNYaZF7Xae7mgS7iqibI48xYQ= +JWT_KEY=hd0TnCmI7C4VUymKlQ6hLovlxN1QcgXM9jfReqXEP+A= + + +# ---------------------------------------------------------------------- +# AUTH0 CONFIGURATION +# ---------------------------------------------------------------------- +# Description: Auth0 is used as the authentication middleware for ClassTranscribe. +# Instructions: +# Step 1. Create a tenant on Auth0 +# Step 2. Set up the Connections on your Auth0 tenant (For more info, search google for "Auth0 Connections") +# Step 3. Create a Single Page Application on the Auth0 tenant.# +# Step 4. On auth0 fill in the following fields (If HOST_NAME used above is my.classtranscribe.com) +# a. Allowed Callback URLs - https://my.classtranscribe.com,https://my.classtranscribe.com/login +# b. Allowed Logout URLs - https://my.classtranscribe.com +# c. Allowed Web Origins - https://my.classtranscribe.com +# d. Allowed Origins(CORS) - https://my.classtranscribe.com +# Step 5. Save Changes +# Step 6. Subsititute with the auth0 domain and client Id generated. +# Example: +# AUTH0_CLIENT_ID=laksjfdsahfjkhfkjhsdhffj +# AUTH0_DOMAIN=something.auth0.com +AUTH0_CLIENT_ID=changeme +AUTH0_DOMAIN=changeme + +# ---------------------------------------------------------------------- +# CILOGON CONFIGURATION +# ---------------------------------------------------------------------- +CILOGON_CLIENT_ID=cilogon:/client_id/0 +CILOGON_CLIENT_SECRET=changeme +CILOGON_DOMAIN=cilogon.org + +# ---------------------------------------------------------------------- +# PORTAINER CONFIGURATION +# ---------------------------------------------------------------------- +# Description: To set the password for the portainer dashboard it must be passed to portainer as a hash. +# Instruction: 1. Run the following command below, (Substitute with the password generated above) +# "docker run --rm httpd:2.4-alpine htpasswd -nbB admin '' | cut -d ":" -f 2" +# 2. Substitute generated string below. +# (For more info, refer https://portainer.readthedocs.io/en/stable/configuration.html) +# Example: If ADMIN_PASSWORD is Test123, generated hash is $2y$05$KRCr38aM/Avz8fXMDPN3aetQSCql.H5fMXzMcdyX7XNw5HauB20ay +# PORTAINER_PASSWORD=$2y$05$KRCr38aM/Avz8fXMDPN3aetQSCql.H5fMXzMcdyX7XNw5HauB20ay +PORTAINER_PASSWORD=$$2y$$05$$KRCr38aM/Avz8fXMDPN3aetQSCql.H5fMXzMcdyX7XNw5HauB20ay + + +# ---------------------------------------------------------------------- +# TRAEFIK CONFIGURATION (Modify if deployed on server) +# ---------------------------------------------------------------------- +# Traefik is the reverse proxy used for routing, more info - https://docs.traefik.io/ +# Description: TRAEFIK_IPFILTER accepts / refuses requests based on the client IP. +# For more info - https://docs.traefik.io/v2.0/middlewares/ipwhitelist/ +# Instruction: Subsititute with required ipwhitelist +# Example: +# TRAEFIK_IPFILTER=123.123.0.0/12, 141.142.0.0/16, 0.0.0.0/0 +TRAEFIK_IPFILTER=123.123.0.0/12, 141.142.0.0/16, 0.0.0.0/0 + +# Description: Traefik could generate SSL certificates for additional host names. +# Instruction: Specify up to 2 additional hosts to generate SSL certificates for. +# Eg. TRAEFIK_ADDITIONAL_HOST1=my.classtranscribe.org +# Eg. TRAEFIK_ADDITIONAL_HOST2=my.classtranscribe.net +TRAEFIK_ADDITIONAL_HOST1= +TRAEFIK_ADDITIONAL_HOST2= + + +# ---------------------------------------------------------------------- +# AZURE APPLICATION INSIGHTS CONFIGURATION +# ---------------------------------------------------------------------- +# Description: Azure Application Insights allows reporting of all exceptions to Azure. +# Instruction: Create an Azure Application Insights service on Azure (For more info, google "Azure Application Insights") +# And, substitute below with the application insights key (aka Instruentation Key) +# Example: +# APPLICATION_INSIGHTS_KEY=6f7a5ee2-0811-4e3a-beaa-498af35d6ea2 +APPLICATION_INSIGHTS_KEY=0 + + +# ---------------------------------------------------------------------- +# SLACK WEBHOOK CONFIGURATION +# ---------------------------------------------------------------------- +# Description: To enable logging and monitoring messages to slack. +# Instruction: Create an incoming webhook on Slack, (For more info, https://api.slack.com/messaging/webhooks) +# Subsititute the url below, also substitute in dem_conf.yml +# Example: +# SLACK_WEBHOOK_URL=https://hooks.slack.com/services/abcdef/ghijkl/lkdsjfldskfksldljf +SLACK_WEBHOOK_URL=https://hooks.slack.com/services/changeme + + +# ---------------------------------------------------------------------- +# Task Engine Configuration +# ---------------------------------------------------------------------- +# Task Engine uses various media providers, their respective API keys need to be configured + +# Youtube +# Description: To enable Youtube Integration +# Instruction: Generate a youtube api key and subsititute below. (For more info, https://developers.google.com/youtube/registering_an_application) +# Eg. YOUTUBE_API_KEY=AIzaSyDKnpdznYOFxm_IRnrclGh4oSdQloZsdfsdo +YOUTUBE_API_KEY=changeme + +# Azure +# Description: ClassTranscribe uses Azure Cognitive Services to produce transcriptions, these require azure subscription keys +# Instruction: Generate and subsititute with subscription keys and regions, (For more info, https://azure.microsoft.com/en-us/services/cognitive-services/speech-to-text/) +# Multiple keys can be used, use a comma as separator between key and region and semi-colon to add multiple. +# Example: +AZURE_SUBSCRIPTION_KEYS=changeme,northcentralus + +# Kaltura +# Description: To enable a Katura/Mediaspace integration +# Instruction: Contact Mediaspace admin to obtain the partner_id, token_id and app_token. +# Example: + +KALTURA_PARTNER_ID=1329972 +KALTURA_TOKEN_ID=changeme +KALTURA_APP_TOKEN=changeme + + +# Box +# Description: To enable a Box integration +# Instruction: Contact Box admin to get client_id and client_secret +# Example: +# BOX_CLIENT_ID=jsdksdfhsdkfhskjdhfskjfhd +# BOX_CLIENT_SECRET=jlksdjfksjlsljslkfjlksjlkdsflk +BOX_CLIENT_ID= +BOX_CLIENT_SECRET= + +# ---------------------------------------------------------------------- +# FRONTEND CONFIGURATION (Optional) +# ---------------------------------------------------------------------- +# Description: Frontend typically points to the same host for the api service, if we need to change this explicitly setting this is required +# Instruction: Subsititute with alternate endpoint +# Example: +# REACT_APP_TESTING_BASE_URL=https://my.classtranscribe2.com +REACT_APP_TESTING_BASE_URL= +# Description: Setting this variable will trigger a hard reload on the uesr's device, whenever a new commit is made to the branch +# Instruction: Subsititute with an endpoint like below +# Example: +# REACT_APP_FRONTEND_COMMIT_ENDPOINT=https://api.github.com/repos/classtranscribe/Frontend/commits/master +REACT_APP_FRONTEND_COMMIT_ENDPOINT=https://api.github.com/repos/classtranscribe/Frontend/commits/master + +# ---------------------------------------------------------------------- +# RABBITMQ CONFIGURATION (Optional) +# ---------------------------------------------------------------------- +# Description: RabbitMQ Prefetch count sets the number of jobs rabbitmq can process in parallel. +# Instruction: Subsititute with an integer, default value is 10 if undefined +# Example: +# RABBITMQ_PREFETCH_COUNT=10 +#no longer used RABBITMQ_PREFETCH_COUNT=10 + +# New +# Max number of threads used by video processing +JOB_MAX_THREADS=1 + +MAX_CONCURRENT_VIDEO_TASKS=1 +MAX_CONCURRENT_SYNC_TASKS=1 +MAX_CONCURRENT_TRANSCRIPTIONS=1 + +#single threaded OCR +OMP_THREAD_LIMIT=1 + +SCENE_DETECT_FPS=0.5 +SCENE_DETECT_USE_FACE=true +SCENE_DETECT_USE_OCR=false + +#-------TRAEFIK----------- +TRAEFIK_HTTPS_OPTIONS=TLS +# Always use https, traffic to http is redirected to https +TRAEFIK_HTTP_REDIRECT=Redirect.EntryPoint:https +TRAEFIK_ACME_ENABLE=false + +#-------API----------- +ALLOWED_HOSTS=* +JWT_EXPIRE_DAYS=30 +ASPNETCORE_URLS=http://+:80 +DEV_ENV=OUTSIDEDOCKER +NODE_RPC_SERVER=localhost:50052 +PYTHON_RPC_SERVER=localhost:50051 +RabbitMQServer=localhost +#-------POSTGRES----------- +# Fixed server name, db name and data storage locations +POSTGRES_SERVER_NAME=localhost +POSTGRES_SERVER_PORT=5433 +PGDATA=/var/lib/postgresql/data/pgdata +POSTGRES_DB=ct2019db +#-------RPCSERVER----------- +DATA_DIRECTORY=/data +#TMPDIR=/data/temp +#-------NGINX----------- +NGINX_HTTP_PORT=80 +NGINX_HTTPS_PORT=443 +#VAR= +#-------FRONTEND----------- +NODE_PATH=src +#-------PGADMIN----------- +SCRIPT_NAME=/pgadmin + +#-------ELASTIC SEARCH------- +ES_CONNECTION_ADDR=http://localhost:9200 +ES_INDEX_TIME_TO_LIVE=2880 + diff --git a/TaskEngine/Program.cs b/TaskEngine/Program.cs index e7340510..6492220a 100644 --- a/TaskEngine/Program.cs +++ b/TaskEngine/Program.cs @@ -42,7 +42,11 @@ public static void Main() builder.AddConsole(); builder.AddFilter ("", LogLevel.Warning); - builder.AddApplicationInsights(configuration.GetValue("APPLICATION_INSIGHTS_KEY")); + string insightKey = configuration.GetValue("APPLICATION_INSIGHTS_KEY"); + if (!String.IsNullOrEmpty(insightKey) && insightKey.Trim().Length>1) + { + builder.AddApplicationInsights(insightKey); + } }) .AddOptions() .Configure(configuration) @@ -60,6 +64,8 @@ public static void Main() .AddSingleton() .AddSingleton() .AddSingleton() + .AddSingleton() + .AddSingleton() .AddSingleton() .AddSingleton() .AddSingleton() @@ -94,9 +100,11 @@ public static void Main() // Active queues managed by C# (concurrency > 0) are now purged after the queue is created and before messages are processed - ushort concurrent_videotasks = toUInt16(Globals.appSettings.MAX_CONCURRENT_VIDEO_TASKS, NO_CONCURRENCY); - ushort concurrent_synctasks = toUInt16(Globals.appSettings.MAX_CONCURRENT_SYNC_TASKS, MIN_CONCURRENCY); - ushort concurrent_transcriptions = toUInt16(Globals.appSettings.MAX_CONCURRENT_TRANSCRIPTIONS, MIN_CONCURRENCY); + ushort concurrent_videotasks = ToUInt16(Globals.appSettings.MAX_CONCURRENT_VIDEO_TASKS, NO_CONCURRENCY); + ushort concurrent_synctasks = ToUInt16(Globals.appSettings.MAX_CONCURRENT_SYNC_TASKS, MIN_CONCURRENCY); + ushort concurrent_transcriptions = ToUInt16(Globals.appSettings.MAX_CONCURRENT_TRANSCRIPTIONS, MIN_CONCURRENCY); + ushort concurrent_describe_images = 1; + ushort concurrent_describe_videos = 1; // Create and start consuming from all queues. If concurrency >=1 the queues are purged @@ -116,7 +124,12 @@ public static void Main() // Video Processing Related _logger.LogInformation($"Creating ProcessVideoTask consumer. Concurrency={concurrent_videotasks} "); serviceProvider.GetService().Consume(concurrent_videotasks); - + // Descriptions + serviceProvider.GetService().Consume(concurrent_describe_videos); + serviceProvider.GetService().Consume(concurrent_describe_images); + + + // SceneDetection now handled by native Python // See https://github.com/classtranscribe/pyapi serviceProvider.GetService().Consume(DISABLED_TASK); @@ -182,7 +195,7 @@ static void ExceptionHandler(object sender, UnhandledExceptionEventArgs args) _logger.LogError(e, "Unhandled Exception Caught"); } - private static ushort toUInt16(String val, ushort defaultVal) + private static ushort ToUInt16(String val, ushort defaultVal) { // ConvertToUInt16(String, int base) is not the droid you are looking for if (val != null && val.Length > 0) diff --git a/TaskEngine/Tasks/BuildElasticIndexTask.cs b/TaskEngine/Tasks/BuildElasticIndexTask.cs index 7ad38018..893ba6af 100644 --- a/TaskEngine/Tasks/BuildElasticIndexTask.cs +++ b/TaskEngine/Tasks/BuildElasticIndexTask.cs @@ -38,7 +38,7 @@ public BuildElasticIndexTask(RabbitMQConnection rabbitMQ, } protected async override Task OnConsume(string example, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, "BuildElasticIndexTask"); // may throw AlreadyInProgress exception + RegisterTask(cleanup, "BuildElasticIndexTask"); // may throw AlreadyInProgress exception GetLogger().LogInformation("BuildElasticIndexTask Starting"); diff --git a/TaskEngine/Tasks/CleanUpElasticIndexTask.cs b/TaskEngine/Tasks/CleanUpElasticIndexTask.cs index 610d3c2a..661003f5 100644 --- a/TaskEngine/Tasks/CleanUpElasticIndexTask.cs +++ b/TaskEngine/Tasks/CleanUpElasticIndexTask.cs @@ -38,7 +38,7 @@ public CleanUpElasticIndexTask(RabbitMQConnection rabbitMQ, } protected async override Task OnConsume(string example, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, "CleanUpElasticIndexTask"); // may throw AlreadyInProgress exception + RegisterTask(cleanup, "CleanUpElasticIndexTask"); // may throw AlreadyInProgress exception GetLogger().LogInformation("CleanUpElasticIndexTask Starting"); var result = await _client.Indices.GetAsync(new GetIndexRequest(Indices.All)); diff --git a/TaskEngine/Tasks/ConvertVideoToWavTask.cs b/TaskEngine/Tasks/ConvertVideoToWavTask.cs index 0616a102..a8e2f363 100644 --- a/TaskEngine/Tasks/ConvertVideoToWavTask.cs +++ b/TaskEngine/Tasks/ConvertVideoToWavTask.cs @@ -32,7 +32,7 @@ public ConvertVideoToWavTask(RabbitMQConnection rabbitMQ, RpcClient rpcClient, T protected override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, videoId); + RegisterTask(cleanup, videoId); throw new Exception("ConvertVideoToWavTask No longer used. Videoid= " + videoId); } diff --git a/TaskEngine/Tasks/CreateBoxTokenTask.cs b/TaskEngine/Tasks/CreateBoxTokenTask.cs index 9bba62fb..40d1c6ac 100644 --- a/TaskEngine/Tasks/CreateBoxTokenTask.cs +++ b/TaskEngine/Tasks/CreateBoxTokenTask.cs @@ -22,7 +22,7 @@ public CreateBoxTokenTask(RabbitMQConnection rabbitMQ, BoxAPI box, ILogger + { + + + public DescribeImageTask(RabbitMQConnection rabbitMQ, ILogger logger) + : base(rabbitMQ, TaskType.DescribeImage, logger) + { + + } + /// Extracts scene descriptions for a video. + /// Beware: It is possible to start another scene task while the first one is still running + protected async override Task OnConsume(string id, TaskParameters taskParameters, ClientActiveTasks cleanup) + { + RegisterTask(cleanup, id); // may throw AlreadyInProgress exception + GetLogger().LogInformation($"DescribeImageTask({id}): Consuming Task"); + JObject meta = taskParameters.Metadata; + string captionId = meta["CaptionId"].ToString(); + string imageFile = meta["ImageFile"].ToString(); + string ocrdata = meta["OCRText"].ToString(); + string ocrtext = ""; + try + { + JObject ocr = JObject.Parse(ocrdata); + JArray texts = ocr["text"] as JArray; + StringBuilder sb = new StringBuilder(); + foreach (var te in texts) { + string t = te.ToString(); + if (string.IsNullOrWhiteSpace(t)) continue; + if (sb.Length > 0) sb.Append(' '); + sb.Append(t); + } + ocrtext = sb.ToString(); + } catch(Exception ex) + { + GetLogger().LogError(ex, ex.Message); + } + GetLogger().LogInformation($"{captionId}: <{imageFile}> <{ocrtext}>"); + try + { + using var _context = CTDbContext.CreateDbContext(); + Caption c = await _context.Captions.FindAsync(captionId); + + if (c == null || !c.HasPlaceHolderText()) + { + GetLogger().LogInformation($"Describe Image {id}: Caption Text changed or caption missing"); + return; + } + string result = $"MOCK AI output: An interesting lecture slide ({captionId}) for image {imageFile} and ocr (\"{ocrtext}\")"; + c.Text = result; + _context.Update(c); + await _context.SaveChangesAsync(); + } + catch (Exception ex) + { + GetLogger().LogError(ex, ex.Message); + throw; + } + GetLogger().LogInformation($"DescribeImageTask({id}): Complete - end of task"); + } + } +} diff --git a/TaskEngine/Tasks/DescribeVideoTask.cs b/TaskEngine/Tasks/DescribeVideoTask.cs new file mode 100644 index 00000000..b23bb983 --- /dev/null +++ b/TaskEngine/Tasks/DescribeVideoTask.cs @@ -0,0 +1,158 @@ +using ClassTranscribeDatabase; +using ClassTranscribeDatabase.Models; +using ClassTranscribeDatabase.Services;using Microsoft.Extensions.Logging; +using Newtonsoft.Json.Linq; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System; +using System.Linq; +using System.Threading.Tasks; +using static ClassTranscribeDatabase.CommonUtils; + + +// #pragma warning disable CA2007 +// https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007 +// We are okay awaiting on a task in the same thread + +namespace TaskEngine.Tasks +{ + [SuppressMessage("Microsoft.Performance", "CA1812:MarkMembersAsStatic")] // This class is never directly instantiated + class DescribeVideoTask : RabbitMQTask + { + private readonly DescribeImageTask _describeImageTask; + + public DescribeVideoTask(RabbitMQConnection rabbitMQ, DescribeImageTask describeImageTask, ILogger logger) + : base(rabbitMQ, TaskType.DescribeVideo, logger) + { + _describeImageTask = describeImageTask; + } + /// Extracts scene descriptions for a video. + /// Beware: It is possible to start another scene task while the first one is still running + protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) + { + RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception + GetLogger().LogInformation($"DescribeVideoTask({videoId}): Consuming Task"); + + using var _context = CTDbContext.CreateDbContext(); + Video video = await _context.Videos.FindAsync(videoId); + + if (!video.HasSceneObjectData()) + { + GetLogger().LogInformation($"Describe Video {videoId}: Early return - no scene data to process"); + return; + } + TextData td = await _context.TextData.FindAsync(video.SceneObjectDataId); + + JObject sceneData = td.GetAsJSON() as JObject; + JArray scenes = sceneData["Scenes"] as JArray; + var captions = new List(); + + const string SIR = "ClassTranscribe/Scene-Describe"; // todo move into Model e.g. CaptionConstants +#pragma warning disable CS8632 // The annotation for nullable reference types should only be used in code within a '#nullable' annotations context. + Transcription? transcription = video.Transcriptions.Where(t => t.SourceInternalRef == SIR).OrderByDescending(t=>t.CreatedAt).FirstOrDefault(); +#pragma warning restore CS8632 // The annotation for nullable reference types should only be used in code within a '#nullable' annotations context. + if (transcription == null) + { + var theLanguage = Languages.ENGLISH_AMERICAN; + + transcription = new Transcription() + { + TranscriptionType = TranscriptionType.TextDescription, + VideoId = video.Id, + Language = Languages.ENGLISH_AMERICAN, + Label = "Description", + SourceLabel = "ClassTranscribe", + SourceInternalRef = SIR + }; + GetLogger().LogInformation($"Describe Video {videoId}: Creating new (empty) Description Entry"); + + _context.Add(transcription); + await _context.SaveChangesAsync(); + } + else + { + captions = transcription.Captions.ToList(); + GetLogger().LogInformation($"{videoId}: Reusing Description. Found {captions.Count} captions"); + } + // Step 2 Create Placeholder captions if they don't exist for every scene. + int alreadyDoneCount = 0, taskEnqueueCount = 0; ; + + int CaptionIndex = captions != null && captions.Count > 0 ? captions.Select(c => c.Index).Max() + 1 : 0; + var newCaptions = new List(); + var scenesWithNewCaption = new List(); + var describeScenes = new List(); + int sceneIndex = 0; + foreach (JObject scene in scenes) + { + var captionId = scene["captionId"]?.ToString(); + var caption = captionId != null ? captions.Where(c => c.Id == captionId).FirstOrDefault() : null; + if (caption == null) + { + var c = new Caption + { + Index = CaptionIndex++, + Begin = TimeSpan.Parse(scene["start"].ToString()), + End = TimeSpan.Parse(scene["end"].ToString()), + CaptionType = CaptionType.AudioDescription, + Text = CaptionConstants.PlaceHolderText, + TranscriptionId = transcription.Id + }; + newCaptions.Add(c); + scenesWithNewCaption.Add(sceneIndex); + describeScenes.Add(sceneIndex); + } + else + { + if (caption.HasPlaceHolderText()) + { + describeScenes.Add(sceneIndex); + } + else + { + alreadyDoneCount++; + } + } + + sceneIndex++; // todo rewrite as map with index? + } + + GetLogger().LogInformation($"Describe Video {videoId}: {newCaptions.Count} new captions to create"); + if (newCaptions.Any()) + { + _context.AddRange(newCaptions); + await _context.SaveChangesAsync(); + // Now we can associate the captionIds with the scene data + foreach (int i in scenesWithNewCaption) + { + dynamic scene = scenes[i] as JObject; + scene.captionId = newCaptions[i].Id; + } + sceneData.Remove("Scenes"); + sceneData.Add("Scenes", scenes); + td.SetFromJSON(sceneData); + _context.Update(td); + await _context.SaveChangesAsync(); + GetLogger().LogInformation($"Describe Video {videoId}: Scene Data {td.Id} Updated with Caption references"); + } + GetLogger().LogInformation($"Describe Video {videoId}: {describeScenes.Count} Description Tasks to enqueue"); + foreach (int i in describeScenes) + { + JObject scene = scenes[i] as JObject; + dynamic taskMeta = new JObject(); + string imageFile = scene["img_file"].ToString(); + taskMeta.ImageFile = imageFile; + taskMeta.OCRText = scene["raw_text"]?.ToString(); + taskMeta.CaptionId = scene["captionId"].ToString(); + + taskEnqueueCount++; + var taskParams = new TaskParameters(taskMeta); + + GetLogger().LogInformation($"Describe Video {videoId}: {imageFile} {taskMeta.CaptionId} {transcription.Id}"); + _describeImageTask.Publish(imageFile, taskParams); + + } + GetLogger().LogInformation($"Describe Video {videoId}: AlreadyDone={alreadyDoneCount}.enqueueCount={taskEnqueueCount}"); + GetLogger().LogInformation($"Describe Video {videoId}: Returning."); + } + } +} \ No newline at end of file diff --git a/TaskEngine/Tasks/DownloadMediaTask.cs b/TaskEngine/Tasks/DownloadMediaTask.cs index 616a3d64..a5b698e9 100644 --- a/TaskEngine/Tasks/DownloadMediaTask.cs +++ b/TaskEngine/Tasks/DownloadMediaTask.cs @@ -11,7 +11,7 @@ using System.Threading.Tasks; using static ClassTranscribeDatabase.CommonUtils; -#pragma warning disable CA2007 +// #pragma warning disable CA2007 // https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007 // We are okay awaiting on a task in the same thread @@ -43,7 +43,7 @@ public DownloadMediaTask(RabbitMQConnection rabbitMQ, RpcClient rpcClient, protected override async Task OnConsume(string mediaId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup,mediaId); // may throw AlreadyInProgress exception + RegisterTask(cleanup,mediaId); // may throw AlreadyInProgress exception Media media; string subdir; @@ -199,7 +199,9 @@ public async Task /// - public void registerTask(HashSet cleanup, Object keyId) + public void RegisterTask(HashSet cleanup, Object keyId) { if (cleanup == null || keyId == null) { @@ -121,16 +123,16 @@ public void registerTask(HashSet cleanup, Object keyId) if (cleanup.Contains(keyId)) { // This is a programming error the same message may not register the same key twice - throw new Exception($"Cleanup set may not already contain key ({keyId.ToString()})"); + throw new Exception($"Cleanup set may not already contain key ({keyId})"); } // Now check that globally there is no other task working on the same id // This may happen rarely. The purpose of registerTask is to immediately stop (by throwing an exception) if we discover we are late to the party. - alreadyRunning = !_inProgress[_queueName].Add(keyId); + alreadyRunning = !_inProgress[QueueName].Add(keyId); } if (alreadyRunning) { - _logger.LogError("{0} for {1} Task already running, so skipping this request and throwing exception", _queueName, keyId); - throw new InProgressException($"{ _queueName} for {keyId} Task already running, so skipping this request"); + Logger.LogError("{0} for {1} Task already running, so skipping this request and throwing exception", QueueName, keyId); + throw new InProgressException($"{ QueueName} for {keyId} Task already running, so skipping this request"); } cleanup.Add(keyId); @@ -144,13 +146,13 @@ public ClientActiveTasks GetCurrentTasks() { lock (_inProgress) { - return new ClientActiveTasks(_inProgress[_queueName]); + return new ClientActiveTasks(_inProgress[QueueName]); } } protected ILogger GetLogger() { - return _logger; + return Logger; } } [Serializable] diff --git a/TaskEngine/Tasks/SceneDetectionTask.cs b/TaskEngine/Tasks/SceneDetectionTask.cs index 1df26661..1baf80c0 100644 --- a/TaskEngine/Tasks/SceneDetectionTask.cs +++ b/TaskEngine/Tasks/SceneDetectionTask.cs @@ -31,7 +31,7 @@ public SceneDetectionTask(RabbitMQConnection rabbitMQ,TranscriptionTask transcri /// Beware: It is possible to start another scene task while the first one is still running protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, videoId); // may throw AlreadyInProgress exception + RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception GetLogger().LogInformation($"SceneDetection({videoId}): Consuming Task"); var filepath = ""; diff --git a/TaskEngine/Tasks/TranscriptionTask.cs b/TaskEngine/Tasks/TranscriptionTask.cs index 27f8e825..8d28538b 100644 --- a/TaskEngine/Tasks/TranscriptionTask.cs +++ b/TaskEngine/Tasks/TranscriptionTask.cs @@ -50,7 +50,7 @@ private async void buildMockCaptions(string videoId) foreach (var language in languages) { - var transcription = video.Transcriptions.SingleOrDefault(t => t.Language == language); + var transcription = video.Transcriptions.SingleOrDefault(t => t.Language == language && t.TranscriptionType == TranscriptionType.Caption); // Did we get the default or an existing Transcription entity? if (transcription == null) { @@ -95,7 +95,7 @@ private async void buildMockCaptions(string videoId) /// protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) { - registerTask(cleanup, videoId); // may throw AlreadyInProgress exception + RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception if (Globals.appSettings.MOCK_RECOGNITION == "MOCK") { buildMockCaptions(videoId); @@ -206,11 +206,13 @@ protected async override Task OnConsume(string videoId, TaskParameters taskParam { t = new Transcription() { + TranscriptionType = TranscriptionType.Caption, Captions = theCaptions, Language = theLanguage, VideoId = video.Id, Label = $"{theLanguage} (ClassTranscribe)", - SourceInternalRef = "ClassTranscribe/Azure" + SourceInternalRef = "ClassTranscribe/Azure", + SourceLabel = "ClassTranscribe (Azure" + (phraseHints.Length>0 ?" with phrase hints)" : ")") }; _context.Add(t); }