Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove vtt support. Support MediaDownload on existing video #455

Merged
merged 3 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ private async Task<MSTResult> performRecognitionAsync(string logId, string fileP

if (verboseLogging)
{
_logger.LogInformation($"{logId}: Begin={begin.Minutes}:{begin.Seconds},{begin.Milliseconds}", begin);
_logger.LogInformation($"{logId}: End={end.Minutes}:{end.Seconds},{end.Milliseconds}");
_logger.LogInformation($"{logId}: Begin={begin.Minutes}:{begin.Seconds}.{begin.Milliseconds}", begin);
_logger.LogInformation($"{logId}: End={end.Minutes}:{end.Seconds}.{end.Milliseconds}");
}
// TODO/TOREVIEW:
// ToCaptionEntitiesWithWordTiming vs ToCaptionEntitiesInterpolate
Expand Down Expand Up @@ -243,7 +243,8 @@ private async Task<MSTResult> performRecognitionAsync(string logId, string fileP
}
else if (e.Result.Reason == ResultReason.NoMatch)
{
_logger.LogInformation($"{logId}: NOMATCH: Speech could not be recognized.");
TimeSpan begin = (new TimeSpan(e.Result.OffsetInTicks)).Add(restartOffset);
_logger.LogInformation($"{logId}: NOMATCH: ({begin.Minutes}:{begin.Seconds}) Speech could not be recognized.");
}
};

Expand Down
80 changes: 42 additions & 38 deletions ClassTranscribeServer/Controllers/AdminController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,51 +70,55 @@ public ActionResult UpdateAllPlaylists()
/// <summary>
/// Regenerate one Caption (vtt, srt) file of the given Transcription
/// </summary>
[HttpPost("UpdateVTTFile")]
[Authorize(Roles = Globals.ROLE_ADMIN)]
public ActionResult UpdateVTTFile(string transcriptionId)
{
_logger.LogInformation($"Enqueueing {transcriptionId} caption regeneration");
_wakeDownloader.UpdateVTTFile(transcriptionId);
return Ok();
}
/// will be deleted soon - We now generate vtt files dynamically.
// [HttpPost("UpdateVTTFile")]
// [Authorize(Roles = Globals.ROLE_ADMIN)]
// public ActionResult UpdateVTTFile(string transcriptionId)
// {
// _logger.LogInformation($"Enqueueing {transcriptionId} caption regeneration");
// _wakeDownloader.UpdateVTTFile(transcriptionId);
// return Ok();
// }

/// <summary>
/// Regenerate all Caption (vtt, srt) files of the given course offering
/// </summary>
[HttpPost("UpdateVTTFilesInCourseOffering")]
[Authorize(Roles = Globals.ROLE_ADMIN)]
public async Task<ActionResult> UpdateVTTFilesInCourseOffering(string offeringId = null)
{

var playlistIds = await _context.Playlists.Where(p => p.OfferingId == offeringId).Select(p => p.Id).ToListAsync();
_logger.LogInformation($"UpdateVTTFilesinPlaylist(${offeringId}): Found {playlistIds.Count} playlists");

var videoIds = await _context.Medias.Where(m => playlistIds.Contains(m.PlaylistId)).Select(m => m.VideoId).ToListAsync();
_logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {videoIds.Count} videos");
var transcriptionIds = await _context.Transcriptions.Where(t => videoIds.Contains(t.VideoId)).Select(t => t.Id).ToListAsync();
_logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {transcriptionIds.Count} vtt transcriptions to regenerate");
foreach (var t in transcriptionIds)
{
_wakeDownloader.UpdateVTTFile(t);
}
return Ok($"Requested {transcriptionIds.Count} Transcriptions to be regenerated from {videoIds.Count} videos in {playlistIds.Count} playlists");
}
/// Will be deleted soon - we no longer store vtt files
// [HttpPost("UpdateVTTFilesInCourseOffering")]
// [Authorize(Roles = Globals.ROLE_ADMIN)]
// public async Task<ActionResult> UpdateVTTFilesInCourseOffering(string offeringId = null)
// {

// var playlistIds = await _context.Playlists.Where(p => p.OfferingId == offeringId).Select(p => p.Id).ToListAsync();
// _logger.LogInformation($"UpdateVTTFilesinPlaylist(${offeringId}): Found {playlistIds.Count} playlists");

// var videoIds = await _context.Medias.Where(m => playlistIds.Contains(m.PlaylistId)).Select(m => m.VideoId).ToListAsync();
// _logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {videoIds.Count} videos");
// var transcriptionIds = await _context.Transcriptions.Where(t => videoIds.Contains(t.VideoId)).Select(t => t.Id).ToListAsync();
// _logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {transcriptionIds.Count} vtt transcriptions to regenerate");
// foreach (var t in transcriptionIds)
// {
// _wakeDownloader.UpdateVTTFile(t);
// }
// return Ok($"Requested {transcriptionIds.Count} Transcriptions to be regenerated from {videoIds.Count} videos in {playlistIds.Count} playlists");
// }

/// <summary>
/// Regenerate all Caption (vtt, srt) files of all transcriptions
/// </summary>
[HttpPost("UpdateAllVTTFiles")]
[Authorize(Roles = Globals.ROLE_ADMIN)]
public async Task<ActionResult> UpdateAllVTTFiles()
{
var transcriptionIds = await _context.Transcriptions.Select(t => t.Id).ToListAsync();
_logger.LogInformation($"UpdateAllVTTFiles: Enqueueing {transcriptionIds.Count} vtt transcriptions to regenerate");
foreach (var t in transcriptionIds)
{
_wakeDownloader.UpdateVTTFile(t);
}
return Ok();
}
/// will be deleted soon - we no longer store vtt files
// [HttpPost("UpdateAllVTTFiles")]
// [Authorize(Roles = Globals.ROLE_ADMIN)]
// public async Task<ActionResult> UpdateAllVTTFiles()
// {
// var transcriptionIds = await _context.Transcriptions.Select(t => t.Id).ToListAsync();
// _logger.LogInformation($"UpdateAllVTTFiles: Enqueueing {transcriptionIds.Count} vtt transcriptions to regenerate");
// foreach (var t in transcriptionIds)
// {
// _wakeDownloader.UpdateVTTFile(t);
// }
// return Ok();
// }


/// <summary>
Expand Down
20 changes: 12 additions & 8 deletions TaskEngine/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ public static void Main()
builder.AddConsole();
builder.AddFilter<Microsoft.Extensions.Logging.ApplicationInsights.ApplicationInsightsLoggerProvider>
("", LogLevel.Warning);
string insightKey = configuration.GetValue<string>("APPLICATION_INSIGHTS_KEY");
if (!String.IsNullOrEmpty(insightKey) && insightKey.Trim().Length>1)
{
builder.AddApplicationInsights(insightKey);
}
// If we re-enable Application Insights (A.I.) in the future:
// use the AddApplicationInsights() overload which accepts Action<TelemetryConfiguration> and set TelemetryConfiguration.ConnectionString. See https://github.com/microsoft/ApplicationInsights-dotnet/issues/2560 for more details.

// string insightKey = configuration.GetValue<string>("APPLICATION_INSIGHTS_KEY");
// if (!String.IsNullOrEmpty(insightKey) && insightKey.Trim().Length>1)
// {
// builder.AddApplicationInsights(insightKey);
// }
})
.AddOptions()
.Configure<AppSettings>(configuration)
Expand All @@ -58,7 +61,7 @@ public static void Main()
.AddSingleton<ConvertVideoToWavTask>()
.AddSingleton<TranscriptionTask>()
.AddSingleton<QueueAwakerTask>()
.AddSingleton<GenerateVTTFileTask>()
// .AddSingleton<GenerateVTTFileTask>()
.AddSingleton<RpcClient>()
.AddSingleton<ProcessVideoTask>()
.AddSingleton<MSTranscriptionService>()
Expand Down Expand Up @@ -116,10 +119,11 @@ public static void Main()
serviceProvider.GetService<DownloadMediaTask>().Consume(concurrent_synctasks);

// Transcription Related
_logger.LogInformation($"Creating TranscriptionTask & GenerateVTTFileTask consumers. Concurrency={concurrent_transcriptions} ");
_logger.LogInformation($"Creating TranscriptionTask consumers. Concurrency={concurrent_transcriptions} ");

serviceProvider.GetService<TranscriptionTask>().Consume(concurrent_transcriptions);
serviceProvider.GetService<GenerateVTTFileTask>().Consume(concurrent_transcriptions);

// no more! - serviceProvider.GetService<GenerateVTTFileTask>().Consume(concurrent_transcriptions);

// Video Processing Related
_logger.LogInformation($"Creating ProcessVideoTask consumer. Concurrency={concurrent_videotasks} ");
Expand Down
83 changes: 65 additions & 18 deletions TaskEngine/Tasks/DownloadMediaTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -134,17 +134,45 @@ protected async Task<bool> updateMediaWithVideo(string mediaId, Video newVideo)
await newVideo.DeleteVideoAsync(_context);
return false;
}
GetLogger().LogInformation($"Media ({mediaId}): media.Video == null is {media.Video == null}");
GetLogger().LogInformation($"Media ({mediaId}): existing media.Video {media.Video != null}");
GetLogger().LogInformation($"Media ({mediaId}): media.Video?.Video1.Id={media.Video?.Video1.Id} ...Video2.Id={media.Video?.Video2.Id} ");

GetLogger().LogInformation($"Media ({mediaId}): downloaded: newVideo.Video1={newVideo.Video1} ...Video2={newVideo.Video2} ");
GetLogger().LogInformation($"Media ({mediaId}): downloaded: newVideo.Video1.Hash={newVideo.Video1?.Hash} ...Hash2={newVideo.Video2?.Hash} ");

// Don't add video if there are already videos for the given media.
//
if(newVideo.Id != null) {
GetLogger().LogError($"Media ({mediaId}): Huh? newVideo should not have an Id yet - that's my job!");
return false;
}
if (media.Video != null)

if (media.VideoId != null)
{
GetLogger().LogInformation($"Media ({mediaId}): Surprise - media already has video set (race condition?)- no further processing required.Discarding new files");
// Normally a DownloadMediaTask is only triggered if the video is null.
// So this code is run when a manual DownloadMediaTask is requested again
var changed = false;
var v = media.Video;
GetLogger().LogInformation($"Media ({mediaId}): Media already has video with video1Id <{media.VideoId}> Cherrypicking new files");
var pickVideo2 = newVideo.Video2 != null && (v.Video2Id == null || newVideo.Video2.Hash != v.Video2.Hash);
GetLogger().LogInformation($"Media ({mediaId}):pickVideo2={pickVideo2}");

if( newVideo.Video2 != null && (v.Video2Id == null || newVideo.Video2.Hash != v.Video2.Hash)){
_context.FileRecords.Add(newVideo.Video2);
_context.SaveChanges(); // now v2 has an Id, so we can use below
v.Video2 = newVideo.Video2;
newVideo.Video2 = null;
changed = true;
}
if(newVideo.ASLVideo != null && ( v.ASLVideoId == null || newVideo.ASLVideo.Hash != v.ASLVideo.Hash)) {
_context.FileRecords.Add(newVideo.ASLVideo);
_context.SaveChanges(); // now the ASL video record has an Id, so we can use it below
v.ASLVideo = newVideo.ASLVideo;
newVideo.ASLVideo = null;
changed = true;
}
if(changed) _context.SaveChanges();
await newVideo.DeleteVideoAsync(_context);
return false;
return changed;
}
// Time to find out what we have in the database
// Important idea: the newVideo and its filerecords are not yet part of the database.
Expand All @@ -155,7 +183,7 @@ protected async Task<bool> updateMediaWithVideo(string mediaId, Video newVideo)
var existingPrimaryVideo = existingPrimaryVideos?.FirstOrDefault(); // If non null we expect 0 or 1

GetLogger().LogInformation($"Media ({mediaId}): {matchingFiles.Count} FileRecord hash match found");
GetLogger().LogInformation($"Media ({mediaId}): {existingPrimaryVideos?.Count ?? 0} existing Videos found");
GetLogger().LogInformation($"Media ({mediaId}): {existingPrimaryVideos?.Count ?? 0} existing Primary Videos found");

// cherrypick case (see comment below)
if (existingPrimaryVideo != null)
Expand Down Expand Up @@ -240,48 +268,67 @@ public async Task<Video> DownloadKalturaVideo(string subdir, Media media)
string temp = video1Url;
video1Url = video2Url;
video2Url = temp;
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): swapped streams to 1:<{video1Url}> and 2:<{video2Url}>");
}
}
catch (Exception) { };

GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): Requesting download of video1 ({video1Url})");
var mediaResponse = await _rpcClient.PythonServerClient.DownloadKalturaVideoRPCAsync(new CTGrpc.MediaRequest
{
VideoUrl = video1Url
});
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): Video1 downloaded to ({mediaResponse.FilePath})");

Video video;
if (FileRecord.IsValidFile(mediaResponse.FilePath))
// Sanity check that the downloaded file is valid (has at least a few bytes).
var isValid = FileRecord.IsValidFile(mediaResponse.FilePath);
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): video1 is valid: {isValid}");
if (isValid)
{
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): GetNewFileRecordAsync");
var video1record = await FileRecord.GetNewFileRecordAsync(mediaResponse.FilePath, mediaResponse.Ext, subdir);
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): {video1record}");
video = new Video
{
Video1 = await FileRecord.GetNewFileRecordAsync(mediaResponse.FilePath, mediaResponse.Ext, subdir)
Video1 = video1record
};
try
{
if (media.JsonMetadata["child"] != null && media.JsonMetadata["child"]["downloadUrl"] != null)
if (video2Url != null)
{
GetLogger().LogInformation($"Media ({media.Id}): Downloading child video");
GetLogger().LogInformation($"Media ({media.Id}): Downloading second video ({video2Url})");

var childMediaR = await _rpcClient.PythonServerClient.DownloadKalturaVideoRPCAsync(new CTGrpc.MediaRequest
var secondMediaR = await _rpcClient.PythonServerClient.DownloadKalturaVideoRPCAsync(new CTGrpc.MediaRequest
{
VideoUrl = video2Url
VideoUrl = video2Url //might be swapped
});
if (FileRecord.IsValidFile(childMediaR.FilePath))
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): Video2 downloaded to ({secondMediaR.FilePath})");

// Sanity check that the downloaded file is valid (has at least a few bytes).

var video2Valid = FileRecord.IsValidFile(secondMediaR.FilePath);
GetLogger().LogInformation($"Media ({media.Id}): Second video downloaded ({secondMediaR.FilePath}) is valid: {video2Valid}");
if (video2Valid)
{
video.Video2 = await FileRecord.GetNewFileRecordAsync(childMediaR.FilePath, childMediaR.Ext, subdir);
}
var video2record = await FileRecord.GetNewFileRecordAsync(secondMediaR.FilePath, secondMediaR.Ext, subdir);
GetLogger().LogInformation($"Media ({media.Id}): Second video record {video2record} ");
video.Video2 = video2record;
}
} else {
GetLogger().LogInformation($"Media ({media.Id}): No second video to download");
}
}
catch (Exception ignored)
{
GetLogger().LogInformation(ignored, $"Couldnt download second video for {media.Id}");
GetLogger().LogError(ignored, $"Media ({media.Id}): Exception {ignored}");
}
}
else
{
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): first downloaded file ({mediaResponse.FilePath}) was not valid>");
throw new Exception("DownloadKalturaVideo Failed + " + media.Id);
}

GetLogger().LogInformation($"Media ({media.Id}): DownloadKalturaVideo done Video1={video.Video1?.Id} Video2={video.Video2?.Id}");
return video;
}

Expand Down
Loading