-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathCommonUtils.cs
225 lines (186 loc) · 8.82 KB
/
CommonUtils.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
using ClassTranscribeDatabase.Models;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
namespace ClassTranscribeDatabase
{
public class CommonUtils
{
/// <summary>
/// Kinds of task, each pinned to an explicit numeric value.
/// </summary>
/// <remarks>
/// Entries are numbered explicitly because these values will exist in the database,
/// and we don't want the meaning of existing entries to change due to future
/// modifications of this list. When adding a member, give it a new number; do not
/// renumber or reuse existing values.
/// </remarks>
public enum TaskType
{
PeriodicCheck = 1,
DownloadAllPlaylists = 2,
DownloadPlaylistInfo = 3,
DownloadMedia = 4,
ConvertMedia = 5,
TranscribeVideo = 6,
ProcessVideo = 7,
Aggregator = 8,
GenerateVTTFile = 9,
QueueAwaker = 10,
SceneDetection = 11,
UpdateBoxToken = 12,
CreateBoxToken = 13,
UpdateOffering = 14,
ReTranscribePlaylist = 15,
BuildElasticIndex = 16,
ExampleTask = 17,
CleanUpElasticIndex = 18,
PythonCrawler = 19,
DescribeVideo = 20,
DescribeImage = 21
}
/// <summary>
/// Language codes used for speech recognition and for translations.
/// </summary>
/// <remarks>
/// NOTE(review): these are mutable public static fields although they are named like
/// constants. Consider making them <c>static readonly</c> (or <c>const</c>) once it is
/// confirmed that no caller assigns to them.
/// </remarks>
public class Languages
{
// Speech recognition uses a dialect (language plus region), e.g. "en-US".
public static string ENGLISH_AMERICAN = "en-US";
// Translations use just a short language code.
public static string SIMPLIFIED_CHINESE = "zh-Hans";
public static string KOREAN = "ko";
public static string SPANISH = "es";
public static string FRENCH = "fr";
// See MSTRanscriptionTask for a full list of recognition and translation languages.
}
// Names for the Box access token and the Box refresh token.
// NOTE(review): presumably used as lookup keys for the stored tokens - confirm against callers.
// NOTE(review): mutable static fields; consider readonly/const if nothing assigns to them.
public static string BOX_ACCESS_TOKEN = "BOX_ACCESS_TOKEN";
public static string BOX_REFRESH_TOKEN = "BOX_REFRESH_TOKEN";
/// <summary>
/// Serializes <paramref name="obj"/> to JSON and returns the UTF-8 encoding of that JSON text.
/// </summary>
/// <typeparam name="T">Type of the object being serialized.</typeparam>
/// <param name="obj">The object to serialize.</param>
/// <returns>The UTF-8 bytes of the JSON representation.</returns>
public static byte[] MessageToBytes<T>(T obj)
    => Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(obj));
/// <summary>
/// Decodes <paramref name="bytes"/> as UTF-8 text and deserializes that JSON into a <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type to deserialize into.</typeparam>
/// <param name="bytes">UTF-8 encoded JSON.</param>
/// <returns>The deserialized object.</returns>
public static T BytesToMessage<T>(byte[] bytes)
{
    string json = Encoding.UTF8.GetString(bytes);
    T message = JsonConvert.DeserializeObject<T>(json);
    return message;
}
/// <summary>
/// Builds a random string of exactly <paramref name="length"/> characters drawn from a
/// 48-character alphabet of consonants and the digits 2-9.
/// </summary>
/// <param name="length">Number of characters to produce; 0 yields an empty string.</param>
/// <returns>The random string.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
public static string RandomString(int length)
{
    if (length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length), length, "Length must not be negative.");
    }

    if (length == 0)
    {
        // Without this guard the loop below would never terminate: a character is
        // appended before the length check, so the length could never equal 0.
        return string.Empty;
    }

    // Drop aeiouy to prevent unwanted words; drop 1 and 0 because they are too similar to IO
    const string chars = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ23456789";

    // Guid.NewGuid() produces a version-4 UUID: in the ToByteArray() layout the high
    // nibble of byte 7 is the fixed version (0x4) and the top two bits of byte 8 are the
    // fixed variant (binary 10). Those two bytes are not uniformly distributed, so they
    // are skipped to keep the generator fair.
    const int versionByteIndex = 7;
    const int variantByteIndex = 8;

    // This GUID-as-source of random bytes implementation avoids an exclusive file lock in /dev/urandom
    // ... and also overcomes the 2^32 seed limitation of the Random class (4bn states is insufficient if we have a million files).
    // Rejection sampling: only bytes below the largest multiple of chars.Length are used.
    // To understand the next line, imagine chars.Length was 100. We would only accept the values 0 - 199.
    int maxFair = 256 - (256 % chars.Length);

    StringBuilder result = new StringBuilder(length);
    while (true)
    {
        byte[] bytes = Guid.NewGuid().ToByteArray(); // 16 bytes, 14 of them fully random
        for (int i = 0; i < bytes.Length; i++)
        {
            if (i == versionByteIndex || i == variantByteIndex)
            {
                continue;
            }

            byte b = bytes[i];
            if (b >= maxFair)
            {
                continue; // would bias the modulo below
            }

            result.Append(chars[b % chars.Length]);
            if (result.Length == length)
            {
                return result.ToString();
            }
        }
    }
}
/// <summary>
/// Picks a randomly named path inside <c>Globals.appSettings.DATA_DIRECTORY</c> for which
/// no file currently exists. The file itself is not created.
/// </summary>
/// <returns>The candidate path.</returns>
public static string GetTmpFile()
{
    // Align with python code.
    const int filenameLength = 12; // was 8

    string candidate;
    do
    {
        // Keep drawing names until one does not collide with an existing file.
        candidate = Path.Combine(Globals.appSettings.DATA_DIRECTORY, RandomString(filenameLength));
    }
    while (File.Exists(candidate));

    return candidate;
}
/// <summary>
/// Derives a display name for a media item from its JSON metadata, according to its source type.
/// </summary>
/// <remarks>
/// A metadata value that is missing, JSON null, or an empty string is treated as absent in
/// every branch (a JSON null token stringifies to an empty string, so a plain <c>??</c>
/// fallback would let an empty name through).
/// </remarks>
/// <param name="media">The media whose <c>SourceType</c> and <c>JsonMetadata</c> are read.</param>
/// <returns>The derived name; "Untitled" when the metadata carries no usable name.</returns>
public static string GetMediaName(Media media)
{
    const string untitled = "Untitled";
    const string dateFormat = "MMMM dd, yyyy";

    // Reads a metadata value as text; returns null when it is missing, JSON null or empty.
    string Text(string key)
    {
        string value = media.JsonMetadata[key]?.ToString();
        return string.IsNullOrEmpty(value) ? null : value;
    }

    switch (media.SourceType)
    {
        case SourceType.Echo360:
        {
            string title = Text("title");
            if (title != null)
            {
                return title;
            }

            // No title: fall back to "<lesson name> <creation date>".
            string lessonName = Text("lessonName") ?? untitled;
            DateTime createdAt = Convert.ToDateTime(
                Text("createdAt") ?? "01/01/1970",
                CultureInfo.InvariantCulture);
            return $"{lessonName} {createdAt.ToString(dateFormat, CultureInfo.InvariantCulture)}";
        }

        case SourceType.Youtube:
            return Text("title") ?? untitled;

        case SourceType.Local:
        {
            string fileName = Text("filename");
            if (fileName == null && media.JsonMetadata.ContainsKey("video1"))
            {
                // The file name may instead be recorded inside the "video1" entry.
                fileName = JObject.Parse(media.JsonMetadata["video1"].ToString())?["FileName"]?.ToString();
            }

            if (string.IsNullOrEmpty(fileName))
            {
                fileName = untitled;
            }

            return fileName.Replace(".mp4", "");
        }

        case SourceType.Kaltura:
        {
            string videoName = Text("name") ?? untitled;
            // createdAt is added as seconds to 1970-01-01 UTC. Read it as a nullable long so
            // a JSON null falls back to 0 and large values do not overflow an int.
            long createdSeconds = media.JsonMetadata["createdAt"]?.ToObject<long?>() ?? 0L;
            DateTime videoDate = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc)
                .AddSeconds(createdSeconds);
            return $"{videoName} {videoDate.ToString(dateFormat, CultureInfo.InvariantCulture)}";
        }

        case SourceType.Box:
            return Text("name") ?? untitled;

        default:
            return untitled;
    }
}
/// <summary>
/// Returns the file path of the course offering related to <paramref name="entity"/>,
/// or "/data/" when no non-empty path is found.
/// </summary>
/// <param name="ctx">Database context passed through to the lookup.</param>
/// <param name="entity">The entity whose related course offering is looked up.</param>
/// <returns>The related file path, or the legacy default "/data/".</returns>
public static string ToCourseOfferingSubDirectory(CTDbContext ctx, Entity entity)
{
#nullable enable
    string? relatedPath = GetRelatedCourseOfferingFilePath(ctx, entity);
    bool hasRelatedPath = !string.IsNullOrEmpty(relatedPath);
#nullable disable
    // "/data/" is the legacy (pre 2022) default: everything is stored in the same directory.
    return hasRelatedPath ? relatedPath : "/data/";
}
#nullable enable
/// <summary>
/// Finds the <c>FilePath</c> of the earliest-created course offering related to the given entity.
/// </summary>
/// <remarks>
/// Every branch re-queries the database by the entity's Id rather than trusting any other
/// state on the passed-in instance. When a lookup finds no row, or a navigation property
/// along the way is null, the result is null instead of a <see cref="NullReferenceException"/>.
/// NOTE(review): each navigation step may be a separate lazy load; an explicit join could
/// reduce the number of SQL calls - confirm against the EF configuration.
/// </remarks>
/// <param name="ctx">Database context used for the lookups.</param>
/// <param name="entity">A CourseOffering, Course, Media, Offering, Playlist, Transcription or Video.</param>
/// <returns>The file path, or null when no related course offering (or no path) is found.</returns>
/// <exception cref="ArgumentNullException"><paramref name="entity"/> is null.</exception>
/// <exception cref="InvalidOperationException">The entity's type is not one of the supported types.</exception>
public static string? GetRelatedCourseOfferingFilePath(CTDbContext ctx, Entity entity)
{
    if (entity == null)
    {
        throw new ArgumentNullException(nameof(entity));
    }

    // Earliest-created course offering of an offering, tolerating nulls along the way.
    static string? EarliestFilePath(Offering? offering) =>
        offering?.CourseOfferings?.OrderBy(x => x.CreatedAt).FirstOrDefault()?.FilePath;

    // The only thing we can trust on the given entity is its Id.
    switch (entity)
    {
        case CourseOffering co:
            return ctx.CourseOfferings.Where(co2 => co2.Id == co.Id)
                .OrderBy(co2 => co2.CreatedAt).FirstOrDefault()?.FilePath;

        case Course c:
            return ctx.CourseOfferings.Where(co2 => co2.CourseId == c.Id)
                .OrderBy(co2 => co2.CreatedAt).FirstOrDefault()?.FilePath;

        case Media m:
            return EarliestFilePath(
                ctx.Medias.FirstOrDefault(m2 => m2.Id == m.Id)?.Playlist?.Offering);

        case Offering o:
            return ctx.CourseOfferings.Where(co2 => co2.OfferingId == o.Id)
                .OrderBy(co2 => co2.CreatedAt).FirstOrDefault()?.FilePath;

        case Playlist p:
            return EarliestFilePath(
                ctx.Playlists.FirstOrDefault(p2 => p2.Id == p.Id)?.Offering);

        case Transcription t:
            // Transcription -> Video -> earliest Media -> Playlist -> Offering.
            return EarliestFilePath(
                ctx.Transcriptions.FirstOrDefault(t2 => t2.Id == t.Id)
                    ?.Video
                    ?.Medias
                    ?.OrderBy(m2 => m2.CreatedAt).FirstOrDefault()
                    ?.Playlist
                    ?.Offering);

        case Video v:
            // Earliest Media that references this video.
            return EarliestFilePath(
                ctx.Medias.OrderBy(m2 => m2.CreatedAt).FirstOrDefault(m2 => m2.VideoId == v.Id)
                    ?.Playlist
                    ?.Offering);

        default:
            throw new InvalidOperationException($"GetRelatedCourseOffering not implemented for type {entity.GetType()} (Object ID: {entity.Id})");
    }
}
#nullable disable
}
}