video2gif/processVideos.py at master · IsanHu/video2gif · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
#-*- coding:utf-8 -*-
# Import the video2gif package
import video2gif
'''
Compile the score function
On the GPU, the network will be using cuDNN layer implementations available in the Lasagne master

If the device is CPU, it will use the CPU version that requires my Lasagne fork with added 3D convolution and pooling.
You can get it from https://github.com/gyglim/Lasagne

'''
import Queue
import threading
from threading import Timer
score_function = video2gif.get_prediction_function()

from IPython.display import Image, display
import os
from moviepy.editor import VideoFileClip, AudioFileClip
import sys
import json
import time
from time import sleep

# Every working directory is resolved relative to the directory of this file.
basedir = os.path.abspath(os.path.dirname(__file__))
config = {}
# Folder that add_video_to_process / get_caption_from_xunfei read videos from.
config['UPLOAD_FOLDER'] = basedir + '/unprocessedvideos/'
# Folder the source video is moved to once its GIFs have been generated.
config['PROCESSED_FOLDER'] = basedir + '/processedvideos/'
config['THUMBNAIL_FOLDER'] = basedir + '/unprocessedvideos/thumbnail/'
# Resized GIFs, one sub-folder per video.
config['GIF_FOLDER'] = basedir + '/static/gifs/'
# Un-resized mp4 clips of the same segments, one sub-folder per video.
config['ORIGINAL_GIF_FOLDER'] = basedir + '/static/original_gifs/'
# Intermediate files: extracted audio and per-video info/caption .txt files.
config['BOTTLENECK'] = basedir + '/bottleneck/'
# Zip archives built from the ORIGINAL_GIF_FOLDER sub-folders.
config['ZIPED_GIF_FOLDER'] = basedir + '/zipedgifs/'

# Xunfei speech-to-text client jar and credentials, passed on the java command line.
# NOTE(review): the app id and key are hard-coded in source — consider loading
# them from the environment or an untracked config file.
config['XUNFEI_JAR'] = basedir + '/Lfasr.jar'
config['XUNFEI_APPID'] = '591c2c4d'
config['XUNFEI_KEY'] = 'c238e91e995ae7b31d313caba8ce28a5'


# Status values stored in videos[<file_name>]['status']:
# "排队处理中"          queued for processing
# "排队处理中（字幕）"   queued for processing (caption pipeline)
# "处理中"              processing
# "生成字幕中"          captions are being generated
# "生成字幕成功"        captions generated successfully
# "处理中（字幕）"       processing (caption pipeline)

# Keys of a videos[<file_name>] entry:
# status
# tags
# caption    flag selecting the pipeline (add_video_to_process compares it with "true")
# file_name
# xunfei_id

# Example contents:
# videos = {'择天记_时间.mp4': {'status': "处理中"}, '择天记_时间2.mp4': {'status': "排队处理中"}, '择天记_字幕.mp4': {'status': "生成字幕中"}, '择天记_字幕2.mp4': {'status': "排队处理中（字幕）"}, '择天记_字幕3.mp4': {'status': "处理中（字幕）"}}
# Registry of videos that are queued or in progress; entries are deleted when processing ends.
videos = {}
# Bounded job queues, each drained by one generator/worker pair defined below.
noCaptionQueue = Queue.Queue(maxsize=50)
captionQueue = Queue.Queue(maxsize=50)
uploadAudioQueue = Queue.Queue(maxsize=50)
getAudioQueue = Queue.Queue(maxsize=50)
# Upper bound on the number of clips exported per video.
topCount = 100
# Fallback segment length used when a video's 'duration' setting is <= 0.
clipDuration = 2

## Queue for videos without captions
def start_nocaption_video_queue():
	"""Launch the daemon thread that consumes the no-caption video queue."""
	worker = threading.Thread(target=did_start_nocaption_video_queue)
	# Daemon thread, so it never blocks interpreter shutdown.
	worker.daemon = True
	worker.start()

def did_start_nocaption_video_queue():
	"""Worker loop: generate GIFs for every job taken from the no-caption queue."""
	for job in get_no_caption_video_path():
		# A job is (file_name, video_path, gif_path, info_file_path, processed_path).
		name, source, gif_dir, info_file, archive = job
		process_video_to_generate_gifs(name, source, gif_dir, info_file, archive)

def get_no_caption_video_path():
    """Yield jobs from ``noCaptionQueue`` until a falsy item is received.

    Blocks while the queue is empty. A job is marked done on the queue when
    the consumer comes back for the next one.
    """
    while True:
        job = noCaptionQueue.get()
        if not job:
            return
        yield job
        # Reached when the consumer resumes the generator, i.e. after it
        # has finished working on the job it was handed.
        noCaptionQueue.task_done()

def is_overlapping(x1, x2, y1, y2):
    """Return True when intervals (x1, x2) and (y1, y2) share more than a boundary point."""
    latest_start = max(x1, y1)
    earliest_end = min(x2, y2)
    return latest_start < earliest_end

def process_video_to_generate_gifs(file_name, video_path, gif_path, info_file_path, processed_path):
	"""Score a video without captions and export its best non-overlapping segments.

	Each selected segment is written twice: as a resized GIF under ``gif_path``
	and as an mp4 clip under ``ORIGINAL_GIF_FOLDER/<file_name>``. Afterwards the
	mp4 folder is zipped, the source video is moved to ``processed_path`` and
	the entry is removed from ``videos``.

	Args:
		file_name: key of the video in ``videos``; nothing happens if it is absent.
		video_path: path of the video to process.
		gif_path: directory that receives the generated GIFs.
		info_file_path: not used here (part of the queue tuple layout).
		processed_path: destination of the source video once processing is done.
	"""
	# Imported locally because the module does not import them at file level.
	import shutil
	import subprocess

	if file_name not in videos:
		return
	process_start = time.time()
	info = videos[file_name]
	info['status'] = '处理中'
	video = VideoFileClip(video_path)
	duration = info['duration']
	if duration <= 0:
		duration = clipDuration

	# One list of back-to-back windows per one-second start offset, so the
	# candidate segments are staggered across the whole video.
	segmentsArray = []
	for videoStart in range(0, duration, 1):
		particalSegments = [(start, int(start+video.fps*duration)) for start in range(int(videoStart*video.fps),int(video.duration*video.fps),int(video.fps*duration))]
		print("particalSegments count:")
		print(len(particalSegments))
		segmentsArray.append(particalSegments)

	print("segments array count:")
	print(len(segmentsArray))

	# Score the segments
	scores = {}
	for particalSegments in segmentsArray:
		particalScores = video2gif.get_scores(score_function, particalSegments, video, stride=8)
		scores.update(particalScores)
		print("score count:")
		print(len(scores))

	OUT_DIR = gif_path
	if not os.path.exists(OUT_DIR):
		os.mkdir(OUT_DIR)
	ogiginal_gif_path = os.path.join(config['ORIGINAL_GIF_FOLDER'], file_name)
	print('original path:')
	print(ogiginal_gif_path)
	if not os.path.exists(ogiginal_gif_path):
		os.mkdir(ogiginal_gif_path)

	# Generate GIFs from the top scoring segments
	nr = 0
	top_k = min(topCount, len(scores))
	occupiedTime = []
	height = videos[file_name]['height']
	print(height)
	for segment in sorted(scores, key=lambda x: -scores.get(x)):
		if nr >= top_k:
			break

		# Skip a candidate that overlaps a segment that was already exported.
		if any(is_overlapping(seg[0], seg[1], segment[0], segment[1]) for seg in occupiedTime):
			print("skip overlapping")
			continue

		occupiedTime.append(segment)
		start_sec = segment[0] / float(video.fps)
		end_sec = segment[1] / float(video.fps)
		clip = video.subclip(start_sec, end_sec)
		original_clip = video.subclip(start_sec, end_sec)
		out_gif = "%s/%s_%.2d.gif" % (OUT_DIR.decode('utf-8'),file_name.decode('utf-8'),nr)
		origianl_gif = "%s/%s_%.2d.mp4" % (ogiginal_gif_path.decode('utf-8'), file_name.decode('utf-8'), nr)
		## resize
		if height > 0:
			clip = clip.resize(height=height)
		else:
			clip = clip.resize(width=320)
		clip.write_gif(out_gif, fps=10, program="ImageMagick", opt="optimizeplus")
		original_clip.write_videofile(origianl_gif, fps=10, audio=False)
		nr += 1

	# Zip the full-size clips. An argument list (no shell) keeps file names
	# with spaces, quotes or other shell metacharacters intact.
	zip_path = os.path.join(config['ZIPED_GIF_FOLDER'], file_name + '.zip')
	zip_cmd = ['zip', '-rj', zip_path, ogiginal_gif_path]
	print(' '.join(zip_cmd))
	try:
		subprocess.call(zip_cmd)
	except OSError as err:
		# Best effort, as before: a missing zip binary must not kill the worker.
		print("zip failed: %s" % err)

	# Move the source video to the processed folder.
	print("mv %s %s" % (video_path, processed_path))
	try:
		shutil.move(video_path, processed_path)
	except (IOError, OSError, shutil.Error) as err:
		print("mv failed: %s" % err)

	del videos[file_name]
	print("处理无字幕视频用时: %.2fs" % (time.time() - process_start))


## Set up the audio-extraction queue
def start_get_audio_queue():
	"""Launch the daemon thread that extracts audio from queued videos."""
	print("初始化提取音频队列")
	extractor = threading.Thread(target=did_start_get_audio_queue)
	extractor.daemon = True
	extractor.start()

def did_start_get_audio_queue():
	"""Worker loop: extract the audio track for every job on ``getAudioQueue``.

	An audio file that already exists at ``audio_path`` is reused as-is (its
	duration is not compared with the video's). After a successful extraction
	the job is handed to ``uploadAudioQueue``; after a failure the partial
	audio file is removed and the job is put back on ``getAudioQueue``.
	"""
	print('did_start_get_audio_queue')
	for job in get_video_to_audio_path():
		file_name, video_path, gif_path, audio_path, caption_path, processed_path = job
		start = time.time()

		if not os.path.isfile(audio_path):
			try:
				video = VideoFileClip(video_path)
				clip = video.subclip(0)
				clip.audio.write_audiofile(audio_path, bitrate="128k")
			except Exception as err:
				# Exception (not a bare except) so interpreter-exit signals are
				# not swallowed, and the reason is reported instead of hidden.
				print("%s提取音频失败" % file_name)
				print("reason: %r" % (err,))
				if os.path.isfile(audio_path):
					os.remove(audio_path)
				# Put the job back on the extraction queue.
				# NOTE(review): retries are unbounded and immediate — a video
				# that can never yield audio will loop here forever.
				getAudioQueue.put(job)
				continue
		print("提取音频用时: %.2fs" % (time.time() - start))
		uploadAudioQueue.put((file_name, video_path, audio_path))


def get_video_to_audio_path():
	"""Yield jobs from ``getAudioQueue`` until a falsy item is received.

	Blocks while the queue is empty. A job is marked done on the queue when
	the consumer comes back for the next one.
	"""
	while True:
		job = getAudioQueue.get()
		if not job:
			return
		yield job
		# Reached once the consumer resumes the generator after handling the job.
		getAudioQueue.task_done()


## Set up the queue that uploads audio to Xunfei
def start_upload_audio_queue():
	"""Launch the daemon thread that uploads extracted audio to Xunfei."""
	uploader = threading.Thread(target=did_start_upload_audio_queue)
	uploader.daemon = True
	uploader.start()

def did_start_upload_audio_queue():
	"""Worker loop: upload each queued audio file through the Xunfei jar.

	The jar is run in mode ``0`` with the app id, key and audio path, and its
	stdout is parsed as JSON. When ``result['ok'] == 0`` the returned id is
	stored as ``xunfei_id`` on the video's entry and the status becomes
	"生成字幕中". Any other outcome puts the job back on ``uploadAudioQueue``.
	"""
	# Imported locally because the module does not import it at file level.
	import subprocess

	for job in get_audio_path():
		file_name, video_path, audio_path = job
		start = time.time()
		# Argument list instead of a shell string, so an audio path containing
		# spaces, quotes or other shell metacharacters is passed through intact.
		args = ['java', '-jar', config['XUNFEI_JAR'], '0', config['XUNFEI_APPID'], config['XUNFEI_KEY'], audio_path]
		print(' '.join(args))
		try:
			output = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0]
			result = json.loads(output)
		except Exception:
			# Upload failed (java missing, non-JSON output, ...): retry later.
			print("上传失败")
			uploadAudioQueue.put(job)
			continue

		print(result)
		if result['ok'] != 0:
			# Upload rejected by the service: retry later.
			uploadAudioQueue.put(job)
			continue

		info = videos[file_name]
		info['xunfei_id'] = result['data']
		info['status'] = "生成字幕中"
		print(audio_path)
		print("上传音频用时: %.2fs" % (time.time() - start))

def get_audio_path():
	"""Yield jobs from ``uploadAudioQueue`` until a falsy item is received.

	Blocks while the queue is empty. A job is marked done on the queue when
	the consumer comes back for the next one.
	"""
	while True:
		job = uploadAudioQueue.get()
		if not job:
			return
		yield job
		# Reached once the consumer resumes the generator after handling the job.
		uploadAudioQueue.task_done()


## Periodically walk `videos`, fetch captions from Xunfei, and put every video whose captions were fetched onto the caption-video queue
def start_get_caption_loop():
	"""Poll Xunfei for captions now and schedule the next poll 20 seconds from now."""
	# The next run is armed before polling, so it is scheduled even if the poll raises.
	Timer(20, start_get_caption_loop).start()
	get_caption_from_xunfei()

def get_caption_from_xunfei():
	"""Fetch finished captions from Xunfei and queue the videos for GIF generation.

	For every entry of ``videos`` that has an ``xunfei_id`` and is not yet marked
	"生成字幕成功", the jar is run in mode ``1``. When the service answers with
	``ok == 0`` the caption data is written (together with file name, tags and
	language flag) to ``BOTTLENECK/<file_name>.txt`` and the video is put on
	``captionQueue``. Entries whose captions are not ready are left for the
	next poll.
	"""
	print('get_caption_from_xunfei')
	# Iterate over a snapshot: worker threads delete entries from `videos`
	# while this runs, which would otherwise raise
	# "dictionary changed size during iteration".
	for key, vi in list(videos.items()):
		if 'xunfei_id' not in vi or vi['status'] == "生成字幕成功":
			continue

		print(key)
		xunfei_id = vi['xunfei_id']
		cmd = "java -jar %s 1 %s %s %s" % (config['XUNFEI_JAR'], config['XUNFEI_APPID'], config['XUNFEI_KEY'], xunfei_id)
		print(cmd)
		try:
			result = json.loads(os.popen(cmd).read())
		except Exception:
			continue

		if result['ok'] != 0:
			print(result)
			continue

		# Parse the payload before marking success, so a malformed answer is
		# retried on the next poll instead of leaving the video stuck.
		try:
			caption = json.loads(result['data'])
		except (TypeError, ValueError):
			print(result)
			continue

		print("%s 获取字幕成功" % xunfei_id)
		vi['status'] = "生成字幕成功"
		content = {
			'file_name': vi['file_name'],
			'tags': vi['tags'],
			'is_chinese': vi['is_chinese'],
			'xunfei_id': xunfei_id,
			'caption': caption,
		}

		caption_file_name = vi['file_name'] + '.txt'
		caption_file_path = os.path.join(config['BOTTLENECK'], caption_file_name)

		try:
			with open(caption_file_path, 'w') as f:
				f.write(json.dumps(content))
		except Exception:
			print("%s 写文件失败" % xunfei_id)

		# Put the video on the caption-video queue.
		# NOTE(review): the source path is rebuilt as <key>.mp4 — confirm that
		# uploads always carry a lower-case .mp4 extension.
		video_path = os.path.join(config['UPLOAD_FOLDER'], key + ".mp4")
		gif_path = os.path.join(config['GIF_FOLDER'], vi['file_name'])
		print(gif_path)
		processed_path = os.path.join(config['PROCESSED_FOLDER'], vi['file_name'] + '.mp4')
		audio_name = vi['file_name'] + ".mp3"
		audio_path = os.path.join(config['BOTTLENECK'], audio_name)
		caption_path = caption_file_path
		captionQueue.put((vi['file_name'], video_path, gif_path, audio_path, caption_path, processed_path))


## Set up the queue for videos with captions
def start_caption_video_queue():
	"""Launch the daemon thread that consumes the caption-video queue."""
	worker = threading.Thread(target=did_start_caption_video_queue)
	worker.daemon = True
	worker.start()

def did_start_caption_video_queue():
	"""Worker loop: generate captioned GIFs for every job taken from the caption queue."""
	for job in get_caption_video_path():
		# A job is (file_name, video_path, gif_path, audio_path, caption_path, processed_path).
		name, source, gif_dir, audio, captions, archive = job
		process_caption_video_to_generate_gifs(name, source, gif_dir, audio, captions, archive)


def get_caption_video_path():
	"""Yield jobs from ``captionQueue`` until a falsy item is received.

	Blocks while the queue is empty. A job is marked done on the queue when
	the consumer comes back for the next one.
	"""
	while True:
		job = captionQueue.get()
		if not job:
			return
		yield job
		# Reached once the consumer resumes the generator after handling the job.
		captionQueue.task_done()

def process_caption_video_to_generate_gifs(file_name, video_path, gif_path, audio_path, caption_path, processed_path):
	"""Cut a video along its caption timestamps and export the best-scoring segments.

	The caption file at ``caption_path`` supplies one candidate segment per
	caption (``bg``/``ed`` in milliseconds, text in ``onebest``). Captions longer
	than 5 seconds or not longer than 16 frames are skipped. Each selected
	segment is written as a resized GIF under ``gif_path`` and as an mp4 clip
	under ``ORIGINAL_GIF_FOLDER/<file_name>``. The GIF-to-caption mapping is
	stored back into the caption file under ``gif_caption``. Finally the mp4
	folder is zipped, the source video is moved to ``processed_path`` and the
	entry is removed from ``videos``.

	Args:
		file_name: key of the video in ``videos``; nothing happens if it is absent.
		video_path: path of the video to process.
		gif_path: directory that receives the generated GIFs.
		audio_path: only forwarded when the job is re-queued.
		caption_path: JSON file holding the captions; rewritten with the result.
		processed_path: destination of the source video once processing is done.
	"""
	# Imported locally because the module does not import them at file level.
	import shutil
	import subprocess

	if file_name not in videos:
		return
	start = time.time()

	# Load the captions first, so the video is not opened (and left open)
	# when the job has to be re-queued.
	try:
		with open(caption_path, 'r') as f:
			info = json.loads(f.read())
	except Exception:
		print("%s 读字幕文件失败" % file_name)
		captionQueue.put((file_name, video_path, gif_path, audio_path, caption_path, processed_path))
		return

	video = VideoFileClip(video_path)
	fps = video.fps
	segments = []
	for ca in info['caption']:
		bg = int(ca['bg'])
		ed = int(ca['ed'])
		start_frame = int(float(bg) / float(1000) * fps)
		end_frame = int(float(ed) / float(1000) * fps)

		duration = float(ed - bg) / 1000.0
		if duration > 5:
			print("大于5秒")
			continue
		if end_frame - 16 > start_frame:
			segments.append((start_frame, end_frame, ca['onebest']))
		else:
			print("不足16帧")

	scores = video2gif.get_scores(score_function, segments, video, stride=8)
	count = len(scores)
	print("segment count:")
	print(count)

	if not os.path.exists(gif_path):
		os.mkdir(gif_path)
	ogiginal_gif_path = os.path.join(config['ORIGINAL_GIF_FOLDER'], file_name)
	print('original path:')
	print(ogiginal_gif_path)
	if not os.path.exists(ogiginal_gif_path):
		os.mkdir(ogiginal_gif_path)

	# Generate GIFs from the top scoring segments
	top_k = min(topCount, count)
	result = []
	height = videos[file_name]['height']
	print(height)
	ranked = sorted(scores, key=lambda x: -scores.get(x))
	for nr, segment in enumerate(ranked[:top_k]):
		start_sec = segment[0] / float(fps)
		end_sec = segment[1] / float(fps)
		print(start_sec)
		print(end_sec)
		original_clip = video.subclip(start_sec, end_sec)
		clip = video.subclip(start_sec, end_sec)
		out_gif = "%s/%s_%.2d.gif" % (gif_path.decode('utf-8'), file_name.decode('utf-8'), nr)
		original_gif = "%s/%s_%.2d.mp4" % (ogiginal_gif_path.decode('utf-8'), file_name.decode('utf-8'), nr)
		gif_name = "%s_%.2d.gif" % (file_name, nr)
		## resize
		if height > 0:
			clip = clip.resize(height=height)
		else:
			clip = clip.resize(width=320)
		clip.write_gif(out_gif, fps=10, program="ImageMagick", opt="optimizeplus")
		original_clip.write_videofile(original_gif, fps=10, audio=False)
		result.append({"gif": gif_name, 'caption': segment[2]})

	info['gif_caption'] = result
	print("处理带字幕的视频完成完成")
	try:
		with open(caption_path, 'w') as f:
			print('打开info,写入result')
			f.write(json.dumps(info))
	except Exception:
		print("%s 依据字幕生成gif后,记录gif对应字幕失败" % file_name)

	# Zip the full-size clips. An argument list (no shell) keeps file names
	# with spaces, quotes or other shell metacharacters intact.
	zip_path = os.path.join(config['ZIPED_GIF_FOLDER'], file_name + '.zip')
	zip_cmd = ['zip', '-rj', zip_path, ogiginal_gif_path]
	print(' '.join(zip_cmd))
	try:
		subprocess.call(zip_cmd)
	except OSError as err:
		# Best effort, as before: a missing zip binary must not kill the worker.
		print("zip failed: %s" % err)

	print('准备转移视频')
	# Move the source video to the processed folder.
	print("mv %s %s" % (video_path, processed_path))
	try:
		shutil.move(video_path, processed_path)
	except (IOError, OSError, shutil.Error) as err:
		print("mv failed: %s" % err)

	del videos[file_name]
	print("处理字幕视频用时: %.2fs" % (time.time() - start))


## helpers
def is_mp4(file):
    """Return True when the extension of ``file`` is ``.mp4`` (case-insensitive).

    The split name, the extension and progress markers are printed for debugging.
    """
    fileName, fileExtension = os.path.splitext(file.lower())
    print(fileName)
    print(fileExtension)
    print("is mp4")
    if fileExtension != '.mp4':
        return False
    print("did is mp4")
    return True

def zhuanyi(original):
	"""Backslash-escape shell metacharacters in ``original`` for use in a command string.

	Besides the characters ``& < > | ? * ~ # ; $ !`` this also escapes the
	backslash itself, space, tab, quotes, backtick, parentheses, brackets and
	braces, so that file names containing them survive the shell as a single
	word. Strings made only of other characters are returned unchanged.

	Args:
		original: text to embed in a shell command.

	Returns:
		The text with every special character preceded by a backslash.
	"""
	special = '\\&<>|?*~#;$! \t"\'`()[]{}'
	# Single pass over the input, so backslashes added for one character are
	# never escaped again by a later replacement.
	return ''.join('\\' + ch if ch in special else ch for ch in original)


## Public interface
def get_file_status_info(fileName):
	"""Return ``(status, op)`` for the video registered under ``fileName``.

	``status`` is the entry's status text, or "尚未处理" when the video is not
	registered or has no status. ``op`` is "处理", or an empty string while
	the video is queued or being worked on.
	"""
	if fileName not in videos:
		return "尚未处理", "处理"

	status = videos[fileName].get('status', "尚未处理")
	# TODO: "排队处理中" (last entry) is treated like the in-progress states for now.
	busy_states = (
		"生成字幕中",
		"生成字幕成功",
		"排队处理中（字幕）",
		"处理中",
		"处理中（字幕）",
		"排队处理中",
	)
	if status in busy_states:
		return status, ""
	return status, "处理"

def start_all_queues():
	"""Start every background worker and the caption polling loop, in a fixed order."""
	print("初始化队列")
	launchers = (
		start_nocaption_video_queue,
		start_get_audio_queue,
		start_upload_audio_queue,
		start_get_caption_loop,
		start_caption_video_queue,
	)
	for launch in launchers:
		launch()
	print("初始化队列完成")

def add_video_to_process(fileName, height, tags, caption, isChinese, duration):
	"""Register an uploaded video and queue it for GIF generation.

	With ``caption == "true"`` the video enters the caption pipeline, starting
	on ``getAudioQueue``. Otherwise a small info file holding file name and tags
	is written to ``BOTTLENECK`` and the video is put on ``noCaptionQueue``. If
	that info file cannot be written, the video is neither registered nor queued.

	Args:
		fileName: name of the video file inside ``UPLOAD_FOLDER``.
		height: stored as ``height``; the workers resize GIFs to it when > 0.
		tags: stored with the video and written to its info file.
		caption: the string "true" selects the caption pipeline.
		isChinese: stored as ``is_chinese``.
		duration: stored as ``duration``.
	"""
	file_name = os.path.splitext(os.path.split(fileName)[1])[0]
	info = {
		'height': height,
		'tags': tags,
		'caption': caption,
		'is_chinese': isChinese,
		'duration': duration,
		'file_name': file_name,
	}
	print("is chinese")
	print(isChinese)

	video_path = os.path.join(config['UPLOAD_FOLDER'], fileName)
	print(video_path)

	gif_path = os.path.join(config['GIF_FOLDER'], file_name)
	processed_path = os.path.join(config['PROCESSED_FOLDER'], fileName)
	if caption == "true":
		info['status'] = "排队处理中（字幕）"
		audio_path = os.path.join(config['BOTTLENECK'], file_name + ".mp3")
		caption_path = os.path.join(config['BOTTLENECK'], file_name + ".txt")
		# Register before enqueueing: the workers look the video up in
		# `videos`, and a job dequeued before registration would be dropped.
		videos[file_name] = info
		getAudioQueue.put((file_name, video_path, gif_path, audio_path, caption_path, processed_path))
	else:
		info['status'] = "排队处理中"
		content = {'file_name': file_name, 'tags': tags}

		info_file_path = os.path.join(config['BOTTLENECK'], file_name + '.txt')
		try:
			with open(info_file_path, 'w') as f:
				f.write(json.dumps(content))
		except Exception:
			print("%s 写info失败" % file_name)
			return
		# Register before enqueueing, for the same reason as above.
		videos[file_name] = info
		noCaptionQueue.put((file_name, video_path, gif_path, info_file_path, processed_path))

	print("添加的video")
	print(file_name)