-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathpostprocess.py
102 lines (82 loc) · 4.09 KB
/
postprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import numpy as np
import scipy as sp
# Labels for the 20 phoneme channels.
# NOTE(review): presumably listed in the same order as PHONEME_THRESHOLD
# below (both have 20 entries) -- confirm against the rig definition.
MAYA_PHONEME_NAMES = [
    'Ah', 'Aa', 'Eh', 'Ee', 'Ih',
    'Oh', 'Uh', 'U', 'Eu', 'Schwa',
    'R', 'S', 'ShChZh', 'Th', 'JY',
    'LNTD', 'GK', 'MBP', 'FV', 'W',
]

# Per-channel activation cut-offs.  postprocess_model_outputs() compares the
# product cls_output * reg_output of output column i against entry i - 2.
# NOTE(review): the entries equal to 10 look like "never activate" sentinels,
# assuming the network outputs stay well below 10 -- confirm.
PHONEME_THRESHOLD = np.array([
    0.12, 0.23, 0.18, 0.02, 10.0,
    0.19, 0.18, 0.05, 10.0, 0.16,
    0.18, 0.29, 0.29, 0.27, 10.0,
    10.0, 10.0, 0.004, 0.29, 0.16,
])

# Alternative threshold set kept by the original author and tagged "perfect";
# it differs from the active one only at indices 0, 3 and 7.
# PHONEME_THRESHOLD = np.array([0.35, 0.23, 0.18, 0.17, 10, 0.19, 0.18, 0.19, 10, 0.16,
#                               0.18, 0.29, 0.29, 0.27, 10, 10, 10, 0.004, 0.29, 0.16])
def smooth(x, window_len, window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is padded on both sides with ``window_len - 1`` mirrored
    samples before a ``'valid'`` convolution, so the result holds
    ``len(x) + window_len - 1`` samples.  Callers that want the original
    length trim the ends themselves, e.g. ``smooth(x, 9)[4:-4]``.

    Args:
        x: 1-D sequence or array of samples.
        window_len: Window size in samples.  Values below 3 disable
            smoothing and ``x`` is returned unchanged (same object).
        window: ``'flat'`` for a moving average, or the name of a NumPy
            window function: ``'hanning'``, ``'hamming'``, ``'bartlett'``,
            ``'blackman'``.

    Returns:
        The smoothed signal as a NumPy array (or ``x`` itself when
        ``window_len < 3``).

    Raises:
        ValueError: If ``window`` is not a supported name, or if ``x`` has
            fewer than ``window_len`` samples (the mirrored padding would
            then be too short and the output length would be wrong).
    """
    if window_len < 3:
        return x

    supported = ('flat', 'hanning', 'hamming', 'bartlett', 'blackman')
    if window not in supported:
        raise ValueError(
            "Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
    if len(x) < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    # Reflect the head and tail so the convolution has data past both edges.
    s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]

    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Look the window function up by name; `window` was validated above.
        w = getattr(np, window)(window_len)

    # Dividing by the sum keeps the overall signal level unchanged.
    return np.convolve(w / w.sum(), s, mode='valid')
def _rescale_active_segments(viseme_col, reg_col, saturate=False):
    """Rescale every active run of ``viseme_col`` to the peak of ``reg_col``.

    A run starts at a frame whose value exceeds 0.1 and ends at the first
    following frame that drops below 0.1, or at the last frame.  The end
    frame itself is excluded from the run.  ``viseme_col`` is modified in
    place, so pass a view of the array that should be updated.

    Args:
        viseme_col: 1-D array (view) of one output column; updated in place.
        reg_col: 1-D array with the matching regression column.
        saturate: When true, a regression peak above 0.7 is treated as 1.
    """
    last = viseme_col.shape[0] - 1
    r = 0
    while r <= last:
        if viseme_col[r] > 0.1:
            begin = end = r
            # Advance to the first frame below the level, or to the last frame.
            while end < last and not viseme_col[end] < 0.1:
                end += 1
            if begin == end:
                # Only possible on the final frame: the run is empty, and
                # np.max on an empty slice would raise, so stop here.
                break
            peak_reg = np.max(reg_col[begin:end])
            if saturate and peak_reg > 0.7:
                peak_reg = 1
            # peak_pred > 0.1 because the run contains viseme_col[begin].
            peak_pred = np.max(viseme_col[begin:end])
            rate = peak_reg / peak_pred
            viseme_col[begin:end] = viseme_col[begin:end] * rate
            r = end
        r += 1


def postprocess_model_outputs(reg_output, cls_output):
    """Post-process raw outputs of the VisemeNet.

    Pipeline, per column:
      1. Columns 2 and up of both inputs are median-filtered (kernel 9) and
         smoothed (window 9).  The classifier column is median-filtered on
         frames ``2:-3`` only.
      2. Columns 0 and 1 of the result are the classifier columns smoothed
         with a window of 15.
      3. For every other column, regression values are kept where
         ``cls * reg`` exceeds ``PHONEME_THRESHOLD[i - 2]``, then smoothed
         and rescaled twice so each active run peaks at the regression
         peak.  On the second pass columns 19, 20 and 21 snap a peak above
         0.7 to 1.

    NOTE: ``reg_output`` and ``cls_output`` are modified in place by step 1;
    pass copies if the raw network outputs are still needed afterwards.

    Relies on the module-level ``smooth`` function and ``PHONEME_THRESHOLD``
    array.

    Args:
        reg_output: Shape as (Num Frames, 22)
        cls_output: Shape as (Num Frames, 22)
    Return:
        viseme_outputs: JALI based Lip blendshapes coefficients, same shape
            and dtype as ``cls_output``.
    """
    # Imported explicitly: `import scipy` alone does not guarantee that the
    # `scipy.signal` submodule is loaded on every SciPy version.
    from scipy.signal import medfilt

    assert reg_output.shape == cls_output.shape
    _, num_maya_params = reg_output.shape
    num_translate = 2
    # Columns whose regression peak is saturated to 1 on the second pass.
    saturating_columns = (19, 20, 21)

    for i in range(num_translate, num_maya_params):
        # Cls. output
        cls_output[2:-3, i] = medfilt(cls_output[2:-3, i], kernel_size=9)
        cls_output[:, i] = smooth(cls_output[:, i], window_len=9)[4:-4]
        # Reg. output
        reg_output[:, i] = medfilt(reg_output[:, i], kernel_size=9)
        reg_output[:, i] = smooth(reg_output[:, i], window_len=9)[4:-4]

    viseme_outputs = np.zeros_like(cls_output)
    for i in range(num_translate):
        viseme_outputs[:, i] = smooth(cls_output[:, i], window_len=15)[7:-7]

    for i in range(num_translate, num_maya_params):
        # Keep the regression value only where the combined score is high.
        score = cls_output[:, i] * reg_output[:, i]
        l_idx = score > PHONEME_THRESHOLD[i - num_translate]
        viseme_outputs[l_idx, i] = reg_output[l_idx, i]

        # First pass: smooth, then restore the peaks flattened by smoothing.
        viseme_outputs[:, i] = smooth(viseme_outputs[:, i], window_len=15)[7:-7]
        _rescale_active_segments(viseme_outputs[:, i], reg_output[:, i])

        # Second pass: same again, with saturation on selected columns.
        viseme_outputs[:, i] = smooth(viseme_outputs[:, i], 15)[7:-7]
        _rescale_active_segments(viseme_outputs[:, i], reg_output[:, i],
                                 saturate=i in saturating_columns)

    return viseme_outputs