# viz_tool.py
import numpy
import cv2
import itertools
import argparse
import pandas as pd
import multiprocessing
from moviepy.editor import AudioFileClip, VideoClip, CompositeAudioClip
from headbang import HeadbangBeatTracker
from headbang.util import load_wav, find_closest
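

# Renders one bouncing animation per beat tracker: each of the individual
# trackers inside the consensus, the consensus beats themselves, and the
# final strong-beat output, synced to the input audio and written as an mp4.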
def main():
    parser = argparse.ArgumentParser(
        description="Visualize the headbang beat tracker"
    )
    parser.add_argument("wav_in", type=str, help="wav file to process")
    parser.add_argument("mp4_out", type=str, help="mp4 output path")

    args = parser.parse_args()

    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)
    hbt = HeadbangBeatTracker(pool)

    audio = load_wav(args.wav_in)

    # get beat locations
    print("Getting beat locations using consensus beat tracking")
    strong_beat_locations = hbt.beats(audio)

    # get the inner multi-beat-tracker list from HeadbangBeatTracker's
    # ConsensusBeatTracker object
    individual_tracker_beat_locations = hbt.cbt.beat_results
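
    # one color per animated tracker; moviepy interprets VideoClip frames
    # as RGB, so these tuples are (R, G, B)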
    colors = [
        (255, 0, 0),  # red
        (0, 255, 0),  # green
        (0, 165, 255),  # blue
        (255, 0, 255),  # magenta
        (255, 255, 0),  # yellow
        (255, 69, 0),  # orange
        (0, 255, 255),  # cyan
        (145, 112, 235),  # blue-violet
    ]

    fps = 30
    video_width = 1920
    video_height = 1080

    frame_duration = 1 / fps
    # the audio is assumed to be at 44100 Hz (the rate hard-coded below),
    # matching what load_wav produces
    total_duration = numpy.floor(float(audio.shape[0]) / 44100.0)
    total_frames = int(numpy.ceil(total_duration / frame_duration))

    times_vector = numpy.arange(0, total_duration, frame_duration)
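
    # animate the individual trackers' beats alongside the consensus and
    # the final strong-beat output, one animation per beat list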
    all_beat_times = individual_tracker_beat_locations + [
        hbt.beat_consensus,
        strong_beat_locations,
    ]
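
    # snap every beat time to its nearest video frame, padding each list
    # with frame 0 and the last frame so the interpolation below covers
    # the entire clip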
    all_beat_frames = [
        numpy.concatenate(
            (
                numpy.zeros(1),
                find_closest(times_vector, beat_times),
                numpy.ones(1) * (total_frames - 1),
            )
        ).astype(int)
        for beat_times in all_beat_times
    ]
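
    # the frames halfway between consecutive beats are the bottom of each
    # bounce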
    off_beat_frames = [
        ((x[1:] + x[:-1]) / 2).astype(int) for x in all_beat_frames
    ]
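
    # build a per-frame vertical position for each tracker: +1 at beat
    # frames, -1 at off-beat frames, linearly interpolated in between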
    all_positions = []
    for i in range(len(all_beat_frames)):
        x = numpy.full(total_frames, numpy.nan)
        x[all_beat_frames[i]] = 1
        x[off_beat_frames[i]] = -1

        a = pd.Series(x)
        all_positions.append(a.interpolate().to_numpy())
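
    # lay the animations out on a 4x2 grid of boxes, one box per tracker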
    blank_frame = numpy.zeros((video_height, video_width, 3), numpy.uint8)

    box_width = int(video_width / 4)
    box_edges_horiz = numpy.arange(0, video_width + 1, box_width)
    box_centers_horiz = box_edges_horiz[:-1] + int(box_width / 2)

    box_height = int(video_height / 2)
    box_edges_vert = numpy.arange(0, video_height + 1, box_height)
    box_centers_vert = box_edges_vert[:-1] + int(box_height / 2)

    positions = list(itertools.product(box_centers_horiz, box_centers_vert))
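
    # make_frame callback for moviepy: draw each tracker's index at its
    # interpolated bounce position for the requested frame time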
    def render_animations(t):
        # moviepy passes the frame time in seconds; clamp the derived
        # frame index so rounding at the clip boundary can't overrun
        frame_index = min(int(t * fps), total_frames - 1)

        video_frame = blank_frame.copy()

        for i, beats in enumerate(all_beat_frames):
            center = positions[i]
            interpolated_pos = all_positions[i][frame_index]

            current_position = (
                center[0],
                int(center[1] + (box_height / 2 - 100) * interpolated_pos),
            )

            # draw some text, names of algorithms etc.
            cv2.putText(
                video_frame,
                str(i),
                current_position,
                cv2.FONT_HERSHEY_SIMPLEX,
                2.0,
                colors[i],
                3,
                cv2.LINE_AA,
            )

        return video_frame
print("Processing video - rendering animations")
out_clip = VideoClip(make_frame=render_animations, duration=total_duration)
audio_clip = AudioFileClip(args.wav_in)
new_audioclip = CompositeAudioClip([audio_clip])
out_clip.audio = new_audioclip
out_clip.write_videofile(args.mp4_out, fps=fps)
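
# example invocation (hypothetical file names):
#   python viz_tool.py song.wav viz.mp4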