-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpreprocessing_script.py
131 lines (103 loc) · 4.82 KB
/
preprocessing_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import glob, os
from tqdm import tqdm
import subprocess
import soundfile
import librosa
import pandas as pd
import argparse
# Default target sampling rate in Hz; used as the default `sr` of
# ffmpeg_convert() and reformat().
SAMPLE_RATE = 16_000
# Default clip length in seconds; used as the default `duration` of
# generate_clip_wav().
DURATION = float(1)
def ffmpeg_convert(input_audiofile, output_audiofile, sr=SAMPLE_RATE):
    """
    Convert an audio file to a single-channel, resampled audio file with the
    desired sampling rate specified by `sr`.

    Parameters
    ----------
    input_audiofile : string
        Path to the video or audio file to be resampled.
    output_audiofile : string
        Path for saving the resampled audio file. Should have .wav extension.
        An existing file at this path is overwritten (ffmpeg ``-y``).
    sr : int
        The sampling rate to use for resampling (e.g. 16000, 44100, 48000).

    Returns
    -------
    completed_process : subprocess.CompletedProcess
        The process completion object of the ffmpeg run. Its returncode is
        always 0 when this function returns, because a non-zero return code
        raises instead.

    Raises
    ------
    AssertionError
        If ffmpeg exits with a non-zero return code, or if the written file
        does not report the requested sampling rate.
    """
    # ffmpeg command: one output channel (-ac 1), soxr resampler, target
    # rate `sr` (-ar), overwrite the output without prompting (-y)
    cmd = [
        "ffmpeg",
        "-i", input_audiofile,
        "-ac", "1",
        "-af", "aresample=resampler=soxr",
        "-ar", str(sr),
        "-y", output_audiofile,
    ]
    completed_process = subprocess.run(cmd)

    # The checks below raise AssertionError explicitly instead of using the
    # `assert` statement: the exception type is unchanged for callers, but the
    # checks are no longer stripped when Python runs with -O.

    # confirm process completed successfully
    if completed_process.returncode != 0:
        raise AssertionError(
            "ffmpeg exited with return code {} while converting {!r}".format(
                completed_process.returncode, input_audiofile
            )
        )

    # confirm new file has desired sample rate
    actual_sr = soundfile.info(output_audiofile).samplerate
    if actual_sr != sr:
        raise AssertionError(
            "{!r} has sample rate {} but {} was requested".format(
                output_audiofile, actual_sr, sr
            )
        )

    return completed_process
def reformat(ipt_folder, opt_folder, sr=SAMPLE_RATE):
    """
    Convert full-length MP3 files into WAV files.

    Every ``*.mp3`` file found recursively under `ipt_folder` is converted
    with ffmpeg_convert() and written to
    ``<opt_folder>/<name of the file's parent folder>/<file stem>.wav``.
    Only the immediate parent folder name is kept, so deeper nesting under
    `ipt_folder` is not reproduced in `opt_folder`.

    Parameters
    ----------
    ipt_folder : str
        folder path for full-length podcast episodes in the original MP3 format
    opt_folder : str
        folder path for saving full-length podcast episodes in converted WAV format
    sr : int, optional
        The target sampling rate to use for resampling
    """
    audiofiles = glob.glob(os.path.join(ipt_folder, "**", "*.mp3"), recursive=True)
    for audiofile in tqdm(audiofiles):
        # Use os.path helpers rather than splitting on "/" so the parent
        # folder and file name are found with the platform's path separator,
        # and a path without a directory part does not raise IndexError.
        parent_name = os.path.basename(os.path.dirname(audiofile))
        folderpath = os.path.join(opt_folder, parent_name)
        os.makedirs(folderpath, exist_ok=True)

        # splitext drops only the final extension, so a name that contains
        # ".mp3" somewhere in the middle is not truncated.
        stem = os.path.splitext(os.path.basename(audiofile))[0]
        opt_audiofile = os.path.join(folderpath, stem + ".wav")
        ffmpeg_convert(audiofile, opt_audiofile, sr)
def generate_clip_wav(master_csvfile, full_folder, clip_folder, duration=DURATION):
    """
    Generate clip files from full podcast episodes for training the
    filler classifier.

    For each row of the master csv, the window
    [clip_start_inepisode, clip_end_inepisode] is cut out of
    ``<full_folder>/<episode_split_subset>/<podcast_filename>.wav`` with
    ffmpeg and saved as ``<clip_folder>/<clip_split_subset>/<clip_name>``.
    Rows whose target file already exists are skipped.

    Args:
        master_csvfile (str): master csv filepath
        full_folder (str): folder path for full length podcast episode in converted wav format
        clip_folder (str): folder path for event wav clips
        duration (float): target clip length in seconds. The csv window is
            widened (or narrowed) by (duration - 1.0) / 2 on each side, which
            presumes the csv windows are one second long.

    Raises:
        AssertionError: if ffmpeg exits with a non-zero return code, or if the
            written clip's duration is not exactly `duration`.
    """
    event_df = pd.read_csv(master_csvfile)

    # Amount of time added to (or removed from) each side of the csv window;
    # it does not depend on the row, so compute it once.
    duration_offset = (duration - 1.0) / 2.0

    for _, event in event_df.iterrows():
        episode_subset = event["episode_split_subset"]
        clip_subset = event["clip_split_subset"]

        tar_folder = os.path.join(clip_folder, clip_subset)
        os.makedirs(tar_folder, exist_ok=True)

        src_filepath = os.path.join(
            full_folder, episode_subset, event["podcast_filename"] + ".wav"
        )
        start_time = event["clip_start_inepisode"]
        end_time = event["clip_end_inepisode"]

        tar_filepath = os.path.join(tar_folder, event["clip_name"])
        if os.path.exists(tar_filepath):
            continue

        # cut wav into clips based on filler metainfo
        cut_cmd = [
            "ffmpeg",
            "-i", src_filepath,
            "-ss", str(start_time - duration_offset),
            "-to", str(end_time + duration_offset),
            tar_filepath,
        ]
        completed_process = subprocess.run(cut_cmd)

        # The checks below raise AssertionError explicitly instead of using
        # the `assert` statement: same exception type for callers, but not
        # stripped when Python runs with -O.

        # confirm process completed successfully *before* reading the clip, so
        # an ffmpeg failure is reported as such rather than as a read error
        # on a missing or partial output file
        if completed_process.returncode != 0:
            raise AssertionError(
                "ffmpeg exited with return code {} while cutting {!r}".format(
                    completed_process.returncode, tar_filepath
                )
            )

        # confirm the clip has exactly the requested length
        # NOTE(review): recent librosa releases rename the `filename` keyword
        # to `path` - confirm against the librosa version this project pins.
        actual_duration = librosa.get_duration(filename=tar_filepath)
        if actual_duration != duration:
            raise AssertionError(
                "{!r} lasts {} s but {} s was requested".format(
                    tar_filepath, actual_duration, duration
                )
            )
if __name__ == "__main__":
    # Command-line entry point. Runs one preprocessing stage per invocation:
    #   reformat - convert the full-length MP3 episodes into WAV files
    #   cut      - cut event clips out of the converted WAV episodes
    parser = argparse.ArgumentParser()
    parser.add_argument("-dataset_path", required=True, type=str, help="root path for PodcastFillers dataset")
    parser.add_argument("-stage", required=True, type=str, choices=["reformat", "cut"], help="preprocessing step for extracting wav clips")
    args = parser.parse_args()

    # all input/output locations are derived from the dataset root
    dataset_path = args.dataset_path
    master_csvfile = os.path.join(dataset_path, "metadata", "PodcastFillers.csv")
    full_mp3_folder = os.path.join(dataset_path, "audio", "episode_mp3")
    full_wav_folder = os.path.join(dataset_path, "audio", "episode_wav_regenerate")
    clip_folder = os.path.join(dataset_path, "audio", "clip_wav_regenerate")

    # argparse already rejects any -stage value outside `choices`, so no
    # fallback branch for an unknown stage is needed here.
    if args.stage == "reformat":
        # convert full-length MP3 into WAV
        reformat(full_mp3_folder, full_wav_folder, sr=SAMPLE_RATE)
    elif args.stage == "cut":
        # generate clip wavs from full length WAV;
        # it's required to run "reformat" stage first
        generate_clip_wav(master_csvfile, full_wav_folder, clip_folder, duration=DURATION)