-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdiarization.sh
More file actions
executable file
·184 lines (137 loc) · 7.55 KB
/
diarization.sh
File metadata and controls
executable file
·184 lines (137 loc) · 7.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/bin/bash
# Speaker diarization of one audio file with the LIUM SpkDiarization toolkit.
#
# Usage: ./diarization.sh <input wav> <output folder>
#   $1 - input wav file
#   $2 - folder that receives every segmentation / model file (created if missing)
#
# Environment:
#   LOCALCLASSPATH - java classpath holding the LIUM jar; defaults to the
#                    jar name below when unset or empty.
set -o errexit

if [ $# -ne 2 ]; then
  # A wrong invocation is an error: report on stderr and return non-zero so
  # that calling scripts can detect it.
  echo "Usage: ./diarization.sh <input wav> <output folder>" >&2
  exit 1
fi

#define the directory where the results will be saved
datadir=$2

# make output folder
mkdir -p -- "$datadir"

# Initial segmentation (UEM) file: one line describing the input for the
# first pipeline stage.
echo "$1 1 0 1000000000 U U U 1" > "$datadir/show.uem.seg"

# Fall back to the bundled jar when the caller did not provide a classpath.
#LOCALCLASSPATH=lib/LIUM_SpkDiarization-4.2.jar
: "${LOCALCLASSPATH:=lium_spkdiarization-8.4.1.jar}"
# ---------------------------------------------------------------------------
# Run-time configuration
# ---------------------------------------------------------------------------

# Tracing is currently ON: "--trace" is handed to the tools that receive
# $trace. Use the empty alternative below to switch it off.
trace='--trace'
#trace=''

# Help text is currently OFF; use the "--help" alternative to enable it.
help=''
#help='--help'

# Value substituted into every --fInputMask option.
# NOTE(review): this is the same multi-field string that is written to the
# UEM file, not a bare path. It is expanded unquoted by the tool invocations,
# so the trailing fields become separate command-line words - confirm that
# this is intended before quoting or simplifying it.
features="${1} 1 0 1000000000 U U U 1"

# Feature descriptor used by the segmentation / BIC clustering stages.
# Layout: <type>,<s:e:ds:de:dds:dde>,<vector size>,<normalization>
#   type   : "sphinx" means the MFCC were computed by the sphinx tools
#            (12 MFCC + energy); the active value uses "audio2sphinx"
#            instead, the "sphinx" variant is kept as a commented alternative
#   1      : static coefficients are present in the file
#   1      : energy coefficient is present in the file
#   0      : delta coefficients are not present in the file
#   0      : delta energy coefficient is not present in the file
#   0      : delta delta coefficients are not present in the file
#   0      : delta delta energy coefficient is not present in the file
#   13     : total size of a feature vector in the mfcc file
#   0:0:0  : no feature normalization
#fInputDesc='sphinx,1:1:0:0:0:0,13,0:0:0'
fInputDesc='audio2sphinx,1:1:0:0:0:0,13,0:0:0'

# Feature descriptor used by CLR/NCLR clustering and gender detection.
#   1         : static coefficients are present in the file
#   3         : energy coefficient is present in the file but will not be used
#   2         : delta coefficients are not present in the file and will be
#               computed on the fly
#   0         : delta energy coefficient is not present in the file
#   0         : delta delta coefficients are not present in the file
#   0         : delta delta energy coefficient is not present in the file
#   13        : size of a feature vector in the mfcc file
#   1:1:300:4 : the MFCC are warped (feature warping using a sliding window
#               of 300 features), then centered and reduced: mean and
#               variance are computed by segment
#fInputDescCLR='sphinx,1:3:2:0:0:0,13,1:1:300:4'
fInputDescCLR='audio2sphinx,1:3:2:0:0:0,13,1:1:300:4'

# Show name: appears in the intermediate file names and is passed as the
# last argument of every tool invocation.
show='show'

# Initial segmentation file (the UEM file created in the output folder).
#uem="$2"
uem="${datadir}/show.uem.seg"

# Java virtual machine launcher.
java='java'
#if [ -n $JAVA_BIN ]; then
# java=$JAVA_BIN
#fi

# Model files, all looked up relative to the current working directory.
ubm='models/ubm.gmm'        # UBM GMM
#pmsgmm=./model/sms.gmms
pmsgmm='models/sms.gmms'    # speech / non-speech set of GMMs
sgmm='models/s.gmms'        # silence set of GMMs
# gender and bandwidth set of GMMs (4 models):
# female studio, male studio, female telephone, male telephone
ggmm='models/gender.gmms'
echo "#####################################################"
echo "# $show"
echo "#####################################################"

#######################################
# Run one LIUM tool with the shared JVM settings.
# Globals:   java, LOCALCLASSPATH (read)
# Arguments: $1   - class name below fr.lium.spkDiarization
#                   (e.g. programs.MSeg, tools.SFilter)
#            $2.. - options passed to the tool unchanged
# Returns:   exit status of the java process
#######################################
run_lium() {
  local tool=$1
  shift
  # $java stays unquoted so it keeps expanding exactly as it always did.
  $java -Xmx1024m -classpath "$LOCALCLASSPATH" "fr.lium.spkDiarization.$tool" "$@"
}

# Intermediate files. All of them live directly under $datadir - the same
# location the UEM file is written to - so that an absolute output folder
# works as well as a relative one. (Prefixing "./" turned an absolute folder
# into a different, relative path.)
iseg=$datadir/$show.i.seg
pmsseg=$datadir/$show.pms.seg
sseg=$datadir/$show.s.seg
lseg=$datadir/$show.l.seg
hseg=$datadir/$show.h.seg
initgmms=$datadir/$show.init.gmms
gmms=$datadir/$show.gmms
dseg=$datadir/$show.d.seg
adjseg=$datadir/$show.adj.h.seg
flt1seg=$datadir/$show.flt1.seg
flt2seg=$datadir/$show.flt2.seg
splseg=$datadir/$show.spl.seg
gseg=$datadir/$show.g.seg

# Quoting policy for the calls below:
#  * $trace and $help are unquoted on purpose: an empty value must vanish
#    instead of becoming an empty argument.
#  * $features is unquoted on purpose: its configured value contains several
#    space-separated fields and the existing behavior relies on them being
#    split into separate words.
#    NOTE(review): confirm this splitting is really wanted.

# Check the validity of the MFCC
run_lium programs.MSegInit $trace $help \
  --fInputMask=$features --fInputDesc="$fInputDesc" --sInputMask="$uem" --sOutputMask="$iseg" "$show"

# Speech / non-speech segmentation using a set of GMMs
run_lium programs.MDecode $trace $help \
  --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --fInputMask=$features --sInputMask="$iseg" \
  --sOutputMask="$pmsseg" --dPenality=500,500,10 --tInputMask="$pmsgmm" "$show"

# GLR-based segmentation, make small segments
run_lium programs.MSeg $trace $help \
  --kind=FULL --sMethod=GLR --fInputMask=$features --fInputDesc="$fInputDesc" --sInputMask="$iseg" \
  --sOutputMask="$sseg" "$show"

# Linear clustering, fuse consecutive segments of the same speaker from the start to the end
run_lium programs.MClust $trace $help \
  --fInputMask=$features --fInputDesc="$fInputDesc" --sInputMask="$sseg" \
  --sOutputMask="$lseg" --cMethod=l --cThr=2.5 "$show"

# Hierarchical bottom-up BIC clustering
run_lium programs.MClust $trace $help \
  --fInputMask=$features --fInputDesc="$fInputDesc" --sInputMask="$lseg" \
  --sOutputMask="$hseg" --cMethod=h --cThr=6 "$show"

# Initialize one speaker GMM with 8 diagonal Gaussian components for each cluster
run_lium programs.MTrainInit $help $trace \
  --nbComp=8 --kind=DIAG --fInputMask=$features --fInputDesc="$fInputDesc" --sInputMask="$hseg" \
  --tOutputMask="$initgmms" "$show"

# EM computation for each GMM
run_lium programs.MTrainEM $help $trace \
  --nbComp=8 --kind=DIAG --fInputMask=$features --fInputDesc="$fInputDesc" --sInputMask="$hseg" \
  --tOutputMask="$gmms" --tInputMask="$initgmms" "$show"

# Viterbi decoding using the set of GMMs trained by EM
run_lium programs.MDecode $trace $help \
  --fInputMask=$features --fInputDesc="$fInputDesc" --sInputMask="$hseg" \
  --sOutputMask="$dseg" --dPenality=250 --tInputMask="$gmms" "$show"

# Adjust segment boundaries near silence sections
run_lium tools.SAdjSeg $help $trace \
  --fInputMask=$features --fInputDesc=audio2sphinx,1:1:0:0:0:0,13,0:0:0 --sInputMask="$dseg" \
  --sOutputMask="$adjseg" "$show"

# Filter speaker segmentation according to speech / non-speech segmentation
# (first pass: cluster name "music")
run_lium tools.SFilter $help $trace \
  --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --fInputMask=$features --fltSegMinLenSpeech=150 --fltSegMinLenSil=25 \
  --sFilterClusterName=music --fltSegPadding=25 --sFilterMask="$pmsseg" --sInputMask="$adjseg" --sOutputMask="$flt1seg" "$show"

# (second pass: cluster name "jingle", applied to the output of the first pass)
run_lium tools.SFilter $help $trace \
  --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --fInputMask=$features --fltSegMinLenSpeech=150 --fltSegMinLenSil=25 \
  --sFilterClusterName=jingle --fltSegPadding=25 --sFilterMask="$pmsseg" --sInputMask="$flt1seg" --sOutputMask="$flt2seg" "$show"

# Split segments longer than 20s (useful for transcription)
run_lium tools.SSplitSeg $help \
  --sFilterMask="$pmsseg" --sFilterClusterName=iS,iT,j --sInputMask="$flt2seg" --sSegMaxLen=2000 --sSegMaxLenModel=2000 \
  --sOutputMask="$splseg" --fInputMask=$features --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --tInputMask="$sgmm" "$show"

#-------------------------------------------------------------------------------
# Set gender and bandwidth
run_lium programs.MScore $help \
  --sGender --sByCluster --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,1:1:0 --fInputMask=$features --sInputMask="$splseg" \
  --sOutputMask="$gseg" --tInputMask="$ggmm" "$show"

# NCLR clustering
# Features contain static and delta and are centered and reduced (--fInputDesc)
# The final segmentation is written to $datadir/$show.seg.
c=1.7
run_lium programs.MClust $help $trace \
  --fInputMask=$features --fInputDesc="$fInputDescCLR" --sInputMask="$gseg" \
  --sOutputMask="$datadir/$show.seg" --cMethod=ce --cThr="$c" --tInputMask="$ubm" \
  --emCtrl=1,5,0.01 --sTop="5,$ubm" --tOutputMask="$datadir/$show.c.gmm" "$show"