Skip to content

Commit 59044e2

Browse files
committedApr 14, 2020
Tensorflow implementation of EnvNet-V2
0 parents  commit 59044e2

13 files changed

+2840
-0
lines changed
 

‎__pycache__/dataset.cpython-37.pyc

3.58 KB
Binary file not shown.

‎__pycache__/models.cpython-37.pyc

2.88 KB
Binary file not shown.

‎__pycache__/opts.cpython-37.pyc

2.83 KB
Binary file not shown.

‎__pycache__/training.cpython-37.pyc

4.84 KB
Binary file not shown.

‎__pycache__/utils.cpython-37.pyc

4.12 KB
Binary file not shown.

‎aug-fold-1-training.log

+2,001
Large diffs are not rendered by default.

‎aug-fold-2-training.log

+251
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
epoch,acc,loss,lr,val_acc,val_loss
2+
0,0.02625,4.364190301895142,0.01,3.75,-14.365
3+
1,0.019375,3.8057942676544187,0.01,2.0,-19.525
4+
2,0.045,3.5334793663024904,0.01,2.75,-10.5775
5+
3,0.03,3.402750825881958,0.01,2.25,-5.4975
6+
4,0.0425,3.341447534561157,0.01,5.0,-6.5825
7+
5,0.041875,3.2882890033721925,0.01,2.75,-2.6475
8+
6,0.051875,3.2502317333221438,0.01,5.0,-1.7825
9+
7,0.06,3.2152176570892332,0.01,2.5,-3.4275
10+
8,0.058125,3.1734259510040284,0.01,4.5,0.9675
11+
9,0.074375,3.170181379318237,0.01,2.25,-9.5425
12+
10,0.07875,3.1162731647491455,0.01,5.5,-2.0025
13+
11,0.05625,3.347873497009277,0.1,3.5000000000000004,-16.805
14+
12,0.04625,3.321718235015869,0.1,4.75,-18.57
15+
13,0.06375,3.260312385559082,0.1,5.25,2.945
16+
14,0.063125,3.1655212020874024,0.1,5.75,7.31
17+
15,0.07875,3.1224004936218264,0.1,7.249999999999999,-3.8125
18+
16,0.086875,3.0920931339263915,0.1,8.5,4.7375
19+
17,0.095,3.0571328449249267,0.1,9.5,-3.4475
20+
18,0.1075,2.990420503616333,0.1,11.25,1.895
21+
19,0.11625,2.9558618545532225,0.1,11.25,-1.31
22+
20,0.14,2.9303871250152587,0.1,13.750000000000002,-3.3125
23+
21,0.151875,2.8846695709228514,0.1,8.0,5.1075
24+
22,0.1425,2.856938695907593,0.1,11.25,-4.8275
25+
23,0.178125,2.8150680446624756,0.1,11.25,-6.595
26+
24,0.168125,2.835599546432495,0.1,16.5,-2.785
27+
25,0.175,2.7768293476104735,0.1,16.0,2.955
28+
26,0.191875,2.7219950771331787,0.1,20.5,-4.14
29+
27,0.1975,2.693011360168457,0.1,21.75,-2.1175
30+
28,0.215,2.697782726287842,0.1,22.25,2.545
31+
29,0.209375,2.658558101654053,0.1,26.75,0.6375
32+
30,0.215625,2.639135580062866,0.1,18.75,-1.985
33+
31,0.206875,2.67412109375,0.1,11.75,-8.7575
34+
32,0.228125,2.6060924530029297,0.1,22.75,-4.0725
35+
33,0.25,2.5560087966918945,0.1,30.25,1.08
36+
34,0.268125,2.5199835205078127,0.1,25.0,-1.325
37+
35,0.28625,2.4759314823150635,0.1,30.25,2.0475
38+
36,0.26375,2.5218370246887205,0.1,35.0,-1.1125
39+
37,0.2825,2.4893642711639403,0.1,31.5,1.065
40+
38,0.283125,2.4635579586029053,0.1,41.5,1.24
41+
39,0.28125,2.4503310680389405,0.1,34.75,-0.4975
42+
40,0.295,2.4193574142456056,0.1,37.5,-1.255
43+
41,0.328125,2.359979496002197,0.1,48.0,1.0725
44+
42,0.3025,2.4025496864318847,0.1,32.5,0.0425
45+
43,0.33625,2.3367537307739257,0.1,45.0,-0.42
46+
44,0.32125,2.3890170288085937,0.1,43.75,1.0325
47+
45,0.345,2.338145399093628,0.1,45.75,2.185
48+
46,0.331875,2.372704477310181,0.1,47.5,-1.47
49+
47,0.346875,2.2720289993286134,0.1,35.75,2.49
50+
48,0.343125,2.335392503738403,0.1,40.5,-0.07
51+
49,0.360625,2.2747845315933226,0.1,44.75,-0.025
52+
50,0.37375,2.23638192653656,0.1,38.5,-5.795
53+
51,0.369375,2.2347133445739744,0.1,44.75,3.55
54+
52,0.350625,2.23949875831604,0.1,38.0,-0.04
55+
53,0.37375,2.237742404937744,0.1,39.25,-2.335
56+
54,0.3575,2.260177659988403,0.1,43.0,1.955
57+
55,0.36625,2.2538868141174317,0.1,44.75,-0.76
58+
56,0.366875,2.234534044265747,0.1,50.5,-0.6725
59+
57,0.366875,2.198842668533325,0.1,52.5,1.3125
60+
58,0.396875,2.166809391975403,0.1,48.75,-0.7075
61+
59,0.4075,2.1453133821487427,0.1,46.75,1.0775
62+
60,0.3775,2.166514267921448,0.1,48.25,-0.605
63+
61,0.390625,2.1388467025756834,0.1,45.0,0.9025
64+
62,0.410625,2.0651877784729002,0.1,60.0,1.58
65+
63,0.42,2.0550661420822145,0.1,53.25,0.955
66+
64,0.418125,2.1030141830444338,0.1,45.25,3.5725
67+
65,0.421875,2.082075037956238,0.1,46.75,1.295
68+
66,0.424375,2.1001365518569948,0.1,55.25,1.19
69+
67,0.4075,2.0760802602767945,0.1,41.0,-2.4825
70+
68,0.4475,2.0292210388183594,0.1,59.0,1.835
71+
69,0.42,2.024934573173523,0.1,47.25,-1.6825
72+
70,0.444375,1.9973055744171142,0.1,57.75,1.9475
73+
71,0.419375,2.0591587018966675,0.1,54.0,2.8475
74+
72,0.443125,2.020278615951538,0.1,56.25,1.7975
75+
73,0.44875,1.9624472618103028,0.1,60.75000000000001,0.4775
76+
74,0.448125,2.0197728490829467,0.1,64.75,0.22
77+
75,0.434375,2.0080120754241944,0.1,63.74999999999999,1.155
78+
76,0.473125,1.9557863426208497,0.1,58.75,1.2925
79+
77,0.460625,1.9723664379119874,0.1,56.99999999999999,1.03
80+
78,0.44,1.9687378406524658,0.1,62.0,1.2075
81+
79,0.461875,1.930267457962036,0.1,63.74999999999999,1.0875
82+
80,0.45875,1.9535737323760987,0.1,49.25,-0.365
83+
81,0.461875,1.9347326517105103,0.1,63.74999999999999,0.5775
84+
82,0.465,1.945738344192505,0.1,58.75,1.2975
85+
83,0.468125,1.9488419532775878,0.1,64.0,0.435
86+
84,0.485625,1.9235300159454345,0.1,59.5,0.54
87+
85,0.4775,1.9150684881210327,0.1,59.75,3.4
88+
86,0.480625,1.885982494354248,0.1,66.25,1.83
89+
87,0.476875,1.890736665725708,0.1,61.5,0.0725
90+
88,0.4825,1.8848549795150757,0.1,62.0,-0.15
91+
89,0.47875,1.8957869052886962,0.1,62.74999999999999,0.2875
92+
90,0.485625,1.8895482540130615,0.1,57.49999999999999,0.15
93+
91,0.515625,1.8061808395385741,0.1,65.5,0.6225
94+
92,0.490625,1.8378184127807617,0.1,67.0,1.04
95+
93,0.519375,1.8322415590286254,0.1,60.25,1.6525
96+
94,0.50375,1.8292215967178345,0.1,67.25,1.72
97+
95,0.515,1.810122208595276,0.1,66.0,-0.0475
98+
96,0.498125,1.8442325830459594,0.1,62.0,0.565
99+
97,0.5125,1.81817777633667,0.1,60.75000000000001,1.3675
100+
98,0.49875,1.8474519538879395,0.1,65.0,-0.12
101+
99,0.50125,1.8302334928512574,0.1,67.5,0.2375
102+
100,0.5225,1.7856882429122924,0.1,62.0,1.1175
103+
101,0.528125,1.7906427383422852,0.1,59.0,-1.8225
104+
102,0.515,1.78394868850708,0.1,68.5,0.8325
105+
103,0.529375,1.8193429899215698,0.1,65.25,1.9925
106+
104,0.503125,1.8336401891708374,0.1,68.25,1.6575
107+
105,0.520625,1.7816782951354981,0.1,69.25,1.0875
108+
106,0.545,1.748461365699768,0.1,73.5,1.02
109+
107,0.528125,1.7566093254089354,0.1,69.0,1.12
110+
108,0.550625,1.713003478050232,0.1,67.25,0.305
111+
109,0.54625,1.714949312210083,0.1,68.5,1.325
112+
110,0.54125,1.7462983798980714,0.1,62.74999999999999,1.445
113+
111,0.52625,1.770948896408081,0.1,64.75,0.9075
114+
112,0.53375,1.7509346532821655,0.1,70.75,0.155
115+
113,0.53125,1.737281756401062,0.1,68.5,0.81
116+
114,0.5475,1.7143649244308472,0.1,66.25,1.5975
117+
115,0.54125,1.7084273672103882,0.1,69.25,1.275
118+
116,0.51625,1.7679480075836183,0.1,72.25,1.6725
119+
117,0.56,1.6622666597366333,0.1,72.25,0.4725
120+
118,0.54,1.7429784870147704,0.1,73.75,0.1875
121+
119,0.548125,1.6692375135421753,0.1,68.75,2.0125
122+
120,0.571875,1.6558413362503053,0.1,65.5,1.84
123+
121,0.5625,1.6640590620040894,0.1,70.5,1.0575
124+
122,0.545,1.7142630863189696,0.1,74.0,0.6025
125+
123,0.551875,1.6940502977371217,0.1,64.75,1.595
126+
124,0.5575,1.6804782915115357,0.1,63.24999999999999,1.6075
127+
125,0.549375,1.6725295448303223,0.1,68.25,0.9825
128+
126,0.526875,1.7000903701782226,0.1,68.75,0.23
129+
127,0.56,1.6593765497207642,0.1,60.25,1.0225
130+
128,0.560625,1.65841814994812,0.1,63.0,1.1575
131+
129,0.569375,1.6534293365478516,0.1,59.0,-0.43
132+
130,0.56625,1.6677143955230713,0.1,62.25000000000001,0.12
133+
131,0.570625,1.65065514087677,0.1,69.25,1.4825
134+
132,0.575625,1.6319794940948487,0.1,69.25,1.1675
135+
133,0.571875,1.6122065925598144,0.1,69.0,0.8925
136+
134,0.555625,1.6656177854537964,0.1,68.25,0.59
137+
135,0.5875,1.5952541542053222,0.1,66.0,1.345
138+
136,0.590625,1.6259658241271973,0.1,72.75,0.845
139+
137,0.58875,1.636475682258606,0.1,70.5,0.255
140+
138,0.5975,1.5679073905944825,0.1,68.75,1.21
141+
139,0.568125,1.6453906393051148,0.1,70.0,1.32
142+
140,0.555625,1.6275051546096801,0.1,66.75,1.4
143+
141,0.57375,1.6199491977691651,0.1,65.25,0.785
144+
142,0.584375,1.5847906589508056,0.1,70.75,-1.6875
145+
143,0.58625,1.6070477628707887,0.1,75.0,1.1425
146+
144,0.57625,1.6056494426727295,0.1,70.5,0.2825
147+
145,0.59125,1.5741839027404785,0.1,69.0,1.08
148+
146,0.584375,1.573354082107544,0.1,70.25,-0.8875
149+
147,0.579375,1.6006696128845215,0.1,71.0,0.835
150+
148,0.58,1.5997914028167726,0.1,72.5,0.8275
151+
149,0.585,1.5993724966049194,0.1,70.75,0.12
152+
150,0.573125,1.600631957054138,0.1,73.75,-0.435
153+
151,0.58625,1.575380530357361,0.1,66.75,0.8425
154+
152,0.59625,1.5427978658676147,0.1,74.0,0.6425
155+
153,0.61125,1.5141425848007202,0.1,70.75,0.39
156+
154,0.595625,1.554921679496765,0.1,72.25,0.675
157+
155,0.58875,1.5786282634735107,0.1,74.0,-0.0875
158+
156,0.580625,1.60705894947052,0.1,73.0,0.485
159+
157,0.61,1.5018576717376708,0.1,73.75,2.48
160+
158,0.60125,1.563011598587036,0.1,72.25,1.31
161+
159,0.593125,1.5781792354583741,0.1,63.74999999999999,1.115
162+
160,0.605,1.5102868938446046,0.1,75.75,0.16
163+
161,0.600625,1.5380147123336791,0.1,66.25,0.96
164+
162,0.595,1.5284412288665772,0.1,70.0,0.21
165+
163,0.6,1.5297650051116944,0.1,73.75,0.2125
166+
164,0.6025,1.5288518142700196,0.1,67.75,1.3325
167+
165,0.589375,1.518393030166626,0.1,70.5,0.0775
168+
166,0.596875,1.5591082429885865,0.1,73.25,0.1025
169+
167,0.595,1.5433367490768433,0.1,73.25,0.5525
170+
168,0.608125,1.5544326162338258,0.1,75.0,0.925
171+
169,0.61125,1.4983487462997436,0.1,76.0,1.255
172+
170,0.615,1.492077612876892,0.1,74.25,1.135
173+
171,0.6125,1.483194351196289,0.1,76.5,0.2425
174+
172,0.621875,1.5039149808883667,0.1,72.5,0.9575
175+
173,0.605625,1.509285626411438,0.1,74.75,0.585
176+
174,0.61875,1.484777307510376,0.1,70.75,-0.5475
177+
175,0.615625,1.4738255691528321,0.1,72.0,-0.225
178+
176,0.606875,1.5206553554534912,0.1,74.75,1.295
179+
177,0.613125,1.527477684020996,0.1,75.25,0.975
180+
178,0.62875,1.4848559617996215,0.1,72.75,1.44
181+
179,0.61375,1.4811051750183106,0.1,70.5,1.3525
182+
180,0.624375,1.428681445121765,0.1,76.75,0.7625
183+
181,0.618125,1.4879146862030028,0.1,76.0,0.2325
184+
182,0.638125,1.4628437995910644,0.1,68.0,-0.85
185+
183,0.615,1.4981183862686158,0.1,75.25,-0.4575
186+
184,0.639375,1.46513774394989,0.1,64.5,-1.4575
187+
185,0.631875,1.4421774005889894,0.1,74.25,0.3775
188+
186,0.6325,1.4636979913711547,0.1,77.25,0.5525
189+
187,0.625,1.4698228025436402,0.1,76.5,0.8775
190+
188,0.6175,1.4855970811843873,0.1,72.25,0.1175
191+
189,0.618125,1.5001065731048584,0.1,72.0,-0.695
192+
190,0.61,1.457563223838806,0.1,75.0,-0.2625
193+
191,0.629375,1.474877152442932,0.1,76.5,0.735
194+
192,0.630625,1.44171142578125,0.1,68.75,1.325
195+
193,0.615,1.4470804691314698,0.1,74.25,-0.2175
196+
194,0.6175,1.4602191638946533,0.1,71.75,-0.4975
197+
195,0.64125,1.464056167602539,0.1,73.75,-0.0475
198+
196,0.610625,1.4594062852859497,0.1,75.75,1.135
199+
197,0.6325,1.4404124593734742,0.1,74.5,0.58
200+
198,0.64375,1.4104800462722777,0.1,77.0,0.365
201+
199,0.61625,1.4931021928787231,0.1,73.0,0.875
202+
200,0.636875,1.4372695446014405,0.1,77.0,1.065
203+
201,0.638125,1.4314981698989868,0.1,72.75,-0.105
204+
202,0.628125,1.4575612878799438,0.1,75.25,1.16
205+
203,0.636875,1.4191424703598023,0.1,76.5,-0.25
206+
204,0.649375,1.391803684234619,0.1,76.25,0.6125
207+
205,0.625,1.4518143367767333,0.1,75.5,0.02
208+
206,0.670625,1.3947421789169312,0.1,74.5,1.305
209+
207,0.633125,1.4279969978332518,0.1,78.25,1.225
210+
208,0.658125,1.3850564098358153,0.1,78.0,-0.1875
211+
209,0.62375,1.4357863187789917,0.1,74.25,-0.215
212+
210,0.639375,1.4034555101394652,0.1,70.25,0.595
213+
211,0.630625,1.440563669204712,0.1,75.25,0.6075
214+
212,0.6425,1.4011264610290528,0.1,67.75,-0.1275
215+
213,0.648125,1.3843915224075318,0.1,71.75,0.3175
216+
214,0.65125,1.3846601343154907,0.1,74.0,-0.0675
217+
215,0.63875,1.3937037324905395,0.1,79.5,0.4725
218+
216,0.64,1.3968168210983276,0.1,77.75,1.3575
219+
217,0.64625,1.418349962234497,0.1,71.5,1.3675
220+
218,0.654375,1.385741081237793,0.1,75.75,0.125
221+
219,0.6425,1.3702531194686889,0.1,74.25,-0.5725
222+
220,0.650625,1.4011496353149413,0.1,76.0,-0.6675
223+
221,0.6375,1.3519666194915771,0.1,76.25,0.31
224+
222,0.64375,1.386059980392456,0.1,79.25,1.2075
225+
223,0.653125,1.401152286529541,0.1,75.75,0.4425
226+
224,0.659375,1.4069624137878418,0.1,78.0,0.2225
227+
225,0.649375,1.3809283542633057,0.1,75.5,0.6175
228+
226,0.665,1.3553387069702147,0.1,76.25,1.1825
229+
227,0.65125,1.3894909715652466,0.1,74.75,0.56
230+
228,0.655625,1.3804780054092407,0.1,74.75,-0.0175
231+
229,0.669375,1.3629000473022461,0.1,76.5,1.13
232+
230,0.61625,1.4367037057876586,0.1,73.5,0.8775
233+
231,0.655625,1.3824931383132935,0.1,76.75,0.285
234+
232,0.660625,1.3272888803482055,0.1,75.75,-0.3175
235+
233,0.66625,1.3430774545669555,0.1,78.75,1.2725
236+
234,0.65375,1.3274751663208009,0.1,77.0,1.2025
237+
235,0.64625,1.3825994873046874,0.1,77.25,0.0225
238+
236,0.64625,1.3729515075683594,0.1,75.25,0.2325
239+
237,0.674375,1.3624279642105102,0.1,76.0,1.0475
240+
238,0.653125,1.3579264783859253,0.1,74.75,1.6625
241+
239,0.666875,1.3289760446548462,0.1,77.25,0.8275
242+
240,0.6725,1.3199524784088135,0.1,77.5,1.055
243+
241,0.666875,1.3675604343414307,0.1,74.5,0.0875
244+
242,0.67875,1.3353768157958985,0.1,79.0,0.2475
245+
243,0.64875,1.3520609712600709,0.1,77.25,0.115
246+
244,0.66,1.3676192808151244,0.1,81.0,-0.32
247+
245,0.66375,1.3348275232315063,0.1,73.5,-0.37
248+
246,0.671875,1.3160452985763549,0.1,76.25,0.37
249+
247,0.685625,1.3256859731674195,0.1,78.25,0.615
250+
248,0.66625,1.3432723140716554,0.1,72.75,1.895
251+
249,0.658125,1.3149521493911742,0.1,72.0,0.5325

‎dataset.py

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import os;
2+
import numpy as np;
3+
import random;
4+
import utils as U;
5+
from tensorflow import keras;
6+
7+
class Generator(keras.utils.Sequence):
    """Keras Sequence producing batches of raw-waveform samples.

    With ``options.BC`` enabled during training, each batch element is a mix
    of two randomly drawn examples of different classes (between-class
    learning) with a soft label blending the two one-hot vectors.  Otherwise
    samples are taken sequentially; in test mode each sample is expanded to
    ``options.nCrops`` crops and the batch is flattened accordingly.
    """

    def __init__(self, samples, labels, options, train=True):
        # Pair each waveform with its label so they are indexed together.
        self.data = [(samples[i], labels[i]) for i in range(len(samples))]
        self.opt = options
        self.train = train
        # Test-time samples expand to nCrops crops each, so fewer fit per batch.
        self.batch_size = options.batchSize if train else options.batchSize // options.nCrops
        self.mix = (options.BC and train)  # BC mixing only during training
        self.preprocess_funcs = self.preprocess_setup()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.data) / self.batch_size))

    def __getitem__(self, batchIndex):
        'Generate one batch of data'
        batchX, batchY = self.generate_batch(batchIndex)
        # Shape to (batch, rows=1, columns=length, channels=1) for Conv2D.
        batchX = np.expand_dims(batchX, axis=1)
        batchX = np.expand_dims(batchX, axis=3)
        return batchX, batchY

    def generate_batch(self, batchIndex):
        """Assemble up to batch_size (sound, label) pairs for `batchIndex`."""
        sounds = []
        labels = []
        indexes = None
        for i in range(self.batch_size):
            if self.mix:  # Training phase of BC learning
                # Select two training examples of different classes.
                while True:
                    sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
                    sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
                    if label1 != label2:
                        break
                sound1 = self.preprocess(sound1)
                sound2 = self.preprocess(sound2)

                # Mix the two waveforms with a random ratio; the label is the
                # same blend of the two one-hot vectors.
                r = np.array(random.random())
                sound = U.mix(sound1, sound2, r, self.opt.fs).astype(np.float32)
                eye = np.eye(self.opt.nClasses)
                label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)
            else:  # Training phase of standard learning or testing phase
                # Fix: identity comparison (`is None`) instead of `== None`.
                if indexes is None:
                    indexes = self.data[batchIndex * self.batch_size:(batchIndex + 1) * self.batch_size]
                elif i >= len(indexes):
                    break  # last batch may be short

                sound, target = indexes[i]
                sound = self.preprocess(sound).astype(np.float32)
                # One row of the one-hot label per crop.
                label = np.zeros((self.opt.nCrops, self.opt.nClasses))
                label[:, target] = 1

            if self.train and self.opt.strongAugment:
                sound = U.random_gain(6)(sound).astype(np.float32)

            sounds.append(sound)
            labels.append(label)

        sounds = np.asarray(sounds)
        labels = np.asarray(labels)
        if not self.train:
            # Flatten (samples, crops, ...) into a single crop-sample axis.
            sounds = sounds.reshape(sounds.shape[0] * sounds.shape[1], sounds.shape[2])
            labels = labels.reshape(labels.shape[0] * labels.shape[1], labels.shape[2])

        return sounds, labels

    def preprocess_setup(self):
        """Build the per-sample preprocessing pipeline for this mode."""
        if self.train:
            funcs = []
            if self.opt.strongAugment:
                funcs += [U.random_scale(1.25)]
            funcs += [U.padding(self.opt.inputLength // 2),
                      U.random_crop(self.opt.inputLength),
                      U.normalize(32768.0),
                      ]
        else:
            funcs = [U.padding(self.opt.inputLength // 2),
                     U.normalize(32768.0),
                     U.multi_crop(self.opt.inputLength, self.opt.nCrops),
                     ]
        return funcs

    def preprocess(self, sound):
        # Apply the configured pipeline in order.
        for f in self.preprocess_funcs:
            sound = f(sound)
        return sound
116+
117+
118+
def setup(opt, split):
    """Load the dataset npz for `opt` and build train/val generators.

    Fold `split` becomes the validation set; every other fold is training.
    """
    dataset = np.load(os.path.join(opt.data, opt.dataset, 'wav{}.npz'.format(opt.fs // 1000)))

    train_sounds, train_labels = [], []
    val_sounds, val_labels = [], []
    for fold in range(1, opt.nFolds + 1):
        contents = dataset['fold{}'.format(fold)].item()
        if fold == split:
            val_sounds.extend(contents['sounds'])
            val_labels.extend(contents['labels'])
        else:
            train_sounds.extend(contents['sounds'])
            train_labels.extend(contents['labels'])

    # Iterator setup
    train_data = Generator(train_sounds, train_labels, opt, train=True)
    val_data = Generator(val_sounds, val_labels, opt, train=False)

    return train_data, val_data

‎main.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import opts;
2+
from training import Trainer;
3+
#from keras.utils import plot_model
4+
5+
def Main():
    """Parse options and train one model for each requested split."""
    opt = opts.parse()
    for split in opt.splits:
        print('+-- Split {} --+'.format(split))
        Trainer(opt, split).Train()


if __name__ == '__main__':
    Main()

‎models.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from tensorflow import keras;
2+
from tensorflow.keras.models import Model;
3+
import tensorflow.keras.layers as L
4+
5+
class EnvNet2:
    """Builder for the EnvNet-v2 architecture operating on raw waveforms."""

    def __init__(self, input_length, n_class):
        self.input_length = input_length
        # Temporal feature extraction directly on the raw waveform.
        self.conv1 = ConvBNReLU(32, (1, 64), (1, 2))
        self.conv2 = ConvBNReLU(64, (1, 16), (1, 2))
        # 2-D convolutions over the learned time-frequency representation.
        self.conv3 = ConvBNReLU(32, (8, 8))
        self.conv4 = ConvBNReLU(32, (8, 8))
        self.conv5 = ConvBNReLU(64, (1, 4))
        self.conv6 = ConvBNReLU(64, (1, 4))
        self.conv7 = ConvBNReLU(128, (1, 2))
        self.conv8 = ConvBNReLU(128, (1, 2))
        self.conv9 = ConvBNReLU(256, (1, 2))
        self.conv10 = ConvBNReLU(256, (1, 2))
        # Fully connected classifier head.
        self.fc1 = FCDN(4096)
        self.fc2 = FCDN(4096)
        self.output = FCDN(n_class, 'softmax', 0)

    def createModel(self):
        """Wire the layers into a keras Model and return it."""
        # Input layout: (batch, rows=1, columns=input_length, channels=1).
        waveform = L.Input(shape=(1, self.input_length, 1))

        x = self.conv1(waveform)
        x = self.conv2(x)
        x = L.MaxPooling2D(pool_size=(1, 64), strides=(1, 64))(x)

        # Swap channel and row axes so the learned filters act as a
        # frequency-like dimension for the following 2-D convolutions.
        x = L.Permute((3, 2, 1))(x)

        x = self.conv4(self.conv3(x))
        x = L.MaxPooling2D(pool_size=(5, 3), strides=(5, 3))(x)

        x = self.conv6(self.conv5(x))
        x = L.MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(x)

        x = self.conv8(self.conv7(x))
        x = L.MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(x)

        x = self.conv10(self.conv9(x))
        x = L.MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(x)

        x = L.Flatten()(x)
        x = self.fc2(self.fc1(x))
        probabilities = self.output(x)

        return Model(inputs=waveform, outputs=probabilities)
58+
59+
class ConvBNReLU:
    """Composite Conv2D -> BatchNorm -> ReLU block (bias-free convolution,
    since BatchNorm provides the shift)."""

    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid',
                 initial_w=keras.initializers.he_normal(), use_bias=False):
        self.conv = L.Conv2D(filters=filters, kernel_size=kernel_size,
                             strides=strides, padding=padding,
                             kernel_initializer=initial_w, use_bias=use_bias)

    def __call__(self, x):
        out = self.conv(x)
        out = L.BatchNormalization()(out)
        return L.Activation('relu')(out)
68+
69+
class FCDN:
    """Dense -> Activation -> optional Dropout block.

    A dropout rate of 0 disables the Dropout layer entirely (used for the
    softmax output layer).
    """

    def __init__(self, units=50, activation='relu', dropout=0.5,
                 initial_w=keras.initializers.lecun_normal()):
        self.fcn = L.Dense(units, kernel_initializer=initial_w)
        self.activation = L.Activation(activation)
        self.dropout = L.Dropout(rate=dropout) if dropout > 0 else None

    def __call__(self, x):
        out = self.activation(self.fcn(x))
        if self.dropout is not None:
            out = self.dropout(out)
        return out

‎opts.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import os
2+
import argparse
3+
4+
5+
def parse():
    """Parse command-line options and fill in dataset/network defaults.

    Returns an argparse.Namespace augmented with derived fields:
    nClasses, nFolds, splits, fs, inputLength, and any of
    nEpochs/LR/schedule/warmup left at -1 resolved from default_settings.
    """
    parser = argparse.ArgumentParser(description='BC learning for sounds')

    # General settings
    parser.add_argument('--dataset', required=False, default='esc50', choices=['esc10', 'esc50', 'urbansound8k'])
    parser.add_argument('--netType', required=False, default='envnetv2', choices=['envnet', 'envnetv2'])
    parser.add_argument('--data', default='/home/mohaimen/Desktop/EXPERIMENTS/datasets/', required=False, help='Path to dataset')
    parser.add_argument('--split', type=int, default=-1, help='esc: 1-5, urbansound: 1-10 (-1: run on all splits)')
    parser.add_argument('--save', default='None', help='Directory to save the results')
    parser.add_argument('--testOnly', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)

    # Learning settings (default settings are defined below)
    # NOTE(review): default=True combined with action='store_true' means these
    # two flags can never be switched off from the command line; kept as-is
    # for backward compatibility.
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning')
    parser.add_argument('--strongAugment', default=True, action='store_true', help='Add scale and gain augmentation')
    parser.add_argument('--nEpochs', type=int, default=-1)
    parser.add_argument('--LR', type=float, default=-1, help='Initial learning rate')
    parser.add_argument('--schedule', type=float, nargs='*', default=-1, help='When to divide the LR')
    parser.add_argument('--warmup', type=int, default=-1, help='Number of epochs to warm up')
    parser.add_argument('--batchSize', type=int, default=64)
    parser.add_argument('--weightDecay', type=float, default=5e-4)
    parser.add_argument('--momentum', type=float, default=0.9)

    # Testing settings
    parser.add_argument('--nCrops', type=int, default=10)

    opt = parser.parse_args()

    # Dataset details
    if opt.dataset == 'esc50':
        opt.nClasses = 50
        opt.nFolds = 5
    elif opt.dataset == 'esc10':
        opt.nClasses = 10
        opt.nFolds = 5
    else:  # urbansound8k
        opt.nClasses = 10
        opt.nFolds = 10

    # split == -1 means "run every fold as a validation split".
    if opt.split == -1:
        opt.splits = range(1, opt.nFolds + 1)
    else:
        opt.splits = [opt.split]

    # Model details: sampling rate and raw input length per architecture.
    if opt.netType == 'envnet':
        opt.fs = 16000
        opt.inputLength = 24014
    else:  # envnetv2
        opt.fs = 44100
        opt.inputLength = 66650

    # Default settings (nEpochs will be doubled if opt.BC)
    default_settings = dict()
    default_settings['esc50'] = {
        'envnet': {'nEpochs': 600, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0},
        'envnetv2': {'nEpochs': 1000, 'LR': 0.1, 'schedule': [0.3, 0.6, 0.9], 'warmup': 10}
    }
    default_settings['esc10'] = {
        'envnet': {'nEpochs': 600, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0},
        'envnetv2': {'nEpochs': 600, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0}
    }
    default_settings['urbansound8k'] = {
        'envnet': {'nEpochs': 400, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0},
        'envnetv2': {'nEpochs': 600, 'LR': 0.1, 'schedule': [0.3, 0.6, 0.9], 'warmup': 10}
    }
    # Fix: attribute lookup via getattr instead of eval('opt.{}') — same
    # behavior, no dynamic code evaluation.
    for key in ['nEpochs', 'LR', 'schedule', 'warmup']:
        if getattr(opt, key) == -1:
            setattr(opt, key, default_settings[opt.dataset][opt.netType][key])
            # BC learning mixes labels, so train twice as long by default.
            if key == 'nEpochs' and opt.BC:
                opt.nEpochs *= 2

    if opt.save != 'None' and not os.path.isdir(opt.save):
        os.makedirs(opt.save)

    display_info(opt)

    return opt
84+
85+
86+
def display_info(opt):
    """Print a banner summarizing the selected training configuration."""
    learning = 'BC' if opt.BC else 'standard'
    rows = [
        '+------------------------------+',
        '| Sound classification',
        '+------------------------------+',
        '| dataset : {}'.format(opt.dataset),
        '| netType : {}'.format(opt.netType),
        '| learning : {}'.format(learning),
        '| augment : {}'.format(opt.strongAugment),
        '| nEpochs : {}'.format(opt.nEpochs),
        '| LRInit : {}'.format(opt.LR),
        '| schedule : {}'.format(opt.schedule),
        '| warmup : {}'.format(opt.warmup),
        '| batchSize: {}'.format(opt.batchSize),
        '+------------------------------+',
    ]
    for row in rows:
        print(row)

‎training.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import sys;
2+
import os;
3+
import utils as U;
4+
from tensorflow import keras;
5+
import models;
6+
import dataset;
7+
import math;
8+
import numpy as np;
9+
import time;
10+
#from keras.utils import plot_model
11+
12+
class Trainer:
    """Builds, compiles and trains an EnvNet model on one dataset split."""

    def __init__(self, opt=None, split=0):
        self.opt = opt
        self.split = split

    def Train(self):
        """Run the full training loop for this split."""
        # Fix: build the model from the configured input length / class count
        # (was hard-coded to 66650/50) and remove the debug exit() that made
        # everything below unreachable.
        envnet2 = models.EnvNet2(self.opt.inputLength, self.opt.nClasses)
        model = envnet2.createModel()
        print(model.summary())

        trainGen, valGen = dataset.setup(self.opt, self.split)

        # KL divergence handles the soft (mixed) labels of BC learning.
        loss = 'kullback_leibler_divergence'
        optimizer = keras.optimizers.SGD(lr=self.opt.LR, decay=self.opt.weightDecay,
                                         momentum=self.opt.momentum, nesterov=True)
        model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

        # Callbacks: LR schedule, custom 10-crop validation/progress output,
        # best-model checkpoint, CSV log.
        lrate = keras.callbacks.LearningRateScheduler(self.GetLR)
        best_model = keras.callbacks.ModelCheckpoint(
            'Split-' + str(self.split) + '_best_model.hdf5',
            monitor='val_acc', save_best_only=True, verbose=0)
        csv_logger = keras.callbacks.CSVLogger('aug-fold-' + str(self.split) + '-training.log')
        custom_evaluator = CustomCallback(self.opt, trainGen, valGen)
        callbacks_list = [lrate, custom_evaluator, best_model, csv_logger]

        # Validation is handled inside CustomCallback, hence no
        # validation_data here.
        model.fit_generator(trainGen, epochs=self.opt.nEpochs,
                            steps_per_epoch=len(trainGen.data) // trainGen.batch_size,
                            callbacks=callbacks_list, verbose=0)

    def GetLR(self, epoch):
        """Step schedule: divide the base LR by 10 at each fraction of
        nEpochs listed in opt.schedule; warm-up epochs run at LR/10."""
        divide_epoch = np.array([self.opt.nEpochs * i for i in self.opt.schedule])
        decay = sum(epoch > divide_epoch)
        if epoch <= self.opt.warmup:
            decay = 1  # warm-up: start at a tenth of the base LR
        return self.opt.LR * np.power(0.1, decay)
51+
52+
class CustomCallback(keras.callbacks.Callback):
    """Per-epoch 10-crop validation plus console progress reporting."""

    def __init__(self, opt, trainGen, valGen):
        # Fix: initialize the keras Callback base class (was missing).
        super().__init__()
        self.opt = opt
        self.train_gen = trainGen
        self.val_gen = valGen
        self.curEpoch = 0
        self.curLr = opt.LR
        self.start_time = time.time()
        self.cur_epoch_start_time = time.time()

    def on_train_batch_begin(self, batch, logs=None):
        # No per-batch setup required.
        pass

    def on_train_batch_end(self, batch, logs=None):
        # Render a single-line progress display with elapsed time and ETA.
        elapsed_time = time.time() - self.start_time
        nTrain_batches = (len(self.train_gen.data) - 1) // self.opt.batchSize + 1
        progress = (nTrain_batches * (self.curEpoch - 1) + batch + 1) * 1.0 / (nTrain_batches * self.opt.nEpochs)
        eta = elapsed_time / progress - elapsed_time
        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            self.curEpoch, self.opt.nEpochs, batch + 1, nTrain_batches, self.curLr,
            U.to_hms(elapsed_time), U.to_hms(eta))
        sys.stderr.write('\r\033[K')
        sys.stdout.write(line)
        sys.stdout.flush()

    def on_epoch_begin(self, epoch, logs=None):
        self.curEpoch = epoch + 1
        # Mirror the scheduler's LR purely for display.
        self.curLr = Trainer(self.opt).GetLR(epoch + 1)
        self.cur_epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        # Run validation and inject the results into `logs` so CSVLogger and
        # ModelCheckpoint (monitoring 'val_acc') can see them.
        val_acc, val_loss = self.validate(self.model)
        logs['val_acc'] = val_acc
        logs['val_loss'] = val_loss
        time_taken = time.time() - self.cur_epoch_start_time
        sys.stderr.write('\r\033[K')
        sys.stdout.write(
            'Epoch: {}/{} | Time: {} | Train: LR {} Loss {:.3f}% Acc {:.3f}% | Val: Loss {:.3f}% Acc(top1) {:.3f}%\n'.format(
                epoch + 1, self.opt.nEpochs, U.to_hms(time_taken), self.curLr, logs['loss'], logs['acc'], val_loss, val_acc))
        sys.stdout.flush()

    def validate(self, model):
        """Predict the whole validation set and return (accuracy %, 'loss')."""
        y_pred = None
        y_target = None
        for batchIndex in range(math.ceil(len(self.val_gen.data) / self.val_gen.batch_size)):
            testX, testY = self.val_gen.__getitem__(batchIndex)
            scores = model.predict(testX, batch_size=len(testX), verbose=0)
            y_pred = scores if y_pred is None else np.concatenate((y_pred, scores))
            y_target = testY if y_target is None else np.concatenate((y_target, testY))

        return self.compute_accuracy(y_pred, y_target)

    def compute_accuracy(self, y_pred, y_target):
        """Average predictions over the nCrops crops of each sample and
        return top-1 accuracy in percent.

        NOTE(review): the returned 'loss' is the mean difference of the
        predicted and target class *indices* — not a real loss function.
        This is why val_loss in the training logs can be negative; kept
        unchanged for log compatibility.
        """
        # (samples*crops, classes) -> (samples, crops, classes)
        y_pred = y_pred.reshape(y_pred.shape[0] // self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])

        # Average the class predictions over the crops of each sample, then
        # take the arg-max class per sample.
        y_pred = np.mean(y_pred, axis=1).argmax(axis=1)

        # Same collapse for the targets.
        y_target = (y_target.reshape(y_target.shape[0] // self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(axis=1).argmax(axis=1)

        accuracy = (y_pred == y_target).mean() * 100
        loss = np.mean(y_target - y_pred)
        return accuracy, loss

‎utils.py

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import numpy as np
2+
import random
3+
4+
# Default data augmentation
5+
def padding(pad):
    """Return a transform that zero-pads `pad` samples on both ends of a
    1-D waveform."""
    return lambda sound: np.pad(sound, pad, 'constant')
10+
11+
12+
def random_crop(size):
    """Return a transform that extracts a random contiguous window of
    `size` samples from a 1-D waveform."""
    def f(sound):
        offset = random.randint(0, len(sound) - size)
        return sound[offset: offset + size]

    return f
19+
20+
21+
def normalize(factor):
    """Return a transform dividing a waveform by `factor`
    (e.g. 32768.0 to map int16 samples into [-1, 1])."""
    return lambda sound: sound / factor
26+
27+
28+
# For strong data augmentation
29+
def random_scale(max_scale, interpolate='Linear'):
    """Return a transform that randomly time-stretches a 1-D waveform by a
    factor drawn log-uniformly from [1/max_scale, max_scale]."""
    def f(sound):
        scale = np.power(max_scale, random.uniform(-1, 1))
        output_size = int(len(sound) * scale)
        ref = np.arange(output_size) / scale  # fractional source positions
        if interpolate == 'Linear':
            lo = ref.astype(np.int32)
            hi = np.minimum(lo + 1, len(sound) - 1)
            frac = ref - lo
            return sound[lo] * (1 - frac) + sound[hi] * frac
        if interpolate == 'Nearest':
            return sound[ref.astype(np.int32)]
        raise Exception('Invalid interpolation mode {}'.format(interpolate))

    return f
47+
48+
49+
def random_gain(db):
    """Return a transform applying a random gain drawn uniformly from
    [-db, +db] decibels."""
    def f(sound):
        gain_db = random.uniform(-db, db)
        return sound * np.power(10, gain_db / 20.0)

    return f
54+
55+
56+
# For testing phase
57+
def multi_crop(input_length, n_crops):
    """Return a transform slicing `n_crops` evenly spaced windows of
    `input_length` samples from a waveform (first crop at the start, last
    crop ending at the end)."""
    def f(sound):
        stride = (len(sound) - input_length) // (n_crops - 1)
        crops = [sound[stride * i: stride * i + input_length] for i in range(n_crops)]
        return np.array(crops)

    return f
64+
65+
66+
# For BC learning
67+
def a_weight(fs, n_fft, min_db=-80.0):
    """Return the A-weighting curve in dB for each rfft bin of an n_fft
    transform at sampling rate `fs`, floored at `min_db`."""
    freq = np.linspace(0, fs // 2, n_fft // 2 + 1)
    freq_sq = np.power(freq, 2)
    freq_sq[0] = 1.0  # avoid log10(0) at the DC bin
    # 20*log10 of the standard A-weighting transfer function, +2.0 dB so the
    # curve is ~0 dB at 1 kHz.
    numerator = 2 * np.log10(12194) + 2 * np.log10(freq_sq)
    denominator = (np.log10(freq_sq + 12194 ** 2)
                   + np.log10(freq_sq + 20.6 ** 2)
                   + 0.5 * np.log10(freq_sq + 107.7 ** 2)
                   + 0.5 * np.log10(freq_sq + 737.9 ** 2))
    weight = 2.0 + 20.0 * (numerator - denominator)

    return np.maximum(weight, min_db)
79+
80+
81+
def compute_gain(sound, fs, min_db=-80.0, mode='A_weighting'):
    """Return the per-frame gain of `sound` in dB.

    Frames of n_fft samples (hop n_fft/2) are reduced either to their mean
    squared amplitude ('RMSE') or to their A-weighted spectral power
    ('A_weighting'), floored at `min_db`.
    """
    if fs == 16000:
        n_fft = 2048
    elif fs == 44100:
        n_fft = 4096
    else:
        raise Exception('Invalid fs {}'.format(fs))
    stride = n_fft // 2

    # Hoist the frame-invariant window and weighting out of the loop.
    if mode == 'A_weighting':
        window = np.hanning(n_fft + 1)[:-1]
        weighting = np.power(10, a_weight(fs, n_fft) / 10)

    gain = []
    for i in range(0, len(sound) - n_fft + 1, stride):
        frame = sound[i: i + n_fft]
        if mode == 'RMSE':
            gain.append(np.mean(frame ** 2))
        elif mode == 'A_weighting':
            spec = np.fft.rfft(window * frame)
            gain.append(np.sum(np.abs(spec) ** 2 * weighting))
        else:
            raise Exception('Invalid mode {}'.format(mode))

    gain = np.maximum(np.array(gain), np.power(10, min_db / 10))
    return 10 * np.log10(gain)
109+
110+
111+
def mix(sound1, sound2, r, fs):
    """Mix two waveforms with ratio `r`, compensating for their perceived
    loudness difference and renormalizing the result's energy."""
    gain1 = np.max(compute_gain(sound1, fs))  # decibel
    gain2 = np.max(compute_gain(sound2, fs))
    # Blend weight derived from the mixing ratio and the dB gain difference.
    t = 1.0 / (1 + np.power(10, (gain1 - gain2) / 20.) * (1 - r) / r)
    return (sound1 * t + sound2 * (1 - t)) / np.sqrt(t ** 2 + (1 - t) ** 2)
118+
119+
# Convert time representation
120+
def to_hms(time):
    """Format a duration in seconds as 'XhMMm' (>= 1 hour) or 'XmSSs'."""
    h = int(time // 3600)
    m = int((time - h * 3600) // 60)
    s = int(time - h * 3600 - m * 60)
    return '{}h{:02d}m'.format(h, m) if h > 0 else '{}m{:02d}s'.format(m, s)

0 commit comments

Comments
 (0)
Please sign in to comment.