Skip to content

Commit 59044e2

Browse files
committedApr 14, 2020
Tensorflow implementation of EnvNet-V2
0 parents  commit 59044e2

13 files changed

+2840
-0
lines changed
 

‎__pycache__/dataset.cpython-37.pyc

3.58 KB
Binary file not shown.

‎__pycache__/models.cpython-37.pyc

2.88 KB
Binary file not shown.

‎__pycache__/opts.cpython-37.pyc

2.83 KB
Binary file not shown.

‎__pycache__/training.cpython-37.pyc

4.84 KB
Binary file not shown.

‎__pycache__/utils.cpython-37.pyc

4.12 KB
Binary file not shown.

‎aug-fold-1-training.log

+2,001
Large diffs are not rendered by default.

‎aug-fold-2-training.log

+251
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
epoch,acc,loss,lr,val_acc,val_loss
2+
0,0.02625,4.364190301895142,0.01,3.75,-14.365
3+
1,0.019375,3.8057942676544187,0.01,2.0,-19.525
4+
2,0.045,3.5334793663024904,0.01,2.75,-10.5775
5+
3,0.03,3.402750825881958,0.01,2.25,-5.4975
6+
4,0.0425,3.341447534561157,0.01,5.0,-6.5825
7+
5,0.041875,3.2882890033721925,0.01,2.75,-2.6475
8+
6,0.051875,3.2502317333221438,0.01,5.0,-1.7825
9+
7,0.06,3.2152176570892332,0.01,2.5,-3.4275
10+
8,0.058125,3.1734259510040284,0.01,4.5,0.9675
11+
9,0.074375,3.170181379318237,0.01,2.25,-9.5425
12+
10,0.07875,3.1162731647491455,0.01,5.5,-2.0025
13+
11,0.05625,3.347873497009277,0.1,3.5000000000000004,-16.805
14+
12,0.04625,3.321718235015869,0.1,4.75,-18.57
15+
13,0.06375,3.260312385559082,0.1,5.25,2.945
16+
14,0.063125,3.1655212020874024,0.1,5.75,7.31
17+
15,0.07875,3.1224004936218264,0.1,7.249999999999999,-3.8125
18+
16,0.086875,3.0920931339263915,0.1,8.5,4.7375
19+
17,0.095,3.0571328449249267,0.1,9.5,-3.4475
20+
18,0.1075,2.990420503616333,0.1,11.25,1.895
21+
19,0.11625,2.9558618545532225,0.1,11.25,-1.31
22+
20,0.14,2.9303871250152587,0.1,13.750000000000002,-3.3125
23+
21,0.151875,2.8846695709228514,0.1,8.0,5.1075
24+
22,0.1425,2.856938695907593,0.1,11.25,-4.8275
25+
23,0.178125,2.8150680446624756,0.1,11.25,-6.595
26+
24,0.168125,2.835599546432495,0.1,16.5,-2.785
27+
25,0.175,2.7768293476104735,0.1,16.0,2.955
28+
26,0.191875,2.7219950771331787,0.1,20.5,-4.14
29+
27,0.1975,2.693011360168457,0.1,21.75,-2.1175
30+
28,0.215,2.697782726287842,0.1,22.25,2.545
31+
29,0.209375,2.658558101654053,0.1,26.75,0.6375
32+
30,0.215625,2.639135580062866,0.1,18.75,-1.985
33+
31,0.206875,2.67412109375,0.1,11.75,-8.7575
34+
32,0.228125,2.6060924530029297,0.1,22.75,-4.0725
35+
33,0.25,2.5560087966918945,0.1,30.25,1.08
36+
34,0.268125,2.5199835205078127,0.1,25.0,-1.325
37+
35,0.28625,2.4759314823150635,0.1,30.25,2.0475
38+
36,0.26375,2.5218370246887205,0.1,35.0,-1.1125
39+
37,0.2825,2.4893642711639403,0.1,31.5,1.065
40+
38,0.283125,2.4635579586029053,0.1,41.5,1.24
41+
39,0.28125,2.4503310680389405,0.1,34.75,-0.4975
42+
40,0.295,2.4193574142456056,0.1,37.5,-1.255
43+
41,0.328125,2.359979496002197,0.1,48.0,1.0725
44+
42,0.3025,2.4025496864318847,0.1,32.5,0.0425
45+
43,0.33625,2.3367537307739257,0.1,45.0,-0.42
46+
44,0.32125,2.3890170288085937,0.1,43.75,1.0325
47+
45,0.345,2.338145399093628,0.1,45.75,2.185
48+
46,0.331875,2.372704477310181,0.1,47.5,-1.47
49+
47,0.346875,2.2720289993286134,0.1,35.75,2.49
50+
48,0.343125,2.335392503738403,0.1,40.5,-0.07
51+
49,0.360625,2.2747845315933226,0.1,44.75,-0.025
52+
50,0.37375,2.23638192653656,0.1,38.5,-5.795
53+
51,0.369375,2.2347133445739744,0.1,44.75,3.55
54+
52,0.350625,2.23949875831604,0.1,38.0,-0.04
55+
53,0.37375,2.237742404937744,0.1,39.25,-2.335
56+
54,0.3575,2.260177659988403,0.1,43.0,1.955
57+
55,0.36625,2.2538868141174317,0.1,44.75,-0.76
58+
56,0.366875,2.234534044265747,0.1,50.5,-0.6725
59+
57,0.366875,2.198842668533325,0.1,52.5,1.3125
60+
58,0.396875,2.166809391975403,0.1,48.75,-0.7075
61+
59,0.4075,2.1453133821487427,0.1,46.75,1.0775
62+
60,0.3775,2.166514267921448,0.1,48.25,-0.605
63+
61,0.390625,2.1388467025756834,0.1,45.0,0.9025
64+
62,0.410625,2.0651877784729002,0.1,60.0,1.58
65+
63,0.42,2.0550661420822145,0.1,53.25,0.955
66+
64,0.418125,2.1030141830444338,0.1,45.25,3.5725
67+
65,0.421875,2.082075037956238,0.1,46.75,1.295
68+
66,0.424375,2.1001365518569948,0.1,55.25,1.19
69+
67,0.4075,2.0760802602767945,0.1,41.0,-2.4825
70+
68,0.4475,2.0292210388183594,0.1,59.0,1.835
71+
69,0.42,2.024934573173523,0.1,47.25,-1.6825
72+
70,0.444375,1.9973055744171142,0.1,57.75,1.9475
73+
71,0.419375,2.0591587018966675,0.1,54.0,2.8475
74+
72,0.443125,2.020278615951538,0.1,56.25,1.7975
75+
73,0.44875,1.9624472618103028,0.1,60.75000000000001,0.4775
76+
74,0.448125,2.0197728490829467,0.1,64.75,0.22
77+
75,0.434375,2.0080120754241944,0.1,63.74999999999999,1.155
78+
76,0.473125,1.9557863426208497,0.1,58.75,1.2925
79+
77,0.460625,1.9723664379119874,0.1,56.99999999999999,1.03
80+
78,0.44,1.9687378406524658,0.1,62.0,1.2075
81+
79,0.461875,1.930267457962036,0.1,63.74999999999999,1.0875
82+
80,0.45875,1.9535737323760987,0.1,49.25,-0.365
83+
81,0.461875,1.9347326517105103,0.1,63.74999999999999,0.5775
84+
82,0.465,1.945738344192505,0.1,58.75,1.2975
85+
83,0.468125,1.9488419532775878,0.1,64.0,0.435
86+
84,0.485625,1.9235300159454345,0.1,59.5,0.54
87+
85,0.4775,1.9150684881210327,0.1,59.75,3.4
88+
86,0.480625,1.885982494354248,0.1,66.25,1.83
89+
87,0.476875,1.890736665725708,0.1,61.5,0.0725
90+
88,0.4825,1.8848549795150757,0.1,62.0,-0.15
91+
89,0.47875,1.8957869052886962,0.1,62.74999999999999,0.2875
92+
90,0.485625,1.8895482540130615,0.1,57.49999999999999,0.15
93+
91,0.515625,1.8061808395385741,0.1,65.5,0.6225
94+
92,0.490625,1.8378184127807617,0.1,67.0,1.04
95+
93,0.519375,1.8322415590286254,0.1,60.25,1.6525
96+
94,0.50375,1.8292215967178345,0.1,67.25,1.72
97+
95,0.515,1.810122208595276,0.1,66.0,-0.0475
98+
96,0.498125,1.8442325830459594,0.1,62.0,0.565
99+
97,0.5125,1.81817777633667,0.1,60.75000000000001,1.3675
100+
98,0.49875,1.8474519538879395,0.1,65.0,-0.12
101+
99,0.50125,1.8302334928512574,0.1,67.5,0.2375
102+
100,0.5225,1.7856882429122924,0.1,62.0,1.1175
103+
101,0.528125,1.7906427383422852,0.1,59.0,-1.8225
104+
102,0.515,1.78394868850708,0.1,68.5,0.8325
105+
103,0.529375,1.8193429899215698,0.1,65.25,1.9925
106+
104,0.503125,1.8336401891708374,0.1,68.25,1.6575
107+
105,0.520625,1.7816782951354981,0.1,69.25,1.0875
108+
106,0.545,1.748461365699768,0.1,73.5,1.02
109+
107,0.528125,1.7566093254089354,0.1,69.0,1.12
110+
108,0.550625,1.713003478050232,0.1,67.25,0.305
111+
109,0.54625,1.714949312210083,0.1,68.5,1.325
112+
110,0.54125,1.7462983798980714,0.1,62.74999999999999,1.445
113+
111,0.52625,1.770948896408081,0.1,64.75,0.9075
114+
112,0.53375,1.7509346532821655,0.1,70.75,0.155
115+
113,0.53125,1.737281756401062,0.1,68.5,0.81
116+
114,0.5475,1.7143649244308472,0.1,66.25,1.5975
117+
115,0.54125,1.7084273672103882,0.1,69.25,1.275
118+
116,0.51625,1.7679480075836183,0.1,72.25,1.6725
119+
117,0.56,1.6622666597366333,0.1,72.25,0.4725
120+
118,0.54,1.7429784870147704,0.1,73.75,0.1875
121+
119,0.548125,1.6692375135421753,0.1,68.75,2.0125
122+
120,0.571875,1.6558413362503053,0.1,65.5,1.84
123+
121,0.5625,1.6640590620040894,0.1,70.5,1.0575
124+
122,0.545,1.7142630863189696,0.1,74.0,0.6025
125+
123,0.551875,1.6940502977371217,0.1,64.75,1.595
126+
124,0.5575,1.6804782915115357,0.1,63.24999999999999,1.6075
127+
125,0.549375,1.6725295448303223,0.1,68.25,0.9825
128+
126,0.526875,1.7000903701782226,0.1,68.75,0.23
129+
127,0.56,1.6593765497207642,0.1,60.25,1.0225
130+
128,0.560625,1.65841814994812,0.1,63.0,1.1575
131+
129,0.569375,1.6534293365478516,0.1,59.0,-0.43
132+
130,0.56625,1.6677143955230713,0.1,62.25000000000001,0.12
133+
131,0.570625,1.65065514087677,0.1,69.25,1.4825
134+
132,0.575625,1.6319794940948487,0.1,69.25,1.1675
135+
133,0.571875,1.6122065925598144,0.1,69.0,0.8925
136+
134,0.555625,1.6656177854537964,0.1,68.25,0.59
137+
135,0.5875,1.5952541542053222,0.1,66.0,1.345
138+
136,0.590625,1.6259658241271973,0.1,72.75,0.845
139+
137,0.58875,1.636475682258606,0.1,70.5,0.255
140+
138,0.5975,1.5679073905944825,0.1,68.75,1.21
141+
139,0.568125,1.6453906393051148,0.1,70.0,1.32
142+
140,0.555625,1.6275051546096801,0.1,66.75,1.4
143+
141,0.57375,1.6199491977691651,0.1,65.25,0.785
144+
142,0.584375,1.5847906589508056,0.1,70.75,-1.6875
145+
143,0.58625,1.6070477628707887,0.1,75.0,1.1425
146+
144,0.57625,1.6056494426727295,0.1,70.5,0.2825
147+
145,0.59125,1.5741839027404785,0.1,69.0,1.08
148+
146,0.584375,1.573354082107544,0.1,70.25,-0.8875
149+
147,0.579375,1.6006696128845215,0.1,71.0,0.835
150+
148,0.58,1.5997914028167726,0.1,72.5,0.8275
151+
149,0.585,1.5993724966049194,0.1,70.75,0.12
152+
150,0.573125,1.600631957054138,0.1,73.75,-0.435
153+
151,0.58625,1.575380530357361,0.1,66.75,0.8425
154+
152,0.59625,1.5427978658676147,0.1,74.0,0.6425
155+
153,0.61125,1.5141425848007202,0.1,70.75,0.39
156+
154,0.595625,1.554921679496765,0.1,72.25,0.675
157+
155,0.58875,1.5786282634735107,0.1,74.0,-0.0875
158+
156,0.580625,1.60705894947052,0.1,73.0,0.485
159+
157,0.61,1.5018576717376708,0.1,73.75,2.48
160+
158,0.60125,1.563011598587036,0.1,72.25,1.31
161+
159,0.593125,1.5781792354583741,0.1,63.74999999999999,1.115
162+
160,0.605,1.5102868938446046,0.1,75.75,0.16
163+
161,0.600625,1.5380147123336791,0.1,66.25,0.96
164+
162,0.595,1.5284412288665772,0.1,70.0,0.21
165+
163,0.6,1.5297650051116944,0.1,73.75,0.2125
166+
164,0.6025,1.5288518142700196,0.1,67.75,1.3325
167+
165,0.589375,1.518393030166626,0.1,70.5,0.0775
168+
166,0.596875,1.5591082429885865,0.1,73.25,0.1025
169+
167,0.595,1.5433367490768433,0.1,73.25,0.5525
170+
168,0.608125,1.5544326162338258,0.1,75.0,0.925
171+
169,0.61125,1.4983487462997436,0.1,76.0,1.255
172+
170,0.615,1.492077612876892,0.1,74.25,1.135
173+
171,0.6125,1.483194351196289,0.1,76.5,0.2425
174+
172,0.621875,1.5039149808883667,0.1,72.5,0.9575
175+
173,0.605625,1.509285626411438,0.1,74.75,0.585
176+
174,0.61875,1.484777307510376,0.1,70.75,-0.5475
177+
175,0.615625,1.4738255691528321,0.1,72.0,-0.225
178+
176,0.606875,1.5206553554534912,0.1,74.75,1.295
179+
177,0.613125,1.527477684020996,0.1,75.25,0.975
180+
178,0.62875,1.4848559617996215,0.1,72.75,1.44
181+
179,0.61375,1.4811051750183106,0.1,70.5,1.3525
182+
180,0.624375,1.428681445121765,0.1,76.75,0.7625
183+
181,0.618125,1.4879146862030028,0.1,76.0,0.2325
184+
182,0.638125,1.4628437995910644,0.1,68.0,-0.85
185+
183,0.615,1.4981183862686158,0.1,75.25,-0.4575
186+
184,0.639375,1.46513774394989,0.1,64.5,-1.4575
187+
185,0.631875,1.4421774005889894,0.1,74.25,0.3775
188+
186,0.6325,1.4636979913711547,0.1,77.25,0.5525
189+
187,0.625,1.4698228025436402,0.1,76.5,0.8775
190+
188,0.6175,1.4855970811843873,0.1,72.25,0.1175
191+
189,0.618125,1.5001065731048584,0.1,72.0,-0.695
192+
190,0.61,1.457563223838806,0.1,75.0,-0.2625
193+
191,0.629375,1.474877152442932,0.1,76.5,0.735
194+
192,0.630625,1.44171142578125,0.1,68.75,1.325
195+
193,0.615,1.4470804691314698,0.1,74.25,-0.2175
196+
194,0.6175,1.4602191638946533,0.1,71.75,-0.4975
197+
195,0.64125,1.464056167602539,0.1,73.75,-0.0475
198+
196,0.610625,1.4594062852859497,0.1,75.75,1.135
199+
197,0.6325,1.4404124593734742,0.1,74.5,0.58
200+
198,0.64375,1.4104800462722777,0.1,77.0,0.365
201+
199,0.61625,1.4931021928787231,0.1,73.0,0.875
202+
200,0.636875,1.4372695446014405,0.1,77.0,1.065
203+
201,0.638125,1.4314981698989868,0.1,72.75,-0.105
204+
202,0.628125,1.4575612878799438,0.1,75.25,1.16
205+
203,0.636875,1.4191424703598023,0.1,76.5,-0.25
206+
204,0.649375,1.391803684234619,0.1,76.25,0.6125
207+
205,0.625,1.4518143367767333,0.1,75.5,0.02
208+
206,0.670625,1.3947421789169312,0.1,74.5,1.305
209+
207,0.633125,1.4279969978332518,0.1,78.25,1.225
210+
208,0.658125,1.3850564098358153,0.1,78.0,-0.1875
211+
209,0.62375,1.4357863187789917,0.1,74.25,-0.215
212+
210,0.639375,1.4034555101394652,0.1,70.25,0.595
213+
211,0.630625,1.440563669204712,0.1,75.25,0.6075
214+
212,0.6425,1.4011264610290528,0.1,67.75,-0.1275
215+
213,0.648125,1.3843915224075318,0.1,71.75,0.3175
216+
214,0.65125,1.3846601343154907,0.1,74.0,-0.0675
217+
215,0.63875,1.3937037324905395,0.1,79.5,0.4725
218+
216,0.64,1.3968168210983276,0.1,77.75,1.3575
219+
217,0.64625,1.418349962234497,0.1,71.5,1.3675
220+
218,0.654375,1.385741081237793,0.1,75.75,0.125
221+
219,0.6425,1.3702531194686889,0.1,74.25,-0.5725
222+
220,0.650625,1.4011496353149413,0.1,76.0,-0.6675
223+
221,0.6375,1.3519666194915771,0.1,76.25,0.31
224+
222,0.64375,1.386059980392456,0.1,79.25,1.2075
225+
223,0.653125,1.401152286529541,0.1,75.75,0.4425
226+
224,0.659375,1.4069624137878418,0.1,78.0,0.2225
227+
225,0.649375,1.3809283542633057,0.1,75.5,0.6175
228+
226,0.665,1.3553387069702147,0.1,76.25,1.1825
229+
227,0.65125,1.3894909715652466,0.1,74.75,0.56
230+
228,0.655625,1.3804780054092407,0.1,74.75,-0.0175
231+
229,0.669375,1.3629000473022461,0.1,76.5,1.13
232+
230,0.61625,1.4367037057876586,0.1,73.5,0.8775
233+
231,0.655625,1.3824931383132935,0.1,76.75,0.285
234+
232,0.660625,1.3272888803482055,0.1,75.75,-0.3175
235+
233,0.66625,1.3430774545669555,0.1,78.75,1.2725
236+
234,0.65375,1.3274751663208009,0.1,77.0,1.2025
237+
235,0.64625,1.3825994873046874,0.1,77.25,0.0225
238+
236,0.64625,1.3729515075683594,0.1,75.25,0.2325
239+
237,0.674375,1.3624279642105102,0.1,76.0,1.0475
240+
238,0.653125,1.3579264783859253,0.1,74.75,1.6625
241+
239,0.666875,1.3289760446548462,0.1,77.25,0.8275
242+
240,0.6725,1.3199524784088135,0.1,77.5,1.055
243+
241,0.666875,1.3675604343414307,0.1,74.5,0.0875
244+
242,0.67875,1.3353768157958985,0.1,79.0,0.2475
245+
243,0.64875,1.3520609712600709,0.1,77.25,0.115
246+
244,0.66,1.3676192808151244,0.1,81.0,-0.32
247+
245,0.66375,1.3348275232315063,0.1,73.5,-0.37
248+
246,0.671875,1.3160452985763549,0.1,76.25,0.37
249+
247,0.685625,1.3256859731674195,0.1,78.25,0.615
250+
248,0.66625,1.3432723140716554,0.1,72.75,1.895
251+
249,0.658125,1.3149521493911742,0.1,72.0,0.5325

‎dataset.py

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import os;
2+
import numpy as np;
3+
import random;
4+
import utils as U;
5+
from tensorflow import keras;
6+
7+
class Generator(keras.utils.Sequence):
    """Keras Sequence producing batches of raw-waveform samples.

    With ``options.BC`` enabled during training, each batch element is a mix
    of two randomly drawn examples of different classes (between-class
    learning) with a soft label blending the two one-hot vectors.  Otherwise
    samples are taken sequentially; in test mode each sample is expanded to
    ``options.nCrops`` crops and the batch is flattened accordingly.
    """

    def __init__(self, samples, labels, options, train=True):
        # Pair each waveform with its label so they are indexed together.
        self.data = [(samples[i], labels[i]) for i in range(len(samples))]
        self.opt = options
        self.train = train
        # Test-time samples expand to nCrops crops each, so fewer fit per batch.
        self.batch_size = options.batchSize if train else options.batchSize // options.nCrops
        self.mix = (options.BC and train)  # BC mixing only during training
        self.preprocess_funcs = self.preprocess_setup()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.data) / self.batch_size))

    def __getitem__(self, batchIndex):
        'Generate one batch of data'
        batchX, batchY = self.generate_batch(batchIndex)
        # Shape to (batch, rows=1, columns=length, channels=1) for Conv2D.
        batchX = np.expand_dims(batchX, axis=1)
        batchX = np.expand_dims(batchX, axis=3)
        return batchX, batchY

    def generate_batch(self, batchIndex):
        """Assemble up to batch_size (sound, label) pairs for `batchIndex`."""
        sounds = []
        labels = []
        indexes = None
        for i in range(self.batch_size):
            if self.mix:  # Training phase of BC learning
                # Select two training examples of different classes.
                while True:
                    sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
                    sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
                    if label1 != label2:
                        break
                sound1 = self.preprocess(sound1)
                sound2 = self.preprocess(sound2)

                # Mix the two waveforms with a random ratio; the label is the
                # same blend of the two one-hot vectors.
                r = np.array(random.random())
                sound = U.mix(sound1, sound2, r, self.opt.fs).astype(np.float32)
                eye = np.eye(self.opt.nClasses)
                label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)
            else:  # Training phase of standard learning or testing phase
                # Fix: identity comparison (`is None`) instead of `== None`.
                if indexes is None:
                    indexes = self.data[batchIndex * self.batch_size:(batchIndex + 1) * self.batch_size]
                elif i >= len(indexes):
                    break  # last batch may be short

                sound, target = indexes[i]
                sound = self.preprocess(sound).astype(np.float32)
                # One row of the one-hot label per crop.
                label = np.zeros((self.opt.nCrops, self.opt.nClasses))
                label[:, target] = 1

            if self.train and self.opt.strongAugment:
                sound = U.random_gain(6)(sound).astype(np.float32)

            sounds.append(sound)
            labels.append(label)

        sounds = np.asarray(sounds)
        labels = np.asarray(labels)
        if not self.train:
            # Flatten (samples, crops, ...) into a single crop-sample axis.
            sounds = sounds.reshape(sounds.shape[0] * sounds.shape[1], sounds.shape[2])
            labels = labels.reshape(labels.shape[0] * labels.shape[1], labels.shape[2])

        return sounds, labels

    def preprocess_setup(self):
        """Build the per-sample preprocessing pipeline for this mode."""
        if self.train:
            funcs = []
            if self.opt.strongAugment:
                funcs += [U.random_scale(1.25)]
            funcs += [U.padding(self.opt.inputLength // 2),
                      U.random_crop(self.opt.inputLength),
                      U.normalize(32768.0),
                      ]
        else:
            funcs = [U.padding(self.opt.inputLength // 2),
                     U.normalize(32768.0),
                     U.multi_crop(self.opt.inputLength, self.opt.nCrops),
                     ]
        return funcs

    def preprocess(self, sound):
        # Apply the configured pipeline in order.
        for f in self.preprocess_funcs:
            sound = f(sound)
        return sound
116+
117+
118+
def setup(opt, split):
    """Load the dataset npz for `opt` and build train/val generators.

    Fold `split` becomes the validation set; every other fold is training.
    """
    dataset = np.load(os.path.join(opt.data, opt.dataset, 'wav{}.npz'.format(opt.fs // 1000)))

    train_sounds, train_labels = [], []
    val_sounds, val_labels = [], []
    for fold in range(1, opt.nFolds + 1):
        contents = dataset['fold{}'.format(fold)].item()
        if fold == split:
            val_sounds.extend(contents['sounds'])
            val_labels.extend(contents['labels'])
        else:
            train_sounds.extend(contents['sounds'])
            train_labels.extend(contents['labels'])

    # Iterator setup
    train_data = Generator(train_sounds, train_labels, opt, train=True)
    val_data = Generator(val_sounds, val_labels, opt, train=False)

    return train_data, val_data

‎main.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import opts;
2+
from training import Trainer;
3+
#from keras.utils import plot_model
4+
5+
def Main():
    """Parse options and train one model for each requested split."""
    opt = opts.parse()
    for split in opt.splits:
        print('+-- Split {} --+'.format(split))
        Trainer(opt, split).Train()


if __name__ == '__main__':
    Main()

‎models.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from tensorflow import keras;
2+
from tensorflow.keras.models import Model;
3+
import tensorflow.keras.layers as L
4+
5+
class EnvNet2:
    """Builder for the EnvNet-v2 architecture operating on raw waveforms."""

    def __init__(self, input_length, n_class):
        self.input_length = input_length
        # Temporal feature extraction directly on the raw waveform.
        self.conv1 = ConvBNReLU(32, (1, 64), (1, 2))
        self.conv2 = ConvBNReLU(64, (1, 16), (1, 2))
        # 2-D convolutions over the learned time-frequency representation.
        self.conv3 = ConvBNReLU(32, (8, 8))
        self.conv4 = ConvBNReLU(32, (8, 8))
        self.conv5 = ConvBNReLU(64, (1, 4))
        self.conv6 = ConvBNReLU(64, (1, 4))
        self.conv7 = ConvBNReLU(128, (1, 2))
        self.conv8 = ConvBNReLU(128, (1, 2))
        self.conv9 = ConvBNReLU(256, (1, 2))
        self.conv10 = ConvBNReLU(256, (1, 2))
        # Fully connected classifier head.
        self.fc1 = FCDN(4096)
        self.fc2 = FCDN(4096)
        self.output = FCDN(n_class, 'softmax', 0)

    def createModel(self):
        """Wire the layers into a keras Model and return it."""
        # Input layout: (batch, rows=1, columns=input_length, channels=1).
        waveform = L.Input(shape=(1, self.input_length, 1))

        x = self.conv1(waveform)
        x = self.conv2(x)
        x = L.MaxPooling2D(pool_size=(1, 64), strides=(1, 64))(x)

        # Swap channel and row axes so the learned filters act as a
        # frequency-like dimension for the following 2-D convolutions.
        x = L.Permute((3, 2, 1))(x)

        x = self.conv4(self.conv3(x))
        x = L.MaxPooling2D(pool_size=(5, 3), strides=(5, 3))(x)

        x = self.conv6(self.conv5(x))
        x = L.MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(x)

        x = self.conv8(self.conv7(x))
        x = L.MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(x)

        x = self.conv10(self.conv9(x))
        x = L.MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(x)

        x = L.Flatten()(x)
        x = self.fc2(self.fc1(x))
        probabilities = self.output(x)

        return Model(inputs=waveform, outputs=probabilities)
58+
59+
class ConvBNReLU:
    """Composite Conv2D -> BatchNorm -> ReLU block (bias-free convolution,
    since BatchNorm provides the shift)."""

    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid',
                 initial_w=keras.initializers.he_normal(), use_bias=False):
        self.conv = L.Conv2D(filters=filters, kernel_size=kernel_size,
                             strides=strides, padding=padding,
                             kernel_initializer=initial_w, use_bias=use_bias)

    def __call__(self, x):
        out = self.conv(x)
        out = L.BatchNormalization()(out)
        return L.Activation('relu')(out)
68+
69+
class FCDN:
    """Dense -> Activation -> optional Dropout block.

    A dropout rate of 0 disables the Dropout layer entirely (used for the
    softmax output layer).
    """

    def __init__(self, units=50, activation='relu', dropout=0.5,
                 initial_w=keras.initializers.lecun_normal()):
        self.fcn = L.Dense(units, kernel_initializer=initial_w)
        self.activation = L.Activation(activation)
        self.dropout = L.Dropout(rate=dropout) if dropout > 0 else None

    def __call__(self, x):
        out = self.activation(self.fcn(x))
        if self.dropout is not None:
            out = self.dropout(out)
        return out

‎opts.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import os
2+
import argparse
3+
4+
5+
def parse():
    """Parse command-line options and fill in dataset/network defaults.

    Returns an argparse.Namespace augmented with derived fields:
    nClasses, nFolds, splits, fs, inputLength, and any of
    nEpochs/LR/schedule/warmup left at -1 resolved from default_settings.
    """
    parser = argparse.ArgumentParser(description='BC learning for sounds')

    # General settings
    parser.add_argument('--dataset', required=False, default='esc50', choices=['esc10', 'esc50', 'urbansound8k'])
    parser.add_argument('--netType', required=False, default='envnetv2', choices=['envnet', 'envnetv2'])
    parser.add_argument('--data', default='/home/mohaimen/Desktop/EXPERIMENTS/datasets/', required=False, help='Path to dataset')
    parser.add_argument('--split', type=int, default=-1, help='esc: 1-5, urbansound: 1-10 (-1: run on all splits)')
    parser.add_argument('--save', default='None', help='Directory to save the results')
    parser.add_argument('--testOnly', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)

    # Learning settings (default settings are defined below)
    # NOTE(review): default=True combined with action='store_true' means these
    # two flags can never be switched off from the command line; kept as-is
    # for backward compatibility.
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning')
    parser.add_argument('--strongAugment', default=True, action='store_true', help='Add scale and gain augmentation')
    parser.add_argument('--nEpochs', type=int, default=-1)
    parser.add_argument('--LR', type=float, default=-1, help='Initial learning rate')
    parser.add_argument('--schedule', type=float, nargs='*', default=-1, help='When to divide the LR')
    parser.add_argument('--warmup', type=int, default=-1, help='Number of epochs to warm up')
    parser.add_argument('--batchSize', type=int, default=64)
    parser.add_argument('--weightDecay', type=float, default=5e-4)
    parser.add_argument('--momentum', type=float, default=0.9)

    # Testing settings
    parser.add_argument('--nCrops', type=int, default=10)

    opt = parser.parse_args()

    # Dataset details
    if opt.dataset == 'esc50':
        opt.nClasses = 50
        opt.nFolds = 5
    elif opt.dataset == 'esc10':
        opt.nClasses = 10
        opt.nFolds = 5
    else:  # urbansound8k
        opt.nClasses = 10
        opt.nFolds = 10

    # split == -1 means "run every fold as a validation split".
    if opt.split == -1:
        opt.splits = range(1, opt.nFolds + 1)
    else:
        opt.splits = [opt.split]

    # Model details: sampling rate and raw input length per architecture.
    if opt.netType == 'envnet':
        opt.fs = 16000
        opt.inputLength = 24014
    else:  # envnetv2
        opt.fs = 44100
        opt.inputLength = 66650

    # Default settings (nEpochs will be doubled if opt.BC)
    default_settings = dict()
    default_settings['esc50'] = {
        'envnet': {'nEpochs': 600, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0},
        'envnetv2': {'nEpochs': 1000, 'LR': 0.1, 'schedule': [0.3, 0.6, 0.9], 'warmup': 10}
    }
    default_settings['esc10'] = {
        'envnet': {'nEpochs': 600, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0},
        'envnetv2': {'nEpochs': 600, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0}
    }
    default_settings['urbansound8k'] = {
        'envnet': {'nEpochs': 400, 'LR': 0.01, 'schedule': [0.5, 0.75], 'warmup': 0},
        'envnetv2': {'nEpochs': 600, 'LR': 0.1, 'schedule': [0.3, 0.6, 0.9], 'warmup': 10}
    }
    # Fix: attribute lookup via getattr instead of eval('opt.{}') — same
    # behavior, no dynamic code evaluation.
    for key in ['nEpochs', 'LR', 'schedule', 'warmup']:
        if getattr(opt, key) == -1:
            setattr(opt, key, default_settings[opt.dataset][opt.netType][key])
            # BC learning mixes labels, so train twice as long by default.
            if key == 'nEpochs' and opt.BC:
                opt.nEpochs *= 2

    if opt.save != 'None' and not os.path.isdir(opt.save):
        os.makedirs(opt.save)

    display_info(opt)

    return opt
84+
85+
86+
def display_info(opt):
    """Print a banner summarizing the selected training configuration."""
    learning = 'BC' if opt.BC else 'standard'
    rows = [
        '+------------------------------+',
        '| Sound classification',
        '+------------------------------+',
        '| dataset : {}'.format(opt.dataset),
        '| netType : {}'.format(opt.netType),
        '| learning : {}'.format(learning),
        '| augment : {}'.format(opt.strongAugment),
        '| nEpochs : {}'.format(opt.nEpochs),
        '| LRInit : {}'.format(opt.LR),
        '| schedule : {}'.format(opt.schedule),
        '| warmup : {}'.format(opt.warmup),
        '| batchSize: {}'.format(opt.batchSize),
        '+------------------------------+',
    ]
    for row in rows:
        print(row)

‎training.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import sys;
2+
import os;
3+
import utils as U;
4+
from tensorflow import keras;
5+
import models;
6+
import dataset;
7+
import math;
8+
import numpy as np;
9+
import time;
10+
#from keras.utils import plot_model
11+
12+
class Trainer:
    """Builds, compiles and trains an EnvNet model on one dataset split."""

    def __init__(self, opt=None, split=0):
        self.opt = opt
        self.split = split

    def Train(self):
        """Run the full training loop for this split."""
        # Fix: build the model from the configured input length / class count
        # (was hard-coded to 66650/50) and remove the debug exit() that made
        # everything below unreachable.
        envnet2 = models.EnvNet2(self.opt.inputLength, self.opt.nClasses)
        model = envnet2.createModel()
        print(model.summary())

        trainGen, valGen = dataset.setup(self.opt, self.split)

        # KL divergence handles the soft (mixed) labels of BC learning.
        loss = 'kullback_leibler_divergence'
        optimizer = keras.optimizers.SGD(lr=self.opt.LR, decay=self.opt.weightDecay,
                                         momentum=self.opt.momentum, nesterov=True)
        model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

        # Callbacks: LR schedule, custom 10-crop validation/progress output,
        # best-model checkpoint, CSV log.
        lrate = keras.callbacks.LearningRateScheduler(self.GetLR)
        best_model = keras.callbacks.ModelCheckpoint(
            'Split-' + str(self.split) + '_best_model.hdf5',
            monitor='val_acc', save_best_only=True, verbose=0)
        csv_logger = keras.callbacks.CSVLogger('aug-fold-' + str(self.split) + '-training.log')
        custom_evaluator = CustomCallback(self.opt, trainGen, valGen)
        callbacks_list = [lrate, custom_evaluator, best_model, csv_logger]

        # Validation is handled inside CustomCallback, hence no
        # validation_data here.
        model.fit_generator(trainGen, epochs=self.opt.nEpochs,
                            steps_per_epoch=len(trainGen.data) // trainGen.batch_size,
                            callbacks=callbacks_list, verbose=0)

    def GetLR(self, epoch):
        """Step schedule: divide the base LR by 10 at each fraction of
        nEpochs listed in opt.schedule; warm-up epochs run at LR/10."""
        divide_epoch = np.array([self.opt.nEpochs * i for i in self.opt.schedule])
        decay = sum(epoch > divide_epoch)
        if epoch <= self.opt.warmup:
            decay = 1  # warm-up: start at a tenth of the base LR
        return self.opt.LR * np.power(0.1, decay)
51+
52+
class CustomCallback(keras.callbacks.Callback):
    """Per-epoch 10-crop validation plus console progress reporting."""

    def __init__(self, opt, trainGen, valGen):
        # Fix: initialize the keras Callback base class (was missing).
        super().__init__()
        self.opt = opt
        self.train_gen = trainGen
        self.val_gen = valGen
        self.curEpoch = 0
        self.curLr = opt.LR
        self.start_time = time.time()
        self.cur_epoch_start_time = time.time()

    def on_train_batch_begin(self, batch, logs=None):
        # No per-batch setup required.
        pass

    def on_train_batch_end(self, batch, logs=None):
        # Render a single-line progress display with elapsed time and ETA.
        elapsed_time = time.time() - self.start_time
        nTrain_batches = (len(self.train_gen.data) - 1) // self.opt.batchSize + 1
        progress = (nTrain_batches * (self.curEpoch - 1) + batch + 1) * 1.0 / (nTrain_batches * self.opt.nEpochs)
        eta = elapsed_time / progress - elapsed_time
        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            self.curEpoch, self.opt.nEpochs, batch + 1, nTrain_batches, self.curLr,
            U.to_hms(elapsed_time), U.to_hms(eta))
        sys.stderr.write('\r\033[K')
        sys.stdout.write(line)
        sys.stdout.flush()

    def on_epoch_begin(self, epoch, logs=None):
        self.curEpoch = epoch + 1
        # Mirror the scheduler's LR purely for display.
        self.curLr = Trainer(self.opt).GetLR(epoch + 1)
        self.cur_epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        # Run validation and inject the results into `logs` so CSVLogger and
        # ModelCheckpoint (monitoring 'val_acc') can see them.
        val_acc, val_loss = self.validate(self.model)
        logs['val_acc'] = val_acc
        logs['val_loss'] = val_loss
        time_taken = time.time() - self.cur_epoch_start_time
        sys.stderr.write('\r\033[K')
        sys.stdout.write(
            'Epoch: {}/{} | Time: {} | Train: LR {} Loss {:.3f}% Acc {:.3f}% | Val: Loss {:.3f}% Acc(top1) {:.3f}%\n'.format(
                epoch + 1, self.opt.nEpochs, U.to_hms(time_taken), self.curLr, logs['loss'], logs['acc'], val_loss, val_acc))
        sys.stdout.flush()

    def validate(self, model):
        """Predict the whole validation set and return (accuracy %, 'loss')."""
        y_pred = None
        y_target = None
        for batchIndex in range(math.ceil(len(self.val_gen.data) / self.val_gen.batch_size)):
            testX, testY = self.val_gen.__getitem__(batchIndex)
            scores = model.predict(testX, batch_size=len(testX), verbose=0)
            y_pred = scores if y_pred is None else np.concatenate((y_pred, scores))
            y_target = testY if y_target is None else np.concatenate((y_target, testY))

        return self.compute_accuracy(y_pred, y_target)

    def compute_accuracy(self, y_pred, y_target):
        """Average predictions over the nCrops crops of each sample and
        return top-1 accuracy in percent.

        NOTE(review): the returned 'loss' is the mean difference of the
        predicted and target class *indices* — not a real loss function.
        This is why val_loss in the training logs can be negative; kept
        unchanged for log compatibility.
        """
        # (samples*crops, classes) -> (samples, crops, classes)
        y_pred = y_pred.reshape(y_pred.shape[0] // self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])

        # Average the class predictions over the crops of each sample, then
        # take the arg-max class per sample.
        y_pred = np.mean(y_pred, axis=1).argmax(axis=1)

        # Same collapse for the targets.
        y_target = (y_target.reshape(y_target.shape[0] // self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(axis=1).argmax(axis=1)

        accuracy = (y_pred == y_target).mean() * 100
        loss = np.mean(y_target - y_pred)
        return accuracy, loss

‎utils.py

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import numpy as np
2+
import random
3+
4+
# Default data augmentation
5+
def padding(pad):
    """Return a transform that zero-pads `pad` samples on both ends of a
    1-D waveform."""
    return lambda sound: np.pad(sound, pad, 'constant')
10+
11+
12+
def random_crop(size):
    """Return a transform that extracts a random contiguous window of
    `size` samples from a 1-D waveform."""
    def f(sound):
        offset = random.randint(0, len(sound) - size)
        return sound[offset: offset + size]

    return f
19+
20+
21+
def normalize(factor):
    """Return a transform dividing a waveform by `factor`
    (e.g. 32768.0 to map int16 samples into [-1, 1])."""
    return lambda sound: sound / factor
26+
27+
28+
# For strong data augmentation
29+
def random_scale(max_scale, interpolate='Linear'):
    """Return a transform that randomly time-stretches a 1-D waveform by a
    factor drawn log-uniformly from [1/max_scale, max_scale]."""
    def f(sound):
        scale = np.power(max_scale, random.uniform(-1, 1))
        output_size = int(len(sound) * scale)
        ref = np.arange(output_size) / scale  # fractional source positions
        if interpolate == 'Linear':
            lo = ref.astype(np.int32)
            hi = np.minimum(lo + 1, len(sound) - 1)
            frac = ref - lo
            return sound[lo] * (1 - frac) + sound[hi] * frac
        if interpolate == 'Nearest':
            return sound[ref.astype(np.int32)]
        raise Exception('Invalid interpolation mode {}'.format(interpolate))

    return f
47+
48+
49+
def random_gain(db):
    """Return a transform applying a random gain drawn uniformly from
    [-db, +db] decibels."""
    def f(sound):
        gain_db = random.uniform(-db, db)
        return sound * np.power(10, gain_db / 20.0)

    return f
54+
55+
56+
# For testing phase
57+
def multi_crop(input_length, n_crops):
    """Return a transform slicing `n_crops` evenly spaced windows of
    `input_length` samples from a waveform (first crop at the start, last
    crop ending at the end)."""
    def f(sound):
        stride = (len(sound) - input_length) // (n_crops - 1)
        crops = [sound[stride * i: stride * i + input_length] for i in range(n_crops)]
        return np.array(crops)

    return f
64+
65+
66+
# For BC learning
67+
def a_weight(fs, n_fft, min_db=-80.0):
    """Return the A-weighting curve in dB for each rfft bin of an n_fft
    transform at sampling rate `fs`, floored at `min_db`."""
    freq = np.linspace(0, fs // 2, n_fft // 2 + 1)
    freq_sq = np.power(freq, 2)
    freq_sq[0] = 1.0  # avoid log10(0) at the DC bin
    # 20*log10 of the standard A-weighting transfer function, +2.0 dB so the
    # curve is ~0 dB at 1 kHz.
    numerator = 2 * np.log10(12194) + 2 * np.log10(freq_sq)
    denominator = (np.log10(freq_sq + 12194 ** 2)
                   + np.log10(freq_sq + 20.6 ** 2)
                   + 0.5 * np.log10(freq_sq + 107.7 ** 2)
                   + 0.5 * np.log10(freq_sq + 737.9 ** 2))
    weight = 2.0 + 20.0 * (numerator - denominator)

    return np.maximum(weight, min_db)
79+
80+
81+
def compute_gain(sound, fs, min_db=-80.0, mode='A_weighting'):
    """Return the per-frame gain of `sound` in dB.

    Frames of n_fft samples (hop n_fft/2) are reduced either to their mean
    squared amplitude ('RMSE') or to their A-weighted spectral power
    ('A_weighting'), floored at `min_db`.
    """
    if fs == 16000:
        n_fft = 2048
    elif fs == 44100:
        n_fft = 4096
    else:
        raise Exception('Invalid fs {}'.format(fs))
    stride = n_fft // 2

    # Hoist the frame-invariant window and weighting out of the loop.
    if mode == 'A_weighting':
        window = np.hanning(n_fft + 1)[:-1]
        weighting = np.power(10, a_weight(fs, n_fft) / 10)

    gain = []
    for i in range(0, len(sound) - n_fft + 1, stride):
        frame = sound[i: i + n_fft]
        if mode == 'RMSE':
            gain.append(np.mean(frame ** 2))
        elif mode == 'A_weighting':
            spec = np.fft.rfft(window * frame)
            gain.append(np.sum(np.abs(spec) ** 2 * weighting))
        else:
            raise Exception('Invalid mode {}'.format(mode))

    gain = np.maximum(np.array(gain), np.power(10, min_db / 10))
    return 10 * np.log10(gain)
109+
110+
111+
def mix(sound1, sound2, r, fs):
    """Mix two waveforms with ratio `r`, compensating for their perceived
    loudness difference and renormalizing the result's energy."""
    gain1 = np.max(compute_gain(sound1, fs))  # decibel
    gain2 = np.max(compute_gain(sound2, fs))
    # Blend weight derived from the mixing ratio and the dB gain difference.
    t = 1.0 / (1 + np.power(10, (gain1 - gain2) / 20.) * (1 - r) / r)
    return (sound1 * t + sound2 * (1 - t)) / np.sqrt(t ** 2 + (1 - t) ** 2)
118+
119+
# Convert time representation
120+
def to_hms(time):
    """Format a duration in seconds as 'XhMMm' (>= 1 hour) or 'XmSSs'."""
    h = int(time // 3600)
    m = int((time - h * 3600) // 60)
    s = int(time - h * 3600 - m * 60)
    return '{}h{:02d}m'.format(h, m) if h > 0 else '{}m{:02d}s'.format(m, s)

0 commit comments

Comments
 (0)
Please sign in to comment.