@@ -243,127 +243,128 @@ def test_batch_norm_vars():
243
243
244
244
def test_batch_norm():
  """
  Checks that BatchNormLayer with masked_time=True and masked_time=False
  produce identical output when every sequence in the batch has the same
  length (so the time mask removes nothing).
  """
  with make_scope() as session:
    net = TFNetwork(extern_data=ExternData(), train_flag=True)
    with tf_compat.v1.variable_scope("src_nchw"):
      # NHWC-layout input: batch, time, spatial 16, feature 16.
      src_nhwc = InternalLayer(
        name="src_nchw", network=net,
        out_type={
          "dim": 16,
          "shape": (None, 16, 16),
          "batch_dim_axis": 0,
          "time_dim_axis": 1,
          "feature_dim_axis": 3,
          "sparse": False})
      src_nhwc.output.placeholder = tf_compat.v1.placeholder(shape=(None, None, 16, 16), dtype=tf.float32)
      src_nhwc.output.size_placeholder = {0: tf_compat.v1.placeholder(shape=(None,), dtype=tf.int32)}

    rnd = numpy.random.RandomState(42)
    input_data = rnd.rand(10, 11, 16, 16)
    # All sequences have full length 11, so masking is a no-op.
    seq_lens = numpy.array([11] * 10)

    with tf_compat.v1.variable_scope("batch_norm_masked_nchw"):
      batch_norm_1 = BatchNormLayer(
        name="batch_norm_masked_nchw", network=net, masked_time=True,
        sources=[src_nhwc],
        output=BatchNormLayer.get_out_data_from_opts(
          name="batch_norm_masked_nchw",
          sources=[src_nhwc],
          network=net))
      batch_norm_1.post_init(layer_desc=None)
    with tf_compat.v1.variable_scope("batch_norm_nonmasked_nchw"):
      batch_norm_2 = BatchNormLayer(
        name="batch_norm_nonmasked_nchw", network=net, masked_time=False,
        sources=[src_nhwc],
        output=BatchNormLayer.get_out_data_from_opts(
          name="batch_norm_nonmasked_nchw",
          sources=[src_nhwc],
          network=net))
      batch_norm_2.post_init(layer_desc=None)
    tf_compat.v1.global_variables_initializer().run(session=session)
    out_1, seq_lens_1 = session.run(
      [batch_norm_1.output.placeholder, batch_norm_1.output.size_placeholder[0]],
      feed_dict={
        src_nhwc.output.placeholder: input_data,
        src_nhwc.output.size_placeholder[0]: seq_lens})
    out_2, seq_lens_2 = session.run(
      [batch_norm_2.output.placeholder, batch_norm_2.output.size_placeholder[0]],
      feed_dict={
        src_nhwc.output.placeholder: input_data,
        src_nhwc.output.size_placeholder[0]: seq_lens})
    # With equal seq lens, masked and non-masked batch norm must agree exactly.
    assert numpy.array_equal(out_1, out_2)
    print(numpy.sum(out_1 - out_2))
295
295
296
296
297
297
def test_batch_norm_unequal_seq_len():
  """
  Checks BatchNormLayer statistics when sequences have unequal lengths:
  the masked variant must normalize with statistics computed over valid
  frames only, and both variants are compared against a manual NumPy
  computation of batch norm (use_shift/use_std disabled, epsilon 0).
  """
  with make_scope() as session:
    net = TFNetwork(extern_data=ExternData(), train_flag=True)
    with tf_compat.v1.variable_scope("src_nhwc"):
      # NHWC-layout input: batch, time, spatial 16, feature 16.
      src_nhwc = InternalLayer(
        name="src_nhwc", network=net,
        out_type={
          "dim": 16,
          "shape": (None, 16, 16),
          "batch_dim_axis": 0,
          "time_dim_axis": 1,
          "feature_dim_axis": 3,
          "sparse": False})
      src_nhwc.output.placeholder = tf_compat.v1.placeholder(shape=(None, None, 16, 16), dtype=tf.float32)
      src_nhwc.output.size_placeholder = {0: tf_compat.v1.placeholder(shape=(None,), dtype=tf.int32)}

    rnd = numpy.random.RandomState(42)
    input_data = rnd.rand(10, 11, 16, 16).astype('f')
    # Zero out the padded frames of sequence 2 (length 5), so that the
    # non-masked layer sees the same data the mask would remove.
    input_data[2, 5:, :, :] = 0
    input_data_masked = numpy.copy(input_data)
    seq_lens = numpy.array([11, 11, 5, 11, 11, 11, 11, 11, 11, 11], dtype=numpy.float32)
    # n1: number of valid (non-padded) frames * spatial dim; n2: total frames.
    n1 = 9 * 11 * 16 + 5 * 16
    n2 = 10 * 11 * 16

    with tf_compat.v1.variable_scope("batch_norm_masked_nchw"):
      batch_norm_1 = BatchNormLayer(
        name="batch_norm_masked_nchw", network=net, masked_time=True,
        use_shift=False, use_std=False, epsilon=0.0,
        sources=[src_nhwc],
        output=BatchNormLayer.get_out_data_from_opts(
          name="batch_norm_masked_nchw",
          sources=[src_nhwc],
          network=net))
      batch_norm_1.post_init(layer_desc=None)
    with tf_compat.v1.variable_scope("batch_norm_nonmasked_nchw"):
      batch_norm_2 = BatchNormLayer(
        name="batch_norm_nonmasked_nchw", network=net, masked_time=False,
        use_shift=False, use_std=False, epsilon=0,
        sources=[src_nhwc],
        output=BatchNormLayer.get_out_data_from_opts(
          name="batch_norm_nonmasked_nchw",
          sources=[src_nhwc],
          network=net))
      batch_norm_2.post_init(layer_desc=None)
    tf_compat.v1.global_variables_initializer().run(session=session)
    out_1, seq_lens_1 = session.run(
      [batch_norm_1.output.placeholder, batch_norm_1.output.size_placeholder[0]],
      feed_dict={
        src_nhwc.output.placeholder: input_data,
        src_nhwc.output.size_placeholder[0]: seq_lens})
    out_2, seq_lens_2 = session.run(
      [batch_norm_2.output.placeholder, batch_norm_2.output.size_placeholder[0]],
      feed_dict={
        src_nhwc.output.placeholder: input_data_masked,
        src_nhwc.output.size_placeholder[0]: seq_lens})

    # Manually calculating batch_norm and compare to the tf output
    data_mean = numpy.mean(input_data, axis=(0, 1, 2), keepdims=True, dtype=numpy.float32)
    data_var = numpy.var(input_data, axis=(0, 1, 2), keepdims=True, dtype=numpy.float32)
    np_bn2 = (input_data - data_mean) * (1.0 / numpy.sqrt(data_var))
    numpy.testing.assert_array_almost_equal(np_bn2, out_2, decimal=5)
    # Manually calculating batch_norm with different seq_lens, having:
    # Mean_1 = n2 / n1 * Mean_2
    # Var_1 = n2 / n1 * (Var_2 + Mean_2 ^ 2 (1 - n2 / n1))
    # bn_1 = (x - Mean_1) * 1 / sqrt(Var_1)
    # Substituting Mean_1 and Var_1:
    np_bn1 = (
      (input_data - n2 / n1 * data_mean) *
      (1.0 / numpy.sqrt(n2 / n1 * (data_var + data_mean ** 2 * (1 - n2 / n1)))))
    # Check with tf output.
    numpy.testing.assert_array_almost_equal(np_bn1, out_1, decimal=5)
367
368
368
369
369
370
def test_activation_layer_net_construct ():
0 commit comments