Skip to content

Commit 2f94e01

Browse files
committed
Add support for inverse transformation function for output; update README
1 parent 0f062da commit 2f94e01

File tree

6 files changed

+49
-17
lines changed

6 files changed

+49
-17
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ git clone --depth 1 https://github.com/nii-yamagishilab/project-NN-Pytorch-scrip
1717

1818
* Latest updates:
1919
1. Code, databases, and resources for paper below were added. Please check [project/10-asvspoof-vocoded-trn-ssl/](project/10-asvspoof-vocoded-trn-ssl/)
20-
> Xin Wang, and Junichi Yamagishi. Can Large-scale vocoded spoofed data improve speech spoofing countermeasure with a self-supervised front end?. Submitted
20+
> Xin Wang, and Junichi Yamagishi. Can Large-scale vocoded spoofed data improve speech spoofing countermeasure with a self-supervised front end?. Submitted
2121
1. Neural vocoders pretrained on VoxCeleb2 dev and other datasets are available in tutorial notebook **chapter_a3.ipynb** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1xObWejhqcdSxFAjfWI7sudwPPMoCx-vA?usp=sharing)
2222
2. Code, databases, and resources for the paper below were added. Please check [project/09-asvspoof-vocoded-trn/](project/09-asvspoof-vocoded-trn/) for more details.
2323
> Xin Wang, and Junichi Yamagishi. Spoofed training data for speech spoofing countermeasure can be efficiently created using neural vocoders. Proc. ICASSP 2023, accepted. https://arxiv.org/abs/2210.10570

core_scripts/data_io/customize_dataset.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,8 @@ def __init__(self,
254254
dset_config = None,
255255
input_augment_funcs = None,
256256
output_augment_funcs = None,
257-
inoutput_augment_func = None):
257+
inoutput_augment_func = None,
258+
output_invtrans_funcs = None):
258259
""" Signature is similar to default_io.NIIDataSetLoader.
259260
file_list, input_dirs, and output_dirs are different.
260261
One additional optional argument is way_to_merge.
@@ -308,6 +309,9 @@ def __init__(self,
308309
default None
309310
inoutput_augment_func: a single data augmentation function
310311
default None
312+
output_invtrans_funcs: list of functions for inverse transformation
313+
of output features during inference.
314+
default None
311315
Methods
312316
-------
313317
get_loader(): return a torch.util.data.DataLoader
@@ -353,9 +357,15 @@ def __init__(self,
353357
for sub_input_dirs, sub_output_dirs, sub_file_list, tmp_name in \
354358
zip(list_input_dirs, list_output_dirs, list_file_list, tmp_dnames):
355359

360+
# augmentation (transformation) function for input
356361
inaug = input_augment_funcs[cnt] if input_augment_funcs else None
362+
363+
# augmentation (transformation) function for output
357364
ouaug = output_augment_funcs[cnt] if output_augment_funcs else None
358-
365+
366+
# inverse transformation for output data during inference
367+
oinf = output_invtrans_funcs[cnt] if output_invtrans_funcs else None
368+
359369
lst_dset.append(
360370
nii_default_dset.NIIDataSetLoader(
361371
tmp_name, sub_file_list, \
@@ -366,7 +376,7 @@ def __init__(self,
366376
stats_path, data_format, params, truncate_seq, min_seq_len,\
367377
save_mean_std, wav_samp_rate, flag_lang, \
368378
global_arg, dset_config, inaug, ouaug,\
369-
inoutput_augment_func))
379+
inoutput_augment_func, oinf))
370380
cnt += 1
371381

372382
# list of the datasets

core_scripts/data_io/default_data_io.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ def __init__(self,
130130
dset_config = None, \
131131
input_augment_funcs = None, \
132132
output_augment_funcs = None,
133-
inoutput_augment_func = None):
133+
inoutput_augment_func = None,
134+
output_invtrans_funcs = None):
134135
"""
135136
args
136137
----
@@ -171,6 +172,8 @@ def __init__(self,
171172
default None
172173
inoutput_augment_func: a single data augmentation function,
173174
default None
175+
output_invtrans_funcs: list of inverse transformations to process
176+
generated data during inference. default None
174177
175178
"""
176179
# initialization
@@ -250,6 +253,11 @@ def _tmp_f(list2, default_value, length):
250253
else:
251254
self.m_inouaug_func = None
252255

256+
if output_invtrans_funcs:
257+
self.m_output_invtrans_funcs = output_invtrans_funcs
258+
else:
259+
self.m_output_invtrans_funcs = []
260+
253261
# dimensions
254262
self.m_input_all_dim = sum(self.m_input_dims)
255263
self.m_output_all_dim = sum(self.m_output_dims)
@@ -1410,7 +1418,8 @@ def f_calculate_stats(self, flag_cal_data_len, flag_cal_mean_std):
14101418

14111419
def f_putitem(self, output_data, save_dir, filename_prefix, data_infor_str):
14121420
"""
1413-
"""
1421+
"""
1422+
14141423
# Change the dimension to (length, dim)
14151424
if output_data.ndim == 3 and output_data.shape[0] == 1:
14161425
# When input data is (batchsize=1, length, dim)
@@ -1423,6 +1432,15 @@ def f_putitem(self, output_data, save_dir, filename_prefix, data_infor_str):
14231432
nii_warn.f_print("Format is not (batch, len, dim)", "error")
14241433
nii_warn.f_die("Please use batch_size = 1 in generation")
14251434

1435+
###
1436+
# before saving output data, we do (inverse) transformation
1437+
###
1438+
if self.m_output_invtrans_funcs:
1439+
# apply all the output transformation on the generated data
1440+
output_tmp = [x(output_data) for x in self.m_output_invtrans_funcs]
1441+
# concatenate by the last dimension
1442+
output_data = np.concatenate(output_tmp, axis=-1)
1443+
14261444
# Save output
14271445
if output_data.shape[1] != self.m_output_all_dim:
14281446
nii_warn.f_print("Output data dim != expected dim", "error")
@@ -1604,7 +1622,8 @@ def __init__(self,
16041622
dset_config = None,
16051623
input_augment_funcs = None,
16061624
output_augment_funcs = None,
1607-
inoutput_augment_func = None):
1625+
inoutput_augment_func = None,
1626+
output_invtrans_funcs = None):
16081627
"""
16091628
NIIDataSetLoader(
16101629
data_set_name,
@@ -1623,7 +1642,8 @@ def __init__(self,
16231642
dset_config = None,
16241643
input_augment_funcs = None,
16251644
output_augment_funcs = None,
1626-
inoutput_augment_func = None):
1645+
inoutput_augment_func = None,
1646+
output_invtrans_funcs = None):
16271647
Args
16281648
----
16291649
data_set_name: a string to name this dataset
@@ -1670,6 +1690,9 @@ def __init__(self,
16701690
default None
16711691
inoutput_augment_func: a single data augmentation function
16721692
default None
1693+
output_invtrans_funcs: list of functions for inverse transformation
1694+
of output features during inference.
1695+
default None
16731696
Methods
16741697
-------
16751698
get_loader(): return a torch.util.data.DataLoader
@@ -1696,7 +1719,8 @@ def __init__(self,
16961719
dset_config, \
16971720
input_augment_funcs,
16981721
output_augment_funcs,
1699-
inoutput_augment_func)
1722+
inoutput_augment_func,
1723+
output_invtrans_funcs)
17001724

17011725
# create torch.util.data.DataLoader
17021726
if params is None:

core_scripts/nn_manager/nn_manager.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ def f_train_wrapper(args, pt_model, loss_wrapper, device, \
577577
return
578578

579579

580-
def f_inference_wrapper(args, pt_model, device, \
580+
def f_inference_wrapper(args, pt_model, device,
581581
test_dataset_wrapper, checkpoint):
582582
""" Wrapper for inference
583583
"""
@@ -618,8 +618,6 @@ def f_inference_wrapper(args, pt_model, device, \
618618
nii_display.f_print("Generate minibatch indexed within [{:d},{:d})".format(
619619
range_genidx_start, range_genidx_end))
620620

621-
622-
623621
# if a list of file to be processed is provided
624622
inf_datalist_path = args.inference_data_list
625623
if len(inf_datalist_path):
@@ -678,7 +676,6 @@ def f_inference_wrapper(args, pt_model, device, \
678676
else:
679677
pass
680678

681-
682679
# send data to device and convert data type
683680
if isinstance(data_in, torch.Tensor):
684681
data_in = data_in.to(device, dtype=nii_dconf.d_dtype)
@@ -705,7 +702,6 @@ def f_inference_wrapper(args, pt_model, device, \
705702
infer_func = pt_model.inference
706703
else:
707704
infer_func = pt_model.forward
708-
709705

710706
if hasattr(args, 'trunc_input_length_for_inference') and \
711707
args.trunc_input_length_for_inference > 0:
@@ -774,7 +770,7 @@ def f_inference_wrapper(args, pt_model, device, \
774770
mes = "Output data is not torch.tensor. Please check "
775771
mes += "model.forward or model.inference"
776772
nii_display.f_die(mes)
777-
773+
778774
# save output (in case batchsize > 1, )
779775
for idx, seq_info in enumerate(data_info):
780776
#nii_display.f_print(seq_info)

project/10-asvspoof-vocoded-trn-ssl/00_demo.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# Usage: bash 00_demo.sh PATH CONFIG RAND_SEED
66
# where
7-
# PATH can be model-B1
7+
# PATH can be model-ID-B1
88
# or other model-* folders
99
# CONFIG can be config_train_toyset_paired
1010
# if you prepare other config, you can use them as well

tutorials/b2_anti_spoofing/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
Here are the tutorial notebooks on speech anti-spoofing.
44

5-
It is unfinished, but the following notebooks are available:
5+
Chapter 1 and 2 were the materials used in the Interspeech 2023 tutorial.
66

77

88
| Chapter | Contents |
99
| --- | :-- |
10+
| chapter 1 Toy example | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/19amdsNeLZ0O6Ch-jDuIm9Z4pXeO_rF40?usp=sharing) Please check First session
11+
| chapter 2 Graph attention network CM | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/19amdsNeLZ0O6Ch-jDuIm9Z4pXeO_rF40?usp=sharing) Please check Second session
1012
| [chapter_a0_EER](./chapter_a0_EER.ipynb) | Detailed tutorial on equal error rate (EER)
1113
| [chapter_a1_stats_test](./chapter_a1_stats_test.ipynb) | Detailed tutorial on statistical analysis on EER
1214

0 commit comments

Comments
 (0)