Skip to content

Commit 2f94e01

Browse files
committed
Add support for inverse transformation function for output; update README
1 parent 0f062da commit 2f94e01

File tree

6 files changed

+49
-17
lines changed

6 files changed

+49
-17
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ git clone --depth 1 https://github.com/nii-yamagishilab/project-NN-Pytorch-scrip
1717

1818
* Latest updates:
1919
1. Code, databases, and resources for paper below were added. Please check [project/10-asvspoof-vocoded-trn-ssl/](project/10-asvspoof-vocoded-trn-ssl/)
20-
> Xin Wang, and Junichi Yamagishi. Can Large-scale vocoded spoofed data improve speech spoofing countermeasure with a self-supervised front end?. Submitted
20+
> Xin Wang, and Junichi Yamagishi. Can Large-scale vocoded spoofed data improve speech spoofing countermeasure with a self-supervised front end?. Submitted
2121
1. Neural vocoders pretrained on VoxCeleb2 dev and other datasets are available in tutorial notebook **chapter_a3.ipynb** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1xObWejhqcdSxFAjfWI7sudwPPMoCx-vA?usp=sharing)
2222
2. Code, databases, and resources for the paper below were added. Please check [project/09-asvspoof-vocoded-trn/](project/09-asvspoof-vocoded-trn/) for more details.
2323
> Xin Wang, and Junichi Yamagishi. Spoofed training data for speech spoofing countermeasure can be efficiently created using neural vocoders. Proc. ICASSP 2023, accepted. https://arxiv.org/abs/2210.10570

core_scripts/data_io/customize_dataset.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,8 @@ def __init__(self,
254254
dset_config = None,
255255
input_augment_funcs = None,
256256
output_augment_funcs = None,
257-
inoutput_augment_func = None):
257+
inoutput_augment_func = None,
258+
output_invtrans_funcs = None):
258259
""" Signature is similar to default_io.NIIDataSetLoader.
259260
file_list, input_dirs, and output_dirs are different.
260261
One additional optional argument is way_to_merge.
@@ -308,6 +309,9 @@ def __init__(self,
308309
default None
309310
inoutput_augment_func: a single data augmentation function
310311
default None
312+
output_invtrans_funcs: list of functions for inverse transformation
313+
of output features during inference.
314+
default None
311315
Methods
312316
-------
313317
get_loader(): return a torch.util.data.DataLoader
@@ -353,9 +357,15 @@ def __init__(self,
353357
for sub_input_dirs, sub_output_dirs, sub_file_list, tmp_name in \
354358
zip(list_input_dirs, list_output_dirs, list_file_list, tmp_dnames):
355359

360+
# augmentation (transformation) function for input
356361
inaug = input_augment_funcs[cnt] if input_augment_funcs else None
362+
363+
# augmentation (transformation) function for output
357364
ouaug = output_augment_funcs[cnt] if output_augment_funcs else None
358-
365+
366+
# inverse transformation for output data during inference
367+
oinf = output_invtrans_funcs[cnt] if output_invtrans_funcs else None
368+
359369
lst_dset.append(
360370
nii_default_dset.NIIDataSetLoader(
361371
tmp_name, sub_file_list, \
@@ -366,7 +376,7 @@ def __init__(self,
366376
stats_path, data_format, params, truncate_seq, min_seq_len,\
367377
save_mean_std, wav_samp_rate, flag_lang, \
368378
global_arg, dset_config, inaug, ouaug,\
369-
inoutput_augment_func))
379+
inoutput_augment_func, oinf))
370380
cnt += 1
371381

372382
# list of the datasets

core_scripts/data_io/default_data_io.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ def __init__(self,
130130
dset_config = None, \
131131
input_augment_funcs = None, \
132132
output_augment_funcs = None,
133-
inoutput_augment_func = None):
133+
inoutput_augment_func = None,
134+
output_invtrans_funcs = None):
134135
"""
135136
args
136137
----
@@ -171,6 +172,8 @@ def __init__(self,
171172
default None
172173
inoutput_augment_func: a single data augmentation function,
173174
default None
175+
output_invtrans_funcs: list of inverse transformations to process
176+
generated data during inference. default None
174177
175178
"""
176179
# initialization
@@ -250,6 +253,11 @@ def _tmp_f(list2, default_value, length):
250253
else:
251254
self.m_inouaug_func = None
252255

256+
if output_invtrans_funcs:
257+
self.m_output_invtrans_funcs = output_invtrans_funcs
258+
else:
259+
self.m_output_invtrans_funcs = []
260+
253261
# dimensions
254262
self.m_input_all_dim = sum(self.m_input_dims)
255263
self.m_output_all_dim = sum(self.m_output_dims)
@@ -1410,7 +1418,8 @@ def f_calculate_stats(self, flag_cal_data_len, flag_cal_mean_std):
14101418

14111419
def f_putitem(self, output_data, save_dir, filename_prefix, data_infor_str):
14121420
"""
1413-
"""
1421+
"""
1422+
14141423
# Change the dimension to (length, dim)
14151424
if output_data.ndim == 3 and output_data.shape[0] == 1:
14161425
# When input data is (batchsize=1, length, dim)
@@ -1423,6 +1432,15 @@ def f_putitem(self, output_data, save_dir, filename_prefix, data_infor_str):
14231432
nii_warn.f_print("Format is not (batch, len, dim)", "error")
14241433
nii_warn.f_die("Please use batch_size = 1 in generation")
14251434

1435+
###
1436+
# before saving output data, we do (inverse) transformation
1437+
###
1438+
if self.m_output_invtrans_funcs:
1439+
# apply all the output transformation on the generated data
1440+
output_tmp = [x(output_data) for x in self.m_output_invtrans_funcs]
1441+
# concatenate by the last dimension
1442+
output_data = np.concatenate(output_tmp, axis=-1)
1443+
14261444
# Save output
14271445
if output_data.shape[1] != self.m_output_all_dim:
14281446
nii_warn.f_print("Output data dim != expected dim", "error")
@@ -1604,7 +1622,8 @@ def __init__(self,
16041622
dset_config = None,
16051623
input_augment_funcs = None,
16061624
output_augment_funcs = None,
1607-
inoutput_augment_func = None):
1625+
inoutput_augment_func = None,
1626+
output_invtrans_funcs = None):
16081627
"""
16091628
NIIDataSetLoader(
16101629
data_set_name,
@@ -1623,7 +1642,8 @@ def __init__(self,
16231642
dset_config = None,
16241643
input_augment_funcs = None,
16251644
output_augment_funcs = None,
1626-
inoutput_augment_func = None):
1645+
inoutput_augment_func = None,
1646+
output_invtrans_funcs = None):
16271647
Args
16281648
----
16291649
data_set_name: a string to name this dataset
@@ -1670,6 +1690,9 @@ def __init__(self,
16701690
default None
16711691
inoutput_augment_func: a single data augmentation function
16721692
default None
1693+
output_invtrans_funcs: list of functions for inverse transformation
1694+
of output features during inference.
1695+
default None
16731696
Methods
16741697
-------
16751698
get_loader(): return a torch.util.data.DataLoader
@@ -1696,7 +1719,8 @@ def __init__(self,
16961719
dset_config, \
16971720
input_augment_funcs,
16981721
output_augment_funcs,
1699-
inoutput_augment_func)
1722+
inoutput_augment_func,
1723+
output_invtrans_funcs)
17001724

17011725
# create torch.util.data.DataLoader
17021726
if params is None:

core_scripts/nn_manager/nn_manager.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ def f_train_wrapper(args, pt_model, loss_wrapper, device, \
577577
return
578578

579579

580-
def f_inference_wrapper(args, pt_model, device, \
580+
def f_inference_wrapper(args, pt_model, device,
581581
test_dataset_wrapper, checkpoint):
582582
""" Wrapper for inference
583583
"""
@@ -618,8 +618,6 @@ def f_inference_wrapper(args, pt_model, device, \
618618
nii_display.f_print("Generate minibatch indexed within [{:d},{:d})".format(
619619
range_genidx_start, range_genidx_end))
620620

621-
622-
623621
# if a list of file to be processed is provided
624622
inf_datalist_path = args.inference_data_list
625623
if len(inf_datalist_path):
@@ -678,7 +676,6 @@ def f_inference_wrapper(args, pt_model, device, \
678676
else:
679677
pass
680678

681-
682679
# send data to device and convert data type
683680
if isinstance(data_in, torch.Tensor):
684681
data_in = data_in.to(device, dtype=nii_dconf.d_dtype)
@@ -705,7 +702,6 @@ def f_inference_wrapper(args, pt_model, device, \
705702
infer_func = pt_model.inference
706703
else:
707704
infer_func = pt_model.forward
708-
709705

710706
if hasattr(args, 'trunc_input_length_for_inference') and \
711707
args.trunc_input_length_for_inference > 0:
@@ -774,7 +770,7 @@ def f_inference_wrapper(args, pt_model, device, \
774770
mes = "Output data is not torch.tensor. Please check "
775771
mes += "model.forward or model.inference"
776772
nii_display.f_die(mes)
777-
773+
778774
# save output (in case batchsize > 1, )
779775
for idx, seq_info in enumerate(data_info):
780776
#nii_display.f_print(seq_info)

project/10-asvspoof-vocoded-trn-ssl/00_demo.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# Usage: bash 00_demo.sh PATH CONFIG RAND_SEED
66
# where
7-
# PATH can be model-B1
7+
# PATH can be model-ID-B1
88
# or other model-* folders
99
# CONFIG can be config_train_toyset_paired
1010
# if you prepare other config, you can use them as well

tutorials/b2_anti_spoofing/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
Here are the tutorial notebooks on speech anti-spoofing.
44

5-
It is unfinished, but the following notebooks are available:
5+
Chapter 1 and 2 were the materials used in the Interspeech 2023 tutorial.
66

77

88
| Chapter | Contents |
99
| --- | :-- |
10+
| chapter 1 Toy example | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/19amdsNeLZ0O6Ch-jDuIm9Z4pXeO_rF40?usp=sharing) Please check First session
11+
| chapter 2 Graph attention network CM | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/19amdsNeLZ0O6Ch-jDuIm9Z4pXeO_rF40?usp=sharing) Please check Second session
1012
| [chapter_a0_EER](./chapter_a0_EER.ipynb) | Detailed tutorial on equal error rate (EER)
1113
| [chapter_a1_stats_test](./chapter_a1_stats_test.ipynb) | Detailed tutorial on statistical analysis on EER
1214

0 commit comments

Comments
 (0)