
Commit 0f062da

project/10
1 parent 2ea46a9 commit 0f062da

28 files changed: 7122 additions & 0 deletions

README.md

Lines changed: 2 additions & 0 deletions
@@ -16,6 +16,8 @@ git clone --depth 1 https://github.com/nii-yamagishilab/project-NN-Pytorch-scrip
 ```
 
 * Latest updates:
+1. Code, databases, and resources for the paper below were added. Please check [project/10-asvspoof-vocoded-trn-ssl/](project/10-asvspoof-vocoded-trn-ssl/)
+> Xin Wang, and Junichi Yamagishi. Can large-scale vocoded spoofed data improve speech spoofing countermeasure with a self-supervised front end? Submitted.
 1. Neural vocoders pretrained on VoxCeleb2 dev and other datasets are available in tutorial notebook **chapter_a3.ipynb** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1xObWejhqcdSxFAjfWI7sudwPPMoCx-vA?usp=sharing)
 2. Code, databases, and resources for the paper below were added. Please check [project/09-asvspoof-vocoded-trn/](project/09-asvspoof-vocoded-trn/) for more details.
 > Xin Wang, and Junichi Yamagishi. Spoofed training data for speech spoofing countermeasure can be efficiently created using neural vocoders. Proc. ICASSP 2023, accepted. https://arxiv.org/abs/2210.10570
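
To make the new entry concrete, here is a quick-start sketch. It assumes the demo script and the example argument values (model-B1, config_train_toyset_paired, random seed 01) documented in 00_demo.sh later in this commit; adjust the names to the folders actually present in project/10-asvspoof-vocoded-trn-ssl/.

# hypothetical quick start for the new project, from the repository top directory
cd project/10-asvspoof-vocoded-trn-ssl
bash 00_demo.sh model-B1 config_train_toyset_paired 01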

env-s3prl-install.sh

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
#!/bin/bash
# Install dependencies for s3prl

# Name of the conda environment
ENVNAME=s3prl-pip2

eval "$(conda shell.bash hook)"
conda activate ${ENVNAME}

retVal=$?
if [ $retVal -ne 0 ]; then
    echo "Install conda environment ${ENVNAME}"

    # conda env
    conda create -n ${ENVNAME} python=3.8 pip --yes
    conda activate ${ENVNAME}

    # install torch
    pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117

    # git clone s3prl
    git clone https://github.com/s3prl/s3prl.git

    cd s3prl
    # check out this specific commit; the latest commit does not work
    git checkout 90d11f2faa6cc46f6d3c852604a9a80e09d18ab1
    pip install -e .

    # install scipy
    conda install -c anaconda scipy=1.7.1 --yes
    # install pandas
    pip install pandas==1.4.3

else
    echo "Conda environment ${ENVNAME} has been installed"
fi
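
After the script above finishes, one might sanity-check the environment along these lines; this is only a sketch, assuming the environment name and the torch version pinned above.

# hypothetical sanity check of the s3prl-pip2 environment
eval "$(conda shell.bash hook)"
conda activate s3prl-pip2
python -c "import torch; print(torch.__version__)"    # expected: 1.13.1+cu117
python -c "import s3prl; print(s3prl.__file__)"        # editable install from the pinned commit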

env-s3prl.sh

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
#!/bin/bash
# if necessary, load the conda environment
eval "$(conda shell.bash hook)"

conda activate s3prl-pip2
retVal=$?
if [ $retVal -ne 0 ]; then
    echo "Cannot load s3prl-pip2"
    exit 1
fi

# when running in ./project/*/*, add the top directory
# of the repository to PYTHONPATH
export PYTHONPATH=$PWD/../../../:$PYTHONPATH
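
A short illustration of how this script is meant to be used: it is sourced, not executed, from a folder three levels below the top directory (as 00_demo.sh does from its DATA/ sub-folder), so that $PWD/../../../ resolves to the repository top. The folder names below are only examples.

# illustrative usage from ./project/<project-name>/<sub-folder>
cd project/10-asvspoof-vocoded-trn-ssl/DATA
source ../../../env-s3prl.sh     # activates s3prl-pip2 and prepends the top directory to PYTHONPATH
echo ${PYTHONPATH}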
Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
#!/bin/bash
########################
# Demo script for training using vocoded data
#
# Usage: bash 00_demo.sh PATH CONFIG RAND_SEED
# where
#   PATH can be model-B1
#     or other model-* folders
#   CONFIG can be config_train_toyset_paired
#     if you prepare other configs, you can use them as well
#   RAND_SEED can be 01, 02, or any other number
#
# This script will
#   1. install the pytorch environment using conda
#   2. download and untar the toy data set
#   3. run training and scoring
#
# This script will use the data set in DATA/
#
# If GPU memory is less than 16GB, please reduce
# --batch-size in 01_train.sh
#
########################
RED='\033[0;32m'
NC='\033[0m'

PRJDIR=$1
CONFIG=$2
RAND_SEED=$3

if [ "$#" -ne 3 ]; then
    echo -e "Invalid input arguments. Please check the documentation of this script."
    exit 1;
fi

###
# Configurations
###

# configuration used for this demonstration
configs_name=${CONFIG}
PRJDIR=${PRJDIR}/${CONFIG}

# we will use the scripts below to train and score
train_script=01_train.sh
score_script=02_score.sh

###
# Fixed configurations
###
main_script=main.py

condafile=$PWD/../../env-s3prl-install.sh
envfile=$PWD/../../env-s3prl.sh
trained_model=trained_network

#####
# check and create the output folder
SUBDIR=${RAND_SEED}
if [ ! -d ${PRJDIR}/${SUBDIR} ];
then
    mkdir -p ${PRJDIR}/${SUBDIR}
fi

#####
# step 1 load environments
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step1. Preparation: load environment and get toy data${NC}"
# create conda environment
bash ${condafile} || exit 1;

# load env-s3prl.sh. It must be sourced inside a sub-folder so that
# $PWD/../../../ points to the top directory of the repository;
# otherwise, please follow env-s3prl.sh and manually load the conda
# environment and add PYTHONPATH
cd DATA
source ${envfile} || exit 1;
cd ../

#####
# step 2 download the toy data set
bash 01_download.sh

#####
# step 3 training
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step3. run training process on toy set${NC}"

com="bash ${train_script} ${RAND_SEED} ${configs_name} ${PRJDIR}/${SUBDIR}"
echo ${com}
eval ${com}

#####
# step 4 inference using the trained model
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step4. score the toy data set${NC}"

trainedmodel=${trained_model}.pt
if [ -e $PWD/${PRJDIR}/${SUBDIR}/${trainedmodel} ];
then
    com="bash ${score_script} $PWD/DATA/toy_example_vocoded/eval toy_eval_set
         $PWD/${PRJDIR}/${SUBDIR} $PWD/${PRJDIR}/${SUBDIR}/${trainedmodel}
         trained"
    echo ${com}
    eval ${com}
fi
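
For reference, an example invocation using the argument values suggested in the header comments; the output path follows from how PRJDIR, CONFIG, and RAND_SEED are combined above.

# example invocation with the values from the header comments
bash 00_demo.sh model-B1 config_train_toyset_paired 01
# the trained model is then expected under model-B1/config_train_toyset_paired/01/trained_network.pt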
Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
#!/bin/bash
########################
# Demo script for scoring using pretrained models
#
# Usage: bash 00_demo_pretrained.sh MODEL_DIR TESTSET_DIR TESTSET_NAME
# where
#   MODEL_DIR can be $PWD/model-ID-7/trained-for-paper/01
#     or other model-* folders.
#     It must be a path to a specific model with a specific training
#     configuration folder and a specific random seed
#
#   TESTSET_DIR is the path to the directory of the test set waveforms,
#     for example $PWD/DATA/toy_example_vocoded/eval
#
#   TESTSET_NAME is the name of the test set;
#     it can be any text string
#
# This script will
#   1. install the pytorch environment using conda
#   2. download the toy data set for demonstration (if necessary)
#   3. run scoring
#
# This script will use the data set in DATA/
#
########################
RED='\033[0;32m'
NC='\033[0m'

PRJDIR=$1

if [ "$#" -ne 3 ]; then
    TESTSET_DIR=$PWD/DATA/toy_example_vocoded/eval
    TESTSET_NAME=toy_eval_set
    echo -e "Use toy test set for demonstration."
else
    TESTSET_DIR=$2
    TESTSET_NAME=$3
fi

###
# Configurations
###

# we will use the script below for scoring
score_script=02_score.sh

###
# Fixed configurations
###
main_script=main.py
condafile=$PWD/../../env-s3prl-install.sh
envfile=$PWD/../../env-s3prl.sh
trained_model=trained_network

#####
# step 1 load environments
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step1. Preparation: load environment and get toy data${NC}"
# create conda environment
bash ${condafile} || exit 1;

# load env-s3prl.sh. It must be sourced inside a sub-folder so that
# $PWD/../../../ points to the top directory of the repository;
# otherwise, please follow env-s3prl.sh and manually load the conda
# environment and add PYTHONPATH
cd DATA
source ${envfile} || exit 1;
cd ../

#####
# step 2 download
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step2. download pre-trained models${NC}"

bash 01_download.sh
bash 01_download_pretrained_cm.sh ${PRJDIR}

#####
# step 3 inference using the trained model
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step3. score the toy data set using models trained by Xin${NC}"

trainedmodel=${trained_model}.pt
if [ -e ${PRJDIR}/${trainedmodel} ];
then
    com="bash ${score_script}
         ${TESTSET_DIR}
         ${TESTSET_NAME}
         $PRJDIR
         $PRJDIR/${trainedmodel}
         trained"
    echo ${com}
    eval ${com}
else
    echo "Model not found: ${PRJDIR}/${trainedmodel}"
fi
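
An example invocation with the values mentioned in the header comments; this is a sketch, assuming the model folder exists locally or can be fetched by 01_download_pretrained_cm.sh.

# score a test set with a pre-trained model (paths taken from the header comments)
bash 00_demo_pretrained.sh $PWD/model-ID-7/trained-for-paper/01 \
    $PWD/DATA/toy_example_vocoded/eval toy_eval_set

# with fewer than three arguments, the script falls back to the toy evaluation set
bash 00_demo_pretrained.sh $PWD/model-ID-7/trained-for-paper/01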
Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
#!/bin/bash
########################
# Script to download files
#
# Usage: bash 01_download.sh
#
# This will download a toy data set
# and the SSL model from Fairseq
########################
RED='\033[0;32m'
NC='\033[0m'

#####
# Download the toy data set
link_set=https://zenodo.org/record/7315515/files/project-09-toy_example_vocoded.tar.gz
set_name=project-09-toy_example_vocoded.tar.gz

echo -e "\n${RED}=======================================================${NC}"

cd DATA

if [ ! -e ${set_name} ];
then
    echo -e "${RED}Download and untar the toy data set${NC}"
    wget -q --show-progress ${link_set}
else
    echo -e "Use downloaded ${set_name}"
fi

if [ -e ${set_name} ];
then
    if [ ! -d toy_example ];
    then
        tar -xzf ${set_name}
    fi
else
    echo -e "\nCannot download ${set_name}"
fi

cd ..

#####
# Download the continually trained SSL model
# (the pre-trained CM models are downloaded by 01_download_pretrained_cm.sh)

link_set=https://zenodo.org/record/8336949/files/wav2vec_ft2_vox_vocoded.pt
set_name=wav2vec_ft2_vox_vocoded.pt

cd SSL_pretrained

echo -e "\n${RED}=======================================================${NC}"
if [ ! -e ${set_name} ];
then
    echo -e "${RED}Download the SSL model${NC}"
    wget -q --show-progress ${link_set}
else
    echo -e "Use downloaded ${set_name}"
fi

if [ ! -e ${set_name} ];
then
    echo -e "\nCannot download ${set_name}"
fi

cd ..
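
After a successful run, the two downloads should be visible as below; this is only a quick check using the file names defined in the script.

# quick check of the files this script is expected to leave behind
ls DATA/project-09-toy_example_vocoded.tar.gz     # toy data set archive (extracted in DATA/)
ls SSL_pretrained/wav2vec_ft2_vox_vocoded.pt      # continually trained SSL front end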
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
#!/bin/bash
########################
# Script to download files
#
# Usage: bash 01_download_pretrained_cm.sh MODEL_DIR
#
# This will download the pretrained models for MODEL_DIR
########################
RED='\033[0;32m'
NC='\033[0m'

MODELNAME=$1
# if the input is $PWD/model-ID-P3/trained-for-paper/01,
# get the name model-ID-P3
MODELNAME=`echo ${MODELNAME} | awk -F '/' '{print $(NF-2)}'`

#####
# Download the models trained for the paper
link_set=https://zenodo.org/record/8337778/files/project10-cm-ssl-${MODELNAME}.tar
set_name=project10-cm-ssl-${MODELNAME}.tar

echo -e "\n${RED}=======================================================${NC}"

if [ ! -e ${set_name} ];
then
    echo -e "${RED}Download and untar the trained models${NC}"
    wget -q --show-progress ${link_set}
fi

if [ -e ${set_name} ];
then
    if [ ! -d ${MODELNAME}/trained-for-paper ];
    then
        tar -xvf ${set_name}
    fi
else
    echo -e "\nCannot download ${set_name}"
fi
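
The awk line above takes the third-from-last path component as the model name; a small illustration with the example path from the comments:

# illustration of the model-name extraction used above
echo /some/path/model-ID-P3/trained-for-paper/01 | awk -F '/' '{print $(NF-2)}'
# prints: model-ID-P3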
