
Commit 0f062da

project/10
1 parent 2ea46a9 commit 0f062da

28 files changed: 7122 additions & 0 deletions

README.md

Lines changed: 2 additions & 0 deletions
@@ -16,6 +16,8 @@ git clone --depth 1 https://github.com/nii-yamagishilab/project-NN-Pytorch-scrip
 ```
 
 * Latest updates:
+1. Code, databases, and resources for the paper below were added. Please check [project/10-asvspoof-vocoded-trn-ssl/](project/10-asvspoof-vocoded-trn-ssl/)
+> Xin Wang, and Junichi Yamagishi. Can large-scale vocoded spoofed data improve speech spoofing countermeasure with a self-supervised front end? Submitted.
 1. Neural vocoders pretrained on VoxCeleb2 dev and other datasets are available in tutorial notebook **chapter_a3.ipynb** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1xObWejhqcdSxFAjfWI7sudwPPMoCx-vA?usp=sharing)
 2. Code, databases, and resources for the paper below were added. Please check [project/09-asvspoof-vocoded-trn/](project/09-asvspoof-vocoded-trn/) for more details.
 > Xin Wang, and Junichi Yamagishi. Spoofed training data for speech spoofing countermeasure can be efficiently created using neural vocoders. Proc. ICASSP 2023, accepted. https://arxiv.org/abs/2210.10570
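
To make the new entry concrete, here is a quick-start sketch. It assumes the demo script and the example argument values (model-B1, config_train_toyset_paired, random seed 01) documented in 00_demo.sh later in this commit; adjust the names to the folders actually present in project/10-asvspoof-vocoded-trn-ssl/.

# hypothetical quick start for the new project, from the repository top directory
cd project/10-asvspoof-vocoded-trn-ssl
bash 00_demo.sh model-B1 config_train_toyset_paired 01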

env-s3prl-install.sh

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
#!/bin/bash
# Install dependencies for s3prl

# Name of the conda environment
ENVNAME=s3prl-pip2

eval "$(conda shell.bash hook)"
conda activate ${ENVNAME}

retVal=$?
if [ $retVal -ne 0 ]; then
    echo "Install conda environment ${ENVNAME}"

    # conda env
    conda create -n ${ENVNAME} python=3.8 pip --yes
    conda activate ${ENVNAME}

    # install torch
    pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117

    # git clone s3prl
    git clone https://github.com/s3prl/s3prl.git

    cd s3prl
    # check out this specific commit; the latest commit does not work
    git checkout 90d11f2faa6cc46f6d3c852604a9a80e09d18ab1
    pip install -e .

    # install scipy
    conda install -c anaconda scipy=1.7.1 --yes
    # install pandas
    pip install pandas==1.4.3

else
    echo "Conda environment ${ENVNAME} has been installed"
fi
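
After the script above finishes, one might sanity-check the environment along these lines; this is only a sketch, assuming the environment name and the torch version pinned above.

# hypothetical sanity check of the s3prl-pip2 environment
eval "$(conda shell.bash hook)"
conda activate s3prl-pip2
python -c "import torch; print(torch.__version__)"    # expected: 1.13.1+cu117
python -c "import s3prl; print(s3prl.__file__)"        # editable install from the pinned commit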

env-s3prl.sh

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
#!/bin/bash
# if necessary, load the conda environment
eval "$(conda shell.bash hook)"

conda activate s3prl-pip2
retVal=$?
if [ $retVal -ne 0 ]; then
    echo "Cannot load s3prl-pip2"
    exit 1
fi

# when running in ./project/*/*, add the top directory
# of the repository to PYTHONPATH
export PYTHONPATH=$PWD/../../../:$PYTHONPATH
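
A short illustration of how this script is meant to be used: it is sourced, not executed, from a folder three levels below the top directory (as 00_demo.sh does from its DATA/ sub-folder), so that $PWD/../../../ resolves to the repository top. The folder names below are only examples.

# illustrative usage from ./project/<project-name>/<sub-folder>
cd project/10-asvspoof-vocoded-trn-ssl/DATA
source ../../../env-s3prl.sh     # activates s3prl-pip2 and prepends the top directory to PYTHONPATH
echo ${PYTHONPATH}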
Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
#!/bin/bash
########################
# Demo script for training using vocoded data
#
# Usage: bash 00_demo.sh PATH CONFIG RAND_SEED
# where
#   PATH can be model-B1
#     or other model-* folders
#   CONFIG can be config_train_toyset_paired
#     if you prepare other configs, you can use them as well
#   RAND_SEED can be 01, 02, or any other number
#
# This script will
#   1. install the pytorch environment using conda
#   2. download and untar the toy data set
#   3. run training and scoring
#
# This script will use the data set in DATA/
#
# If GPU memory is less than 16GB, please reduce
# --batch-size in 01_train.sh
#
########################
RED='\033[0;32m'
NC='\033[0m'

PRJDIR=$1
CONFIG=$2
RAND_SEED=$3

if [ "$#" -ne 3 ]; then
    echo -e "Invalid input arguments. Please check the documentation of this script."
    exit 1;
fi

###
# Configurations
###

# configuration used for this demonstration
configs_name=${CONFIG}
PRJDIR=${PRJDIR}/${CONFIG}

# we will use the scripts below to train and score
train_script=01_train.sh
score_script=02_score.sh

###
# Fixed configurations
###
main_script=main.py

condafile=$PWD/../../env-s3prl-install.sh
envfile=$PWD/../../env-s3prl.sh
trained_model=trained_network

#####
# check and create the output folder
SUBDIR=${RAND_SEED}
if [ ! -d ${PRJDIR}/${SUBDIR} ];
then
    mkdir -p ${PRJDIR}/${SUBDIR}
fi

#####
# step 1 load environments
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step1. Preparation: load environment and get toy data${NC}"
# create conda environment
bash ${condafile} || exit 1;

# load env-s3prl.sh. It must be sourced inside a sub-folder so that
# $PWD/../../../ points to the top directory of the repository;
# otherwise, please follow env-s3prl.sh and manually load the conda
# environment and add PYTHONPATH
cd DATA
source ${envfile} || exit 1;
cd ../

#####
# step 2 download the toy data set
bash 01_download.sh

#####
# step 3 training
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step3. run training process on toy set${NC}"

com="bash ${train_script} ${RAND_SEED} ${configs_name} ${PRJDIR}/${SUBDIR}"
echo ${com}
eval ${com}

#####
# step 4 inference using the trained model
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step4. score the toy data set${NC}"

trainedmodel=${trained_model}.pt
if [ -e $PWD/${PRJDIR}/${SUBDIR}/${trainedmodel} ];
then
    com="bash ${score_script} $PWD/DATA/toy_example_vocoded/eval toy_eval_set
         $PWD/${PRJDIR}/${SUBDIR} $PWD/${PRJDIR}/${SUBDIR}/${trainedmodel}
         trained"
    echo ${com}
    eval ${com}
fi
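
For reference, an example invocation using the argument values suggested in the header comments; the output path follows from how PRJDIR, CONFIG, and RAND_SEED are combined above.

# example invocation with the values from the header comments
bash 00_demo.sh model-B1 config_train_toyset_paired 01
# the trained model is then expected under model-B1/config_train_toyset_paired/01/trained_network.pt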
Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
#!/bin/bash
########################
# Demo script for scoring using pretrained models
#
# Usage: bash 00_demo_pretrained.sh MODEL_DIR TESTSET_DIR TESTSET_NAME
# where
#   MODEL_DIR can be $PWD/model-ID-7/trained-for-paper/01
#     or other model-* folders.
#     It must be a path to a specific model with a specific training
#     configuration folder and a specific random seed
#
#   TESTSET_DIR is the path to the directory of the test set waveforms,
#     for example $PWD/DATA/toy_example_vocoded/eval
#
#   TESTSET_NAME is the name of the test set;
#     it can be any text string
#
# This script will
#   1. install the pytorch environment using conda
#   2. download the toy data set for demonstration (if necessary)
#   3. run scoring
#
# This script will use the data set in DATA/
#
########################
RED='\033[0;32m'
NC='\033[0m'

PRJDIR=$1

if [ "$#" -ne 3 ]; then
    TESTSET_DIR=$PWD/DATA/toy_example_vocoded/eval
    TESTSET_NAME=toy_eval_set
    echo -e "Use toy test set for demonstration."
else
    TESTSET_DIR=$2
    TESTSET_NAME=$3
fi

###
# Configurations
###

# we will use the script below for scoring
score_script=02_score.sh

###
# Fixed configurations
###
main_script=main.py
condafile=$PWD/../../env-s3prl-install.sh
envfile=$PWD/../../env-s3prl.sh
trained_model=trained_network

#####
# step 1 load environments
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step1. Preparation: load environment and get toy data${NC}"
# create conda environment
bash ${condafile} || exit 1;

# load env-s3prl.sh. It must be sourced inside a sub-folder so that
# $PWD/../../../ points to the top directory of the repository;
# otherwise, please follow env-s3prl.sh and manually load the conda
# environment and add PYTHONPATH
cd DATA
source ${envfile} || exit 1;
cd ../

#####
# step 2 download
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step2. download pre-trained models${NC}"

bash 01_download.sh
bash 01_download_pretrained_cm.sh ${PRJDIR}

#####
# step 3 inference using the trained model
echo -e "\n${RED}=======================================================${NC}"
echo -e "${RED}Step3. score the toy data set using models trained by Xin${NC}"

trainedmodel=${trained_model}.pt
if [ -e ${PRJDIR}/${trainedmodel} ];
then
    com="bash ${score_script}
         ${TESTSET_DIR}
         ${TESTSET_NAME}
         $PRJDIR
         $PRJDIR/${trainedmodel}
         trained"
    echo ${com}
    eval ${com}
else
    echo "Model not found: ${PRJDIR}/${trainedmodel}"
fi
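
An example invocation with the values mentioned in the header comments; this is a sketch, assuming the model folder exists locally or can be fetched by 01_download_pretrained_cm.sh.

# score a test set with a pre-trained model (paths taken from the header comments)
bash 00_demo_pretrained.sh $PWD/model-ID-7/trained-for-paper/01 \
    $PWD/DATA/toy_example_vocoded/eval toy_eval_set

# with fewer than three arguments, the script falls back to the toy evaluation set
bash 00_demo_pretrained.sh $PWD/model-ID-7/trained-for-paper/01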
Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
#!/bin/bash
########################
# Script to download files
#
# Usage: bash 01_download.sh
#
# This will download a toy data set
# and the SSL model from Fairseq
########################
RED='\033[0;32m'
NC='\033[0m'

#####
# Download the toy data set
link_set=https://zenodo.org/record/7315515/files/project-09-toy_example_vocoded.tar.gz
set_name=project-09-toy_example_vocoded.tar.gz

echo -e "\n${RED}=======================================================${NC}"

cd DATA

if [ ! -e ${set_name} ];
then
    echo -e "${RED}Download and untar the toy data set${NC}"
    wget -q --show-progress ${link_set}
else
    echo -e "Use downloaded ${set_name}"
fi

if [ -e ${set_name} ];
then
    if [ ! -d toy_example ];
    then
        tar -xzf ${set_name}
    fi
else
    echo -e "\nCannot download ${set_name}"
fi

cd ..

#####
# Download the continually trained SSL model
# (the pre-trained CM models are downloaded by 01_download_pretrained_cm.sh)

link_set=https://zenodo.org/record/8336949/files/wav2vec_ft2_vox_vocoded.pt
set_name=wav2vec_ft2_vox_vocoded.pt

cd SSL_pretrained

echo -e "\n${RED}=======================================================${NC}"
if [ ! -e ${set_name} ];
then
    echo -e "${RED}Download the SSL model${NC}"
    wget -q --show-progress ${link_set}
else
    echo -e "Use downloaded ${set_name}"
fi

if [ ! -e ${set_name} ];
then
    echo -e "\nCannot download ${set_name}"
fi

cd ..
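
After a successful run, the two downloads should be visible as below; this is only a quick check using the file names defined in the script.

# quick check of the files this script is expected to leave behind
ls DATA/project-09-toy_example_vocoded.tar.gz     # toy data set archive (extracted in DATA/)
ls SSL_pretrained/wav2vec_ft2_vox_vocoded.pt      # continually trained SSL front end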
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
#!/bin/bash
########################
# Script to download files
#
# Usage: bash 01_download_pretrained_cm.sh MODEL_DIR
#
# This will download the pretrained models for MODEL_DIR
########################
RED='\033[0;32m'
NC='\033[0m'

MODELNAME=$1
# if the input is $PWD/model-ID-P3/trained-for-paper/01,
# get the name model-ID-P3
MODELNAME=`echo ${MODELNAME} | awk -F '/' '{print $(NF-2)}'`

#####
# Download the models trained for the paper
link_set=https://zenodo.org/record/8337778/files/project10-cm-ssl-${MODELNAME}.tar
set_name=project10-cm-ssl-${MODELNAME}.tar

echo -e "\n${RED}=======================================================${NC}"

if [ ! -e ${set_name} ];
then
    echo -e "${RED}Download and untar the trained models${NC}"
    wget -q --show-progress ${link_set}
fi

if [ -e ${set_name} ];
then
    if [ ! -d ${MODELNAME}/trained-for-paper ];
    then
        tar -xvf ${set_name}
    fi
else
    echo -e "\nCannot download ${set_name}"
fi
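
The awk line above takes the third-from-last path component as the model name; a small illustration with the example path from the comments:

# illustration of the model-name extraction used above
echo /some/path/model-ID-P3/trained-for-paper/01 | awk -F '/' '{print $(NF-2)}'
# prints: model-ID-P3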
