Skip to content
Merged
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
84 commits
Select commit Hold shift + click to select a range
b45a929
initial setup of tts2
ftshijt Nov 6, 2023
59cb13d
fix setup
ftshijt Nov 8, 2023
688fde6
add mini_an4
ftshijt Nov 8, 2023
c534a80
update
ftshijt Nov 8, 2023
3fa8418
update tts2 template
ftshijt Nov 9, 2023
057ff14
remove tts readme from tts2
ftshijt Nov 9, 2023
8af6139
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 9, 2023
5b770fd
Merge branch 'tts2' of https://github.com/ftshijt/espnet into tts2
ftshijt Nov 9, 2023
ae48507
update ljspeech minor
ftshijt Nov 16, 2023
6e8d8de
Merge branch 'tts2' into tts2
ftshijt Nov 16, 2023
82b80a6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 16, 2023
1c8a568
Merge pull request #5541 from ftshijt/tts2
ftshijt Nov 16, 2023
cf0bf70
initial commit
jctian98 Dec 8, 2023
e66f4c0
more update
jctian98 Dec 8, 2023
b341ef0
implement the inference
jctian98 Dec 8, 2023
f254ba9
fix the pad bug
jctian98 Dec 8, 2023
adb21d5
update
jctian98 Dec 8, 2023
ed5c392
Merge pull request #9 from jctian98/tts2
ftshijt Dec 12, 2023
16c171b
support vocoder
jctian98 Dec 21, 2023
5aa661f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 21, 2023
4af0b29
Merge branch 'tts2' into tts2
ftshijt Dec 22, 2023
a16c6a0
Merge pull request #5600 from jctian98/tts2
ftshijt Dec 22, 2023
bbb9f79
Update fastspeech2_discrete.py
ftshijt Jan 25, 2024
f847876
Merge branch 'master' into tts2
ftshijt Jan 30, 2024
fb96e97
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 30, 2024
22f7d91
Create __init__.py
ftshijt Feb 1, 2024
ea394f2
Create __init__.py
ftshijt Feb 1, 2024
0a6caea
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 1, 2024
3b34577
Update slurm.conf
ftshijt Feb 1, 2024
27d6eca
Merge branch 'master' into tts2
ftshijt Feb 1, 2024
b98a773
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 1, 2024
5d5d54a
Merge branch 'master' into tts2
ftshijt Feb 26, 2024
3a19526
Revert "Update train_tacotron2.yaml"
ftshijt Apr 3, 2024
2d6a980
Update train_tacotron2.yaml
ftshijt Apr 3, 2024
e6d5d01
Merge branch 'tts2' of https://github.com/espnet/espnet into tts2
ftshijt Apr 3, 2024
363c26d
remove vocoder None case for tts2
ftshijt Apr 6, 2024
d9653bc
fix comment
ftshijt Apr 6, 2024
d9c52dd
update pack model
ftshijt Apr 6, 2024
c6732da
update an4
ftshijt Apr 6, 2024
541eaa1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 6, 2024
cbdd1ac
Merge branch 'master' into tts2
ftshijt Apr 8, 2024
56903cc
Merge branch 'master' into tts2
sw005320 Apr 8, 2024
8a3ee7d
fix typeguard
ftshijt Apr 9, 2024
3d17c51
fix typeguard in models
ftshijt Apr 9, 2024
fdbe290
update the usage in tts
ftshijt Apr 9, 2024
8c15ae6
Merge branch 'tts2' of https://github.com/espnet/espnet into tts2
ftshijt Apr 9, 2024
4cd249e
typeguard substitution
ftshijt Apr 9, 2024
63b157f
fix typeguard for all modules
ftshijt Apr 9, 2024
760b853
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 9, 2024
7d275d2
Merge pull request #5739 from espnet/master
ftshijt Apr 10, 2024
ba87977
Update egs2/TEMPLATE/tts2/tts2.sh
ftshijt Apr 12, 2024
0e412c8
Update espnet2/tts2/fastspeech2/fastspeech2_discrete.py
ftshijt Apr 12, 2024
9663af1
Update egs2/TEMPLATE/tts2/tts2.sh
ftshijt Apr 12, 2024
3cecc2a
Update egs2/TEMPLATE/tts2/tts2.sh
ftshijt Apr 12, 2024
6660650
Update egs2/TEMPLATE/tts2/tts2.sh
ftshijt Apr 12, 2024
c7399c7
Update egs2/ljspeech/tts2/run.sh
ftshijt Apr 12, 2024
c8e9655
Update egs2/TEMPLATE/tts2/tts2.sh
ftshijt Apr 12, 2024
3806a36
Update espnet2/tts2/espnet_model.py
ftshijt Apr 12, 2024
26d9e05
Update espnet2/tts2/espnet_model.py
ftshijt Apr 12, 2024
a3893fd
symlink to tts1
ftshijt Apr 12, 2024
e193c9d
Update espnet2/tts2/fastspeech2/fastspeech2_discrete.py
ftshijt Apr 12, 2024
a0f124e
fix docstring
ftshijt Apr 12, 2024
c90ed41
update readme
ftshijt Apr 12, 2024
19f8999
update vocoder info
ftshijt Apr 12, 2024
b6ee0c2
remove style encoder -> resolve conflict
ftshijt Apr 12, 2024
a67da54
Merge branch 'master' into tts2
ftshijt Apr 12, 2024
476c0a1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 12, 2024
f1b8cce
Update setup_anaconda.sh
ftshijt Apr 14, 2024
895e46b
add ci test
ftshijt Apr 16, 2024
263a298
fix bug in weighted masking
ftshijt Apr 16, 2024
b8e7edc
remove normalizer, feats as they are not used
ftshijt Apr 16, 2024
62cb4e1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 16, 2024
e39e3fa
fix shellcheck
ftshijt Apr 16, 2024
5bc5a4f
Merge branch 'tts2' of https://github.com/espnet/espnet into tts2
ftshijt Apr 16, 2024
2c6c54d
fix unused setting
ftshijt Apr 16, 2024
eba47c4
add new ci test (tts2 train inference)
ftshijt Apr 16, 2024
8bdf0fc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 16, 2024
7470352
fix a few unimported errors
ftshijt Apr 17, 2024
c6918a1
remove unnecessary log info
ftshijt Apr 17, 2024
7f32dc6
Merge branch 'codec' of https://github.com/espnet/espnet into tts2
ftshijt Apr 17, 2024
8ffa28d
fix conflict
ftshijt Apr 17, 2024
cc4d1e1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 17, 2024
5df2947
rename test fastspeech2 discrete
ftshijt Apr 17, 2024
2806671
Update egs2/TEMPLATE/tts2/tts2.sh
ftshijt Apr 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions egs2/TEMPLATE/tts2/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
#   --num-threads <ngpu>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from the default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N> (the N-th job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for the 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface, not depending on the backend.
# These options are mapped to backend-specific options, configured by
# "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs fail, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================


# Select the backend used by run.sh from "local", "stdout", "sge", "pbs", "slurm", "ssh", or "jhu"
cmd_backend='local'

case "${cmd_backend}" in
    local)
        # Local machine, without any job scheduling system.
        export train_cmd="run.pl"   # general usage
        export cuda_cmd="run.pl"    # used for "*_train.py"; "--gpu" is appended optionally by run.sh
        export decode_cmd="run.pl"  # used for "*_recog.py"
        ;;

    stdout)
        # Local machine, logging to stdout and to a log file, without any job scheduling system.
        export train_cmd="stdout.pl"
        export cuda_cmd="stdout.pl"
        export decode_cmd="stdout.pl"
        ;;

    sge)
        # "qsub" (Sun Grid Engine, or a derivative of it).
        # The default setting is written in conf/queue.conf.
        # You must change "-q g.q" to the "queue" for your environment
        # (type "qhost -q" to list the queue names).
        # Note that to use "--gpu *", you have to set up "complex_value" for the system scheduler.
        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    pbs)
        # "qsub" (Torque/PBS.)
        # The default setting is written in conf/pbs.conf.
        export train_cmd="pbs.pl"
        export cuda_cmd="pbs.pl"
        export decode_cmd="pbs.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # The default setting is written in conf/slurm.conf.
        # You must change "-p cpu" and "-p gpu" to the "partition" names for your
        # environment (type "sinfo" to list the partition names).
        # You can use "--gpu *" by default for slurm and it is interpreted as "--gres gpu:*".
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # You have to create ".queue/machines" to specify the hosts to execute jobs on, e.g.
        #   host1
        #   host2
        #   host3
        # Assuming you can log in to them without any password, i.e. you have to set up ssh keys.
        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # This is an example of specifying several unique options in the JHU CLSP cluster setup.
        # Users can modify/add their own command options according to their cluster environments.
        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/queue.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        # This file is sourced by run.sh, hence "return" rather than "exit".
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac
7 changes: 7 additions & 0 deletions egs2/TEMPLATE/tts2/conf/mfcc.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
--sample-frequency=16000
--frame-length=25 # the default is 25
--low-freq=20 # the default.
--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case).
--num-mel-bins=30
--num-ceps=30
--snip-edges=false
11 changes: 11 additions & 0 deletions egs2/TEMPLATE/tts2/conf/pbs.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Default configuration
command qsub -V -v PATH -S /bin/bash
option name=* -N $0
option mem=* -l mem=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -l ncpus=$0
option num_threads=1 # Do not add anything to qsub_opts
option num_nodes=* -l nodes=$0:ppn=1
default gpu=0
option gpu=0
option gpu=* -l ngpus=$0
12 changes: 12 additions & 0 deletions egs2/TEMPLATE/tts2/conf/queue.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option name=* -N $0
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
option num_nodes=* -pe mpi $0 # You must set this PE as allocation_rule=1
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q
14 changes: 14 additions & 0 deletions egs2/TEMPLATE/tts2/conf/slurm.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Default configuration
command sbatch --export=PATH
option name=* --job-name $0
option time=* --time $0
option mem=* --mem-per-cpu $0
option mem=0
option num_threads=* --cpus-per-task $0
option num_threads=1 --cpus-per-task 1
option num_nodes=* --nodes $0
default gpu=0
option gpu=0 -p cpu
option gpu=* -p gpu --gres=gpu:$0 -c $0 # Recommend allocating at least as many CPUs as GPUs
# note: the --max-jobs-run option is supported as a special case
# by slurm.pl and you don't have to handle it in the config file.
4 changes: 4 additions & 0 deletions egs2/TEMPLATE/tts2/conf/vad.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
--vad-energy-threshold=5.5
--vad-energy-mean-scale=0.5
--vad-proportion-threshold=0.12
--vad-frames-context=2
1 change: 1 addition & 0 deletions egs2/TEMPLATE/tts2/db.sh
Empty file.
23 changes: 23 additions & 0 deletions egs2/TEMPLATE/tts2/path.sh
Comment thread
ftshijt marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Environment setup sourced by the recipe scripts.
MAIN_ROOT=$PWD/../../..

export PATH=$PWD/utils/:$PATH
export LC_ALL=C

# Activate the project's Python environment if it has been set up.
if [ -f "${MAIN_ROOT}"/tools/activate_python.sh ]; then
    . "${MAIN_ROOT}"/tools/activate_python.sh
else
    # Fixed quoting (SC2027): ${MAIN_ROOT} stays inside the double quotes so the
    # message is passed as a single argument even if the path contains spaces.
    echo "[INFO] ${MAIN_ROOT}/tools/activate_python.sh is not present"
fi
. "${MAIN_ROOT}"/tools/extra_path.sh

export OMP_NUM_THREADS=1

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8

# You need to change or unset NCCL_SOCKET_IFNAME according to your network environment
# https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/env.html#nccl-socket-ifname
export NCCL_SOCKET_IFNAME="^lo,docker,virbr,vmnet,vboxnet"

# NOTE(kamo): Source at the last to overwrite the setting
. local/path.sh
1 change: 1 addition & 0 deletions egs2/TEMPLATE/tts2/pyscripts
1 change: 1 addition & 0 deletions egs2/TEMPLATE/tts2/scripts
58 changes: 58 additions & 0 deletions egs2/TEMPLATE/tts2/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env bash
# Set bash to 'debug' mode: exit on error (-e), error on undefined
# variables (-u), and fail a pipeline if any stage fails (pipefail).
set -euo pipefail

# Print a timestamped log line to stdout, prefixed with the caller's
# file name, line number, and function name, e.g.
#   2024-04-17T12:00:00 (setup.sh:42:main) message
log() {
    local caller_file caller_line caller_func stamp
    caller_file=${BASH_SOURCE[1]##*/}
    caller_line=${BASH_LINENO[0]}
    caller_func=${FUNCNAME[1]}
    stamp=$(date '+%Y-%m-%dT%H:%M:%S')
    echo -e "${stamp} (${caller_file}:${caller_line}:${caller_func}) $*"
}
help_message=$(cat << EOF
Usage: $0 <target-dir>
EOF
)


if [ $# -ne 1 ]; then
    log "${help_message}"
    log "Error: 1 positional argument is required."
    exit 2
fi


dir=$1
mkdir -p "${dir}"

# The target directory is expected to live two levels below egs2/
# (e.g. egs2/<corpus>/tts2), so that ../../TEMPLATE resolves to egs2/TEMPLATE.
if [ ! -d "${dir}"/../../TEMPLATE ]; then
    log "Error: ${dir}/../../TEMPLATE should exist. You may specify wrong directory."
    exit 1
fi

# Accumulates the paths created in ${dir}, for the final log line.
targets=""

# Copy: per-recipe editable files, so real copies instead of symlinks.
for f in cmd.sh conf local; do
    target="${dir}"/../../TEMPLATE/tts2/"${f}"
    cp -r "${target}" "${dir}"
    # BUGFIX: log the created path "${dir}/${f}", not the copy source
    # (the original appended "${dir}/${target}", which is not a real path).
    targets+="${dir}/${f} "
done


# Symlinks to TEMPLATE/tts2: shared scripts that should track the template.
for f in tts2.sh path.sh sid; do
    target=../../TEMPLATE/tts2/"${f}"
    ln -sf "${target}" "${dir}"
    targets+="${dir}/${f} "
done


# Symlinks to TEMPLATE/asr1: generic utilities shared across tasks.
for f in db.sh scripts pyscripts utils steps; do
    target=../../TEMPLATE/asr1/"${f}"
    ln -sf "${target}" "${dir}"
    targets+="${dir}/${f} "
done

log "Created: ${targets}"
1 change: 1 addition & 0 deletions egs2/TEMPLATE/tts2/sid/README.md
Comment thread
ftshijt marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The scripts in this directory were copied from Kaldi-ASR, https://github.com/kaldi-asr/kaldi, and their licenses follow the original license, https://github.com/kaldi-asr/kaldi/blob/master/COPYING, of Kaldi.
86 changes: 86 additions & 0 deletions egs2/TEMPLATE/tts2/sid/compute_vad_decision.sh
Comment thread
ftshijt marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash

# Copyright 2017 Vimal Manohar
# Apache 2.0

# To be run from .. (one directory up from here)
# see ../run.sh for example

# Compute energy based VAD output
#
# Reads <data-dir>/feats.scp, runs the Kaldi "compute-vad" binary over <nj>
# parallel splits, and writes the merged per-utterance VAD decisions to
# <data-dir>/vad.scp (the actual archives live under <vad-dir>).
# NOTE(review): vendored from Kaldi (see sid/README.md); assumes the Kaldi
# tools compute-vad, parse_options.sh and utils/split_data.sh are available
# on PATH — verify in the deployment environment.

# Defaults; can be overridden on the command line via parse_options.sh.
nj=4                      # number of parallel jobs
cmd=run.pl                # job dispatcher (run.pl/queue.pl/slurm.pl/...)
vad_config=conf/vad.conf  # config passed to compute-vad

echo "$0 $@" # Print the command line for logging

# Pick up the environment, then let parse_options.sh consume --name value flags.
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -lt 1 ] || [ $# -gt 3 ]; then
echo "Usage: $0 [options] <data-dir> [<log-dir> [<vad-dir>]]";
echo "e.g.: $0 data/train exp/make_vad mfcc"
echo "Note: <log-dir> defaults to <data-dir>/log, and <vad-dir> defaults to <data-dir>/data"
echo " Options:"
echo " --vad-config <config-file> # config passed to compute-vad-energy"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi

# Positional arguments; <log-dir> and <vad-dir> fall back to
# subdirectories of <data-dir> when not given.
data=$1
if [ $# -ge 2 ]; then
logdir=$2
else
logdir=$data/log
fi
if [ $# -ge 3 ]; then
vaddir=$3
else
vaddir=$data/data
fi


# make $vaddir an absolute pathname.
vaddir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $vaddir ${PWD}`

# use "name" as part of name of the archive.
name=`basename $data`

mkdir -p $vaddir || exit 1;
mkdir -p $logdir || exit 1;

# Preserve any existing vad.scp before regenerating it.
if [ -f $data/vad.scp ]; then
mkdir -p $data/.backup
echo "$0: moving $data/vad.scp to $data/.backup"
mv $data/vad.scp $data/.backup
fi

# Both the input feature list and the VAD config must exist.
for f in $data/feats.scp "$vad_config"; do
if [ ! -f $f ]; then
echo "compute_vad_decision.sh: no such file $f"
exit 1;
fi
done

# Split the data directory into $nj subsets for parallel processing.
utils/split_data.sh $data $nj || exit 1;
sdata=$data/split$nj;

# Run compute-vad on each split; each job JOB writes a paired
# .ark/.scp archive ($vaddir/vad_${name}.JOB.{ark,scp}) plus a log file.
$cmd JOB=1:$nj $logdir/vad_${name}.JOB.log \
compute-vad --config=$vad_config scp:$sdata/JOB/feats.scp \
ark,scp:$vaddir/vad_${name}.JOB.ark,$vaddir/vad_${name}.JOB.scp || exit 1

# Merge the per-job scp files into the final vad.scp.
for ((n=1; n<=nj; n++)); do
cat $vaddir/vad_${name}.$n.scp || exit 1;
done > $data/vad.scp

# Sanity check: every utterance in feats.scp should have a VAD entry.
# A mismatch is only a warning, except when no output was produced at all.
nc=`cat $data/vad.scp | wc -l`
nu=`cat $data/feats.scp | wc -l`
if [ $nc -ne $nu ]; then
echo "**Warning it seems not all of the speakers got VAD output ($nc != $nu);"
echo "**validate_data_dir.sh will fail; you might want to use fix_data_dir.sh"
[ $nc -eq 0 ] && exit 1;
fi


echo "Created VAD output for $name"
Loading