
Commit 25347bb: rename tacotron2, test=tts

Parent: e0280ff
30 files changed (+381, -51 lines)

Diff for: CHANGELOG.md (+22, -7)

@@ -1,23 +1,38 @@
 # Changelog
 
-Date: 2022-1-19, Author: yt605155624.
-Add features to: T2S:
-- Add csmsc Tacotron2.
+Date: 2022-1-29, Author: yt605155624.
+Add features to: T2S:
+- Update aishell3 vc0 with new Tacotron2.
+- PRLink: https://github.com/PaddlePaddle/PaddleSpeech/pull/1419
+
+Date: 2022-1-29, Author: yt605155624.
+Add features to: T2S:
+- Add ljspeech Tacotron2.
+- PRLink: https://github.com/PaddlePaddle/PaddleSpeech/pull/1416
+
+Date: 2022-1-24, Author: yt605155624.
+Add features to: T2S:
+- Add csmsc WaveRNN.
+- PRLink: https://github.com/PaddlePaddle/PaddleSpeech/pull/1379
+
+Date: 2022-1-19, Author: yt605155624.
+Add features to: T2S:
+- Add csmsc Tacotron2.
 - PRLink: https://github.com/PaddlePaddle/PaddleSpeech/pull/1314
 
 
 Date: 2022-1-10, Author: Jackwaterveg.
-Add features to: CLI:
-- Support English (librispeech/asr1/transformer).
+Add features to: CLI:
+- Support English (librispeech/asr1/transformer).
 - Support choosing `decode_method` for conformer and transformer models.
 - Refactor the config, using the unified config.
 - PRLink: https://github.com/PaddlePaddle/PaddleSpeech/pull/1297
 
 ***
 
 Date: 2022-1-17, Author: Jackwaterveg.
-Add features to: CLI:
-- Support deepspeech2 online/offline model(aishell).
+Add features to: CLI:
+- Support deepspeech2 online/offline model(aishell).
 - PRLink: https://github.com/PaddlePaddle/PaddleSpeech/pull/1356
 
 ***

Diff for: README.md (+13, -7)

@@ -317,14 +317,15 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
 </tr>
 <tr>
 <td rowspan="4">Acoustic Model</td>
-<td >Tacotron2</td>
-<td rowspan="2" >LJSpeech</td>
+<td>Tacotron2</td>
+<td>LJSpeech / CSMSC</td>
 <td>
-<a href = "./examples/ljspeech/tts0">tacotron2-ljspeech</a>
+<a href = "./examples/ljspeech/tts0">tacotron2-ljspeech</a> / <a href = "./examples/csmsc/tts0">tacotron2-csmsc</a>
 </td>
 </tr>
 <tr>
 <td>Transformer TTS</td>
+<td>LJSpeech</td>
 <td>
 <a href = "./examples/ljspeech/tts1">transformer-ljspeech</a>
 </td>
@@ -344,7 +345,7 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
 </td>
 </tr>
 <tr>
-<td rowspan="5">Vocoder</td>
+<td rowspan="6">Vocoder</td>
 <td >WaveFlow</td>
 <td >LJSpeech</td>
 <td>
@@ -378,7 +379,14 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
 <td>
 <a href = "./examples/csmsc/voc5">HiFiGAN-csmsc</a>
 </td>
-<tr>
+</tr>
+<tr>
+<td >WaveRNN</td>
+<td >CSMSC</td>
+<td>
+<a href = "./examples/csmsc/voc6">WaveRNN-csmsc</a>
+</td>
+</tr>
 <tr>
 <td rowspan="3">Voice Cloning</td>
 <td>GE2E</td>
@@ -416,7 +424,6 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
 </tr>
 </thead>
 <tbody>
-
 <tr>
 <td>Audio Classification</td>
 <td>ESC-50</td>
@@ -440,7 +447,6 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
 </tr>
 </thead>
 <tbody>
-
 <tr>
 <td>Punctuation Restoration</td>
 <td>IWLST2012_zh</td>

Diff for: README_cn.md (+13, -8)

@@ -315,14 +315,15 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声
 </tr>
 <tr>
 <td rowspan="4">声学模型</td>
-<td >Tacotron2</td>
-<td rowspan="2" >LJSpeech</td>
+<td>Tacotron2</td>
+<td>LJSpeech / CSMSC</td>
 <td>
-<a href = "./examples/ljspeech/tts0">tacotron2-ljspeech</a>
+<a href = "./examples/ljspeech/tts0">tacotron2-ljspeech</a> / <a href = "./examples/csmsc/tts0">tacotron2-csmsc</a>
 </td>
 </tr>
 <tr>
 <td>Transformer TTS</td>
+<td>LJSpeech</td>
 <td>
 <a href = "./examples/ljspeech/tts1">transformer-ljspeech</a>
 </td>
@@ -342,7 +343,7 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声
 </td>
 </tr>
 <tr>
-<td rowspan="5">声码器</td>
+<td rowspan="6">声码器</td>
 <td >WaveFlow</td>
 <td >LJSpeech</td>
 <td>
@@ -376,7 +377,14 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声
 <td>
 <a href = "./examples/csmsc/voc5">HiFiGAN-csmsc</a>
 </td>
-<tr>
+</tr>
+<tr>
+<td >WaveRNN</td>
+<td >CSMSC</td>
+<td>
+<a href = "./examples/csmsc/voc6">WaveRNN-csmsc</a>
+</td>
+</tr>
 <tr>
 <td rowspan="3">声音克隆</td>
 <td>GE2E</td>
@@ -415,8 +423,6 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声
 </tr>
 </thead>
 <tbody>
-
-
 <tr>
 <td>声音分类</td>
 <td>ESC-50</td>
@@ -440,7 +446,6 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声
 </tr>
 </thead>
 <tbody>
-
 <tr>
 <td>标点恢复</td>
 <td>IWLST2012_zh</td>

Diff for: docs/source/released_model.md (+3, -1)

@@ -1,3 +1,4 @@
+
 # Released Models
 
 ## Speech-to-Text Models
@@ -32,7 +33,8 @@ Language Model | Training Data | Token-based | Size | Descriptions
 ### Acoustic Models
 Model Type | Dataset| Example Link | Pretrained Models|Static Models|Size (static)
 :-------------:| :------------:| :-----: | :-----:| :-----:| :-----:
-Tacotron2|LJSpeech|[tacotron2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts0)|[tacotron2_ljspeech_ckpt_0.3.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_ljspeech_ckpt_0.3.zip)|||
+Tacotron2|LJSpeech|[tacotron2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts0)|[tacotron2_ljspeech_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_ljspeech_ckpt_0.2.0.zip)|||
+Tacotron2|CSMSC|[tacotron2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts0)|[tacotron2_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_ckpt_0.2.0.zip)|[tacotron2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_static_0.2.0.zip)|94.95MB|
 TransformerTTS| LJSpeech| [transformer-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts1)|[transformer_tts_ljspeech_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/transformer_tts/transformer_tts_ljspeech_ckpt_0.4.zip)|||
 SpeedySpeech| CSMSC | [speedyspeech-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts2) |[speedyspeech_nosil_baker_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_ckpt_0.5.zip)|[speedyspeech_nosil_baker_static_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_static_0.5.zip)|12MB|
 FastSpeech2| CSMSC |[fastspeech2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)|[fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)|[fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip)|157MB|

Diff for: docs/source/tts/quick_start_cn.md (+1, -1)

@@ -202,4 +202,4 @@ sf.write(
     audio_path,
     wav.numpy(),
     samplerate=fastspeech2_config.fs)
-```
+```

(The visible line content is identical; the change appears to be end-of-file whitespace, e.g. a trailing newline.)

Diff for: examples/aishell3/vc0/README.md (+1, -2)

@@ -1,4 +1,3 @@
-
 # Tacotron2 + AISHELL-3 Voice Cloning
 This example contains code used to train a [Tacotron2](https://arxiv.org/abs/1712.05884) model with [AISHELL-3](http://www.aishelltech.com/aishell_3). The trained model can be used in Voice Cloning Task, We refer to the model structure of [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf). The general steps are as follows:
 1. Speaker Encoder: We use Speaker Verification to train a speaker encoder. Datasets used in this task are different from those used in `Tacotron2` because the transcriptions are not needed, we use more datasets, refer to [ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e).
@@ -17,7 +16,7 @@ mkdir data_aishell3
 tar zxvf data_aishell3.tgz -C data_aishell3
 ```
 ### Get MFA Result and Extract
-We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for aishell3_fastspeech2.
+We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes for Tacotron2, the durations of MFA are not needed here.
 You can download from here [aishell3_alignment_tone.tar.gz](https://paddlespeech.bj.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your MFA model reference to [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) (use MFA1.x now) of our repo.
 
 ## Pretrained GE2E Model

Diff for: examples/aishell3/vc0/path.sh (+1, -1)

@@ -9,5 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1
 export PYTHONIOENCODING=UTF-8
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
 
-MODEL=new_tacotron2
+MODEL=tacotron2
 export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
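For context, `path.sh` builds the recipe's entry-point directory from `MODEL`, so this rename changes which `exps` subdirectory every script resolves to. A minimal sketch of that derivation (the `MAIN_ROOT` value below is a placeholder, not the repo's actual checkout path):

```shell
#!/usr/bin/env bash
# Sketch: how path.sh derives BIN_DIR from MODEL.
# MAIN_ROOT is a placeholder here; in the recipe it points at the PaddleSpeech checkout.
MAIN_ROOT=/opt/PaddleSpeech

MODEL=tacotron2   # was: new_tacotron2 before this commit
BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
echo "${BIN_DIR}"   # prints: /opt/PaddleSpeech/paddlespeech/t2s/exps/tacotron2
```

After the rename, `BIN_DIR` points at `paddlespeech/t2s/exps/tacotron2`, matching the renamed module directory.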

Diff for: examples/aishell3/vc1/README.md (-1)

@@ -1,4 +1,3 @@
-
 # FastSpeech2 + AISHELL-3 Voice Cloning
 This example contains code used to train a [FastSpeech2](https://arxiv.org/abs/2006.04558) model with [AISHELL-3](http://www.aishelltech.com/aishell_3). The trained model can be used in Voice Cloning Task, We refer to the model structure of [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf). The general steps are as follows:
 1. Speaker Encoder: We use Speaker Verification to train a speaker encoder. Datasets used in this task are different from those used in `FastSpeech2` because the transcriptions are not needed, we use more datasets, refer to [ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e).

Diff for: examples/csmsc/tts0/README.md (+2)

@@ -212,6 +212,8 @@
 Pretrained Tacotron2 model with no silence in the edge of audios:
 - [tacotron2_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_ckpt_0.2.0.zip)
 
+The static model can be downloaded here [tacotron2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_static_0.2.0.zip).
+
 
 Model | Step | eval/loss | eval/l1_loss | eval/mse_loss | eval/bce_loss| eval/attn_loss
 :-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:

Diff for: examples/csmsc/tts0/local/synthesize_e2e.sh (+1)

@@ -7,6 +7,7 @@ ckpt_name=$3
 stage=0
 stop_stage=0
 
+# TODO: the dygraph-to-static output of tacotron2 is not as loud as the static-graph result; some function in decode may be misaligned between dynamic and static graphs
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     FLAGS_allocator_strategy=naive_best_fit \
     FLAGS_fraction_of_gpu_memory_to_use=0.01 \

Diff for: examples/csmsc/tts0/path.sh (+1, -1)

@@ -9,5 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1
 export PYTHONIOENCODING=UTF-8
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
 
-MODEL=new_tacotron2
+MODEL=tacotron2
 export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

Diff for: examples/csmsc/tts0/run.sh (+5)

@@ -35,3 +35,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # synthesize_e2e, vocoder is pwgan
     CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
+
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # inference with static model
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
+fi

Diff for: examples/csmsc/tts3/run.sh (+5)

@@ -36,3 +36,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
 
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # inference with static model
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
+fi
+
