StyleTTS2 Demo Notebook (#288)

rsxdalv · Mar 14, 2024 · c1013de
1 parent c71e1a7
commit c1013de
Show file tree

Hide file tree

Showing 5 changed files with 53 additions and 0 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -30,6 +30,7 @@ RUN pip3 install -r requirements_audiocraft_deps.txt
 RUN pip3 install -r requirements_bark_hubert_quantizer.txt
 RUN pip3 install -r requirements_rvc.txt
 RUN pip3 install hydra-core==1.3.2
+RUN pip3 install -r requirements_styletts2.txt
 
 # Add React webui (testing)
 # RUN cd react-ui && npm install && npm run build

diff --git a/README.md b/README.md
@@ -41,6 +41,9 @@ https://rsxdalv.github.io/bark-speaker-directory/
 https://github.com/rsxdalv/tts-generation-webui/discussions/186#discussioncomment-7291274
 
 ## Changelog
+Mar 14:
+* StyleTTS2 Demo Notebook
+
 Mar 13:
 * Add Experimental Pipeline (Bark / Tortoise / MusicGen / AudioGen / MAGNeT -> RVC / Demucs / Vocos) (#287)
 * Fix RVC bug with model reloading on each generation. For short inputs that results in a visible speedup.

diff --git a/notebooks/styleTTS2.ipynb b/notebooks/styleTTS2.ipynb
@@ -0,0 +1,46 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from styletts2 import tts\n",
+    "\n",
+    "# No paths provided means default checkpoints/configs will be downloaded/cached.\n",
+    "my_tts = tts.StyleTTS2()\n",
+    "\n",
+    "# Optionally create/write an output WAV file.\n",
+    "out = my_tts.inference(\"Hello there, I am now a python package.\", output_wav_file=\"test.wav\")\n",
+    "\n",
+    "# Specific paths to a checkpoint and config can also be provided.\n",
+    "# other_tts = tts.StyleTTS2(model_checkpoint_path='/PATH/TO/epochs_2nd_00020.pth', config_path='/PATH/TO/config.yml')\n",
+    "\n",
+    "# # Specify target voice to clone. When no target voice is provided, a default voice will be used.\n",
+    "# other_tts.inference(\"Hello there, I am now a python package.\", target_voice_path=\"/PATH/TO/some_voice.wav\", output_wav_file=\"another_test.wav\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements_styletts2.txt b/requirements_styletts2.txt
@@ -0,0 +1,2 @@
+torch==2.0.0 # BSD 3-Clause - ensures torch is not reinstalled
+styletts2 @ git+https://github.com/rsxdalv/StyleTTS2@cc36a19610df42123230237a73e798d4fdd3dcb8#egg=styletts2 # MIT License
diff --git a/update.py b/update.py
@@ -71,6 +71,7 @@ def main():
     try_install("requirements_rvc.txt", "RVC")
     # hydracore fix because of fairseq
     do("pip install hydra-core==1.3.2")
+    try_install("requirements_styletts2.txt", "StyleTTS")
 
     if is_node_installed():
         setup_node_modules()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		torch==2.0.0 # BSD 3-Clause - ensures torch is not reinstalled
		styletts2 @ git+https://github.com/rsxdalv/StyleTTS2@cc36a19610df42123230237a73e798d4fdd3dcb8#egg=styletts2 # MIT License