From 780bf06fa066e41f1c8c53a43c43ba71689d3149 Mon Sep 17 00:00:00 2001 From: Roberts Slisans Date: Fri, 29 Mar 2024 08:02:54 +0200 Subject: [PATCH] Add Tab Voice Clone and Utils (#296) * add information about voice cloning to tab voice clone * readme * add GPU Info tab * readme --- README.md | 6 ++++ server.py | 3 ++ src/bark/clone/tab_voice_clone.py | 25 +++++++++++++++-- src/utils/gpu_info_tab.py | 46 +++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 src/utils/gpu_info_tab.py diff --git a/README.md b/README.md index a8d21a7d..6e3e281c 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,12 @@ https://rsxdalv.github.io/bark-speaker-directory/ https://github.com/rsxdalv/tts-generation-webui/discussions/186#discussioncomment-7291274 ## Changelog +Mar 28: +* Add GPU Info tab + +Mar 27: +* Add information about voice cloning to tab voice clone + Mar 26: * Add Maha TTS demo notebook diff --git a/server.py b/server.py index 950c3b82..3c452aa3 100644 --- a/server.py +++ b/server.py @@ -31,6 +31,7 @@ from src.css.css import full_css from src.Joutai import Joutai from src.history_tab.collections_directories_atom import collections_directories_atom +from src.utils.gpu_info_tab import gpu_info_tab setup_or_recover.dummy() @@ -53,6 +54,7 @@ def reload_config_and_restart_ui(): else default_config ) + with gr.Blocks( css=full_css, title="TTS Generation WebUI", @@ -167,6 +169,7 @@ def reload_config_and_restart_ui(): model_location_settings_tab() remixer_input = simple_remixer_tab() + gpu_info_tab() Joutai.singleton.tabs.render() diff --git a/src/bark/clone/tab_voice_clone.py b/src/bark/clone/tab_voice_clone.py index d6706004..3daa1d9c 100644 --- a/src/bark/clone/tab_voice_clone.py +++ b/src/bark/clone/tab_voice_clone.py @@ -140,8 +140,29 @@ def tab_voice_clone(register_use_as_history_button): """ Unethical use of this technology is prohibited. 
This demo is based on https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer repository. + + Information from the original repository (https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer?tab=readme-ov-file#voices-cloned-arent-very-convincing-why-are-other-peoples-cloned-voices-better-than-mine) + + ## Voices cloned aren't very convincing, why are other people's cloned voices better than mine? + Make sure these things are **NOT** in your voice input: (in no particular order) + * Noise (You can use a noise remover before) + * Music (There are also music remover tools) (Unless you want music in the background) + * A cut-off at the end (This will cause it to try and continue on the generation) + * Under 1 second of training data (i personally suggest around 10 seconds for good potential, but i've had great results with 5 seconds as well.) + + What makes for good prompt audio? (in no particular order) + * Clearly spoken + * No weird background noises + * Only one speaker + * Audio which ends after a sentence ends + * Regular/common voice (They usually have more success, it's still capable of cloning complex voices, but not as good at it) + * Around 10 seconds of data + """ ) + + + with gr.Column(): tokenizer_dropdown = gr.Dropdown( label="Tokenizer", choices=[ @@ -165,7 +186,6 @@ def tab_voice_clone(register_use_as_history_button): source="upload", interactive=True, ) - with gr.Row(): use_gpu_checkbox = gr.Checkbox(label="Use GPU", value=True) clear_models_button = gr.Button( @@ -207,8 +227,7 @@ def load_tokenizer(tokenizer_and_repo: str, use_gpu: bool): outputs=[tokenizer_dropdown], api_name="bark_voice_tokenizer_load", ) - - with gr.Column(): + gr.Markdown("Generated voice:") voice_file_name = gr.Textbox( label="Voice file name", value="", interactive=False diff --git a/src/utils/gpu_info_tab.py b/src/utils/gpu_info_tab.py new file mode 100644 index 00000000..c445b67d --- /dev/null +++ b/src/utils/gpu_info_tab.py @@ -0,0 +1,46 @@ +import gradio as 
gr
+import torch
+
+
+def gpu_info_tab():
+    """Build the "GPU Info" tab and attach its two click handlers.
+
+    Creates a Markdown component constructed from the rendered GPU report,
+    a "Refresh" button that writes a freshly rendered report into that
+    component, and an invisible button whose handler is registered under
+    the API name ``get_gpu_info``.
+    """
+    with gr.Tab("GPU Info"):
+        # Rendered once while the UI is built; "Refresh" re-renders on demand.
+        report = gr.Markdown(render_gpu_info(get_gpu_info()))
+
+        refresh_button = gr.Button("Refresh")
+        refresh_button.click(
+            fn=refresh_gpu_info,
+            outputs=report,
+            api_name="refresh_gpu_info",
+        )
+
+        # Never shown in the page; it exists only to carry an api_name.
+        # NOTE(review): no `outputs` are bound to this handler - confirm that
+        # API callers actually receive the value returned by get_gpu_info.
+        api_button = gr.Button("API_GET_GPU_INFO", visible=False)
+        api_button.click(fn=get_gpu_info, api_name="get_gpu_info")
+
+
+def get_gpu_info():
+    """Collect memory and capability figures for CUDA device 0.
+
+    Returns:
+        A dict with the keys ``vram``, ``name``, ``cuda_capabilities``,
+        ``used_vram`` and ``used_vram_total`` when
+        ``torch.cuda.is_available()`` is true, otherwise a plain message
+        string. Memory figures are byte counts divided by ``1024**2``.
+    """
+    if not torch.cuda.is_available():
+        return "No GPU with CUDA support detected by PyTorch"
+
+    device = 0
+    bytes_per_mb = 1024**2
+
+    total_mb = torch.cuda.get_device_properties(device).total_memory / bytes_per_mb
+    device_name = torch.cuda.get_device_properties(device).name
+    capability = torch.cuda.get_device_capability(device)
+    allocated_mb = torch.cuda.memory_allocated(device) / bytes_per_mb
+
+    # mem_get_info() yields (free, total); total minus free is what is in use.
+    device_total = torch.cuda.mem_get_info(device)[1]
+    device_free = torch.cuda.mem_get_info(device)[0]
+    in_use_mb = (device_total - device_free) / bytes_per_mb
+
+    return {
+        "vram": total_mb,
+        "name": device_name,
+        "cuda_capabilities": capability,
+        "used_vram": allocated_mb,
+        "used_vram_total": in_use_mb,
+    }
+
+
+def render_gpu_info(gpu_info):
+    """Turn the value produced by ``get_gpu_info`` into Markdown text.
+
+    A dict is formatted as one paragraph per field; any other value is
+    returned unchanged.
+    """
+    if not isinstance(gpu_info, dict):
+        return gpu_info
+
+    paragraphs = [
+        f"VRAM: {gpu_info['vram']} MB",
+        f"Used VRAM: {gpu_info['used_vram']} MB",
+        f"Total Used VRAM: {gpu_info['used_vram_total']} MB",
+        f"Name: {gpu_info['name']}",
+        f"CUDA Capabilities: {gpu_info['cuda_capabilities']}",
+    ]
+    # A blank line between entries makes each one its own Markdown paragraph.
+    return "\n\n".join(paragraphs)
+
+
+def refresh_gpu_info():
+    """Query the GPU again and return the freshly rendered report."""
+    latest = get_gpu_info()
+    return render_gpu_info(latest)