Skip to content

Commit

Permalink
Add Tab Voice Clone and Utils (#296)
Browse files Browse the repository at this point in the history
* add information about voice cloning to tab voice clone

* readme

* add GPU Info tab

* readme
  • Loading branch information
rsxdalv authored Mar 29, 2024
1 parent 8342ff0 commit 780bf06
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 3 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ https://rsxdalv.github.io/bark-speaker-directory/
https://github.com/rsxdalv/tts-generation-webui/discussions/186#discussioncomment-7291274

## Changelog
Mar 28:
* Add GPU Info tab

Mar 27:
* Add information about voice cloning to tab voice clone

Mar 26:
* Add Maha TTS demo notebook

Expand Down
3 changes: 3 additions & 0 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from src.css.css import full_css
from src.Joutai import Joutai
from src.history_tab.collections_directories_atom import collections_directories_atom
from src.utils.gpu_info_tab import gpu_info_tab


setup_or_recover.dummy()
Expand All @@ -53,6 +54,7 @@ def reload_config_and_restart_ui():
else default_config
)


with gr.Blocks(
css=full_css,
title="TTS Generation WebUI",
Expand Down Expand Up @@ -167,6 +169,7 @@ def reload_config_and_restart_ui():
model_location_settings_tab()

remixer_input = simple_remixer_tab()
gpu_info_tab()
Joutai.singleton.tabs.render()


Expand Down
25 changes: 22 additions & 3 deletions src/bark/clone/tab_voice_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,29 @@ def tab_voice_clone(register_use_as_history_button):
"""
Unethical use of this technology is prohibited.
This demo is based on https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer repository.
Information from the original repository (https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer?tab=readme-ov-file#voices-cloned-arent-very-convincing-why-are-other-peoples-cloned-voices-better-than-mine)
## Voices cloned aren't very convincing, why are other people's cloned voices better than mine?
Make sure these things are **NOT** in your voice input: (in no particular order)
* Noise (You can use a noise remover before)
* Music (There are also music remover tools) (Unless you want music in the background)
* A cut-off at the end (This will cause it to try and continue on the generation)
* Under 1 second of training data (i personally suggest around 10 seconds for good potential, but i've had great results with 5 seconds as well.)
What makes for good prompt audio? (in no particular order)
* Clearly spoken
* No weird background noises
* Only one speaker
* Audio which ends after a sentence ends
* Regular/common voice (They usually have more success, it's still capable of cloning complex voices, but not as good at it)
* Around 10 seconds of data
"""
)


with gr.Column():
tokenizer_dropdown = gr.Dropdown(
label="Tokenizer",
choices=[
Expand All @@ -165,7 +186,6 @@ def tab_voice_clone(register_use_as_history_button):
source="upload",
interactive=True,
)

with gr.Row():
use_gpu_checkbox = gr.Checkbox(label="Use GPU", value=True)
clear_models_button = gr.Button(
Expand Down Expand Up @@ -207,8 +227,7 @@ def load_tokenizer(tokenizer_and_repo: str, use_gpu: bool):
outputs=[tokenizer_dropdown],
api_name="bark_voice_tokenizer_load",
)

with gr.Column():

gr.Markdown("Generated voice:")
voice_file_name = gr.Textbox(
label="Voice file name", value="", interactive=False
Expand Down
46 changes: 46 additions & 0 deletions src/utils/gpu_info_tab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import gradio as gr
import torch


def gpu_info_tab():
    """Build the "GPU Info" tab: a Markdown panel with a refresh button
    and a hidden button that exposes the raw info dict over the API."""
    with gr.Tab("GPU Info"):
        # Rendered once at UI-build time; the Refresh button re-renders it.
        info_markdown = gr.Markdown(render_gpu_info(get_gpu_info()))

        refresh_button = gr.Button("Refresh")
        refresh_button.click(
            fn=refresh_gpu_info,
            outputs=info_markdown,
            api_name="refresh_gpu_info",
        )

        # Invisible button: exists only to register the get_gpu_info API
        # endpoint, which returns the raw dict rather than Markdown.
        api_button = gr.Button("API_GET_GPU_INFO", visible=False)
        api_button.click(fn=get_gpu_info, api_name="get_gpu_info")


def get_gpu_info():
    """Collect basic stats about CUDA device 0.

    Returns:
        dict with keys ``vram``, ``name``, ``cuda_capabilities``,
        ``used_vram`` and ``used_vram_total`` (memory values in MiB)
        when PyTorch sees a CUDA device, otherwise a plain message
        string explaining that no GPU was detected.
    """
    if not torch.cuda.is_available():
        return "No GPU with CUDA support detected by PyTorch"

    # Query the device once instead of calling get_device_properties twice.
    props = torch.cuda.get_device_properties(0)
    # Single mem_get_info call: the original called it twice, so "free" and
    # "total" could come from different snapshots of device memory.
    free_bytes, total_bytes = torch.cuda.mem_get_info(0)
    mib = 1024**2

    return {
        "vram": props.total_memory / mib,
        "name": props.name,
        "cuda_capabilities": torch.cuda.get_device_capability(0),
        # Memory allocated by *this* process via the PyTorch allocator.
        "used_vram": torch.cuda.memory_allocated(0) / mib,
        # total - free = memory used by all processes on the device.
        "used_vram_total": (total_bytes - free_bytes) / mib,
    }


def render_gpu_info(gpu_info):
    """Format the result of get_gpu_info() as Markdown.

    Accepts either the info dict or the no-GPU message string; strings
    are passed through unchanged.
    """
    if not isinstance(gpu_info, dict):
        # Already a plain message (e.g. "no GPU detected") — show as-is.
        return gpu_info
    sections = [
        f"VRAM: {gpu_info['vram']} MB",
        f"Used VRAM: {gpu_info['used_vram']} MB",
        f"Total Used VRAM: {gpu_info['used_vram_total']} MB",
        f"Name: {gpu_info['name']}",
        f"CUDA Capabilities: {gpu_info['cuda_capabilities']}",
    ]
    # Blank line between sections so each renders as its own paragraph.
    return "\n\n".join(sections)


def refresh_gpu_info():
    """Re-query the GPU and return freshly rendered Markdown text."""
    current = get_gpu_info()
    return render_gpu_info(current)

0 comments on commit 780bf06

Please sign in to comment.