forked from JarodMica/rvc-tts-pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds to files needed for the pipeline
- Loading branch information
0 parents
commit 116f64b
Showing
8 changed files
with
331 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/output | ||
/rvc | ||
|
||
*.wav | ||
|
||
hubert_base.pt | ||
rmvpe.pt | ||
rvc.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Pipeline for TTS to RVC. This seems to produce the best-sounding TTS, with the closest representation of the original speaker's voice that one may have trained on using RVC/Tortoise. | ||
|
||
Work in progress: rvc_infer.py works just fine, but I am still ironing out the small issues to make this a quicker install. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
* | ||
!.gitignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
* | ||
!.gitignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pyyaml==6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Keep the quotes around quoted values; the code expects those values to be strings | ||
transpose: 0 # change pitch of voice | ||
audio_file: "path to audio file" # audio file path | ||
output_dir: "" # If you wanna change the name of output dir | ||
model_path: "models\\enter_pth_name" # Pytorch model name | ||
device: "cuda:0" # Uses CUDA GPU | ||
is_half: "False" | ||
f0method: "rmvpe" # options are: dio, harvest, crepe (good), rmvpe(also good) | ||
file_index: "" # path to voice index file if using it, leave blank if not | ||
file_index2: "" | ||
index_rate: 1 # strength of the index from 0 to 1 | ||
filter_radius: 3 | ||
resample_sr: 0 | ||
rms_mix_rate: 1.0 | ||
protect: 0.33 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
import os,sys,pdb,torch | ||
now_dir = os.getcwd() | ||
sys.path.append(now_dir) | ||
import argparse | ||
import glob | ||
import sys | ||
import torch | ||
import numpy as np | ||
import yaml | ||
import pkg_resources | ||
|
||
from multiprocessing import cpu_count | ||
from vc_infer_pipeline import VC | ||
from lib.infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono, SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono | ||
from lib.audio import load_audio | ||
|
||
from fairseq import checkpoint_utils | ||
from scipy.io import wavfile | ||
|
||
|
||
class Config:
    """Device and precision settings handed to the voice-conversion pipeline.

    Construction immediately runs ``device_config()``, which may override the
    requested ``device`` / ``is_half`` and fills in ``n_cpu``, ``gpu_name``,
    ``gpu_mem`` and the four window parameters ``x_pad``, ``x_query``,
    ``x_center`` and ``x_max``.

    Args:
        - device(str) : requested torch device, e.g. "cuda:0"
        - is_half(bool) : whether half precision was requested
    """

    def __init__(self, device, is_half):
        self.device = device
        self.is_half = is_half
        self.n_cpu = 0
        self.gpu_name = None
        self.gpu_mem = None
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    @staticmethod
    def _replace_in_file(path, old, new):
        """Rewrite the text file at ``path`` with every ``old`` replaced by ``new``."""
        with open(path, "r") as f:
            text = f.read().replace(old, new)
        with open(path, "w") as f:
            f.write(text)

    def device_config(self) -> tuple:
        """Pick the effective device/precision and return the window settings.

        Returns:
            tuple : (x_pad, x_query, x_center, x_max)

        Raises:
            ValueError : CUDA is available but ``device`` does not end in
                ":<index>" (the index is parsed with ``int``).
            OSError : on the CUDA paths that rewrite files, if
                ``configs/*.json`` or ``trainset_preprocess_pipeline_print.py``
                are missing relative to the working directory.
        """
        if torch.cuda.is_available():
            i_device = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(i_device)
            if (
                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
                or "P40" in self.gpu_name.upper()
                or "1060" in self.gpu_name
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
                # Message: "16-series/10-series cards and P40: force single precision".
                print("16系/10系显卡和P40强制单精度")
                self.is_half = False
                for config_file in ["32k.json", "40k.json", "48k.json"]:
                    self._replace_in_file(f"configs/{config_file}", "true", "false")
                self._replace_in_file(
                    "trainset_preprocess_pipeline_print.py", "3.7", "3.0"
                )
            else:
                # NOTE(review): nesting reconstructed from a flattened listing;
                # this branch is taken for GPUs not matched above - confirm.
                self.gpu_name = None
            # Total device memory in GiB, plus 0.4 before truncating to int.
            self.gpu_mem = int(
                torch.cuda.get_device_properties(i_device).total_memory
                / 1024
                / 1024
                / 1024
                + 0.4
            )
            if self.gpu_mem <= 4:
                self._replace_in_file(
                    "trainset_preprocess_pipeline_print.py", "3.7", "3.0"
                )
        elif torch.backends.mps.is_available():
            # Message: "No supported NVIDIA GPU found, using MPS for inference".
            print("没有发现支持的N卡, 使用MPS进行推理")
            self.device = "mps"
        else:
            # Message: "No supported NVIDIA GPU found, using CPU for inference".
            print("没有发现支持的N卡, 使用CPU进行推理")
            self.device = "cpu"
            # Fix: this used to force half precision on CPU. Half-precision
            # inference is not reliably supported by PyTorch CPU kernels, so
            # fall back to single precision here.
            self.is_half = False

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # Settings for ~6 GB of VRAM.
            x_pad = 3
            x_query = 10
            x_center = 60
            x_max = 65
        else:
            # Settings for ~5 GB of VRAM.
            x_pad = 1
            x_query = 6
            x_center = 38
            x_max = 41

        # Small-memory GPUs (<= 4 GiB) get the most conservative settings.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad = 1
            x_query = 5
            x_center = 30
            x_max = 32

        return x_pad, x_query, x_center, x_max
|
||
|
||
def get_path(name):
    """
    Resolve a file or folder name against the directory that holds this script.

    Args:
        - name(str) : name of the file/folder

    Returns:
        str : the script's directory joined with ``name``
    """
    script_dir, _script_file = os.path.split(os.path.abspath(__file__))
    return os.path.join(script_dir, name)
|
||
def create_directory(name):
    """
    Make sure a directory exists next to this script, creating it when absent.
    The location is resolved with get_path().

    Args:
        - name(str) : name of the file/folder
    """
    target = get_path(name)
    if os.path.exists(target):
        # Something already lives at that path; nothing to do.
        return
    os.makedirs(target)
|
||
def load_hubert():
    """
    Load "hubert_base.pt" (path relative to the working directory) through
    fairseq and store the first model of the ensemble in the global
    ``hubert_model``, moved to ``config.device``, cast to half or single
    precision according to ``config.is_half`` and switched to eval mode.
    """
    global hubert_model
    checkpoint_path = "hubert_base.pt"
    loaded = checkpoint_utils.load_model_ensemble_and_task(
        [checkpoint_path],
        suffix="",
    )
    models, _, _ = loaded
    hubert_model = models[0]
    hubert_model = hubert_model.to(config.device)
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
|
||
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
):
    """
    Convert one audio file with the currently loaded voice model.

    Relies on the globals prepared by get_vc() (tgt_sr, net_g, vc, cpt,
    version) and loads HuBERT on first use via load_hubert().

    Args:
        - sid : speaker id forwarded to the pipeline
        - input_audio_path(str) : audio file to convert
        - f0_up_key : pitch shift, converted with int()
        - f0_file : ignored; it is always reset to None
        - f0_method(str) : pitch extraction method forwarded to the pipeline
        - file_index(str) : index path; surrounding spaces, quotes and newlines
          are stripped and "trained" is replaced by "added"
        - file_index2(str) : used instead when file_index is an empty string
        - index_rate, filter_radius, resample_sr, rms_mix_rate, protect :
          forwarded unchanged to the pipeline

    Returns:
        The value returned by ``vc.pipeline``; or the tuple
        ("You need to upload an audio", None) when input_audio_path is None.
    """
    global tgt_sr, net_g, vc, hubert_model, version
    f0_file = None  # the caller-supplied value is deliberately discarded
    if input_audio_path is None:
        return "You need to upload an audio", None

    f0_up_key = int(f0_up_key)
    audio = load_audio(input_audio_path, 16000)

    # Scale down in place only when the peak would exceed 0.95 of full scale.
    peak = np.abs(audio).max() / 0.95
    if peak > 1:
        audio /= peak

    times = [0, 0, 0]
    if not hubert_model:
        load_hubert()
    if_f0 = cpt.get("f0", 1)

    if file_index != "":
        # Guard against common copy/paste mistakes in the index path.
        cleaned = file_index
        for junk in (" ", '"', "\n", '"', " "):
            cleaned = cleaned.strip(junk)
        file_index = cleaned.replace("trained", "added")
    else:
        file_index = file_index2

    return vc.pipeline(
        hubert_model,
        net_g,
        sid,
        audio,
        input_audio_path,
        times,
        f0_up_key,
        f0_method,
        file_index,
        index_rate,
        if_f0,
        filter_radius,
        tgt_sr,
        resample_sr,
        rms_mix_rate,
        version,
        protect,
        f0_file=f0_file,
    )
|
||
def get_vc(model_path):
    """
    Load an RVC checkpoint and prepare the globals used for inference.

    Sets the globals ``cpt`` (raw checkpoint), ``tgt_sr`` (last entry of the
    checkpoint config), ``version``, ``net_g`` (synthesizer in eval mode),
    ``vc`` (VC pipeline) and ``n_spk``. The global ``config`` must already be
    set: the synthesizer is placed on ``config.device`` and cast according to
    ``config.is_half``, the same settings load_hubert() uses, so both models
    agree even when Config overrode the requested device or precision.

    Args:
        - model_path(str) : path to the .pth checkpoint

    Raises:
        ValueError : the checkpoint declares a version other than "v1"/"v2"
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version
    print("loading pth %s" % model_path)
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from sources you trust.
    cpt = torch.load(model_path, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    # Speaker count is taken from the embedding table of the checkpoint.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")
    if version == "v1":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs256NSFsid(
                *cpt["config"], is_half=config.is_half
            )
        else:
            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    elif version == "v2":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs768NSFsid(
                *cpt["config"], is_half=config.is_half
            )
        else:
            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    else:
        # Previously this fell through and failed later with a NameError, or
        # silently reused a model left over from an earlier call.
        raise ValueError(
            "Unsupported RVC checkpoint version %r in %s (expected 'v1' or 'v2')"
            % (version, model_path)
        )
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
|
||
def load_config():
    """
    Parse the rvc.yaml file that sits next to this script.

    Returns:
        The object produced by yaml.safe_load for that file.
    """
    with open(get_path("rvc.yaml"), "r") as handle:
        return yaml.safe_load(handle)
|
||
def rvc_run(input_path=None, output_dir=None):
    """
    Run one RVC voice conversion using the settings in rvc.yaml and write the
    result to "out.wav" inside the output directory.

    Args:
        - input_path(str) : path to the audio file (use a wav file). When
          omitted, the "audio_file" entry of rvc.yaml is used.
        - output_dir(str) : directory for the result. When omitted, the
          "output_dir" entry of rvc.yaml is used if it is non-empty, otherwise
          an "output" folder next to this script. Created if missing.

    Returns:
        str : path of the written wav file

    Raises:
        ValueError : no input audio was given by argument or by rvc.yaml
        KeyError : a required setting is missing from rvc.yaml
    """
    global config, now_dir, hubert_model, tgt_sr, net_g, vc, cpt, device, is_half, version
    output_file_name = "out.wav"

    settings = load_config()
    print(settings)

    # Read every required setting up front so a missing key fails early.
    f0_up_key = settings["transpose"]
    yaml_audio_file = settings["audio_file"]
    model_path = get_path(settings["model_path"])
    requested_device = settings["device"]
    raw_is_half = settings["is_half"]
    f0method = settings["f0method"]
    file_index = settings["file_index"]
    file_index2 = settings["file_index2"]
    index_rate = settings["index_rate"]
    filter_radius = settings["filter_radius"]
    resample_sr = settings["resample_sr"]
    rms_mix_rate = settings["rms_mix_rate"]
    protect = settings["protect"]

    # An explicit argument wins; the YAML entry is only the fallback.
    if input_path is None:
        input_path = yaml_audio_file
    if not input_path:
        raise ValueError(
            "No input audio: pass input_path or set audio_file in rvc.yaml"
        )

    if output_dir is None:
        # get_path keeps absolute paths as-is and anchors relative ones at
        # the script's directory.
        output_dir = get_path(settings.get("output_dir") or "output")
    os.makedirs(output_dir, exist_ok=True)
    output_file_path = os.path.join(output_dir, output_file_name)

    # rvc.yaml stores is_half as a quoted string, but tolerate a real boolean.
    if isinstance(raw_is_half, str):
        requested_half = raw_is_half.strip().lower() == "true"
    else:
        requested_half = bool(raw_is_half)

    config = Config(requested_device, requested_half)
    # Config may override the request (CPU/MPS fallback, forced single
    # precision); publish the effective values for code reading the globals.
    device = config.device
    is_half = config.is_half

    now_dir = os.getcwd()
    if now_dir not in sys.path:
        sys.path.append(now_dir)

    # Reset so vc_single() reloads HuBERT with the current config.
    hubert_model = None

    get_vc(model_path)
    wav_opt = vc_single(
        0,
        input_path,
        f0_up_key,
        None,
        f0method,
        file_index,
        file_index2,
        index_rate,
        filter_radius,
        resample_sr,
        rms_mix_rate,
        protect,
    )
    wavfile.write(output_file_path, tgt_sr, wav_opt)
    print(f"\nFile finished writing to: {output_file_path}")
    return output_file_path
|
||
# Default output location: an "output" folder next to this script, created
# when the module is loaded.
output_dir_name = "output"
output_dir = get_path(output_dir_name)
create_directory(output_dir_name)
|
||
def main():
    """
    Script entry point: run a conversion into the module-level output_dir.
    No input path is passed, so the audio file comes from rvc.yaml.
    """
    rvc_run(input_path=None, output_dir=output_dir)


if __name__ == "__main__":
    main()