Commit

Adds to files needed for the pipeline
JarodMica committed Aug 10, 2023
0 parents commit 116f64b
Showing 8 changed files with 331 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -0,0 +1,8 @@
/output
/rvc

*.wav

hubert_base.pt
rmvpe.pt
rvc.yaml
3 changes: 3 additions & 0 deletions README.md
@@ -0,0 +1,3 @@
Pipeline for TTS to RVC. This seems to produce the best-sounding TTS with the closest match to the original speaker's voice that one may have trained with RVC/Tortoise.

Work in progress: rvc_infer.py works just fine, but the small issues are still being ironed out to make this a quicker install.
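A minimal usage sketch, assuming rvc.yaml has been filled in and the required model files (hubert_base.pt, rmvpe.pt, and a trained .pth in models/) are in place; the names below come from rvc_infer.py in this commit:

from rvc_infer import rvc_run, get_path

rvc_run(output_dir=get_path("output"))  # reads rvc.yaml and writes output/out.wav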
1 change: 1 addition & 0 deletions __init__.py
@@ -0,0 +1 @@

2 changes: 2 additions & 0 deletions index/.gitignore
@@ -0,0 +1,2 @@
*
!.gitignore
2 changes: 2 additions & 0 deletions models/.gitignore
@@ -0,0 +1,2 @@
*
!.gitignore
1 change: 1 addition & 0 deletions requirements.txt
@@ -0,0 +1 @@
pyyaml==6.0.1
15 changes: 15 additions & 0 deletions rvc copy.yaml.example
@@ -0,0 +1,15 @@
# If a value is in quotes, do NOT remove the quotes; the code expects strings
transpose: 0 # pitch shift applied to the voice (in semitones)
audio_file: "path to audio file" # path to the input audio file
output_dir: "" # set this if you want to change the name of the output directory
model_path: "models\\enter_pth_name" # name of the PyTorch (.pth) model
device: "cuda:0" # uses the CUDA GPU
is_half: "False" # "True" runs the model in half precision on the GPU
f0method: "rmvpe" # options are: dio, harvest, crepe (good), rmvpe (also good)
file_index: "" # path to the voice index file if using one; leave blank if not
file_index2: "" # fallback index path, used when file_index is blank
index_rate: 1 # strength of the index, from 0 to 1
filter_radius: 3 # median filtering applied to the extracted pitch; 3 or higher smooths it
resample_sr: 0 # resample the output to this sample rate; 0 keeps the model's rate
rms_mix_rate: 1.0 # how much of the output's volume envelope to use; lower values follow the input's volume more closely
protect: 0.33 # protects voiceless consonants and breaths; 0.5 disables protection
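This template appears intended to be copied to rvc.yaml (the file that load_config() in rvc_infer.py reads, and which .gitignore excludes), then filled in. For example, with placeholder paths:

audio_file: "C:\\tts\\tortoise_output.wav"
model_path: "models\\my_rvc_voice.pth"
file_index: "index\\my_rvc_voice.index"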
299 changes: 299 additions & 0 deletions rvc_infer.py
@@ -0,0 +1,299 @@
import os
import sys

now_dir = os.getcwd()
sys.path.append(now_dir)

import torch
import numpy as np
import yaml

from multiprocessing import cpu_count
from vc_infer_pipeline import VC
from lib.infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono, SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono
from lib.audio import load_audio

from fairseq import checkpoint_utils
from scipy.io import wavfile


class Config:
def __init__(self,device,is_half):
self.device = device
self.is_half = is_half
self.n_cpu = 0
self.gpu_name = None
self.gpu_mem = None
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

def device_config(self) -> tuple:
if torch.cuda.is_available():
i_device = int(self.device.split(":")[-1])
self.gpu_name = torch.cuda.get_device_name(i_device)
if (
("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
or "P40" in self.gpu_name.upper()
or "1060" in self.gpu_name
or "1070" in self.gpu_name
or "1080" in self.gpu_name
):
print("16系/10系显卡和P40强制单精度")
self.is_half = False
for config_file in ["32k.json", "40k.json", "48k.json"]:
with open(f"configs/{config_file}", "r") as f:
strr = f.read().replace("true", "false")
with open(f"configs/{config_file}", "w") as f:
f.write(strr)
with open("trainset_preprocess_pipeline_print.py", "r") as f:
strr = f.read().replace("3.7", "3.0")
with open("trainset_preprocess_pipeline_print.py", "w") as f:
f.write(strr)
else:
self.gpu_name = None
self.gpu_mem = int(
torch.cuda.get_device_properties(i_device).total_memory
/ 1024
/ 1024
/ 1024
+ 0.4
)
if self.gpu_mem <= 4:
with open("trainset_preprocess_pipeline_print.py", "r") as f:
strr = f.read().replace("3.7", "3.0")
with open("trainset_preprocess_pipeline_print.py", "w") as f:
f.write(strr)
        elif torch.backends.mps.is_available():
            print("No supported NVIDIA GPU found, using MPS for inference")
            self.device = "mps"
        else:
            print("No supported NVIDIA GPU found, using CPU for inference")
            self.device = "cpu"
            self.is_half = False  # half precision is not supported on CPU

if self.n_cpu == 0:
self.n_cpu = cpu_count()

if self.is_half:
            # configuration for 6 GB of VRAM
x_pad = 3
x_query = 10
x_center = 60
x_max = 65
else:
            # configuration for 5 GB of VRAM
x_pad = 1
x_query = 6
x_center = 38
x_max = 41

        if self.gpu_mem is not None and self.gpu_mem <= 4:
x_pad = 1
x_query = 5
x_center = 30
x_max = 32

return x_pad, x_query, x_center, x_max
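
# Illustrative usage of Config (not executed in this file):
#   cfg = Config("cuda:0", is_half=True)
#   cfg.device, cfg.is_half  -> resolved device and precision
#   cfg.x_pad, cfg.x_query, cfg.x_center, cfg.x_max -> chunking values picked up by the VC pipeline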


def get_path(name):
    '''
    Gets the path of a file/folder relative to where this script is located.
    Args:
        - name(str) : name of the file/folder
    '''
    current_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(current_dir, name)

def create_directory(name):
    '''
    Creates a directory relative to this script's location. Relies on
    get_path().
    Args:
        - name(str) : name of the file/folder
    '''
    dir_name = get_path(name)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
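# e.g. create_directory("output") makes an "output" folder next to this script if it does not exist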

def load_hubert():
global hubert_model
file_path = "hubert_base.pt"
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
[file_path],
suffix="",
)
hubert_model = models[0]
hubert_model = hubert_model.to(config.device)
if config.is_half:
hubert_model = hubert_model.half()
else:
hubert_model = hubert_model.float()
hubert_model.eval()
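# Note: hubert_base.pt is loaded from the current working directory; it is listed in
# .gitignore above, so it has to be obtained separately.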

def vc_single(
sid,
input_audio_path,
f0_up_key,
f0_file,
f0_method,
file_index,
file_index2,
# file_big_npy,
index_rate,
filter_radius,
resample_sr,
rms_mix_rate,
protect,
): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
global tgt_sr, net_g, vc, hubert_model, version
f0_file = None
if input_audio_path is None:
return "You need to upload an audio", None
f0_up_key = int(f0_up_key)
audio = load_audio(input_audio_path, 16000)
audio_max = np.abs(audio).max() / 0.95
if audio_max > 1:
audio /= audio_max
times = [0, 0, 0]
if not hubert_model:
load_hubert()
if_f0 = cpt.get("f0", 1)
file_index = (
(
file_index.strip(" ")
.strip('"')
.strip("\n")
.strip('"')
.strip(" ")
.replace("trained", "added")
)
if file_index != ""
else file_index2
    )  # guard against users mistyping the index path: automatically swap "trained" for "added"
# file_big_npy = (
# file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
# )
audio_opt = vc.pipeline(
hubert_model,
net_g,
sid,
audio,
input_audio_path,
times,
f0_up_key,
f0_method,
file_index,
# file_big_npy,
index_rate,
if_f0,
filter_radius,
tgt_sr,
resample_sr,
rms_mix_rate,
version,
protect,
f0_file=f0_file,
)
return audio_opt
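# Example call (illustrative, mirroring the defaults in the yaml template):
#   audio_opt = vc_single(0, "path/to/input.wav", 0, None, "rmvpe", "", "", 1, 3, 0, 1.0, 0.33)
# The return value is the converted waveform at tgt_sr, ready for wavfile.write().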

def get_vc(model_path):
    global n_spk, tgt_sr, net_g, vc, cpt, device, is_half, version
    print("loading pth %s" % model_path)
    cpt = torch.load(model_path, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")
if version == "v1":
if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(
*cpt["config"], is_half=config.is_half
)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif version == "v2":
if if_f0 == 1:
net_g = SynthesizerTrnMs768NSFsid(
*cpt["config"], is_half=config.is_half
)
else:
net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
del net_g.enc_q
print(net_g.load_state_dict(cpt["weight"], strict=False))
net_g.eval().to(device)
    if is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
# return {"visible": True,"maximum": n_spk, "__type__": "update"}
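# Illustrative: get_vc("models/my_voice.pth") (a placeholder path) populates the globals
# tgt_sr, net_g, vc, cpt and version that vc_single() above depends on.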

def load_config():
current_dir = os.path.dirname(os.path.abspath(__file__))
yaml_file = os.path.join(current_dir, "rvc.yaml")

with open(yaml_file, "r") as file:
rvc_conf = yaml.safe_load(file)

return rvc_conf

def rvc_run(input_path=None, output_dir=None):
    '''
    Runs the RVC voice conversion. First, set up the appropriate settings inside
    rvc.yaml (note that the audio_file setting there currently overrides input_path).
    Args:
        input_path (str) : path to the input audio file (use a wav file)
        output_dir (str) : path to the output directory; the output file is named "out.wav"
    '''
global config, now_dir, hubert_model, tgt_sr, net_g, vc, cpt, device, is_half, version
output_file_name = "out.wav"

settings = load_config()

f0_up_key = settings["transpose"]
input_path = settings["audio_file"]
# output_dir = settings["output_dir"]
model_path = get_path(settings["model_path"])
device = settings["device"]
is_half = settings["is_half"]
f0method = settings["f0method"]
file_index = settings["file_index"]
file_index2 = settings["file_index2"]
index_rate = settings["index_rate"]
filter_radius = settings["filter_radius"]
resample_sr = settings["resample_sr"]
rms_mix_rate = settings["rms_mix_rate"]
protect = settings["protect"]
print(settings)

    output_file_path = os.path.join(output_dir, output_file_name)

    # rvc.yaml stores is_half as a string, so convert it to a bool here
    is_half = str(is_half).lower() == "true"

    config = Config(device, is_half)
    now_dir = os.getcwd()
    sys.path.append(now_dir)

    hubert_model = None

    get_vc(model_path)
    wav_opt = vc_single(
        0, input_path, f0_up_key, None, f0method, file_index, file_index2,
        index_rate, filter_radius, resample_sr, rms_mix_rate, protect,
    )
    wavfile.write(output_file_path, tgt_sr, wav_opt)
    print(f"\nFile finished writing to: {output_file_path}")

output_dir_name = "output"
create_directory(output_dir_name)
output_dir = get_path(output_dir_name)

def main():
    # To pass input_path directly instead, the audio_file setting in rvc.yaml would need to be commented out
    rvc_run(output_dir=output_dir)

if __name__ == "__main__":
main()
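To run the whole pipeline from the command line, assuming rvc.yaml and the required model files (hubert_base.pt, rmvpe.pt, a trained .pth) are in place:

python rvc_infer.py

This reads rvc.yaml, converts the file set in audio_file, and writes the result to output/out.wav next to the script.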
