forked from PlayVoice/whisper-vits-svc
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
630 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import io | ||
import logging | ||
|
||
import soundfile | ||
import torch | ||
import torchaudio | ||
from flask import Flask, request, send_file | ||
from flask_cors import CORS | ||
|
||
from inference.infer_tool import Svc, RealTimeVC | ||
|
||
# Flask application that exposes the voice-conversion HTTP endpoint.
app = Flask(__name__)

# Enable cross-origin requests for every route of the app.
CORS(app)

# Raise numba's log threshold so only warnings and errors are emitted.
logging.getLogger("numba").setLevel(logging.WARNING)
|
||
|
||
@app.route("/voiceChangeModel", methods=["POST"])
def voice_change_model():
    """Convert an uploaded voice sample and return the result as a WAV file.

    Form fields read from the POST request:
        sample: uploaded audio file to convert (required).
        fPitchChange: pitch-change value forwarded to the model (default 0).
        sampleRate: sample rate required by the DAW; the model output is
            resampled to this rate (required, must be positive).
        sSpeakId: speaker id forwarded to the model (default 0).

    Relies on the module-level globals ``raw_infer``, ``svc_model`` and
    ``svc``, which are assigned when this file is run as a script.

    Returns:
        The converted audio as a ``temp.wav`` attachment, or a plain-text
        message with HTTP status 400 when the request is malformed.
    """
    # Reject requests without the audio file instead of crashing on None.
    wave_file = request.files.get("sample")
    if wave_file is None:
        return 'Missing uploaded file "sample".', 400

    request_form = request.form
    try:
        # Pitch-change information.
        f_pitch_change = float(request_form.get("fPitchChange", 0))
        # Sample rate required by the DAW; parsed through float() so values
        # such as "44100.0" are accepted.
        daw_sample = int(float(request_form.get("sampleRate", 0)))
        speaker_id = int(float(request_form.get("sSpeakId", 0)))
    except (TypeError, ValueError, OverflowError):
        return "fPitchChange, sampleRate and sSpeakId must be numeric.", 400

    # A missing or non-positive rate cannot be used as a resampling target.
    if daw_sample <= 0:
        return "sampleRate must be a positive number.", 400

    # Wrap the uploaded bytes in an in-memory file for the model.
    input_wav_path = io.BytesIO(wave_file.read())

    # Model inference.
    if raw_infer:
        # Direct per-slice synthesis; the second return value is not needed.
        out_audio, _ = svc_model.infer(speaker_id, f_pitch_change, input_wav_path)
        tar_audio = torchaudio.functional.resample(
            out_audio, svc_model.target_sample, daw_sample
        )
    else:
        # Cross-fade path: the result is wrapped with torch.from_numpy before
        # resampling.
        out_audio = svc.process(svc_model, speaker_id, f_pitch_change, input_wav_path)
        tar_audio = torchaudio.functional.resample(
            torch.from_numpy(out_audio), svc_model.target_sample, daw_sample
        )

    # Return the audio from an in-memory WAV buffer.
    out_wav_path = io.BytesIO()
    soundfile.write(out_wav_path, tar_audio.cpu().numpy(), daw_sample, format="wav")
    out_wav_path.seek(0)
    return send_file(out_wav_path, download_name="temp.wav", as_attachment=True)
|
||
|
||
if __name__ == "__main__":
    # True: synthesize each slice directly; False: cross-fade between slices.
    # Setting the VST plugin's slice time to 0.3-0.5 s can lower latency.
    # Direct slicing may pop at the joins, while cross-fading produces a
    # slight overlapping sound.
    # Choose whichever is acceptable, or raise the VST maximum slice time to
    # 1 s. It is set to True here: higher latency, somewhat more stable
    # audio quality.
    raw_infer = True

    # Each model checkpoint corresponds to exactly one config file.
    model_name = "logs/48k/G_174000-Copy1.pth"
    config_name = "configs/config.json"
    svc_model = Svc(model_name, config_name)
    svc = RealTimeVC()

    # These settings match the VST plugin; changing them is not recommended.
    app.run(host="0.0.0.0", port=6842, debug=False, threaded=False)
Empty file.
Oops, something went wrong.