From 7ac2b9d7afd4a78f1b1af3faf06882d7810ea149 Mon Sep 17 00:00:00 2001 From: ikaros <327209194@qq.com> Date: Wed, 2 Apr 2025 16:19:56 +0800 Subject: [PATCH] add: gradio webui for train and use --- README.md | 51 ++++ README_zh.md | 51 ++++ app.py | 638 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 5 + 4 files changed, 745 insertions(+) create mode 100644 app.py create mode 100644 requirements.txt diff --git a/README.md b/README.md index f7bdfc6..fcc5433 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,57 @@ Installation using Docker, docker-compose as follows: 1. Directly download the [officially built installation package](https://github.com/GuijiAI/HeyGem.ai/releases) 2. Double-click `HeyGem-x.x.x-setup.exe` to install +### WebUI Usage Guide + +HeyGem provides a simple and intuitive interface to help you train and use digital human models (Please make sure you have completed the server-side deployment on Windows): + +#### Environment Requirements + +- Python dependencies: Ensure you have installed the necessary Python libraries + ``` + gradio==4.44.1 + requests~=2.32.3 + pydub==0.25.1 + uuid==1.30 + python-json-logger==3.3.0 + ``` + +#### Language Switching Feature + +HeyGem supports Chinese and English interface switching: +- Default launch with Chinese interface: `python app.py` +- Launch with English interface: `python app.py --lang en` +- Launch with Chinese interface: `python app.py --lang zh` + +#### Data Storage Paths + +- Audio files storage path: `D:\heygem_data\voice\data` +- Video files storage path: `D:\heygem_data\face2face\temp` + +#### Training Digital Humans + +1. After opening the software, switch to the "Digital Human Training" tab +2. Upload a reference video: Click on the "Upload Reference Video" area to select a video file with a person speaking +3. Enter a name for the digital human: Name your digital human model +4. Click the "Start Training" button +5. Wait for the training to complete; upon success, the message "Training Successful!" will be displayed + +#### Digital Human Synthesis + +1. Switch to the "Digital Human Synthesis" tab +2. Select a trained digital human model from the dropdown menu +3. Choose a synthesis method: + - **Text Input**: Enter text content and click the "Synthesize" button + - **Audio Upload**: Upload an audio file and click the "Synthesize" button +4. The system will assign a task ID and begin processing +5. Click the "Query Synthesis Status" button to check the progress +6. Once synthesis is complete, the video result will be displayed in the interface + +#### Other Features + +- **Refresh Digital Human Model List**: Click this button to update the dropdown menu of trained models +- To view storage paths and other detailed information, expand the "Storage Path Information" area at the top of the interface + ## Open APIs We have opened APIs for model training and video synthesis. After Docker starts, several ports will be exposed locally, accessible through `http://127.0.0.1`. diff --git a/README_zh.md b/README_zh.md index 213764e..a124791 100644 --- a/README_zh.md +++ b/README_zh.md @@ -190,6 +190,57 @@ Heygem是一款专为Windows系统设计的全离线视频合成工具,它能 1. 直接下载[官方构建的安装包](https://github.com/GuijiAI/HeyGem.ai/releases) 2. 双击`HeyGem-x.x.x-setup.exe`即可安装 +### WebUI使用说明 + +HeyGem提供了简单直观的界面,帮助您训练和使用数字人模型(请确保您已经在Windows完成了服务端部署): + +#### 环境要求 + +- Python依赖:确保安装了必要的Python库 + ``` + gradio==4.44.1 + requests~=2.32.3 + pydub==0.25.1 + uuid==1.30 + python-json-logger==3.3.0 + ``` + +#### 语言切换功能 + +HeyGem支持中英文界面切换: +- 默认启动为中文界面: `python app.py` +- 启动英文界面: `python app.py --lang en` +- 启动中文界面: `python app.py --lang zh` + +#### 数据存储路径 + +- 音频文件存储路径:`D:\heygem_data\voice\data` +- 视频文件存储路径:`D:\heygem_data\face2face\temp` + +#### 数字人训练 + +1. 打开软件后,切换到"数字人训练"标签页 +2. 上传参考视频:点击"上传参考视频"区域选择一个包含人物说话的视频文件 +3. 输入数字人名称:为您的数字人模型命名 +4. 点击"开始训练"按钮 +5. 等待训练完成,成功后将显示"训练成功!"消息 + +#### 数字人合成 + +1. 切换到"数字人合成"标签页 +2. 从下拉菜单中选择已训练的数字人模型 +3. 选择合成方式: + - **文字输入**:输入文字内容,点击"合成"按钮 + - **音频上传**:上传音频文件,点击"合成"按钮 +4. 系统会分配一个任务ID并开始处理 +5. 点击"查询合成状态"按钮查看进度 +6. 合成完成后,视频结果将显示在界面上 + +#### 其他功能 + +- **刷新数字人模型列表**:点击此按钮可更新已训练模型的下拉菜单 +- 如需查看存储路径等详细信息,可展开界面上方的"存储路径信息"区域 + ## 开放 API 我们开放了模特训练和视频合成的API,Docker 启动后会在本地暴露几个端口,通过`http://127.0.0.1`可以调用。 diff --git a/app.py b/app.py new file mode 100644 index 0000000..5f961a9 --- /dev/null +++ b/app.py @@ -0,0 +1,638 @@ +import os +import uuid +import time +import json +import requests +import gradio as gr +import shutil +import argparse +from pydub import AudioSegment + +# 命令行参数解析 +parser = argparse.ArgumentParser(description='HeyGem数字人训练与合成系统') +parser.add_argument('--lang', type=str, default='en', choices=['zh', 'en'], help='界面语言 (zh: 中文, en: 英文)') +args = parser.parse_args() + +# 翻译字典 +translations = { + 'zh': { + 'title': '数字人训练与合成系统', + 'paths_info': '存储路径信息', + 'audio_path': '音频文件存储路径', + 'video_path': '视频文件存储路径', + 'api_server1': 'API服务器地址1', + 'api_server2': 'API服务器地址2', + 'train_tab': '数字人训练', + 'upload_video': '上传参考视频', + 'model_name': '数字人名称', + 'current_status': '当前状态', + 'ready': '就绪', + 'start_training': '开始训练', + 'trained_models': '已训练的数字人', + 'training_result': '训练结果', + 'synthesis_tab': '数字人合成', + 'select_model': '选择数字人模型', + 'text_input_tab': '文字输入', + 'input_text': '输入文字内容', + 'synthesize': '合成', + 'audio_upload_tab': '音频上传', + 'upload_audio': '上传音频文件', + 'task_id': '任务ID', + 'synthesis_status': '合成状态', + 'synthesis_result': '合成结果', + 'query_status': '查询合成状态', + 'refresh_models': '刷新数字人模型列表', + 'error_no_video': '错误: 请上传视频文件', + 'error_no_name': '错误: 请输入数字人名称', + 'processing_video': '正在处理视频...', + 'error_no_model': '错误: 请选择数字人模型', + 'error_no_text': '错误: 请输入文字内容', + 'processing_text': '正在处理文字转语音...', + 'error_no_audio': '错误: 请上传音频文件', + 'processing_audio': '正在处理音频...', + 'enter_task_id': '请输入任务ID', + 'upload_video_and_name': '请上传视频文件并输入数字人名称', + 'training_error': '训练过程出错: {0}\n详细错误: {1}', + 'training_success': '训练成功! 模型ID: {0}', + 'api_response_error': '训练失败: API响应错误 ({0}), {1}', + 'training_failed': '训练失败: {0}', + 'unknown_error': '未知错误', + 'model_not_found': '未找到数字人模型', + 'audio_synthesis_success': '音频合成成功', + 'audio_synthesis_error': '音频合成出错: {0}\n详细错误: {1}', + 'audio_synthesis_failed': '音频合成失败: {0}', + 'select_model_prompt': '请选择数字人模型', + 'upload_audio_or_text': '请上传音频文件或输入文字', + 'task_submit_error': '提交任务出错: {0}\n详细错误: {1}', + 'task_submit_failed': '任务提交失败: {0}', + 'task_submitted': '任务已提交,任务ID: {0}', + 'query_failed': '查询失败: {0}', + 'download_failed': '视频下载失败,但任务已完成。\n音频文件可能位于: {0}\n视频结果: {1}', + 'synthesis_complete': '合成完成 (100%)', + 'download_error': '视频下载过程出错: {0}\n但任务已完成,音频文件可能位于: {1}\n视频结果: {2}', + 'no_video_url': '合成完成但没有视频URL,音频文件可能位于: {0}', + 'synthesis_progress': '正在合成中 ({0}%)', + 'task_queuing': '任务排队中', + 'task_failed': '任务失败: {0}', + 'query_error': '查询任务出错: {0}' + }, + 'en': { + 'title': 'Digital Human Training and Synthesis System', + 'paths_info': 'Storage Path Information', + 'audio_path': 'Audio files storage path', + 'video_path': 'Video files storage path', + 'api_server1': 'API Server Address 1', + 'api_server2': 'API Server Address 2', + 'train_tab': 'Digital Human Training', + 'upload_video': 'Upload Reference Video', + 'model_name': 'Digital Human Name', + 'current_status': 'Current Status', + 'ready': 'Ready', + 'start_training': 'Start Training', + 'trained_models': 'Trained Digital Humans', + 'training_result': 'Training Result', + 'synthesis_tab': 'Digital Human Synthesis', + 'select_model': 'Select Digital Human Model', + 'text_input_tab': 'Text Input', + 'input_text': 'Enter Text Content', + 'synthesize': 'Synthesize', + 'audio_upload_tab': 'Audio Upload', + 'upload_audio': 'Upload Audio File', + 'task_id': 'Task ID', + 'synthesis_status': 'Synthesis Status', + 'synthesis_result': 'Synthesis Result', + 'query_status': 'Query Synthesis Status', + 'refresh_models': 'Refresh Digital Human Model List', + 'error_no_video': 'Error: Please upload a video file', + 'error_no_name': 'Error: Please enter a name for the digital human', + 'processing_video': 'Processing video...', + 'error_no_model': 'Error: Please select a digital human model', + 'error_no_text': 'Error: Please enter text content', + 'processing_text': 'Processing text to speech...', + 'error_no_audio': 'Error: Please upload an audio file', + 'processing_audio': 'Processing audio...', + 'enter_task_id': 'Please enter a Task ID', + 'upload_video_and_name': 'Please upload a video file and enter a digital human name', + 'training_error': 'Training error: {0}\nDetailed error: {1}', + 'training_success': 'Training successful! Model ID: {0}', + 'api_response_error': 'Training failed: API response error ({0}), {1}', + 'training_failed': 'Training failed: {0}', + 'unknown_error': 'Unknown error', + 'model_not_found': 'Digital human model not found', + 'audio_synthesis_success': 'Audio synthesis successful', + 'audio_synthesis_error': 'Audio synthesis error: {0}\nDetailed error: {1}', + 'audio_synthesis_failed': 'Audio synthesis failed: {0}', + 'select_model_prompt': 'Please select a digital human model', + 'upload_audio_or_text': 'Please upload an audio file or enter text', + 'task_submit_error': 'Task submission error: {0}\nDetailed error: {1}', + 'task_submit_failed': 'Task submission failed: {0}', + 'task_submitted': 'Task submitted, Task ID: {0}', + 'query_failed': 'Query failed: {0}', + 'download_failed': 'Video download failed, but task completed.\nAudio file may be located at: {0}\nVideo result: {1}', + 'synthesis_complete': 'Synthesis complete (100%)', + 'download_error': 'Video download error: {0}\nBut task completed, audio file may be located at: {1}\nVideo result: {2}', + 'no_video_url': 'Synthesis complete but no video URL, audio file may be located at: {0}', + 'synthesis_progress': 'Synthesis in progress ({0}%)', + 'task_queuing': 'Task queuing', + 'task_failed': 'Task failed: {0}', + 'query_error': 'Query task error: {0}' + } +} + +# 使用选择的语言 +lang = args.lang +t = translations[lang] + +# 配置 +VOICE_DATA_PATH = r"D:\heygem_data\voice\data" +FACE2FACE_TEMP_PATH = r"D:\heygem_data\face2face\temp" +MODEL_INFO_FILE = "digital_human_models.json" +API_BASE_URL = "http://localhost:18180" # 请根据实际API地址调整 +API_BASE_URL2 = "http://localhost:8383" + +# 确保数据目录存在 +os.makedirs(VOICE_DATA_PATH, exist_ok=True) +os.makedirs(FACE2FACE_TEMP_PATH, exist_ok=True) + +# 读取已有模型信息 +def load_models(): + if os.path.exists(MODEL_INFO_FILE): + with open(MODEL_INFO_FILE, "r", encoding="utf-8") as f: + return json.load(f) + return {} + +# 保存模型信息 +def save_models(models_data): + with open(MODEL_INFO_FILE, "w", encoding="utf-8") as f: + json.dump(models_data, f, ensure_ascii=False, indent=2) + +# 从视频中提取音频 +def extract_audio_from_video(video_path): + audio_filename = f"{uuid.uuid4()}.wav" + audio_path = os.path.join(VOICE_DATA_PATH, audio_filename) + + video = AudioSegment.from_file(video_path) + audio = video.set_channels(1).set_frame_rate(16000).set_sample_width(2) + audio.export(audio_path, format="wav") + + return audio_path + +# 训练数字人 +def train_digital_human(video_file, name): + # 检查输入 + if not video_file or not name: + return None, t['upload_video_and_name'] + + try: + # 处理视频文件路径 + video_path = video_file + + # 检查video_file是对象还是字符串 + if hasattr(video_file, 'name'): + video_path = video_file.name + + # 将上传的视频复制到指定路径 + video_filename = f"{uuid.uuid4()}.mp4" + target_video_path = os.path.join(FACE2FACE_TEMP_PATH, video_filename) + shutil.copy(video_path, target_video_path) + + # 提取音频用于训练 + audio_path = extract_audio_from_video(video_path) + + # 检查音频路径是否符合API的要求(必须在D:\heygem_data\voice\data目录下) + if not audio_path.startswith(VOICE_DATA_PATH): + # 确保音频文件路径正确 + audio_filename = os.path.basename(audio_path) + correct_audio_path = os.path.join(VOICE_DATA_PATH, audio_filename) + + # 如果路径不正确,复制到正确位置 + if audio_path != correct_audio_path: + shutil.copy(audio_path, correct_audio_path) + audio_path = correct_audio_path + + # 获取相对路径(从VOICE_DATA_PATH开始的部分) + audio_filename = os.path.basename(audio_path) + relative_audio_path = audio_filename + + # 调用训练API + api_data = { + "format": "wav", + "reference_audio": relative_audio_path, + "lang": "zh" + } + + print(f"发送API请求: {api_data}") + print(f"原音频路径: {audio_path}") + + response = requests.post( + f"{API_BASE_URL}/v1/preprocess_and_tran", + json=api_data + ) + + if response.status_code != 200: + return None, t['api_response_error'].format(response.status_code, response.text) + + print(f"API响应: {response.text}") + + result = response.json() + + if result.get("code") != 0: + return None, t['training_failed'].format(result.get('msg', t['unknown_error'])) + + # 处理可能的多个文本和音频URL(用|||分隔),只取第一个 + reference_text = result["reference_audio_text"].split("|||")[0].strip() + reference_audio = result["asr_format_audio_url"].split("|||")[0].strip() + + # 创建模型信息 + model_id = str(uuid.uuid4()) + model_info = { + "id": model_id, + "name": name, + "video_path": target_video_path, + "audio_path": audio_path, + "reference_audio": reference_audio, + "reference_text": reference_text, + "created_at": time.strftime("%Y-%m-%d %H:%M:%S") + } + + # 保存模型信息 + models = load_models() + if "models" not in models: + models["models"] = [] + + models["models"].append(model_info) + save_models(models) + + # 训练成功的消息 + return True, t['training_success'].format(model_id) + + except Exception as e: + import traceback + error_trace = traceback.format_exc() + return False, t['training_error'].format(str(e), error_trace) + +# 获取模型详细信息 +def get_model_by_name(name): + models = load_models() + for model in models.get("models", []): + if model["name"] == name: + return model + return None + +# 通过文字合成音频 +def synthesize_audio(model_name, text): + model = get_model_by_name(model_name) + if not model: + return None, t['model_not_found'] + + try: + # 处理model中的reference_audio和reference_text可能包含多个项目的情况 + reference_audio = model["reference_audio"].split("|||")[0].strip() if "|||" in model["reference_audio"] else model["reference_audio"] + reference_text = model["reference_text"].split("|||")[0].strip() if "|||" in model["reference_text"] else model["reference_text"] + + # 调用语音合成API + api_data = { + "speaker": model["id"], + "text": text, + "format": "wav", + "topP": 0.7, + "max_new_tokens": 1024, + "chunk_length": 100, + "repetition_penalty": 1.2, + "temperature": 0.7, + "need_asr": False, + "streaming": False, + "is_fixed_seed": 0, + "is_norm": 0, + "reference_audio": reference_audio, + "reference_text": reference_text + } + + print(f"语音合成API请求: {api_data}") + + response = requests.post( + f"{API_BASE_URL}/v1/invoke", + json=api_data + ) + + if response.status_code != 200: + return None, t['audio_synthesis_failed'].format(response.text) + + # 保存音频文件 + audio_filename = f"{uuid.uuid4()}.wav" + # 直接保存到临时目录 + audio_path = os.path.join(FACE2FACE_TEMP_PATH, audio_filename) + + with open(audio_path, "wb") as f: + f.write(response.content) + + # 同时在voice目录保留一份副本(可选) + voice_audio_path = os.path.join(VOICE_DATA_PATH, audio_filename) + shutil.copy(audio_path, voice_audio_path) + + return audio_path, t['audio_synthesis_success'] + + except Exception as e: + import traceback + error_trace = traceback.format_exc() + return None, t['audio_synthesis_error'].format(str(e), error_trace) + +# 提交数字人合成任务 +def submit_synthesis_job(model_name, audio_file=None, text=None): + if not model_name: + return None, t['select_model_prompt'] + + if not audio_file and not text: + return None, t['upload_audio_or_text'] + + model = get_model_by_name(model_name) + if not model: + return None, t['model_not_found'] + + try: + # 确定音频文件路径 + audio_path = None + + if audio_file: + # 使用上传的音频文件,直接存到临时目录 + audio_filename = f"{uuid.uuid4()}.wav" + audio_path = os.path.join(FACE2FACE_TEMP_PATH, audio_filename) + + # 检查audio_file是对象还是字符串 + audio_file_path = audio_file + if hasattr(audio_file, 'name'): + audio_file_path = audio_file.name + + # 复制音频文件 + shutil.copy(audio_file_path, audio_path) + + # 同时在voice目录保留一份副本(可选) + voice_audio_path = os.path.join(VOICE_DATA_PATH, audio_filename) + shutil.copy(audio_path, voice_audio_path) + elif text: + # 通过文字合成音频(已经保存在临时目录) + audio_path, message = synthesize_audio(model_name, text) + if not audio_path: + return None, message + + # 生成唯一任务ID + task_id = str(uuid.uuid4()) + + # 获取相对路径(仅文件名) + relative_audio_path = os.path.basename(audio_path) + relative_video_path = os.path.basename(model["video_path"]) + + # 提交合成任务 + api_data = { + "audio_url": relative_audio_path, + "video_url": relative_video_path, + "code": task_id, + "chaofen": 0, + "watermark_switch": 0, + "pn": 1 + } + + print(f"合成任务API请求: {api_data}") + print(f"音频路径: {audio_path}") + print(f"视频路径: {model['video_path']}") + + response = requests.post( + f"{API_BASE_URL2}/easy/submit", + json=api_data + ) + + if response.status_code != 200: + return None, t['task_submit_failed'].format(response.text) + + result = response.json() + + if not result.get("success"): + return None, t['task_submit_failed'].format(result.get('msg')) + + return task_id, t['task_submitted'].format(task_id) + + except Exception as e: + import traceback + error_trace = traceback.format_exc() + return None, t['task_submit_error'].format(str(e), error_trace) + +# 查询合成任务状态 +def query_synthesis_status(task_id): + if not task_id: + return t['enter_task_id'], None + + try: + response = requests.get( + f"{API_BASE_URL2}/easy/query", + params={"code": task_id} + ) + + if response.status_code != 200: + return t['query_failed'].format(response.text), None + + result = response.json() + + if not result.get("success"): + return t['query_failed'].format(result.get('msg')), None + + data = result.get("data", {}) + status = data.get("status") + progress = data.get("progress", 0) + + if status == 2: # 任务完成 + video_url = data.get("result") + if video_url: + try: + # 尝试下载视频 + video_response = requests.get(f"{API_BASE_URL2}/easy/download/{video_url.lstrip('/')}") + + if video_response.status_code != 200: + # 下载失败,显示音频和视频的路径信息 + return t['download_failed'].format(FACE2FACE_TEMP_PATH, video_url), None + + # 下载成功,保存视频 + video_filename = f"{task_id}.mp4" + video_path = os.path.join(FACE2FACE_TEMP_PATH, video_filename) + + with open(video_path, "wb") as f: + f.write(video_response.content) + + return t['synthesis_complete'], video_path + except Exception as e: + # 捕获下载过程中的任何错误 + error_msg = t['download_error'].format(str(e), FACE2FACE_TEMP_PATH, video_url) + print(error_msg) + return error_msg, None + else: + return t['no_video_url'].format(FACE2FACE_TEMP_PATH), None + elif status == 1: # 进行中 + return t['synthesis_progress'].format(progress), None + elif status == 0: # 排队中 + return t['task_queuing'], None + else: # 失败 + return t['task_failed'].format(data.get('msg')), None + + except Exception as e: + return t['query_error'].format(str(e)), None + +# 创建Gradio界面 +with gr.Blocks(title=t['title']) as app: + gr.Markdown(f"# {t['title']}") + + # 添加一些路径信息 + with gr.Accordion(t['paths_info'], open=False): + gr.Markdown(f""" + - {t['audio_path']}: `{VOICE_DATA_PATH}` + - {t['video_path']}: `{FACE2FACE_TEMP_PATH}` + - {t['api_server1']}: `{API_BASE_URL}` + - {t['api_server2']}: `{API_BASE_URL2}` + """) + + # 加载现有模型 + models = load_models() + model_names = [model["name"] for model in models.get("models", [])] + + # 状态变量 + training_status = gr.State(t['ready']) + + with gr.Tab(t['train_tab']): + with gr.Row(): + with gr.Column(): + train_video = gr.Video(label=t['upload_video']) + model_name = gr.Textbox(label=t['model_name']) + status_display = gr.Textbox(label=t['current_status'], value=t['ready'], interactive=False) + train_btn = gr.Button(t['start_training']) + + with gr.Column(): + model_dropdown = gr.Dropdown(choices=model_names, label=t['trained_models'], interactive=True) + train_output = gr.Textbox(label=t['training_result'], lines=5) + + with gr.Tab(t['synthesis_tab']): + with gr.Row(): + with gr.Column(): + synth_model = gr.Dropdown(choices=model_names, label=t['select_model'], interactive=True) + + with gr.Tabs(): + with gr.TabItem(t['text_input_tab']): + text_input = gr.Textbox(label=t['input_text'], lines=5) + text_submit_btn = gr.Button(t['synthesize']) + + with gr.TabItem(t['audio_upload_tab']): + audio_input = gr.Audio(label=t['upload_audio'], type="filepath") + audio_submit_btn = gr.Button(t['synthesize']) + + task_id_output = gr.Textbox(label=t['task_id']) + + with gr.Column(): + status_output = gr.Textbox(label=t['synthesis_status'], lines=3) + video_output = gr.Video(label=t['synthesis_result']) + query_btn = gr.Button(t['query_status']) + + # 添加刷新按钮 + with gr.Row(): + refresh_btn = gr.Button(t['refresh_models']) + + # 绑定事件 + def start_training(video_file, name): + if not video_file: + return t['ready'], t['error_no_video'] + if not name: + return t['ready'], t['error_no_name'] + + # 设置状态 + status = t['processing_video'] + + # 执行训练 + success, message = train_digital_human(video_file, name) + + # 返回状态和消息 + return t['ready'], message + + def update_models(): + # 加载最新的模型列表 + models = load_models() + model_names = [m["name"] for m in models.get("models", [])] + + # 返回更新后的下拉框内容 - 使用gr.update而不是gr.Dropdown.update + return gr.update(choices=model_names), gr.update(choices=model_names) + + # 提交文字合成任务 + def submit_with_text(model, text): + if not model: + return None, t['error_no_model'] + if not text: + return None, t['error_no_text'] + + # 提交任务 + task_id, message = submit_synthesis_job(model, text=text) + + # 返回任务ID和消息 + return task_id, f"{t['processing_text']}\n{message}" + + # 提交音频合成任务 + def submit_with_audio(model, audio): + if not model: + return None, t['error_no_model'] + if not audio: + return None, t['error_no_audio'] + + # 提交任务 + task_id, message = submit_synthesis_job(model, audio_file=audio) + + # 返回任务ID和消息 + return task_id, f"{t['processing_audio']}\n{message}" + + # 修改状态查询函数,返回结果 + def query_task_status(task_id): + if not task_id: + return t['enter_task_id'], None + + status, video_path = query_synthesis_status(task_id) + return status, video_path if video_path else None + + # 训练按钮点击事件 + train_btn.click( + start_training, + inputs=[train_video, model_name], + outputs=[status_display, train_output] + ).then( + update_models, + inputs=None, + outputs=[model_dropdown, synth_model] + ) + + # 刷新按钮点击事件 + refresh_btn.click( + update_models, + inputs=None, + outputs=[model_dropdown, synth_model] + ) + + # 提交合成任务事件 + text_submit_btn.click( + submit_with_text, + inputs=[synth_model, text_input], + outputs=[task_id_output, status_output] + ) + + audio_submit_btn.click( + submit_with_audio, + inputs=[synth_model, audio_input], + outputs=[task_id_output, status_output] + ) + + # 手动查询状态事件 + query_btn.click( + query_task_status, + inputs=[task_id_output], + outputs=[status_output, video_output] + ) + +# 启动应用 +if __name__ == "__main__": + app.launch( + # server_name="0.0.0.0", + # server_port=7860, + inbrowser=True, + # share=True + ) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6a03f02 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +gradio==4.44.1 +requests~=2.32.3 +pydub==0.25.1 +uuid==1.30 +python-json-logger==3.3.0 \ No newline at end of file