修复多种音频格式（wav/mp3）引发的兼容性问题。

pth2000 · pth2000 · commit bf2cb7eba516 · 2026-03-23T21:49:29.000+08:00
diff --git a/main.py b/main.py
@@ -26,3 +26,4 @@ def main():
 
 if __name__ == '__main__':
     sys.exit(main())
+
diff --git a/tasks/audio_generation_task.py b/tasks/audio_generation_task.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 import shutil
 import wave
+from mutagen import File as MutagenFile
+from mutagen.mp3 import MP3
 
 from PySide6.QtCore import QThread, Signal
 
@@ -37,35 +39,58 @@ def _safe_copy(src: Path, dst: Path):
 
     def _save_one_note_wav(self, index, note_dict, generation_profile):
         """保存单条讲稿并返回索引、音频时长和是否命中缓存的标志"""
-        path = self.reviewer_page.wav_temp_path / f'{note_dict["page"]}_{index + 1}.wav'
+        output_ext = self.tts_engine.get_output_extension()
+        path = self.reviewer_page.wav_temp_path / f'{note_dict["page"]}_{index + 1}.{output_ext}'
         cache_key = self.tts_engine.build_audio_cache_key(note_dict['text'], generation_profile)
-        cache_path = self.audio_cache_path / f'{cache_key}.wav'
+        cache_path = self.audio_cache_path / f'{cache_key}.{output_ext}'
 
         cache_hit = False
         if cache_path.exists() and cache_path.stat().st_size > 0:
             self._safe_copy(cache_path, path)
             cache_hit = True
         else:
-            temp_path = self.audio_cache_path / f'{cache_key}.{index}.tmp.wav'
+            temp_path = self.audio_cache_path / f'{cache_key}.{index}.tmp.{output_ext}'
             self.tts_engine.save_file(note_dict['text'], str(temp_path))
             temp_path.replace(cache_path)
             self._safe_copy(cache_path, path)
 
-        duration = self.get_wav_duration(path)
-        return index, duration, cache_key, cache_hit
+        duration = self.get_audio_duration(path)
+        return index, duration, cache_key, output_ext, cache_hit
 
     @staticmethod
-    def get_wav_duration(path: Path) -> float:
-        """使用 wave 模块读取 wav 时长"""
-        with wave.open(str(path), 'rb') as wav_file:
-            return wav_file.getnframes() / float(wav_file.getframerate())
+    def get_audio_duration(path: Path) -> float:
+        """读取音频时长（支持 wav/mp3）"""
+        suffix = path.suffix.lower()
+        if suffix == '.wav':
+            try:
+                with wave.open(str(path), 'rb') as wav_file:
+                    return wav_file.getnframes() / float(wav_file.getframerate())
+            except Exception:
+                pass
+
+        if suffix == '.mp3':
+            audio = MP3(str(path))
+            if getattr(audio, 'info', None):
+                length = float(getattr(audio.info, 'length', 0.0))
+                if length > 0:
+                    return length
+
+        # 对其他格式尝试 mutagen
+        audio = MutagenFile(str(path))
+        if getattr(audio, 'info', None):
+            length = float(getattr(audio.info, 'length', 0.0))
+            if length > 0:
+                return length
+
+        raise RuntimeError(f'无法读取音频时长：{path.name}')
 
     def save_wav(self):
         """调用 TTS 保存文字为 wav"""
         notes_list = self.reviewer_page.notes_list
         total = len(notes_list)
         info_list = [0.0] * total
         cache_key_list = [''] * total
+        cache_ext_list = [''] * total
         cache_hit_count = 0
         generation_profile = self.tts_engine.get_generation_profile()
 
@@ -80,25 +105,28 @@ def save_wav(self):
 
                 for future in as_completed(future_map):
                     index = future_map[future]
-                    result_index, duration, cache_key, cache_hit = future.result()
+                    result_index, duration, cache_key, cache_ext, cache_hit = future.result()
                     info_list[result_index] = duration
                     cache_key_list[result_index] = cache_key
+                    cache_ext_list[result_index] = cache_ext
                     if cache_hit:
                         cache_hit_count += 1
 
                     completed += 1
                     self.signal_import_index.emit(completed)
         else:
             for index, note_dict in enumerate(notes_list):
-                result_index, duration, cache_key, cache_hit = self._save_one_note_wav(index, note_dict, generation_profile)
+                result_index, duration, cache_key, cache_ext, cache_hit = self._save_one_note_wav(index, note_dict, generation_profile)
                 info_list[result_index] = duration
                 cache_key_list[result_index] = cache_key
+                cache_ext_list[result_index] = cache_ext
                 if cache_hit:
                     cache_hit_count += 1
                 self.signal_import_index.emit(index + 1)
 
         self.reviewer_page.notes_duration_list = info_list
         self.reviewer_page.note_cache_keys = cache_key_list
+        self.reviewer_page.note_cache_exts = cache_ext_list
         self.signal_cache_hit_count.emit(cache_hit_count)
 
     def save_countdown_wav(self):
diff --git a/tts_engine.py b/tts_engine.py
@@ -191,6 +191,13 @@ def get_generation_profile(self):
             'voice_index': int(self._voice_index_map.get(mode, 0)),
         }
 
+    def get_output_extension(self, mode: Optional[str] = None) -> str:
+        """返回指定引擎的默认输出后缀（不带点）"""
+        target_mode = mode or self.get_mode()
+        if target_mode == 'edge':
+            return 'mp3'
+        return 'wav'
+
     @staticmethod
     def normalize_text_for_cache(text: str) -> str:
         """最小化规整文本，降低空白差异导致的重复生成"""
diff --git a/ui/pages/reviewer_page.py b/ui/pages/reviewer_page.py
@@ -3,7 +3,6 @@
 import json
 import re
 from datetime import datetime
-import wave
 from pathlib import Path
 
 import pyautogui
@@ -63,6 +62,7 @@ def __init__(self, context: AppContext, parent=None):
         self.notes_list = []  # 每块讲稿
         self.notes_duration_list = []
         self.note_cache_keys = []
+        self.note_cache_exts = []
         self.cache_hit_count = 0  # 新增：缓存命中数
         self.is_play_notes = False
         self.is_import = False
@@ -317,6 +317,7 @@ def clean_and_reset(self):
         self.current_index = 0
         self.notes_duration_list = []
         self.note_cache_keys = []
+        self.note_cache_exts = []
         self.cache_hit_count = 0
         self.is_import = False
         self.check_import()
@@ -327,8 +328,7 @@ def refresh_notes_duration_list(self):
             self.load_audio_files()
         duration_list = []
         for path in self.media_list:
-            with wave.open(str(path), 'rb') as wav_file:
-                duration = wav_file.getnframes() / float(wav_file.getframerate())
+            duration = AudioGenerationTask.get_audio_duration(path)
             duration_list.append(duration)
         self.notes_duration_list = duration_list
 
@@ -397,13 +397,14 @@ def mark_split(self):
 
     @staticmethod
     def clean_temp_folder(path: Path):
-        """清理缓存 wav"""
-        for file_path in path.glob('*.wav'):
-            try:
-                file_path.unlink()
-                print(f'已清理 {file_path.name}')
-            except Exception as e:
-                print(f'清理文件失败: {file_path.name}, 原因: {e}')
+        """清理临时音频（wav/mp3）"""
+        for pattern in ('*.wav', '*.mp3'):
+            for file_path in path.glob(pattern):
+                try:
+                    file_path.unlink()
+                    print(f'已清理 {file_path.name}')
+                except Exception as e:
+                    print(f'清理文件失败: {file_path.name}, 原因: {e}')
         print('转换完成')
 
     def thread_print_index(self, import_index):
@@ -449,6 +450,10 @@ def save_session_record(self):
                 for item in self.notes_list
             ]
 
+        if len(self.note_cache_exts) != len(self.notes_list):
+            output_ext = self.ctx.tts_engine.get_output_extension()
+            self.note_cache_exts = [output_ext] * len(self.notes_list)
+
         durations = self.notes_duration_list[:]
         if len(durations) != len(self.notes_list):
             durations = [0.0] * len(self.notes_list)
@@ -461,6 +466,7 @@ def save_session_record(self):
                 'text': note['text'],
                 'duration': float(durations[index]),
                 'cache_key': self.note_cache_keys[index],
+                'cache_ext': self.note_cache_exts[index],
             })
 
         now = datetime.now()
@@ -531,6 +537,7 @@ def load_session_record(self, record_path: Path):
         notes_list = []
         duration_list = []
         cache_keys = []
+        cache_exts = []
 
         for idx, item in enumerate(items):
             page = int(item.get('page', 0))
@@ -540,16 +547,29 @@ def load_session_record(self, record_path: Path):
                 profile = record.get('generation_profile', {})
                 cache_key = self.ctx.tts_engine.build_audio_cache_key(text, profile)
 
-            cache_path = self.audio_cache_path / f'{cache_key}.wav'
-
-            if not cache_path.exists() or cache_path.stat().st_size <= 0:
+            cache_ext = str(item.get('cache_ext', '')).strip().lower().lstrip('.')
+            ext_candidates = [cache_ext] if cache_ext else []
+            for ext in ('wav', 'mp3'):
+                if ext not in ext_candidates:
+                    ext_candidates.append(ext)
+
+            cache_path = None
+            for ext in ext_candidates:
+                candidate = self.audio_cache_path / f'{cache_key}.{ext}'
+                if candidate.exists() and candidate.stat().st_size > 0:
+                    cache_path = candidate
+                    cache_ext = ext
+                    break
+
+            if cache_path is None:
                 missing_list.append(f'第{page}页-第{idx + 1}条')
                 continue
 
             media_list.append(cache_path)
             notes_list.append({'page': page, 'text': text})
             duration_list.append(float(item.get('duration', 0.0)))
             cache_keys.append(cache_key)
+            cache_exts.append(cache_ext)
 
         if missing_list:
             missing_text = '、'.join(missing_list[:10])
@@ -572,6 +592,7 @@ def load_session_record(self, record_path: Path):
         self.notes_list = notes_list
         self.notes_duration_list = duration_list
         self.note_cache_keys = cache_keys
+        self.note_cache_exts = cache_exts
 
         self.media_list = media_list
         self.current_index = 0
@@ -608,9 +629,10 @@ def play_notes(self):
         self.play_audio()
 
     def load_audio_files(self):
-        """查找所有 wav，添加到 media_list 中"""
+        """查找所有正文音频（wav/mp3），添加到 media_list 中"""
+        audio_files = list(self.wav_temp_path.glob('*.wav')) + list(self.wav_temp_path.glob('*.mp3'))
         audio_files = sorted(
-            self.wav_temp_path.glob('*.wav'),
+            audio_files,
             key=lambda path: [int(part) if part.isdigit() else part for part in path.stem.split('_')]
         )
         self.media_list = audio_files
diff --git a/ui/pages/settings_page.py b/ui/pages/settings_page.py
@@ -125,7 +125,8 @@ def preview_audio(self):
             text = '这是一个试听音频，用于测试当前的语音设置'
             temp_dir = Path('./temp').resolve()
             temp_dir.mkdir(parents=True, exist_ok=True)
-            preview_path = temp_dir / f'preview_{int(time.time())}.wav'
+            preview_ext = self.ctx.tts_engine.get_output_extension()
+            preview_path = temp_dir / f'preview_{int(time.time())}.{preview_ext}'
 
             self.previewButton.setEnabled(False)
             self.previewButton.setText('正在生成...')

Original file line number	Diff line number	Diff line change
`@@ -26,3 +26,4 @@ def main():`
`26`	`26`
`27`	`27`	`if __name__ == '__main__':`
`28`	`28`	`sys.exit(main())`
	`29`	`+`