We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
When preparing the training data, is it a problem to use 44,100 Hz audio when converting it to a byte sequence?
import io
import os
from typing import Optional

import pandas as pd
import soundfile as sf
from scipy.signal import resample
from tqdm import tqdm


def convert_audio_to_wav_bytes(audio_path: str, target_samplerate: int = 44100) -> bytes:
    """Load an audio file and return it as mono 16-bit PCM WAV bytes.

    Args:
        audio_path: Path of the audio file to read with ``soundfile``.
        target_samplerate: Sample rate (Hz) of the returned WAV data. The
            audio is resampled only when the file's rate differs from this.

    Returns:
        The complete WAV file (header + PCM_16 samples) as a byte string.
    """
    audio_data, samplerate = sf.read(audio_path)

    # Downmix to mono BEFORE resampling: averaging and resampling are both
    # linear, so the result is the same, but only one channel has to go
    # through the (comparatively expensive) FFT-based resampler.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    if samplerate != target_samplerate:
        num_samples = int(len(audio_data) * target_samplerate / samplerate)
        audio_data = resample(audio_data, num_samples)

    # Encode into an in-memory buffer instead of a temporary file.
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, target_samplerate, format="WAV", subtype="PCM_16")
    return buffer.getvalue()


def create_sample(audio_path: str, transcript: str) -> Optional[dict]:
    """Build one dataset record from an audio file and its transcript.

    Args:
        audio_path: Path of the audio file to convert.
        transcript: Text spoken in the audio file.

    Returns:
        ``{"transcript": ..., "audio": {"bytes": ...}}`` on success, or
        ``None`` when the audio could not be converted (the error is printed).
    """
    try:
        wav_bytes = convert_audio_to_wav_bytes(audio_path)
    except Exception as e:
        # Deliberately broad: a single unreadable or corrupt file should be
        # reported and skipped, not abort the whole dataset build.
        print(f"エラーが発生しました: {audio_path} - {e}")
        return None

    return {
        "transcript": transcript,
        "audio": {
            "bytes": wav_bytes,  # the raw WAV byte string itself
        },
    }


def process_csv(csv_path: str, output_dir: str):
    """Convert every row of a CSV into a sample and save them as Parquet.

    The CSV must provide a ``FilePath`` column (audio file path) and a
    ``Text`` column (transcript). Rows whose audio fails to convert are
    skipped. The result is written to ``<output_dir>/dataset.parquet``.

    Args:
        csv_path: Path of the input CSV file.
        output_dir: Directory for the output file; created if missing.
    """
    df = pd.read_csv(csv_path)

    os.makedirs(output_dir, exist_ok=True)

    samples = []
    # Iterate only over the two columns that are actually used.
    rows = zip(df["FilePath"], df["Text"])
    for audio_path, transcript in tqdm(rows, total=len(df)):
        sample = create_sample(audio_path, transcript)
        if sample is not None:
            samples.append(sample)

    output_path = os.path.join(output_dir, "dataset.parquet")
    samples_df = pd.DataFrame(samples)
    samples_df.to_parquet(output_path, index=False)

    print(f"データセットがParquet形式で保存されました: {output_path}")


# Script entry point
if __name__ == "__main__":
    # Path of the input CSV file
    csv_path = "/home/nidera515/OuteTTS/out.csv"

    # Output directory
    output_dir = "/home/nidera515/OuteTTS/output_dataset"

    # Process the CSV and generate the dataset
    process_csv(csv_path, output_dir)
The text was updated successfully, but these errors were encountered:
This should be fine: the interface automatically resamples the audio to the required sample rate, which for v0.3 is 24 kHz.
Sorry, something went wrong.
No branches or pull requests
When preparing the training data, is it a problem to use 44,100 Hz audio when converting it to a byte sequence?
The text was updated successfully, but these errors were encountered: