# cau_engineering_design_introduction/MFCC_transformation_code.py at main · ghnoh1008/cau_engineering_design_introduction · GitHub
import os
from pydub import AudioSegment
import librosa
import numpy as np
import soundfile as sf

# Settings: folder holding the source recordings, plus the output folders for
# the 1-second WAV chunks and for the saved spectrogram features.
input_directory = r"C:\Users\kikio\Desktop\전자전기공학\2학년\공설입\project\mp3_data"
temp_wav_dir = r"C:\Users\kikio\Desktop\전자전기공학\2학년\공설입\project\split_wavs"
feature_dir = r"C:\Users\kikio\Desktop\전자전기공학\2학년\공설입\project\features"

# Both output roots must exist before any chunk or feature file is written.
for _output_root in (temp_wav_dir, feature_dir):
    os.makedirs(_output_root, exist_ok=True)

# Substrings searched for in each lower-cased file name to pick its category.
keywords = ['mosquito', 'flies']

# Supported audio file extensions
supported_formats = ['.mp3', '.wav']

# Mel spectrogram settings. They are the same for every file, so they are
# defined once here instead of being re-assigned on each loop iteration.
sr_desired = 22050
n_mels = 128

# Process every supported audio file found directly in the input folder:
# split it into 1-second WAV chunks, then save one dB-scaled mel spectrogram
# (.npy) per chunk under a per-category sub-folder.
for audio_file in os.listdir(input_directory):
    base_name, file_ext = os.path.splitext(audio_file)
    file_ext = file_ext.lower()
    if file_ext not in supported_formats:
        continue  # skip anything that is not mp3 or wav

    audio_path = os.path.join(input_directory, audio_file)
    print(f"처리 중: {audio_path}")

    # Load with the reader that matches the extension
    if file_ext == '.mp3':
        audio = AudioSegment.from_mp3(audio_path)
    elif file_ext == '.wav':
        audio = AudioSegment.from_wav(audio_path)

    # Truncating to whole seconds means a trailing partial second is dropped.
    duration_sec = int(audio.duration_seconds)
    print(f"총 길이: {duration_sec}초")

    # Category is the first keyword contained in the lower-cased file name,
    # or "others" when no keyword matches.
    category = next((k for k in keywords if k in audio_file.lower()), "others")

    # Per-category output folders
    category_wav_dir = os.path.join(temp_wav_dir, category)
    category_feature_dir = os.path.join(feature_dir, category)

    os.makedirs(category_wav_dir, exist_ok=True)
    os.makedirs(category_feature_dir, exist_ok=True)

    # Cut into 1-second chunks and save each one. The written paths are
    # remembered so the feature step below handles exactly these chunks,
    # rather than re-listing the folder and prefix-matching file names (which
    # would also pick up chunks of other recordings whose name starts with
    # the same text, as well as stale chunks left over from earlier runs).
    chunk_paths = []
    for i in range(duration_sec):
        chunk = audio[i * 1000:(i + 1) * 1000]
        chunk_path = os.path.join(category_wav_dir, f"{base_name}_chunk_{i}.wav")
        chunk.export(chunk_path, format="wav")
        chunk_paths.append(chunk_path)

    print(f"1초 단위로 자르기 완료: {audio_file}")

    # Mel spectrogram conversion
    for fpath in chunk_paths:
        y, sr = librosa.load(fpath, sr=sr_desired)
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        mel_db = mel_db[np.newaxis, ...]  # add a channel axis (1, 128, time)

        # Swap only the real extension; str.replace would also rewrite any
        # ".wav" that happens to appear inside the base name.
        chunk_stem = os.path.splitext(os.path.basename(fpath))[0]
        save_path = os.path.join(category_feature_dir, chunk_stem + ".npy")
        np.save(save_path, mel_db)

    print(f"mel spectrogram 추출 및 저장 완료: {audio_file}")

print("모든 오디오 파일 처리 완료.")