-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmicrophone_interface.py
78 lines (62 loc) · 3.1 KB
/
microphone_interface.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import time
import wave
import pyaudio
# Audio capture settings shared by the stream opened in AudioRecordingHandler.start_recording
# and by the .wav header written in AudioRecordingHandler.save_audio_file.
CHUNK_SIZE = 4096  # Passed to p.open() as frames_per_buffer (frames handed to the callback per chunk)
FORMAT = pyaudio.paInt16  # Sample format; also determines the sample width written to the .wav file
CHANNELS = 1  # Number of audio channels (1 = mono)
RATE = 16000 # Sampling rate in samples per second (Hz) -- not bits per second
# Module-level PyAudio instance, created on import and used to open streams and look up sample sizes.
p = pyaudio.PyAudio()
# https://people.csail.mit.edu/hubert/pyaudio/docs/#example-callback-mode-audio-i-o
class AudioRecordingHandler:
    """Handles the recording of audio from the microphone using PyAudio's callback mode.

    While a stream is open, every captured chunk is appended to ``self.frames``.
    ``stop_recording`` writes those chunks to a .wav file and returns that file's
    contents as a single string of bytes (instead of a list of frames).
    """

    def __init__(self) -> None:
        """Create an idle handler: no captured frames and no open stream."""
        self.frames: list[bytes] = []  # Each frame should be a chunk of audio
        # The open stream while recording, otherwise None. The annotation is quoted so it is
        # never evaluated at runtime.
        self.stream: "pyaudio.Stream | None" = None

    def callback(self, in_data: bytes, frame_count: int, time_info: dict[str, float], status: int) -> tuple[bytes, int]:
        """Store one captured chunk; used as the stream callback for the pyaudio module.

        PyAudio runs this on its own thread, so it only appends to ``self.frames``.

        Args:
            in_data: The recorded audio chunk.
            frame_count: Number of frames in the chunk (unused).
            time_info: Timing information supplied by PyAudio (unused).
            status: Status flags supplied by PyAudio (unused).

        Returns:
            The ``(data, flag)`` tuple PyAudio requires from a callback; ``paContinue``
            keeps the stream running.
        """
        self.frames.append(in_data)
        # Per the PyAudio docs, if the returned data is shorter than the requested frame_count,
        # PyAudio assumes the stream is finished and stops it, so the chunk is returned unmodified.
        return (in_data, pyaudio.paContinue)  # This is required

    def start_recording(self) -> None:
        """Start capturing audio data from the microphone.

        Discards the frames of any earlier recording and opens a new input stream whose
        chunks are delivered to ``callback``. If a previous stream was never stopped, it is
        closed first so that it is not leaked.
        """
        self._close_stream()
        self.frames = []  # Each frame should be a chunk of audio
        stream: pyaudio.Stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK_SIZE,
            stream_callback=self.callback,  # type: ignore[arg-type]
        )
        self.stream = stream

    def stop_recording(self, file_name: str) -> bytes:
        """Stop capturing audio data from the microphone and return the audio bytes.

        Args:
            file_name: Path of the .wav file the recording is written to.

        Returns:
            The complete contents of the written .wav file (header included).

        Raises:
            RuntimeError: If no recording is in progress.
        """
        # Validate before sleeping so a misuse fails immediately (and is not stripped under -O
        # the way an assert would be).
        if self.stream is None:
            raise RuntimeError("No recording in progress; call start_recording() first")
        time.sleep(0.5)  # This is to make sure the chunks don't get truncated early
        self._close_stream()
        # The file written by `wave` carries a WAV header (channels, sample width, frame rate)
        # that the raw b"".join(self.frames) lacks -- presumably why returning the joined frames
        # directly did not work for consumers. So we save, then return the file's bytes.
        self.save_audio_file(self.frames, file_name)
        return self.load_audio_file(file_name)

    def _close_stream(self) -> None:
        """Stop and close the current stream, if there is one, and mark the handler as idle."""
        stream, self.stream = self.stream, None
        if stream is None:
            return
        try:
            stream.stop_stream()
        finally:
            # Always release the stream, even if stopping it failed.
            stream.close()

    @staticmethod
    def load_audio_file(file_name: str) -> bytes:
        """Load an audio file from disk and return its entire contents as a byte string."""
        with open(file_name, "rb") as file:
            return file.read()

    @staticmethod
    def save_audio_file(audio_bytes: list[bytes], file_name: str) -> None:
        """Join a list of raw audio chunks and save them as a .wav file.

        Args:
            audio_bytes: The captured chunks, in recording order.
            file_name: Path of the .wav file to write.
        """
        with wave.open(file_name, "wb") as file:
            file.setnchannels(CHANNELS)
            file.setsampwidth(p.get_sample_size(FORMAT))
            file.setframerate(RATE)
            file.writeframes(b"".join(audio_bytes))
def _main() -> None:
    """Manual smoke test: record five seconds from the microphone, save it, and print the transcription."""
    # Imported here so the speech-to-text unit is only loaded when running this file as a script.
    from units.speech_to_text import STTHandler

    output_path = "assets/audio/recent_user_speech.wav"
    recorder = AudioRecordingHandler()

    print("About to start recording!")
    recorder.start_recording()
    time.sleep(5)
    print("Slept 5 seconds, stop recording!")
    wav_bytes = recorder.stop_recording(output_path)
    print("Done recording")

    print("Now transcribing")
    transcriber = STTHandler(time_transcription=True)
    # The variable must be called `text`: the f-string below prints the name via `{text=}`.
    text = transcriber.transcribe(wav_bytes)
    print(f"Stop recording function is returning: {text=}")


if __name__ == "__main__":
    _main()