-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnew_main.py
83 lines (66 loc) · 2.95 KB
/
new_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import json
import subprocess
import time
import wave
from typing import Iterator, Optional, Sequence

import faster_whisper
import keyboard
import openai
import pyaudio
from elevenlabs import set_api_key, voices, generate
# --- One-time setup: API credentials, prompts, TTS voice, speech-to-text model ---

# Read every text file as UTF-8 explicitly: without `encoding=` Python falls
# back to the locale's code page, which mis-decodes non-ASCII prompt text on
# systems whose default is not UTF-8.
with open("keys/openai_key.txt", encoding="utf-8") as f:
    openai.api_key = f.read().strip()
with open("keys/elevenlabs.txt", encoding="utf-8") as f:
    set_api_key(f.read().strip())
with open("assets/files/prompts.json", encoding="utf-8") as f:
    prompts = json.load(f)

# Pick the ElevenLabs voice by name and fail with an explicit message rather
# than an opaque "list index out of range" when it is not available.
voice = next((v for v in voices() if v.name == "Dorothy"), None)
if voice is None:
    raise LookupError('ElevenLabs voice "Dorothy" was not found')

# Whisper model used to transcribe the recorded speech.
model = faster_whisper.WhisperModel(model_size_or_path="tiny.en", device='cpu')
# Text of the most recent assistant reply; filled in by generate_next_response().
answer = ""
# Conversation so far, as a list of {'role': ..., 'content': ...} dicts.
history = []
def generate_next_response(messages):
    """Stream the chat model's reply to *messages*, one text fragment at a time.

    *messages* is passed unchanged to ``openai.ChatCompletion.create`` with
    ``stream=True``. Each non-empty ``content`` delta is echoed to stdout,
    appended to the module-level ``answer`` and then yielded. ``answer`` is
    reset to "" when iteration starts, so once the generator is exhausted it
    holds the complete reply.
    """
    global answer
    answer = ""
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        stream=True,
    )
    for event in completion:
        piece = event["choices"][0]["delta"].get("content")  # type: ignore
        if not piece:
            # Deltas with missing or empty content carry no text to speak.
            continue
        answer += piece
        print(piece, end="", flush=True)
        yield piece
def custom_stream(
    audio_stream: Iterator[bytes],
    mpv_command: Optional[Sequence[str]] = None,
) -> bytes:
    """Pipe audio chunks into a player process and return all audio written.

    Args:
        audio_stream: Iterator of audio chunks; ``None`` items are skipped.
        mpv_command: Command line of the player, which must read the audio
            from its standard input. Defaults to the mpv invocation this
            script has always used.

    Returns:
        Every chunk that was written to the player, concatenated.

    Raises:
        OSError: If the player cannot be started or the pipe breaks while
            writing. Exceptions raised by *audio_stream* propagate too, but
            only after the pipe is closed and the player has been waited for.
    """
    if mpv_command is None:
        mpv_command = [
            "C:/Users/Ben/Desktop/mpv_base/mpv.exe",
            "--no-cache",
            "--no-terminal",
            "--",
            "fd://0",
        ]
    mpv_process = subprocess.Popen(
        list(mpv_command),
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Collect chunks in a list and join once; repeated `bytes +=` copies the
    # whole buffer on every chunk.
    written = []
    try:
        for chunk in audio_stream:
            if chunk is not None:
                mpv_process.stdin.write(chunk)  # type: ignore
                # Flush per chunk so playback starts as soon as audio arrives.
                mpv_process.stdin.flush()  # type: ignore
                written.append(chunk)
    finally:
        # Always close the pipe and reap the child, even when the audio
        # source or a write fails, so no player process is left behind.
        try:
            if mpv_process.stdin:
                mpv_process.stdin.close()
        except BrokenPipeError:
            # The player already exited; there is nothing left to flush.
            pass
        mpv_process.wait()
    return b"".join(written)
# Capture settings shared by the recorder and the WAV writer.
SAMPLE_RATE = 16000
FRAMES_PER_BUFFER = 512
RECORDING_PATH = "voice_record.wav"


def _record_until_space():
    """Record 16-bit mono microphone audio until space is pressed; return the raw buffers."""
    recorder = pyaudio.PyAudio()
    captured = []
    try:
        mic = recorder.open(
            rate=SAMPLE_RATE,
            format=pyaudio.paInt16,
            channels=1,
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
        )
        try:
            while not keyboard.is_pressed('space'):
                captured.append(mic.read(FRAMES_PER_BUFFER))
        finally:
            # Release the input stream even if reading fails.
            mic.stop_stream()
            mic.close()
    finally:
        recorder.terminate()
    return captured


def _transcribe(frames):
    """Save *frames* to RECORDING_PATH as a WAV file and return Whisper's transcript of it."""
    with wave.open(RECORDING_PATH, 'wb') as wf:
        # Use the module-level helper: the PyAudio instance that made the
        # recording has already been terminated by this point.
        sample_width = pyaudio.get_sample_size(pyaudio.paInt16)
        wf.setparams((1, sample_width, SAMPLE_RATE, 0, 'NONE', 'NONE'))
        wf.writeframes(b''.join(frames))
    segments = model.transcribe(RECORDING_PATH, language="en")[0]
    return " ".join(seg.text for seg in segments)


# Conversation loop: record a question, transcribe it, then speak the reply.
while True:
    # Wait until user presses space bar
    print("\n\nTap space when you're ready. ", end="", flush=True)
    keyboard.wait('space')
    # Wait for the key to be released so the same press does not end the
    # recording immediately; sleep between polls instead of spinning a core.
    while keyboard.is_pressed('space'):
        time.sleep(0.01)

    # Record from microphone until user presses space bar again
    print("I'm all ears. Tap space when you're done.\n")
    frames = _record_until_space()

    # Transcribe recording using whisper
    user_text = _transcribe(frames)
    print(f'>>>{user_text}\n<<< ', end="", flush=True)
    history.append({'role': 'user', 'content': user_text})

    # Generate and stream output; only the last 10 history entries are sent.
    generator = generate_next_response(
        [{"role": "system", "content": prompts["initial"]}] + history[-10:]
    )
    custom_stream(generate(text=generator, voice=voice, model="eleven_monolingual_v1", stream=True))  # type: ignore
    # `answer` was filled in by generate_next_response while it streamed.
    history.append({'role': 'assistant', 'content': answer})