-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvirtual_assistant.py
More file actions
154 lines (144 loc) · 5.3 KB
/
virtual_assistant.py
File metadata and controls
154 lines (144 loc) · 5.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import asyncio
import base64
import io
import json
import os

import cv2
import requests
from dotenv import load_dotenv
from fastrtc import Stream, ReplyOnPause
from fastrtc.tracks import StreamHandlerBase
from PIL import Image
class AudioHandler(StreamHandlerBase):
    """Audio stream handler that forwards received frames to a VirtualAssistant.

    Each frame read from the track is passed to
    ``virtual_assistant.process_audio`` and the ``"audio"`` entry of the
    returned dict (when present) is yielded back to the caller.
    """

    def __init__(self, virtual_assistant):
        """Configure the base handler and remember the assistant.

        Args:
            virtual_assistant: Object exposing ``process_audio(audio=...)``
                that returns either ``None`` or a dict with an ``"audio"`` key.
        """
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
            output_frame_size=960,
            input_sample_rate=48000,
        )
        self.virtual_assistant = virtual_assistant

    async def on_track(self, track):
        """Yield assistant replies for every frame received on ``track``.

        The loop ends on the first exception raised while receiving or
        processing a frame; the error is printed rather than propagated.

        NOTE(review): confirm that the fastrtc base class actually drives
        this coroutine -- that contract is not visible in this file.

        Args:
            track: Object with an awaitable ``recv()`` returning audio frames.

        Yields:
            The ``"audio"`` value produced by the assistant for a frame.
        """
        while True:
            try:
                frame = await track.recv()
                # process_audio performs a blocking HTTP request; run it in a
                # worker thread so the event loop keeps servicing the stream.
                result = await asyncio.to_thread(
                    self.virtual_assistant.process_audio, audio=frame
                )
                reply = result.get("audio") if result else None
                # Skip frames for which the assistant produced no reply
                # instead of yielding None downstream.
                if reply is not None:
                    yield reply
            except Exception as e:
                print(f"Audio processing error: {e}")
                break
class VirtualAssistant:
    """Troubleshooting assistant backed by the OpenRouter chat-completions API.

    On construction it loads ``OPENROUTER_API_KEY`` from the environment
    (via ``.env``), creates an ``AudioHandler`` and builds one audio and one
    video fastrtc ``Stream``. ``run()`` launches the UI of both streams.
    """

    # OpenRouter chat-completions endpoint used by get_ai_response.
    _API_URL = "https://openrouter.ai/api/v1/chat/completions"

    # System message sent with every request.
    _SYSTEM_PROMPT = (
        "\n"
        "You are a virtual assistant with expertise in computer vision and AI.\n"
        "You analyze audio and video inputs to provide comprehensive support,\n"
        "offering concrete, step-by-step instructions to solve technical issues.\n"
        "Maintain a professional and helpful tone, being direct and concise.\n"
    )

    def __init__(self):
        """Load configuration and create the audio/video streams.

        Raises:
            ValueError: If ``OPENROUTER_API_KEY`` is not set.
        """
        # Load environment variables
        load_dotenv()
        # Load API key from .env
        self.api_key = os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ValueError("OPENROUTER_API_KEY not found in .env file")
        # Configure free model
        self.model = "google/gemini-2.0-flash-thinking-exp"
        # Create audio handler instance
        self.audio_handler = AudioHandler(self)
        # Initialize FastRTC streams
        self.audio_stream = Stream(
            handler=self.audio_handler,
            modality="audio",
            mode="send-receive",
        )
        self.video_stream = Stream(
            handler=self.process_video,
            modality="video",
            mode="send-receive",
        )

    def process_audio(self, audio=None):
        """Ask the model for troubleshooting steps based on an audio frame.

        Args:
            audio: The received audio frame, or ``None``.

        Returns:
            ``{"audio": <model reply>}`` on success; ``None`` when no audio
            was supplied, the model returned nothing, or an error occurred.
        """
        # Compare against None explicitly: array-like frames raise on
        # truthiness tests and would otherwise never be processed.
        if audio is None:
            return None
        try:
            # NOTE(review): this interpolates the frame object's string form
            # into the prompt, not a transcript -- a speech-to-text step is
            # probably needed before this is useful to the model.
            prompt = (
                "\n"
                "You are a virtual assistant helping users troubleshoot technical issues.\n"
                f"The user has provided audio input: {audio}\n"
                "Identify the user's problem based on the audio and provide concrete,\n"
                "step-by-step instructions to solve it. Focus on practical steps.\n"
            )
            response = self.get_ai_response(prompt)
            # A failed API call yields None; report "no result" rather than
            # handing {"audio": None} to the stream handler.
            if response is None:
                return None
            return {"audio": response}
        except Exception as e:
            print(f"Error processing audio: {e}")
            return None

    def frame_to_base64(self, frame):
        """Encode an image frame as a base64 JPEG string.

        Args:
            frame: Image accepted by ``cv2.imencode``.

        Returns:
            The JPEG bytes encoded as an ASCII base64 ``str``.

        Raises:
            ValueError: If OpenCV reports that encoding failed.
        """
        ok, buffer = cv2.imencode('.jpg', frame)
        if not ok:
            raise ValueError("Failed to encode frame as JPEG")
        return base64.b64encode(buffer).decode('utf-8')

    def process_video(self, video_frame=None):
        """Ask the model for troubleshooting steps based on a video frame.

        Args:
            video_frame: The received image frame, or ``None``.

        Returns:
            ``{"video": <annotated frame>, "response": <model reply or None>}``
            on success; ``None`` when no frame was supplied or an error
            occurred.
        """
        # Explicit None check: `if video_frame:` raises for multi-element
        # NumPy arrays, which made every real frame fail.
        if video_frame is None:
            return None
        try:
            video_data = self.frame_to_base64(video_frame)
            # The frame is attached as an image part instead of being pasted
            # into the text, where the base64 blob was unreadable to the
            # model and inflated the prompt.
            prompt = (
                "\n"
                "You are a virtual assistant helping users troubleshoot technical issues.\n"
                "The user has provided video input, attached as an image.\n"
                "Identify the user's problem based on the visual information and provide\n"
                "concrete, step-by-step instructions to solve it. Focus on practical steps.\n"
            )
            response = self.get_ai_response(prompt, image_base64=video_data)
            processed_frame = self.highlight_problem_area(video_frame, response)
            return {
                "video": processed_frame,
                "response": response,
            }
        except Exception as e:
            print(f"Error processing video: {e}")
            return None

    def get_ai_response(self, prompt, image_base64=None, timeout=30):
        """Send ``prompt`` to OpenRouter and return the model's reply text.

        Args:
            prompt: User message text.
            image_base64: Optional base64-encoded JPEG to attach to the user
                message as an ``image_url`` content part.
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            The reply content string, or ``None`` if the request failed, the
            status code was not 200, or the body had an unexpected shape.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        if image_base64 is None:
            user_content = prompt
        else:
            user_content = [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_base64}",
                    },
                },
            ]
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
        }
        try:
            # Without a timeout a stalled connection would block the stream
            # handler indefinitely.
            response = requests.post(
                self._API_URL,
                headers=headers,
                json=payload,
                timeout=timeout,
            )
        except requests.RequestException as e:
            print(f"Error contacting OpenRouter: {e}")
            return None
        if response.status_code != 200:
            print(f"OpenRouter request failed with status {response.status_code}")
            return None
        try:
            return response.json()["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError) as e:
            print(f"Unexpected OpenRouter response: {e}")
            return None

    def highlight_problem_area(self, frame, ai_response):
        """Draw a fixed "Problem Area Highlighted" label on ``frame``.

        The frame is modified in place. ``ai_response`` is currently unused.

        Args:
            frame: Image to annotate.
            ai_response: Model reply associated with the frame (ignored).

        Returns:
            The same ``frame`` object, annotated when drawing succeeded.
        """
        try:
            cv2.putText(
                frame,
                "Problem Area Highlighted",
                (10, 20),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 0, 255),
                2,
            )
        except Exception as e:
            # Best effort: return the frame without the label on failure.
            print(f"Error in highlighting: {e}")
        return frame

    def run(self):
        """Launch the Gradio UIs for the audio and video streams."""
        # The first launch must not block, otherwise the video UI would only
        # start after the audio server has been shut down.
        self.audio_stream.ui.launch(
            show_api=False,
            show_error=False,
            prevent_thread_lock=True,
        )
        # The final launch blocks and keeps the process (and both UIs) alive.
        self.video_stream.ui.launch(
            show_api=False,
            show_error=False,
        )
if __name__ == "__main__":
    # Script entry point: build the assistant and start its UIs.
    VirtualAssistant().run()