forked from Pushpak1203/jarvis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpeechToText.py
140 lines (116 loc) · 5.05 KB
/
SpeechToText.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from dotenv import dotenv_values
import os
import mtranslate as mt
# Load the environment variables from the .env file.
env_vars = dotenv_values(".env")
# Get the input language setting from the environment variables.
InputLanguage = env_vars.get("inputLanguage", "en")
# Define the HTML code for the speech-to-text conversion.
HtmlCode = """<!DOCTYPE html>
<html lang="en">
<head>
<title>Speech Recognition</title>
</head>
<body>
<button id="start" onclick="startRecognition()">Start Recognition</button>
<button id="end" onclick="stopRecognition()">Stop Recognition</button>
<p id="output"></p>
<script>
const output = document.getElementById('output');
let recognition;
function startRecognition() {
recognition = new webkitSpeechRecognition() || new SpeechRecognition();
recognition.lang = '';
recognition.continuous = true;
recognition.onresult = function(event) {
const transcript = event.results[event.results.length - 1][0].transcript;
output.textContent += transcript;
};
recognition.onend = function() {
recognition.start();
};
recognition.start();
}
function stopRecognition() {
recognition.stop();
output.innerHTML = "";
}
</script>
</body>
</html>"""
# Replace the language setting in the HTML code with the input language.
HtmlCode = HtmlCode.replace("recognition.lang = ' ';", f"recognition.lang = '{InputLanguage}';")
# Ensure the Data directory exists
os.makedirs("Data", exist_ok=True)
# Write the modified HTML code to a file.
with open(r"Data/voice.html", "w", encoding="utf-8") as f:
f.write(HtmlCode)
# Get the current working directory.
current_dir = os.getcwd()
# Generate the file path for the HTML file.
Link = f"file:///{current_dir}/Data/voice.html"
# Set Chrome options for the WebDriver.
chrome_options = Options()
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
chrome_options.add_argument(f"user-agent={user_agent}")
chrome_options.add_argument("--use-fake-ui-for-media-stream")
chrome_options.add_argument("--use-fake-device-for-media-stream")
chrome_options.add_argument("--headless=new")
# Initialize the Chrome WebDriver.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
# Define the path for temporary files.
TempDirPath = os.path.join(current_dir, "Frontend", "Files")
os.makedirs(TempDirPath, exist_ok=True)
# Function to set the assistant's status by writing to a file.
def SetAssistantStatus(Status):
with open(os.path.join(TempDirPath, "Status.data"), "w", encoding="utf-8") as file:
file.write(Status)
# Function to modify a query to ensure proper punctuation.
def QueryModifier(Query):
new_query = Query.lower().strip()
query_words = new_query.split()
question_words = ["how", "what", "who", "where", "when", "why", "which", "whose", "whom", "can you", "what's", "where's", "who's", "when's", "why's", "which's", "whose's", "whom's", "can you"]
# Check if the query is a question and add a question mark.
if any(word + " " in new_query for word in question_words):
new_query = new_query.rstrip(".!?") + "?"
else:
new_query = new_query.rstrip(".!?") + "."
return new_query.capitalize()
# Function to translate text into English using the mtranslate library.
def UniversalTranslator(Text):
english_translation = mt.translate(Text, "en", "auto")
return english_translation.capitalize()
# Function to perform speech recognition using the WebDriver.
def SpeechRecognition():
# Open the HTML file in the browser.
driver.get(Link)
# Start speech recognition by clicking the start button.
driver.find_element(By.ID, "start").click()
while True:
try:
# Get the recognized text from the HTML output element.
Text = driver.find_element(By.ID, "output").text
if Text:
# Stop recognition by clicking the stop button.
driver.find_element(By.ID, "end").click()
# If the input language is English, return the modified query.
if "en" in InputLanguage.lower():
return QueryModifier(Text)
else:
# If the input language is not English, translate the text and return it.
SetAssistantStatus("Translating...")
return QueryModifier(UniversalTranslator(Text))
except Exception:
pass
# Main execution block.
if __name__ == "__main__":
while True:
# Continuously perform speech recognition and print the recognized text.
Text = SpeechRecognition()
print(Text)