@@ -82,12 +82,71 @@ def recognize_whisper_api_from_file(file_name: str, whisper_model: str):
82
82
transcript = recognize_whisper_api (audio_file , whisper_model )
83
83
return transcript
84
84
85
+
85
86
def recognize_azure_speech_to_text_from_file(file_path: str, key: str, region: str) -> str:
    """
    Recognize speech from an audio file with automatic language detection.

    The recognizer is configured with four candidate languages
    (en-US, zh-CN, hi-IN, es-ES) and performs a single recognition pass
    over the file via ``recognize_once_async``.

    Args:
        file_path (str): Path to the audio file.
        key (str): Azure Speech Service subscription key.
        region (str): Azure service region.

    Returns:
        str: Transcribed text.

    Raises:
        RuntimeError: If no speech is recognized, the recognition is
            canceled, the result has an unexpected reason, or any other
            error occurs while configuring or running the recognizer.
    """
    # Candidate languages for auto-detection. The list is kept at four
    # entries because that is the limit noted for this SDK option.
    languages = ["en-US", "zh-CN", "hi-IN", "es-ES"]

    try:
        # Create a speech configuration with the subscription key and region
        speech_config = speechsdk.SpeechConfig(subscription=key, region=region)

        # Create an audio configuration pointing to the audio file
        audio_config = speechsdk.AudioConfig(filename=file_path)

        # Configure auto language detection with the candidate languages
        auto_detect_source_language_config = (
            speechsdk.languageconfig.AutoDetectSourceLanguageConfig(languages=languages)
        )

        # Create a speech recognizer with the auto language detection configuration
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config,
            audio_config=audio_config,
            auto_detect_source_language_config=auto_detect_source_language_config,
        )

        # Perform speech recognition (blocks until the single-shot result is ready)
        result = speech_recognizer.recognize_once_async().get()

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            # Retrieve the detected language, falling back to "Unknown"
            detected_language = result.properties.get(
                speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult,
                "Unknown",
            )
            # No exc_info here: there is no active exception on the success
            # path, so it would only append a bogus "NoneType: None" line.
            logging.debug("Detected Language %s", detected_language)
            return result.text

        if result.reason == speechsdk.ResultReason.NoMatch:
            raise RuntimeError("No speech could be recognized from the audio.")

        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = speechsdk.CancellationDetails(result)
            raise RuntimeError(
                f"Speech Recognition canceled: {cancellation_details.reason}. "
                f"Error details: {cancellation_details.error_details}"
            )

        raise RuntimeError("Unknown error occurred during speech recognition.")

    except RuntimeError:
        # Already the documented exception type (raised above or by the SDK);
        # let it propagate unchanged so its message is not wrapped twice.
        raise
    except Exception as e:
        # Normalize every other failure to RuntimeError, keeping the cause.
        raise RuntimeError(f"An error occurred during speech recognition: {e}") from e
149
+
91
150
92
151
def speech_to_text_from_file (file_path : str ):
93
152
"""
0 commit comments