-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
166 lines (117 loc) · 5.58 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
'''
A program that takes a YouTube video link as user input on the terminal and extracts high-quality audio from the video.
OpenAI Whisper's translate function is used to translate the audio into English and transcribe it.
If the audio file is larger than 25MB, it will be split into 25MB chunks and each chunk will be transcribed or translated.
Finally, once the transcription or translation is done, the text is printed to the terminal.
The price of the API call is $0.006 per minute.
This code should be integrated with FastAPI.
'''
from dotenv import load_dotenv
import os
import openai
from pytube import YouTube
import argparse
from google.oauth2 import service_account
from google.cloud import translate_v3 as translate
# Load settings via python-dotenv before they are read with os.getenv below
load_dotenv()
# Google Cloud project id used to build the Translation API parent path
GOOGLE_PROJECT_ID=os.getenv("GOOGLE_PROJECT_ID")
# Largest audio size, in bytes, that is processed without chunking
MAX_SIZE=25000000 # 25MB
# The main function exists for testing from the terminal.
def main():
    """Run the pipeline from the terminal for manual testing.

    Parses the required ``--input``/``-i`` YouTube link, extracts the audio
    stream and, when the audio is within the size limit, transcribes it to
    English and passes the text on for translation.

    Returns:
        The YouTube URL that was given on the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", "-i", type=str, required=True, help="YouTube video link")
    args = parser.parse_args()
    url = args.input

    audio_stream, audio_size_MB = extract_audio(url)

    # extract_audio reports the size in megabytes while MAX_SIZE is expressed
    # in bytes, so convert the limit to megabytes before comparing.
    max_size_mb = MAX_SIZE / 1000000
    if audio_size_MB > max_size_mb:
        print("Audio size is more than 25MB, starting chunk files... ")
        # TODO: split the audio into chunks and transcribe each chunk;
        # chunking is not implemented yet.
    else:
        # Transcribe the audio to English, then translate the result
        eng_text = transcribe_to_english(audio_stream)
        translate_text(eng_text, GOOGLE_PROJECT_ID)
    return url
# extract audio from youtube video
# Maybe allow users to download extracted audio using this function and save it to their local machine.
def extract_audio(url: str):
    """Look up a YouTube video and select its highest-bitrate audio stream.

    Prints the video title, its duration and the size of the chosen stream.

    Args:
        url: Link to the YouTube video.

    Returns:
        A ``(stream, size_mb)`` tuple: the selected pytube audio stream and
        its file size in megabytes (1 MB = 1,000,000 bytes).
    """
    video = YouTube(url)
    video_title = video.title
    total_seconds = video.length  # duration in seconds

    # Break the duration down into hours, minutes and seconds
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f"Video title: {video_title} \n")
    print(f"Video length: {hours} hrs {minutes} mins {seconds} secs")

    # Audio-only streams ordered by audio bit rate (abr), highest first
    audio_only = video.streams.filter(only_audio=True)
    best_first = audio_only.order_by('abr').desc()
    best_stream = best_first.first()

    # filesize is in bytes; report it in megabytes
    size_mb = best_stream.filesize / 1000000
    print(f"Audio file size: {size_mb} MB")
    return best_stream, size_mb
# Transcribe in English text
def transcribe_to_english(audio_stream):
    """Download the audio stream and return its English text from Whisper.

    The stream is saved as ``audio/audio.wav`` under the current working
    directory, sent to OpenAI's ``whisper-1`` translate endpoint, and the
    downloaded file is deleted afterwards.

    Args:
        audio_stream: The pytube audio stream to download.

    Returns:
        The ``text`` attribute of the OpenAI translation response.
    """
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # Download before entering the try block: if the download itself fails
    # there is no file to clean up.
    media_file_path = audio_stream.download(
        output_path=os.path.join(os.getcwd(), 'audio'),
        filename='audio.wav',
    )
    try:
        # The context manager closes the handle before the file is removed
        with open(media_file_path, 'rb') as media_file:
            # NOTE(review): to_language is forwarded as an extra request
            # parameter; confirm that the translations endpoint honours it.
            response = openai.Audio.translate(
                model='whisper-1',
                file=media_file,
                to_language='en',
            )
    finally:
        # Remove the temporary download even when the API call raises
        os.remove(media_file_path)
    return response.text
# Translation function using the Google Translate API
# Takes the English text produced by the transcribe_to_english function
# Service-account credentials for the Google client, read from
# 'credential.json' when this module is imported
credentials = service_account.Credentials.from_service_account_file('credential.json')
def translate_text(text, project_id=GOOGLE_PROJECT_ID):
    """Translate English text to Korean via Google Translate v3 and print it.

    Args:
        text: The English text to translate.
        project_id: Google Cloud project id used in the request's parent
            path; defaults to the module-level GOOGLE_PROJECT_ID.
    """
    translation_client = translate.TranslationServiceClient(credentials=credentials)

    # Build the whole request up front; the location is always "global"
    request = {
        "parent": f"projects/{project_id}/locations/global",
        "contents": [text],
        "mime_type": "text/plain",  # mime types: text/plain, text/html
        "source_language_code": "en-US",
        "target_language_code": "ko",
    }
    result = translation_client.translate_text(request=request)

    # Only one content item was sent, so show the first translation
    first_translation = result.translations[0]
    print(f"Translated text: \n {first_translation.translated_text}")
# Chunk audio file into 25MB chunks and return a list of chunks
# def chunkAudio(audio_stream):
# pass
# Run the terminal entry point only when this file is executed as a script
if __name__ == "__main__":
    main()
'''
Traceback (most recent call last):
File "/Users/seokhyeonbyun/Desktop/STT/backend/app.py", line 148, in <module>
main()
File "/Users/seokhyeonbyun/Desktop/STT/backend/app.py", line 43, in main
translate_text(ENG_TEXT, GOOGLE_PROJECT_ID)
File "/Users/seokhyeonbyun/Desktop/STT/backend/app.py", line 118, in translate_text
client = translate.TranslationServiceClient()
File "/Users/seokhyeonbyun/Desktop/STT/backend/stt-venv/lib/python3.10/site-packages/google/cloud/translate_v3/services/translation_service/client.py", line 436, in __init__
self._transport = Transport(
File "/Users/seokhyeonbyun/Desktop/STT/backend/stt-venv/lib/python3.10/site-packages/google/cloud/translate_v3/services/translation_service/transports/grpc.py", line 152, in __init__
super().__init__(
File "/Users/seokhyeonbyun/Desktop/STT/backend/stt-venv/lib/python3.10/site-packages/google/cloud/translate_v3/services/translation_service/transports/base.py", line 103, in __init__
credentials, _ = google.auth.default(
File "/Users/seokhyeonbyun/Desktop/STT/backend/stt-venv/lib/python3.10/site-packages/google/auth/_default.py", line 648, in default
raise exceptions.DefaultCredentialsError(_CLOUD_SDK_MISSING_CREDENTIALS)
google.auth.exceptions.DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.
'''