-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpoc.py
92 lines (80 loc) · 2.59 KB
/
poc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import requests
import json
import os
from pydub import AudioSegment
import speech_recognition as sr
# GraphQL endpoint that fetch_data() POSTs the query to (host: openapi.radiofrance.fr).
GRAPHQL_URL = "https://openapi.radiofrance.fr/v1/graphql" # NOTE: this URL might differ; replace it with the correct endpoint if needed.
# GraphQL query sent as the "query" field of the request body. It asks for the
# first diffusion (first: 1) of the show identified by its public page URL and
# selects the podcast episode's url, playerUrl and title.
GRAPHQL_QUERY = """
{
diffusionsOfShowByUrl(
url: "https://www.radiofrance.fr/franceculture/podcasts/de-cause-a-effets-le-magazine-de-l-environnement"
first: 1
) {
edges {
node {
podcastEpisode {
url
playerUrl
title
}
}
}
}
}
"""
# HTTP headers sent with the GraphQL request. The API token is taken from the
# RADIOFRANCE_API_TOKEN environment variable so a real credential never has to
# be written into (and committed with) this file; when the variable is unset
# the original placeholder value is used, so existing behaviour is unchanged.
HEADERS = {
    "x-token": os.environ.get("RADIOFRANCE_API_TOKEN", "PUT_YOUR_API_TOKEN_HERE"),
}
def download_mp3(url, filename="temp.mp3", timeout=30):
    """Stream the resource at *url* to *filename* and return the local path.

    Args:
        url: Address of the file to download.
        filename: Destination path; overwritten if it already exists.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            (time to wait for the server to connect / send data, not a
            limit on the total download time).

    Returns:
        The *filename* that was written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Using the response as a context manager releases the connection even
    # when raise_for_status() or a write fails; with stream=True it would
    # otherwise stay open until the body has been fully consumed.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()  # Raise an exception for HTTP errors
        with open(filename, "wb") as mp3_file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip empty keep-alive chunks
                    mp3_file.write(chunk)
    return filename
def convert_mp3_to_wav(mp3_filepath, wav_filepath="temp.wav"):
    """Load the MP3 at *mp3_filepath* and export it as WAV to *wav_filepath*.

    Returns:
        The *wav_filepath* that was passed in.
    """
    # Decode and re-encode in one chained expression.
    AudioSegment.from_mp3(mp3_filepath).export(wav_filepath, format="wav")
    return wav_filepath
def speech_to_text(wav_filepath, language="en-US", chunk_seconds=30):
    """Transcribe a whole WAV file with the Google Web Speech recognizer.

    The file is read with ``Recognizer.record()`` in consecutive chunks of
    *chunk_seconds* until no audio is left. ``Recognizer.listen()`` is meant
    for live sources and stops after a single phrase, so it would only ever
    transcribe the beginning of the file.

    Args:
        wav_filepath: Path of the WAV file to transcribe.
        language: Language tag forwarded to ``recognize_google``. The default
            matches the library default, so existing callers are unaffected.
            NOTE(review): the show queried in this file is hosted under
            radiofrance.fr/franceculture, so callers probably want
            ``"fr-FR"`` -- confirm.
        chunk_seconds: Length of each audio chunk sent to the recognizer.

    Returns:
        The recognized text of all chunks joined by spaces;
        ``"Could not understand audio"`` when no chunk could be recognized;
        or an ``"API unavailable or unresponsive: ..."`` message when a
        request to the recognition service fails.
    """
    recognizer = sr.Recognizer()
    pieces = []
    with sr.AudioFile(wav_filepath) as source:
        while True:
            # Successive record() calls continue where the previous one
            # stopped; an empty buffer means the file is exhausted.
            audio = recognizer.record(source, duration=chunk_seconds)
            if not audio.frame_data:
                break
            try:
                pieces.append(recognizer.recognize_google(audio, language=language))
            except sr.UnknownValueError:
                # Silence or unintelligible audio in this chunk only; keep
                # going so one bad chunk does not discard the rest.
                continue
            except sr.RequestError as e:
                return f"API unavailable or unresponsive: {e}"
    if not pieces:
        return "Could not understand audio"
    return " ".join(pieces)
def get_all_text_from_podcast(data):
    """Download, convert and transcribe the episode referenced by *data*.

    Args:
        data: Decoded GraphQL response, as accepted by ``get_podcast_url``.

    Returns:
        The transcription produced by ``speech_to_text`` (also printed).

    The temporary MP3/WAV files are removed even when a later step raises.
    """
    first_episode_url = get_podcast_url(data)
    print(f"Downloading from: {first_episode_url}")
    temp_paths = []  # files created so far, to be deleted in the finally block
    try:
        mp3_filepath = download_mp3(first_episode_url)
        temp_paths.append(mp3_filepath)
        wav_filepath = convert_mp3_to_wav(mp3_filepath)
        temp_paths.append(wav_filepath)
        text = speech_to_text(wav_filepath)
    finally:
        # Clean up temporary files
        for path in temp_paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                # Nothing to delete; do not mask the original exception.
                pass
    print("Transcription:\n", text)
    return text
def get_podcast_url(data):
    """Extract the podcast episode URL of the first diffusion in *data*.

    Args:
        data: Decoded GraphQL response; the URL is read from
            ``data["data"]["diffusionsOfShowByUrl"]["edges"][0]["node"]
            ["podcastEpisode"]["url"]``.

    Returns:
        The URL with any ``"\\nnull"`` fragment removed.

    Raises:
        ValueError: If the response does not have the expected structure,
            contains no diffusion, or the first diffusion has no podcast
            episode URL.
    """
    try:
        edges = data["data"]["diffusionsOfShowByUrl"]["edges"]
    except (KeyError, TypeError) as exc:
        # Surface any GraphQL "errors" payload to make the failure diagnosable.
        errors = data.get("errors") if isinstance(data, dict) else None
        raise ValueError(
            f"Unexpected GraphQL response structure (errors: {errors!r})"
        ) from exc
    if not edges:
        raise ValueError("GraphQL response contains no diffusion for this show")
    try:
        url = edges[0]["node"]["podcastEpisode"]["url"]
    except (KeyError, TypeError) as exc:
        # podcastEpisode (or node) is null/missing for this diffusion.
        raise ValueError("First diffusion has no podcast episode URL") from exc
    if not url:
        raise ValueError("First diffusion has no podcast episode URL")
    return url.replace("\nnull", "")
def fetch_data(timeout=30):
    """POST the GraphQL query and run the transcription pipeline on the reply.

    Args:
        timeout: Seconds passed to ``requests.post`` as its ``timeout`` so an
            unresponsive server cannot block the script indefinitely.

    Returns:
        Whatever ``get_all_text_from_podcast`` returns for the decoded JSON
        response. It is returned because the script entry point prints this
        function's result.

    Raises:
        Exception: If the API answers with a status code other than 200.
        requests.RequestException: On connection problems or timeouts.
    """
    # Making the request
    response = requests.post(
        GRAPHQL_URL,
        headers=HEADERS,
        json={"query": GRAPHQL_QUERY},
        timeout=timeout,
    )
    # Error handling
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data. Status code: {response.status_code}, Response: {response.text}")
    return get_all_text_from_podcast(response.json())
if __name__ == "__main__":
    # Script entry point: run the pipeline and pretty-print, as JSON,
    # whatever fetch_data() returned.
    print(json.dumps(fetch_data(), indent=4))