
Commit

format+lint
henrikskog committed Sep 5, 2024
1 parent 5475169 commit dce2b30
Showing 7 changed files with 129 additions and 100 deletions.
9 changes: 5 additions & 4 deletions episode_poller/src/core/config.py
@@ -1,9 +1,11 @@
import os
import abc

+
def is_running_on_azure():
    # Check if the WEBSITE_SITE_NAME environment variable is set
-    return 'AzureWebJobsStorage' in os.environ
+    return "AzureWebJobsStorage" in os.environ

+
class Singleton(abc.ABCMeta, type):
    """
@@ -14,9 +16,7 @@ class Singleton(abc.ABCMeta, type):

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
-            cls._instances[cls] = super(
-                Singleton, cls).__call__(
-                *args, **kwargs)
+            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


@@ -28,6 +28,7 @@ class Config(metaclass=Singleton):
    def __init__(self):
        if not is_running_on_azure():
            from dotenv import load_dotenv
+
            # Load environment variables from .env file
            load_dotenv()

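For context, a minimal usage sketch of the Singleton metaclass above (not part of the commit; the import path is assumed from the sibling core.* modules): every call to Config() returns the same cached instance, so the .env file is loaded at most once per process.

from core.config import Config

config_a = Config()  # first call: loads .env when not running on Azure
config_b = Config()  # later calls return the cached instance

assert config_a is config_b  # Singleton.__call__ stores one instance per class
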
63 changes: 34 additions & 29 deletions episode_poller/src/core/github.py
@@ -3,6 +3,7 @@
import json
import logging

+
class GithubClient:
    def __init__(self, token, github_owner, github_repo):
        self.access_token = token
@@ -11,72 +12,76 @@ def __init__(self, token, github_owner, github_repo):

    def get_file(self, filename) -> str:
        # URL for the GitHub API endpoint
-        url = f'https://api.github.com/repos/{self.github_owner}/{self.github_repo}/contents/{filename}'
+        url = f"https://api.github.com/repos/{self.github_owner}/{self.github_repo}/contents/{filename}"

        # Get the file content using the GitHub API
-        response = requests.get(url, headers={'Authorization': f'token {self.access_token}'})
+        response = requests.get(
+            url, headers={"Authorization": f"token {self.access_token}"}
+        )

        # Check the response
        if response.status_code == 200:
-            content_base64 = response.json()['content']
+            content_base64 = response.json()["content"]
            content_bytes = base64.b64decode(content_base64)

            # return text
-            return content_bytes.decode('utf-8')
+            return content_bytes.decode("utf-8")
        else:
-            logging.error('Error getting github file:', response.json())
+            logging.error("Error getting github file:", response.json())
            return None

    def write_file(self, filename, content, commit_msg):
        # Convert the JSON content to a string and encode it in base64
-        new_content_bytes = content.encode('utf-8')
-        new_content_base64 = base64.b64encode(new_content_bytes).decode('utf-8')
+        new_content_bytes = content.encode("utf-8")
+        new_content_base64 = base64.b64encode(new_content_bytes).decode("utf-8")

        # URL for the GitHub API endpoint
-        url = f'https://api.github.com/repos/{self.github_owner}/{self.github_repo}/contents/{filename}'
+        url = f"https://api.github.com/repos/{self.github_owner}/{self.github_repo}/contents/{filename}"

        # Get the current SHA of the file (required for updating the file)
-        response = requests.get(url, headers={'Authorization': f'token {self.access_token}'})
-        current_sha = response.json()['sha']
+        response = requests.get(
+            url, headers={"Authorization": f"token {self.access_token}"}
+        )
+        current_sha = response.json()["sha"]

        # Prepare the data for the update request
        data = {
-            'message': commit_msg,
-            'content': new_content_base64,
-            'sha': current_sha # Include the current SHA to update the file
+            "message": commit_msg,
+            "content": new_content_base64,
+            "sha": current_sha,  # Include the current SHA to update the file
        }

        # Update the file using the GitHub API
-        response = requests.put(url, headers={'Authorization': f'token {self.access_token}'}, json=data)
+        response = requests.put(
+            url, headers={"Authorization": f"token {self.access_token}"}, json=data
+        )

        # Check the response
        if response.status_code == 200:
-            logging.info('Github file updated successfully')
+            logging.info("Github file updated successfully")
        else:
-            logging.error('Error updating github file:', response.text)
+            logging.error("Error updating github file:", response.text)

    # Not used but keeping to remember that you may need to handle creating a file that does not exist yet
    def create_file(self, filename, new):
        # Convert the JSON content to a string and encode it in base64
        new_content_str = json.dumps(new, indent=4)
-        new_content_bytes = new_content_str.encode('utf-8')
-        new_content_base64 = base64.b64encode(new_content_bytes).decode('utf-8')
+        new_content_bytes = new_content_str.encode("utf-8")
+        new_content_base64 = base64.b64encode(new_content_bytes).decode("utf-8")

        # URL for the GitHub API endpoint
-        url = f'https://api.github.com/repos/{self.github_owner}/{self.github_repo}/contents/{filename}'
+        url = f"https://api.github.com/repos/{self.github_owner}/{self.github_repo}/contents/{filename}"

        # Prepare the data for the create request
-        data = {
-            'message': 'Create JSON file',
-            'content': new_content_base64
-        }
+        data = {"message": "Create JSON file", "content": new_content_base64}

        # Create the file using the GitHub API
-        response = requests.put(url, headers={'Authorization': f'token {self.access_token}'}, json=data)
+        response = requests.put(
+            url, headers={"Authorization": f"token {self.access_token}"}, json=data
+        )

        # Check the response
        if response.status_code == 201:
-            logging.info('Github file created successfully')
+            logging.info("Github file created successfully")
        else:
-            logging.error('Error creating github file:', response.json())
+            logging.error("Error creating github file:", response.json())
9 changes: 2 additions & 7 deletions episode_poller/src/core/gpt.py
@@ -1,16 +1,11 @@
import openai
import logging

+
class GptClient:
    def __init__(self, openai_api_key):
        openai.api_key = openai_api_key

    def chat_completion(self, messages):
-        response = openai.ChatCompletion.create(
-            messages=messages,
-            model="gpt-4o-mini"
-        )
+        response = openai.ChatCompletion.create(messages=messages, model="gpt-4o-mini")

        return response.choices[0].message.content
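
A minimal usage sketch of GptClient (not part of the commit; the API key is a placeholder): chat_completion takes the usual list of role/content messages and returns only the text of the first choice.

from core.gpt import GptClient

gpt = GptClient("<openai-api-key>")
answer = gpt.chat_completion(
    [{"role": "user", "content": "Reply with exactly: Norway, NO"}]
)
print(answer)  # plain string, e.g. "Norway, NO"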


78 changes: 51 additions & 27 deletions episode_poller/src/core/podcast198land.py
@@ -5,21 +5,25 @@
from core.spotify import SpotifyClient
from core.github import GithubClient

+
class Podcast198LandService:
    def __init__(self):
        config = Config()
-        GITHUB_OWNER = 'henrikskog'
-        GITHUB_REPO = '198-land-kart'
+        GITHUB_OWNER = "henrikskog"
+        GITHUB_REPO = "198-land-kart"
        logging.basicConfig(level=logging.INFO)

        self.BY_COUNTRY_PATH = "episodes_by_country.json"
-        self.file_path = 'episodes_by_country.json'
+        self.file_path = "episodes_by_country.json"
        self.RAW_EPISODES_PATH = "raw_episodes.json"
-        self.SPOTIFY_SHOW_ID = '7gVC1AP7O35An9TK6l2XpJ'
-        self.github_client = GithubClient(config.github_api_key, GITHUB_OWNER, GITHUB_REPO)
+        self.SPOTIFY_SHOW_ID = "7gVC1AP7O35An9TK6l2XpJ"
+        self.github_client = GithubClient(
+            config.github_api_key, GITHUB_OWNER, GITHUB_REPO
+        )
        self.gpt_client = GptClient(config.openai_api_key)
-        self.spotify_client = SpotifyClient(config.spotify_client_id, config.spotify_client_secret)
+        self.spotify_client = SpotifyClient(
+            config.spotify_client_id, config.spotify_client_secret
+        )

    @staticmethod
    def GPT_PROMPT(episode_name, episode_description):
@@ -61,7 +65,12 @@ def get_198_land_episodes(self):
        return self.spotify_client.get_episodes(self.SPOTIFY_SHOW_ID)

    def extract_country(self, episode_name: str, episode_description: str):
-        messages = [{"role": "user", "content": self.GPT_PROMPT(episode_name, episode_description)}]
+        messages = [
+            {
+                "role": "user",
+                "content": self.GPT_PROMPT(episode_name, episode_description),
+            }
+        ]

        gpt_response = self.gpt_client.chat_completion(messages)

@@ -71,8 +80,10 @@ def extract_country(self, episode_name: str, episode_description: str):
        try:
            country, cc = gpt_response.split(", ")
            return country, cc
-        except:
-            logging.error(f"Got unexpected answer from gpt: {gpt_response} given the prompt: {self.GPT_PROMPT(episode_name, episode_description)}")
+        except Exception as _:
+            logging.error(
+                f"Got unexpected answer from gpt: {gpt_response} given the prompt: {self.GPT_PROMPT(episode_name, episode_description)}"
+            )
            return None, None

    def get_raw_episodes_file(self):
@@ -89,39 +100,43 @@ def raw_episodes_to_by_country(self, new_episodes: list) -> dict:
        for episode in new_episodes:
            country, cc = self.extract_country(episode["name"], episode["description"])

-            if country == None or cc == None:
-                logging.info(f"Could not extract country from episode {episode['name']}")
+            if country is None or cc is None:
+                logging.info(
+                    f"Could not extract country from episode {episode['name']}"
+                )
                continue

-            new = {
-                "country": country,
-                "ep": episode
-            }
+            new = {"country": country, "ep": episode}

-            logging.info(f"Episode {episode['name']} got classified as {country} ({cc})")
+            logging.info(
+                f"Episode {episode['name']} got classified as {country} ({cc})"
+            )

            if cc in by_country:
                for e in by_country[cc]:
                    if e["ep"]["name"] == new["ep"]["name"]:
-                        logging.warn(f"Episode {episode['name']} already exists in list. Exiting.")
+                        logging.warn(
+                            f"Episode {episode['name']} already exists in list. Exiting."
+                        )
                        return None

                by_country[cc].append(new)
            else:
                by_country[cc] = [new]

        return by_country

    def process_new_episodes(self, new_episodes: list):
        logging.info("Checking for new episodes...")

-        logging.info(f"Found {len(new_episodes)} new episodes.\n" + "\n".join([f"- {e['name']}" for e in new_episodes]))
+        logging.info(
+            f"Found {len(new_episodes)} new episodes.\n"
+            + "\n".join([f"- {e['name']}" for e in new_episodes])
+        )

        by_country = self.raw_episodes_to_by_country(new_episodes)

-        if by_country == None: # Meaning we found a duplicate
+        if by_country is None:  # Meaning we found a duplicate
            logging.info("Duplicate found. Exiting and not writing to github.")
            return

@@ -131,16 +146,25 @@ def update_episode_data(self):
        # all are ordered by date, newest first
        all_episodes = self.get_198_land_episodes()
        stored_episodes = self.get_raw_episodes_file()
-        new_episodes = all_episodes[0: len(all_episodes) - len(stored_episodes)]
+        new_episodes = all_episodes[0 : len(all_episodes) - len(stored_episodes)]

        if len(new_episodes) == 0:
            logging.info("No new episodes found. Exiting.")
            return

        episodes_by_country = self.process_new_episodes(new_episodes)
-        self.github_client.write_file(self.RAW_EPISODES_PATH, json.dumps(all_episodes, indent=4), "Automatic update of json file with new podcast episode!")
-        self.github_client.write_file(self.BY_COUNTRY_PATH, json.dumps(episodes_by_country, indent=4), "Automatic update of json file with new podcast episode!")
+        self.github_client.write_file(
+            self.RAW_EPISODES_PATH,
+            json.dumps(all_episodes, indent=4),
+            "Automatic update of json file with new podcast episode!",
+        )
+        self.github_client.write_file(
+            self.BY_COUNTRY_PATH,
+            json.dumps(episodes_by_country, indent=4),
+            "Automatic update of json file with new podcast episode!",
+        )

+
if __name__ == "__main__":
    service = Podcast198LandService()
    service.update_episode_data()
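
An illustration (hypothetical data, not part of the commit) of the slice in update_episode_data: because both lists are ordered newest-first, the first len(all_episodes) - len(stored_episodes) entries of all_episodes are exactly the episodes not yet stored.

all_episodes = ["ep5", "ep4", "ep3", "ep2", "ep1"]  # newest first
stored_episodes = ["ep3", "ep2", "ep1"]             # also newest first

new_episodes = all_episodes[0 : len(all_episodes) - len(stored_episodes)]
print(new_episodes)  # ['ep5', 'ep4'], the episodes missing from storage
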
38 changes: 18 additions & 20 deletions episode_poller/src/core/spotify.py
@@ -1,50 +1,48 @@
import requests

-AUTH_URL = 'https://accounts.spotify.com/api/token'
+AUTH_URL = "https://accounts.spotify.com/api/token"


class SpotifyClient:
-
    def __init__(self, client_id, client_secret):
        self.client_id = client_id
        self.client_secret = client_secret
        self.access_token = self.get_auth_token()

    def get_auth_token(self):
-        auth_response = requests.post(AUTH_URL, {
-            'grant_type': 'client_credentials',
-            'client_id': self.client_id,
-            'client_secret': self.client_secret,
-        })
+        auth_response = requests.post(
+            AUTH_URL,
+            {
+                "grant_type": "client_credentials",
+                "client_id": self.client_id,
+                "client_secret": self.client_secret,
+            },
+        )

        auth_response_data = auth_response.json()
-        access_token = auth_response_data['access_token']
+        access_token = auth_response_data["access_token"]
        return access_token

    def get_episodes(self, show_id):
-        headers = {
-            'Authorization': 'Bearer {token}'.format(token=self.access_token)
-        }
+        headers = {"Authorization": "Bearer {token}".format(token=self.access_token)}

-        BASE_URL = 'https://api.spotify.com/v1/'
+        BASE_URL = "https://api.spotify.com/v1/"
        limit = 50  # Maximum allowed by Spotify API
        offset = 0
        episodes = []

        while True:
            response = requests.get(
-                BASE_URL + f'shows/{show_id}/episodes',
+                BASE_URL + f"shows/{show_id}/episodes",
                headers=headers,
-                params={
-                    'limit': limit,
-                    'offset': offset,
-                    'market': 'NO'
-                }
+                params={"limit": limit, "offset": offset, "market": "NO"},
            )

            response_data = response.json()
-            episodes.extend(response_data['items'])
+            episodes.extend(response_data["items"])

            # Check if there's a next page
-            if response_data['next']:
+            if response_data["next"]:
                offset += limit
            else:
                break
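
A minimal usage sketch of SpotifyClient (not part of the commit; credentials are placeholders): get_episodes pages through the show's episodes 50 at a time until the response has no next page, returning them newest-first, which is what the rest of the code assumes.

from core.spotify import SpotifyClient

spotify = SpotifyClient("<client-id>", "<client-secret>")
episodes = spotify.get_episodes("7gVC1AP7O35An9TK6l2XpJ")  # show id used above

print(len(episodes), "episodes fetched")
print(episodes[0]["name"])  # newest episode; 'name'/'description' are used upstream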