44import pytz
55from bs4 import BeautifulSoup
66from feedgen .feed import FeedGenerator
7- from utils import ( fetch_page , save_rss_feed , setup_feed_links , setup_logging ,
8- sort_posts_for_feed )
7+
8+ from utils import fetch_page , save_rss_feed , setup_feed_links , setup_logging , sort_posts_for_feed
99
1010logger = setup_logging ()
1111
@@ -18,7 +18,7 @@ def fetch_engineering_content(url=BLOG_URL):
1818 try :
1919 return fetch_page (url )
2020 except Exception as e :
21- logger .error (f"Error fetching engineering content: { str ( e ) } " )
21+ logger .error (f"Error fetching engineering content: { e !s } " )
2222 raise
2323
2424
@@ -28,9 +28,7 @@ def validate_article(article):
2828 return False
2929 if not article .get ("link" ) or not article ["link" ].startswith ("http" ):
3030 return False
31- if not article .get ("date" ):
32- return False
33- return True
31+ return bool (article .get ("date" ))
3432
3533
3634def parse_engineering_html (html_content ):
@@ -42,18 +40,12 @@ def parse_engineering_html(html_content):
4240 # Find the Next.js script tag containing article data
4341 script_tag = None
4442 for script in soup .find_all ("script" ):
45- if (
46- script .string
47- and "publishedOn" in script .string
48- and "engineeringArticle" in script .string
49- ):
43+ if script .string and "publishedOn" in script .string and "engineeringArticle" in script .string :
5044 script_tag = script
5145 break
5246
5347 if not script_tag :
54- logger .error (
55- "Could not find Next.js data script containing article information"
56- )
48+ logger .error ("Could not find Next.js data script containing article information" )
5749 return []
5850
5951 script_content = script_tag .string
@@ -82,26 +74,16 @@ def parse_engineering_html(html_content):
8274
8375 # Extract title and summary (they appear AFTER the slug in the data)
8476 # Use negative lookbehind to handle escaped quotes correctly
85- title_match = re .search (
86- r'\\"title\\":\\"(.*?)(?<!\\)\\"' , search_section
87- )
88- title = (
89- title_match .group (1 )
90- if title_match
91- else slug .replace ("-" , " " ).title ()
92- )
77+ title_match = re .search (r'\\"title\\":\\"(.*?)(?<!\\)\\"' , search_section )
78+ title = title_match .group (1 ) if title_match else slug .replace ("-" , " " ).title ()
9379 # Unescape the title using re.sub to handle all escaped characters
9480 title = re .sub (r"\\(.)" , r"\1" , title ) if title else title
9581
9682 # Extract summary/description
97- summary_match = re .search (
98- r'\\"summary\\":\\"(.*?)(?<!\\)\\"' , search_section
99- )
83+ summary_match = re .search (r'\\"summary\\":\\"(.*?)(?<!\\)\\"' , search_section )
10084 description = summary_match .group (1 ) if summary_match else title
10185 # Unescape the description
102- description = (
103- re .sub (r"\\(.)" , r"\1" , description ) if description else description
104- )
86+ description = re .sub (r"\\(.)" , r"\1" , description ) if description else description
10587
10688 # Parse the date
10789 date = datetime .strptime (published_date , "%Y-%m-%d" )
@@ -120,14 +102,14 @@ def parse_engineering_html(html_content):
120102 logger .info (f"Found article: { title } ({ published_date } )" )
121103
122104 except Exception as e :
123- logger .warning (f"Error parsing article { slug } : { str ( e ) } " )
105+ logger .warning (f"Error parsing article { slug } : { e !s } " )
124106 continue
125107
126108 logger .info (f"Successfully parsed { len (articles )} articles from JSON data" )
127109 return articles
128110
129111 except Exception as e :
130- logger .error (f"Error parsing HTML content: { str ( e ) } " )
112+ logger .error (f"Error parsing HTML content: { e !s } " )
131113 raise
132114
133115
@@ -136,9 +118,7 @@ def generate_rss_feed(articles, feed_name=FEED_NAME):
136118 try :
137119 fg = FeedGenerator ()
138120 fg .title ("Anthropic Engineering Blog" )
139- fg .description (
140- "Latest engineering articles and insights from Anthropic's engineering team"
141- )
121+ fg .description ("Latest engineering articles and insights from Anthropic's engineering team" )
142122 setup_feed_links (fg , BLOG_URL , feed_name )
143123 fg .language ("en" )
144124
@@ -164,7 +144,7 @@ def generate_rss_feed(articles, feed_name=FEED_NAME):
164144 return fg
165145
166146 except Exception as e :
167- logger .error (f"Error generating RSS feed: { str ( e ) } " )
147+ logger .error (f"Error generating RSS feed: { e !s } " )
168148 raise
169149
170150
@@ -191,7 +171,7 @@ def main(feed_name=FEED_NAME):
191171 return True
192172
193173 except Exception as e :
194- logger .error (f"Failed to generate RSS feed: { str ( e ) } " )
174+ logger .error (f"Failed to generate RSS feed: { e !s } " )
195175 return False
196176
197177
0 commit comments