@@ -125,20 +125,21 @@ def extract_text(para):
125
125
def get_tech_news():  # I'm adventuring with regular expressions for parsing!
    """Finds news for tweeting, along with their links.

    Downloads the NYTimes technology section page, isolates the story
    stream with one regular expression and pulls every story out of it
    with a second one.

    Yields:
        ``(text, img_src)`` tuples, where ``text`` is the stripped headline
        followed by a space and the (shortened) absolute story URL, and
        ``img_src`` is the stripped ``src`` attribute of the story image.
        Nothing is yielded when the story stream cannot be located.
    """
    # NOTE(review): without re.VERBOSE every literal space in these patterns
    # is significant -- confirm the spacing against the live page markup.
    # Capture groups, in order: link href, image src, headline text.
    story_expr = re.compile(
        r'(?s)<li .*?a href="(.*?)".*?>.*?<img .*?src=" (.*?)" .*?> '
        r'<h2 .*?> (.*?)</h2 >.*?</a>'
    )
    stream_expr = re.compile(
        r'(?s)<section id=" stream-panel".*ol >(.*)</ol>'
    )
    prefix = 'Daily Report: '

    page = requests.get('https://nytimes.com/section/technology')
    stream = stream_expr.search(page.text)
    if stream is None:
        # The page layout no longer matches: there is nothing to report,
        # which is friendlier than an AttributeError on ``None.group``.
        return

    for href, img_src, headline in story_expr.findall(stream.group(1)):
        headline = headline.strip()
        # Drop the recurring column prefix from the headline itself. The
        # previous code tested the image src and sliced the whole tuple,
        # which could never strip the prefix and would have yielded ``()``.
        if headline.startswith(prefix):
            headline = headline[len(prefix):]
        # The hrefs are joined onto the site root to form an absolute URL.
        link = shorten_url('https://nytimes.com' + href)
        yield (headline + ' ' + link, img_src.strip())
@@ -238,6 +239,9 @@ def find_news(newsfuncs):
238
239
239
240
def shorten_url (url ):
240
241
"""Shortens the passed url using shorte.st's API."""
242
+ # QUICK AND DIRTY AND TEMPORARY STUFF:
243
+ return url
244
+
241
245
from chirps .credentials import SHORTE_ST_TOKEN
242
246
response = requests .put (
243
247
"https://api.shorte.st/v1/data/url" ,
0 commit comments