Olshansk · WizeIdea · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
@@ -9,10 +9,15 @@ concurrency:
   group: request-feeds
   cancel-in-progress: true
 
+permissions:
+  contents: write
+
 jobs:
   run-feeds:
     runs-on: ubuntu-latest
     timeout-minutes: 30
+    env:
+      RSS_REPO_SLUG: ${{ github.repository }}
 
     steps:
       - name: Checkout repository
@@ -49,5 +54,5 @@ jobs:
             echo "No changes to commit"
           else
             git commit -m 'Update RSS feeds'
-            git push || { git pull --rebase && git push; }
+            git push origin HEAD:main || { git pull --rebase origin main && git push origin HEAD:main; }
           fi
@@ -9,10 +9,15 @@ concurrency:
   group: selenium-feeds
   cancel-in-progress: true
 
+permissions:
+  contents: write
+
 jobs:
   run-selenium-feeds:
     runs-on: ubuntu-latest
     timeout-minutes: 60
+    env:
+      RSS_REPO_SLUG: ${{ github.repository }}
 
     steps:
       - name: Checkout repository
@@ -55,5 +60,5 @@ jobs:
             echo "No changes to commit"
           else
             git commit -m 'Update RSS feeds (Selenium)'
-            git push || { git pull --rebase && git push; }
+            git push origin HEAD:main || { git pull --rebase origin main && git push origin HEAD:main; }
           fi
@@ -28,6 +28,9 @@ Scraped feeds are generated hourly. "Official RSS" rows point to native feeds th
 | [Google DeepMind Blog](https://deepmind.google/blog/)                                             | [Official RSS](https://deepmind.google/blog/rss.xml)                                                                                 |
 | [Google Developers Blog - AI](https://developers.googleblog.com/search/?technology_categories=AI) | [feed_google_ai.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_google_ai.xml)                             |
 | [Groq Blog](https://groq.com/blog/)                                                               | [feed_groq.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_groq.xml)                                       |
+| [Hugging Face Blog](https://huggingface.co/blog)                                                  | [Official RSS](https://huggingface.co/blog/feed.xml)                                                                                 |
+| [Hugging Face Blog (Ethics)](https://huggingface.co/blog?tag=ethics)                              | [feed_huggingface_ethics.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_huggingface_ethics.xml)             |
+| [Hugging Face Blog (Research)](https://huggingface.co/blog?tag=research)                        | [feed_huggingface_research.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_huggingface_research.xml)       |
 | [Hamel Husain's Blog](https://hamel.dev/)                                                         | [Official RSS](https://hamel.dev/index.xml)                                                                                          |
 | [Interconnected (Matt Webb)](https://interconnected.org/home)                                     | [Official RSS](https://interconnected.org/home/feed)                                                                                 |
 | [Mistral AI News](https://mistral.ai/news)                                                        | [feed_mistral.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_mistral.xml)                                 |
@@ -38,6 +41,7 @@ Scraped feeds are generated hourly. "Official RSS" rows point to native feeds th
 | [Perplexity Hub](https://www.perplexity.ai/hub)                                                   | [feed_perplexity_hub.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_perplexity_hub.xml)                   |
 | [Pinecone Blog](https://www.pinecone.io/blog/)                                                    | [feed_pinecone.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_pinecone.xml)                               |
 | [Simon Willison's Blog (Tools)](https://simonwillison.net/)                                       | [Official RSS](https://simonwillison.net/atom/beats/tool/)                                                                           |
+| [Stanford HAI News](https://hai.stanford.edu/news)                                                | [feed_stanford_hai_news.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_stanford_hai_news.xml)             |
 | [Supabase Blog](https://supabase.com/blog)                                                        | [Official RSS](https://supabase.com/rss.xml)                                                                                         |
 | [Surge AI Blog](https://www.surgehq.ai/blog)                                                      | [feed_blogsurgeai.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_blogsurgeai.xml)                         |
 | [The Batch by DeepLearning.AI](https://www.deeplearning.ai/the-batch/)                            | [feed_the_batch.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_the_batch.xml)                             |

@@ -0,0 +1,184 @@
+"""Shared helpers for Hugging Face blog tag feeds."""
+
+from datetime import datetime
+
+import pytz
+import requests
+from feedgen.feed import FeedGenerator
+
+from utils import (
+    deserialize_entries,
+    load_cache,
+    merge_entries,
+    save_cache,
+    save_rss_feed,
+    setup_feed_links,
+    setup_logging,
+    sort_posts_for_feed,
+    stable_fallback_date,
+)
+
+# Logger for this module, obtained from the shared setup_logging helper.
+logger = setup_logging(__name__)
+
+# Endpoint that fetch_posts_page queries for tag-filtered blog listings.
+HF_API_URL = "https://huggingface.co/api/blog"
+# Origin prepended to site-relative post URLs when building absolute links.
+HF_BASE_URL = "https://huggingface.co"
+# Headers sent with every API request: an identifying User-Agent and a JSON Accept type.
+API_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (compatible; RSS Feed Generator)",
+    "Accept": "application/json",
+}
+
+
+def fetch_posts_page(tag: str, page: int) -> dict:
+    """Request one page of tagged blog posts from the Hugging Face blog API.
+
+    Args:
+        tag: Blog tag, sent as the ``tag`` query parameter.
+        page: Page index, sent as the ``p`` query parameter.
+
+    Returns:
+        The decoded JSON body of the response.
+
+    Raises:
+        requests.HTTPError: If the API answers with an error status code.
+    """
+    query = {"tag": tag, "p": page}
+    resp = requests.get(HF_API_URL, params=query, headers=API_HEADERS, timeout=30)
+    # Surface 4xx/5xx responses instead of trying to decode an error body.
+    resp.raise_for_status()
+    payload = resp.json()
+    return payload
+
+
+def parse_api_posts(blogs: list[dict]) -> list[dict]:
+    """Convert Hugging Face API blog objects into feed post dicts.
+
+    Entries that cannot yield a usable post are skipped instead of crashing the
+    whole run: non-dict items, items without a non-empty string ``title``, and
+    items that provide neither ``url`` nor ``slug`` (which would otherwise all
+    collapse onto the bare ``/blog/`` link). Malformed ``publishedAt`` or
+    ``tags`` values are tolerated rather than raising.
+
+    Args:
+        blogs: Blog objects as returned under ``allBlogs`` by the API.
+
+    Returns:
+        A list of dicts with the keys ``title``, ``link``, ``date``,
+        ``description`` and ``category``.
+    """
+    posts = []
+    for blog in blogs:
+        if not isinstance(blog, dict):
+            continue
+
+        raw_title = blog.get("title")
+        title = raw_title.strip() if isinstance(raw_title, str) else ""
+        if not title:
+            continue
+
+        url = blog.get("url")
+        if not isinstance(url, str) or not url:
+            slug = blog.get("slug")
+            if not slug:
+                # Without url or slug the link would be the blog index itself.
+                logger.warning(f"Skipping post without url or slug: {title}")
+                continue
+            url = f"/blog/{slug}"
+        # Site-relative paths are made absolute; absolute URLs are kept as-is.
+        link = f"{HF_BASE_URL}{url}" if url.startswith("/") else url
+
+        date = None
+        published_at = blog.get("publishedAt")
+        if isinstance(published_at, str) and published_at:
+            try:
+                # fromisoformat on older Pythons rejects a trailing "Z".
+                date = datetime.fromisoformat(published_at.replace("Z", "+00:00"))
+                if date.tzinfo is None:
+                    date = date.replace(tzinfo=pytz.UTC)
+            except ValueError:
+                logger.warning(f"Could not parse date for: {title}")
+        elif published_at:
+            # Non-string timestamps cannot be parsed; treat them like a bad date.
+            logger.warning(f"Could not parse date for: {title}")
+        if date is None:
+            date = stable_fallback_date(link)
+
+        raw_tags = blog.get("tags")
+        if isinstance(raw_tags, (list, tuple)):
+            # Keep only usable string tags so the join below cannot raise.
+            tags = [tag for tag in raw_tags if isinstance(tag, str) and tag]
+        else:
+            tags = []
+        description = title
+        if tags:
+            description = f"{title} ({', '.join(tags)})"
+
+        posts.append(
+            {
+                "title": title,
+                "link": link,
+                "date": date,
+                "description": description,
+                "category": tags[0] if tags else "Blog",
+            }
+        )
+    return posts
+
+
+def fetch_all_posts(tag: str) -> list[dict]:
+    """Fetch all posts for a tag across paginated API results.
+
+    Pages are requested starting at 0 and posts are de-duplicated by link.
+    Paging stops when a page has no blogs, when the collected count reaches the
+    API's ``numTotalItems``, or when a page contributes no new posts. The last
+    condition guards against looping forever if the API keeps returning
+    already-seen posts while the reported total is never reached.
+
+    Args:
+        tag: Blog tag to fetch.
+
+    Returns:
+        The de-duplicated posts in the order they were first seen.
+    """
+    all_posts: list[dict] = []
+    seen_links: set[str] = set()
+    page = 0
+
+    while True:
+        logger.info(f"Fetching page {page} for tag={tag!r}")
+        api_data = fetch_posts_page(tag, page)
+        blogs = api_data.get("allBlogs") or []
+        if not blogs:
+            logger.info(f"No posts returned on page {page}, stopping")
+            break
+
+        page_posts = parse_api_posts(blogs)
+        added = 0
+        for post in page_posts:
+            if post["link"] not in seen_links:
+                all_posts.append(post)
+                seen_links.add(post["link"])
+                added += 1
+
+        total = api_data.get("numTotalItems")
+        if not isinstance(total, int):
+            # Missing or null total: fall back to what we have, which ends paging.
+            total = len(all_posts)
+        logger.info(f"Page {page}: {len(page_posts)} posts (total: {len(all_posts)}/{total})")
+        if len(all_posts) >= total:
+            break
+        if added == 0:
+            # Nothing new on this page; further pages are unlikely to differ.
+            logger.warning(f"Page {page} added no new posts for tag={tag!r}, stopping")
+            break
+        page += 1
+
+    return all_posts
+
+
+def fetch_latest_posts(tag: str) -> list[dict]:
+    """Return the parsed posts from page 0 only, for incremental updates."""
+    first_page = fetch_posts_page(tag, page=0)
+    raw_blogs = first_page.get("allBlogs", [])
+    latest = parse_api_posts(raw_blogs)
+    count = len(latest)
+    logger.info(f"Fetched {count} latest posts for tag={tag!r}")
+    return latest
+
+
+def generate_rss_feed(
+    posts: list[dict],
+    *,
+    feed_name: str,
+    blog_url: str,
+    feed_title: str,
+    feed_description: str,
+) -> FeedGenerator:
+    """Build a FeedGenerator holding one entry per post.
+
+    Args:
+        posts: Post dicts with ``title``, ``description``, ``link`` and
+            ``category`` keys, plus an optional ``date``.
+        feed_name: Feed identifier forwarded to ``setup_feed_links``.
+        blog_url: Blog URL forwarded to ``setup_feed_links``.
+        feed_title: Title set on the feed.
+        feed_description: Description set on the feed.
+
+    Returns:
+        The populated FeedGenerator.
+    """
+    feed = FeedGenerator()
+    feed.title(feed_title)
+    feed.description(feed_description)
+    feed.language("en")
+    feed.author({"name": "Hugging Face"})
+    setup_feed_links(feed, blog_url=blog_url, feed_name=feed_name)
+
+    ordered = sort_posts_for_feed(posts, date_field="date")
+    for item in ordered:
+        entry = feed.add_entry()
+        entry.title(item["title"])
+        entry.description(item["description"])
+        # The post link doubles as the entry's unique id.
+        link = item["link"]
+        entry.link(href=link)
+        entry.id(link)
+        entry.category(term=item["category"])
+        published = item.get("date")
+        if published:
+            entry.published(published)
+
+    entry_count = len(posts)
+    logger.info(f"Generated RSS feed with {entry_count} entries")
+    return feed
+
+
+def run_tag_feed(
+    *,
+    tag: str,
+    feed_name: str,
+    blog_url: str,
+    feed_title: str,
+    feed_description: str,
+    full_reset: bool = False,
+) -> bool:
+    """Refresh the cache and RSS feed for one Hugging Face blog tag.
+
+    A full fetch of every page runs when ``full_reset`` is set or the cache has
+    no entries; otherwise only the first page is fetched and merged with the
+    cached entries.
+
+    Returns:
+        True once the cache and feed have been saved, False when no posts were
+        found (nothing is written in that case).
+    """
+    cache = load_cache(feed_name)
+    cached_entries = deserialize_entries(cache.get("entries", []))
+
+    incremental = bool(cached_entries) and not full_reset
+    if incremental:
+        logger.info(f"Running incremental update for tag={tag!r}")
+        latest = fetch_latest_posts(tag)
+        posts = merge_entries(latest, cached_entries)
+    else:
+        if full_reset:
+            mode = "full reset"
+        else:
+            mode = "no cache exists"
+        logger.info(f"Running full fetch ({mode}) for tag={tag!r}")
+        fetched = fetch_all_posts(tag)
+        posts = sort_posts_for_feed(fetched, date_field="date")
+
+    # Bail out before touching the cache or feed when there is nothing to write.
+    if not posts:
+        logger.warning(f"No posts found for tag={tag!r}. Check the Hugging Face API response.")
+        return False
+
+    save_cache(feed_name, posts)
+    rss = generate_rss_feed(
+        posts,
+        feed_name=feed_name,
+        blog_url=blog_url,
+        feed_title=feed_title,
+        feed_description=feed_description,
+    )
+    save_rss_feed(rss, feed_name)
+    logger.info("Done!")
+    return True
@@ -0,0 +1,30 @@
+"""Generate RSS feed for Hugging Face Blog posts tagged ethics."""
+
+import argparse
+
+from huggingface_blog_common import run_tag_feed
+from utils import setup_logging
+
+# NOTE(review): `logger` is not referenced in this script's visible code —
+# confirm whether the call is kept for setup_logging's side effects.
+logger = setup_logging()
+
+# Feed identifier passed to run_tag_feed as feed_name.
+FEED_NAME = "huggingface_ethics"
+# Blog listing URL for the tag, passed to run_tag_feed as blog_url.
+BLOG_URL = "https://huggingface.co/blog?tag=ethics"
+# Tag value passed to run_tag_feed as tag.
+TAG = "ethics"
+
+
+def main(full_reset: bool = False) -> bool:
+    """Build the ethics-tag feed and return the result of ``run_tag_feed``."""
+    feed_settings = {
+        "tag": TAG,
+        "feed_name": FEED_NAME,
+        "blog_url": BLOG_URL,
+        "feed_title": "Hugging Face Blog (Ethics)",
+        "feed_description": "Ethics posts from the Hugging Face blog",
+        "full_reset": full_reset,
+    }
+    return run_tag_feed(**feed_settings)
+
+
+if __name__ == "__main__":
+    # CLI entry point: --full sets full_reset=True for main().
+    cli = argparse.ArgumentParser(description="Generate Hugging Face ethics blog RSS feed")
+    cli.add_argument("--full", action="store_true", help="Force full reset (fetch all tagged posts)")
+    options = cli.parse_args()
+    main(full_reset=options.full)
@@ -0,0 +1,30 @@
+"""Generate RSS feed for Hugging Face Blog posts tagged research."""
+
+import argparse
+
+from huggingface_blog_common import run_tag_feed
+from utils import setup_logging
+
+# NOTE(review): `logger` is not referenced in this script's visible code —
+# confirm whether the call is kept for setup_logging's side effects.
+logger = setup_logging()
+
+# Feed identifier passed to run_tag_feed as feed_name.
+FEED_NAME = "huggingface_research"
+# Blog listing URL for the tag, passed to run_tag_feed as blog_url.
+BLOG_URL = "https://huggingface.co/blog?tag=research"
+# Tag value passed to run_tag_feed as tag.
+TAG = "research"
+
+
+def main(full_reset: bool = False) -> bool:
+    """Build the research-tag feed and return the result of ``run_tag_feed``."""
+    feed_settings = {
+        "tag": TAG,
+        "feed_name": FEED_NAME,
+        "blog_url": BLOG_URL,
+        "feed_title": "Hugging Face Blog (Research)",
+        "feed_description": "Research posts from the Hugging Face blog",
+        "full_reset": full_reset,
+    }
+    return run_tag_feed(**feed_settings)
+
+
+if __name__ == "__main__":
+    # CLI entry point: --full sets full_reset=True for main().
+    cli = argparse.ArgumentParser(description="Generate Hugging Face research blog RSS feed")
+    cli.add_argument("--full", action="store_true", help="Force full reset (fetch all tagged posts)")
+    options = cli.parse_args()
+    main(full_reset=options.full)