From ea2af3bc2f71b47b6e88755ec46516e855554ea9 Mon Sep 17 00:00:00 2001 From: "Scott N." Date: Thu, 25 Jun 2026 13:47:25 +1000 Subject: [PATCH 1/3] Add Hugging Face blog feeds for official RSS and tag filters. Document the native feed and generate research/ethics feeds from Hugging Face's blog API so readers can subscribe to smaller, tag-specific lists. Co-authored-by: Cursor --- README.md | 3 + feed_generators/huggingface_blog_common.py | 184 +++ feed_generators/huggingface_ethics_blog.py | 30 + feed_generators/huggingface_research_blog.py | 30 + feeds.yaml | 10 + feeds/feed_huggingface_ethics.xml | 189 +++ feeds/feed_huggingface_research.xml | 1165 ++++++++++++++++++ makefiles/feeds.mk | 28 + 8 files changed, 1639 insertions(+) create mode 100644 feed_generators/huggingface_blog_common.py create mode 100644 feed_generators/huggingface_ethics_blog.py create mode 100644 feed_generators/huggingface_research_blog.py create mode 100644 feeds/feed_huggingface_ethics.xml create mode 100644 feeds/feed_huggingface_research.xml diff --git a/README.md b/README.md index 25427a623a..7450ab2939 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,9 @@ Scraped feeds are generated hourly. 
"Official RSS" rows point to native feeds th | [Google DeepMind Blog](https://deepmind.google/blog/) | [Official RSS](https://deepmind.google/blog/rss.xml) | | [Google Developers Blog - AI](https://developers.googleblog.com/search/?technology_categories=AI) | [feed_google_ai.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_google_ai.xml) | | [Groq Blog](https://groq.com/blog/) | [feed_groq.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_groq.xml) | +| [Hugging Face Blog](https://huggingface.co/blog) | [Official RSS](https://huggingface.co/blog/feed.xml) | +| [Hugging Face Blog (Ethics)](https://huggingface.co/blog?tag=ethics) | [feed_huggingface_ethics.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_huggingface_ethics.xml) | +| [Hugging Face Blog (Research)](https://huggingface.co/blog?tag=research) | [feed_huggingface_research.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_huggingface_research.xml) | | [Hamel Husain's Blog](https://hamel.dev/) | [Official RSS](https://hamel.dev/index.xml) | | [Interconnected (Matt Webb)](https://interconnected.org/home) | [Official RSS](https://interconnected.org/home/feed) | | [Mistral AI News](https://mistral.ai/news) | [feed_mistral.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_mistral.xml) | diff --git a/feed_generators/huggingface_blog_common.py b/feed_generators/huggingface_blog_common.py new file mode 100644 index 0000000000..7262075501 --- /dev/null +++ b/feed_generators/huggingface_blog_common.py @@ -0,0 +1,184 @@ +"""Shared helpers for Hugging Face blog tag feeds.""" + +from datetime import datetime + +import pytz +import requests +from feedgen.feed import FeedGenerator + +from utils import ( + deserialize_entries, + load_cache, + merge_entries, + save_cache, + save_rss_feed, + setup_feed_links, + setup_logging, + sort_posts_for_feed, + stable_fallback_date, +) + +logger = 
setup_logging(__name__) + +HF_API_URL = "https://huggingface.co/api/blog" +HF_BASE_URL = "https://huggingface.co" +API_HEADERS = { + "User-Agent": "Mozilla/5.0 (compatible; RSS Feed Generator)", + "Accept": "application/json", +} + + +def fetch_posts_page(tag: str, page: int) -> dict: + """Fetch a single page of blog posts for a tag from the Hugging Face API.""" + response = requests.get( + HF_API_URL, + params={"tag": tag, "p": page}, + headers=API_HEADERS, + timeout=30, + ) + response.raise_for_status() + return response.json() + + +def parse_api_posts(blogs: list[dict]) -> list[dict]: + """Extract post dicts from Hugging Face API blog objects.""" + posts = [] + for blog in blogs: + title = (blog.get("title") or "").strip() + if not title: + continue + + url = blog.get("url") or f"/blog/{blog.get('slug', '')}" + link = f"{HF_BASE_URL}{url}" if url.startswith("/") else url + + date = None + published_at = blog.get("publishedAt") + if published_at: + try: + date = datetime.fromisoformat(published_at.replace("Z", "+00:00")) + if date.tzinfo is None: + date = date.replace(tzinfo=pytz.UTC) + except ValueError: + logger.warning(f"Could not parse date for: {title}") + if not date: + date = stable_fallback_date(link) + + tags = blog.get("tags") or [] + description = title + if tags: + description = f"{title} ({', '.join(tags)})" + + posts.append( + { + "title": title, + "link": link, + "date": date, + "description": description, + "category": tags[0] if tags else "Blog", + } + ) + return posts + + +def fetch_all_posts(tag: str) -> list[dict]: + """Fetch all posts for a tag across paginated API results.""" + all_posts: list[dict] = [] + seen_links: set[str] = set() + page = 0 + + while True: + logger.info(f"Fetching page {page} for tag={tag!r}") + api_data = fetch_posts_page(tag, page) + blogs = api_data.get("allBlogs", []) + if not blogs: + logger.info(f"No posts returned on page {page}, stopping") + break + + page_posts = parse_api_posts(blogs) + for post in page_posts: 
+ if post["link"] not in seen_links: + all_posts.append(post) + seen_links.add(post["link"]) + + total = api_data.get("numTotalItems", len(all_posts)) + logger.info(f"Page {page}: {len(page_posts)} posts (total: {len(all_posts)}/{total})") + if len(all_posts) >= total: + break + page += 1 + + return all_posts + + +def fetch_latest_posts(tag: str) -> list[dict]: + """Fetch only the newest page of posts for incremental updates.""" + api_data = fetch_posts_page(tag, page=0) + posts = parse_api_posts(api_data.get("allBlogs", [])) + logger.info(f"Fetched {len(posts)} latest posts for tag={tag!r}") + return posts + + +def generate_rss_feed( + posts: list[dict], + *, + feed_name: str, + blog_url: str, + feed_title: str, + feed_description: str, +) -> FeedGenerator: + fg = FeedGenerator() + fg.title(feed_title) + fg.description(feed_description) + fg.language("en") + fg.author({"name": "Hugging Face"}) + setup_feed_links(fg, blog_url=blog_url, feed_name=feed_name) + + for post in sort_posts_for_feed(posts, date_field="date"): + fe = fg.add_entry() + fe.title(post["title"]) + fe.description(post["description"]) + fe.link(href=post["link"]) + fe.id(post["link"]) + fe.category(term=post["category"]) + if post.get("date"): + fe.published(post["date"]) + + logger.info(f"Generated RSS feed with {len(posts)} entries") + return fg + + +def run_tag_feed( + *, + tag: str, + feed_name: str, + blog_url: str, + feed_title: str, + feed_description: str, + full_reset: bool = False, +) -> bool: + cache = load_cache(feed_name) + cached_entries = deserialize_entries(cache.get("entries", [])) + + if full_reset or not cached_entries: + mode = "full reset" if full_reset else "no cache exists" + logger.info(f"Running full fetch ({mode}) for tag={tag!r}") + posts = sort_posts_for_feed(fetch_all_posts(tag), date_field="date") + else: + logger.info(f"Running incremental update for tag={tag!r}") + new_posts = fetch_latest_posts(tag) + posts = merge_entries(new_posts, cached_entries) + + if not 
posts: + logger.warning(f"No posts found for tag={tag!r}. Check the Hugging Face API response.") + return False + + save_cache(feed_name, posts) + feed = generate_rss_feed( + posts, + feed_name=feed_name, + blog_url=blog_url, + feed_title=feed_title, + feed_description=feed_description, + ) + save_rss_feed(feed, feed_name) + logger.info("Done!") + return True diff --git a/feed_generators/huggingface_ethics_blog.py b/feed_generators/huggingface_ethics_blog.py new file mode 100644 index 0000000000..6bb9c1398a --- /dev/null +++ b/feed_generators/huggingface_ethics_blog.py @@ -0,0 +1,30 @@ +"""Generate RSS feed for Hugging Face Blog posts tagged ethics.""" + +import argparse + +from huggingface_blog_common import run_tag_feed +from utils import setup_logging + +logger = setup_logging() + +FEED_NAME = "huggingface_ethics" +BLOG_URL = "https://huggingface.co/blog?tag=ethics" +TAG = "ethics" + + +def main(full_reset: bool = False) -> bool: + return run_tag_feed( + tag=TAG, + feed_name=FEED_NAME, + blog_url=BLOG_URL, + feed_title="Hugging Face Blog (Ethics)", + feed_description="Ethics posts from the Hugging Face blog", + full_reset=full_reset, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate Hugging Face ethics blog RSS feed") + parser.add_argument("--full", action="store_true", help="Force full reset (fetch all tagged posts)") + args = parser.parse_args() + main(full_reset=args.full) diff --git a/feed_generators/huggingface_research_blog.py b/feed_generators/huggingface_research_blog.py new file mode 100644 index 0000000000..4d68cfa5ce --- /dev/null +++ b/feed_generators/huggingface_research_blog.py @@ -0,0 +1,30 @@ +"""Generate RSS feed for Hugging Face Blog posts tagged research.""" + +import argparse + +from huggingface_blog_common import run_tag_feed +from utils import setup_logging + +logger = setup_logging() + +FEED_NAME = "huggingface_research" +BLOG_URL = "https://huggingface.co/blog?tag=research" +TAG = "research" + + 
+def main(full_reset: bool = False) -> bool: + return run_tag_feed( + tag=TAG, + feed_name=FEED_NAME, + blog_url=BLOG_URL, + feed_title="Hugging Face Blog (Research)", + feed_description="Research posts from the Hugging Face blog", + full_reset=full_reset, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate Hugging Face research blog RSS feed") + parser.add_argument("--full", action="store_true", help="Force full reset (fetch all tagged posts)") + args = parser.parse_args() + main(full_reset=args.full) diff --git a/feeds.yaml b/feeds.yaml index 89f0a15a71..d8d313fd99 100644 --- a/feeds.yaml +++ b/feeds.yaml @@ -80,6 +80,16 @@ feeds: type: requests blog_url: https://groq.com/blog/ + huggingface_ethics: + script: huggingface_ethics_blog.py + type: requests + blog_url: https://huggingface.co/blog?tag=ethics + + huggingface_research: + script: huggingface_research_blog.py + type: requests + blog_url: https://huggingface.co/blog?tag=research + meta_ai: script: meta_ai_blog.py type: selenium diff --git a/feeds/feed_huggingface_ethics.xml b/feeds/feed_huggingface_ethics.xml new file mode 100644 index 0000000000..e135c71c20 --- /dev/null +++ b/feeds/feed_huggingface_ethics.xml @@ -0,0 +1,189 @@ + + + + Hugging Face Blog (Ethics) + https://huggingface.co/blog?tag=ethics + Ethics posts from the Hugging Face blog + + http://www.rssboard.org/rss-specification + python-feedgen + en + Thu, 25 Jun 2026 03:43:30 +0000 + + AI and the Future of Cybersecurity: Why Openness Matters + https://huggingface.co/blog/cybersecurity-openness + AI and the Future of Cybersecurity: Why Openness Matters (cybersecurity, open-source, community, agents, llm, ethics) + https://huggingface.co/blog/cybersecurity-openness + cybersecurity + Tue, 21 Apr 2026 00:00:00 +0000 + + + Voice Cloning with Consent + https://huggingface.co/blog/voice-consent-gate + Voice Cloning with Consent (ethics, guide, speech, audio, text-to-speech, voice, voice-cloning) + 
https://huggingface.co/blog/voice-consent-gate + ethics + Tue, 28 Oct 2025 00:00:00 +0000 + + + Visible Watermarking with Gradio + https://huggingface.co/blog/watermarking-with-gradio + Visible Watermarking with Gradio (ethics, text-generation, guide, text-to-image, text-to-video) + https://huggingface.co/blog/watermarking-with-gradio + ethics + Mon, 15 Sep 2025 00:00:00 +0000 + + + AI Policy @🤗: Response to the White House AI Action Plan RFI + https://huggingface.co/blog/ai-action-wh-2025 + AI Policy @🤗: Response to the White House AI Action Plan RFI (ethics, policy) + https://huggingface.co/blog/ai-action-wh-2025 + ethics + Wed, 19 Mar 2025 00:00:00 +0000 + + + AI Agents Are Here. What Now? + https://huggingface.co/blog/ethics-soc-7 + AI Agents Are Here. What Now? (ethics, agents) + https://huggingface.co/blog/ethics-soc-7 + ethics + Mon, 13 Jan 2025 00:00:00 +0000 + + + Open Source Developers Guide to the EU AI Act + https://huggingface.co/blog/eu-ai-act-for-oss-developers + Open Source Developers Guide to the EU AI Act (policy, guide, ethics) + https://huggingface.co/blog/eu-ai-act-for-oss-developers + policy + Mon, 02 Dec 2024 00:00:00 +0000 + + + Ethics and Society Newsletter #6: Building Better AI: The Importance of Data Quality + https://huggingface.co/blog/ethics-soc-6 + Ethics and Society Newsletter #6: Building Better AI: The Importance of Data Quality (ethics) + https://huggingface.co/blog/ethics-soc-6 + ethics + Mon, 24 Jun 2024 00:00:00 +0000 + + + Public Policy at Hugging Face + https://huggingface.co/blog/policy-blog + Public Policy at Hugging Face (ethics) + https://huggingface.co/blog/policy-blog + ethics + Mon, 08 Apr 2024 00:00:00 +0000 + + + AI Watermarking 101: Tools and Techniques + https://huggingface.co/blog/watermarking + AI Watermarking 101: Tools and Techniques (ethics, research, nlp, guide) + https://huggingface.co/blog/watermarking + ethics + Mon, 26 Feb 2024 00:00:00 +0000 + + + Ethics and Society Newsletter #5: Hugging Face Goes To 
Washington and Other Summer 2023 Musings + https://huggingface.co/blog/ethics-soc-5 + Ethics and Society Newsletter #5: Hugging Face Goes To Washington and Other Summer 2023 Musings (ethics) + https://huggingface.co/blog/ethics-soc-5 + ethics + Fri, 29 Sep 2023 00:00:00 +0000 + + + AI Policy @🤗: Open ML Considerations in the EU AI Act + https://huggingface.co/blog/eu-ai-act-oss + AI Policy @🤗: Open ML Considerations in the EU AI Act (ethics) + https://huggingface.co/blog/eu-ai-act-oss + ethics + Mon, 24 Jul 2023 00:00:00 +0000 + + + Ethics and Society Newsletter #4: Bias in Text-to-Image Models + https://huggingface.co/blog/ethics-soc-4 + Ethics and Society Newsletter #4: Bias in Text-to-Image Models (ethics) + https://huggingface.co/blog/ethics-soc-4 + ethics + Mon, 26 Jun 2023 00:00:00 +0000 + + + AI Policy @🤗: Response to the U.S. NTIA's Request for Comment on AI Accountability + https://huggingface.co/blog/policy-ntia-rfc + AI Policy @🤗: Response to the U.S. NTIA's Request for Comment on AI Accountability (community, ethics) + https://huggingface.co/blog/policy-ntia-rfc + community + Tue, 20 Jun 2023 00:00:00 +0000 + + + Announcing our new Content Guidelines and Policy + https://huggingface.co/blog/content-guidelines-update + Announcing our new Content Guidelines and Policy (community, ethics) + https://huggingface.co/blog/content-guidelines-update + community + Thu, 15 Jun 2023 00:00:00 +0000 + + + Hugging Face Selected for the French Data Protection Agency Enhanced Support Program + https://huggingface.co/blog/cnil + Hugging Face Selected for the French Data Protection Agency Enhanced Support Program (ethics) + https://huggingface.co/blog/cnil + ethics + Mon, 15 May 2023 00:00:00 +0000 + + + Ethics and Society Newsletter #3: Ethical Openness at Hugging Face + https://huggingface.co/blog/ethics-soc-3 + Ethics and Society Newsletter #3: Ethical Openness at Hugging Face (ethics) + https://huggingface.co/blog/ethics-soc-3 + ethics + Thu, 30 Mar 2023 00:00:00 
+0000 + + + Ethical Guidelines for developing the Diffusers library + https://huggingface.co/blog/ethics-diffusers + Ethical Guidelines for developing the Diffusers library (ethics, diffusers) + https://huggingface.co/blog/ethics-diffusers + ethics + Thu, 02 Mar 2023 00:00:00 +0000 + + + Model Cards + https://huggingface.co/blog/model-cards + Model Cards (community, research, ethics, guide) + https://huggingface.co/blog/model-cards + community + Tue, 20 Dec 2022 00:00:00 +0000 + + + Let's talk about biases in machine learning! Ethics and Society Newsletter #2 + https://huggingface.co/blog/ethics-soc-2 + Let's talk about biases in machine learning! Ethics and Society Newsletter #2 (ethics) + https://huggingface.co/blog/ethics-soc-2 + ethics + Thu, 15 Dec 2022 00:00:00 +0000 + + + Evaluating Language Model Bias with 🤗 Evaluate + https://huggingface.co/blog/evaluating-llm-bias + Evaluating Language Model Bias with 🤗 Evaluate (ethics, research, nlp) + https://huggingface.co/blog/evaluating-llm-bias + ethics + Mon, 24 Oct 2022 00:00:00 +0000 + + + Ethics and Society Newsletter #1 + https://huggingface.co/blog/ethics-soc-1 + Ethics and Society Newsletter #1 (ethics) + https://huggingface.co/blog/ethics-soc-1 + ethics + Thu, 22 Sep 2022 00:00:00 +0000 + + + Comments on U.S. National AI Research Resource Interim Report + https://huggingface.co/blog/us-national-ai-research-resource + Comments on U.S. 
National AI Research Resource Interim Report (community, ethics) + https://huggingface.co/blog/us-national-ai-research-resource + community + Mon, 01 Aug 2022 00:00:00 +0000 + + + diff --git a/feeds/feed_huggingface_research.xml b/feeds/feed_huggingface_research.xml new file mode 100644 index 0000000000..af95eb2dc4 --- /dev/null +++ b/feeds/feed_huggingface_research.xml @@ -0,0 +1,1165 @@ + + + + Hugging Face Blog (Research) + https://huggingface.co/blog?tag=research + Research posts from the Hugging Face blog + + http://www.rssboard.org/rss-specification + python-feedgen + en + Thu, 25 Jun 2026 03:43:29 +0000 + + Introducing the Ettin Reranker Family + https://huggingface.co/blog/ettin-reranker + Introducing the Ettin Reranker Family (nlp, community, research, open-source) + https://huggingface.co/blog/ettin-reranker + nlp + Tue, 19 May 2026 00:00:00 +0000 + + + DeepSeek-V4: a million-token context that agents can actually use + https://huggingface.co/blog/deepseekv4 + DeepSeek-V4: a million-token context that agents can actually use (llm, moe, long-context, agents, open-source, research, community) + https://huggingface.co/blog/deepseekv4 + llm + Fri, 24 Apr 2026 00:00:00 +0000 + + + Ecom-RLVE: Adaptive Verifiable Environments for E-Commerce Conversational Agents + https://huggingface.co/blog/ecom-rlve + Ecom-RLVE: Adaptive Verifiable Environments for E-Commerce Conversational Agents (reinforcement-learning, rlvr, e-commerce, agents, llm, training, research, open-source) + https://huggingface.co/blog/ecom-rlve + reinforcement-learning + Thu, 16 Apr 2026 00:00:00 +0000 + + + Introducing RTEB: A New Standard for Retrieval Evaluation + https://huggingface.co/blog/rteb + Introducing RTEB: A New Standard for Retrieval Evaluation (nlp, evaluation, retrieval, benchmark, community, research) + https://huggingface.co/blog/rteb + nlp + Wed, 01 Oct 2025 00:00:00 +0000 + + + Jupyter Agents: training LLMs to reason with notebooks + https://huggingface.co/blog/jupyter-agent-2 
+ Jupyter Agents: training LLMs to reason with notebooks (agents, jupyter, llm, data-science, research) + https://huggingface.co/blog/jupyter-agent-2 + agents + Wed, 10 Sep 2025 00:00:00 +0000 + + + mmBERT: ModernBERT goes Multilingual + https://huggingface.co/blog/mmbert + mmBERT: ModernBERT goes Multilingual (llm, nlp, community, research, open-source-collab) + https://huggingface.co/blog/mmbert + llm + Tue, 09 Sep 2025 00:00:00 +0000 + + + MCP for Research: How to Connect AI to Research Tools + https://huggingface.co/blog/mcp-for-research + MCP for Research: How to Connect AI to Research Tools (mcp, research, guide) + https://huggingface.co/blog/mcp-for-research + mcp + Mon, 18 Aug 2025 00:00:00 +0000 + + + TextQuests: How Good are LLMs at Text-Based Video Games? + https://huggingface.co/blog/textquests + TextQuests: How Good are LLMs at Text-Based Video Games? (research, llm, evaluation, agents) + https://huggingface.co/blog/textquests + research + Tue, 12 Aug 2025 00:00:00 +0000 + + + Introducing Trackio: A Lightweight Experiment Tracking Library from Hugging Face + https://huggingface.co/blog/trackio + Introducing Trackio: A Lightweight Experiment Tracking Library from Hugging Face (research, gradio, open-source) + https://huggingface.co/blog/trackio + research + Tue, 29 Jul 2025 00:00:00 +0000 + + + Back to The Future: Evaluating AI Agents on Predicting Future Events + https://huggingface.co/blog/futurebench + Back to The Future: Evaluating AI Agents on Predicting Future Events (research, evaluation, ai) + https://huggingface.co/blog/futurebench + research + Thu, 17 Jul 2025 00:00:00 +0000 + + + Ettin Suite: SoTA Paired Encoders and Decoders + https://huggingface.co/blog/ettin + Ettin Suite: SoTA Paired Encoders and Decoders (llm, nlp, community, research, open-source-collab) + https://huggingface.co/blog/ettin + llm + Wed, 16 Jul 2025 00:00:00 +0000 + + + SmolLM3: smol, multilingual, long-context reasoner + https://huggingface.co/blog/smollm3 + SmolLM3: 
smol, multilingual, long-context reasoner (llm, nlp, reasoning, community, research) + https://huggingface.co/blog/smollm3 + llm + Tue, 08 Jul 2025 00:00:00 +0000 + + + Efficient MultiModal Data Pipeline + https://huggingface.co/blog/mmdp + Efficient MultiModal Data Pipeline (vlm, data, nanovlm, research, community, open) + https://huggingface.co/blog/mmdp + vlm + Tue, 08 Jul 2025 00:00:00 +0000 + + + Gemma 3n fully available in the open-source ecosystem! + https://huggingface.co/blog/gemma3n + Gemma 3n fully available in the open-source ecosystem! (audio, vision, llm, vlm, community, research, multimodal) + https://huggingface.co/blog/gemma3n + audio + Thu, 26 Jun 2025 00:00:00 +0000 + + + nanoVLM: The simplest repository to train your VLM in pure PyTorch + https://huggingface.co/blog/nanovlm + nanoVLM: The simplest repository to train your VLM in pure PyTorch (vlm, vision, llm, nanovlm, research, community, open) + https://huggingface.co/blog/nanovlm + vlm + Wed, 21 May 2025 00:00:00 +0000 + + + Vision Language Models (Better, faster, stronger) + https://huggingface.co/blog/vlms-2025 + Vision Language Models (Better, faster, stronger) (vlm, vision, multimodal, community, research) + https://huggingface.co/blog/vlms-2025 + vlm + Mon, 12 May 2025 00:00:00 +0000 + + + Introducing HELMET: Holistically Evaluating Long-context Language Models + https://huggingface.co/blog/helmet + Introducing HELMET: Holistically Evaluating Long-context Language Models (long-context, benchmark, nlp, community, research, intel) + https://huggingface.co/blog/helmet + long-context + Wed, 16 Apr 2025 00:00:00 +0000 + + + Arabic Leaderboards: Introducing Arabic Instruction Following, Updating AraGen, and More + https://huggingface.co/blog/leaderboard-3c3h-aragen-ifeval + Arabic Leaderboards: Introducing Arabic Instruction Following, Updating AraGen, and More (leaderboard, evaluation, nlp, LLM, research) + https://huggingface.co/blog/leaderboard-3c3h-aragen-ifeval + leaderboard + Tue, 08 Apr 
2025 00:00:00 +0000 + + + Open R1: How to use OlympicCoder locally for coding + https://huggingface.co/blog/olympic-coder-lmstudio + Open R1: How to use OlympicCoder locally for coding (open-source, llm, research, reasoning, local) + https://huggingface.co/blog/olympic-coder-lmstudio + open-source + Thu, 20 Mar 2025 00:00:00 +0000 + + + NVIDIA's GTC 2025 Announcement for Physical AI Developers: New Open Models and Datasets + https://huggingface.co/blog/nvidia-physical-ai + NVIDIA's GTC 2025 Announcement for Physical AI Developers: New Open Models and Datasets (robotics, ai, datasets, community, research) + https://huggingface.co/blog/nvidia-physical-ai + robotics + Tue, 18 Mar 2025 00:00:00 +0000 + + + Welcome Gemma 3: Google's all new multimodal, multilingual, long context open LLM + https://huggingface.co/blog/gemma3 + Welcome Gemma 3: Google's all new multimodal, multilingual, long context open LLM (nlp, llm, vlm, community, research) + https://huggingface.co/blog/gemma3 + nlp + Wed, 12 Mar 2025 00:00:00 +0000 + + + The Open Arabic LLM Leaderboard 2 + https://huggingface.co/blog/leaderboard-arabic-v2 + The Open Arabic LLM Leaderboard 2 (nlp, research, leaderboard, LLM, arabic) + https://huggingface.co/blog/leaderboard-arabic-v2 + nlp + Mon, 10 Feb 2025 00:00:00 +0000 + + + Open-source DeepResearch – Freeing our search agents + https://huggingface.co/blog/open-deep-research + Open-source DeepResearch – Freeing our search agents (llms, agents, research, smolagents) + https://huggingface.co/blog/open-deep-research + llms + Tue, 04 Feb 2025 00:00:00 +0000 + + + DABStep: Data Agent Benchmark for Multi-step Reasoning + https://huggingface.co/blog/dabstep + DABStep: Data Agent Benchmark for Multi-step Reasoning (llms, reasoning, research, evaluation) + https://huggingface.co/blog/dabstep + llms + Tue, 04 Feb 2025 00:00:00 +0000 + + + Open-R1: a fully open reproduction of DeepSeek-R1 + https://huggingface.co/blog/open-r1 + Open-R1: a fully open reproduction of 
DeepSeek-R1 (llms, reasoning, research) + https://huggingface.co/blog/open-r1 + llms + Tue, 28 Jan 2025 00:00:00 +0000 + + + Controlling Language Model Generation with NVIDIA's LogitsProcessorZoo + https://huggingface.co/blog/logits-processor-zoo + Controlling Language Model Generation with NVIDIA's LogitsProcessorZoo (llm, research, open-source) + https://huggingface.co/blog/logits-processor-zoo + llm + Mon, 23 Dec 2024 00:00:00 +0000 + + + Evaluating Audio Reasoning with Big Bench Audio + https://huggingface.co/blog/big-bench-audio-release + Evaluating Audio Reasoning with Big Bench Audio (leaderboard, research, collaboration, community) + https://huggingface.co/blog/big-bench-audio-release + leaderboard + Fri, 20 Dec 2024 00:00:00 +0000 + + + Finally, a Replacement for BERT: Introducing ModernBERT + https://huggingface.co/blog/modernbert + Finally, a Replacement for BERT: Introducing ModernBERT (nlp, community, research, open-source-collab) + https://huggingface.co/blog/modernbert + nlp + Thu, 19 Dec 2024 00:00:00 +0000 + + + Bamba: Inference-Efficient Hybrid Mamba2 Model + https://huggingface.co/blog/bamba + Bamba: Inference-Efficient Hybrid Mamba2 Model (research, nlp, community, model) + https://huggingface.co/blog/bamba + research + Wed, 18 Dec 2024 00:00:00 +0000 + + + Welcome to the Falcon 3 Family of Open Models! + https://huggingface.co/blog/falcon3 + Welcome to the Falcon 3 Family of Open Models! 
(nlp, llm, community, research) + https://huggingface.co/blog/falcon3 + nlp + Tue, 17 Dec 2024 00:00:00 +0000 + + + Welcome PaliGemma 2 – New vision language models by Google + https://huggingface.co/blog/paligemma2 + Welcome PaliGemma 2 – New vision language models by Google (multimodal, gemma, LLM, vision, VLM, research) + https://huggingface.co/blog/paligemma2 + multimodal + Thu, 05 Dec 2024 00:00:00 +0000 + + + Rethinking LLM Evaluation with 3C3H: AraGen Benchmark and Leaderboard + https://huggingface.co/blog/leaderboard-3c3h-aragen + Rethinking LLM Evaluation with 3C3H: AraGen Benchmark and Leaderboard (leaderboard, evaluation, nlp, LLM, research) + https://huggingface.co/blog/leaderboard-3c3h-aragen + leaderboard + Wed, 04 Dec 2024 00:00:00 +0000 + + + You could have designed state of the art positional encoding + https://huggingface.co/blog/designing-positional-encoding + You could have designed state of the art positional encoding (research, multimodal, tutorial) + https://huggingface.co/blog/designing-positional-encoding + research + Mon, 25 Nov 2024 00:00:00 +0000 + + + Letting Large Models Debate: The First Multilingual LLM Debate Competition + https://huggingface.co/blog/debate + Letting Large Models Debate: The First Multilingual LLM Debate Competition (community, research, nlp, evaluation, leaderboard, collaboration) + https://huggingface.co/blog/debate + community + Wed, 20 Nov 2024 00:00:00 +0000 + + + Faster Text Generation with Self-Speculative Decoding + https://huggingface.co/blog/layerskip + Faster Text Generation with Self-Speculative Decoding (research, nlp, open-source, collaboration) + https://huggingface.co/blog/layerskip + research + Wed, 20 Nov 2024 00:00:00 +0000 + + + Introducing the Open Leaderboard for Japanese LLMs! + https://huggingface.co/blog/leaderboard-japanese + Introducing the Open Leaderboard for Japanese LLMs! 
(community, research, nlp, evaluation, leaderboard, collaboration) + https://huggingface.co/blog/leaderboard-japanese + community + Wed, 20 Nov 2024 00:00:00 +0000 + + + Share your open ML datasets on Hugging Face Hub! + https://huggingface.co/blog/researcher-dataset-sharing + Share your open ML datasets on Hugging Face Hub! (community, research, datasets, guide) + https://huggingface.co/blog/researcher-dataset-sharing + community + Tue, 12 Nov 2024 00:00:00 +0000 + + + Universal Assisted Generation: Faster Decoding with Any Assistant Model + https://huggingface.co/blog/universal_assisted_generation + Universal Assisted Generation: Faster Decoding with Any Assistant Model (research, nlp, open-source, collaboration) + https://huggingface.co/blog/universal_assisted_generation + research + Tue, 29 Oct 2024 00:00:00 +0000 + + + Faster Assisted Generation with Dynamic Speculation + https://huggingface.co/blog/dynamic_speculation_lookahead + Faster Assisted Generation with Dynamic Speculation (research, nlp) + https://huggingface.co/blog/dynamic_speculation_lookahead + research + Tue, 08 Oct 2024 00:00:00 +0000 + + + A Short Summary of Chinese AI Global Expansion + https://huggingface.co/blog/chinese-ai-expansion + A Short Summary of Chinese AI Global Expansion (research, community) + https://huggingface.co/blog/chinese-ai-expansion + research + Thu, 03 Oct 2024 00:00:00 +0000 + + + 🇨🇿 BenCzechMark - Can your LLM Understand Czech? + https://huggingface.co/blog/benczechmark + 🇨🇿 BenCzechMark - Can your LLM Understand Czech? 
(nlp, research, leaderboard, LLM) + https://huggingface.co/blog/benczechmark + nlp + Tue, 01 Oct 2024 00:00:00 +0000 + + + Exploring the Daily Papers Page on Hugging Face + https://huggingface.co/blog/daily-papers + Exploring the Daily Papers Page on Hugging Face (research, community) + https://huggingface.co/blog/daily-papers + research + Mon, 23 Sep 2024 00:00:00 +0000 + + + Fine-tuning LLMs to 1.58bit: extreme quantization made easy + https://huggingface.co/blog/1_58_llm_extreme_quantization + Fine-tuning LLMs to 1.58bit: extreme quantization made easy (nlp, research, community) + https://huggingface.co/blog/1_58_llm_extreme_quantization + nlp + Wed, 18 Sep 2024 00:00:00 +0000 + + + Welcome Falcon Mamba: The first strong attention-free 7B model + https://huggingface.co/blog/falconmamba + Welcome Falcon Mamba: The first strong attention-free 7B model (nlp, community, research, LLM, Mamba) + https://huggingface.co/blog/falconmamba + nlp + Mon, 12 Aug 2024 00:00:00 +0000 + + + Introducing TextImage Augmentation for Document Images + https://huggingface.co/blog/doc_aug_hf_alb + Introducing TextImage Augmentation for Document Images (document ai, data augmentation, synthetic-data, albumentations, research) + https://huggingface.co/blog/doc_aug_hf_alb + document ai + Tue, 06 Aug 2024 00:00:00 +0000 + + + Google releases Gemma 2 2B, ShieldGemma and Gemma Scope + https://huggingface.co/blog/gemma-july-update + Google releases Gemma 2 2B, ShieldGemma and Gemma Scope (nlp, community, research, LLM, gcp) + https://huggingface.co/blog/gemma-july-update + nlp + Wed, 31 Jul 2024 00:00:00 +0000 + + + LAVE: Zero-shot VQA Evaluation on Docmatix with LLMs - Do We Still Need Fine-Tuning? + https://huggingface.co/blog/zero-shot-vqa-docmatix + LAVE: Zero-shot VQA Evaluation on Docmatix with LLMs - Do We Still Need Fine-Tuning? 
(community, evaluation, synthetic-data, vqa, vlm, zero-shot, research) + https://huggingface.co/blog/zero-shot-vqa-docmatix + community + Thu, 25 Jul 2024 00:00:00 +0000 + + + Llama 3.1 - 405B, 70B & 8B with multilinguality and long context + https://huggingface.co/blog/llama31 + Llama 3.1 - 405B, 70B & 8B with multilinguality and long context (nlp, community, research, LLM) + https://huggingface.co/blog/llama31 + nlp + Tue, 23 Jul 2024 00:00:00 +0000 + + + Docmatix - a huge dataset for Document Visual Question Answering + https://huggingface.co/blog/docmatix + Docmatix - a huge dataset for Document Visual Question Answering (community, datasets, synthetic-data, open-source, cv, vlm, announcement, research) + https://huggingface.co/blog/docmatix + community + Thu, 18 Jul 2024 00:00:00 +0000 + + + SmolLM - blazingly fast and remarkably powerful + https://huggingface.co/blog/smollm + SmolLM - blazingly fast and remarkably powerful (llm, nlp, synthetic-data, research, datasets, community) + https://huggingface.co/blog/smollm + llm + Tue, 16 Jul 2024 00:00:00 +0000 + + + How NuminaMath Won the 1st AIMO Progress Prize + https://huggingface.co/blog/winning-aimo-progress-prize + How NuminaMath Won the 1st AIMO Progress Prize (ai4math, nlp, community, research, leaderboard, open-science-collab) + https://huggingface.co/blog/winning-aimo-progress-prize + ai4math + Thu, 11 Jul 2024 00:00:00 +0000 + + + Our Transformers Code Agent beats the GAIA benchmark 🏅 + https://huggingface.co/blog/beating-gaia + Our Transformers Code Agent beats the GAIA benchmark 🏅 (agents, smolagents, nlp, community, research, leaderboard) + https://huggingface.co/blog/beating-gaia + agents + Mon, 01 Jul 2024 00:00:00 +0000 + + + Welcome Gemma 2 - Google’s new open LLM + https://huggingface.co/blog/gemma2 + Welcome Gemma 2 - Google’s new open LLM (nlp, community, research, LLM, gcp) + https://huggingface.co/blog/gemma2 + nlp + Thu, 27 Jun 2024 00:00:00 +0000 + + + Fine-tuning Florence-2 - Microsoft's 
Cutting-edge Vision Language Models + https://huggingface.co/blog/finetune-florence2 + Fine-tuning Florence-2 - Microsoft's Cutting-edge Vision Language Models (collaboration, community, open-source, research) + https://huggingface.co/blog/finetune-florence2 + collaboration + Mon, 24 Jun 2024 00:00:00 +0000 + + + BigCodeBench: The Next Generation of HumanEval + https://huggingface.co/blog/leaderboard-bigcodebench + BigCodeBench: The Next Generation of HumanEval (leaderboard, research, collaboration, community) + https://huggingface.co/blog/leaderboard-bigcodebench + leaderboard + Tue, 18 Jun 2024 00:00:00 +0000 + + + From DeepSpeed to FSDP and Back Again with Hugging Face Accelerate + https://huggingface.co/blog/deepspeed-to-fsdp-and-back + From DeepSpeed to FSDP and Back Again with Hugging Face Accelerate (open-source, guide, research, collaboration) + https://huggingface.co/blog/deepspeed-to-fsdp-and-back + open-source + Thu, 13 Jun 2024 00:00:00 +0000 + + + Putting RL back in RLHF + https://huggingface.co/blog/putting_rl_back_in_rlhf_with_rloo + Putting RL back in RLHF (research, rl, rlhf) + https://huggingface.co/blog/putting_rl_back_in_rlhf_with_rloo + research + Wed, 12 Jun 2024 00:00:00 +0000 + + + Launching the Artificial Analysis Text to Image Leaderboard & Arena + https://huggingface.co/blog/leaderboard-artificial-analysis2 + Launching the Artificial Analysis Text to Image Leaderboard & Arena (leaderboard, research, collaboration, community) + https://huggingface.co/blog/leaderboard-artificial-analysis2 + leaderboard + Thu, 06 Jun 2024 00:00:00 +0000 + + + Falcon 2: An 11B parameter pretrained language model and VLM, trained on over 5000B tokens and 11 languages + https://huggingface.co/blog/falcon2-11b + Falcon 2: An 11B parameter pretrained language model and VLM, trained on over 5000B tokens and 11 languages (nlp, community, research, LLM, multimodal, vision, open-source) + https://huggingface.co/blog/falcon2-11b + nlp + Fri, 24 May 2024 00:00:00 +0000 
+ + + CyberSecEval 2 - A Comprehensive Evaluation Framework for Cybersecurity Risks and Capabilities of Large Language Models + https://huggingface.co/blog/leaderboard-llamaguard + CyberSecEval 2 - A Comprehensive Evaluation Framework for Cybersecurity Risks and Capabilities of Large Language Models (nlp, research, leaderboard, LLM) + https://huggingface.co/blog/leaderboard-llamaguard + nlp + Fri, 24 May 2024 00:00:00 +0000 + + + Introducing the Open Arabic LLM Leaderboard + https://huggingface.co/blog/leaderboard-arabic + Introducing the Open Arabic LLM Leaderboard (nlp, research, leaderboard, LLM) + https://huggingface.co/blog/leaderboard-arabic + nlp + Tue, 14 May 2024 00:00:00 +0000 + + + Introducing the Open Leaderboard for Hebrew LLMs! + https://huggingface.co/blog/leaderboard-hebrew + Introducing the Open Leaderboard for Hebrew LLMs! (nlp, research, leaderboard, LLM) + https://huggingface.co/blog/leaderboard-hebrew + nlp + Sun, 05 May 2024 00:00:00 +0000 + + + Bringing the Artificial Analysis LLM Performance Leaderboard to Hugging Face + https://huggingface.co/blog/leaderboard-artificial-analysis + Bringing the Artificial Analysis LLM Performance Leaderboard to Hugging Face (leaderboard, research, collaboration, community) + https://huggingface.co/blog/leaderboard-artificial-analysis + leaderboard + Fri, 03 May 2024 00:00:00 +0000 + + + Improving Prompt Consistency with Structured Generations + https://huggingface.co/blog/evaluation-structured-outputs + Improving Prompt Consistency with Structured Generations (evaluation, collaboration, research, leaderboard) + https://huggingface.co/blog/evaluation-structured-outputs + evaluation + Tue, 30 Apr 2024 00:00:00 +0000 + + + StarCoder2-Instruct: Fully Transparent and Permissive Self-Alignment for Code Generation + https://huggingface.co/blog/sc2-instruct + StarCoder2-Instruct: Fully Transparent and Permissive Self-Alignment for Code Generation (nlp, community, research, LLM) + 
https://huggingface.co/blog/sc2-instruct + nlp + Mon, 29 Apr 2024 00:00:00 +0000 + + + Introducing the Open Chain of Thought Leaderboard + https://huggingface.co/blog/leaderboard-cot + Introducing the Open Chain of Thought Leaderboard (leaderboard, research, collaboration, community) + https://huggingface.co/blog/leaderboard-cot + leaderboard + Tue, 23 Apr 2024 00:00:00 +0000 + + + The Open Medical-LLM Leaderboard: Benchmarking Large Language Models in Healthcare + https://huggingface.co/blog/leaderboard-medicalllm + The Open Medical-LLM Leaderboard: Benchmarking Large Language Models in Healthcare (leaderboard, collaboration, research) + https://huggingface.co/blog/leaderboard-medicalllm + leaderboard + Fri, 19 Apr 2024 00:00:00 +0000 + + + Welcome Llama 3 - Meta's new open LLM + https://huggingface.co/blog/llama3 + Welcome Llama 3 - Meta's new open LLM (nlp, community, research, LLM) + https://huggingface.co/blog/llama3 + nlp + Thu, 18 Apr 2024 00:00:00 +0000 + + + Introducing the LiveCodeBench Leaderboard - Holistic and Contamination-Free Evaluation of Code LLMs + https://huggingface.co/blog/leaderboard-livecodebench + Introducing the LiveCodeBench Leaderboard - Holistic and Contamination-Free Evaluation of Code LLMs (leaderboard, research, collaboration, community) + https://huggingface.co/blog/leaderboard-livecodebench + leaderboard + Tue, 16 Apr 2024 00:00:00 +0000 + + + Running Privacy-Preserving Inferences on Hugging Face Endpoints + https://huggingface.co/blog/fhe-endpoints + Running Privacy-Preserving Inferences on Hugging Face Endpoints (guide, privacy, research, FHE) + https://huggingface.co/blog/fhe-endpoints + guide + Tue, 16 Apr 2024 00:00:00 +0000 + + + Introducing Idefics2: A Powerful 8B Vision-Language Model for the community + https://huggingface.co/blog/idefics2 + Introducing Idefics2: A Powerful 8B Vision-Language Model for the community (research, nlp, cv, vlm, multimodal) + https://huggingface.co/blog/idefics2 + research + Mon, 15 Apr 2024 
00:00:00 +0000 + + + CodeGemma - an official Google release for code LLMs + https://huggingface.co/blog/codegemma + CodeGemma - an official Google release for code LLMs (nlp, community, research, LLM, gcp) + https://huggingface.co/blog/codegemma + nlp + Tue, 09 Apr 2024 00:00:00 +0000 + + + Binary and Scalar Embedding Quantization for Significantly Faster & Cheaper Retrieval + https://huggingface.co/blog/embedding-quantization + Binary and Scalar Embedding Quantization for Significantly Faster & Cheaper Retrieval (nlp, community, guide, collaboration, research) + https://huggingface.co/blog/embedding-quantization + nlp + Fri, 22 Mar 2024 00:00:00 +0000 + + + Unlocking the conversion of Web Screenshots into HTML Code with the WebSight Dataset + https://huggingface.co/blog/websight + Unlocking the conversion of Web Screenshots into HTML Code with the WebSight Dataset (nlp, cv, data, research) + https://huggingface.co/blog/websight + nlp + Fri, 15 Mar 2024 00:00:00 +0000 + + + Introducing ConTextual: How well can your Multimodal model jointly reason over text and image in text-rich scenes? + https://huggingface.co/blog/leaderboard-contextual + Introducing ConTextual: How well can your Multimodal model jointly reason over text and image in text-rich scenes? 
(leaderboard, collaboration, research) + https://huggingface.co/blog/leaderboard-contextual + leaderboard + Tue, 05 Mar 2024 00:00:00 +0000 + + + StarCoder2 and The Stack v2 + https://huggingface.co/blog/starcoder2 + StarCoder2 and The Stack v2 (nlp, community, research, LLM) + https://huggingface.co/blog/starcoder2 + nlp + Wed, 28 Feb 2024 00:00:00 +0000 + + + AI Watermarking 101: Tools and Techniques + https://huggingface.co/blog/watermarking + AI Watermarking 101: Tools and Techniques (ethics, research, nlp, guide) + https://huggingface.co/blog/watermarking + ethics + Mon, 26 Feb 2024 00:00:00 +0000 + + + Fine-Tuning Gemma Models in Hugging Face + https://huggingface.co/blog/gemma-peft + Fine-Tuning Gemma Models in Hugging Face (nlp, community, research, LLM, gcp, peft) + https://huggingface.co/blog/gemma-peft + nlp + Fri, 23 Feb 2024 00:00:00 +0000 + + + Welcome Gemma - Google’s new open LLM + https://huggingface.co/blog/gemma + Welcome Gemma - Google’s new open LLM (nlp, community, research, LLM, gcp) + https://huggingface.co/blog/gemma + nlp + Wed, 21 Feb 2024 00:00:00 +0000 + + + NPHardEval Leaderboard: Unveiling the Reasoning Abilities of Large Language Models through Complexity Classes and Dynamic Updates + https://huggingface.co/blog/leaderboard-nphardeval + NPHardEval Leaderboard: Unveiling the Reasoning Abilities of Large Language Models through Complexity Classes and Dynamic Updates (leaderboard, guide, collaboration, research) + https://huggingface.co/blog/leaderboard-nphardeval + leaderboard + Fri, 02 Feb 2024 00:00:00 +0000 + + + Constitutional AI with Open LLMs + https://huggingface.co/blog/constitutional_ai + Constitutional AI with Open LLMs (research, rl, rlhf, constitutional-ai) + https://huggingface.co/blog/constitutional_ai + research + Thu, 01 Feb 2024 00:00:00 +0000 + + + Patch Time Series Transformer in Hugging Face + https://huggingface.co/blog/patchtst + Patch Time Series Transformer in Hugging Face (guide, research, time-series) + 
https://huggingface.co/blog/patchtst + guide + Thu, 01 Feb 2024 00:00:00 +0000 + + + The Hallucinations Leaderboard, an Open Effort to Measure Hallucinations in Large Language Models + https://huggingface.co/blog/leaderboard-hallucinations + The Hallucinations Leaderboard, an Open Effort to Measure Hallucinations in Large Language Models (leaderboard, guide, collaboration, research) + https://huggingface.co/blog/leaderboard-hallucinations + leaderboard + Mon, 29 Jan 2024 00:00:00 +0000 + + + An Introduction to AI Secure LLM Safety Leaderboard + https://huggingface.co/blog/leaderboard-decodingtrust + An Introduction to AI Secure LLM Safety Leaderboard (leaderboard, guide, collaboration, research) + https://huggingface.co/blog/leaderboard-decodingtrust + leaderboard + Fri, 26 Jan 2024 00:00:00 +0000 + + + PatchTSMixer in HuggingFace + https://huggingface.co/blog/patchtsmixer + PatchTSMixer in HuggingFace (guide, research, time-series) + https://huggingface.co/blog/patchtsmixer + guide + Fri, 19 Jan 2024 00:00:00 +0000 + + + Preference Tuning LLMs with Direct Preference Optimization Methods + https://huggingface.co/blog/pref-tuning + Preference Tuning LLMs with Direct Preference Optimization Methods (rl, rlhf, nlp, research) + https://huggingface.co/blog/pref-tuning + rl + Thu, 18 Jan 2024 00:00:00 +0000 + + + Welcome aMUSEd: Efficient Text-to-Image Generation + https://huggingface.co/blog/amused + Welcome aMUSEd: Efficient Text-to-Image Generation (guide, vision, research, diffusers) + https://huggingface.co/blog/amused + guide + Thu, 04 Jan 2024 00:00:00 +0000 + + + 2023, year of open LLMs + https://huggingface.co/blog/2023-in-llms + 2023, year of open LLMs (research, nlp, llm, guide) + https://huggingface.co/blog/2023-in-llms + research + Mon, 18 Dec 2023 00:00:00 +0000 + + + SetFitABSA: Few-Shot Aspect Based Sentiment Analysis using SetFit + https://huggingface.co/blog/setfit-absa + SetFitABSA: Few-Shot Aspect Based Sentiment Analysis using SetFit (research, nlp) 
+ https://huggingface.co/blog/setfit-absa + research + Wed, 06 Dec 2023 00:00:00 +0000 + + + Open LLM Leaderboard: DROP deep dive + https://huggingface.co/blog/open-llm-leaderboard-drop + Open LLM Leaderboard: DROP deep dive (community, research, nlp, evaluation, open-llm-leaderboard, leaderboard) + https://huggingface.co/blog/open-llm-leaderboard-drop + community + Fri, 01 Dec 2023 00:00:00 +0000 + + + The N Implementation Details of RLHF with PPO + https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo + The N Implementation Details of RLHF with PPO (research, rl, rlhf) + https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo + research + Tue, 24 Oct 2023 00:00:00 +0000 + + + Optimizing your LLM in production + https://huggingface.co/blog/optimize-llm + Optimizing your LLM in production (nlp, research, LLM) + https://huggingface.co/blog/optimize-llm + nlp + Fri, 15 Sep 2023 00:00:00 +0000 + + + Spread Your Wings: Falcon 180B is here + https://huggingface.co/blog/falcon-180b + Spread Your Wings: Falcon 180B is here (nlp, community, research, LLM) + https://huggingface.co/blog/falcon-180b + nlp + Wed, 06 Sep 2023 00:00:00 +0000 + + + Code Llama: Llama 2 learns to code + https://huggingface.co/blog/codellama + Code Llama: Llama 2 learns to code (nlp, community, research, LLM) + https://huggingface.co/blog/codellama + nlp + Fri, 25 Aug 2023 00:00:00 +0000 + + + Introducing IDEFICS: An Open Reproduction of State-of-the-art Visual Langage Model + https://huggingface.co/blog/idefics + Introducing IDEFICS: An Open Reproduction of State-of-the-art Visual Langage Model (research, nlp, cv) + https://huggingface.co/blog/idefics + research + Tue, 22 Aug 2023 00:00:00 +0000 + + + Huggy Lingo: Using Machine Learning to Improve Language Metadata on the Hugging Face Hub + https://huggingface.co/blog/huggy-lingo + Huggy Lingo: Using Machine Learning to Improve Language Metadata on the Hugging Face Hub (announcement, research) + 
https://huggingface.co/blog/huggy-lingo + announcement + Wed, 02 Aug 2023 00:00:00 +0000 + + + Towards Encrypted Large Language Models with FHE + https://huggingface.co/blog/encrypted-llm + Towards Encrypted Large Language Models with FHE (guide, privacy, research, FHE, llm) + https://huggingface.co/blog/encrypted-llm + guide + Wed, 02 Aug 2023 00:00:00 +0000 + + + Open-sourcing Knowledge Distillation Code and Weights of SD-Small and SD-Tiny + https://huggingface.co/blog/sd_distillation + Open-sourcing Knowledge Distillation Code and Weights of SD-Small and SD-Tiny (stable-diffusion, research, diffusers) + https://huggingface.co/blog/sd_distillation + stable-diffusion + Tue, 01 Aug 2023 00:00:00 +0000 + + + Llama 2 is here - get it on Hugging Face + https://huggingface.co/blog/llama2 + Llama 2 is here - get it on Hugging Face (nlp, community, research, LLM) + https://huggingface.co/blog/llama2 + nlp + Tue, 18 Jul 2023 00:00:00 +0000 + + + What's going on with the Open LLM Leaderboard? + https://huggingface.co/blog/open-llm-leaderboard-mmlu + What's going on with the Open LLM Leaderboard? 
(community, research, nlp, evaluation, open-llm-leaderboard, leaderboard) + https://huggingface.co/blog/open-llm-leaderboard-mmlu + community + Fri, 23 Jun 2023 00:00:00 +0000 + + + Fine-Tune MMS Adapter Models for low-resource ASR + https://huggingface.co/blog/mms_adapters + Fine-Tune MMS Adapter Models for low-resource ASR (audio, research) + https://huggingface.co/blog/mms_adapters + audio + Mon, 19 Jun 2023 00:00:00 +0000 + + + Yes, Transformers are Effective for Time Series Forecasting (+ Autoformer) + https://huggingface.co/blog/autoformer + Yes, Transformers are Effective for Time Series Forecasting (+ Autoformer) (guide, research, time-series) + https://huggingface.co/blog/autoformer + guide + Fri, 16 Jun 2023 00:00:00 +0000 + + + The Falcon has landed in the Hugging Face ecosystem + https://huggingface.co/blog/falcon + The Falcon has landed in the Hugging Face ecosystem (nlp, community, research) + https://huggingface.co/blog/falcon + nlp + Mon, 05 Jun 2023 00:00:00 +0000 + + + Instruction-tuning Stable Diffusion with InstructPix2Pix + https://huggingface.co/blog/instruction-tuning-sd + Instruction-tuning Stable Diffusion with InstructPix2Pix (diffusers, diffusion, instruction-tuning, research, guide) + https://huggingface.co/blog/instruction-tuning-sd + diffusers + Tue, 23 May 2023 00:00:00 +0000 + + + Introducing RWKV - An RNN with the advantages of a transformer + https://huggingface.co/blog/rwkv + Introducing RWKV - An RNN with the advantages of a transformer (nlp, community, research) + https://huggingface.co/blog/rwkv + nlp + Mon, 15 May 2023 00:00:00 +0000 + + + Assisted Generation: a new direction toward low-latency text generation + https://huggingface.co/blog/assisted-generation + Assisted Generation: a new direction toward low-latency text generation (nlp, research) + https://huggingface.co/blog/assisted-generation + nlp + Thu, 11 May 2023 00:00:00 +0000 + + + Creating a Coding Assistant with StarCoder + 
https://huggingface.co/blog/starchat-alpha + Creating a Coding Assistant with StarCoder (nlp, community, research) + https://huggingface.co/blog/starchat-alpha + nlp + Tue, 09 May 2023 00:00:00 +0000 + + + StarCoder: A State-of-the-Art LLM for Code + https://huggingface.co/blog/starcoder + StarCoder: A State-of-the-Art LLM for Code (nlp, community, research) + https://huggingface.co/blog/starcoder + nlp + Thu, 04 May 2023 00:00:00 +0000 + + + Multivariate Probabilistic Time Series Forecasting with Informer + https://huggingface.co/blog/informer + Multivariate Probabilistic Time Series Forecasting with Informer (guide, research, time-series) + https://huggingface.co/blog/informer + guide + Fri, 10 Mar 2023 00:00:00 +0000 + + + Model Cards + https://huggingface.co/blog/model-cards + Model Cards (community, research, ethics, guide) + https://huggingface.co/blog/model-cards + community + Tue, 20 Dec 2022 00:00:00 +0000 + + + Probabilistic Time Series Forecasting with 🤗 Transformers + https://huggingface.co/blog/time-series-transformers + Probabilistic Time Series Forecasting with 🤗 Transformers (research, time-series) + https://huggingface.co/blog/time-series-transformers + research + Thu, 01 Dec 2022 00:00:00 +0000 + + + Director of Machine Learning Insights [Part 4] + https://huggingface.co/blog/ml-director-insights-4 + Director of Machine Learning Insights [Part 4] (community, research) + https://huggingface.co/blog/ml-director-insights-4 + community + Wed, 23 Nov 2022 00:00:00 +0000 + + + Hugging Face Machine Learning Demos on arXiv + https://huggingface.co/blog/arxiv + Hugging Face Machine Learning Demos on arXiv (research, community) + https://huggingface.co/blog/arxiv + research + Thu, 17 Nov 2022 00:00:00 +0000 + + + Sentiment Analysis on Encrypted Data with Homomorphic Encryption + https://huggingface.co/blog/sentiment-analysis-fhe + Sentiment Analysis on Encrypted Data with Homomorphic Encryption (guide, privacy, research, FHE) + 
https://huggingface.co/blog/sentiment-analysis-fhe + guide + Thu, 17 Nov 2022 00:00:00 +0000 + + + Generating Human-level Text with Contrastive Search in Transformers 🤗 + https://huggingface.co/blog/introducing-csearch + Generating Human-level Text with Contrastive Search in Transformers 🤗 (nlp, text generation, research) + https://huggingface.co/blog/introducing-csearch + nlp + Tue, 08 Nov 2022 00:00:00 +0000 + + + Evaluating Language Model Bias with 🤗 Evaluate + https://huggingface.co/blog/evaluating-llm-bias + Evaluating Language Model Bias with 🤗 Evaluate (ethics, research, nlp) + https://huggingface.co/blog/evaluating-llm-bias + ethics + Mon, 24 Oct 2022 00:00:00 +0000 + + + From PyTorch DDP to Accelerate to Trainer, mastery of distributed training with ease + https://huggingface.co/blog/pytorch-ddp-accelerate-transformers + From PyTorch DDP to Accelerate to Trainer, mastery of distributed training with ease (guide, research, open-source-collab) + https://huggingface.co/blog/pytorch-ddp-accelerate-transformers + guide + Fri, 21 Oct 2022 00:00:00 +0000 + + + MTEB: Massive Text Embedding Benchmark + https://huggingface.co/blog/mteb + MTEB: Massive Text Embedding Benchmark (nlp, research, llm) + https://huggingface.co/blog/mteb + nlp + Wed, 19 Oct 2022 00:00:00 +0000 + + + Optimization story: Bloom inference + https://huggingface.co/blog/bloom-inference-optimization + Optimization story: Bloom inference (open-source-collab, community, research) + https://huggingface.co/blog/bloom-inference-optimization + open-source-collab + Wed, 12 Oct 2022 00:00:00 +0000 + + + Very Large Language Models and How to Evaluate Them + https://huggingface.co/blog/zero-shot-eval-on-the-hub + Very Large Language Models and How to Evaluate Them (autotrain, research, nlp) + https://huggingface.co/blog/zero-shot-eval-on-the-hub + autotrain + Mon, 03 Oct 2022 00:00:00 +0000 + + + How 🤗 Accelerate runs very large models thanks to PyTorch + https://huggingface.co/blog/accelerate-large-models 
+ How 🤗 Accelerate runs very large models thanks to PyTorch (guide, research, open-source-collab) + https://huggingface.co/blog/accelerate-large-models + guide + Tue, 27 Sep 2022 00:00:00 +0000 + + + SetFit: Efficient Few-Shot Learning Without Prompts + https://huggingface.co/blog/setfit + SetFit: Efficient Few-Shot Learning Without Prompts (research, nlp) + https://huggingface.co/blog/setfit + research + Mon, 26 Sep 2022 00:00:00 +0000 + + + Visualize proteins on Hugging Face Spaces + https://huggingface.co/blog/spaces_3dmoljs + Visualize proteins on Hugging Face Spaces (research) + https://huggingface.co/blog/spaces_3dmoljs + research + Wed, 24 Aug 2022 00:00:00 +0000 + + + Nyströmformer: Approximating self-attention in linear time and memory via the Nyström method + https://huggingface.co/blog/nystromformer + Nyströmformer: Approximating self-attention in linear time and memory via the Nyström method (research, nlp) + https://huggingface.co/blog/nystromformer + research + Tue, 02 Aug 2022 00:00:00 +0000 + + + Introducing The World's Largest Open Multilingual Language Model: BLOOM + https://huggingface.co/blog/bloom + Introducing The World's Largest Open Multilingual Language Model: BLOOM (open-source-collab, community, research) + https://huggingface.co/blog/bloom + open-source-collab + Tue, 12 Jul 2022 00:00:00 +0000 + + + Director of Machine Learning Insights [Part 3: Finance Edition] + https://huggingface.co/blog/ml-director-insights-3 + Director of Machine Learning Insights [Part 3: Finance Edition] (community, research) + https://huggingface.co/blog/ml-director-insights-3 + community + Tue, 14 Jun 2022 00:00:00 +0000 + + + Efficient Table Pre-training without Real Data: An Introduction to TAPEX + https://huggingface.co/blog/tapex + Efficient Table Pre-training without Real Data: An Introduction to TAPEX (research, nlp, community) + https://huggingface.co/blog/tapex + research + Mon, 23 May 2022 00:00:00 +0000 + + + Putting ethical principles at the core of 
the research lifecycle + https://huggingface.co/blog/ethical-charter-multimodal + Putting ethical principles at the core of the research lifecycle (research, nlp, audio, cv) + https://huggingface.co/blog/ethical-charter-multimodal + research + Thu, 19 May 2022 00:00:00 +0000 + + + Director of Machine Learning Insights [Part 2: SaaS Edition] + https://huggingface.co/blog/ml-director-insights-2 + Director of Machine Learning Insights [Part 2: SaaS Edition] (community, research) + https://huggingface.co/blog/ml-director-insights-2 + community + Fri, 13 May 2022 00:00:00 +0000 + + + Director of Machine Learning Insights + https://huggingface.co/blog/ml-director-insights + Director of Machine Learning Insights (community, research) + https://huggingface.co/blog/ml-director-insights + community + Wed, 27 Apr 2022 00:00:00 +0000 + + + Announcing the 🤗 AI Research Residency Program + https://huggingface.co/blog/ai-residency + Announcing the 🤗 AI Research Residency Program (community, research) + https://huggingface.co/blog/ai-residency + community + Tue, 22 Mar 2022 00:00:00 +0000 + + + Making automatic speech recognition work on large files with Wav2Vec2 in 🤗 Transformers + https://huggingface.co/blog/asr-chunking + Making automatic speech recognition work on large files with Wav2Vec2 in 🤗 Transformers (guide, research, audio) + https://huggingface.co/blog/asr-chunking + guide + Tue, 01 Feb 2022 00:00:00 +0000 + + + Boosting Wav2Vec2 with n-grams in 🤗 Transformers + https://huggingface.co/blog/wav2vec2-with-ngram + Boosting Wav2Vec2 with n-grams in 🤗 Transformers (research, guide, audio) + https://huggingface.co/blog/wav2vec2-with-ngram + research + Wed, 12 Jan 2022 00:00:00 +0000 + + + Active Learning with AutoNLP and Prodigy + https://huggingface.co/blog/autonlp-prodigy + Active Learning with AutoNLP and Prodigy (research, partnerships, nlp) + https://huggingface.co/blog/autonlp-prodigy + research + Thu, 23 Dec 2021 00:00:00 +0000 + + + Perceiver IO: a scalable, 
fully-attentional model that works on any modality + https://huggingface.co/blog/perceiver + Perceiver IO: a scalable, fully-attentional model that works on any modality (research, guide, nlp, audio, cv) + https://huggingface.co/blog/perceiver + research + Wed, 15 Dec 2021 00:00:00 +0000 + + + Training CodeParrot 🦜 from Scratch + https://huggingface.co/blog/codeparrot + Training CodeParrot 🦜 from Scratch (guide, research, nlp) + https://huggingface.co/blog/codeparrot + guide + Wed, 08 Dec 2021 00:00:00 +0000 + + + Introducing Snowball Fight ☃️, our first ML-Agents environment + https://huggingface.co/blog/snowball-fight + Introducing Snowball Fight ☃️, our first ML-Agents environment (research, rl) + https://huggingface.co/blog/snowball-fight + research + Thu, 02 Dec 2021 00:00:00 +0000 + + + Introducing the Data Measurements Tool: an Interactive Tool for Looking at Datasets + https://huggingface.co/blog/data-measurements-tool + Introducing the Data Measurements Tool: an Interactive Tool for Looking at Datasets (research) + https://huggingface.co/blog/data-measurements-tool + research + Mon, 29 Nov 2021 00:00:00 +0000 + + + Deep Learning over the Internet: Training Language Models Collaboratively + https://huggingface.co/blog/collaborative-training + Deep Learning over the Internet: Training Language Models Collaboratively (research) + https://huggingface.co/blog/collaborative-training + research + Thu, 15 Jul 2021 00:00:00 +0000 + + + Understanding BigBird's Block Sparse Attention + https://huggingface.co/blog/big-bird + Understanding BigBird's Block Sparse Attention (community, research, nlp) + https://huggingface.co/blog/big-bird + community + Wed, 31 Mar 2021 00:00:00 +0000 + + + Hugging Face Reads, Feb. 2021 - Long-range Transformers + https://huggingface.co/blog/long-range-transformers + Hugging Face Reads, Feb. 
2021 - Long-range Transformers (research, nlp) + https://huggingface.co/blog/long-range-transformers + research + Tue, 09 Mar 2021 00:00:00 +0000 + + + Transformer-based Encoder-Decoder Models + https://huggingface.co/blog/encoder-decoder + Transformer-based Encoder-Decoder Models (research, nlp) + https://huggingface.co/blog/encoder-decoder + research + Sat, 10 Oct 2020 00:00:00 +0000 + + + Block Sparse Matrices for Smaller and Faster Language Models + https://huggingface.co/blog/pytorch_block_sparse + Block Sparse Matrices for Smaller and Faster Language Models (research, nlp) + https://huggingface.co/blog/pytorch_block_sparse + research + Thu, 10 Sep 2020 00:00:00 +0000 + + + The Reformer - Pushing the limits of language modeling + https://huggingface.co/blog/reformer + The Reformer - Pushing the limits of language modeling (research, nlp) + https://huggingface.co/blog/reformer + research + Fri, 03 Jul 2020 00:00:00 +0000 + + + diff --git a/makefiles/feeds.mk b/makefiles/feeds.mk index 8455e95171..646c6d153f 100644 --- a/makefiles/feeds.mk +++ b/makefiles/feeds.mk @@ -178,6 +178,34 @@ feeds_groq: ## Generate RSS feed for Groq Blog $(Q)uv run feed_generators/groq_blog.py $(call print_success,Groq Blog feed generated) +.PHONY: feeds_huggingface_ethics +feeds_huggingface_ethics: ## Generate RSS feed for Hugging Face Blog (Ethics) + $(call check_venv) + $(call print_info,Generating Hugging Face ethics feed) + $(Q)uv run feed_generators/huggingface_ethics_blog.py + $(call print_success,Hugging Face ethics feed generated) + +.PHONY: feeds_huggingface_ethics_full +feeds_huggingface_ethics_full: ## Generate RSS feed for Hugging Face Blog (Ethics, full reset) + $(call check_venv) + $(call print_info,Generating Hugging Face ethics feed - FULL RESET) + $(Q)uv run feed_generators/huggingface_ethics_blog.py --full + $(call print_success,Hugging Face ethics feed generated - full reset) + +.PHONY: feeds_huggingface_research +feeds_huggingface_research: ## Generate RSS feed for 
Hugging Face Blog (Research) + $(call check_venv) + $(call print_info,Generating Hugging Face research feed) + $(Q)uv run feed_generators/huggingface_research_blog.py + $(call print_success,Hugging Face research feed generated) + +.PHONY: feeds_huggingface_research_full +feeds_huggingface_research_full: ## Generate RSS feed for Hugging Face Blog (Research, full reset) + $(call check_venv) + $(call print_info,Generating Hugging Face research feed - FULL RESET) + $(Q)uv run feed_generators/huggingface_research_blog.py --full + $(call print_success,Hugging Face research feed generated - full reset) + .PHONY: feeds_meta_ai feeds_meta_ai: ## Generate RSS feed for AI at Meta Blog (incremental) $(call check_venv) From 53d0fbab3a1ee4b6d121d20796570b5cd13fe4d2 Mon Sep 17 00:00:00 2001 From: "Scott N." Date: Thu, 25 Jun 2026 14:05:22 +1000 Subject: [PATCH 2/3] Fix GitHub Actions push permissions for fork feed updates. Grant contents:write to feed workflows and set RSS_REPO_SLUG from the current repository so the bot can push regenerated feeds on forks. 
Co-authored-by: Cursor --- .github/workflows/run_feeds.yml | 7 ++++++- .github/workflows/run_selenium_feeds.yml | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_feeds.yml b/.github/workflows/run_feeds.yml index 7a73b6da8b..35dbc6be86 100644 --- a/.github/workflows/run_feeds.yml +++ b/.github/workflows/run_feeds.yml @@ -9,10 +9,15 @@ concurrency: group: request-feeds cancel-in-progress: true +permissions: + contents: write + jobs: run-feeds: runs-on: ubuntu-latest timeout-minutes: 30 + env: + RSS_REPO_SLUG: ${{ github.repository }} steps: - name: Checkout repository @@ -49,5 +54,5 @@ jobs: echo "No changes to commit" else git commit -m 'Update RSS feeds' - git push || { git pull --rebase && git push; } + git push origin HEAD:main || { git pull --rebase origin main && git push origin HEAD:main; } fi diff --git a/.github/workflows/run_selenium_feeds.yml b/.github/workflows/run_selenium_feeds.yml index 5d8f21b863..b260cfeb35 100644 --- a/.github/workflows/run_selenium_feeds.yml +++ b/.github/workflows/run_selenium_feeds.yml @@ -9,10 +9,15 @@ concurrency: group: selenium-feeds cancel-in-progress: true +permissions: + contents: write + jobs: run-selenium-feeds: runs-on: ubuntu-latest timeout-minutes: 60 + env: + RSS_REPO_SLUG: ${{ github.repository }} steps: - name: Checkout repository @@ -55,5 +60,5 @@ jobs: echo "No changes to commit" else git commit -m 'Update RSS feeds (Selenium)' - git push || { git pull --rebase && git push; } + git push origin HEAD:main || { git pull --rebase origin main && git push origin HEAD:main; } fi From 91ad957c815cc678673fb338fce609e140266dd5 Mon Sep 17 00:00:00 2001 From: "Scott N." Date: Thu, 25 Jun 2026 19:44:25 +1000 Subject: [PATCH 3/3] Add Stanford HAI News feed generator. Integrate a new Stanford HAI news feed by adding a generator, registering it in the feed registry and Makefile, and documenting the new feed URL in README. 
Co-authored-by: Cursor --- README.md | 1 + feed_generators/stanford_hai_news_blog.py | 164 ++++++++++++++++++++++ feeds.yaml | 5 + feeds/feed_stanford_hai_news.xml | 53 +++++++ makefiles/feeds.mk | 7 + 5 files changed, 230 insertions(+) create mode 100644 feed_generators/stanford_hai_news_blog.py create mode 100644 feeds/feed_stanford_hai_news.xml diff --git a/README.md b/README.md index 7450ab2939..f38fa1e77c 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ Scraped feeds are generated hourly. "Official RSS" rows point to native feeds th | [Perplexity Hub](https://www.perplexity.ai/hub) | [feed_perplexity_hub.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_perplexity_hub.xml) | | [Pinecone Blog](https://www.pinecone.io/blog/) | [feed_pinecone.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_pinecone.xml) | | [Simon Willison's Blog (Tools)](https://simonwillison.net/) | [Official RSS](https://simonwillison.net/atom/beats/tool/) | +| [Stanford HAI News](https://hai.stanford.edu/news) | [feed_stanford_hai_news.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_stanford_hai_news.xml) | | [Supabase Blog](https://supabase.com/blog) | [Official RSS](https://supabase.com/rss.xml) | | [Surge AI Blog](https://www.surgehq.ai/blog) | [feed_blogsurgeai.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_blogsurgeai.xml) | | [The Batch by DeepLearning.AI](https://www.deeplearning.ai/the-batch/) | [feed_the_batch.xml](https://raw.githubusercontent.com/Olshansk/rss-feeds/main/feeds/feed_the_batch.xml) | diff --git a/feed_generators/stanford_hai_news_blog.py b/feed_generators/stanford_hai_news_blog.py new file mode 100644 index 0000000000..8840d0d76a --- /dev/null +++ b/feed_generators/stanford_hai_news_blog.py @@ -0,0 +1,164 @@ +"""Generate RSS feed for Stanford HAI News (https://hai.stanford.edu/news).""" + +import argparse +import re +from datetime import datetime + +import 
pytz
+from bs4 import BeautifulSoup
+from feedgen.feed import FeedGenerator
+
+from utils import fetch_page, save_rss_feed, setup_feed_links, setup_logging, sort_posts_for_feed, stable_fallback_date
+
+logger = setup_logging()
+
+FEED_NAME = "stanford_hai_news"
+BLOG_URL = "https://hai.stanford.edu/news"
+BASE_URL = "https://hai.stanford.edu"
+
+# Matches a full-month-name date such as "June 01, 2026" anywhere inside a longer string.
+DATE_RE = re.compile(
+    r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}"
+)
+
+
+def parse_date(date_text: str) -> datetime | None:
+    """Parse date strings like 'May 27, 2026' into UTC datetimes.
+
+    Both the full ("%B") and abbreviated ("%b") month-name forms are tried.
+
+    Args:
+        date_text: Text holding only the date; surrounding whitespace is stripped.
+
+    Returns:
+        A timezone-aware datetime (UTC, midnight), or None when neither format matches.
+    """
+    for fmt in ("%B %d, %Y", "%b %d, %Y"):
+        try:
+            return datetime.strptime(date_text.strip(), fmt).replace(tzinfo=pytz.UTC)
+        except ValueError:
+            continue
+    return None
+
+
+def extract_article_date(article_url: str) -> datetime:
+    """Fetch an article page and extract its publish date.
+
+    Three strategies are tried in order, and the first successful parse wins:
+
+    1. Elements whose class contains "DetailMeta_row" (text searched with DATE_RE).
+    2. Elements whose class contains "FeatureArticleMeta_date" (whole text parsed).
+    3. Any div/span/p whose text contains a DATE_RE match.
+
+    Any exception raised while fetching or parsing is logged and swallowed, so a
+    single bad article cannot abort the whole feed run.
+
+    Args:
+        article_url: Absolute URL of the article page.
+
+    Returns:
+        The parsed publish date, or stable_fallback_date(article_url) when no
+        date could be extracted.
+    """
+    try:
+        article_html = fetch_page(article_url)
+        soup = BeautifulSoup(article_html, "html.parser")
+
+        # Common detail-page pattern: "Date June 01, 2026"
+        for row in soup.select('[class*="DetailMeta_row"]'):
+            text = row.get_text(" ", strip=True)
+            match = DATE_RE.search(text)
+            if match:
+                parsed = parse_date(match.group(0))
+                if parsed:
+                    return parsed
+
+        # Featured story pattern where date is shown directly
+        for elem in soup.select('[class*="FeatureArticleMeta_date"]'):
+            text = elem.get_text(" ", strip=True)
+            parsed = parse_date(text)
+            if parsed:
+                return parsed
+
+        # Fallback: any standalone month day, year text
+        # NOTE(review): this returns the first date found in document order, which
+        # may belong to unrelated content on the page - confirm against real pages.
+        for elem in soup.find_all(["div", "span", "p"]):
+            text = elem.get_text(" ", strip=True)
+            match = DATE_RE.search(text)
+            if match:
+                parsed = parse_date(match.group(0))
+                if parsed:
+                    return parsed
+
+        logger.warning(f"Could not parse publish date for {article_url}")
+    except Exception as e:
+        # Deliberately broad: date extraction is best-effort.
+        logger.warning(f"Failed to fetch article metadata for {article_url}: {e}")
+
+    return stable_fallback_date(article_url)
+
+
+def parse_news_listing(html_content: str) -> list[dict]:
+    """Parse the HAI News listing page and extract article cards.
+
+    A card is kept only when it has a link whose href contains "/news/" and a
+    heading (h2/h3/h4) with non-empty text. Each kept card triggers one extra
+    page fetch through extract_article_date() to resolve its publish date.
+
+    Args:
+        html_content: Raw HTML of the listing page.
+
+    Returns:
+        One dict per unique article link, in page order, with the keys
+        "title", "link", "description", "date" and "category".
+    """
+    # Imported locally so the function does not depend on an edit to the module imports.
+    from urllib.parse import urljoin
+
+    soup = BeautifulSoup(html_content, "html.parser")
+    cards = soup.select('div[class*="ContentCard_root__"]')
+    logger.info(f"Found {len(cards)} content cards")
+
+    posts = []
+    seen_links = set()
+
+    for card in cards:
+        try:
+            link_elem = card.select_one('a[href*="/news/"]')
+            title_elem = card.select_one("h2, h3, h4")
+            desc_elem = card.select_one("p")
+
+            if not link_elem or not title_elem:
+                continue
+
+            href = link_elem.get("href", "").strip()
+            if not href:
+                continue
+
+            # urljoin resolves root-relative, bare relative and protocol-relative
+            # ("//host/...") hrefs and leaves absolute URLs untouched. Prefixing
+            # BASE_URL by hand corrupted protocol-relative hrefs and left bare
+            # relative ones unresolved.
+            link = urljoin(f"{BASE_URL}/", href)
+
+            # Skip links that point back at the listing page itself, in any spelling.
+            if link.rstrip("/") == BLOG_URL:
+                continue
+            if link in seen_links:
+                continue
+
+            title = title_elem.get_text(" ", strip=True)
+            if not title:
+                continue
+
+            # Record the link only once the card is known to be usable, so an
+            # untitled card cannot suppress a later valid card for the same article.
+            seen_links.add(link)
+
+            # Fall back to the title when the card carries no summary paragraph.
+            description = desc_elem.get_text(" ", strip=True) if desc_elem else title
+            date = extract_article_date(link)
+
+            posts.append(
+                {
+                    "title": title,
+                    "link": link,
+                    "description": description,
+                    "date": date,
+                    "category": "News",
+                }
+            )
+        except Exception as e:
+            # One malformed card must not discard the rest of the listing.
+            logger.warning(f"Skipping malformed content card: {e}")
+
+    logger.info(f"Parsed {len(posts)} HAI news posts")
+    return posts
+
+
+def generate_rss_feed(posts: list[dict]) -> FeedGenerator:
+    """Build the Stanford HAI News feed from parsed posts.
+
+    Args:
+        posts: Dicts as produced by parse_news_listing().
+
+    Returns:
+        A FeedGenerator with one entry per post, added in the order returned by
+        sort_posts_for_feed(posts, date_field="date").
+    """
+    fg = FeedGenerator()
+    fg.title("Stanford HAI News")
+    # NOTE(review): the committed feed_stanford_hai_news.xml shows the subtitle
+    # text as the channel description, so this value appears to be overwritten
+    # by fg.subtitle() below - confirm which text is intended.
+    fg.description("Latest news and research updates from Stanford HAI")
+    fg.language("en")
+    fg.author({"name": "Stanford HAI"})
+    fg.subtitle("Stanford Institute for Human-Centered Artificial Intelligence news")
+    setup_feed_links(fg, blog_url=BLOG_URL, feed_name=FEED_NAME)
+
+    for post in sort_posts_for_feed(posts, date_field="date"):
+        fe = fg.add_entry()
+        fe.title(post["title"])
+        fe.description(post["description"])
+        fe.link(href=post["link"])
+        # The article URL doubles as the entry id.
+        fe.id(post["link"])
+        fe.category(term=post["category"])
+        fe.published(post["date"])
+
+    logger.info(f"Generated RSS feed with {len(posts)} entries")
+    return fg
+
+
+def main() -> bool:
+    """Fetch the listing page, build the feed and save it.
+
+    Returns:
+        True when a feed was saved, False when no posts could be parsed
+        (nothing is saved in that case).
+    """
+    logger.info(f"Fetching {BLOG_URL}")
+    html = fetch_page(BLOG_URL)
+    posts = parse_news_listing(html)
+
+    if not posts:
+        logger.warning("No HAI news posts found. Check selectors.")
+        return False
+
+    feed = generate_rss_feed(posts)
+    save_rss_feed(feed, FEED_NAME)
+    logger.info("Done!")
+    return True
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate Stanford HAI News RSS feed")
+    # --full is accepted for orchestrator compatibility even though this generator has no cache.
+    parser.add_argument("--full", action="store_true", help="No-op (Stanford HAI news has no cache)")
+    parser.parse_args()
+    # NOTE(review): main()'s boolean result is not turned into an exit status, so
+    # an empty scrape still exits 0 - confirm this matches the other generators.
+    main()
diff --git a/feeds.yaml b/feeds.yaml
index d8d313fd99..af8016bac7 100644
--- a/feeds.yaml
+++ b/feeds.yaml
@@ -90,6 +90,11 @@ feeds:
     type: requests
     blog_url: https://huggingface.co/blog?tag=research
 
+  stanford_hai_news:
+    script: stanford_hai_news_blog.py
+    type: requests
+    blog_url: https://hai.stanford.edu/news
+
   meta_ai:
     script: meta_ai_blog.py
     type: selenium
diff --git a/feeds/feed_stanford_hai_news.xml b/feeds/feed_stanford_hai_news.xml
new file mode 100644
index 0000000000..da82da2921
--- /dev/null
+++ b/feeds/feed_stanford_hai_news.xml
@@ -0,0 +1,53 @@
+
+
+
+ Stanford HAI News
+ https://hai.stanford.edu/news
+ Stanford Institute for Human-Centered Artificial Intelligence news
+
+ http://www.rssboard.org/rss-specification
+ python-feedgen
+ en
+ Thu, 25 Jun 2026 09:40:06 +0000
+
+ Today's AI Talks Like “Nobody.” New Research Gives It Real Personality.
+ https://hai.stanford.edu/news/todays-ai-talks-like-nobody-new-research-gives-it-real-personality
+ PsychAdapter lets researchers dial in on personality traits, age, and mental health characteristics to generate text that sounds like real individuals, opening the door to training simulations and personalized content.
+ https://hai.stanford.edu/news/todays-ai-talks-like-nobody-new-research-gives-it-real-personality + News + Mon, 08 Jun 2026 00:00:00 +0000 + + + Reading Today’s Headlines Through AI: A Real-Time Audit of Six Commercial Chatbots + https://hai.stanford.edu/news/reading-todays-headlines-through-ai-a-real-time-audit-of-six-commercial-chatbots + In a new study, scholars measured how accurately popular AI chatbots answered questions about the emerging news and found substantial regional disparity, dependence on distinct information ecosystems, and acute fragility under imperfect prompts. + https://hai.stanford.edu/news/reading-todays-headlines-through-ai-a-real-time-audit-of-six-commercial-chatbots + News + Wed, 03 Jun 2026 00:00:00 +0000 + + + AI Coding Agents Fail at Teamwork + https://hai.stanford.edu/news/ai-coding-agents-fail-at-teamwork + Two models working together perform worse than one alone, exposing a critical gap in artificial intelligence capabilities. + https://hai.stanford.edu/news/ai-coding-agents-fail-at-teamwork + News + Mon, 01 Jun 2026 00:00:00 +0000 + + + How AI is Transforming Scientific Discovery While Keeping Humans at the Center + https://hai.stanford.edu/news/how-ai-is-transforming-scientific-discovery-while-keeping-humans-at-the-center + From designing new antibodies to simulating 1,000 years of climate in a day, AI is transforming what's possible—but humans remain the ones deciding what matters. + https://hai.stanford.edu/news/how-ai-is-transforming-scientific-discovery-while-keeping-humans-at-the-center + News + Wed, 27 May 2026 00:00:00 +0000 + + + AI Hiring Tools Can Yield Racial Bias and Systemic Rejection + https://hai.stanford.edu/news/ai-hiring-tools-can-yield-racial-bias-and-systemic-rejection + The first large-scale study of hiring algorithms in the wild finds concerning patterns to how systems reject candidates. 
+ https://hai.stanford.edu/news/ai-hiring-tools-can-yield-racial-bias-and-systemic-rejection + News + Tue, 26 May 2026 00:00:00 +0000 + + + diff --git a/makefiles/feeds.mk b/makefiles/feeds.mk index 646c6d153f..b24dd06fd4 100644 --- a/makefiles/feeds.mk +++ b/makefiles/feeds.mk @@ -206,6 +206,13 @@ feeds_huggingface_research_full: ## Generate RSS feed for Hugging Face Blog (Res $(Q)uv run feed_generators/huggingface_research_blog.py --full $(call print_success,Hugging Face research feed generated - full reset) +.PHONY: feeds_stanford_hai_news +feeds_stanford_hai_news: ## Generate RSS feed for Stanford HAI News + $(call check_venv) + $(call print_info,Generating Stanford HAI News feed) + $(Q)uv run feed_generators/stanford_hai_news_blog.py + $(call print_success,Stanford HAI News feed generated) + .PHONY: feeds_meta_ai feeds_meta_ai: ## Generate RSS feed for AI at Meta Blog (incremental) $(call check_venv)