Infra: replace feedgen/lxml dependency to test Python 3.12-dev (python#2973)

hugovk · web-flow · commit b173099ac68e · 2023-01-21T16:08:38.000+02:00
diff --git a/.github/workflows/render.yml b/.github/workflows/render.yml
@@ -9,7 +9,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.x", "3.11-dev"]
+        python-version: ["3.x", "3.12-dev"]
 
     steps:
       - name: 🛎️ Checkout
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -22,11 +22,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11-dev"]
+        python-version: ["3.9", "3.10", "3.11", "3.12-dev"]
         os: [windows-latest, macos-latest, ubuntu-latest]
-        # lxml doesn't yet install for 3.11 on Windows
-        exclude:
-        - { python-version: "3.11-dev", os: windows-latest }
 
     steps:
       - uses: actions/checkout@v3
diff --git a/Makefile b/Makefile
@@ -29,7 +29,7 @@ check-links: venv
 ## rss            to generate the peps.rss file
 .PHONY: rss
 rss: venv
-	$(VENVDIR)/bin/python3 generate_rss.py
+	$(VENVDIR)/bin/python3 generate_rss.py -o $(OUTPUT_DIR)
 
 ## clean          to remove the venv and build files
 .PHONY: clean
diff --git a/generate_rss.py b/generate_rss.py
@@ -2,8 +2,10 @@
 # This file is placed in the public domain or under the
 # CC0-1.0-Universal license, whichever is more permissive.
 
+import argparse
 import datetime
 import email.utils
+from html import escape
 from pathlib import Path
 import re
 
@@ -12,19 +14,16 @@
 from docutils import utils
 from docutils.parsers import rst
 from docutils.parsers.rst import roles
-from feedgen import entry
-from feedgen import feed
 
 # get the directory with the PEP sources
 PEP_ROOT = Path(__file__).parent
 
 
-# Monkeypatch feedgen.util.formatRFC2822
 def _format_rfc_2822(dt: datetime.datetime) -> str:
+    dt = dt.replace(tzinfo=datetime.timezone.utc)
     return email.utils.format_datetime(dt, usegmt=True)
 
 
-entry.formatRFC2822 = feed.formatRFC2822 = _format_rfc_2822
 line_cache: dict[Path, dict[str, str]] = {}
 
 # Monkeypatch PEP and RFC reference roles to match Sphinx behaviour
@@ -137,6 +136,15 @@ def pep_abstract(full_path: Path) -> str:
 
 
 def main():
+    parser = argparse.ArgumentParser(description="Generate RSS feed")
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        default="build",  # synchronise with render.yml -> deploy step
+        help="Output directory, relative to root. Default 'build'.",
+    )
+    args = parser.parse_args()
+
     # get list of peps with creation time (from "Created:" string in pep source)
     peps_with_dt = sorted((pep_creation(path), path) for path in PEP_ROOT.glob("pep-????.???"))
 
@@ -152,21 +160,20 @@ def main():
         author = first_line_starting_with(full_path, "Author:")
         if "@" in author or " at " in author:
             parsed_authors = email.utils.getaddresses([author])
-            # ideal would be to pass as a list of dicts with names and emails to
-            # item.author, but FeedGen's RSS <author/> output doesn't pass W3C
-            # validation (as of 12/06/2021)
             joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
         else:
             joined_authors = author
         url = f"https://peps.python.org/pep-{pep_num:0>4}/"
 
-        item = entry.FeedEntry()
-        item.title(f"PEP {pep_num}: {title}")
-        item.link(href=url)
-        item.description(pep_abstract(full_path))
-        item.guid(url, permalink=True)
-        item.published(dt.replace(tzinfo=datetime.timezone.utc))  # ensure datetime has a timezone
-        item.author(email=joined_authors)
+        item = f"""\
+    <item>
+      <title>PEP {pep_num}: {escape(title, quote=False)}</title>
+      <link>{escape(url, quote=False)}</link>
+      <description>{escape(pep_abstract(full_path), quote=False)}</description>
+      <author>{escape(joined_authors, quote=False)}</author>
+      <guid isPermaLink="true">{url}</guid>
+      <pubDate>{_format_rfc_2822(dt)}</pubDate>
+    </item>"""
         items.append(item)
 
     # The rss envelope
@@ -175,28 +182,28 @@ def main():
     language features, and some meta-information like release
     procedure and schedules.
     """
-
-    # Setup feed generator
-    fg = feed.FeedGenerator()
-    fg.language("en")
-    fg.generator("")
-    fg.docs("https://cyber.harvard.edu/rss/rss.html")
-
-    # Add metadata
-    fg.title("Newest Python PEPs")
-    fg.link(href="https://peps.python.org")
-    fg.link(href="https://peps.python.org/peps.rss", rel="self")
-    fg.description(" ".join(desc.split()))
-    fg.lastBuildDate(datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc))
-
-    # Add PEP information (ordered by newest first)
-    for item in items:
-        fg.add_entry(item)
+    last_build_date = _format_rfc_2822(datetime.datetime.utcnow())
+    items = "\n".join(reversed(items))
+    output = f"""\
+<?xml version='1.0' encoding='UTF-8'?>
+<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
+  <channel>
+    <title>Newest Python PEPs</title>
+    <link>https://peps.python.org/peps.rss</link>
+    <description>{" ".join(desc.split())}</description>
+    <atom:link href="https://peps.python.org/peps.rss" rel="self"/>
+    <docs>https://cyber.harvard.edu/rss/rss.html</docs>
+    <language>en</language>
+    <lastBuildDate>{last_build_date}</lastBuildDate>
+{items}
+  </channel>
+</rss>
+"""
 
     # output directory for target HTML files
-    out_dir = PEP_ROOT / "build"
-    out_dir.mkdir(exist_ok=True)
-    out_dir.joinpath("peps.rss").write_bytes(fg.rss_str(pretty=True))
+    out_dir = PEP_ROOT / args.output_dir
+    out_dir.mkdir(exist_ok=True, parents=True)
+    out_dir.joinpath("peps.rss").write_text(output)
 
 
 if __name__ == "__main__":
diff --git a/readthedocs.yaml b/readthedocs.yaml
@@ -3,7 +3,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.10"
+    python: "3.11"
 
   commands:
     - make dirhtml JOBS=$(nproc) OUTPUT_DIR=_readthedocs/html
diff --git a/requirements.txt b/requirements.txt
@@ -5,9 +5,6 @@ Pygments >= 2.9.0
 Sphinx >= 5.1.1, != 6.1.0, != 6.1.1
 docutils >= 0.19.0
 
-# For RSS
-feedgen >= 0.9.0  # For RSS feed
-
 # For tests
 pytest
 pytest-cov
diff --git a/tox.ini b/tox.ini
@@ -1,6 +1,6 @@
 [tox]
 envlist =
-    py{311, 310, 39}
+    py{312, 311, 310, 39}
 skipsdist = true
 
 [testenv]