diff --git a/archive.py b/archive.py index cdb1a4a95bca0f..9695800c5a9622 100755 --- a/archive.py +++ b/archive.py @@ -49,6 +49,8 @@ from lib.website import build_website +from lib.sitemap import build_sitemap + try: import settings except ModuleNotFoundError: @@ -145,6 +147,12 @@ def run(): parser.add_argument( "-b", action="store_true", default=False, help="Build .md files" ) + parser.add_argument( + "--no-sitemap", + action="store_true", + default=False, + help="Don't build sitemap files", + ) parser.add_argument( "-t", action="store_true", default=False, help="Make a clean json archive" ) @@ -206,6 +214,8 @@ def run(): settings.page_head_html, settings.page_footer_html, ) + if not results.no_sitemap: + build_sitemap(settings.site_url, md_root.as_posix(), md_root.as_posix()) if __name__ == "__main__": diff --git a/entrypoint.sh b/entrypoint.sh index 55a2e0d3456e38..0dd4d9452c0449 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -32,8 +32,7 @@ python3 get-pip.py pip install virtualenv virtualenv -p python3 . source bin/activate -pip3 install zulip==0.6.3 -pip3 install pyyaml==5.2 +pip3 install -r requirements.txt # crudini is not available as an Alpine pkg, so we install via pip. 
pip3 install crudini diff --git a/lib/sitemap.py b/lib/sitemap.py new file mode 100644 index 00000000000000..2c294537ae506d --- /dev/null +++ b/lib/sitemap.py @@ -0,0 +1,16 @@ +from glob import iglob +from typing import Iterator + +from xml_sitemap_writer import XMLSitemap + + +def build_sitemap(base_url: str, archive_dir_path: str, sitemap_write_dir_path: str): + def iterate_html_files() -> Iterator[str]: + # Iterator yields relative paths like + # archive/stream/10-errors/topic/laptop.html + # TODO: Investigate behavior when running on Windows + # TODO: Must ensure that the relative URLs are valid + return iglob("**/*.html", root_dir=archive_dir_path, recursive=True) + + with XMLSitemap(sitemap_write_dir_path, base_url) as sitemap: + sitemap.add_urls(iterate_html_files()) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000000000..eca59207142342 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pyyaml==5.2 +xml-sitemap-writer==0.5.0 +zulip==0.6.3