Skip to content

Commit 0674b09

Browse files
feat: added sitemap script to update-gh-pages.yml (#745)
* feat: added sitemap script to update-gh-pages.yml * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 6b149f2 commit 0674b09

File tree

2 files changed

+149
-3
lines changed

2 files changed

+149
-3
lines changed

.github/workflows/update-gh-pages.yml

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,41 @@
1-
2-
name: Update index.html in GH Pages
1+
name: Custom Github pages post-processing
32

43
on:
54
workflow_dispatch:
65
workflow_call:
6+
push:
7+
tags:
8+
- "*"
9+
10+
env:
11+
MAIN_PYTHON_VERSION: '3.12'
712

813
jobs:
914
update-gh-pages:
1015
runs-on: ubuntu-latest
1116

1217
steps:
18+
- name: Install Python
19+
uses: actions/setup-python@v5
20+
with:
21+
python-version: ${{ env.MAIN_PYTHON_VERSION }}
22+
23+
- name: Install Python dependencies
24+
run: |
25+
python -m pip install --upgrade pip
26+
python -m pip install requests
27+
1328
- name: Checkout repository
1429
uses: actions/checkout@v4
30+
31+
- name: Copy tools folder to /tmp and run sitemap script
32+
working-directory: /tmp
33+
run: |
34+
cp -r /home/runner/work/pyansys/pyansys/tools/ .
35+
python ./tools/catsitemap.py
36+
37+
- name: Checkout repository gh-pages branch
38+
uses: actions/checkout@v4
1539
with:
1640
ref: gh-pages
1741

@@ -24,8 +48,12 @@ jobs:
2448
# Replace "version/stable" with "version/dev" in the sitemap.xml
2549
sed -i 's/version\/stable/version\/dev/g' sitemap.xml
2650
51+
- name: Move sitemaps/ to gh-pages root
52+
run: |
53+
rm -rf sitemaps/ && mv /tmp/sitemaps/ .
54+
2755
- name: "Commit changes"
2856
uses: EndBug/add-and-commit@v9
2957
with:
3058
default_author: github_actions
31-
message: "update index.html"
59+
message: "update index.html and sitemaps folder"

tools/catsitemap.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""Script for automatic generation/download of sitemaps for pyansys projects.
2+
3+
Intended for the pyansys project and to be used with the update-gh-pages
4+
workflow.
5+
6+
"""
7+
8+
from pathlib import Path
9+
from xml.dom import minidom
10+
import xml.etree.ElementTree as ET
11+
12+
from links import LINKS
13+
import requests
14+
15+
16+
def download_file(url: str, dest_path: Path) -> None:
    """Download the sitemap at ``url`` and save it to ``dest_path``.

    Parameters
    ----------
    url : str
        The url of the sitemap file to be downloaded
    dest_path : Path
        The destination path to save the downloaded file

    Raises
    ------
    requests.exceptions.Timeout
        Raises this exception when accessing a link takes too long
    requests.exceptions.HTTPError
        Raises this exception when the server answers with an error status
        (4xx/5xx), so that an error page is never saved as a sitemap
    """
    # Send the request; the whole body is needed at once, so no streaming
    response = requests.get(url, timeout=30)

    # Fail loudly instead of writing an HTML error page into an .xml file
    response.raise_for_status()

    # Write the raw bytes: this keeps the file consistent with the encoding
    # declared in its own XML prolog instead of re-encoding a guessed decoding
    dest_path.write_bytes(response.content)
37+
38+
39+
def extract_urls_and_headers(links_dict: dict) -> tuple:
    """Extract valid project names and sitemap urls from metadata dictionary.

    Projects whose documentation url is ``None`` or whose sitemap url
    answers with a 404 status are left out of the result.

    Parameters
    ----------
    links_dict : dict
        Dictionary containing metadata of projects, mapping each project
        name to its documentation url (or ``None``)

    Returns
    -------
    tuple
        Contains the list of project names and the list of sitemap urls,
        in matching order

    Raises
    ------
    requests.exceptions.Timeout
        Raises this exception when probing a sitemap url takes too long
    """
    valid_project_names = []
    valid_urls = []
    for project_name, url in links_dict.items():
        # The form of url is "https://subdomain.docs.pyansys.com/version/stable"
        # where subdomain may contain nested subdomains depending on the project
        # see links.py from which LINKS was imported for examples
        if url is None:
            continue

        # url is changed to "https://subdomain.docs.pyansys.com/sitemap.xml"
        # this is general form of the link to the sitemap file of each project
        updated_url = url.split("docs.pyansys.com")[0] + "docs.pyansys.com/sitemap.xml"

        # Probe the sitemap itself (not the documentation landing page): a
        # project can have published docs without a sitemap.xml. The timeout
        # keeps an unresponsive host from hanging the whole run.
        if requests.get(updated_url, timeout=30).status_code == 404:
            continue

        valid_project_names.append(project_name)
        valid_urls.append(updated_url)

    return valid_project_names, valid_urls
70+
71+
72+
def generate_sitemap_index(project_names: list, dest_path: Path) -> None:
    """Generate the global sitemap file which will point to all other sitemaps.

    One ``<sitemap><loc>`` entry is written per project, pointing to
    ``https://docs.pyansys.com/sitemaps/<project>_sitemap.xml``.

    Parameters
    ----------
    project_names : list
        List of project names with a downloadable sitemap file
    dest_path : Path
        The destination path to save the generated sitemap file
    """
    # Create the root element with namespace
    sitemap_index = ET.Element("sitemapindex", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

    # Create sitemap elements for each URL
    for project in project_names:
        # Point to the gh-pages directory the files are published in; it must
        # match the "sitemaps" folder name the per-project files are saved to
        modified_url = f"https://docs.pyansys.com/sitemaps/{project}_sitemap.xml"

        sitemap = ET.SubElement(sitemap_index, "sitemap")
        loc = ET.SubElement(sitemap, "loc")
        loc.text = modified_url

    # Format XML with indentation
    rough_string = ET.tostring(sitemap_index, "utf-8")
    reparsed = minidom.parseString(rough_string)
    pretty_xml = reparsed.toprettyxml(indent=" ")

    # Write the formatted document to the XML file
    with dest_path.open(mode="w", encoding="utf-8") as file:
        file.write(pretty_xml)
102+
103+
104+
if __name__ == "__main__":
    # Output directory, resolved against the current working directory
    sitemaps_dir = Path(".") / "sitemaps"
    sitemaps_dir.mkdir(parents=True, exist_ok=True)

    # Names and sitemap URLs come back as two lists in matching order
    names, urls = extract_urls_and_headers(LINKS)

    # Write the index that references every per-project sitemap
    generate_sitemap_index(names, sitemaps_dir / "globalsitemap.xml")

    # Fetch each project's sitemap next to the index
    for name, sitemap_url in zip(names, urls):
        download_file(sitemap_url, sitemaps_dir / (name + "_sitemap.xml"))

0 commit comments

Comments
 (0)