|
| 1 | +import os |
| 2 | +from urllib.parse import urljoin |
| 3 | +from datetime import datetime |
| 4 | +import argparse |
| 5 | + |
def parse_summary(summary_path="src/SUMMARY.md"):
    """Yield the link targets listed in an mdBook SUMMARY.md file.

    Every line that contains a Markdown link (``[title](target)``) contributes
    the target of its first link. Targets ending in ``.md`` are rewritten to
    end in ``.html``; all other targets are yielded unchanged.

    Args:
        summary_path: Path of the summary file to read. Defaults to
            ``src/SUMMARY.md`` relative to the current working directory.

    Yields:
        str: One link target per linked line, in file order. Links with an
        empty target are skipped.

    Raises:
        OSError: If the summary file cannot be opened.
    """
    # Read as UTF-8 explicitly rather than relying on the platform default.
    with open(summary_path, "r", encoding="utf-8") as file:
        for line in file:
            if "](" not in line:
                continue
            # Text between the first "](" and the next ")" is the link target.
            url = line.split("](", 1)[1].split(")", 1)[0]
            # An empty target ("[Title]()") has no page behind it; yielding it
            # would make the caller emit the site root a second time.
            if not url:
                continue
            # Source chapters are Markdown; the published pages are HTML.
            if url.endswith(".md"):
                url = url[:-3] + ".html"
            yield url
| 16 | + |
def determine_priority(url_path, higher_priority_section):
    """Return the sitemap priority of a page as a string.

    Args:
        url_path: Page path relative to the site root, with or without a
            leading ``./`` or ``/`` (for example ``./design/intro.html``).
            The empty string stands for the root itself.
        higher_priority_section: Directory path (for example ``design``)
            whose pages get a raised priority, or ``None``/empty for none.

    Returns:
        ``"1.0"`` for the root and pages directly under it, ``"0.8"`` for
        pages inside ``higher_priority_section``, and ``"0.5"`` otherwise.
    """
    # Links may be written "./page.html", "/page.html" or "page.html";
    # normalise them so every spelling is classified the same way.
    path = url_path[2:] if url_path.startswith("./") else url_path
    path = path.lstrip("/")

    # No remaining separator means the page sits directly under the base URL.
    if "/" not in path:
        return "1.0"

    if higher_priority_section:
        section = higher_priority_section
        if section.startswith("./"):
            section = section[2:]
        section = section.strip("/")
        # Require the separator after the section name so that "design" does
        # not also match a sibling directory such as "designs/".
        if section and path.startswith(section + "/"):
            return "0.8"

    return "0.5"
| 25 | + |
def generate_sitemap(domain, output_path, higher_priority_section):
    """Generate a sitemap XML file from the SUMMARY.md structure.

    The sitemap lists the site root followed by every page returned by
    ``parse_summary()``, each with a weekly change frequency and the priority
    computed by ``determine_priority()``.

    Args:
        domain: Host name of the site, optionally followed by a path prefix
            (for example ``example.org`` or ``example.org/docs``).
            ``https://`` is prepended unless a scheme is already present.
        output_path: File the sitemap is written to; an existing file is
            overwritten.
        higher_priority_section: Section path forwarded to
            ``determine_priority()``, or ``None``.

    Raises:
        OSError: If the summary cannot be read or the output cannot be written.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    base_url = domain if "://" in domain else "https://" + domain
    # urljoin() replaces the last path segment of a base that lacks a trailing
    # slash, which would drop a path prefix such as "/docs".
    if not base_url.endswith("/"):
        base_url += "/"

    # The empty path stands for the site root and is listed first.
    urls = [""] + list(parse_summary())

    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n',
    ]

    for url in urls:
        # Escape XML special characters so the document stays well-formed.
        full_url = escape(
            urljoin(base_url, url), {"'": "&apos;", '"': "&quot;"}
        )
        priority = determine_priority(url, higher_priority_section)

        parts.append(" <url>\n")
        parts.append(f" <loc>{full_url}</loc>\n")
        parts.append(" <changefreq>weekly</changefreq>\n")
        parts.append(f" <priority>{priority}</priority>\n")
        parts.append(" </url>\n")

    parts.append("</urlset>")

    # The XML declaration promises UTF-8, so write with that encoding.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write("".join(parts))
| 50 | + |
if __name__ == "__main__":
    # Command-line entry point: read the site domain, the output location and
    # an optional higher-priority section, then write the sitemap.
    cli = argparse.ArgumentParser(description="Generate a sitemap for mdBook")
    cli.add_argument(
        "-d",
        "--domain",
        required=True,
        help="Domain for the mdBook site (e.g., component-model.bytecodealliance.org)",
    )
    cli.add_argument(
        "-o",
        "--output-path",
        default="sitemap.xml",
        help="Output path for the sitemap file",
    )
    cli.add_argument(
        "-p",
        "--higher-priority",
        help="Subsection path (e.g., 'design') to assign a higher priority of 0.8",
    )
    options = cli.parse_args()

    generate_sitemap(
        options.domain,
        options.output_path,
        options.higher_priority,
    )
0 commit comments