@@ -20,90 +20,106 @@ class Pokemon(BaseModel):
     index: str
     html_url: str
     img_url: str
-    html_filepath: str
-    img_filepath: str
-    json_filepath: str
+    html_filename: str
+    img_filename: str
+    json_filename: str
     description: str = ""
     appears_in_book: bool = False
 
 
-def download_to_file(url: str, filepath: str, override=False):
-    """Downloads url into filepath."""
-    if os.path.isfile(filepath) and override is False:
-        logging.debug(f"'{filepath}' exists.")
+def download_to_file(url: str, filename: str, override=False):
+    """Downloads url into filename."""
+    if os.path.isfile(filename) and override is False:
+        logging.debug(f"'{filename}' exists.")
         return
 
     headers = {
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
     }
     r = requests.get(url, headers=headers)
     if r.status_code != 200:
-        logging.warning(f"Could not download '{filepath}'")
-        return
+        logging.critical(f"Could not download '{filename}'.")
+        sys.exit(1)
 
     # Works for text and images
-    with open(filepath, "wb") as f:
+    with open(filename, "wb") as f:
         for c in r:
             f.write(c)
-    logging.debug(f"'{filepath}' downloaded.")
+    logging.debug(f"'{filename}' downloaded.")
 
 
-def get_pokemon() -> List[Pokemon]:
-    """Scrape Pokemon from the Bulbapedia national dex"""
-    NATIONAL_INDEX_FILEPATH = os.path.join(POKEMON_CACHE_DIRECTORY, "pokedex.html")
-    download_to_file(NATIONAL_INDEX_URL, NATIONAL_INDEX_FILEPATH)
-    with open(NATIONAL_INDEX_FILEPATH, "r") as r:
-        soup = BeautifulSoup(r, "html.parser")
-    pokemon_list_soup: BeautifulSoup = soup.find(
-        id="List_of_Pokémon_by_National_Pokédex_number"
-    ).parent
-    generation_soups: BeautifulSoup = pokemon_list_soup.find_next_siblings("h3")
+def download_national_index_html(national_index_filename: str):
+    download_to_file(NATIONAL_INDEX_URL, national_index_filename)
+
 
+def get_pokemon_table_row_soups(national_index_filename: str) -> List[BeautifulSoup]:
+    with open(national_index_filename, "r") as r:
+        soup = BeautifulSoup(r, "html.parser")
+    pokemon_list_soup = soup.find(id="List_of_Pokémon_by_National_Pokédex_number").parent
+    generation_soups = pokemon_list_soup.find_next_siblings("h3")
     table_row_soups = []
     for generation_soup in generation_soups:
-        table_soup: BeautifulSoup = generation_soup.find_next_sibling("table")
-        tbody_soup: BeautifulSoup = generation_soup.find_next("tbody")
+        table_soup = generation_soup.find_next_sibling("table")
+        tbody_soup = generation_soup.find_next("tbody")
         # skip first row because it is the header
         table_row_soups += tbody_soup.find_all("tr", recursive=False)[1:]
+    return table_row_soups
+
+
+def extract_pokemon_from_table_row(table_row_soup: BeautifulSoup) -> Pokemon:
+    name = table_row_soup.find_next("th").next_element.attrs["title"]
+
+    # load Pokemon from JSON if it already exists
+    json_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".json")
+    if os.path.isfile(json_filename):
+        p = Pokemon.parse_file(json_filename)
+        logging.debug(f"Loaded '{p.json_filename}'.")
+        return p
+
+    index = table_row_soup.find_next("td").next_sibling.next_sibling.text.strip()
+    html_url = (
+        BULBAPEDIA_BASE_URL
+        + table_row_soup.find_next("th").next_element.attrs["href"]
+    )
+    img_url = table_row_soup.find("img").attrs["src"]
+    html_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".html")
+    img_filename = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".png")
+    return Pokemon(
+        name=name,
+        index=index,
+        html_url=html_url,
+        img_url=img_url,
+        html_filename=html_filename,
+        img_filename=img_filename,
+        json_filename=json_filename,
+    )
+
+
+def get_pokemon() -> List[Pokemon]:
+    """Scrape Pokemon from the Bulbapedia national dex"""
+    if not os.path.isdir(POKEMON_CACHE_DIRECTORY):
+        os.mkdir(POKEMON_CACHE_DIRECTORY)
+    national_index_filename = os.path.join(POKEMON_CACHE_DIRECTORY, "pokedex.html")
+    download_national_index_html(national_index_filename)
+    table_row_soups = get_pokemon_table_row_soups(national_index_filename)
 
     pokemon = []
     for table_row_soup in track(table_row_soups, description="Download Pokemon"):
-        name = table_row_soup.find_next("th").next_element.attrs["title"]
+        p = extract_pokemon_from_table_row(table_row_soup)
 
-        # ignore Galarian and Alolan Pokemon so
-        if pokemon and pokemon[-1].name == name:
+        # Ignore Galarian and Alolan Pokemon (Pokemon with the same name)
+        if pokemon and pokemon[-1].name == p.name:
             continue
+        pokemon.append(p)
 
-        # load Pokemon from JSON if it already exists
-        json_filepath = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".json")
-        if os.path.isfile(json_filepath):
-            p = Pokemon.parse_file(json_filepath)
-            pokemon.append(p)
-            logging.debug(f"Loaded {p.json_filepath}.")
+        # Pokemon has already been downloaded
+        if p.description and os.path.isfile(p.img_filename):
            continue
 
-        index = table_row_soup.find_next("td").next_sibling.next_sibling.text.strip()
-        html_url = (
-            BULBAPEDIA_BASE_URL
-            + table_row_soup.find_next("th").next_element.attrs["href"]
-        )
-        img_url = table_row_soup.find("img").attrs["src"]
-        html_filepath = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".html")
-        img_filepath = os.path.join(POKEMON_CACHE_DIRECTORY, name.lower() + ".png")
-        p = Pokemon(
-            name=name,
-            index=index,
-            html_url=html_url,
-            img_url=img_url,
-            html_filepath=html_filepath,
-            img_filepath=img_filepath,
-            json_filepath=json_filepath,
-        )
-        pokemon.append(p)
         extend_pokemon(p)
-        with open(p.json_filepath, "w") as f:
+        with open(p.json_filename, "w") as f:
             f.write(p.json())
-        logging.debug(f"Saved {p.json_filepath}.")
+        logging.debug(f"Saved {p.json_filename}.")
 
     # Filter out speculative Pokemon
     pokemon = [
@@ -117,8 +133,8 @@ def get_pokemon() -> List[Pokemon]:
 
 def extend_pokemon(p: Pokemon):
     """Add description and download Pokemon image"""
-    download_to_file(p.html_url, p.html_filepath)
-    with open(p.html_filepath, "r") as r:
+    download_to_file(p.html_url, p.html_filename)
+    with open(p.html_filename, "r") as r:
         soup = BeautifulSoup(r, "html.parser")
     content_soup: BeautifulSoup = soup.find(id="mw-content-text").contents[0]
 
@@ -136,4 +152,4 @@ def extend_pokemon(p: Pokemon):
     )
     img_url = img_url.replace("//", "https://")
     p.img_url = img_url
-    download_to_file(img_url, p.img_filepath)
+    download_to_file(img_url, p.img_filename)
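
For context, a minimal usage sketch of the pipeline as refactored by this commit. This is not part of the commit itself; it assumes the scraper module is importable as pokemon and that POKEMON_CACHE_DIRECTORY, NATIONAL_INDEX_URL, and BULBAPEDIA_BASE_URL remain defined at module level.

# Hypothetical usage sketch, not part of the commit above.
# Assumes the scraper module is importable as "pokemon".
from pokemon import get_pokemon

# get_pokemon() now creates the cache directory itself, downloads the
# national dex page, and caches each Pokemon as JSON on first run;
# subsequent runs load the cached JSON instead of re-downloading.
pokemon_list = get_pokemon()
for p in pokemon_list[:3]:
    print(p.index, p.name, p.json_filename)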