diff --git a/kemono_dl/__main__.py b/kemono_dl/__main__.py index d8401bf..935525a 100644 --- a/kemono_dl/__main__.py +++ b/kemono_dl/__main__.py @@ -34,6 +34,8 @@ def parse_args(): parser.add_argument("--skip-extensions", metavar="EXTs", type=str, help="A comma seperated list of file extensions to skip (Do not include the period) (Checks the extention of the filename not the server filename).") parser.add_argument("--skip-attachments", action="store_true", help="Skip downloading post attachments.") parser.add_argument("--write-content", action="store_true", help="Write Post content to an html file.") + parser.add_argument("--cyberdrop-dl-appdata", type=str, help="Cyberdrop-dl pip module appdata folder path.") + return parser.parse_args() @@ -96,6 +98,7 @@ def main() -> None: output_templates = { "attachments": KemonoDL.DEFAULT_OUTPUT_TEMPLATE, + "links": KemonoDL.LINK_TEMPLATE, # "pfp": KemonoDL.DEFAULT_OUTPUT_TEMPLATE, # "banner": KemonoDL.DEFAULT_OUTPUT_TEMPLATE, "content": KemonoDL.DEFAULT_OUTPUT_TEMPLATE, @@ -140,14 +143,14 @@ def main() -> None: print(kemono_dl.isLoggedin(KemonoDL.KEMONO_DOMAIN)) if args.favorite_creators_coomer: - kemono_dl.download_favorite_creators(KemonoDL.COOMER_DOMAIN) + kemono_dl.download_favorite_creators(KemonoDL.COOMER_DOMAIN, args.cyberdrop_dl_appdata) if args.favorite_creators_kemono: - kemono_dl.download_favorite_creators(KemonoDL.KEMONO_DOMAIN) + kemono_dl.download_favorite_creators(KemonoDL.KEMONO_DOMAIN, args.cyberdrop_dl_appdata) if args.URL: for url in args.URL: - kemono_dl.download_url(url) + kemono_dl.download_url(url, args.cyberdrop_dl_appdata) if args.batch_file: for batch_file in args.batch_file: @@ -159,7 +162,7 @@ def main() -> None: batch_urls = [line.strip() for line in f.readlines() if not line.startswith("#")] for url in batch_urls: - kemono_dl.download_url(url) + kemono_dl.download_url(url, args.cyberdrop_dl_appdata) print("Complete") diff --git a/kemono_dl/kemono_dl.py b/kemono_dl/kemono_dl.py index 89d40c2..91a360b 100644 --- a/kemono_dl/kemono_dl.py +++ b/kemono_dl/kemono_dl.py @@ -2,8 +2,11 @@ import mimetypes import os import re +import subprocess import time +from bs4 import BeautifulSoup from http.cookiejar import LoadError +from pathlib import Path from typing import List, Literal from requests.exceptions import RequestException @@ -24,12 +27,13 @@ class KemonoDL: POST_STEP_SIZE = 50 URL_PARSE_PATTERN = r"^https://(kemono|coomer)\.\w+/([^/]+)/user/([^/]+)(?:/post/([^/]+))?$" DEFAULT_OUTPUT_TEMPLATE = "{service}/{creator_id}/{post_id}/{filename}" - + LINK_TEMPLATE = "{service}/{creator_id}/{post_id}" def __init__( self, path: str = os.getcwd(), output_templates: dict = { "attachments": DEFAULT_OUTPUT_TEMPLATE, + "links": LINK_TEMPLATE, # "pfp": DEFAULT_OUTPUT_TEMPLATE, # "banner": DEFAULT_OUTPUT_TEMPLATE, "content": DEFAULT_OUTPUT_TEMPLATE, @@ -186,7 +190,7 @@ def get_favorit_post_ids(self, domain: str) -> List[str] | None: print(f"[Error] Failed to fetch favorite posts from {url!r}: {e}") return None - def download_favorite_creators(self, domain: str) -> None: + def download_favorite_creators(self, domain: str, cyberdrop_dl_appdata: None) -> None: if not self.isLoggedin(domain): print(f"[Error] You are not logged into {domain!r}") return @@ -202,12 +206,12 @@ def download_favorite_creators(self, domain: str) -> None: time.sleep(0.5) post = self.get_post(domain, creator.service, creator.id, post_id) if post: - self.download_post(domain, post) + self.download_post(domain, post, cyberdrop_dl_appdata) def download_favorite_posts(self, domain: str): pass - def download_url(self, url: str) -> None: + def download_url(self, url: str, cyberdrop_dl_appdata: None) -> None: parsed_url = self.parse_url(url) if parsed_url is None: @@ -218,14 +222,14 @@ def download_url(self, url: str) -> None: if parsed_url["post_id"]: post = self.get_post(domain, parsed_url["service"], parsed_url["creator_id"], parsed_url["post_id"]) if post: - self.download_post(domain, post) + self.download_post(domain, post, cyberdrop_dl_appdata) else: post_ids = self.get_all_creator_post_ids(domain, parsed_url["service"], parsed_url["creator_id"]) for post_id in post_ids: time.sleep(0.5) post = self.get_post(domain, parsed_url["service"], parsed_url["creator_id"], post_id) if post: - self.download_post(domain, post) + self.download_post(domain, post, cyberdrop_dl_appdata) def download_creator_banner(self, domain: str, service: str, creator_id: str) -> None: self._download_special(domain, service, creator_id, "banner") @@ -265,8 +269,12 @@ def _download_special(self, domain: str, service: str, creator_id: str, type: st # url=url, # filepath=file_path, # ) + def get_links(self, content:str) -> List: + soup = BeautifulSoup(content, 'html.parser') + links = [a['href'] for a in soup.find_all('a', href=True)] + return links - def download_post(self, domain: str, post: Post) -> None: + def download_post(self, domain: str, post: Post, cyberdrop_dl_appdata: None) -> None: if f"{post.service}/user/{post.user}/post/{post.id}" in self.archived_posts: print(f"[info] Post {post.id!r} already archived. Skipping.") return @@ -287,6 +295,27 @@ def download_post(self, domain: str, post: Post) -> None: else: self.download_post_attachments(domain, creator, post) + if cyberdrop_dl_appdata is not None: + # Download URLs that may be in the post content. + links = self.get_links(post.content) + for link in links: + appdata_folder = Path(cyberdrop_dl_appdata) + template_variables = FileTemplateVaribales(creator, post, None) + file_path = generate_file_path( + self.path, + self.output_templates.get("links", {}), + template_variables.toDict(self.custom_template_variables), + self.restrict_names, + ) + result = subprocess.run(['cyberdrop-dl ', '--appdata-folder', appdata_folder.resolve(), link, '--download-folder', file_path, '--download', '--ui', 'DISABLED'], capture_output=True, text=True) + time.sleep(1.2) + print("[CYBERDROP-DL] START") + print("[STDOUT]") + print(result.stdout) + print("[STDERR]") + print(result.stderr) + print("[CYBERDROP-DL] END") + if self.write_content: self.write_post_content(creator, post) diff --git a/kemono_dl/models.py b/kemono_dl/models.py index 89f9d37..fc70adb 100644 --- a/kemono_dl/models.py +++ b/kemono_dl/models.py @@ -167,12 +167,22 @@ class FileTemplateVaribales: index: int def __init__(self, creator: Creator, post: Post, attachment: Attachment) -> None: - server_filename = attachment.path.split("/")[-1] - server_file_name, server_file_ext = splitext(server_filename) - sha256 = server_file_name - filename = attachment.name - file_name, file_ext = splitext(filename) - index = attachment.index + if attachment is not None: + server_filename = attachment.path.split("/")[-1] + filename = attachment.name + index = attachment.index + server_file_name, server_file_ext = splitext(server_filename) + sha256 = server_file_name + file_name, file_ext = splitext(filename) + else: + server_filename = "" + filename = "" + index = None + server_file_name = "" + server_file_ext = "" + sha256 = "" + file_name = "" + file_ext = "" self.service = creator.service self.creator_id = creator.id