Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions kemono_dl/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def parse_args():
parser.add_argument("--skip-extensions", metavar="EXTs", type=str, help="A comma seperated list of file extensions to skip (Do not include the period) (Checks the extention of the filename not the server filename).")
parser.add_argument("--skip-attachments", action="store_true", help="Skip downloading post attachments.")
parser.add_argument("--write-content", action="store_true", help="Write Post content to an html file.")
parser.add_argument("--cyberdrop-dl-appdata", type=str, help="Cyberdrop-dl pip module appdata folder path.")


return parser.parse_args()

Expand Down Expand Up @@ -96,6 +98,7 @@ def main() -> None:

output_templates = {
"attachments": KemonoDL.DEFAULT_OUTPUT_TEMPLATE,
"links": KemonoDL.LINK_TEMPLATE,
# "pfp": KemonoDL.DEFAULT_OUTPUT_TEMPLATE,
# "banner": KemonoDL.DEFAULT_OUTPUT_TEMPLATE,
"content": KemonoDL.DEFAULT_OUTPUT_TEMPLATE,
Expand Down Expand Up @@ -140,14 +143,14 @@ def main() -> None:
print(kemono_dl.isLoggedin(KemonoDL.KEMONO_DOMAIN))

if args.favorite_creators_coomer:
kemono_dl.download_favorite_creators(KemonoDL.COOMER_DOMAIN)
kemono_dl.download_favorite_creators(KemonoDL.COOMER_DOMAIN, args.cyberdrop_dl_appdata)

if args.favorite_creators_kemono:
kemono_dl.download_favorite_creators(KemonoDL.KEMONO_DOMAIN)
kemono_dl.download_favorite_creators(KemonoDL.KEMONO_DOMAIN, args.cyberdrop_dl_appdata)

if args.URL:
for url in args.URL:
kemono_dl.download_url(url)
kemono_dl.download_url(url, args.cyberdrop_dl_appdata)

if args.batch_file:
for batch_file in args.batch_file:
Expand All @@ -159,7 +162,7 @@ def main() -> None:
batch_urls = [line.strip() for line in f.readlines() if not line.startswith("#")]

for url in batch_urls:
kemono_dl.download_url(url)
kemono_dl.download_url(url, args.cyberdrop_dl_appdata)

print("Complete")

Expand Down
43 changes: 36 additions & 7 deletions kemono_dl/kemono_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
import mimetypes
import os
import re
import subprocess
import time
from bs4 import BeautifulSoup
from http.cookiejar import LoadError
from pathlib import Path
from typing import List, Literal

from requests.exceptions import RequestException
Expand All @@ -24,12 +27,13 @@ class KemonoDL:
POST_STEP_SIZE = 50
URL_PARSE_PATTERN = r"^https://(kemono|coomer)\.\w+/([^/]+)/user/([^/]+)(?:/post/([^/]+))?$"
DEFAULT_OUTPUT_TEMPLATE = "{service}/{creator_id}/{post_id}/{filename}"

LINK_TEMPLATE = "{service}/{creator_id}/{post_id}"
def __init__(
self,
path: str = os.getcwd(),
output_templates: dict = {
"attachments": DEFAULT_OUTPUT_TEMPLATE,
"links": LINK_TEMPLATE,
# "pfp": DEFAULT_OUTPUT_TEMPLATE,
# "banner": DEFAULT_OUTPUT_TEMPLATE,
"content": DEFAULT_OUTPUT_TEMPLATE,
Expand Down Expand Up @@ -186,7 +190,7 @@ def get_favorit_post_ids(self, domain: str) -> List[str] | None:
print(f"[Error] Failed to fetch favorite posts from {url!r}: {e}")
return None

def download_favorite_creators(self, domain: str) -> None:
def download_favorite_creators(self, domain: str, cyberdrop_dl_appdata: None) -> None:
if not self.isLoggedin(domain):
print(f"[Error] You are not logged into {domain!r}")
return
Expand All @@ -202,12 +206,12 @@ def download_favorite_creators(self, domain: str) -> None:
time.sleep(0.5)
post = self.get_post(domain, creator.service, creator.id, post_id)
if post:
self.download_post(domain, post)
self.download_post(domain, post, cyberdrop_dl_appdata)

def download_favorite_posts(self, domain: str):
    """Placeholder for downloading the favorite posts of *domain*.

    Not implemented: the call performs no work and returns ``None``.
    """
    return None

def download_url(self, url: str) -> None:
def download_url(self, url: str, cyberdrop_dl_appdata: None) -> None:
parsed_url = self.parse_url(url)

if parsed_url is None:
Expand All @@ -218,14 +222,14 @@ def download_url(self, url: str) -> None:
if parsed_url["post_id"]:
post = self.get_post(domain, parsed_url["service"], parsed_url["creator_id"], parsed_url["post_id"])
if post:
self.download_post(domain, post)
self.download_post(domain, post, cyberdrop_dl_appdata)
else:
post_ids = self.get_all_creator_post_ids(domain, parsed_url["service"], parsed_url["creator_id"])
for post_id in post_ids:
time.sleep(0.5)
post = self.get_post(domain, parsed_url["service"], parsed_url["creator_id"], post_id)
if post:
self.download_post(domain, post)
self.download_post(domain, post, cyberdrop_dl_appdata)

def download_creator_banner(self, domain: str, service: str, creator_id: str) -> None:
self._download_special(domain, service, creator_id, "banner")
Expand Down Expand Up @@ -265,8 +269,12 @@ def _download_special(self, domain: str, service: str, creator_id: str, type: st
# url=url,
# filepath=file_path,
# )
def get_links(self, content: str) -> List:
    """Return the ``href`` value of every anchor tag found in *content*.

    Args:
        content: HTML markup to scan. Empty or ``None`` input yields an
            empty list instead of being handed to the parser.

    Returns:
        A list with the ``href`` attribute of each ``<a>`` tag that has
        one, in the order ``find_all`` returns them.
    """
    # Nothing to parse: avoid passing None/"" to BeautifulSoup, which
    # cannot handle a None markup argument.
    if not content:
        return []
    soup = BeautifulSoup(content, "html.parser")
    # href=True limits the match to anchors that actually carry an href,
    # so the subscript below cannot raise KeyError.
    return [anchor["href"] for anchor in soup.find_all("a", href=True)]

def download_post(self, domain: str, post: Post) -> None:
def download_post(self, domain: str, post: Post, cyberdrop_dl_appdata: None) -> None:
if f"{post.service}/user/{post.user}/post/{post.id}" in self.archived_posts:
print(f"[info] Post {post.id!r} already archived. Skipping.")
return
Expand All @@ -287,6 +295,27 @@ def download_post(self, domain: str, post: Post) -> None:
else:
self.download_post_attachments(domain, creator, post)

if cyberdrop_dl_appdata is not None:
# Download URLs that may be in the post content.
links = self.get_links(post.content)
for link in links:
appdata_folder = Path(cyberdrop_dl_appdata)
template_variables = FileTemplateVaribales(creator, post, None)
file_path = generate_file_path(
self.path,
self.output_templates.get("links", {}),
template_variables.toDict(self.custom_template_variables),
self.restrict_names,
)
result = subprocess.run(['cyberdrop-dl ', '--appdata-folder', appdata_folder.resolve(), link, '--download-folder', file_path, '--download', '--ui', 'DISABLED'], capture_output=True, text=True)
time.sleep(1.2)
print("[CYBERDROP-DL] START")
print("[STDOUT]")
print(result.stdout)
print("[STDERR]")
print(result.stderr)
print("[CYBERDROP-DL] END")

if self.write_content:
self.write_post_content(creator, post)

Expand Down
22 changes: 16 additions & 6 deletions kemono_dl/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,22 @@ class FileTemplateVaribales:
index: int

def __init__(self, creator: Creator, post: Post, attachment: Attachment) -> None:
server_filename = attachment.path.split("/")[-1]
server_file_name, server_file_ext = splitext(server_filename)
sha256 = server_file_name
filename = attachment.name
file_name, file_ext = splitext(filename)
index = attachment.index
if attachment is not None:
server_filename = attachment.path.split("/")[-1]
filename = attachment.name
index = attachment.index
server_file_name, server_file_ext = splitext(server_filename)
sha256 = server_file_name
file_name, file_ext = splitext(filename)
else:
server_filename = ""
filename = ""
index = None
server_file_name = ""
server_file_ext = ""
sha256 = ""
file_name = ""
file_ext = ""

self.service = creator.service
self.creator_id = creator.id
Expand Down