diff --git a/.gitignore b/.gitignore index b6e4761..03be3ba 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,9 @@ dmypy.json # Pyre type checker .pyre/ + +# Bluezip +/bluezip/bluezip.db + +# Testing +/repack \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..033cb57 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "bluezip"] + path = bluezip + url = https://github.com/FlashpointProject/bluezip.git diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..988ce4b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.10 + +WORKDIR /usr/local/bin + +COPY ./ ./ + +RUN pip install -r requirements.txt +RUN pip install -r ./bluezip/requirements.txt +RUN python -m pytest + +CMD ["python", "-m", "uvicorn", "validator-server:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/bluezip b/bluezip new file mode 160000 index 0000000..e2b1e48 --- /dev/null +++ b/bluezip @@ -0,0 +1 @@ +Subproject commit e2b1e48ae5e94c921868640387efbf40bf3945f7 diff --git a/curation_validator.py b/curation_validator.py index 297974e..78ab5be 100644 --- a/curation_validator.py +++ b/curation_validator.py @@ -1,9 +1,14 @@ import base64 +import random import shutil import json import re from enum import Enum, auto -from typing import Optional +import string +from typing import Optional, TypedDict +from datetime import datetime, timedelta +from pydantic import BaseModel +from fastapi import UploadFile import py7zr from cachetools import TTLCache, cached @@ -24,6 +29,260 @@ class CurationType(Enum): OTHER_GAME = auto() ANIMATION = auto() +class EditCurationMeta(BaseModel): + Title: str | None = None + AlternateTitles: str | None = None + Version: str | None = None + Developer: str | None = None + Publisher: str | None = None + ReleaseDate: str | None = None + Series: str | None = None + Source: str | None = None + Status: str | None = None + Tags: str | None = None + Languages: 
# Archives whose total uncompressed size exceeds this many bytes are not unpacked.
max_uncompressed_size = 50 * 1000 * 1000 * 1000

# Largest accepted replacement logo/screenshot, in bytes.
_MAX_IMAGE_SIZE = 26214400

# (EditCurationMeta attribute, key in the curation meta file), in the order edits are applied.
_EDITABLE_META_FIELDS = (
    ("Title", "Title"),
    ("AlternateTitles", "Alternate Titles"),
    ("Version", "Version"),
    ("Developer", "Developer"),
    ("Publisher", "Publisher"),
    ("ReleaseDate", "Release Date"),
    ("Series", "Series"),
    ("Source", "Source"),
    ("Status", "Status"),
    ("Tags", "Tags"),
    ("Languages", "Languages"),
    ("OriginalDescription", "Original Description"),
    ("GameNotes", "Game Notes"),
)


def _unpack_curation_for_edit(filename: str, errors: list, warnings: list) -> tuple[list, str] | None:
    """Extract a .7z/.zip curation into a fresh temp dir; return (member names, dir) or None if rejected."""
    if filename.endswith(".7z"):
        archive_kind = "7z"
    elif filename.endswith(".zip"):
        archive_kind = "zip"
    elif filename.endswith(".rar"):
        errors.append("Curations must be either .zip or .7z, not .rar.")
        return None
    else:
        l.warn(f"file type of file '{filename}' not supported")
        errors.append(f"file type of file '{filename}' not supported")
        return None

    base_path = None
    try:
        l.debug(f"reading archive '{filename}'...")
        if archive_kind == "7z":
            archive = py7zr.SevenZipFile(filename, mode='r')
        else:
            archive = zipfile.ZipFile(filename, mode='r')

        # `with` closes the archive even when sizing or extraction raises.
        with archive:
            if archive_kind == "7z":
                uncompressed_size = archive.archiveinfo().uncompressed
            else:
                uncompressed_size = sum(zinfo.file_size for zinfo in archive.filelist)

            if uncompressed_size > max_uncompressed_size:
                warnings.append(
                    f"The archive is too large to be validated (`{uncompressed_size // 1000000}MB/{max_uncompressed_size // 1000000}MB`).")
                return None

            filenames = archive.getnames() if archive_kind == "7z" else archive.namelist()
            base_path = tempfile.mkdtemp(prefix="curation_validator_") + "/"
            archive.extractall(path=base_path)
    except Exception as e:
        l.error(f"there was an error while reading file '{filename}': {e}")
        errors.append(f"There seems to a problem with your {archive_kind} file.")
        if base_path is not None:
            # Extraction failed half-way: do not leave the partial temp directory behind.
            archive_cleanup(filename, base_path)
        return None

    return filenames, base_path


def update_meta(filename: str, new_meta: EditCurationMeta | None, new_logo: UploadFile | None,
                new_ss: UploadFile | None) -> tuple[list, list, str]:
    """Apply metadata/image edits to a curation archive and repack it as a new .7z.

    The archive is extracted to a temporary directory, the meta file is loaded
    (legacy ``.txt`` metas are converted to ``.yaml``), every non-``None`` field of
    *new_meta* overwrites the matching meta key, *new_logo* / *new_ss* overwrite the
    existing ``logo.png`` / ``ss.png`` (they are ignored when the curation has no
    such file), and the result is written to a new random sub-folder of
    ``$REPACK_DIR``.

    Args:
        filename: Path of the uploaded ``.7z`` or ``.zip`` curation.
        new_meta: Field edits to apply, or ``None`` to leave the metadata as is.
        new_logo: Replacement logo upload, or ``None``.
        new_ss: Replacement screenshot upload, or ``None``.

    Returns:
        ``(errors, warnings, path)``. On success ``errors`` is empty and ``path`` is
        the rebuilt archive; when the curation is rejected before repacking,
        ``path`` is the unchanged input *filename*.

    Raises:
        KeyError: If the ``REPACK_DIR`` environment variable is not set.
    """
    repack_folder = os.environ["REPACK_DIR"]
    errors: list = []
    warnings: list = []

    unpacked = _unpack_curation_for_edit(filename, errors, warnings)
    if unpacked is None:
        return errors, warnings, filename
    filenames, base_path = unpacked

    try:
        # check files
        l.debug(f"validating archive data for '{filename}'...")
        uuid_pattern = r"[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}"
        uuid_folder_regex = re.compile(rf"^{uuid_pattern}/?$")
        is_core_curation = any(uuid_folder_regex.match(name) for name in filenames)
        # Core curations live in a UUID-named root folder; legacy ones in any single folder.
        folder_pattern = uuid_pattern if is_core_curation else r"[^/]+"

        meta_regex = re.compile(rf"^{folder_pattern}/meta\.(yaml|yml|txt)$")
        logo_regex = re.compile(rf"^{folder_pattern}/logo\.png$")
        ss_regex = re.compile(rf"^{folder_pattern}/ss\.png$")

        meta = [name for name in filenames if meta_regex.match(name) is not None]
        logo = [name for name in filenames if logo_regex.match(name) is not None]
        ss = [name for name in filenames if ss_regex.match(name) is not None]

        if len(meta) == 0:
            errors.append("Did not find a meta file to edit")
            return errors, warnings, filename

        meta_filename = meta[0]
        # Created before branching on the meta format: the dumper is also needed
        # to write the result back when the source meta was a legacy .txt file.
        yaml = YAML(typ="safe")
        props: dict = {}
        l.debug(f"Reading metadata file in: '{base_path + meta_filename}'")
        with open(base_path + meta_filename, mode='r', encoding='utf8') as meta_file:
            if meta_filename.endswith((".yml", ".yaml")):
                try:
                    props = yaml.load(meta_file)
                    if props is None:
                        errors.append("The meta file seems to be empty.")
                        return errors, warnings, filename
                except YAMLError:
                    errors.append("Unable to load meta YAML file")
                    return errors, warnings, filename
                except ValueError as e:
                    l.debug(f"ValueError reading meta file: {e}")
                    errors.append("Invalid release date. Ensure entered date is valid.")
                    return errors, warnings, filename
            else:
                # meta_regex only admits .yaml/.yml/.txt, so this is a legacy .txt meta.
                # NOTE(review): readlines() is called again on every pass, so every call
                # after the first receives an empty list; kept as in the original because
                # the parse helpers are defined elsewhere — confirm this is intended.
                break_index: int = 0
                while break_index != -1:
                    props, break_index = parse_lines_until_multiline(meta_file.readlines(), props,
                                                                     break_index)
                    props, break_index = parse_multiline(meta_file.readlines(), props, break_index)
                if props.get("Genre") is not None:
                    props["Tags"] = props["Genre"]

        # translate legacy fields
        if props.get("Platform") is not None:
            props["Platforms"] = props["Platform"]

        # add primary platform if missing
        if "Platforms" in props and "Primary Platform" not in props:
            props["Primary Platform"] = props["Platforms"].split(';')[0].strip()

        if new_meta is not None:
            for attribute, meta_key in _EDITABLE_META_FIELDS:
                value = getattr(new_meta, attribute)
                if value is not None:
                    props[meta_key] = value

        # Replacement images only overwrite an image the curation already has.
        for label, existing, upload in (("logo", logo, new_logo), ("screenshot", ss, new_ss)):
            if len(existing) == 0 or upload is None:
                continue
            # UploadFile.size may be None, so the declared size is checked when known
            # and the bytes actually read are checked in every case.
            too_large = upload.size is not None and upload.size > _MAX_IMAGE_SIZE
            data = b"" if too_large else upload.file.read(_MAX_IMAGE_SIZE + 1)
            if too_large or len(data) > _MAX_IMAGE_SIZE:
                errors.append(f"New {label} larger than 25mb, rejected")
                return errors, warnings, filename
            with open(base_path + existing[0], "wb") as image_file:
                image_file.write(data)

        if meta_filename.endswith('.txt'):
            # Delete the original .txt file and save back as .yaml instead.
            # Only the suffix is rewritten, never a ".txt" inside a folder name.
            os.remove(base_path + meta_filename)
            meta_filename = meta_filename[:-len('.txt')] + '.yaml'

        with open(base_path + meta_filename, mode='w', encoding='utf8') as meta_file:
            yaml.dump(props, meta_file)

        # The repacked archive is always a .7z named after the upload.
        output_name = os.path.basename(filename)
        if output_name.endswith(".zip"):
            output_name = output_name[:-len(".zip")] + ".7z"
        temp_folder = os.path.join(repack_folder,
                                   ''.join(random.choices(string.ascii_letters + string.digits, k=10)))
        output_path = os.path.join(temp_folder, output_name)

        try:
            # Create the output directory and its parents if they do not exist yet.
            os.makedirs(temp_folder, 0o777, exist_ok=True)

            # Create new 7z archive with all files including modified meta
            with py7zr.SevenZipFile(output_path, mode='w') as new_archive:
                for root, _dirs, files in os.walk(base_path):
                    for file in files:
                        file_path = os.path.join(root, file)
                        arcname = os.path.relpath(file_path, base_path)
                        new_archive.write(file_path, arcname)

            l.debug(f"Successfully rebuilt archive '{output_path}' with updated metadata")
        except Exception as e:
            l.error(f"Error rebuilding archive '{output_path}': {e}")
            errors.append(f"Failed to rebuild archive: {str(e)}")

        return errors, warnings, output_path
    finally:
        # Runs on every exit path so the extraction directory is never leaked.
        # NOTE(review): archive_cleanup is defined elsewhere in this module and is
        # presumed to remove base_path without touching the archive itself — confirm.
        archive_cleanup(filename, base_path)
logo_regex_case = re.compile(r"(?i)^[^/]+/logo\.(png)$") ss_regex = re.compile(r"^[^/]+/ss\.(png)$") ss_regex_case = re.compile(r"(?i)^[^/]+/ss\.(png)$") - content_folder = [match for match in filenames if content_folder_regex.match(match) is not None] + + content_folder = None + for f in filenames: + index = f.find("/content") + if index != -1: + # Always save the shortest content path to avoid content folders inside weirdly named folders first + new_path = f[:index + len("/content")] + if content_folder is not None and len(content_folder) > len(new_path): + content_folder = new_path + elif content_folder is None: + content_folder = new_path + meta = [match for match in filenames if meta_regex.match(match) is not None] logo = [match for match in filenames if logo_regex.match(match) is not None] logo_case = [match for match in filenames if logo_regex_case.match(match) is not None] ss = [match for match in filenames if ss_regex.match(match) is not None] ss_case = [match for match in filenames if ss_regex_case.match(match) is not None] else: # core curation - content_folder_regex = re.compile( - r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/content/?$") meta_regex = re.compile( r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/meta\.(yaml|yml|txt)$") logo_regex = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/logo\.png$") @@ -121,15 +387,25 @@ def validate_curation(filename: str) -> tuple[list, ss_regex = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/ss\.png$") ss_regex_case = re.compile( r"(?i)^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/ss\.(png)$") + + content_folder = None + for f in filenames: + index = f.find("/content") + if index != -1: + # Always save the shortest content path to avoid content folders inside weirdly named folders first + new_path = f[:index + len("/content")] + if content_folder is not None and 
len(content_folder) > len(new_path): + content_folder = new_path + elif content_folder is None: + content_folder = new_path - content_folder = [match for match in filenames if content_folder_regex.match(match) is not None] meta = [match for match in filenames if meta_regex.match(match) is not None] logo = [match for match in filenames if logo_regex.match(match) is not None] logo_case = [match for match in filenames if logo_regex_case.match(match) is not None] ss = [match for match in filenames if ss_regex.match(match) is not None] ss_case = [match for match in filenames if ss_regex_case.match(match) is not None] - if len(logo) == 0 and len(ss) == 0 and len(content_folder) == 0 and len(meta) == 0: + if len(logo) == 0 and len(ss) == 0 and content_folder is None and len(meta) == 0: errors.append("Logo, screenshot, content folder and meta not found. Is your curation structured properly?") archive_cleanup(filename, base_path) return errors, warnings, None, None, None, None @@ -147,10 +423,10 @@ def validate_curation(filename: str) -> tuple[list, errors.append("Screenshot file is either missing or its filename is incorrect.") # check content - if len(content_folder) == 0: + if content_folder is None: errors.append("Content folder not found.") else: - content_folder_path = base_path + content_folder[0] + content_folder_path = base_path + content_folder filecount_in_content = sum([len(files) for r, d, files in os.walk(content_folder_path)]) if filecount_in_content == 0: errors.append("No files found in content folder.") @@ -179,6 +455,7 @@ def validate_curation(filename: str) -> tuple[list, "Meta file is either missing or its filename is incorrect. 
Are you using Flashpoint Core for curating?") else: meta_filename = meta[0] + l.debug(f"Reading metadata file in: '{base_path + meta_filename}'") with open(base_path + meta_filename, mode='r', encoding='utf8') as meta_file: if meta_filename.endswith(".yml") or meta_filename.endswith(".yaml"): try: @@ -189,10 +466,11 @@ def validate_curation(filename: str) -> tuple[list, archive_cleanup(filename, base_path) return errors, warnings, None, None, None, None except YAMLError: - errors.append("Unable to load meta YAML file") + errors.append(f"Unable to load meta YAML file") archive_cleanup(filename, base_path) return errors, warnings, None, None, None, None - except ValueError: + except ValueError as e: + l.debug(f"ValueError reading meta file: {e}") errors.append("Invalid release date. Ensure entered date is valid.") archive_cleanup(filename, base_path) return errors, warnings, None, None, None, None @@ -210,6 +488,14 @@ def validate_curation(filename: str) -> tuple[list, archive_cleanup(filename, base_path) return errors, warnings, None, None, None, None + # translate legacy fields + if props.get("Platform") is not None: + props["Platforms"] = props["Platform"] + + # add primary platform if missing + if "Platforms" in props and "Primary Platform" not in props: + props["Primary Platform"] = props["Platforms"].split(';')[0].strip() + title: tuple[str, bool] = ("Title", bool(props.get("Title"))) # developer: tuple[str, bool] = ("Developer", bool(props["Developer"])) release_date: tuple[str, bool] = ("Release Date", bool(props.get("Release Date"))) @@ -221,6 +507,26 @@ def validate_curation(filename: str) -> tuple[list, errors.append( f"Release date {date_string} is incorrect. 
Release dates should always be in `YYYY-MM-DD` format.") + # check age of release + year = None + month = None + day = None + + if date_string.count("-") == 0: + year = int(date_string) + elif date_string.count("-") == 1: + date_split = date_string.split("-") + year = int(date_split[0]) + month = int(date_split[1]) + elif date_string.count("-") == 2: + date_split = date_string.split("-") + year = int(date_split[0]) + month = int(date_split[1]) + day = int(date_split[2]) + + if not is_date_more_than_three_years_ago(datetime.now(), year, month, day): + warnings.append(f"Release date {date_string} is less than 3 years ago. Curation should be frozen.") + language_properties: tuple[str, bool] = "Languages", bool(props.get("Languages")) if language_properties[1]: with open("data/language-codes.json") as f: @@ -300,23 +606,28 @@ def validate_curation(filename: str) -> tuple[list, warnings.append(f"Tag `{tag}` is not a known tag, please verify (did you write it correctly?).") extreme: tuple[str, bool] = ("Extreme", bool(props.get("Extreme"))) + blacklisted_tags = get_blacklisted_tag_list_file() extreme_tags = get_extreme_tag_list_file() + is_blacklisted = False is_extreme = False if extreme[1] and (props["Extreme"] == "Yes" or props["Extreme"] is True): is_extreme = True if tags: has_extreme_tags = bool([tag for tag in tags if tag in extreme_tags]) + has_blacklisted_tags = bool([tag for tag in tags if tag in blacklisted_tags]) has_legacy_extreme = "LEGACY-Extreme" in tags - if has_extreme_tags or has_legacy_extreme: + if has_blacklisted_tags or has_extreme_tags or has_legacy_extreme: is_extreme = True if is_extreme and not has_extreme_tags: errors.append("Curation is extreme but lacks extreme tags.") + if has_blacklisted_tags: + errors.append("Contains blacklisted tags") if props.get("Library") is not None and "theatre" in props.get("Library"): curation_type = CurationType.ANIMATION else: - platform: Optional[str] = props.get("Platform") - if platform is None or "Flash" in 
platform: + platforms: Optional[str] = props.get("Platforms") + if platforms is None or "Flash" in platforms: curation_type = CurationType.FLASH_GAME else: curation_type = CurationType.OTHER_GAME @@ -332,6 +643,31 @@ def validate_curation(filename: str) -> tuple[list, image_path = f"{base_path}{screenshot}" images.append({"type": f"screenshot", "data": encode_image(image_path)}) + # map add apps to more 'Extras', 'Message' props and an 'Add Apps' array + addApps = props.get("Additional Applications") + addAppsArr = [] + if addApps is not None: + keys = list(addApps) + for key in keys: + if key == "Extras": + props["Extras"] = addApps["Extras"] + elif key == "Message": + props["Message"] = addApps["Message"] + else: + addAppsArr.append({ + "Heading": key, + "Application Path": addApps[key]["Application Path"], + "Launch Command": addApps[key]["Launch Command"] + }) + props["Additional Applications"] = addAppsArr + print(props["Additional Applications"]) + + validRuffleValues = ["standalone"] + ruffleSupport = props.get("Ruffle Support") + if ruffleSupport is not None: + if ruffleSupport not in ["standalone"] and ruffleSupport.strip() != "": + errors.append(f"Ruffle Support must be '' or a value in '" + str(validRuffleValues) + "'") + archive_cleanup(filename, base_path) return errors, warnings, is_extreme, curation_type, props, images @@ -362,47 +698,61 @@ def get_tag_list_bluebot() -> list[str]: @cached(cache=TTLCache(maxsize=1, ttl=3600)) -def get_tag_list_file() -> list[str]: +def get_tag_list_file() -> list[dict[str, str]]: l.debug(f"getting tags from file...") with open("data/category_tags.json", "r", encoding="utf-8") as f: data = json.load(f) return data["tags"] +@cached(cache=TTLCache(maxsize=1, ttl=3600)) +def get_blacklisted_tag_list_file() -> list[str]: + l.debug(f"getting blacklisted tags from file...") + with open("data/blacklisted_tags.json", "r", encoding="utf-8") as f: + data = json.load(f) + return data["tags"] @cached(cache=TTLCache(maxsize=1, 
def is_date_more_than_three_years_ago(now, year, month=None, day=None):
    """Return True when a (possibly partial) date is more than 3 * 365 days before *now*.

    A missing day or month is filled in with the latest date it could stand
    for: a bare year becomes 31 December of that year, and a year plus month
    becomes the last day of that month (leap years included).

    Args:
        now: Reference ``datetime`` the age is measured from.
        year: Release year.
        month: Release month (1-12), or ``None`` when unknown. Must be given
            whenever *day* is given.
        day: Release day of month, or ``None`` when unknown.

    Returns:
        ``True`` if the resolved date is strictly earlier than
        ``now - timedelta(days=3 * 365)``, otherwise ``False``.

    Raises:
        ValueError: If the resolved year/month/day is not a valid date.
    """
    # Local import: the module's import block is outside this function.
    import calendar

    if day is None:
        if month is None:
            # Only the year is known, assume it's the last month of the year
            month = 12
        # monthrange()[1] is the number of days in the month, leap years included
        day = calendar.monthrange(year, month)[1]

    date = datetime(year, month, day)
    # The window is a fixed 3 * 365 days, not three calendar years.
    three_years_ago = now - timedelta(days=3 * 365)
    return date < three_years_ago
errors, warnings, is_extreme, _, _, _ = validate_curation(f"test_curations/test_curation_valid_legacy.{extension}") + errors, warnings, is_extreme, _, _, _ = validate_curation( + f"test_curations/test_curation_valid_legacy.{extension}") self.assertCountEqual(errors, []) self.assertCountEqual(warnings, []) self.assertFalse(is_extreme) @@ -71,7 +83,8 @@ def test_curation_invalid_archive(self): def test_curation_empty_meta(self): for extension in ["7z", "zip"]: - errors, warnings, is_extreme, _, _, _ = validate_curation(f"test_curations/test_curation_empty_meta.{extension}") + errors, warnings, is_extreme, _, _, _ = validate_curation( + f"test_curations/test_curation_empty_meta.{extension}") self.assertCountEqual(errors, ["The meta file seems to be empty."]) self.assertCountEqual(warnings, []) @@ -117,7 +130,8 @@ def test_empty_content(self): def test_missing_content(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_missing_content.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_missing_content.{extension}") self.assertCountEqual(errors, ["Content folder not found."]) self.assertCountEqual(warnings, []) @@ -137,7 +151,8 @@ def test_missing_meta(self): def test_missing_root_folder(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_missing_root_folder.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_missing_root_folder.{extension}") self.assertCountEqual(errors, [ "Logo, screenshot, content folder and meta not found. 
Is your curation structured properly?"]) self.assertCountEqual(warnings, []) @@ -177,32 +192,37 @@ def test_missing_application_path_warning(self): def test_missing_launch_command(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_missing_launch_command.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_missing_launch_command.{extension}") self.assertCountEqual(errors, ["The `Launch Command` property in the meta file is mandatory."]) self.assertCountEqual(warnings, []) def test_missing_languages(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_missing_languages.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_missing_languages.{extension}") self.assertCountEqual(errors, ["The `Languages` property in the meta file is mandatory."]) self.assertCountEqual(warnings, []) def test_comma_in_languages(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_comma_in_languages.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_comma_in_languages.{extension}") self.assertCountEqual(errors, ["Languages should be separated with semicolons, not commas."]) self.assertCountEqual(warnings, []) def test_common_bad_language(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_common_bad_language.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_common_bad_language.{extension}") self.assertCountEqual(errors, ["The correct ISO 639-1 language code for Japanese is `ja`, not `jp`."]) self.assertCountEqual(warnings, []) def test_language_name(self): for extension in ["7z", "zip"]: errors, warnings, _, _, _, _ = 
validate_curation(f"test_curations/test_curation_language_name.{extension}") - self.assertCountEqual(errors, ["Languages must be in ISO 639-1 format, so please use `ja` instead of `Japanese`"]) + self.assertCountEqual(errors, + ["Languages must be in ISO 639-1 format, so please use `ja` instead of `Japanese`"]) self.assertCountEqual(warnings, []) def test_missing_source(self): @@ -230,7 +250,8 @@ def test_rar(self): def test_trailing_language_semicolon(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_languages_semicolon.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_languages_semicolon.{extension}") self.assertCountEqual(errors, []) self.assertCountEqual(warnings, []) @@ -242,29 +263,93 @@ def test_valid_date(self): def test_localflash_too_many_files(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_localflash_too_many_files.{extension}") - self.assertCountEqual(errors, ["Content must be in additional folder in localflash rather than in localflash directly."]) + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_localflash_too_many_files.{extension}") + self.assertCountEqual(errors, [ + "Content must be in additional folder in localflash rather than in localflash directly."]) self.assertCountEqual(warnings, []) def test_localflash_no_folder(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_localflash_no_folder.{extension}") - self.assertCountEqual(errors, ["Content must be in additional folder in localflash rather than in localflash directly."]) + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_localflash_no_folder.{extension}") + self.assertCountEqual(errors, [ + "Content must be in additional folder in localflash rather than in 
localflash directly."]) self.assertCountEqual(warnings, []) def test_localflash_bad_name(self): for extension in ["7z", "zip"]: - errors, warnings, _, _, _, _ = validate_curation(f"test_curations/test_curation_localflash_bad_name.{extension}") + errors, warnings, _, _, _, _ = validate_curation( + f"test_curations/test_curation_localflash_bad_name.{extension}") self.assertCountEqual(errors, ["Extremely common localflash containing folder name, please change."]) self.assertCountEqual(warnings, []) def test_no_library(self): for extension in ["7z", "zip"]: - errors, warnings, _, curation_type, _, _ = validate_curation(f"test_curations/test_curation_none_library.{extension}") + errors, warnings, _, curation_type, _, _ = validate_curation( + f"test_curations/test_curation_none_library.{extension}") self.assertCountEqual(errors, []) self.assertCountEqual(warnings, []) self.assertEqual(curation_type, CurationType.FLASH_GAME) + def test_convert_platform_field(self): + for extension in ["7z", "zip"]: + errors, warnings, _, _, meta, _ = validate_curation(f"test_curations/test_curation_valid.{extension}") + self.assertCountEqual(errors, []) + self.assertCountEqual(warnings, []) + self.assertEqual(meta["Platforms"], "Flash") + + def test_addapps(self): + for extension in ["7z", "zip"]: + errors, warnings, _, _, meta, _ = validate_curation( + f"test_curations/test_curation_valid_addapps.{extension}") + self.assertCountEqual(errors, []) + self.assertEqual(meta["Extras"], "test") + self.assertEqual(meta["Message"], "test") + self.assertEqual(len(meta["Additional Applications"]), 1) + self.assertEqual(meta["Additional Applications"][0]["Heading"], "Test") + self.assertEqual(meta["Additional Applications"][0]["Application Path"], "test") + self.assertEqual(meta["Additional Applications"][0]["Launch Command"], "test") + + def test_primary_platform(self): + for extension in ["7z", "zip"]: + # From empty + errors, warnings, _, _, meta, _ = 
validate_curation(f"test_curations/test_curation_valid.{extension}") + self.assertCountEqual(errors, []) + self.assertEqual(meta["Primary Platform"], "Flash") + # From stated + errors, warnings, _, _, meta, _ = validate_curation( + f"test_curations/test_curation_primary_platform.{extension}") + self.assertCountEqual(errors, []) + self.assertEqual(meta["Primary Platform"], "HTML5") + + def test_ruffle_support(self): + for extension in ["7z", "zip"]: + errors, warnings, _, _, meta, _ = validate_curation( + f"test_curations/test_curation_invalid_ruffle.{extension}") + self.assertNotEqual(len(errors), 0) + + +@pytest.mark.asyncio +async def test_bluezip(): + for extension in ["7z", "zip"]: + errors, output = await repack(f"test_curations/test_curation_valid.{extension}") + assert len(errors) == 0 + assert os.path.exists(output) + + +def test_is_date_more_than_three_years_ago(): + cases = [ + {"now": datetime(2003, 1, 1), "year": 2000, "month": 1, "day": 1, "return": True}, + {"now": datetime(2003, 1, 1), "year": 2000, "month": 1, "day": 2, "return": False}, + {"now": datetime(2003, 1, 1), "year": 2002, "month": 2, "day": 20, "return": False}, + {"now": datetime(2003, 1, 1), "year": 2002, "month": None, "day": None, "return": False}, + {"now": datetime(2003, 1, 1), "year": 2002, "month": 1, "day": None, "return": False}, + ] + + for case in cases: + assert case["return"] == is_date_more_than_three_years_ago(case["now"], case["year"], case["month"], case["day"]), case + if __name__ == '__main__': unittest.main() diff --git a/data/blacklisted_tags.json b/data/blacklisted_tags.json new file mode 100644 index 0000000..8e5c6c9 --- /dev/null +++ b/data/blacklisted_tags.json @@ -0,0 +1,9 @@ +{ + "tags": [ + "Cub", + "Lolicon", + "Shotacon", + "Toddlercon", + "Teenager" + ] +} \ No newline at end of file diff --git a/data/category_tags.json b/data/category_tags.json index 4868a76..4a415bc 100644 --- a/data/category_tags.json +++ b/data/category_tags.json @@ -1,15 +1,56 @@ { 
"tags": [ - "Action", - "Adventure", - "Arcade", - "Creative", - "Educational", - "Puzzle", - "Simulation", - "Sports", - "Strategy", - "Other", - "Game Jam" + { + "name": "Action", + "description": "These games are active and action-packed, whether they involve running, jumping, shooting, or something else." + }, + { + "name": "Adventure", + "description": "In Adventure games, the player experiences an interactive story driven by exploration and/or puzzle solving." + }, + { + "name": "Arcade", + "description": "Arcade games have easily graspable gameplay and a focus on getting a high score." + }, + { + "name": "Card", + "description": "Games where the gameplay revolves around using cards. Some are accompanied by the 'Gambling' tag." + }, + { + "name": "Creative", + "description": "Games facilitating user made content, from customizing characters to coloring to making your own picture." + }, + { + "name": "Educational", + "description": "Educational games aim to teach something as you play." + }, + { + "name": "Mathematical", + "description": "Games which are generally themed around fields of recreational mathematics." + }, + { + "name": "Puzzle", + "description": "Puzzle games involve brainteasers of all types, from fast-paced matching games to hidden object puzzles." + }, + { + "name": "Simulation", + "description": "This type of a game attempts to simulate something from real life, like a card game or just walking around." + }, + { + "name": "Sports", + "description": "Sports games attempt to replicate the gameplay of physical sports. This also encompasses fictional sports such as Quidditch. A game does not have to perfectly replicate a sport, so long as it is inspired by sports." + }, + { + "name": "Strategy", + "description": "Strategy games come in many forms, but usually involve controlling units in an efficient way to defeat some kind of enemy." 
+ }, + { + "name": "Other", + "description": "If a game doesn't seem to fit anywhere else, it will probably fit in one of these tags." + }, + { + "name": "Game Jam", + "description": "Games created from scratch during a very limited amount of time, usually between 24 to 72 hours, following a provided theme." + } ] } \ No newline at end of file diff --git a/data/extreme_tags.json b/data/extreme_tags.json index cccd185..6547e14 100644 --- a/data/extreme_tags.json +++ b/data/extreme_tags.json @@ -1,6 +1,5 @@ { "tags": [ - "Cub", "Inflation", "Bestiality", "Cannibalism", @@ -9,11 +8,8 @@ "Fisting", "Flatulence", "Frottage", - "Lolicon", "Necrophilia", "Scat", - "Shotacon", - "Toddlercon", "Vomit", "Vore", "Homophobia", @@ -46,7 +42,6 @@ "Sexual Violence", "Sex Toys", "Spanking", - "Teenager", "Tentacles", "Touching", "Tribadism", diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..4c21e36 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +addopts = --ignore=bluezip +asyncio_mode=auto \ No newline at end of file diff --git a/repack.py b/repack.py new file mode 100644 index 0000000..d414c17 --- /dev/null +++ b/repack.py @@ -0,0 +1,61 @@ +import os.path +import subprocess +import tempfile +import zipfile +import random +import string +import asyncio + +import py7zr +from logger import getLogger + +l = getLogger("repack") + + +async def repack(filename: str): + filename = os.path.abspath(filename) + errors: list = [] + repack_folder = os.environ["REPACK_DIR"] + temp_folder = os.path.join(repack_folder, ''.join(random.choices(string.ascii_letters + string.digits, k=10))) + if not os.path.exists(temp_folder): + # If not, create the directory and its parents recursively + os.makedirs(temp_folder, 0o777) + + # unpack file + + # repack with bluezip + l.debug(f"bluezipping '{filename}'...") + + # Spawn a process with arguments + process_args = ["./bluezip.py", os.path.abspath(filename), "-o", temp_folder] + print(process_args) + process = await 
asyncio.subprocess.create_subprocess_exec("python", *process_args, cwd='bluezip', stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE) + + # Wait for the process to exit and get the output + stdout, stderr = await process.communicate() + + # Get the exit code of the process + exit_code = process.returncode + + if exit_code != 0: + l.error(f"bluezip failed for '{filename}'") + l.error(stderr.decode("utf-8")) + l.error(stdout.decode("utf-8")) + errors.append("Error during bluezip") + return errors, "" + + for r, d, f in os.walk(temp_folder): + os.chmod(r, 0o777) + os.chmod(temp_folder, 0o777) + +# Get the list of files inside the folder + files = os.listdir(temp_folder) + + # Filter out any subdirectories and get the first file + first_file = next(file for file in files if os.path.isfile(os.path.join(temp_folder, file))) + first_file = os.path.join(temp_folder, first_file) + l.debug(first_file) + + return errors, first_file + + diff --git a/requirements.txt b/requirements.txt index 76b3f55..9ec6cf0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,10 +5,12 @@ python-dotenv ruamel.yaml colorlog pytest +pytest-asyncio requests beautifulsoup4 cachetools discord-pretty-help +pydantic fastapi uvicorn diff --git a/test_curations/test_curation_invalid_ruffle.7z b/test_curations/test_curation_invalid_ruffle.7z new file mode 100644 index 0000000..6fa1200 Binary files /dev/null and b/test_curations/test_curation_invalid_ruffle.7z differ diff --git a/test_curations/test_curation_invalid_ruffle.zip b/test_curations/test_curation_invalid_ruffle.zip new file mode 100644 index 0000000..d7b9cc2 Binary files /dev/null and b/test_curations/test_curation_invalid_ruffle.zip differ diff --git a/test_curations/test_curation_old_platforms.7z b/test_curations/test_curation_old_platforms.7z new file mode 100644 index 0000000..acbbf97 Binary files /dev/null and b/test_curations/test_curation_old_platforms.7z differ diff --git 
a/test_curations/test_curation_primary_platform.7z b/test_curations/test_curation_primary_platform.7z new file mode 100644 index 0000000..df0523b Binary files /dev/null and b/test_curations/test_curation_primary_platform.7z differ diff --git a/test_curations/test_curation_primary_platform.zip b/test_curations/test_curation_primary_platform.zip new file mode 100644 index 0000000..75d54d8 Binary files /dev/null and b/test_curations/test_curation_primary_platform.zip differ diff --git a/test_curations/test_curation_valid_addapps.7z b/test_curations/test_curation_valid_addapps.7z new file mode 100644 index 0000000..2d91b59 Binary files /dev/null and b/test_curations/test_curation_valid_addapps.7z differ diff --git a/test_curations/test_curation_valid_addapps.zip b/test_curations/test_curation_valid_addapps.zip new file mode 100644 index 0000000..eae6822 Binary files /dev/null and b/test_curations/test_curation_valid_addapps.zip differ diff --git a/validator-server.py b/validator-server.py index a5b1a26..4bb1de8 100644 --- a/validator-server.py +++ b/validator-server.py @@ -1,11 +1,18 @@ +from http.client import HTTPException +import json import pathlib import tempfile import traceback +import os -from fastapi import FastAPI, File, UploadFile, Response, status +from pydantic import ValidationError +from repack import repack + +from fastapi import FastAPI, File, UploadFile, Response, status, Form +from typing import Annotated import shutil -from curation_validator import validate_curation +from curation_validator import EditCurationMeta, update_meta, validate_curation, get_tag_list_wiki, get_tag_list_file from logger import getLogger l = getLogger("api") @@ -13,16 +20,20 @@ app = FastAPI() -@app.post("/upload/") +@app.post("/upload") async def create_upload_file(response: Response, file: UploadFile = File(...)): l.debug(f"received file '{file.filename}'") + base_path = tempfile.mkdtemp(prefix="curation_validator_") new_filepath = base_path + "/file" + 
pathlib.Path(file.filename).suffix + with open(new_filepath, "wb") as dest: l.debug(f"copying file '{file.filename}' into '{new_filepath}'.") shutil.copyfileobj(file.file, dest) try: - curation_errors, curation_warnings, is_extreme, curation_type, meta, image_dict = validate_curation(new_filepath) + curation_errors, curation_warnings, is_extreme, curation_type, meta, image_dict = validate_curation( + new_filepath) + except Exception as e: response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR return { @@ -45,3 +56,107 @@ async def create_upload_file(response: Response, file: UploadFile = File(...)): "meta": meta, "images": image_dict } + + +# just hand over absolute path to the file instead of uploading it, saves some unnecessary copying ay? +@app.post("/provide-path") +async def provide_file(response: Response, path: str): + try: + l.debug(f"validating provided file '{path}'") + curation_errors, curation_warnings, is_extreme, curation_type, meta, image_dict = validate_curation(path) + + except Exception as e: + response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR + return { + "exception": "".join( + traceback.format_exception( + etype=type(e), value=e, tb=e.__traceback__ + ) + ) + } + + return { + "filename": pathlib.Path(path).name, + "path": path, + "curation_errors": curation_errors, + "curation_warnings": curation_warnings, + "is_extreme": is_extreme, + "curation_type": curation_type, + "meta": meta, + "images": image_dict + } + +@app.post("/edit-meta") +async def edit_meta(response: Response, path: str, metadata: Annotated[str | None, Form()] = None, logo: Annotated[UploadFile | None, File()] = None, screenshot: Annotated[UploadFile | None, File()] = None): + try: + l.debug(f"editing meta of provided file '{path}'") + if metadata: + try: + print(metadata) + # Parse the JSON string into the Pydantic model + metadata_dict = json.loads(metadata) + metadata = EditCurationMeta(**metadata_dict) + except (json.JSONDecodeError, ValidationError) as e: + 
raise HTTPException(status_code=400, detail=f"Invalid metadata JSON: {str(e)}") + curation_errors, curation_warnings, filename = update_meta(path, metadata, logo, screenshot) + + except Exception as e: + response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR + return { + "exception": "".join( + traceback.format_exception( + etype=type(e), value=e, tb=e.__traceback__ + ) + ) + } + + return { + "filename": os.path.basename(filename), + "path": filename, + "curation_errors": curation_errors, + "curation_warnings": curation_warnings, + } + +# TODO this does not return all valid tags because the wiki page sucks +@app.get("/tags") +async def get_wiki_tags(): + return {"tags": get_tag_list_file() + get_tag_list_wiki()} + +@app.post("/pack-path") +async def pack_path(response: Response, path: str): + try: + l.debug(f"validating provided file before import '{path}'") + curation_errors, curation_warnings, is_extreme, curation_type, meta, image_dict = validate_curation(path) + + except Exception as e: + response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR + return { + "exception": "".join( + traceback.format_exception( + etype=type(e), value=e, tb=e.__traceback__ + ) + ) + } + + try: + l.debug(f"packing '{path}'") + errors, output_file = await repack(path) + if len(errors) > 0: + return { + "error": "error repacking curation" + } + else: + return { + "path": output_file, + "meta": meta, + "images": image_dict + } + except Exception as e: + response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR + return { + "exception": "".join( + traceback.format_exception( + etype=type(e), value=e, tb=e.__traceback__ + ) + ) + } \ No newline at end of file