From c400a14cfee9d1b6ff6a680cd7c3cce6c2d10bf6 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 10 Jul 2023 18:36:06 +0100 Subject: [PATCH] improve setup and deprecate dependencies --- .github/workflows/ci-pipeline.yml | 3 +- Makefile | 8 +---- Pipfile | 8 ++--- README.md | 2 +- docker/Dockerfile-Debian11 | 4 +++ pyproject.toml | 2 ++ ...ents-aarch64.txt => requirements-arm64.txt | 4 --- requirements.txt | 9 ++--- setup.py | 35 ++++++++++++++----- subaligner/__init__.py | 5 +++ subaligner/utils.py | 6 ++-- tests/subaligner/test_utils.py | 4 +-- 12 files changed, 50 insertions(+), 40 deletions(-) create mode 100644 pyproject.toml rename requirements-aarch64.txt => requirements-arm64.txt (93%) diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml index 65bfb67..147e736 100644 --- a/.github/workflows/ci-pipeline.yml +++ b/.github/workflows/ci-pipeline.yml @@ -1,4 +1,4 @@ -name: ci pipeline +name: CI Pipeline on: push: @@ -28,6 +28,7 @@ jobs: sudo apt-get -y install espeak libespeak1 libespeak-dev espeak-data sudo apt-get -y install libsndfile-dev python -m pip install --upgrade pip + python -m pip install --upgrade setuptools wheel cat requirements.txt | xargs -L 1 pip install cat requirements-stretch.txt | xargs -L 1 pip install cat requirements-llm.txt | xargs -L 1 pip install diff --git a/Makefile b/Makefile index 8c6a7a6..2463e21 100644 --- a/Makefile +++ b/Makefile @@ -10,12 +10,6 @@ else PLATFORM := linux-x86_64-cp-38-cp38 endif -ifdef PYTHON_TAG -PYTHON_TAG := $(PYTHON_TAG) -else -PYTHON_TAG := py38 -endif - SUBALIGNER_VERSION := $(SUBALIGNER_VERSION) TRIGGER_URL := ${TRIGGER_URL} @@ -159,7 +153,7 @@ test-dist: dist: clean-dist test-dist cat requirements-dev.txt | xargs -L 1 .$(PYTHON)/bin/pip install; \ - .$(PYTHON)/bin/python setup.py sdist bdist_wheel --python-tag=$(PYTHON_TAG) + .$(PYTHON)/bin/python setup.py sdist bdist_wheel release: .$(PYTHON)/bin/twine upload dist/* diff --git a/Pipfile b/Pipfile index 1c37619..1a4f1a7 
100644 --- a/Pipfile +++ b/Pipfile @@ -29,11 +29,10 @@ bleach = "==3.3.0" cachetools = "==3.1.1" captionstransformer = "~=1.2.1" certifi = "==2019.11.28" -chardet = "==3.0.4" +chardet = "~=3.0.4" click = "==5.1" cloudpickle = "==0.5.3" cycler = "==0.10.0" -Cython = "~=0.29.22" dask = ">=2021.10.0,<2022.1.0" decorator = "==4.3.0" distributed = "==1.13.0" @@ -49,8 +48,6 @@ isort = "==4.3.4" joblib = ">=1.2.0" Keras-Applications = ">=1.0.8" Keras-Preprocessing = ">=1.0.9" -kiwisolver = "==1.0.1" -lazy-object-proxy = "==1.4.3" le-pycaption = "==2.2.0a1" librosa = "<0.10.0" locket = "==0.2.0" @@ -61,8 +58,8 @@ numpy = "<1.24.0" oauthlib = "==3.1.0" openai-whisper = "==20230314" pbr = "==4.0.2" +pkgconfig = "~=1.5.5" pluggy = "==0.13.1" -psutil = "==5.6.7" py = "==1.10.0" pyasn1 = "==0.4.8" pyasn1-modules = "==0.2.7" @@ -83,7 +80,6 @@ rsa = "==4.7" scipy = "<=1.8.1" scikit-learn = ">=0.19.1" sentencepiece = "~=0.1.95" -setuptools = ">=41.0.0" six = "~=1.15.0" tblib = "==1.3.2" tensorflow = ">=1.15.5,<2.12" diff --git a/README.md b/README.md index 71d9207..c34e715 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ $ brew install ffmpeg ## Basic Installation ``` -$ pip install -U pip && pip install -U setuptools +$ pip install -U pip && pip install -U setuptools wheel $ pip install subaligner ``` or install from source: diff --git a/docker/Dockerfile-Debian11 b/docker/Dockerfile-Debian11 index 1d91f42..475ff82 100644 --- a/docker/Dockerfile-Debian11 +++ b/docker/Dockerfile-Debian11 @@ -11,9 +11,13 @@ RUN ["/bin/bash", "-c", "apt -y update &&\ apt -y install ffmpeg &&\ apt -y install espeak libespeak1 libespeak-dev espeak-data &&\ apt -y install libsndfile-dev &&\ + apt -y install libblas-dev liblapack-dev &&\ apt -y install python3-dev &&\ apt -y install python3-tk &&\ apt -y install python3-pip &&\ + apt -y install python3-venv &&\ + python3 -m venv .venv &&\ + source .venv/bin/activate &&\ python3 -m pip install --upgrade pip &&\ python3 -m pip install 
\"subaligner==${RELEASE_VERSION}\" &&\ python3 -m pip install \"subaligner[harmony]==${RELEASE_VERSION}\""] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..eb56957 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "wheel", "Cython"] \ No newline at end of file diff --git a/requirements-aarch64.txt b/requirements-arm64.txt similarity index 93% rename from requirements-aarch64.txt rename to requirements-arm64.txt index 71e5d49..92ff604 100644 --- a/requirements-aarch64.txt +++ b/requirements-arm64.txt @@ -3,13 +3,11 @@ beautifulsoup4<4.9.0 bleach==3.3.0 cachetools==3.1.1 captionstransformer~=1.2.1 -cchardet==2.1.7 certifi==2019.11.28 chardet==3.0.4 click==5.1 cloudpickle~=1.6.0 cycler==0.10.0 -Cython~=0.29.22 dask>=2021.10.0,<2022.1.0 decorator==4.3.0 distributed==1.13.0 @@ -25,8 +23,6 @@ idna==2.8 isort==4.3.4 joblib>=1.2.0 keras~=2.12.0 -kiwisolver==1.0.1 -lazy-object-proxy==1.4.3 le-pycaption==2.2.0a1 librosa<0.10.0 locket==0.2.0 diff --git a/requirements.txt b/requirements.txt index 01611cd..167d439 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,13 +3,11 @@ beautifulsoup4<4.9.0 bleach==3.3.0 cachetools==3.1.1 captionstransformer~=1.2.1 -cchardet==2.1.7 certifi==2019.11.28 -chardet==3.0.4 +chardet~=3.0.4 click==5.1 cloudpickle~=1.6.0 cycler==0.10.0 -Cython~=0.29.22 dask>=2021.10.0,<2022.1.0 decorator==4.3.0 distributed==1.13.0 @@ -25,8 +23,6 @@ isort==4.3.4 joblib>=1.2.0 Keras-Applications>=1.0.8 Keras-Preprocessing>=1.0.9 -kiwisolver==1.0.1 -lazy-object-proxy==1.4.3 le-pycaption==2.2.0a1 librosa<0.10.0 locket==0.2.0 @@ -37,8 +33,8 @@ numba>=0.50.0 numpy<1.24.0 oauthlib==3.1.0 pbr==4.0.2 +pkgconfig~=1.5.5 pluggy==0.13.1 -psutil==5.6.7 py==1.10.0 pyasn1==0.4.8 pyasn1-modules==0.2.7 @@ -55,7 +51,6 @@ PyYAML>=4.2b1 rsa==4.7 scipy<=1.8.1 scikit-learn<1.2.0 -setuptools>=41.0.0 six~=1.15.0 tblib==1.3.2 tensorflow>=1.15.5,<2.13 diff --git a/setup.py b/setup.py index 3b6545d..b84e1a4 
100644 --- a/setup.py +++ b/setup.py @@ -2,9 +2,10 @@ # -*- coding: utf-8 -*- import os -import platform - +import sys +from platform import architecture, machine from setuptools import setup +from wheel.bdist_wheel import bdist_wheel with open(os.path.join(os.getcwd(), "subaligner", "_version.py")) as f: exec(f.read()) @@ -12,16 +13,13 @@ with open("README.md") as readme_file: readme = readme_file.read() -if platform.machine() == "arm64": - with open("requirements-aarch64.txt") as requirements_file: +if machine() == "arm64": + with open("requirements-arm64.txt") as requirements_file: requirements = requirements_file.read().splitlines()[::-1] else: with open("requirements.txt") as requirements_file: requirements = requirements_file.read().splitlines()[::-1] -with open("requirements.txt") as requirements_file: - requirements = requirements_file.read().splitlines()[::-1] - with open("requirements-stretch.txt") as stretch_requirements_file: stretch_requirements = stretch_requirements_file.read().splitlines()[::-1] @@ -43,6 +41,24 @@ "llm": llm_requirements, } +architecture = architecture()[0] if sys.platform == "win32" else machine() + + +class bdist_wheel_local(bdist_wheel): + + def get_tag(self): + python = f"py{sys.version_info.major}{sys.version_info.minor}" + if sys.platform == "darwin" and architecture == "arm64": + os_arch = "macosx_11_0_arm64" + elif sys.platform == "win32": + os_arch = "win32" if architecture == "32bit" else "win_amd64" + # elif sys.platform == "linux": + # os_arch = f"manylinux_2_17_{architecture}" + else: + os_arch = "any" + return python, "none", os_arch + + setup(name="subaligner", version=__version__, author="Xi Bai", @@ -58,7 +74,7 @@ url="https://subaligner.readthedocs.io/en/latest/", description="Automatically synchronize and translate subtitles, or create new ones by transcribing, using pre-trained DNNs, Forced Alignments and Transformers.", long_description=readme + "\n\n", - long_description_content_type='text/markdown', + 
long_description_content_type="text/markdown", python_requires=">=3.8", wheel=True, package_dir={"subaligner": "subaligner"}, @@ -102,4 +118,5 @@ "subaligner_train=subaligner.subaligner_train.__main__:main", "subaligner_tune=subaligner.subaligner_tune.__main__:main", ] - }) + }, + cmdclass={"bdist_wheel": bdist_wheel_local}) diff --git a/subaligner/__init__.py b/subaligner/__init__.py index ae79fcb..70d02fe 100644 --- a/subaligner/__init__.py +++ b/subaligner/__init__.py @@ -1,5 +1,6 @@ import os import warnings +import logging import multiprocessing as mp from ._version import __version__ @@ -10,3 +11,7 @@ mp.set_start_method("spawn", force=True) os.environ["KMP_WARNINGS"] = "0" + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +os.environ["TF_CPP_MIN_VLOG_LEVEL"] = "0" +logging.getLogger("tensorflow").disabled = True diff --git a/subaligner/utils.py b/subaligner/utils.py index fa50d0f..1a771dd 100644 --- a/subaligner/utils.py +++ b/subaligner/utils.py @@ -3,7 +3,7 @@ import pysubs2 import requests import shutil -import cchardet +import chardet import shlex import pycountry @@ -595,9 +595,9 @@ def detect_encoding(subtitle_file_path: str) -> str: # and hence this less memory-efficient solution: raw = b"".join(file.readlines()) - detected = cchardet.detect(raw) + detected = chardet.detect(raw) detected = detected or {} - return detected["encoding"] if "encoding" in detected else None + return detected["encoding"] if "encoding" in detected and detected["encoding"] is not None else "utf-8" @staticmethod def get_file_root_and_extension(file_path: str) -> Tuple[str, str]: diff --git a/tests/subaligner/test_utils.py b/tests/subaligner/test_utils.py index da6689c..b4f9cbd 100644 --- a/tests/subaligner/test_utils.py +++ b/tests/subaligner/test_utils.py @@ -281,8 +281,8 @@ def test_contains_embedded_subtitle(self): self.assertFalse(Undertest.contains_embedded_subtitles(self.mp4_file_path)) def test_detect_encoding(self): - self.assertEqual("ASCII", 
Undertest.detect_encoding(self.real_srt_path)) - self.assertEqual("UTF-8", Undertest.detect_encoding(self.mkv_file_path)) + self.assertEqual("ascii", Undertest.detect_encoding(self.real_srt_path)) + self.assertEqual("utf-8", Undertest.detect_encoding(self.mkv_file_path)) def test_get_file_root_and_extension(self): root, extension = Undertest.get_file_root_and_extension("/path/to/root.ext1.ext2")