Skip to content

Commit

Permalink
prepare for the new release (#69)
Browse files Browse the repository at this point in the history
* add the manual shift mode for the subaligner cli

* support alignment on a single video and multiple subtitles
  • Loading branch information
baxtree authored Mar 10, 2022
1 parent 1a8b6a1 commit 069a8cf
Show file tree
Hide file tree
Showing 17 changed files with 282 additions and 194 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7]
python-version: [3.8]

steps:
- uses: actions/checkout@v2
Expand All @@ -31,6 +31,9 @@ jobs:
cat requirements-dev.txt | xargs -L 1 pip install
cat requirements-site.txt | xargs -L 1 pip install
pip install -e . --ignore-installed
- name: Type checking
run: |
python -m mypy --follow-imports=skip subaligner
- name: Linting
run: |
pycodestyle subaligner tests examples misc bin/subaligner bin/subaligner_1pass bin/subaligner_2pass bin/subaligner_batch bin/subaligner_convert bin/subaligner_train bin/subaligner_tune setup.py --ignore=E203,E501,W503 --exclude="subaligner/lib"
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ click = "==5.1"
cloudpickle = "==0.5.3"
cycler = "==0.10.0"
Cython = "~=0.29.22"
dask = "==0.15.0"
dask = ">=2021.10.0"
decorator = "==4.3.0"
distributed = "==1.13.0"
filelock = "==3.0.12"
Expand Down
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt
$ subaligner -m script -v https://example.com/video.mp4 -s https://example.com/subtitle.txt -o subtitle_aligned.srt
```
```
# Alignment of multiple subtitles against a single media file
$ subaligner -m script -v test.mp4 -s subtitle_lang_1.txt -s subtitle_lang_2.txt
$ subaligner -m script -v test.mp4 -s subtitle_lang_1.txt subtitle_lang_2.txt
```
```
# Translative alignment with the ISO 639-3 language code pair (src,tgt)
$ subaligner_1pass --languages
Expand All @@ -135,6 +141,12 @@ $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt
$ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt
```
```
# Shift subtitle manually by offset in seconds
$ subaligner -m shift --subtitle_path subtitle.srt -os 5.5
$ subaligner -m shift --subtitle_path subtitle.srt -os -5.5 -o subtitle_shifted.srt
```
```
# Run batch alignment against directories
$ subaligner_batch -m single -vd videos/ -sd subtitles/ -od aligned_subtitles/
Expand Down
8 changes: 8 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Global options:

[mypy]
ignore_missing_imports = True
no_implicit_optional = True
allow_redefinition = True

# Per-module options:
4 changes: 3 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ line-profiler==3.1.0
scikit-build==0.11.1
radish-bdd~=0.13.3
pex==2.1.34
mypy==0.910
mypy==0.931
types-requests==2.27.9
types-setuptools==57.4.9
parameterized==0.8.1
pylint~=2.8.2
pygments==2.7.4
10 changes: 10 additions & 0 deletions site/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ Make sure you have got the virtual environment activated upfront.
(.venv) $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt
(.venv) $ subaligner -m script -v https://example.com/video.mp4 -s https://example.com/subtitle.txt -o subtitle_aligned.srt

**Alignment of multiple subtitles against a single media file**::

(.venv) $ subaligner -m script -v test.mp4 -s subtitle_lang_1.txt -s subtitle_lang_2.txt
(.venv) $ subaligner -m script -v test.mp4 -s subtitle_lang_1.txt subtitle_lang_2.txt

**Translative alignment with the ISO 639-3 language code pair (src,tgt)**::

(.venv) $ subaligner_1pass --languages
Expand All @@ -42,6 +47,11 @@ Make sure you have got the virtual environment activated upfront.
(.venv) $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt
(.venv) $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt

**Shift subtitle manually by offset in seconds**::

(.venv) $ subaligner -m shift --subtitle_path subtitle.srt -os 5.5
(.venv) $ subaligner -m shift --subtitle_path subtitle.srt -os -5.5 -o subtitle_shifted.srt

**Run batch alignment against directories**::

(.venv) $ subaligner_batch -m single -vd videos/ -sd subtitles/ -od aligned_subtitles/
Expand Down
310 changes: 170 additions & 140 deletions subaligner/__main__.py

Large diffs are not rendered by default.

29 changes: 0 additions & 29 deletions subaligner/lib/to_srt.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,32 +598,3 @@ def parseChildTree(element_list):

# Return an iterator over self.subs so the reader object can be used directly in a for loop.
def __iter__(self):
return iter(self.subs)

if __name__ == '__main__':
    # Command-line entry point: read the input file with the selected reader
    # class (STL by default, TT with -t) and write every cue through SRT.
    from optparse import OptionParser
    import sys

    parser = OptionParser(usage = 'usage: %prog [options] input output')
    parser.set_defaults(reader_class=STL)
    parser.add_option('-d', '--debug', dest='debug_level', action='store_const', const=logging.DEBUG, default=logging.ERROR)
    parser.add_option('-r', '--rich', dest='rich_formatting', action='store_true', default=False, help='Output text with some formatting, the following HTML tags are used: b i u font(color)')
    parser.add_option("-s", "--stl", dest="reader_class", action="store_const", const=STL,
                      help="Set input file format as STL (default)")
    parser.add_option("-t", "--tt", dest="reader_class", action="store_const", const=TT,
                      help="Set input file format as TT, handles both the EBU and SMPTE variants")

    options, args = parser.parse_args()

    # Exactly two positional arguments are required: input path and output path.
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    logging.basicConfig(level=options.debug_level)

    # Named "reader" rather than "input" so the builtin is not shadowed.
    reader = options.reader_class(args[0], options.rich_formatting)
    writer = SRT(args[1])
    try:
        for tci, tco, txt in reader:
            writer.write(tci, tco, txt)
    finally:
        # Close the output file even when reading or writing a cue fails.
        writer.file.close()
2 changes: 1 addition & 1 deletion subaligner/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
absl_logging._warn_preinit_stderr = 0


class Logger(Singleton):
class Logger(metaclass=Singleton):
"""Common logging."""

VERBOSE = False
Expand Down
12 changes: 6 additions & 6 deletions subaligner/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import logging
import numpy as np
import multiprocessing as mp
from typing import Tuple, List, Optional, Dict, Any
from typing import Tuple, List, Optional, Dict, Any, Iterable
from pysrt import SubRipTime, SubRipItem, SubRipFile
from sklearn.metrics import log_loss
from copy import deepcopy
Expand All @@ -23,7 +23,7 @@
from .logger import Logger


class Predictor(Singleton):
class Predictor(metaclass=Singleton):
""" Predictor for working out the time to shift subtitles
"""
__MAX_SHIFT_IN_SECS = (
Expand Down Expand Up @@ -456,8 +456,8 @@ def _predict_in_multithreads(
os.remove(segment_path)

@staticmethod
def __minibatch(total, batch_size):
batch = []
def __minibatch(total: int, batch_size: int) -> Iterable[List[int]]:
batch: List = []
for i in range(total):
if len(batch) == batch_size:
yield batch
Expand Down Expand Up @@ -708,8 +708,8 @@ def __predict(
subtitles: Optional[SubRipFile] = None,
max_shift_secs: Optional[float] = None,
previous_gap: Optional[float] = None,
lock: threading.RLock = None,
network: Network = None
lock: Optional[threading.RLock] = None,
network: Optional[Network] = None
) -> Tuple[List[SubRipItem], str, "np.ndarray[float]"]:
"""Shift out-of-sync subtitle cues by sending the audio track of an video to the trained network.
Expand Down
8 changes: 2 additions & 6 deletions subaligner/singleton.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
from typing import Dict, Any


class _Singleton(type): # type: ignore
class Singleton(type): # type: ignore
""" A metaclass that creates a Singleton base class when called. """

_instances: Dict[Any, Any] = {}

def __call__(cls, *args, **kwargs) -> Any:
if cls not in cls._instances:
cls._instances[cls] = super(_Singleton, cls).__call__(
cls._instances[cls] = super(Singleton, cls).__call__(
*args, **kwargs
)
return cls._instances[cls]


class Singleton(_Singleton("SingletonMeta", (object,), {})): # type: ignore
pass
2 changes: 1 addition & 1 deletion subaligner/subaligner_batch/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-od OUTPUT_DIRECTORY] [-t TRANSLATE] [-lgs] [-d] [-q] [-ver]
Batch align multiple subtitle files and audiovisual files (v0.1.4)
Batch align multiple subtitle files and audiovisual files
Subtitle files and their companion audiovisual files need to be stored in two separate directories.
Each file pair needs to share the same base filename, the part before the extension.
Expand Down
10 changes: 5 additions & 5 deletions subaligner/subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,10 @@ def shift_subtitle(
string -- The path to the shifted subtitle file.
"""
_, file_extension = os.path.splitext(subtitle_file_path)
if shifted_subtitle_file_path is None:
shifted_subtitle_file_path = subtitle_file_path.replace(
file_extension, "{}{}".format(suffix, file_extension)
)
if file_extension.lower() in cls.TTML_EXTENSIONS:
subs = cls(cls.__secret, subtitle_file_path, "ttml").subs
subs.shift(seconds=seconds)
Expand All @@ -333,10 +337,6 @@ def shift_subtitle(
for index, cue in enumerate(cues):
cue.attrib["begin"] = str(subs[index].start).replace(",", ".")
cue.attrib["end"] = str(subs[index].end).replace(",", ".")
if shifted_subtitle_file_path is None:
shifted_subtitle_file_path = subtitle_file_path.replace(
file_extension, "{}{}".format(suffix, file_extension)
)
encoding = Utils.detect_encoding(subtitle_file_path)
tree.write(shifted_subtitle_file_path, encoding=encoding)
elif file_extension.lower() in cls.STL_EXTENSIONS:
Expand Down Expand Up @@ -741,7 +741,7 @@ def __save_subtitle_by_extension(file_extension: str,
# Change single quotes in the XML header to double quotes
with open(target_file_path, "w", encoding=encoding) as target:
if "xml_declaration" in inspect.getfullargspec(ElementTree.tostring).kwonlyargs: # for >= python 3.8
encoded = ElementTree.tostring(tt, encoding=encoding, method="xml", xml_declaration=True)
encoded = ElementTree.tostring(tt, encoding=encoding, method="xml", xml_declaration=True) # type: ignore
else:
encoded = ElementTree.tostring(tt, encoding=encoding, method="xml")
normalised = encoded.decode(encoding) \
Expand Down
2 changes: 1 addition & 1 deletion subaligner/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .logger import Logger


class Translator(Singleton):
class Translator(metaclass=Singleton):
"""Translate subtitles.
"""

Expand Down
24 changes: 23 additions & 1 deletion tests/integration/feature/subaligner.feature
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ Feature: Subaligner CLI
| subaligner_2pass | <NULL> |
| subaligner | dual |

@quality-management
@quality-control
Scenario Outline: Test exit when alignment log loss is too high
Given I have a video file "test.mp4"
And I have a subtitle file "test.srt"
Expand Down Expand Up @@ -302,3 +302,25 @@ Feature: Subaligner CLI
| subaligner_1pass |
| subaligner_2pass |
| subaligner |

@manual_shift
Scenario Outline: Shift the subtitle by offset in seconds
Given I have a subtitle file <subtitle-in>
When I run the manual shift with offset of <offset> in seconds
Then a new subtitle file <subtitle-out> is generated
Examples:
| subtitle-in | subtitle-out | offset |
| "test.srt" | "test_shifted.srt" | 1.1 |
| "test.ttml" | "test_shifted.ttml" | 2.2 |
| "test.xml" | "test_shifted.xml" | 3 |
| "test.dfxp" | "test_shifted.dfxp" | 4.25 |
| "test.vtt" | "test_shifted.vtt" | +0 |
| "test.sami" | "test_shifted.sami" | 0 |
| "test.ssa" | "test_shifted.ssa" | -0 |
| "test.ass" | "test_shifted.ass" | -1.1 |
| "test.sub" | "test_shifted.sub" | -2.2 |
| "test.tmp" | "test_shifted.tmp" | -3 |
| "test.smi" | "test_shifted.smi" | -4.25 |
| "test.scc" | "test_shifted.scc" | 1.1 |
| "test.sbv" | "test_shifted.sbv" | 2.2 |
| "test.ytt" | "test_shifted.ytt" | 3 |
34 changes: 34 additions & 0 deletions tests/integration/radish/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,29 @@ def subtitle_file(step, file_name):
step.context.subtitle_path_or_selector = os.path.join(PWD, "..", "..", "subaligner", "resource", file_name).replace("[]", " ")


@given('I have a list of subtitle files "{file_names:S}"')
def subtitle_file_list(step, file_names):
    """Store the resource paths of the comma-separated subtitle file names on the step context.

    The "[]" placeholder in a resolved path is replaced by a single space.
    """
    resource_dir = os.path.join(PWD, "..", "..", "subaligner", "resource")
    subtitle_paths = []
    for name in file_names.split(","):
        resolved = os.path.join(resource_dir, name)
        subtitle_paths.append(resolved.replace("[]", " "))
    step.context.subtitle_path_or_selector = subtitle_paths


@when('I run the alignment with subaligner on all of them')
def run_subaligner_on_multi_subtitles(step):
    """Run the subaligner CLI in single mode with every subtitle path from the step context.

    Each path in ``step.context.subtitle_path_or_selector`` is passed as its
    own ``-s <path>`` pair. The process exit code is stored on
    ``step.context.exit_code``.
    """
    command = [
        os.path.join(PWD, "..", "..", "..", "bin", "subaligner"),
        "-m", "single",
        "-v", step.context.video_file_path,
        "-q",
    ]
    # Popen needs a flat argv of strings; appending ["-s", path] sub-lists
    # as single elements would make it raise TypeError.
    for path in step.context.subtitle_path_or_selector:
        command.extend(["-s", path])
    process = subprocess.Popen(command, shell=False)
    step.context.exit_code = process.wait(timeout=WAIT_TIMEOUT_IN_SECONDS)


@then('a list of subtitle files "{file_names:S}" are generated')
def expect_result_list(step, file_names):
    """Assert that every comma-separated file name exists in the output directory and the run exited with 0."""
    output_dir = step.context.aligning_output
    for expected_name in file_names.split(","):
        generated_path = os.path.join(output_dir, expected_name)
        assert os.path.isfile(generated_path) is True
    assert step.context.exit_code == 0


@given('I have selector "{selector:S}" for the embedded subtitle')
def subtitle_selector(step, selector):
step.context.subtitle_path_or_selector = selector
Expand All @@ -49,6 +72,17 @@ def run_subaligner(step, aligner, mode):
step.context.exit_code = process.wait(timeout=WAIT_TIMEOUT_IN_SECONDS)


@when("I run the manual shift with offset of {offset_seconds:g} in seconds")
def run_subaligner_manual_shift(step, offset_seconds):
    """Run the subaligner CLI in shift mode on the subtitle from the step context.

    The parsed offset is converted to a string for the ``-os`` option. The
    process exit code is stored on ``step.context.exit_code``.
    """
    executable = os.path.join(PWD, "..", "..", "..", "bin", "subaligner")
    command = [executable, "-m", "shift"]
    command += ["-s", step.context.subtitle_path_or_selector]
    command += ["-os", str(offset_seconds)]
    command.append("-q")
    process = subprocess.Popen(command, shell=False)
    step.context.exit_code = process.wait(timeout=WAIT_TIMEOUT_IN_SECONDS)


@when("I run the alignment with {aligner:S} on them with {mode:S} stage and {language_pair:S} for translation")
def run_subaligner_with_translation(step, aligner, mode, language_pair):
if mode == "<NULL>":
Expand Down
2 changes: 1 addition & 1 deletion tests/subaligner/test_singleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class SingletonTests(unittest.TestCase):
def test_singleton(self):
class Single(Singleton):
class Single(metaclass=Singleton):
pass

a = Single()
Expand Down

0 comments on commit 069a8cf

Please sign in to comment.