Skip to content

Commit

Permalink
preprocess overlapping subtitle segments before aligning
Browse files Browse the repository at this point in the history
  • Loading branch information
baxtree committed Oct 4, 2020
1 parent 6b0f4a3 commit 5473413
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ or
```
brew install ffmpeg espeak
```

## Installation
```
# Install from PyPI (pre-emptive NumPy)
Expand Down Expand Up @@ -92,6 +91,7 @@ $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner_2pass -v
```
The aligned subtitle will be saved at `subtitle_aligned.srt`. For details on CLI, run `subaligner_1pass --help`, `subaligner_2pass --help` or `subaligner --help`.

![](figures/screencast.gif)
## Supported Formats
Subtitle: SubRip, TTML, WebVTT, (Advanced) SubStation Alpha, MicroDVD, MPL2 and TMP

Expand Down
2 changes: 1 addition & 1 deletion subaligner/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def get_logger(self, name):
if Logger.QUIET:
logger.setLevel(logging.ERROR)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(threadName)-9s - %(message)s"
"%(name)s - %(levelname)s - %(threadName)-9s - %(message)s"
)

file_handler = logging.FileHandler(self.__output_log, "w+")
Expand Down
50 changes: 31 additions & 19 deletions subaligner/media_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import atexit
import signal

from copy import deepcopy
from pysrt import SubRipFile
from decimal import Decimal
from .embedder import FeatureEmbedder
Expand Down Expand Up @@ -259,46 +260,45 @@ def get_audio_segment_starts_and_ends(subs):
tuple -- A list of start times, a list of end times and a list of grouped SubRip files.
"""

local_subs = MediaHelper.__preprocess_subs(subs)

segment_starts = []
segment_ends = []
combined = []
new_subs = []
current_start = str(subs[0].start)
for i in range(len(subs)):
# Ignore subsequent overlapped subtitles
# (But if this means the subtitle is malformed, an exception should be raised.)
if i != 0 and subs[i].start < subs[i - 1].end:
continue
if i == len(subs) - 1:
combined.append(subs[i])
current_start = str(local_subs[0].start)

for i in range(len(local_subs)):
if i == len(local_subs) - 1:
combined.append(local_subs[i])
segment_starts.append(current_start)
segment_ends.append(str(subs[i].end))
segment_ends.append(str(local_subs[i].end))
new_subs.append(SubRipFile(combined))
del combined[:]
else:
# Do not segment when the subtitle is too short
duration = FeatureEmbedder.time_to_sec(
subs[i].end
) - FeatureEmbedder.time_to_sec(subs[i].start)
local_subs[i].end
) - FeatureEmbedder.time_to_sec(local_subs[i].start)
if duration < MediaHelper.__MIN_SECS_PER_WORD:
combined.append(subs[i])
combined.append(local_subs[i])
continue
# Do not segment consecutive subtitles having little or no gap.
gap = FeatureEmbedder.time_to_sec(
subs[i + 1].start
) - FeatureEmbedder.time_to_sec(subs[i].end)
local_subs[i + 1].start
) - FeatureEmbedder.time_to_sec(local_subs[i].end)
if (
subs[i].end == subs[i + 1].start
local_subs[i].end == local_subs[i + 1].start
or gap < MediaHelper.__MIN_GAP_IN_SECS
):
combined.append(subs[i])
combined.append(local_subs[i])
continue
combined.append(subs[i])
combined.append(local_subs[i])
# The start time is set to last cue's end time
segment_starts.append(current_start)
# The end time cannot be set to next cue's start time due to possible overlay
segment_ends.append(str(subs[i].end))
current_start = str(subs[i].end)
segment_ends.append(str(local_subs[i].end))
current_start = str(local_subs[i].end)
new_subs.append(SubRipFile(combined))
del combined[:]
return segment_starts, segment_ends, new_subs
Expand Down Expand Up @@ -366,3 +366,15 @@ def get_frame_rate(file_path):
process.kill()
proc.kill()
os.system("stty sane")

@staticmethod
def __preprocess_subs(subs):
    """Return a deep copy of the subtitles with overlapping cues trimmed.

    Whenever a cue starts before the preceding cue has ended, the preceding
    cue's end time is moved back to the start of the current cue and a
    warning is logged. The object passed in is left untouched because all
    changes are applied to a deep copy.

    Arguments:
        subs -- An indexable sequence of cues exposing "start" and "end"
                (presumably a SubRipFile; confirm against callers).

    Returns:
        A deep copy of "subs" in which no cue ends after the next cue starts.
    """

    trimmed = deepcopy(subs)

    # Compare every cue with its predecessor; the first cue has none.
    for index in range(1, len(trimmed)):
        previous, current = trimmed[index - 1], trimmed[index]
        if current.start < previous.end:
            MediaHelper.__LOGGER.warning("Found overlapping subtitle cues and the earlier one's duration will be shortened.")
            # Shorten the earlier cue so it stops where the later one begins.
            previous.end = current.start

    return trimmed
7 changes: 4 additions & 3 deletions subaligner/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import threading
import concurrent.futures
import gc
import math
import numpy as np
import multiprocessing as mp

Expand Down Expand Up @@ -80,7 +81,7 @@ def predict_single_pass(
self.__feature_embedder.step_sample = 1 / frame_rate
self.__on_frame_timecodes(subs)
except NoFrameRateException:
Predictor.__LOGGER.warn("Cannot find frame rate for %s" % video_file_path)
Predictor.__LOGGER.warning("Cannot find frame rate for %s" % video_file_path)
return subs, audio_file_path, voice_probabilities, frame_rate
finally:
if os.path.exists(audio_file_path):
Expand Down Expand Up @@ -123,7 +124,7 @@ def predict_dual_pass(
self.__feature_embedder.step_sample = 1 / frame_rate
self.__on_frame_timecodes(new_subs)
except NoFrameRateException:
Predictor.__LOGGER.warn("Cannot find frame rate for %s" % video_file_path)
Predictor.__LOGGER.warning("Cannot find frame rate for %s" % video_file_path)
Predictor.__LOGGER.debug("Aligned segments generated")
return new_subs, subs, voice_probabilities, frame_rate
finally:
Expand Down Expand Up @@ -418,7 +419,7 @@ def __predict_2nd_pass(self, audio_file_path, subs, weights_file_path, stretch,

subs_list = []

max_workers = int(os.getenv("MAX_WORKERS", mp.cpu_count() / 2))
# os.getenv returns a str when MAX_WORKERS is set (the float default is only
# used when it is unset), and math.ceil rejects strings, so convert to float
# before rounding up to a whole number of workers.
max_workers = math.ceil(float(os.getenv("MAX_WORKERS", mp.cpu_count() / 2)))
Predictor.__LOGGER.debug("Number of workers: {}".format(max_workers))

with _ThreadPoolExecutorLocal(
Expand Down
3 changes: 2 additions & 1 deletion subaligner/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import h5py
import traceback
import concurrent.futures
import math
import numpy as np
import multiprocessing as mp

Expand Down Expand Up @@ -238,7 +239,7 @@ def __extract_data_and_label_from_avs(
)

extraction_start = datetime.datetime.now()
max_workers = int(os.getenv("MAX_WORKERS", mp.cpu_count() / 2))
# os.getenv returns a str when MAX_WORKERS is set (the float default is only
# used when it is unset), and math.ceil rejects strings, so convert to float
# before rounding up to a whole number of workers.
max_workers = math.ceil(float(os.getenv("MAX_WORKERS", mp.cpu_count() / 2)))
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
Expand Down
2 changes: 0 additions & 2 deletions subaligner/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,6 @@ def __convert_subtitle(source_file_path, source_ext, target_file_path, target_ex
if target_file_path is None:
target_file_path = source_file_path.replace(".%s" % source_ext, ".%s" % target_ext)
if frame_rate is None:
print(">>>>>>>>>>>>>>>>{}".format(format))
subs.save(target_file_path, encoding="utf-8", format_=format)
else:
print("<<<<<<<<>>>>>>>>{}".format(format))
subs.save(target_file_path, encoding="utf-8", format_=format, fps=frame_rate)

0 comments on commit 5473413

Please sign in to comment.