preprocess overlapping subtitle segments before aligning

baxtree · Oct 4, 2020 · 5473413 · 5473413
1 parent 6b0f4a3
commit 5473413
Show file tree

Hide file tree

Showing 6 changed files with 39 additions and 27 deletions.
diff --git a/README.md b/README.md
@@ -14,7 +14,6 @@ or
 ```
 brew install ffmpeg espeak
 ```
-
 ## Installation
 ```
 # Install from PyPI (pre-emptive NumPy)
@@ -92,6 +91,7 @@ $ docker run -v `pwd`:`pwd` -w `pwd` -it baxtree/subaligner subaligner_2pass -v
 ```
 The aligned subtitle will be saved at `subtitle_aligned.srt`. For details on CLI, run `subaligner_1pass --help`, `subaligner_2pass --help` or `subaligner --help`.
 
+![](figures/screencast.gif)
 ## Supported Formats
 Subtitle: SubRip, TTML, WebVTT, (Advanced) SubStation Alpha, MicroDVD, MPL2 and TMP
 

diff --git a/subaligner/logger.py b/subaligner/logger.py
@@ -26,7 +26,7 @@ def get_logger(self, name):
             if Logger.QUIET:
                 logger.setLevel(logging.ERROR)
             formatter = logging.Formatter(
-                "%(asctime)s - %(name)s - %(levelname)s - %(threadName)-9s - %(message)s"
+                "%(name)s - %(levelname)s - %(threadName)-9s - %(message)s"
             )
 
             file_handler = logging.FileHandler(self.__output_log, "w+")

diff --git a/subaligner/media_helper.py b/subaligner/media_helper.py
@@ -7,6 +7,7 @@
 import atexit
 import signal
 
+from copy import deepcopy
 from pysrt import SubRipFile
 from decimal import Decimal
 from .embedder import FeatureEmbedder
@@ -259,46 +260,45 @@ def get_audio_segment_starts_and_ends(subs):
             tuple -- A list of start times, a list of end times and a list of grouped SubRip files.
         """
 
+        local_subs = MediaHelper.__preprocess_subs(subs)
+
         segment_starts = []
         segment_ends = []
         combined = []
         new_subs = []
-        current_start = str(subs[0].start)
-        for i in range(len(subs)):
-            # Ignore subsequent overlapped subtitles
-            # (But if this means the subtitle is malformed, an exception should be raised.)
-            if i != 0 and subs[i].start < subs[i - 1].end:
-                continue
-            if i == len(subs) - 1:
-                combined.append(subs[i])
+        current_start = str(local_subs[0].start)
+
+        for i in range(len(local_subs)):
+            if i == len(local_subs) - 1:
+                combined.append(local_subs[i])
                 segment_starts.append(current_start)
-                segment_ends.append(str(subs[i].end))
+                segment_ends.append(str(local_subs[i].end))
                 new_subs.append(SubRipFile(combined))
                 del combined[:]
             else:
                 # Do not segment when the subtitle is too short
                 duration = FeatureEmbedder.time_to_sec(
-                    subs[i].end
-                ) - FeatureEmbedder.time_to_sec(subs[i].start)
+                    local_subs[i].end
+                ) - FeatureEmbedder.time_to_sec(local_subs[i].start)
                 if duration < MediaHelper.__MIN_SECS_PER_WORD:
-                    combined.append(subs[i])
+                    combined.append(local_subs[i])
                     continue
                 # Do not segment consecutive subtitles having little or no gap.
                 gap = FeatureEmbedder.time_to_sec(
-                    subs[i + 1].start
-                ) - FeatureEmbedder.time_to_sec(subs[i].end)
+                    local_subs[i + 1].start
+                ) - FeatureEmbedder.time_to_sec(local_subs[i].end)
                 if (
-                    subs[i].end == subs[i + 1].start
+                    local_subs[i].end == local_subs[i + 1].start
                     or gap < MediaHelper.__MIN_GAP_IN_SECS
                 ):
-                    combined.append(subs[i])
+                    combined.append(local_subs[i])
                     continue
-                combined.append(subs[i])
+                combined.append(local_subs[i])
                 # The start time is set to last cue's end time
                 segment_starts.append(current_start)
                 # The end time cannot be set to next cue's start time due to possible overlay
-                segment_ends.append(str(subs[i].end))
-                current_start = str(subs[i].end)
+                segment_ends.append(str(local_subs[i].end))
+                current_start = str(local_subs[i].end)
                 new_subs.append(SubRipFile(combined))
                 del combined[:]
         return segment_starts, segment_ends, new_subs
@@ -366,3 +366,15 @@ def get_frame_rate(file_path):
                     process.kill()
                     proc.kill()
                     os.system("stty sane")
+
+    @staticmethod
+    def __preprocess_subs(subs):
+        """Return a deep copy of the subtitles in which overlapping adjacent cues are trimmed.
+
+        Each cue is compared only with the cue immediately before it. When a cue
+        starts before the previous cue ends, a warning is logged and the previous
+        cue's end time is set to the current cue's start time.
+
+        Arguments:
+            subs -- An indexable sequence of cues exposing comparable "start" and "end"
+                    attributes (presumably a pysrt SubRipFile; confirm against callers).
+
+        Returns:
+            A deep copy of "subs" with the end times of overlapping cues shortened.
+        """
+
+        # Work on a deep copy so that the adjustments below never touch the caller's cues
+        local_subs = deepcopy(subs)
+
+        # Preprocess overlapping subtitles
+        for i in range(len(local_subs)):
+            # The first cue has no predecessor to overlap with
+            if i != 0 and local_subs[i].start < local_subs[i - 1].end:
+                MediaHelper.__LOGGER.warning("Found overlapping subtitle cues and the earlier one's duration will be shortened.")
+                # Cut the earlier cue short so that it ends exactly where the current cue starts
+                local_subs[i - 1].end = local_subs[i].start
+
+        return local_subs
diff --git a/subaligner/predictor.py b/subaligner/predictor.py
@@ -4,6 +4,7 @@
 import threading
 import concurrent.futures
 import gc
+import math
 import numpy as np
 import multiprocessing as mp
 
@@ -80,7 +81,7 @@ def predict_single_pass(
                 self.__feature_embedder.step_sample = 1 / frame_rate
                 self.__on_frame_timecodes(subs)
             except NoFrameRateException:
-                Predictor.__LOGGER.warn("Cannot find frame rate for %s" % video_file_path)
+                Predictor.__LOGGER.warning("Cannot find frame rate for %s" % video_file_path)
             return subs, audio_file_path, voice_probabilities, frame_rate
         finally:
             if os.path.exists(audio_file_path):
@@ -123,7 +124,7 @@ def predict_dual_pass(
                 self.__feature_embedder.step_sample = 1 / frame_rate
                 self.__on_frame_timecodes(new_subs)
             except NoFrameRateException:
-                Predictor.__LOGGER.warn("Cannot find frame rate for %s" % video_file_path)
+                Predictor.__LOGGER.warning("Cannot find frame rate for %s" % video_file_path)
             Predictor.__LOGGER.debug("Aligned segments generated")
             return new_subs, subs, voice_probabilities, frame_rate
         finally:
@@ -418,7 +419,7 @@ def __predict_2nd_pass(self, audio_file_path, subs, weights_file_path, stretch,
 
         subs_list = []
 
-        max_workers = int(os.getenv("MAX_WORKERS", mp.cpu_count() / 2))
+        max_workers = math.ceil(os.getenv("MAX_WORKERS", mp.cpu_count() / 2))
         Predictor.__LOGGER.debug("Number of workers: {}".format(max_workers))
 
         with _ThreadPoolExecutorLocal(

diff --git a/subaligner/trainer.py b/subaligner/trainer.py
@@ -3,6 +3,7 @@
 import h5py
 import traceback
 import concurrent.futures
+import math
 import numpy as np
 import multiprocessing as mp
 
@@ -238,7 +239,7 @@ def __extract_data_and_label_from_avs(
         )
 
         extraction_start = datetime.datetime.now()
-        max_workers = int(os.getenv("MAX_WORKERS", mp.cpu_count() / 2))
+        max_workers = math.ceil(os.getenv("MAX_WORKERS", mp.cpu_count() / 2))
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=max_workers
         ) as executor:

diff --git a/subaligner/utils.py b/subaligner/utils.py
@@ -272,8 +272,6 @@ def __convert_subtitle(source_file_path, source_ext, target_file_path, target_ex
         if target_file_path is None:
             target_file_path = source_file_path.replace(".%s" % source_ext, ".%s" % target_ext)
         if frame_rate is None:
-            print(">>>>>>>>>>>>>>>>{}".format(format))
             subs.save(target_file_path, encoding="utf-8", format_=format)
         else:
-            print("<<<<<<<<>>>>>>>>{}".format(format))
             subs.save(target_file_path, encoding="utf-8", format_=format, fps=frame_rate)