diff --git a/.readthedocs.yml b/.readthedocs.yml index 7e7990e..e125ff4 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -19,5 +19,5 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: version: 3.6 - install: - - requirements: site/requirements-docs.txt +# install: +# - requirements: site/requirements-docs.txt diff --git a/Makefile b/Makefile index ea25f22..faf4341 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,7 @@ pydoc: clean-doc ## generate pydoc HTML documentation based on docstrings cat requirements.txt | xargs -L 1 .venv/bin/pip install; \ .venv/bin/python -m pydoc -w subaligner; mv subaligner.html docs/index.html .venv/bin/python -m pydoc -w subaligner.embedder; mv subaligner.embedder.html docs + .venv/bin/python -m pydoc -w subaligner.hparam_tuner; mv subaligner.hparam_tuner.html docs .venv/bin/python -m pydoc -w subaligner.hyperparameters; mv subaligner.hyperparameters.html docs .venv/bin/python -m pydoc -w subaligner.media_helper; mv subaligner.media_helper.html docs .venv/bin/python -m pydoc -w subaligner.network; mv subaligner.network.html docs diff --git a/docs/.gitkeep b/docs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/site/requirements-docs.txt b/site/requirements-docs.txt index 1223806..21934c7 100644 --- a/site/requirements-docs.txt +++ b/site/requirements-docs.txt @@ -1 +1,2 @@ -psutil==5.6.6 \ No newline at end of file +hyperopt==0.2.4 +psutil==5.6.6 diff --git a/site/source/conf.py b/site/source/conf.py index ce7be9a..c8ccc28 100644 --- a/site/source/conf.py +++ b/site/source/conf.py @@ -70,9 +70,11 @@ "absl-py", "aeneas", "h5py", + "hyperopt", "librosa", "matplotlib", "numpy", + "psutil", "pycaption", "pysrt", "sklearn", diff --git a/subaligner/hparam_tuner.py b/subaligner/hparam_tuner.py index 00a5a0c..456c883 100644 --- a/subaligner/hparam_tuner.py +++ b/subaligner/hparam_tuner.py @@ -8,6 +8,7 @@ class HyperParameterTuner(object): + """Hyper parameter tuning using the Bayesian Optimizer""" SEARCH_SPACE = { "learning_rate": hp.loguniform("learning_rate", np.log(0.00001), np.log(0.1)), @@ -25,6 +26,17 @@ def __init__(self, training_dump_dir, num_of_trials=5, tuning_epochs=5): + """Hyper parameter tuner initialiser + + Arguments: + av_file_paths {list} -- A list of paths to the input audio/video files. + subtitle_file_paths {list} -- A list of paths to the subtitle files. + training_dump_dir {string} -- The directory of the training data dump file. + + Keyword Arguments: + num_of_trials {int} -- The number of trials for tuning (default: {5}). + tuning_epochs {int} -- The number of training epochs for each trial (default: {5}). + """ self.__trainer = Trainer(FeatureEmbedder()) self.__av_file_paths = av_file_paths self.__subtitle_file_paths = subtitle_file_paths @@ -40,6 +52,8 @@ def hyperparameters(self): return self.__hyperparameters.clone() def tune_hyperparameters(self): + """Tune the hyper parameters""" + trials = hyperopt.Trials() minimised = hyperopt.fmin(fn=self.__get_val_loss, space=self.SEARCH_SPACE, diff --git a/subaligner/hyperparameters.py b/subaligner/hyperparameters.py index 7210abb..d19de5a 100644 --- a/subaligner/hyperparameters.py +++ b/subaligner/hyperparameters.py @@ -7,6 +7,8 @@ class Hyperparameters(object): """ def __init__(self): + """Hyper parameters initialiser setting default values""" + self.__learning_rate = 0.001 self.__hidden_size = { "front_layers": [64], @@ -26,6 +28,8 @@ def __init__(self): self.__network_type = "lstm" def __eq__(self, other): + """Comparator for Hyperparameters objects""" + if isinstance(other, Hyperparameters): return all([ self.__learning_rate == other.learning_rate, @@ -172,22 +176,53 @@ def network_type(self, value): self.__network_type = value def to_json(self): + """Serialise hyper parameters into JSON string + + Returns: + string -- The serialised hyper parameters in JSON + """ return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) def to_file(self, file_path): + """Serialise hyper parameters into JSON and save the content to a file + + Arguments: + file_path {string} -- The path to the file containing saved hyper parameters. + """ with open(file_path, "w", encoding="utf8") as file: file.write(self.to_json()) def clone(self): + """Make a cloned hyper parameters object + + Returns: + Hyperparameters -- The cloned Hyperparameters object. + """ return self.from_json(self.to_json()) @classmethod def from_json(cls, json_str): + """Deserialise JSON string into a Hyperparameters object + + Arguments: + json_str {string} -- Hyper parameters in JSON. + + Returns: + Hyperparameters -- The deserialised Hyperparameters object. + """ hp = cls() hp.__dict__ = json.loads(json_str) return hp @classmethod def from_file(cls, file_path): + """Deserialise a file content into a Hyperparameters object + + Arguments: + file_path {string} -- The path to the file containing hyper parameters. + + Returns: + Hyperparameters -- The deserialised Hyperparameters object. + """ with open(file_path, "r", encoding="utf8") as file: return cls.from_json(file.read())