# Sub-command name -> "<module path>.<click command object name>".
# LazyGroup resolves an entry (and imports its module) only when that
# sub-command is actually looked up.
# Not wired up yet: "aggregated-classification", "plasmid-score", "virus-score".
_LAZY_SUBCOMMANDS = {
    "download-database": "genomad.modules.download.download_database",
    "end-to-end": "genomad.modules.endtoend.end_to_end",
    "annotate": "genomad.modules.annotate.annotate",
    "find-proviruses": "genomad.modules.find_proviruses.find_proviruses",
    "marker-classification": "genomad.modules.marker_classification.marker_classification",
    "summary": "genomad.modules.summary.summary",
    "nn-classification": "genomad.modules.nn_classification.nn_classification",
    "taxonomy": "genomad.modules.taxonomy.taxonomy",
    "mini-annotate": "genomad.modules.mini_annotate.mini_annotate",
    "convert-genbank": "genomad.modules.convert_genbank.convert_genbank",
    "convert-fasta": "genomad.modules.convert_fasta.convert_fasta",
    "help": "genomad.modules.lazy_group.help_long",
}

# Context settings applied to the root group: option defaults are shown in
# help output and three spellings of the help flag are accepted.
_GROUP_CONTEXT_SETTINGS = {
    "show_default": True,
    "help_option_names": ["-h", "-H", "--help"],
}


@click.group(
    name="Main",
    cls=LazyGroup,
    context_settings=_GROUP_CONTEXT_SETTINGS,
    lazy_subcommands=_LAZY_SUBCOMMANDS,
)
@click.version_option(prog_name="geNomad")
def genomad():
    """geNomad: Identification of mobile genetic elements"""
@click.command()
@click.option(
    "--input-path",
    type=click.Path(path_type=Path),
    help="Path to the input file.",
)
@click.option(
    "--output-path",
    type=click.Path(path_type=Path),
    # `main` calls `output_path.is_dir()` straight away, so a missing value
    # must be rejected by click instead of failing later on `None`.
    required=True,
    help="Path to the output directory.",
)
@click.option(
    "--database-path",
    type=click.Path(path_type=Path),
    help="Path to the database directory.",
)
@click.option("--use-minimal-db", is_flag=True, help="Use minimal database.")
@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
@click.option("--threads", type=int, help="Number of threads to use.")
@click.option("--verbose", is_flag=True, help="Enable verbose output.")
@click.option("--conservative-taxonomy", is_flag=True, help="Use conservative taxonomy.")
@click.option("--sensitivity", type=str, help="Sensitivity level for MMseqs2.")
@click.option("--evalue", type=float, help="E-value threshold for MMseqs2.")
@click.option("--splits", type=int, help="Number of splits for MMseqs2.")
@click.option("--cleanup", is_flag=True, help="Remove temporary files.")
def annotate(
    input_path,
    output_path,
    database_path,
    use_minimal_db,
    restart,
    threads,
    verbose,
    conservative_taxonomy,
    sensitivity,
    evalue,
    splits,
    cleanup,
):
    """Run the annotate module."""
    # NOTE(review): --input-path and --database-path are presumably mandatory
    # as well; confirm against `main` before marking them required.
    # Keyword arguments keep this call correct even if the parameter order of
    # `main` is ever changed.
    main(
        input_path=input_path,
        output_path=output_path,
        database_path=database_path,
        use_minimal_db=use_minimal_db,
        restart=restart,
        threads=threads,
        verbose=verbose,
        conservative_taxonomy=conservative_taxonomy,
        sensitivity=sensitivity,
        evalue=evalue,
        splits=splits,
        cleanup=cleanup,
    )
@click.command()
@click.option(
    "--destination",
    type=click.Path(path_type=Path),
    help="Path to the directory where the database will be downloaded.",
)
@click.option(
    "--keep",
    is_flag=True,
    help="Keep the downloaded database file.",
)
@click.option(
    "--verbose",
    is_flag=True,
    help="Enable verbose output.",
)
def download_database(destination, keep, verbose):
    # Thin click wrapper: the parsed options are handed unchanged to `main`.
    # Deliberately no docstring, since click would render it as help text.
    main(destination=destination, keep=keep, verbose=verbose)
invalud type antonio total_count = 0 count_array, value_array = utils.rle_encode(provirus_labels) n_islands = len(count_array) @@ -388,8 +389,25 @@ def yield_proviruses( ) total_count += count - -def main( +@click.command() +@click.option("--input-path", type=click.Path(path_type=Path), help="Path to the input file.") +@click.option("--output-path", type=click.Path(path_type=Path), help="Path to the output directory.") +@click.option("--database-path", type=click.Path(path_type=Path), help="Path to the database directory.") +@click.option("--cleanup", is_flag=True, help="Remove temporary files.") +@click.option("--restart", is_flag=True, help="Restart the execution of the module.") +@click.option("--skip-integrase-identification", is_flag=True, help="Skip integrase identification.") +@click.option("--skip-trna-identification", is_flag=True, help="Skip tRNA identification.") +@click.option("--crf-threshold", type=float, help="CRF threshold.") +@click.option("--marker-threshold", type=float, help="Marker threshold.") +@click.option("--marker-threshold-integrase", type=float, help="Marker threshold for integrases.") +@click.option("--marker-threshold-edge", type=float, help="Marker threshold for edges.") +@click.option("--max-integrase-distance", type=int, help="Maximum distance for integrases.") +@click.option("--max-trna-distance", type=int, help="Maximum distance for tRNAs.") +@click.option("--sensitivity", type=str, help="Sensitivity level for MMseqs2.") +@click.option("--evalue", type=float, help="E-value threshold for MMseqs2.") +@click.option("--threads", type=int, help="Number of threads to use.") +@click.option("--verbose", is_flag=True, help="Enable verbose output.") +def find_proviruses( input_path, output_path, database_path, @@ -408,6 +426,9 @@ def main( sensitivity, evalue, ): + main(input_path, output_path, database_path, cleanup, restart, skip_integrase_identification, skip_trna_identification, threads, verbose, crf_threshold, marker_threshold, 
# Adapted from https://click.palletsprojects.com/en/8.1.x/complex/#lazily-loading-subcommands
class LazyGroup(click.RichGroup):
    """Command group whose sub-commands are imported only when requested.

    ``lazy_subcommands`` maps a command name to an import path of the form
    ``"<module name>.<command object name>"``. The module is imported the
    first time the command is looked up, so slow imports in one sub-command
    do not delay the others.
    """

    def __init__(self, *args, lazy_subcommands=None, **kwargs):
        super().__init__(*args, **kwargs)
        # lazy_subcommands is a map of the form:
        #
        #   {command-name} -> {module-name}.{command-object-name}
        #
        self.lazy_subcommands = lazy_subcommands or {}

    def list_commands(self, ctx):
        """Return eagerly registered commands followed by the sorted lazy ones."""
        registered = super().list_commands(ctx)
        # A name present in both places is listed once; `get_command` resolves
        # it through the lazy mapping.
        lazy = sorted(
            name for name in self.lazy_subcommands if name not in registered
        )
        return registered + lazy

    def get_command(self, ctx, cmd_name):
        """Return the command called ``cmd_name``, importing it if it is lazy."""
        if cmd_name in self.lazy_subcommands:
            return self._lazy_load(cmd_name)
        return super().get_command(ctx, cmd_name)

    def _lazy_load(self, cmd_name):
        """Import and return the command object registered for ``cmd_name``.

        Raises:
            ValueError: if the import path is not of the form
                ``module.attribute`` or the attribute is not a click command.
        """
        import_path = self.lazy_subcommands[cmd_name]
        modname, _, cmd_object_name = import_path.rpartition(".")
        if not modname or not cmd_object_name:
            raise ValueError(
                f"Lazy sub-command path {import_path!r} must have the form "
                "'<module>.<command object>'"
            )
        mod = importlib.import_module(modname)
        cmd_object = getattr(mod, cmd_object_name)
        # `click.Command` is the common base of commands and groups;
        # `click.BaseCommand` is deprecated as of Click 8.2.
        if not isinstance(cmd_object, click.Command):
            raise ValueError(
                f"Lazy loading of {import_path} failed by returning "
                "a non-command object"
            )
        return cmd_object
@click.command()
@click.option(
    "--input-path",
    type=click.Path(path_type=Path),
    help="Path to the input file.",
)
@click.option(
    "--output-path",
    type=click.Path(path_type=Path),
    help="Path to the output directory.",
)
@click.option("--single-window", is_flag=True, help="Use single window.")
@click.option("--batch-size", type=int, help="Batch size.")
@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
@click.option("--threads", type=int, help="Number of threads to use.")
@click.option("--verbose", is_flag=True, help="Enable verbose output.")
@click.option("--cleanup", is_flag=True, help="Remove temporary files.")
def nn_classification(
    input_path,
    output_path,
    single_window,
    batch_size,
    restart,
    threads,
    verbose,
    cleanup,
):
    # Thin click wrapper: the parsed options are handed unchanged to `main`.
    # Deliberately no docstring, since click would render it as help text.
    main(
        input_path=input_path,
        output_path=output_path,
        single_window=single_window,
        batch_size=batch_size,
        restart=restart,
        threads=threads,
        verbose=verbose,
        cleanup=cleanup,
    )
# Every multi-word option accepts the hyphenated spelling used by the other
# geNomad sub-commands as well as the original underscore spelling. Click
# derives the same Python parameter name from either form.
@click.command()
@click.option(
    "--input-path", "--input_path",
    type=click.Path(path_type=Path),
    help="Path to the input FASTA file.",
)
@click.option(
    "--output-path", "--output_path",
    type=click.Path(path_type=Path),
    help="Path to the output directory.",
)
@click.option("--verbose", is_flag=True, help="Enable verbose output.")
@click.option(
    "--min-score", "--min_score",
    type=float,
    help="Minimum score to use for the summary.",
)
@click.option(
    "--max-fdr", "--max_fdr",
    type=float,
    help="Maximum FDR to use for the summary.",
)
@click.option(
    "--min-number-genes", "--min_number_genes",
    type=int,
    help="Minimum number of genes to use for the summary.",
)
@click.option(
    "--min-plasmid-marker-enrichment", "--min_plasmid_marker_enrichment",
    type=float,
    help="Minimum plasmid marker enrichment to use for the summary.",
)
@click.option(
    "--min-virus-marker-enrichment", "--min_virus_marker_enrichment",
    type=float,
    help="Minimum virus marker enrichment to use for the summary.",
)
@click.option(
    "--min-plasmid-hallmarks", "--min_plasmid_hallmarks",
    type=int,
    help="Minimum plasmid hallmarks to use for the summary.",
)
@click.option(
    "--min-plasmid-hallmarks-short-seqs", "--min_plasmid_hallmarks_short_seqs",
    type=int,
    help="Minimum plasmid hallmarks for short sequences to use for the summary.",
)
@click.option(
    "--min-virus-hallmarks", "--min_virus_hallmarks",
    type=int,
    help="Minimum virus hallmarks to use for the summary.",
)
@click.option(
    "--min-virus-hallmarks-short-seqs", "--min_virus_hallmarks_short_seqs",
    type=int,
    help="Minimum virus hallmarks for short sequences to use for the summary.",
)
@click.option(
    "--max-uscg", "--max_uscg",
    type=int,
    help="Maximum USCGs to use for the summary.",
)
@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
def summary(
    input_path,
    output_path,
    verbose,
    min_score,
    max_fdr,
    min_number_genes,
    min_plasmid_marker_enrichment,
    min_virus_marker_enrichment,
    min_plasmid_hallmarks,
    min_plasmid_hallmarks_short_seqs,
    min_virus_hallmarks,
    min_virus_hallmarks_short_seqs,
    max_uscg,
    restart,
):
    """Run the summary module."""
    # The arguments are forwarded positionally, in the same order as the
    # original wrapper.
    main(
        input_path,
        output_path,
        verbose,
        min_score,
        max_fdr,
        min_number_genes,
        min_plasmid_marker_enrichment,
        min_virus_marker_enrichment,
        min_plasmid_hallmarks,
        min_plasmid_hallmarks_short_seqs,
        min_virus_hallmarks,
        min_virus_hallmarks_short_seqs,
        max_uscg,
        restart,
    )
input_path,