diff --git a/src/modules/createdb.rs b/src/modules/createdb.rs index b126265..d8dd0d6 100644 --- a/src/modules/createdb.rs +++ b/src/modules/createdb.rs @@ -26,18 +26,11 @@ pub fn run(args: &Args, bin: &var::BinaryPaths) -> Result<(), Box 3, 3 => 2, _ => var::verbosity() }).to_string(); - // Either use_foldseek or use_python must be true - if !use_foldseek && !use_python { - err::error(err::ERR_ARGPARSE, Some("Either use_foldseek or use_python must be true".to_string())); - } - // Check afdb_lookup let afdb_local = if afdb_lookup && !afdb_local.is_some() { err::error(err::ERR_ARGPARSE, Some("afdb-lookup is provided but afdb-local is not given".to_string())); @@ -135,44 +128,37 @@ pub fn run(args: &Args, bin: &var::BinaryPaths) -> Result<(), Box &bin.path, - _none => { err::error(err::ERR_BINARY_NOT_FOUND, Some("foldseek".to_string())); } - }; - - // Check if old weights exist - if Path::new(&model).join("cnn.safetensors").exists() || Path::new(&model).join(format!("model{}cnn.safetensors", SEP)).exists() { - err::error(err::ERR_GENERAL, Some("Old weight files detected from the given path. Please provide different path for the model weights".to_string())); - } - // Check if weights exist - if !Path::new(&model).join("prostt5-f16.gguf").exists() { - // Download the model - std::fs::create_dir_all(format!("{}{}tmp", model, SEP))?; - let mut cmd = std::process::Command::new(foldseek_path); - let mut cmd = cmd - .arg("databases").arg("ProstT5").arg(&model).arg(format!("{}{}tmp", model, SEP)).arg("--threads").arg(threads.to_string()); - cmd::run(&mut cmd); - } + // Use foldseek to create the database + let foldseek_path = match &bin.get("foldseek") { + Some(bin) => &bin.path, + _none => { err::error(err::ERR_BINARY_NOT_FOUND, Some("foldseek".to_string())); } + }; - // Run foldseek createdb + // Check if old weights exist + if Path::new(&model).join("cnn.safetensors").exists() || Path::new(&model).join(format!("model{}cnn.safetensors", SEP)).exists() { + err::error(err::ERR_GENERAL, Some("Old weight files detected from the given path. Please provide different path for the model weights".to_string())); + } + // Check if weights exist + if !Path::new(&model).join("prostt5-f16.gguf").exists() { + // Download the model + std::fs::create_dir_all(format!("{}{}tmp", model, SEP))?; let mut cmd = std::process::Command::new(foldseek_path); - let cmd = cmd - .arg("createdb").arg(&combined_aa).arg(&output) - .arg("--prostt5-model").arg(&model) - .arg("--threads").arg(threads.to_string()); - let mut cmd = if gpu { - cmd.arg("--gpu").arg("1") - } else { cmd }; + let mut cmd = cmd + .arg("databases").arg("ProstT5").arg(&model).arg(format!("{}{}tmp", model, SEP)).arg("--threads").arg(threads.to_string()); cmd::run(&mut cmd); - } else if use_python { - let _ = _run_python(&combined_aa, &curr_dir, &parent, &output, &model, keep, bin, threads.to_string()); - } else { - err::error(err::ERR_GENERAL, Some("Either use_foldseek or use_python must be true".to_string())); } + // Run foldseek createdb + let mut cmd = std::process::Command::new(foldseek_path); + let cmd = cmd + .arg("createdb").arg(&combined_aa).arg(&output) + .arg("--prostt5-model").arg(&model) + .arg("--threads").arg(threads.to_string()); + let mut cmd = if gpu { + cmd.arg("--gpu").arg("1") + } else { cmd }; + cmd::run(&mut cmd); + if afdb_lookup { let foldseek_path = match &bin.get("foldseek") { Some(bin) => &bin.path, @@ -221,57 +207,5 @@ pub fn run(args: &Args, bin: &var::BinaryPaths) -> Result<(), Box Result<(), Box> { - let input_3di = format!("{}{}{}{}combined_3di.fasta", curr_dir, SEP, parent, SEP); - let inter_prob = format!("{}{}{}{}output_probabilities.csv", curr_dir, SEP, parent, SEP); - let output_3di = format!("{}{}{}_ss", curr_dir, SEP, output); - let foldseek_verbosity = (match var::verbosity() { 4 => 3, 3 => 2, _ => var::verbosity() }).to_string(); - - // Run python script - let mut cmd = std::process::Command::new("python"); - let mut cmd = cmd - .arg(var::locate_encoder_py()) - .arg("-i").arg(&combined_aa) - .arg("-o").arg(&input_3di) - .arg("--model").arg(&model) - .arg("--half").arg("0") - .arg("--threads").arg(threads); - cmd::run(&mut cmd); - - // Build foldseek db - let foldseek_path = match &bin.get("foldseek") { - Some(bin) => &bin.path, - _none => { err::error(err::ERR_BINARY_NOT_FOUND, Some("foldseek".to_string())); } - }; - let mut cmd = std::process::Command::new(foldseek_path); - let mut cmd = cmd - .arg("base:createdb").arg(&combined_aa).arg(&output) - .arg("--shuffle").arg("0") - .arg("-v").arg(foldseek_verbosity.as_str()); - - cmd::run(&mut cmd); - - // Build foldseek 3di db - let mut cmd = std::process::Command::new(foldseek_path); - let mut cmd = cmd - .arg("base:createdb").arg(&input_3di).arg(&output_3di) - .arg("--shuffle").arg("0") - .arg("-v").arg(foldseek_verbosity.as_str()); - cmd::run(&mut cmd); - - // Delete intermediate files - if !keep { - // std::fs::remove_file(mapping_file)?; - // std::fs::remove_file(combined_aa)?; - std::fs::remove_file(input_3di)?; - std::fs::remove_file(inter_prob)?; - } - - // // Write the checkpoint file - // chkpnt::write_checkpoint(&format!("{}/createdb.chk", parent), "1")?; - Ok(()) } diff --git a/src/util/arg_parser.rs b/src/util/arg_parser.rs index f747a5e..d763fcd 100644 --- a/src/util/arg_parser.rs +++ b/src/util/arg_parser.rs @@ -65,12 +65,6 @@ pub enum Commands { /// Use GPU for foldseek createdb #[arg(short, long, default_value="false")] gpu: bool, - /// Use python script instead. hidden option - #[arg(long, default_value="false", hide = true)] - use_python: bool, - /// Use foldseek for createdb. hidden option - #[arg(long, default_value="true", hide = true)] - use_foldseek: bool, /// Use AFDB lookup for foldseek createdb. Useful for large databases #[arg(long, default_value="false")] afdb_lookup: bool, @@ -249,12 +243,6 @@ pub enum Commands { /// Use GPU for foldseek createdb #[arg(short, long, default_value="false")] gpu: bool, - /// Use python script instead. hidden option - #[arg(long, default_value="false", hide = true)] - use_python: bool, - /// Use foldseek for createdb. hidden option - #[arg(long, default_value="true", hide = true)] - use_foldseek: bool, /// Use AFDB lookup for foldseek createdb. Useful for large databases #[arg(long, default_value="false")] afdb_lookup: bool, @@ -317,12 +305,6 @@ pub enum Commands { /// Use GPU for foldseek createdb #[arg(short, long, default_value="false")] gpu: bool, - /// Use python script instead. hidden option - #[arg(long, default_value="false", hide = true)] - use_python: bool, - /// Use foldseek for createdb. hidden option - #[arg(long, default_value="true", hide = true)] - use_foldseek: bool, /// Use AFDB lookup for foldseek createdb. Useful for large databases #[arg(long, default_value="false")] afdb_lookup: bool, @@ -376,8 +358,6 @@ pub struct Args { pub createdb_overwrite: Option, pub createdb_max_len: Option>, pub createdb_gpu: Option, - pub createdb_use_python: Option, - pub createdb_use_foldseek: Option, pub createdb_afdb_lookup: Option, pub createdb_afdb_local: Option>, @@ -480,16 +460,6 @@ impl Args { Some(EasyCore { gpu, .. }) => Some(*gpu), Some(EasySearch { gpu, .. }) => Some(*gpu), _ => None, }; - let createdb_use_python = match &args.command { - Some(Createdb { use_python, .. }) => Some(*use_python), - Some(EasyCore { use_python, .. }) => Some(*use_python), - Some(EasySearch { use_python, .. }) => Some(*use_python), _ => None, - }; - let createdb_use_foldseek = match &args.command { - Some(Createdb { use_foldseek, .. }) => Some(*use_foldseek), - Some(EasyCore { use_foldseek, .. }) => Some(*use_foldseek), - Some(EasySearch { use_foldseek, .. }) => Some(*use_foldseek), _ => None, - }; let createdb_afdb_lookup = match &args.command { Some(Createdb { afdb_lookup, .. }) => Some(*afdb_lookup), Some(EasyCore { afdb_lookup, .. }) => Some(*afdb_lookup), @@ -641,7 +611,7 @@ impl Args { Args { command: args.command, version: args.version, threads, verbosity, - createdb_input, createdb_output, createdb_model, createdb_keep, createdb_overwrite, createdb_max_len, createdb_gpu, createdb_use_python, createdb_use_foldseek, createdb_afdb_lookup, createdb_afdb_local, + createdb_input, createdb_output, createdb_model, createdb_keep, createdb_overwrite, createdb_max_len, createdb_gpu, createdb_afdb_lookup, createdb_afdb_local, profile_input_db, profile_input_tsv, profile_output, profile_threshold, profile_print_copiness, search_input, search_target, search_output, search_tmp, search_keep_aln_db, search_search_options, cluster_input, cluster_output, cluster_tmp, cluster_keep_cluster_db, cluster_cluster_options,