Skip to content

Commit

Permalink
Removed Rust code lines related to use_python
Browse files Browse the repository at this point in the history
  • Loading branch information
pskvins committed Jan 30, 2025
1 parent 9a14ad8 commit 8fe9599
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 123 deletions.
118 changes: 26 additions & 92 deletions src/modules/createdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,11 @@ pub fn run(args: &Args, bin: &var::BinaryPaths) -> Result<(), Box<dyn std::error
let overwrite = args.createdb_overwrite.unwrap_or_else(|| { err::error(err::ERR_ARGPARSE, Some("createdb - overwrite".to_string())); });
let max_len = args.createdb_max_len.unwrap_or_else(|| { err::error(err::ERR_ARGPARSE, Some("createdb - max_len".to_string())); });
let gpu = args.createdb_gpu.unwrap_or_else(|| { err::error(err::ERR_ARGPARSE, Some("createdb - gpu".to_string())); });
let use_python = args.createdb_use_python.unwrap_or_else(|| { err::error(err::ERR_ARGPARSE, Some("createdb - use_python".to_string())); });
let use_foldseek = args.createdb_use_foldseek.unwrap_or_else(|| { err::error(err::ERR_ARGPARSE, Some("createdb - use_foldseek".to_string())); });
let afdb_lookup = args.createdb_afdb_lookup.unwrap_or_else(|| { err::error(err::ERR_ARGPARSE, Some("createdb - afdb_lookup".to_string())); });
let afdb_local = args.createdb_afdb_local.clone().unwrap_or_else(|| { err::error(err::ERR_ARGPARSE, Some("createdb - afdb_local".to_string())); });
let threads = crate::envs::variables::threads();
let foldseek_verbosity = (match var::verbosity() { 4 => 3, 3 => 2, _ => var::verbosity() }).to_string();

// Either use_foldseek or use_python must be true
if !use_foldseek && !use_python {
err::error(err::ERR_ARGPARSE, Some("Either use_foldseek or use_python must be true".to_string()));
}

// Check afdb_lookup
let afdb_local = if afdb_lookup && !afdb_local.is_some() {
err::error(err::ERR_ARGPARSE, Some("afdb-lookup is provided but afdb-local is not given".to_string()));
Expand Down Expand Up @@ -135,44 +128,37 @@ pub fn run(args: &Args, bin: &var::BinaryPaths) -> Result<(), Box<dyn std::error
fasta::write_fasta(&combined_aa, &fasta_data)?;
}

if use_foldseek {
// Added use_foldseek temporarily.
// TODO: Remove use_foldseek when foldseek is ready
let foldseek_path = match &bin.get("foldseek") {
Some(bin) => &bin.path,
_none => { err::error(err::ERR_BINARY_NOT_FOUND, Some("foldseek".to_string())); }
};

// Check if old weights exist
if Path::new(&model).join("cnn.safetensors").exists() || Path::new(&model).join(format!("model{}cnn.safetensors", SEP)).exists() {
err::error(err::ERR_GENERAL, Some("Old weight files detected from the given path. Please provide different path for the model weights".to_string()));
}
// Check if weights exist
if !Path::new(&model).join("prostt5-f16.gguf").exists() {
// Download the model
std::fs::create_dir_all(format!("{}{}tmp", model, SEP))?;
let mut cmd = std::process::Command::new(foldseek_path);
let mut cmd = cmd
.arg("databases").arg("ProstT5").arg(&model).arg(format!("{}{}tmp", model, SEP)).arg("--threads").arg(threads.to_string());
cmd::run(&mut cmd);
}
// Use foldseek to create the database
let foldseek_path = match &bin.get("foldseek") {
Some(bin) => &bin.path,
_none => { err::error(err::ERR_BINARY_NOT_FOUND, Some("foldseek".to_string())); }
};

// Run foldseek createdb
// Check if old weights exist
if Path::new(&model).join("cnn.safetensors").exists() || Path::new(&model).join(format!("model{}cnn.safetensors", SEP)).exists() {
err::error(err::ERR_GENERAL, Some("Old weight files detected from the given path. Please provide different path for the model weights".to_string()));
}
// Check if weights exist
if !Path::new(&model).join("prostt5-f16.gguf").exists() {
// Download the model
std::fs::create_dir_all(format!("{}{}tmp", model, SEP))?;
let mut cmd = std::process::Command::new(foldseek_path);
let cmd = cmd
.arg("createdb").arg(&combined_aa).arg(&output)
.arg("--prostt5-model").arg(&model)
.arg("--threads").arg(threads.to_string());
let mut cmd = if gpu {
cmd.arg("--gpu").arg("1")
} else { cmd };
let mut cmd = cmd
.arg("databases").arg("ProstT5").arg(&model).arg(format!("{}{}tmp", model, SEP)).arg("--threads").arg(threads.to_string());
cmd::run(&mut cmd);
} else if use_python {
let _ = _run_python(&combined_aa, &curr_dir, &parent, &output, &model, keep, bin, threads.to_string());
} else {
err::error(err::ERR_GENERAL, Some("Either use_foldseek or use_python must be true".to_string()));
}

// Run foldseek createdb
let mut cmd = std::process::Command::new(foldseek_path);
let cmd = cmd
.arg("createdb").arg(&combined_aa).arg(&output)
.arg("--prostt5-model").arg(&model)
.arg("--threads").arg(threads.to_string());
let mut cmd = if gpu {
cmd.arg("--gpu").arg("1")
} else { cmd };
cmd::run(&mut cmd);

if afdb_lookup {
let foldseek_path = match &bin.get("foldseek") {
Some(bin) => &bin.path,
Expand Down Expand Up @@ -221,57 +207,5 @@ pub fn run(args: &Args, bin: &var::BinaryPaths) -> Result<(), Box<dyn std::error
chkpnt::write_checkpoint(&checkpoint_file, "1")?;


Ok(())
}

/// Creates the amino-acid and 3Di foldseek databases by way of the Python encoder script.
///
/// Steps, in order:
/// 1. Runs `python <var::locate_encoder_py()>` with `combined_aa` as input (`-i`) and
///    `{curr_dir}{SEP}{parent}{SEP}combined_3di.fasta` as output (`-o`), passing `model`,
///    `--half 0` and `threads` through.
/// 2. Runs `foldseek base:createdb` on `combined_aa`, writing to `output`.
/// 3. Runs `foldseek base:createdb` on the 3Di FASTA, writing to `{curr_dir}{SEP}{output}_ss`.
/// 4. Unless `keep` is `true`, removes the 3Di FASTA and
///    `{curr_dir}{SEP}{parent}{SEP}output_probabilities.csv`.
///
/// `combined_aa` itself is never deleted by this function.
///
/// # Errors
///
/// Returns an error when one of the intermediate files cannot be removed. Both removals are
/// always attempted; if both fail, the error of the first one is returned.
/// A missing `foldseek` entry in `bin` is reported through `err::error` with
/// `err::ERR_BINARY_NOT_FOUND`.
fn _run_python(combined_aa: &str, curr_dir: &str, parent: &str, output: &str, model: &str, keep: bool, bin: &crate::envs::variables::BinaryPaths, threads: String) -> Result<(), Box<dyn std::error::Error>> {
    let input_3di = format!("{}{}{}{}combined_3di.fasta", curr_dir, SEP, parent, SEP);
    let inter_prob = format!("{}{}{}{}output_probabilities.csv", curr_dir, SEP, parent, SEP);
    // NOTE(review): the amino-acid database below is written to `output` as given, while the
    // 3Di database path is prefixed with `curr_dir`. These only line up when `output` is
    // relative to `curr_dir` — confirm against the caller.
    let output_3di = format!("{}{}{}_ss", curr_dir, SEP, output);

    // Verbosity handed to foldseek: 4 becomes 3, 3 becomes 2, anything else is passed unchanged.
    let foldseek_verbosity = match var::verbosity() {
        4 => 3,
        3 => 2,
        level => level,
    }
    .to_string();

    // Run the Python encoder script.
    let mut encoder = std::process::Command::new("python");
    let mut encoder = encoder
        .arg(var::locate_encoder_py())
        .arg("-i").arg(combined_aa)
        .arg("-o").arg(&input_3di)
        .arg("--model").arg(model)
        .arg("--half").arg("0")
        .arg("--threads").arg(threads);
    cmd::run(&mut encoder);

    // Resolve the foldseek binary once; it is used for both createdb invocations.
    let foldseek_path = match &bin.get("foldseek") {
        Some(entry) => &entry.path,
        _none => { err::error(err::ERR_BINARY_NOT_FOUND, Some("foldseek".to_string())); }
    };

    // Build the amino-acid foldseek database.
    let mut createdb_aa = std::process::Command::new(foldseek_path);
    let mut createdb_aa = createdb_aa
        .arg("base:createdb").arg(combined_aa).arg(output)
        .arg("--shuffle").arg("0")
        .arg("-v").arg(foldseek_verbosity.as_str());
    cmd::run(&mut createdb_aa);

    // Build the 3Di foldseek database.
    let mut createdb_3di = std::process::Command::new(foldseek_path);
    let mut createdb_3di = createdb_3di
        .arg("base:createdb").arg(&input_3di).arg(&output_3di)
        .arg("--shuffle").arg("0")
        .arg("-v").arg(foldseek_verbosity.as_str());
    cmd::run(&mut createdb_3di);

    // Delete intermediate files. Both removals are attempted before any error is propagated,
    // so a failure on the first file does not leave the second one behind.
    if !keep {
        let removed_3di = std::fs::remove_file(&input_3di);
        let removed_prob = std::fs::remove_file(&inter_prob);
        removed_3di?;
        removed_prob?;
    }

    Ok(())
}
32 changes: 1 addition & 31 deletions src/util/arg_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,6 @@ pub enum Commands {
/// Use GPU for foldseek createdb
#[arg(short, long, default_value="false")]
gpu: bool,
/// Use python script instead. hidden option
#[arg(long, default_value="false", hide = true)]
use_python: bool,
/// Use foldseek for createdb. hidden option
#[arg(long, default_value="true", hide = true)]
use_foldseek: bool,
/// Use AFDB lookup for foldseek createdb. Useful for large databases
#[arg(long, default_value="false")]
afdb_lookup: bool,
Expand Down Expand Up @@ -249,12 +243,6 @@ pub enum Commands {
/// Use GPU for foldseek createdb
#[arg(short, long, default_value="false")]
gpu: bool,
/// Use python script instead. hidden option
#[arg(long, default_value="false", hide = true)]
use_python: bool,
/// Use foldseek for createdb. hidden option
#[arg(long, default_value="true", hide = true)]
use_foldseek: bool,
/// Use AFDB lookup for foldseek createdb. Useful for large databases
#[arg(long, default_value="false")]
afdb_lookup: bool,
Expand Down Expand Up @@ -317,12 +305,6 @@ pub enum Commands {
/// Use GPU for foldseek createdb
#[arg(short, long, default_value="false")]
gpu: bool,
/// Use python script instead. hidden option
#[arg(long, default_value="false", hide = true)]
use_python: bool,
/// Use foldseek for createdb. hidden option
#[arg(long, default_value="true", hide = true)]
use_foldseek: bool,
/// Use AFDB lookup for foldseek createdb. Useful for large databases
#[arg(long, default_value="false")]
afdb_lookup: bool,
Expand Down Expand Up @@ -376,8 +358,6 @@ pub struct Args {
pub createdb_overwrite: Option<bool>,
pub createdb_max_len: Option<Option<usize>>,
pub createdb_gpu: Option<bool>,
pub createdb_use_python: Option<bool>,
pub createdb_use_foldseek: Option<bool>,
pub createdb_afdb_lookup: Option<bool>,
pub createdb_afdb_local: Option<Option<String>>,

Expand Down Expand Up @@ -480,16 +460,6 @@ impl Args {
Some(EasyCore { gpu, .. }) => Some(*gpu),
Some(EasySearch { gpu, .. }) => Some(*gpu), _ => None,
};
let createdb_use_python = match &args.command {
Some(Createdb { use_python, .. }) => Some(*use_python),
Some(EasyCore { use_python, .. }) => Some(*use_python),
Some(EasySearch { use_python, .. }) => Some(*use_python), _ => None,
};
let createdb_use_foldseek = match &args.command {
Some(Createdb { use_foldseek, .. }) => Some(*use_foldseek),
Some(EasyCore { use_foldseek, .. }) => Some(*use_foldseek),
Some(EasySearch { use_foldseek, .. }) => Some(*use_foldseek), _ => None,
};
let createdb_afdb_lookup = match &args.command {
Some(Createdb { afdb_lookup, .. }) => Some(*afdb_lookup),
Some(EasyCore { afdb_lookup, .. }) => Some(*afdb_lookup),
Expand Down Expand Up @@ -641,7 +611,7 @@ impl Args {

Args {
command: args.command, version: args.version, threads, verbosity,
createdb_input, createdb_output, createdb_model, createdb_keep, createdb_overwrite, createdb_max_len, createdb_gpu, createdb_use_python, createdb_use_foldseek, createdb_afdb_lookup, createdb_afdb_local,
createdb_input, createdb_output, createdb_model, createdb_keep, createdb_overwrite, createdb_max_len, createdb_gpu, createdb_afdb_lookup, createdb_afdb_local,
profile_input_db, profile_input_tsv, profile_output, profile_threshold, profile_print_copiness,
search_input, search_target, search_output, search_tmp, search_keep_aln_db, search_search_options,
cluster_input, cluster_output, cluster_tmp, cluster_keep_cluster_db, cluster_cluster_options,
Expand Down

0 comments on commit 8fe9599

Please sign in to comment.