Skip to content

Commit

Permalink
chore: hydra and pip
Browse files Browse the repository at this point in the history
  • Loading branch information
YaoYinYing committed Aug 17, 2024
1 parent 78d157f commit 662bbff
Show file tree
Hide file tree
Showing 12 changed files with 248 additions and 186 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,13 @@ def prediction_to_mmcif(pred_atom_pos: Union[np.ndarray, paddle.Tensor],
- maxit_binary: path to maxit_binary, use to convert pdb to cif
- mmcif_path: path to save *.cif
"""
assert maxit_binary is not None and os.path.exists(maxit_binary), (
# Fail fast when the MAXIT converter is unavailable: it is used to convert
# the intermediate PDB file to mmCIF. The truthiness check rejects None or
# an empty path before os.path.isfile is consulted.
if not (maxit_binary and os.path.isfile(maxit_binary)):
    raise FileNotFoundError(
        f'maxit_binary: {maxit_binary} not exists. '
        f'link: https://sw-tools.rcsb.org/apps/MAXIT/source.html')
assert mmcif_path.endswith('.cif'), f'mmcif_path should endswith .cif; got {mmcif_path}'

if not mmcif_path.endswith('.cif'):
raise ValueError(f'mmcif_path should endswith .cif; got {mmcif_path}')

pdb_path = mmcif_path.replace('.cif', '.pdb')
pdb_path = prediction_to_pdb(pred_atom_pos, FeatsDict, pdb_path)
Expand Down
59 changes: 59 additions & 0 deletions apps/protein_folding/helixfold3/helixfold/config/helixfold.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
defaults:
- _self_

# General configuration

bf16_infer: false # Corresponds to --bf16_infer
seed: null # Corresponds to --seed
logging_level: DEBUG # Corresponds to --logging_level
job_id: 'structure_prediction' # Corresponds to --model_name
weight_path: /mnt/db/weights/helixfold/HelixFold3-params-240814/HelixFold3-240814.pdparams # Corresponds to --init_model
precision: fp32 # Corresponds to --precision
amp_level: O1 # Corresponds to --amp_level
infer_times: 1 # Corresponds to --infer_times
diff_batch_size: -1 # Corresponds to --diff_batch_size
use_small_bfd: false # Corresponds to --use_small_bfd

# File paths

input: null # Corresponds to --input_json, required field
output: null # Corresponds to --output_dir, required field


# Binary tool paths. Leave a value as null to have the tool located on PATH or in the conda bin directory.
bin:
jackhmmer: null # Corresponds to --jackhmmer_binary_path
hhblits: null # Corresponds to --hhblits_binary_path
hhsearch: null # Corresponds to --hhsearch_binary_path
kalign: null # Corresponds to --kalign_binary_path
hmmsearch: null # Corresponds to --hmmsearch_binary_path
hmmbuild: null # Corresponds to --hmmbuild_binary_path
nhmmer: null # Corresponds to --nhmmer_binary_path
obabel: null

# Database paths
db:
uniprot: /mnt/db/uniprot/uniprot.fasta # Corresponds to --uniprot_database_path, required field
pdb_seqres: /mnt/db/pdb_seqres/pdb_seqres.txt # Corresponds to --pdb_seqres_database_path, required field
uniref90: /mnt/db/uniref90/uniref90.fasta # Corresponds to --uniref90_database_path, required field
mgnify: /mnt/db/mgnify/mgy_clusters.fa # Corresponds to --mgnify_database_path, required field
bfd: /mnt/db/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt # Corresponds to --bfd_database_path
small_bfd: null # Corresponds to --small_bfd_database_path
uniclust30: /mnt/db/uniref30_uc30/UniRef30_2022_02/UniRef30_2022_02 # Corresponds to --uniclust30_database_path
rfam: /mnt/db/helixfold/rna/Rfam-14.9_rep_seq.fasta # Corresponds to --rfam_database_path, required field
ccd_preprocessed: /mnt/db/ccd/ccd_preprocessed_etkdg.pkl.gz # Corresponds to --ccd_preprocessed_path, required field

# Template and PDB information
template:
mmcif_dir: /mnt/db/pdb_mmcif/mmcif_files # Corresponds to --template_mmcif_dir, required field
max_date: '2023-03-15' # Corresponds to --max_template_date, required field
obsolete_pdbs: /mnt/db/pdb_mmcif/obsolete.dat # Corresponds to --obsolete_pdbs_path, required field

# Preset configuration
preset:
preset: full_dbs # Corresponds to --preset, choices=['reduced_dbs', 'full_dbs']

# Other configurations
other:
maxit_binary: /mnt/data/yinying/software/maxit/maxit-v11.100-prod-src/bin/maxit # Corresponds to --maxit_binary
no_msa_templ_feats: false # Corresponds to --no_msa_templ_feats
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@
'seqs': ccd_seqs,
'msa_seqs': msa_seqs,
'count': count,
'extra_mol_infos': {} for which seqs has the modify residue type or smiles.
'extra_mol_infos': {}, for seqs that have a modified residue type or SMILES.
"""
import collections
import copy
import gzip
import os
import json
import sys
import subprocess
import tempfile
import itertools
sys.path.append('../')
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
Expand Down Expand Up @@ -52,9 +52,7 @@
3: 'Unknown error.'
}

OBABEL_BIN = os.getenv('OBABEL_BIN')
if not os.path.exists(OBABEL_BIN):
raise FileNotFoundError(f'Cannot find obabel binary at {OBABEL_BIN}.')



def read_json(path):
Expand Down Expand Up @@ -144,6 +142,11 @@ def smiles_toMol_obabel(smiles):
"""
generate mol from smiles using obabel;
"""

OBABEL_BIN = os.getenv('OBABEL_BIN')
if not (OBABEL_BIN and os.path.isfile(OBABEL_BIN)):
raise FileNotFoundError(f'Cannot find obabel binary at {OBABEL_BIN}.')

with tempfile.NamedTemporaryFile(suffix=".mol2") as temp_file:
print(f"[OBABEL] Temporary file created: {temp_file.name}")
obabel_cmd = f"{OBABEL_BIN} -:'{smiles}' -omol2 -O{temp_file.name} --gen3d"
Expand Down
Loading

0 comments on commit 662bbff

Please sign in to comment.