Skip to content

Commit

Permalink
fix: covalent bond
Browse files Browse the repository at this point in the history
  • Loading branch information
YaoYinYing committed Aug 23, 2024
1 parent c39a3e4 commit 3bba852
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 11 deletions.
2 changes: 1 addition & 1 deletion apps/protein_folding/helixfold3/data/demo_7s69_coval.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"type": "bond",
"bond": "A,ASN,74,ND2,B,UNK1,1,C,covale,2.3",
"bond": "A,ASN,74,ND2,B,UNK-1,1,C16,covale,2.3",
"_comment": "'A,74,ND2:B,1:CW,null' from RF2AA"
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def parse_covalent_bond_input(input_string: str) -> List[CovalentBond]:

# Append the CovalentBond instance to the list
covalent_bonds.append(covalent_bond)
logging.info(f"Added {len(covalent_bonds)} bonds: {covalent_bonds}")

return covalent_bonds

Expand Down Expand Up @@ -306,6 +307,8 @@ def make_bond_features(covalent_bond: List[CovalentBond], all_chain_info, ccd_pr
chainId_to_type = {}
ligand_bond_type = [] # (i, j, bond_type), represent the bond between token i and token j
bond_index = [] # (i,j) represent the bond between token i and token j

ccd_id2atom_ids: dict[str, list]={}
ccd_standard_set = residue_constants.STANDARD_LIST
for chain_type_id, ccd_seq in all_chain_info.items():
chain_type, chain_id = chain_type_id.rsplit('_', 1)
Expand All @@ -326,6 +329,8 @@ def make_bond_features(covalent_bond: List[CovalentBond], all_chain_info, ccd_pr
else:
_ccd_feats = ccd_preprocessed_dict[ccd_id]
atom_ids = _ccd_feats['atom_ids']

ccd_id2atom_ids[ccd_id] = atom_ids
assert len(atom_ids) > 0, f'TODO filter - Got CCD <{ccd_id}>: 0 atom nums.'

all_token_nums += len(atom_ids)
Expand Down Expand Up @@ -372,16 +377,43 @@ def make_bond_features(covalent_bond: List[CovalentBond], all_chain_info, ccd_pr
ptnr2_label_seq_id = ptnr2_auth_seq_id

try:
assert ptnr1_label_asym_id in chainId_to_ccd_list and ptnr2_label_asym_id in chainId_to_ccd_list
if not (ptnr1_label_asym_id in chainId_to_ccd_list and ptnr2_label_asym_id in chainId_to_ccd_list):
raise ValueError(f"Invalid chain id:\n{ptnr1_label_asym_id}/{ptnr2_label_asym_id}\n{chainId_to_ccd_list}")
ptnr1_ccd_id = chainId_to_ccd_list[ptnr1_label_asym_id][int(ptnr1_label_seq_id) - 1]
ptnr2_ccd_id = chainId_to_ccd_list[ptnr2_label_asym_id][int(ptnr2_label_seq_id) - 1]
assert ptnr1_ccd_id == ptnr1_label_comp_id and ptnr2_ccd_id == ptnr2_label_comp_id
except:


# renamed ligand residues


if ptnr1_ccd_id != ptnr1_label_comp_id:
logging.warning(f"Find ligand residue: {ptnr1_label_comp_id} -> {ptnr1_ccd_id}")
#ptnr1_label_comp_id = ptnr1_ccd_id

if ptnr2_ccd_id != ptnr2_label_comp_id:
logging.warning(f"Find ligand residue: {ptnr2_label_comp_id} -> {ptnr2_ccd_id}")
#ptnr2_label_comp_id = ptnr2_ccd_id

except ValueError as e:
## some convalent-bond from mmcif is misslead, pass it.
logging.warning(f'Error occurred during covalent bond processing: {e}')
continue



ptnr1_ccd_atoms_list = ccd_preprocessed_dict[ptnr1_ccd_id]['atom_ids']
ptnr2_ccd_atoms_list = ccd_preprocessed_dict[ptnr2_ccd_id]['atom_ids']
if ptnr1_ccd_id in ccd_preprocessed_dict:
ptnr1_ccd_atoms_list = ccd_preprocessed_dict[ptnr1_ccd_id]['atom_ids']
else:
ptnr1_ccd_atoms_list = ccd_id2atom_ids[ptnr1_ccd_id]

logging.debug(f'{ptnr1_ccd_id=}: {ptnr1_ccd_atoms_list=}')

if ptnr2_ccd_id in ccd_preprocessed_dict:
ptnr2_ccd_atoms_list = ccd_preprocessed_dict[ptnr2_ccd_id]['atom_ids']
else:
ptnr2_ccd_atoms_list = ccd_id2atom_ids[ptnr2_ccd_id]

logging.debug(f'{ptnr2_ccd_id=}: {ptnr2_ccd_atoms_list=}')

if ptnr1_ccd_id in ccd_standard_set:
## if ccd_id is in the standard residue in HF3 (table 13), we didn't have to map to atom-leval index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def process_input_json(all_entitys: List[Entity], ccd_preprocessed_path,


# gather all defined covalent bonds
all_covalent_bonds=[bond for entity in all_entitys for bond in entity.msa_seqs if bond.dtype == 'bond']
all_covalent_bonds=[bond for entity in all_entitys for bond in entity.msa_seqs if entity.dtype == 'bond']

assert num_chains == len(all_chain_features.keys())
all_feats = add_assembly_features(all_chain_features, ccd_preprocessed_dict, no_msa_templ_feats, all_covalent_bonds)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,16 +180,20 @@ def _perform_conversion(self, input_type: str, input_value: str, generate_3d: bo
if input_type == 'mol2' and input_value.endswith('.mol2'):
return self._load_mol(mol2_file=input_value)

with tempfile.NamedTemporaryFile(suffix=".mol2") as temp_file, utils.timing(f'converting {input_type} to mol2: {input_value}'):
save_path=os.path.join('ligands',f'{os.path.basename(input_value)[:-(len(input_type)+1)] if input_type != "smiles" else "UNK"}.mol2')

os.makedirs(os.path.dirname(save_path), exist_ok=True)

with utils.timing(f'converting {input_type} to mol2: {input_value}'):
if input_type == 'smiles':
obabel_cmd = f"{self.obabel_bin} -:'{input_value}' -omol2 -O{temp_file.name} {'--gen3d' if generate_3d else ''}"
obabel_cmd = f"{self.obabel_bin} -:'{input_value}' -omol2 -O{save_path} {'--gen3d' if generate_3d else ''}"
if len(input_value)>60:
logging.warning(f'This takes a while ...')
else:
obabel_cmd = f"{self.obabel_bin} -i {input_type} {input_value} -omol2 -O{temp_file.name} {'--gen3d' if generate_3d else ''}"
obabel_cmd = f"{self.obabel_bin} -i {input_type} {input_value} -omol2 -O{save_path} {'--gen3d' if generate_3d else ''}"
logging.debug(f'Launching command: `{obabel_cmd}`')
ret = subprocess.run(obabel_cmd, shell=True, capture_output=True, text=True)
return self._load_mol(mol2_file=temp_file.name, ret=ret)
return self._load_mol(mol2_file=save_path, ret=ret)

def _convert_to_mol(self, input_type: str, input_value: str, generate_3d: bool=True) -> Chem.Mol:
if input_type not in self.supported_formats:
Expand Down

0 comments on commit 3bba852

Please sign in to comment.