Skip to content

Commit

Permalink
doc: covalently
Browse files Browse the repository at this point in the history
  • Loading branch information
YaoYinYing committed Aug 23, 2024
1 parent 3bba852 commit 7653585
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 1 deletion.
43 changes: 43 additions & 0 deletions apps/protein_folding/helixfold3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,49 @@ A example of input data is as follows:
}
```

Another example of **covalently modified** input:
```json
{
"entities": [
{
"type": "protein",
"sequence": "MDALYKSTVAKFNEVIQLDCSTEFFSIALSSIAGILLLLLLFRSKRHSSLKLPPGKLGIPFIGESFIFLRALRSNSLEQFFDERVKKFGLVFKTSLIGHPTVVLCGPAGNRLILSNEEKLVQMSWPAQFMKLMGENSVATRRGEDHIVMRSALAGFFGPGALQSYIGKMNTEIQSHINEKWKGKDEVNVLPLVRELVFNISAILFFNIYDKQEQDRLHKLLETILVGSFALPIDLPGFGFHRALQGRAKLNKIMLSLIKKRKEDLQSGSATATQDLLSVLLTFRDDKGTPLTNDEILDNFSSLLHASYDTTTSPMALIFKLLSSNPECYQKVVQEQLEILSNKEEGEEITWKDLKAMKYTWQVAQETLRMFPPVFGTFRKAITDIQYDGYTIPKGWKLLWTTYSTHPKDLYFNEPEKFMPSRFDQEGKHVAPYTFLPFGGGQRSCVGWEFSKMEILLFVHHFVKTFSSYTPVDPDEKISGDPLPPLPSKGFSIKLFPRP",
"count": 1
},
{
"type": "ligand",
"ccd": "HEM",
"count": 1
},
{
"type": "ligand",
"smiles": "CC1=C2CC[C@@]3(CCCC(=C)[C@H]3C[C@@H](C2(C)C)CC1)C",
"count": 1
},
{
"type": "bond",
"bond": "A,CYS,445,SG,B,HEM,1,FE,covale,2.3",
"_comment": "<chain-id>,<residue name>,<residue index>,<atom id>,<chain-id>,<residue name>,<residue index>,<atom id>,<bond type>,<bond length>",
"_also_comment": "For ccd input, use CCD key as residue name; for smiles and file input, use `UNK-<index>` where index is the chain order you input"
}
]
}
```

To look up all atom ids of a component in the CCD database:
```shell
helixfold_show_ccd +ccd_id=HEM
```

The command produces output like the following:
```text
# output:
[2024-08-23 22:44:36,324][absl][INFO] - Started Loading CCD dataset from /mnt/db/ccd/ccd_preprocessed_etkdg.pkl.gz
[2024-08-23 22:44:43,236][absl][INFO] - Finished Loading CCD dataset from /mnt/db/ccd/ccd_preprocessed_etkdg.pkl.gz in 6.912 seconds
[2024-08-23 22:44:43,237][absl][INFO] - CCD dataset contains 43488 entries.
[2024-08-23 22:44:43,237][absl][INFO] - Atoms in HEM: ['CHA', 'CHB', 'CHC', 'CHD', 'C1A', 'C2A', 'C3A', 'C4A', 'CMA', 'CAA', 'CBA', 'CGA', 'O1A', 'O2A', 'C1B', 'C2B', 'C3B', 'C4B', 'CMB', 'CAB', 'CBB', 'C1C', 'C2C', 'C3C', 'C4C', 'CMC', 'CAC', 'CBC', 'C1D', 'C2D', 'C3D', 'C4D', 'CMD', 'CAD', 'CBD', 'CGD', 'O1D', 'O2D', 'NA', 'NB', 'NC', 'ND', 'FE']
```

#### Running HelixFold for Inference
To run inference on a sequence or multiple sequences using HelixFold3's pretrained parameters, run e.g.:
Expand Down
3 changes: 2 additions & 1 deletion apps/protein_folding/helixfold3/data/demo_7s69_coval.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
{
"type": "bond",
"bond": "A,ASN,74,ND2,B,UNK-1,1,C16,covale,2.3",
"_comment": "'A,74,ND2:B,1:CW,null' from RF2AA"
"_comment": "'A,74,ND2:B,1:CW,null' from RF2AA.",
"_also_comment": "For ccd input, use CCD key as residue name; for smiles and file input, use `UNK-<index>` where index is the chain order you input"
}
]
}
25 changes: 25 additions & 0 deletions apps/protein_folding/helixfold3/data/demo_p450_heme_coval.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"entities": [
{
"type": "protein",
"sequence": "MDALYKSTVAKFNEVIQLDCSTEFFSIALSSIAGILLLLLLFRSKRHSSLKLPPGKLGIPFIGESFIFLRALRSNSLEQFFDERVKKFGLVFKTSLIGHPTVVLCGPAGNRLILSNEEKLVQMSWPAQFMKLMGENSVATRRGEDHIVMRSALAGFFGPGALQSYIGKMNTEIQSHINEKWKGKDEVNVLPLVRELVFNISAILFFNIYDKQEQDRLHKLLETILVGSFALPIDLPGFGFHRALQGRAKLNKIMLSLIKKRKEDLQSGSATATQDLLSVLLTFRDDKGTPLTNDEILDNFSSLLHASYDTTTSPMALIFKLLSSNPECYQKVVQEQLEILSNKEEGEEITWKDLKAMKYTWQVAQETLRMFPPVFGTFRKAITDIQYDGYTIPKGWKLLWTTYSTHPKDLYFNEPEKFMPSRFDQEGKHVAPYTFLPFGGGQRSCVGWEFSKMEILLFVHHFVKTFSSYTPVDPDEKISGDPLPPLPSKGFSIKLFPRP",
"count": 1
},
{
"type": "ligand",
"ccd": "HEM",
"count": 1
},
{
"type": "ligand",
"smiles": "CC1=C2CC[C@@]3(CCCC(=C)[C@H]3C[C@@H](C2(C)C)CC1)C",
"count": 1
},
{
"type": "bond",
"bond": "A,CYS,445,SG,B,HEM,1,FE,covale,2.3",
"_comment": "<chain-id>,<residue name>,<residue index>,<atom id>,<chain-id>,<residue name>,<residue index>,<atom id>,<bond type>,<bond length>",
"_also_comment": "For ccd input, use CCD key as residue name; for smiles and file input, use `UNK-<index>` where index is the chain order you input"
}
]
}
20 changes: 20 additions & 0 deletions apps/protein_folding/helixfold3/helixfold/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import hydra

from helixfold.common import all_atom_pdb_save
from helixfold.data.pipeline_conf_bonds import load_ccd_dict
from helixfold.model import config, utils
from helixfold.data import pipeline_parallel as pipeline
from helixfold.data import pipeline_multimer_parallel as pipeline_multimer
Expand Down Expand Up @@ -565,5 +566,24 @@ def main(cfg: DictConfig):
ranking_all_predictions(all_pred_path)
print(f'============ Inference finished ! ============')


@hydra.main(version_base=None, config_path=os.path.join(script_path, 'config'), config_name='helixfold')
def show_atom_id_ccd(cfg: DictConfig):
    """Log the atom ids of a single CCD entry.

    Entry point of the ``helixfold_show_ccd`` console script, e.g.::

        helixfold_show_ccd +ccd_id=HEM

    Args:
        cfg: Hydra config. Reads ``cfg.db.ccd_preprocessed`` (path handed to
            ``load_ccd_dict``) and ``cfg.ccd_id`` (the CCD key to look up).

    Returns:
        None. The atom ids are reported through ``logging.info``; a warning is
        logged instead when the id is rejected or not found.
    """
    ccd_preprocessed_path = cfg.db.ccd_preprocessed
    ccd_id = cfg.ccd_id

    # Reject over-long ids before loading, so the dataset is only read for
    # ids that can pass the length check (same short-circuit as before).
    if len(ccd_id) > 3:
        logging.warning(
            f'CCD id {ccd_id!r} is longer than 3 characters; nothing to show.'
        )
        return

    ccd_dict = load_ccd_dict(ccd_preprocessed_path)
    if ccd_id not in ccd_dict:
        # Previously this case exited silently, leaving the user without any
        # hint that the id was mistyped or missing from the dataset.
        logging.warning(
            f'CCD id {ccd_id!r} was not found in {ccd_preprocessed_path}.'
        )
        return

    logging.info(f'Atoms in {ccd_id}: {ccd_dict[ccd_id]["atom_ids"]}')









# Running this module directly starts the inference entry point `main`;
# `show_atom_id_ccd` is not invoked from here.
if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions apps/protein_folding/helixfold3/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ joblib = "1.4.2"

[tool.poetry.scripts]
helixfold = 'helixfold.inference:main'
helixfold_show_ccd = 'helixfold.inference:show_atom_id_ccd'

0 comments on commit 7653585

Please sign in to comment.