From 7408b6f054e700daae18a7f32b476641d6d86cdf Mon Sep 17 00:00:00 2001 From: richard gowers Date: Tue, 12 Sep 2023 13:53:31 +0100 Subject: [PATCH 01/12] lift ring atom determination into separate function --- boresch_restraints.py | 68 +++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index 61180a8..bf6f93d 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -19,6 +19,45 @@ T = 298.15 RT = R*T + +def determine_rings_oechem(lig: str): + """Assign atoms into different ring systems + + Parameters + ---------- + lig : str + path to the mol2 file + + + Returns + ------- + atoms + for each ring system system, the indices of atoms within + """ + from openeye import oechem + + # Load in ligand from mol2 into openeye + ifs = oechem.oemolistream(lig) + mol = oechem.OEGraphMol() + oechem.OEReadMolecule(ifs, mol) + + # Find ring systems (here not specifically aromatic, could change that) + nr_ring_systems, parts = oechem.OEDetermineRingSystems(mol) + atoms = [] + + if nr_ring_systems: + # Loop through ringsystems, count number of atoms + for ringidx in range(1, nr_ring_systems + 1): + single_ring = [] + # store atom indices of ring systems + for atom in mol.GetAtoms(): + if parts[atom.GetIdx()] == ringidx: + single_ring.append(atom.GetIdx()) + atoms.append(single_ring) + + return atoms + + def select_ligand_atoms(lig, traj, ligand='LIG'): """Select three ligand atoms for Boresch-style restraints. Parameters @@ -43,10 +82,6 @@ def select_ligand_atoms(lig, traj, ligand='LIG'): #Store length of the ligand lig_length = len(ligand) ligand_traj = traj.atom_slice(ligand, inplace=False) - #Load in ligand from mol2 into openeye - ifs = oechem.oemolistream(lig) - mol = oechem.OEGraphMol() - oechem.OEReadMolecule(ifs, mol) #Use openff to make graph from molecule molecule = Molecule(lig) @@ -70,28 +105,12 @@ def select_ligand_atoms(lig, traj, ligand='LIG'): longest_paths.append(value) #there might be multiple longest path, just choose first one for now center = longest_paths[0][int(len(longest_paths[0])/2)] - #Load in ligand from mol2 into openeye - ifs = oechem.oemolistream(lig) - mol = oechem.OEGraphMol() - oechem.OEReadMolecule(ifs, mol) - # Find ring systems (here not specifically aromatic, could change that) - nr_ring_systems, parts = oechem.OEDetermineRingSystems(mol) - ring_systems = [] - atoms = [] + atoms = determine_rings_oechem(lig) + nr_ring_systems = len(atoms) + # If we find a ring system in our molecule if nr_ring_systems >= 1: - #Loop through ringsystems, count number of atoms - for ringidx in range(1, nr_ring_systems + 1): - single_ring = [] - nr_atoms = parts.count(ringidx) - ring_systems.append(nr_atoms) - #store atom indices of ring systems - for atom in mol.GetAtoms(): - if parts[atom.GetIdx()] == ringidx: - single_ring.append(atom.GetIdx()) - atoms.append(single_ring) - # Compute RMSF ligand atoms ligand_traj.superpose(ligand_traj) rmsf = list(md.rmsf(ligand_traj, ligand_traj, 0)) @@ -99,8 +118,7 @@ def select_ligand_atoms(lig, traj, ligand='LIG'): ring_atoms = [] # also save ring atoms ring by ring in nested list to be able to select atoms from a single ring later ring_atoms_2 = [] - for inx,r in enumerate(ring_systems): - atoms_ring = atoms[inx] + for atoms_ring in enumerate(atoms): rmsf_ring = [rmsf[a] for a in atoms_ring] #Only choose ring systems that are stable (RMSF<0.1) if max(rmsf_ring) < 0.1: From a37d8c26466f19a24f387f5c4f61740827f5fca2 Mon Sep 17 00:00:00 2001 From: richard gowers Date: Tue, 12 Sep 2023 13:55:33 +0100 Subject: [PATCH 02/12] add use_oechem option to substructure search --- boresch_restraints.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index bf6f93d..ba15619 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -327,19 +327,7 @@ def check_angle(angle): return False return True -def substructure_search(mol2_lig, smarts): - """Pick ligand atoms for restraints based on substructure search. - Parameters - ---------- - mol2_lig : str - mol2 file of the ligand. - smarts : str - Smarts pattern used for substructure search. One atom is flagged and picked for restraints. - Returns - ------- - matches: list - Indices of ligand atoms that match the picked atom in the substructure. - """ +def substructure_search_oechem(mol2_lig, smarts): ifs = oechem.oemolistream(mol2_lig) mol = oechem.OEGraphMol() @@ -361,6 +349,27 @@ def substructure_search(mol2_lig, smarts): return matches +def substructure_search(mol2_lig, smarts, use_oechem=True): + """Pick ligand atoms for restraints based on substructure search. + + Parameters + ---------- + mol2_lig : str + mol2 file of the ligand. + smarts : str + Smarts pattern used for substructure search. One atom is flagged and picked for restraints. + + Returns + ------- + matches: list + Indices of ligand atoms that match the picked atom in the substructure. + """ + if use_oechem: + return substructure_search_oechem(mol2_lig, smarts) + else: + raise NotImplementedError() + + def select_Boresch_atoms(traj, mol2_lig, ligand_atoms = None, protein_atoms = None, substructure = None, ligand='LIG'): """Select possible protein atoms for Boresch-style restraints. Parameters From 2865c0a53f1bd345f76f936639aa6a55317224e8 Mon Sep 17 00:00:00 2001 From: richard gowers Date: Tue, 12 Sep 2023 14:00:08 +0100 Subject: [PATCH 03/12] add oechem option to determine rings --- boresch_restraints.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index ba15619..c1a553b 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -21,19 +21,6 @@ def determine_rings_oechem(lig: str): - """Assign atoms into different ring systems - - Parameters - ---------- - lig : str - path to the mol2 file - - - Returns - ------- - atoms - for each ring system system, the indices of atoms within - """ from openeye import oechem # Load in ligand from mol2 into openeye @@ -58,6 +45,26 @@ def determine_rings_oechem(lig: str): return atoms +def determine_rings(lig: str, use_oechem=True): + """Assign atoms into different ring systems + + Parameters + ---------- + lig : str + path to the mol2 file + + + Returns + ------- + atoms + for each ring system system, the indices of atoms within + """ + if use_oechem: + return determine_rings_oechem(lig) + else: + raise NotImplementedError() + + def select_ligand_atoms(lig, traj, ligand='LIG'): """Select three ligand atoms for Boresch-style restraints. Parameters From 9ba61589ae229c72a43ceceb7e49b1c634dfa367 Mon Sep 17 00:00:00 2001 From: richard gowers Date: Tue, 12 Sep 2023 14:01:08 +0100 Subject: [PATCH 04/12] remove oechem import from top level --- boresch_restraints.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index c1a553b..3326f7d 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -7,7 +7,6 @@ import itertools from simtk import unit from scipy.spatial import distance -from openeye import oechem import math from scipy import stats #from openforcefield.topology import Molecule @@ -335,6 +334,8 @@ def check_angle(angle): return True def substructure_search_oechem(mol2_lig, smarts): + from openeye import oechem + ifs = oechem.oemolistream(mol2_lig) mol = oechem.OEGraphMol() @@ -857,6 +858,7 @@ def analytical_Boresch_correction(r0, thA, thB, fc_r, fc_thA, fc_thB, fc_phiA, f def ligand_sdf(pdb, outfile): ''' Function to get ligand sdf from complex pdb. Only use if no sdf of ligands available. ''' + from openeye import oechem complex_A = oechem.OEGraphMol() ifs = oechem.oemolistream() From 77d327f5ff1847df373d0545450c9de8fee30dc1 Mon Sep 17 00:00:00 2001 From: richard gowers Date: Tue, 12 Sep 2023 14:30:54 +0100 Subject: [PATCH 05/12] add use_oechem option to ligand_sdf --- boresch_restraints.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index 3326f7d..67966e2 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -856,8 +856,8 @@ def analytical_Boresch_correction(r0, thA, thB, fc_r, fc_thA, fc_thB, fc_phiA, f dG = dG / 4.184 return dG -def ligand_sdf(pdb, outfile): - ''' Function to get ligand sdf from complex pdb. Only use if no sdf of ligands available. ''' + +def ligand_sdf_oechem(pdb, outfile): from openeye import oechem complex_A = oechem.OEGraphMol() @@ -880,3 +880,13 @@ def ligand_sdf(pdb, outfile): oechem.OEWriteMolecule(ofs, lig) return + + +def ligand_sdf(pdb, outfile, use_oechem=True): + """Get ligand sdf from complex pdb. + + Only use if no sdf of ligands available.""" + if use_oechem: + return ligand_sdf_oechem(pdb, outfile) + else: + raise NotImplementedError() From 7e78bf5643582312e94dfbb81e349f8635151e4d Mon Sep 17 00:00:00 2001 From: richard gowers Date: Tue, 12 Sep 2023 14:32:57 +0100 Subject: [PATCH 06/12] add stubs for rdkit functions --- boresch_restraints.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index 67966e2..06c5739 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -44,6 +44,10 @@ def determine_rings_oechem(lig: str): return atoms +def determine_rings_rdkit(lig: str): + raise NotImplementedError() + + def determine_rings(lig: str, use_oechem=True): """Assign atoms into different ring systems @@ -61,7 +65,7 @@ def determine_rings(lig: str, use_oechem=True): if use_oechem: return determine_rings_oechem(lig) else: - raise NotImplementedError() + return determine_rings_rdkit(lig) def select_ligand_atoms(lig, traj, ligand='LIG'): @@ -357,6 +361,10 @@ def substructure_search_oechem(mol2_lig, smarts): return matches +def substructure_search_rdkit(mol2_lig, smarts): + raise NotImplementedError() + + def substructure_search(mol2_lig, smarts, use_oechem=True): """Pick ligand atoms for restraints based on substructure search. @@ -375,7 +383,7 @@ def substructure_search(mol2_lig, smarts, use_oechem=True): if use_oechem: return substructure_search_oechem(mol2_lig, smarts) else: - raise NotImplementedError() + return substructure_search_rdkit(mol2_lig, smarts) def select_Boresch_atoms(traj, mol2_lig, ligand_atoms = None, protein_atoms = None, substructure = None, ligand='LIG'): @@ -882,6 +890,10 @@ def ligand_sdf_oechem(pdb, outfile): return +def ligand_sdf_rdkit(pdb, outfile): + raise NotImplementedError() + + def ligand_sdf(pdb, outfile, use_oechem=True): """Get ligand sdf from complex pdb. @@ -889,4 +901,4 @@ def ligand_sdf(pdb, outfile, use_oechem=True): if use_oechem: return ligand_sdf_oechem(pdb, outfile) else: - raise NotImplementedError() + return ligand_sdf_rdkit(pdb, outfile) From 26bf59429e281fd4c1919458e037179d07c1ef5c Mon Sep 17 00:00:00 2001 From: richard gowers Date: Tue, 12 Sep 2023 14:45:20 +0100 Subject: [PATCH 07/12] add stubs for rdkit functions in combined_coordinates --- combine_coordinates.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/combine_coordinates.py b/combine_coordinates.py index e9da4fa..5bdd918 100644 --- a/combine_coordinates.py +++ b/combine_coordinates.py @@ -1,19 +1,11 @@ -from openeye import oechem, oespruce import parmed as pmd import shutil import mdtraj as md -def align_complexes(pdb_A, pdb_B, out_B): - """Align 2 structures with oespruce, Structure A is the reference - Parameters - ---------- - pdb_A : str - pdb structure Complex A - pdb_B : str - pdb structure Complex B - out_B : str - pdb structure, Aligned structure of complex B - """ + +def align_complexes_oechem(pdb_A, pdb_B, out_B): + from openeye import oechem, oespruce + complex_A = oechem.OEGraphMol() ifs = oechem.oemolistream() ifs.SetFlavor(oechem.OEFormat_PDB, @@ -45,6 +37,28 @@ def align_complexes(pdb_A, pdb_B, out_B): ofs.close() return + +def align_complexes_rdkit(pdb_A, pdb_B, out_B): + raise NotImplementedError() + + +def align_complexes(pdb_A, pdb_B, out_B, use_oechem=True): + """Align 2 structures with oespruce, Structure A is the reference + Parameters + ---------- + pdb_A : str + pdb structure Complex A + pdb_B : str + pdb structure Complex B + out_B : str + pdb structure, Aligned structure of complex B + """ + if use_oechem: + return align_complexes_oechem(pdb_A, pdb_B, out_B) + else: + return align_complexes_rdkit(pdb_A, pdb_B, out_B) + + def combine_ligands_gro(in_file_A, in_file_B, out_file, ligand_A='MOL', ligand_B='MOL'): """Add ligand B coordinates to coordinate (.gro) file of ligand A in complex with protein Parameters From d5c05ad241c60e211f23f0d3340a4b9ea9e064d6 Mon Sep 17 00:00:00 2001 From: richard gowers Date: Wed, 13 Sep 2023 13:55:05 +0100 Subject: [PATCH 08/12] add align function using mdtraj currently doesn't preserve a lot of data from the original PDB file if this is necessary, one option would be to manually rewrite the file and change the positions on ATOM lines by playing with strings... --- combine_coordinates.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/combine_coordinates.py b/combine_coordinates.py index 5bdd918..5c08034 100644 --- a/combine_coordinates.py +++ b/combine_coordinates.py @@ -38,8 +38,16 @@ def align_complexes_oechem(pdb_A, pdb_B, out_B): return -def align_complexes_rdkit(pdb_A, pdb_B, out_B): - raise NotImplementedError() +def align_complexes_mdtraj(pdb_A, pdb_B, out_B): + ref_traj = md.Trajectory(pdb_A) + traj = md.Trajectory(pdb_B) + + traj.superpose(ref_traj, atom_indices=ref_traj.topology.select('backbone')) + + with md.formats.PDBTrajectoryFile(out_B, mode='w') as w: + w.write(traj.xyz[0], traj.topology, + unitcell_lengths=traj.unitcell_lengths[0], + unitcell_angles=traj.unitcell_angles[0]) def align_complexes(pdb_A, pdb_B, out_B, use_oechem=True): @@ -52,11 +60,14 @@ def align_complexes(pdb_A, pdb_B, out_B, use_oechem=True): pdb structure Complex B out_B : str pdb structure, Aligned structure of complex B + use_oechem : bool + if True, will use oechem spruce (requiring license) otherwise will use + mdtraj """ if use_oechem: return align_complexes_oechem(pdb_A, pdb_B, out_B) else: - return align_complexes_rdkit(pdb_A, pdb_B, out_B) + return align_complexes_mdtraj(pdb_A, pdb_B, out_B) def combine_ligands_gro(in_file_A, in_file_B, out_file, ligand_A='MOL', ligand_B='MOL'): From 4d9cee98e7bef320051b5361e49637ad1dc109ba Mon Sep 17 00:00:00 2001 From: richard gowers Date: Wed, 13 Sep 2023 16:59:49 +0100 Subject: [PATCH 09/12] rdkit implementation of ligand_sdf --- boresch_restraints.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index 06c5739..a893158 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -891,7 +891,34 @@ def ligand_sdf_oechem(pdb, outfile): def ligand_sdf_rdkit(pdb, outfile): - raise NotImplementedError() + from rdkit import Chem + + # these resnames get skipped, i.e. aren't considered the ligand + water = {'SOL', 'HOH', 'WAT'} + AAs = {'GLY', 'ALA', 'VAL', 'LEU', 'ILE', + 'MET', 'PHE', 'TYR', 'TRP', 'ARG', + 'CYS', 'ASN', 'GLN', 'THR', 'SER', + 'PRO', 'HIS', 'LYS', 'ASP', 'GLU', + 'CYX', 'HID', 'HIP', 'HIE'} + + m = Chem.MolFromPDBFile(pdb, removeHs=False) + + lig = None + mols = Chem.GetMolFrags(m, asMols=True, sanitizeFrags=False) + + for mol in mols: + resname = mol.GetAtomWithIdx(0).GetMonomerInfo().GetResidueName().upper() + if resname in water: + continue + if resname in AAs: + continue + + if lig is None: + lig = mol + else: + lig = Chem.CombineMols(lig, mol) + + Chem.MolToMolFile(lig, outfile) def ligand_sdf(pdb, outfile, use_oechem=True): From 7059ba69159e1647d8019d5cbd20a4270b9223a2 Mon Sep 17 00:00:00 2001 From: richard gowers Date: Wed, 13 Sep 2023 18:12:19 +0100 Subject: [PATCH 10/12] first pass of substructure rdkit search will have to check how mapped atoms are used on the SMARTS patterns --- boresch_restraints.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index a893158..f19bf6e 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -362,7 +362,15 @@ def substructure_search_oechem(mol2_lig, smarts): def substructure_search_rdkit(mol2_lig, smarts): - raise NotImplementedError() + from rdkit import Chem + + m = Chem.MolFromMol2File(mol2_lig, removeHs=False) + + query = Chem.MolFromSmarts(smarts) + + matches = m.GetSubstructMatches(query, uniquify=True) + + raise matches def substructure_search(mol2_lig, smarts, use_oechem=True): From 58ba3f52ab658cd36bfe90d6eed02766240a63e2 Mon Sep 17 00:00:00 2001 From: richard gowers Date: Thu, 14 Sep 2023 15:50:28 +0100 Subject: [PATCH 11/12] rdkit implementation of determine_rings --- boresch_restraints.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index f19bf6e..e00fca4 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -2,6 +2,7 @@ #Ligand atoms: largest ring systems, atoms closest to ligand COM #protein atoms: middle of a helix, backbone/C-beta, >1nm away from ligand COM, check angles +from functools import reduce import numpy as np import mdtraj as md import itertools @@ -45,7 +46,26 @@ def determine_rings_oechem(lig: str): def determine_rings_rdkit(lig: str): - raise NotImplementedError() + from rdkit import Chem + + m = Chem.MolFromMol2File(lig, removeHs=False) + + ringinfo = m.GetRingInfo() + + rings = ringinfo.AtomRings() + # unlike oechem, this is each ring, rather than ring systems, so let's merge + # 1) make a graph of all rings + g = nx.Graph() + g.add_nodes_from(frozenset(r) for r in rings) + for a, b in itertools.combinations(g.nodes, 2): + # 2) connect rings that have at least one atom in common + if a & b: + g.add_edge(a, b) + # 3) merge connected components to find fused ring systems + ringsystems = [list(reduce(lambda x, y: x | y, n)) + for n in nx.connected_components(g)] + + return ringsystems def determine_rings(lig: str, use_oechem=True): From 3c4031069154084767f7ef6ce7b1cce1832084c8 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Thu, 14 Sep 2023 17:57:45 +0100 Subject: [PATCH 12/12] Update boresch_restraints.py --- boresch_restraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boresch_restraints.py b/boresch_restraints.py index e00fca4..85db16c 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -148,7 +148,7 @@ def select_ligand_atoms(lig, traj, ligand='LIG'): ring_atoms = [] # also save ring atoms ring by ring in nested list to be able to select atoms from a single ring later ring_atoms_2 = [] - for atoms_ring in enumerate(atoms): + for atoms_ring in atoms: rmsf_ring = [rmsf[a] for a in atoms_ring] #Only choose ring systems that are stable (RMSF<0.1) if max(rmsf_ring) < 0.1: