diff --git a/boresch_restraints.py b/boresch_restraints.py index 61180a8..85db16c 100644 --- a/boresch_restraints.py +++ b/boresch_restraints.py @@ -2,12 +2,12 @@ #Ligand atoms: largest ring systems, atoms closest to ligand COM #protein atoms: middle of a helix, backbone/C-beta, >1nm away from ligand COM, check angles +from functools import reduce import numpy as np import mdtraj as md import itertools from simtk import unit from scipy.spatial import distance -from openeye import oechem import math from scipy import stats #from openforcefield.topology import Molecule @@ -19,6 +19,75 @@ T = 298.15 RT = R*T + +def determine_rings_oechem(lig: str): + from openeye import oechem + + # Load in ligand from mol2 into openeye + ifs = oechem.oemolistream(lig) + mol = oechem.OEGraphMol() + oechem.OEReadMolecule(ifs, mol) + + # Find ring systems (here not specifically aromatic, could change that) + nr_ring_systems, parts = oechem.OEDetermineRingSystems(mol) + atoms = [] + + if nr_ring_systems: + # Loop through ringsystems, count number of atoms + for ringidx in range(1, nr_ring_systems + 1): + single_ring = [] + # store atom indices of ring systems + for atom in mol.GetAtoms(): + if parts[atom.GetIdx()] == ringidx: + single_ring.append(atom.GetIdx()) + atoms.append(single_ring) + + return atoms + + +def determine_rings_rdkit(lig: str): + from rdkit import Chem + + m = Chem.MolFromMol2File(lig, removeHs=False) + + ringinfo = m.GetRingInfo() + + rings = ringinfo.AtomRings() + # unlike oechem, this is each ring, rather than ring systems, so let's merge + # 1) make a graph of all rings + g = nx.Graph() + g.add_nodes_from(frozenset(r) for r in rings) + for a, b in itertools.combinations(g.nodes, 2): + # 2) connect rings that have at least one atom in common + if a & b: + g.add_edge(a, b) + # 3) merge connected components to find fused ring systems + ringsystems = [list(reduce(lambda x, y: x | y, n)) + for n in nx.connected_components(g)] + + return ringsystems + + +def determine_rings(lig: str, use_oechem=True): + """Assign atoms into different ring systems + + Parameters + ---------- + lig : str + path to the mol2 file + + + Returns + ------- + atoms + for each ring system system, the indices of atoms within + """ + if use_oechem: + return determine_rings_oechem(lig) + else: + return determine_rings_rdkit(lig) + + def select_ligand_atoms(lig, traj, ligand='LIG'): """Select three ligand atoms for Boresch-style restraints. Parameters @@ -43,10 +112,6 @@ def select_ligand_atoms(lig, traj, ligand='LIG'): #Store length of the ligand lig_length = len(ligand) ligand_traj = traj.atom_slice(ligand, inplace=False) - #Load in ligand from mol2 into openeye - ifs = oechem.oemolistream(lig) - mol = oechem.OEGraphMol() - oechem.OEReadMolecule(ifs, mol) #Use openff to make graph from molecule molecule = Molecule(lig) @@ -70,28 +135,12 @@ def select_ligand_atoms(lig, traj, ligand='LIG'): longest_paths.append(value) #there might be multiple longest path, just choose first one for now center = longest_paths[0][int(len(longest_paths[0])/2)] - #Load in ligand from mol2 into openeye - ifs = oechem.oemolistream(lig) - mol = oechem.OEGraphMol() - oechem.OEReadMolecule(ifs, mol) - # Find ring systems (here not specifically aromatic, could change that) - nr_ring_systems, parts = oechem.OEDetermineRingSystems(mol) - ring_systems = [] - atoms = [] + atoms = determine_rings_oechem(lig) + nr_ring_systems = len(atoms) + # If we find a ring system in our molecule if nr_ring_systems >= 1: - #Loop through ringsystems, count number of atoms - for ringidx in range(1, nr_ring_systems + 1): - single_ring = [] - nr_atoms = parts.count(ringidx) - ring_systems.append(nr_atoms) - #store atom indices of ring systems - for atom in mol.GetAtoms(): - if parts[atom.GetIdx()] == ringidx: - single_ring.append(atom.GetIdx()) - atoms.append(single_ring) - # Compute RMSF ligand atoms ligand_traj.superpose(ligand_traj) rmsf = list(md.rmsf(ligand_traj, ligand_traj, 0)) @@ -99,8 +148,7 @@ def select_ligand_atoms(lig, traj, ligand='LIG'): ring_atoms = [] # also save ring atoms ring by ring in nested list to be able to select atoms from a single ring later ring_atoms_2 = [] - for inx,r in enumerate(ring_systems): - atoms_ring = atoms[inx] + for atoms_ring in atoms: rmsf_ring = [rmsf[a] for a in atoms_ring] #Only choose ring systems that are stable (RMSF<0.1) if max(rmsf_ring) < 0.1: @@ -309,19 +357,9 @@ def check_angle(angle): return False return True -def substructure_search(mol2_lig, smarts): - """Pick ligand atoms for restraints based on substructure search. - Parameters - ---------- - mol2_lig : str - mol2 file of the ligand. - smarts : str - Smarts pattern used for substructure search. One atom is flagged and picked for restraints. - Returns - ------- - matches: list - Indices of ligand atoms that match the picked atom in the substructure. - """ +def substructure_search_oechem(mol2_lig, smarts): + from openeye import oechem + ifs = oechem.oemolistream(mol2_lig) mol = oechem.OEGraphMol() @@ -343,6 +381,39 @@ def substructure_search(mol2_lig, smarts): return matches +def substructure_search_rdkit(mol2_lig, smarts): + from rdkit import Chem + + m = Chem.MolFromMol2File(mol2_lig, removeHs=False) + + query = Chem.MolFromSmarts(smarts) + + matches = m.GetSubstructMatches(query, uniquify=True) + + raise matches + + +def substructure_search(mol2_lig, smarts, use_oechem=True): + """Pick ligand atoms for restraints based on substructure search. + + Parameters + ---------- + mol2_lig : str + mol2 file of the ligand. + smarts : str + Smarts pattern used for substructure search. One atom is flagged and picked for restraints. + + Returns + ------- + matches: list + Indices of ligand atoms that match the picked atom in the substructure. + """ + if use_oechem: + return substructure_search_oechem(mol2_lig, smarts) + else: + return substructure_search_rdkit(mol2_lig, smarts) + + def select_Boresch_atoms(traj, mol2_lig, ligand_atoms = None, protein_atoms = None, substructure = None, ligand='LIG'): """Select possible protein atoms for Boresch-style restraints. Parameters @@ -821,8 +892,9 @@ def analytical_Boresch_correction(r0, thA, thB, fc_r, fc_thA, fc_thB, fc_phiA, f dG = dG / 4.184 return dG -def ligand_sdf(pdb, outfile): - ''' Function to get ligand sdf from complex pdb. Only use if no sdf of ligands available. ''' + +def ligand_sdf_oechem(pdb, outfile): + from openeye import oechem complex_A = oechem.OEGraphMol() ifs = oechem.oemolistream() @@ -844,3 +916,44 @@ def ligand_sdf(pdb, outfile): oechem.OEWriteMolecule(ofs, lig) return + + +def ligand_sdf_rdkit(pdb, outfile): + from rdkit import Chem + + # these resnames get skipped, i.e. aren't considered the ligand + water = {'SOL', 'HOH', 'WAT'} + AAs = {'GLY', 'ALA', 'VAL', 'LEU', 'ILE', + 'MET', 'PHE', 'TYR', 'TRP', 'ARG', + 'CYS', 'ASN', 'GLN', 'THR', 'SER', + 'PRO', 'HIS', 'LYS', 'ASP', 'GLU', + 'CYX', 'HID', 'HIP', 'HIE'} + + m = Chem.MolFromPDBFile(pdb, removeHs=False) + + lig = None + mols = Chem.GetMolFrags(m, asMols=True, sanitizeFrags=False) + + for mol in mols: + resname = mol.GetAtomWithIdx(0).GetMonomerInfo().GetResidueName().upper() + if resname in water: + continue + if resname in AAs: + continue + + if lig is None: + lig = mol + else: + lig = Chem.CombineMols(lig, mol) + + Chem.MolToMolFile(lig, outfile) + + +def ligand_sdf(pdb, outfile, use_oechem=True): + """Get ligand sdf from complex pdb. + + Only use if no sdf of ligands available.""" + if use_oechem: + return ligand_sdf_oechem(pdb, outfile) + else: + return ligand_sdf_rdkit(pdb, outfile) diff --git a/combine_coordinates.py b/combine_coordinates.py index e9da4fa..5c08034 100644 --- a/combine_coordinates.py +++ b/combine_coordinates.py @@ -1,19 +1,11 @@ -from openeye import oechem, oespruce import parmed as pmd import shutil import mdtraj as md -def align_complexes(pdb_A, pdb_B, out_B): - """Align 2 structures with oespruce, Structure A is the reference - Parameters - ---------- - pdb_A : str - pdb structure Complex A - pdb_B : str - pdb structure Complex B - out_B : str - pdb structure, Aligned structure of complex B - """ + +def align_complexes_oechem(pdb_A, pdb_B, out_B): + from openeye import oechem, oespruce + complex_A = oechem.OEGraphMol() ifs = oechem.oemolistream() ifs.SetFlavor(oechem.OEFormat_PDB, @@ -45,6 +37,39 @@ def align_complexes(pdb_A, pdb_B, out_B): ofs.close() return + +def align_complexes_mdtraj(pdb_A, pdb_B, out_B): + ref_traj = md.Trajectory(pdb_A) + traj = md.Trajectory(pdb_B) + + traj.superpose(ref_traj, atom_indices=ref_traj.topology.select('backbone')) + + with md.formats.PDBTrajectoryFile(out_B, mode='w') as w: + w.write(traj.xyz[0], traj.topology, + unitcell_lengths=traj.unitcell_lengths[0], + unitcell_angles=traj.unitcell_angles[0]) + + +def align_complexes(pdb_A, pdb_B, out_B, use_oechem=True): + """Align 2 structures with oespruce, Structure A is the reference + Parameters + ---------- + pdb_A : str + pdb structure Complex A + pdb_B : str + pdb structure Complex B + out_B : str + pdb structure, Aligned structure of complex B + use_oechem : bool + if True, will use oechem spruce (requiring license) otherwise will use + mdtraj + """ + if use_oechem: + return align_complexes_oechem(pdb_A, pdb_B, out_B) + else: + return align_complexes_mdtraj(pdb_A, pdb_B, out_B) + + def combine_ligands_gro(in_file_A, in_file_B, out_file, ligand_A='MOL', ligand_B='MOL'): """Add ligand B coordinates to coordinate (.gro) file of ligand A in complex with protein Parameters