Merge pull request #92 from NDoering99/main

Added PyMOL support for visualization.
wolberlab · Jul 25, 2024 · 3f6da55 · 3f6da55
2 parents c1a2c6d + 154f179
commit 3f6da55
Show file tree

Hide file tree

Showing 12 changed files with 385 additions and 74 deletions.
diff --git a/README.md b/README.md
@@ -109,9 +109,10 @@ Start the analysis with the following Inputs:
 
 -d = trajectory file of the simulation (in .dcd format)
 
-#### Optional:
 -n = Ligand name (3 letter code in PDB)
 
+#### Optional:
+
 -l = Ligand in SDF format
 
 -b = binding mode threshold. Is used to remove interactions under the defined procentual occurence from the binding mode generation. The default is 40% (accepted values: 0-100)
@@ -138,6 +139,8 @@ Start the analysis with the following Inputs:
 
 --watereps = the EPS of the clustering part during the water analysis. will only result in something if "-w True" is added. Accepts float (in Angstrom).
 
+--figure = File type for the figures, default is png. Can be changed to all file types supported by matplotlib.
+
 #### Command line example with default values
 
     openmmdl_analysis -t {path/to/topology} -d {path/to/trajectory} -n {Ligand_name}
@@ -146,13 +149,25 @@ Start the analysis with the following Inputs:
 #### Visualization
 Most of the analysis outputs are JEPG images and do not need any further preparation to be viewed.
 
-For the visualization of your trajectory with interaction pointclouds you can use the jupyter notebook prepared in the **OpenMMDL** repository.
+For the visualization of your complex with interaction pointclouds you can use NGLView with the jupyter notebook prepared in the **OpenMMDL** repository or visualize the pointclouds in PyMOL.
 
-Or use this command:
+### Usage
 ```
 openmmdl_visualization
 ```
-Then edit the notebook to include the output of your analysis.
+#### Optional:
+--type = Software you wish to visualize openmmdl interactions with. Options: nglview, pymol. Default: nglview
+#### NGLView
+After running the start comand a jupyter notebook will automatically open.
+Edit the notebook to include the output files of your analysis.
+Then run all cells.
+#### PyMOL
+After running the start comand a python skript will apear in your directory.
+Open up PyMOL then run these two comands in the PyMOL console:
+```
+run visualization_pymol.py
+openmdl_visualization PATH_TO_interacting_waters.pdb, modulePATH_TO_clouds.json
+```
 ## Copyright
 Copyright (c) 2022, Valerij Talagayev, Yu Chen,  Niklas Piet Doering & Leon Obendorf (Wolber lab)
 

diff --git a/docs/openmmdl_analysis.rst b/docs/openmmdl_analysis.rst
@@ -22,13 +22,13 @@ Mandatory:
 
     -t = topology file of the simulation (in .pdb format)
     -d = trajectory file of the simulation (in .dcd format)
+    -n = Ligand name (3 letter code in PDB)
 
 
 Optional:
 
 .. code-block:: text
 
-    -n = Ligand name (3 letter code in PDB)
     -l = Ligand in SDF format
     -b = binding mode threshold. Is used to remove interactions under the defined procentual occurence from the binding mode generation. The default is 40% (accepted values: 0-100)
     -df = Dataframe (use if the interactions were already calculated, default name would be "interactions_gathered.csv")
@@ -39,9 +39,10 @@ Optional:
     -nuc = Treat nucleic acids as receptor
     -pep = Calculate interactions with peptides. Give the peptide chain name as input. Defaults to None
     -ref = Add a reference PDB to renumber the residue numbers. Defaults to None (accepted values: str of PDB)
-    -r = Calculate the RMSD difference between frames. The default is False (accepted values: True/False)
+    -r = Calculate the RMSD difference between frames. The default is False (accepted values: True/False) (if False no representative frame for the binding modes will be generated)
     -w = stable-water-analysis. Defines if the analysis of stable water molecules should be performed. The default is False (accepted values: True/False)
     --watereps = the EPS of the clustering part during the water analysis. will only result in something if "-w True" is added. Accepts float (in Angstrom). 
+    --figure = File type for the figures, default is png. Can be changed to all file types supported by matplotlib.
 
 Application
 ------------------------------

diff --git a/openmmdl/_version.py b/openmmdl/_version.py
@@ -1 +1 @@
-__version__ = "1.0.0+724.g9044002.dirty"
+__version__ = "1.0.0+751.g26e6c47.dirty"
diff --git a/openmmdl/openmmdl_analysis/barcode_generation.py b/openmmdl/openmmdl_analysis/barcode_generation.py
@@ -105,7 +105,9 @@ def plot_barcodes(barcodes, save_path):
     plt.savefig(f"./Barcodes/{save_path}", dpi=300, bbox_inches="tight")
 
 
-def plot_waterbridge_piechart(df_all, waterbridge_barcodes, waterbridge_interactions, fig_type):
+def plot_waterbridge_piechart(
+    df_all, waterbridge_barcodes, waterbridge_interactions, fig_type
+):
     """Generates piecharts for each waterbridge interaction with the water ids of the interacting waters.
 
     Args:

diff --git a/openmmdl/openmmdl_analysis/binding_mode_processing.py b/openmmdl/openmmdl_analysis/binding_mode_processing.py
@@ -6,7 +6,7 @@
 from MDAnalysis.analysis.distances import dist
 from tqdm import tqdm
 from pathlib import Path
-from numba import jit  
+from numba import jit
 
 
 def gather_interactions(df, ligand_rings, peptide=None):
@@ -640,6 +640,7 @@ def update_values(df, new, unique_data):
         values_to_update = new.loc[frame_value, list(unique_data.values())]
         df.loc[idx, list(unique_data.values())] = values_to_update
 
+
 @jit
 def calc_rmsd_2frames(ref, frame):
     """

diff --git a/openmmdl/openmmdl_analysis/openmmdlanalysis.py b/openmmdl/openmmdl_analysis/openmmdlanalysis.py
@@ -3,7 +3,6 @@
 Perform Simulations of Protein-ligand complexes with OpenMM
 """
 
-import argparse
 import sys
 import warnings
 
@@ -205,7 +204,7 @@ def main():
         help="Set the Eps for clustering, this defines how big clusters can be spatially in Angstrom",
         default=1.0,
     )
-    
+
     parser.add_argument(
         "--figure",
         dest="figure_type",
@@ -390,7 +389,9 @@ def main():
             selection2=["protein", f"chainID {peptide}"],
         )
         if frame_rmsd != "No":
-            RMSD_dist_frames(f"{topology}", f"{trajectory}", fig_type, lig=f"chainID {peptide}")
+            RMSD_dist_frames(
+                f"{topology}", f"{trajectory}", fig_type, lig=f"chainID {peptide}"
+            )
             print("\033[1mRMSD calculated\033[0m")
     else:
         rmsd_for_atomgroups(
@@ -530,9 +531,9 @@ def main():
 
         # Check if the fingerprint has been encountered before
         if fingerprint in treshold_fingerprint_dict:
-            grouped_frames_treshold.at[
-                index, "Binding_fingerprint_treshold"
-            ] = treshold_fingerprint_dict[fingerprint]
+            grouped_frames_treshold.at[index, "Binding_fingerprint_treshold"] = (
+                treshold_fingerprint_dict[fingerprint]
+            )
         else:
             # Assign a new label if the fingerprint is new
             label = f"Binding_Mode_{label_counter}"
@@ -716,7 +717,11 @@ def main():
                 merged_image_paths, "all_binding_modes_arranged.png"
             )
             generate_ligand_image(
-                ligand, "complex.pdb", "lig_no_h.pdb", "lig.smi", f"ligand_numbering.svg"
+                ligand,
+                "complex.pdb",
+                "lig_no_h.pdb",
+                "lig.smi",
+                f"ligand_numbering.svg",
             )
             if fig_type == "png":
                 cairosvg.svg2png(
@@ -872,7 +877,9 @@ def main():
     for interaction_type, interaction_data in interaction_types.items():
         plot_barcodes_grouped(interaction_data, df_all, interaction_type, fig_type)
 
-    plot_waterbridge_piechart(df_all, waterbridge_barcodes, waterbridge_interactions, fig_type)
+    plot_waterbridge_piechart(
+        df_all, waterbridge_barcodes, waterbridge_interactions, fig_type
+    )
     print("\033[1mBarcodes generated\033[0m")
 
     interacting_water_id_list = interacting_water_ids(df_all, waterbridge_interactions)

diff --git a/openmmdl/openmmdl_analysis/rmsd_calculation.py b/openmmdl/openmmdl_analysis/rmsd_calculation.py
@@ -49,7 +49,9 @@ def rmsd_for_atomgroups(
     return rmsd_df
 
 
-def RMSD_dist_frames(prot_lig_top_file, prot_lig_traj_file, fig_type, lig, nucleic=False):
+def RMSD_dist_frames(
+    prot_lig_top_file, prot_lig_traj_file, fig_type, lig, nucleic=False
+):
     """Calculate the RMSD between all frames in a matrix.
 
     Args:

diff --git a/openmmdl/openmmdl_analysis/visualization_functions.py b/openmmdl/openmmdl_analysis/visualization_functions.py
@@ -7,6 +7,7 @@
 import subprocess
 import os
 import shutil
+import argparse
 
 from openmmdl.openmmdl_analysis.barcode_generation import waterids_barcode_generator
 
@@ -255,8 +256,47 @@ def visualization(
 
 def run_visualization():
     """Runs the visualization notebook in the current directory. The visualization notebook is copied from the package directory to the current directory and automaticaly started."""
-    package_dir = os.path.dirname(__file__)
-    notebook_path = os.path.join(package_dir, "visualization.ipynb")
-    current_dir = os.getcwd()
-    shutil.copyfile(notebook_path, f"{current_dir}/visualization.ipynb")
-    subprocess.run(["jupyter", "notebook", "visualization.ipynb"])
+
+    logo = "\n".join(
+        [
+            "     ,-----.    .-------.     .-''-.  ,---.   .--.,---.    ,---.,---.    ,---. ______       .---.      ",
+            "   .'  .-,  '.  \  _(`)_ \  .'_ _   \ |    \  |  ||    \  /    ||    \  /    ||    _ `''.   | ,_|      ",
+            "  / ,-.|  \ _ \ | (_ o._)| / ( ` )   '|  ,  \ |  ||  ,  \/  ,  ||  ,  \/  ,  || _ | ) _  \,-./  )      ",
+            " ;  \  '_ /  | :|  (_,_) /. (_ o _)  ||  |\_ \|  ||  |\_   /|  ||  |\_   /|  ||( ''_'  ) |\  '_ '`)    ",
+            " |  _`,/ \ _/  ||   '-.-' |  (_,_)___||  _( )_\  ||  _( )_/ |  ||  _( )_/ |  || . (_) `. | > (_)  )    ",
+            " : (  '\_/ \   ;|   |     '  \   .---.| (_ o _)  || (_ o _) |  || (_ o _) |  ||(_    ._) '(  .  .-'    ",
+            "  \ `_/  \  ) / |   |      \  `-'    /|  (_,_)\  ||  (_,_)  |  ||  (_,_)  |  ||  (_.\.' /  `-'`-'|___  ",
+            "   '. \_/``'.'  /   )       \       / |  |    |  ||  |      |  ||  |      |  ||       .'    |        \ ",
+            "     '-----'    `---'        `'-..-'  '--'    '--''--'      '--''--'      '--''-----'`      `--------` ",
+            "              Prepare and Perform OpenMM Protein-Ligand MD Simulations                                 ",
+            "                                     Alpha Version                                                     ",
+        ]
+    )
+    parser = argparse.ArgumentParser(
+        prog="openmmdl_analysis",
+        description=logo,
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.add_argument(
+        "--type",
+        dest="type",
+        help="Software you wish to visualize openmmdl interactions with. Options: nglview, pymol. Default: nglview",
+        default="nglview",
+    )
+    if parser.parse_args().type == "nglview":
+        package_dir = os.path.dirname(__file__)
+        notebook_path = os.path.join(package_dir, "visualization.ipynb")
+        current_dir = os.getcwd()
+        shutil.copyfile(notebook_path, f"{current_dir}/visualization.ipynb")
+        subprocess.run(["jupyter", "notebook", "visualization.ipynb"])
+    if parser.parse_args().type == "pymol":
+        package_dir = os.path.dirname(__file__)
+        pymol_script_path = os.path.join(package_dir, "visualization_pymol.py")
+        current_dir = os.getcwd()
+        shutil.copyfile(pymol_script_path, f"{current_dir}/visualization_pymol.py")
+        print(
+            """\033[1mWARNING!!! 
+To run the visualization script in pymol, please run the following commands in pymol:\033[0m
+    \x1B[3mrun visualization_pymol.py\x1B[0m
+    \x1B[3mopenmdl_visualization PATH_TO_interacting_waters.pdb, modulePATH_TO_clouds.json\x1B[0m"""
+        )
diff --git a/openmmdl/openmmdl_analysis/visualization_pymol.py b/openmmdl/openmmdl_analysis/visualization_pymol.py
@@ -0,0 +1,48 @@
+import json
+import time
+from pymol import cmd, cgo
+
+
+def visualize_pdb(pdb_path):
+    # Load the PDB file
+    cmd.load(pdb_path, "molecule")
+
+    # Select and show only proteins and ligands
+    cmd.show("cartoon", "polymer")
+    cmd.show("sticks", "organic")
+    cmd.show("spheres", "solvent")
+    cmd.hide("sticks", "hydrogen")
+
+
+def visualize_pointcloud(json_path, buffer_size=1000):
+    with open(json_path, "r") as json_file:
+        clusters = json.load(json_file)
+
+    for type, prop in clusters.items():
+        r = prop["radius"]
+        color = [prop["color"][0], prop["color"][1], prop["color"][2]]
+        points_buffer = []
+
+        for i, point in enumerate(prop["coordinates"]):
+            points_buffer.extend([cgo.COLOR, *color, cgo.SPHERE, *point, r])
+
+            # Load in chunks of buffer_size
+            if (i + 1) % buffer_size == 0:
+                cmd.load_cgo(points_buffer, type)
+                points_buffer = []  # Reset the buffer
+                cmd.refresh()
+                time.sleep(0.1)  # Small delay to ensure proper loading
+
+        # Load any remaining points in the buffer
+        if points_buffer:
+            cmd.load_cgo(points_buffer, type)
+            cmd.refresh()
+
+
+# The function to be called by PyMOL with provided arguments
+def main(pdb_path, json_path):
+    visualize_pdb(pdb_path)
+    visualize_pointcloud(json_path)
+
+
+cmd.extend("openmmdl_visualization", main)
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		__version__ = "1.0.0+724.g9044002.dirty"
		__version__ = "1.0.0+751.g26e6c47.dirty"