From f8525a9b4b3e72bd1c8e9271bca3e01047b6ba60 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:44:49 +0100 Subject: [PATCH] Delete openmmdl/tests/openmmdl_analysis directory --- .../barcode_generation_test.py | 325 --- .../binding_mode_processing_test.py | 1829 ----------------- .../openmmdlanalysis_test.py | 124 -- .../openmmdl_analysis/pml_writer_test.py | 248 --- .../test_find_stable_waters.py | 261 --- .../test_interaction_gathering.py | 360 ---- .../openmmdl_analysis/test_preprocessing.py | 229 --- .../test_rdkit_figure_generation.py | 288 --- .../test_rmsd_calculation.py | 71 - .../visualization_functions_test.py | 239 --- 10 files changed, 3974 deletions(-) delete mode 100644 openmmdl/tests/openmmdl_analysis/barcode_generation_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/pml_writer_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_preprocessing.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py delete mode 100644 openmmdl/tests/openmmdl_analysis/visualization_functions_test.py diff --git a/openmmdl/tests/openmmdl_analysis/barcode_generation_test.py b/openmmdl/tests/openmmdl_analysis/barcode_generation_test.py deleted file mode 100644 index d8eee34a..00000000 --- a/openmmdl/tests/openmmdl_analysis/barcode_generation_test.py +++ /dev/null @@ -1,325 +0,0 @@ -import numpy as np -import pandas as pd -import re -import os -import matplotlib.pyplot as plt -import pytest -from openmmdl.openmmdl_analysis.barcode_generation import * - - -# Barcode generation tests -@pytest.fixture -def sample_dataframe_barcode_generation(): - data = { - "FRAME": [1, 1, 2, 2, 3], - "Interaction1": [1, 0, 1, 0, 0], - "Interaction2": [0, 0, 0, 1, 1], - "WATER_IDX": [101, 102, 103, 104, 105], - } - return pd.DataFrame(data) - - -def test_barcodegeneration(sample_dataframe_barcode_generation): - interaction = "Interaction1" - barcode = barcodegeneration(sample_dataframe_barcode_generation, interaction) - - assert isinstance(barcode, np.ndarray) - - expected_barcode = np.array([1, 1, 0]) - assert np.array_equal(barcode, expected_barcode) - - -def test_waterids_barcode_generator(sample_dataframe_barcode_generation): - interaction = "Interaction2" - waterid_barcode = waterids_barcode_generator( - sample_dataframe_barcode_generation, interaction - ) - - # Test if the output is a list - assert isinstance(waterid_barcode, list) - - # Test the expected waterid barcode for the sample dataframe and interaction - expected_waterid_barcode = [0, 104, 105] - assert waterid_barcode == expected_waterid_barcode - - -def test_plot_barcodes(): - # create barcode data - working_directory = os.getcwd() - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory before:", files_in_working_directory) - - # Test case 1: No barcode - plot_barcodes({}, "no_barcodes.png") - assert not os.path.isfile("no_barcodes.png") - - # Test case 2: Single barcode - barcode_data = { - "166ARGA_4220,4221_Carboxylate_NI_saltbridge": np.array( - [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - ] - ) - } - plot_barcodes(barcode_data, "single_barcode.png") - single_barcode = "single_barcode.png" - assert single_barcode is not None - - barcodes = { - "Barcode 1": np.array( - [ - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - ] - ), - "Barcode 2": np.array( - [ - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - ] - ), - # Include more barcodes as needed - } - plot_barcodes(barcodes, "multiple_barcodes.png") - - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory after:", files_in_working_directory) - save_path = "multiple_barcodes.png" - - assert save_path is not None - - -def test_plot_waterbridge_piechart(tmp_path): - # Prepare inputs - df_all = pd.DataFrame( - { - "interaction1": [1, 0, 1, 0], - "interaction2": [0, 1, 0, 1], - "WATER_IDX": [1, 2, 1, 2], # changed 'waterid' to 'WATER_IDX' - "FRAME": [0, 1, 2, 3], # added 'FRAME' column - } - ) - waterbridge_barcodes = [np.array([1, 0, 1, 0]), np.array([0, 1, 0, 1])] - waterbridge_interactions = ["interaction1", "interaction2"] - fig_type = "png" - - # Change the current working directory to tmp_path - - # Use os.makedirs - os.makedirs(f"{tmp_path}/Barcodes/Waterbridge_Piecharts/", exist_ok=True) - - # Call the function - plot_waterbridge_piechart( - df_all, waterbridge_barcodes, waterbridge_interactions, fig_type - ) - - # Check if the output files are created - for interaction in waterbridge_interactions: - outname_png = f"./Barcodes/Waterbridge_Piecharts/{interaction}.png" - assert os.path.isfile(outname_png), f"File {outname_png} not found." - - # Additional assertions for content or specific properties of the generated files - img = plt.imread(outname_png) - assert img is not None, f"Unable to read image file {outname_png}." - - # Retrieve the percentage directly from the Axes object - percentage_text = plt.gca().texts[0].get_text() - assert percentage_text is not None, "Percentage text is None." - - # Retrieve the title directly from the Axes object - title_text = plt.gca().get_title() - assert title_text is not None, "Title text is None." - - # You can add more assertions based on your specific requirements - # For example, check if the file size is greater than zero, etc. - assert os.path.getsize(outname_png) > 0, f"File {outname_png} is empty." - - -def test_plot_bacodes_grouped(tmp_path): - # Create a mock dataframe with all necessary columns - df_all = pd.DataFrame( - { - "column1": [1, 2, 3], - "column2": ["a", "b", "c"], - "FRAME": [0, 1, 2], - "atom1_atom2_interaction": [1, 0, 1], - "atom3_atom4_interaction": [0, 1, 1], - } - ) - - # Define interactions and interaction_type - interactions = ["atom1_atom2_interaction", "atom3_atom4_interaction"] - interaction_type = "interaction" - fig_type = "png" - - working_directory = os.getcwd() - plot_barcodes_grouped(interactions, df_all, interaction_type, fig_type) - # Check if the output files were created - assert os.path.exists( - os.path.join( - working_directory, - "Barcodes", - "atom2", - f"atom2_{interaction_type}_barcodes.png", - ) - ) - assert os.path.exists( - os.path.join( - working_directory, - "Barcodes", - "atom4", - f"atom4_{interaction_type}_barcodes.png", - ) - ) - assert os.path.exists( - os.path.join( - working_directory, "Barcodes", f"{interaction_type}_interactions.png" - ) - ) diff --git a/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py b/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py deleted file mode 100644 index db3797e9..00000000 --- a/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py +++ /dev/null @@ -1,1829 +0,0 @@ -import numpy as np -import pandas as pd -import rdkit -import MDAnalysis as mda -import re -import os -import matplotlib.pyplot as plt -import pytest - -from openmmdl.openmmdl_analysis.binding_mode_processing import * - - -# binding_mode_processing tests -@pytest.fixture -def sample_dataframe_bindingmode_processing(): - data = { - "FRAME": {0: 1, 1: 2, 2: 3, 3: 2}, - "Prot_partner": {0: "A", 1: "B", 2: "C", 3: "A"}, - "INTERACTION": { - 0: "hydrophobic", - 1: "hbond", - 2: "saltbridge", - 3: "hydrophobic", - }, - "LIGCARBONIDX": {0: 101, 1: 102, 2: 103, 3: 102}, - "DONORIDX": {0: 201, 1: 202, 2: 203, 3: 202}, - "ACCEPTORIDX": {0: 301, 1: 302, 2: 303, 3: 302}, - "PROTISDON": {0: True, 1: False, 2: True, 3: False}, - "LIG_IDX_LIST": {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, - "LIG_GROUP": {0: "Group1", 1: "Group2", 2: "Group3", 3: "Group1"}, - "PROTISPOS": {0: True, 1: False, 2: True, 3: True}, - "DON_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONORTYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "ACCEPTOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "LOCATION": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_TYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "RESTYPE_LIG": {0: 0, 1: 0, 2: 0, 3: 0}, - "TARGET_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "COORDINATION": {0: 0, 1: 0, 2: 0, 3: 0}, - } - - # Add 'halogen' and 'hbond' data to the existing DataFrame - data["FRAME"][4] = 4 # Add a new 'FRAME' value - data["Prot_partner"][4] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][4] = "halogen" # Add 'halogen' interaction - data["DON_IDX"][4] = 501 # DON_IDX for 'halogen' - data["DONORTYPE"][4] = "F" # Halogen type - data["ACCEPTOR_IDX"][4] = 0 - data["DONOR_IDX"][4] = 0 - data["LIG_IDX_LIST"][4] = 0 - data["LIG_GROUP"][4] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][4] = 0 - data["TARGET_IDX"][4] = 0 - - data["FRAME"][5] = 5 # Add a new 'FRAME' value - data["Prot_partner"][5] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][5] = "hbond" # Add 'hbond' interaction - data["ACCEPTORIDX"][5] = 301 # ACCEPTORIDX for 'hbond' - data["DON_IDX"][5] = 0 # DON_IDX - data["DONORTYPE"][5] = 0 # DON_IDX - data["PROTISDON"][5] = True # PROTISDON is True for 'hbond' - data["ACCEPTOR_IDX"][5] = 0 - data["LIG_IDX_LIST"][5] = 0 - data["DONOR_IDX"][5] = 0 - data["LIG_GROUP"][5] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][5] = 0 - data["TARGET_IDX"][5] = 0 - - # Add 'waterbridge' cases where PROTISDON is both True and False - data["FRAME"][6] = 6 # Add a new 'FRAME' value - data["Prot_partner"][6] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][6] = "waterbridge" # Add 'waterbridge' interaction - data["ACCEPTOR_IDX"][6] = 401 # ACCEPTOR_IDX for 'waterbridge' - data["DON_IDX"][6] = 0 # DON_IDX - data["DONORTYPE"][6] = 0 # DON_IDX - data["DONOR_IDX"][6] = 0 - data["LIG_IDX_LIST"][6] = 0 - data["PROTISDON"][6] = True # PROTISDON is True for 'waterbridge' - data["LIG_GROUP"][6] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][6] = 0 - data["TARGET_IDX"][6] = 0 - - data["FRAME"][7] = 7 # Add a new 'FRAME' value - data["Prot_partner"][7] = "B" # Add a new 'Prot_partner' value - data["INTERACTION"][7] = "waterbridge" # Add 'waterbridge' interaction - data["DONOR_IDX"][7] = 501 # DONOR_IDX for 'waterbridge' - data["DON_IDX"][7] = 0 # DON_IDX - data["DONORTYPE"][7] = 0 # DON_IDX - data["PROTISDON"][7] = False # PROTISDON is False for 'waterbridge' - data["ACCEPTOR_IDX"][7] = 0 - data["LIG_IDX_LIST"][7] = 0 # LIG_IDX_LIST for 'pication' - data["LIG_GROUP"][7] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][7] = 0 - data["TARGET_IDX"][7] = 0 - - # Add 'pistacking' case - data["FRAME"][8] = 8 # Add a new 'FRAME' value - data["Prot_partner"][8] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][8] = "pistacking" # Add 'pistacking' interaction - data["LIG_IDX_LIST"][8] = [7, 8] # LIG_IDX_LIST for 'pistacking' - data["LIG_GROUP"][8] = 0 # LIG_GROUP for 'pication - data["ACCEPTOR_IDX"][8] = 0 - data["DON_IDX"][8] = 0 # DON_IDX - data["DONOR_IDX"][8] = 0 - data["PROTISDON"][8] = False - data["DONORTYPE"][8] = 0 # DON_IDX - data["RESTYPE_LIG"][8] = 0 - data["TARGET_IDX"][8] = 0 - - # Add 'pication' case - data["FRAME"][9] = 9 # Add a new 'FRAME' value - data["Prot_partner"][9] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][9] = "pication" # Add 'pication' interaction - data["LIG_IDX_LIST"][9] = [9, 10] # LIG_IDX_LIST for 'pication' - data["LIG_GROUP"][9] = "Group4" # LIG_GROUP for 'pication' - data["ACCEPTOR_IDX"][9] = 0 - data["DON_IDX"][9] = 0 # DON_IDX - data["PROTISDON"][9] = False - data["DONOR_IDX"][9] = 0 - data["DONORTYPE"][9] = 0 # DON_IDX - data["RESTYPE_LIG"][9] = 0 - data["TARGET_IDX"][9] = 0 - - # Add 'metal' interaction case - data["FRAME"][10] = 10 # Add a new 'FRAME' value - data["Prot_partner"][10] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][10] = "metal" # Add 'metal' interaction - data["METAL_IDX"][10] = 401 # METAL_IDX for 'metal' - data["METAL_TYPE"][10] = "Fe" # Metal type - data["LOCATION"][10] = "site1" # Location - data["ACCEPTOR_IDX"][10] = 0 - data["DONOR_IDX"][10] = 0 - data["RESTYPE_LIG"][10] = "A" - data["TARGET_IDX"][10] = 401 - data["COORDINATION"][10] = "site1" - - data["FRAME"][11] = 11 # Add a new 'FRAME' value - data["Prot_partner"][11] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][11] = "saltbridge" # Add 'saltbridge' interaction - data["LIG_IDX_LIST"][11] = [7, 8] # Ligand index list for 'saltbridge PI' - data["LIG_GROUP"][11] = "Group4" # Ligand group for 'saltbridge PI' - data["PROTISPOS"][11] = False # PROTISPOS is False for 'saltbridge PI' - data["RESTYPE_LIG"][11] = 0 - data["TARGET_IDX"][11] = 0 - - # Add 'hydrophobic' case where 'ring_found' is False - data["FRAME"][12] = 12 # Add a new 'FRAME' value - data["Prot_partner"][12] = "C" # Add a new 'Prot_partner' value - data["INTERACTION"][12] = "hydrophobic" # Add 'hydrophobic' interaction - data["LIGCARBONIDX"][12] = 104 # LIGCARBONIDX for 'hydrophobic' (not in any ring) - data["RESTYPE_LIG"][12] = 0 - data["TARGET_IDX"][12] = 0 - - return pd.DataFrame(data) - - -@pytest.fixture -def sample_dataframe_bindingmode_processing_with_peptides(): - data = { - "FRAME": {0: 1, 1: 2, 2: 3, 3: 2}, - "Prot_partner": {0: "62VAL", 1: "SER144", 2: "GLU321", 3: "ILE432"}, - "INTERACTION": { - 0: "hydrophobic", - 1: "hbond", - 2: "saltbridge", - 3: "hydrophobic", - }, - "LIGCARBONIDX": {0: 101, 1: 102, 2: 103, 3: 102}, - "DONORIDX": {0: 201, 1: 202, 2: 203, 3: 202}, - "ACCEPTORIDX": {0: 301, 1: 302, 2: 303, 3: 302}, - "PROTISDON": {0: True, 1: False, 2: True, 3: False}, - "LIG_IDX_LIST": {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, - "LIG_GROUP": {0: "Group1", 1: "Group2", 2: "Group3", 3: "Group1"}, - "PROTISPOS": {0: True, 1: False, 2: True, 3: True}, - "DON_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONORTYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "ACCEPTOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "LOCATION": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_TYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "RESTYPE_LIG": {0: "ILE", 1: "TYR", 2: "ARG", 3: "VAL"}, - "TARGET_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "COORDINATION": {0: 0, 1: 0, 2: 0, 3: 0}, - "RESNR_LIG": {0: "101", 1: "202", 2: "155", 3: "102"}, - } - - # Additional data for peptide interactions - data["FRAME"][4] = 4 - data["Prot_partner"][4] = "LEU248" - data["INTERACTION"][4] = "halogen" - data["DON_IDX"][4] = 501 - data["DONORTYPE"][4] = "F" - data["ACCEPTOR_IDX"][4] = 0 - data["DONOR_IDX"][4] = 0 - data["LIG_IDX_LIST"][4] = 0 - data["LIG_GROUP"][4] = 0 - data["RESTYPE_LIG"][4] = "ILE" - data["TARGET_IDX"][4] = 0 - data["RESNR_LIG"][4] = "501" - - data["FRAME"][5] = 5 - data["Prot_partner"][5] = "SER300" - data["INTERACTION"][5] = "hbond" - data["ACCEPTORIDX"][5] = 301 - data["DON_IDX"][5] = 0 - data["DONORTYPE"][5] = 0 - data["PROTISDON"][5] = True - data["ACCEPTOR_IDX"][5] = 0 - data["LIG_IDX_LIST"][5] = 0 - data["DONOR_IDX"][5] = 0 - data["LIG_GROUP"][5] = 0 - data["RESTYPE_LIG"][5] = "HIS" - data["TARGET_IDX"][5] = 0 - data["RESNR_LIG"][5] = "301" - - data["FRAME"][6] = 6 - data["Prot_partner"][6] = "TYR343" - data["INTERACTION"][6] = "waterbridge" - data["ACCEPTOR_IDX"][6] = 401 - data["DON_IDX"][6] = 0 - data["DONORTYPE"][6] = 0 - data["DONOR_IDX"][6] = 0 - data["LIG_IDX_LIST"][6] = 0 - data["PROTISDON"][6] = True - data["LIG_GROUP"][6] = 0 - data["RESTYPE_LIG"][6] = "SER" - data["TARGET_IDX"][6] = 0 - data["RESNR_LIG"][6] = "455" - - data["FRAME"][7] = 7 - data["Prot_partner"][7] = "ILE178" - data["INTERACTION"][7] = "waterbridge" - data["DONOR_IDX"][7] = 501 - data["DON_IDX"][7] = 0 - data["DONORTYPE"][7] = 0 - data["PROTISDON"][7] = False - data["ACCEPTOR_IDX"][7] = 0 - data["LIG_IDX_LIST"][7] = 0 - data["LIG_GROUP"][7] = 0 - data["RESTYPE_LIG"][7] = "TYR" - data["TARGET_IDX"][7] = 0 - data["RESNR_LIG"][7] = "467" - - data["FRAME"][8] = 8 - data["Prot_partner"][8] = "PHE344" - data["INTERACTION"][8] = "pistacking" - data["LIG_IDX_LIST"][8] = [7, 8] - data["LIG_GROUP"][8] = 0 - data["ACCEPTOR_IDX"][8] = 0 - data["DON_IDX"][8] = 0 - data["DONOR_IDX"][8] = 0 - data["PROTISDON"][8] = False - data["DONORTYPE"][8] = 0 - data["RESTYPE_LIG"][8] = "PHE" - data["TARGET_IDX"][8] = 0 - data["RESNR_LIG"][8] = "398" - - data["FRAME"][9] = 9 - data["Prot_partner"][9] = "PHE754" - data["INTERACTION"][9] = "pication" - data["LIG_IDX_LIST"][9] = [9, 10] - data["LIG_GROUP"][9] = "B" - data["ACCEPTOR_IDX"][9] = 0 - data["DON_IDX"][9] = 0 - data["PROTISDON"][9] = False - data["DONOR_IDX"][9] = 0 - data["DONORTYPE"][9] = 0 - data["RESTYPE_LIG"][9] = "ARG" - data["TARGET_IDX"][9] = 0 - data["RESNR_LIG"][9] = "245" - - data["FRAME"][10] = 10 - data["Prot_partner"][10] = "LYS567" - data["INTERACTION"][10] = "pication" - data["LIG_IDX_LIST"][10] = [9, 10] - data["LIG_GROUP"][10] = "B" - data["RESTYPE_LIG"][10] = "PHE" - data["TARGET_IDX"][10] = 501 - data["RESNR_LIG"][10] = "228" - - data["FRAME"][11] = 11 - data["Prot_partner"][11] = "LYS567" - data["INTERACTION"][11] = "saltbridge" - data["LIG_IDX_LIST"][11] = [7, 8] - data["LIG_GROUP"][11] = "Group4" - data["PROTISPOS"][11] = False - data["RESTYPE_LIG"][11] = "GLU" - data["TARGET_IDX"][11] = 0 - data["RESNR_LIG"][11] = "423" - - data["FRAME"][12] = 12 - data["Prot_partner"][12] = "HEM144" - data["INTERACTION"][12] = "metal" - data["METAL_IDX"][12] = 401 # METAL_IDX for 'metal' - data["METAL_TYPE"][12] = "Fe" # Metal type - data["LOCATION"][12] = "site1" # Location - data["ACCEPTOR_IDX"][12] = 0 - data["DON_IDX"][12] = 0 - data["RESTYPE_LIG"][ - 12 - ] = "HIS" # Assuming 'A' as the RESTYPE_LIG for the metal interaction - data["TARGET_IDX"][12] = 401 - data["COORDINATION"][12] = "site1" - data["RESNR_LIG"][12] = "256" - - return pd.DataFrame(data) - - -def test_gather_interactions(sample_dataframe_bindingmode_processing): - df = sample_dataframe_bindingmode_processing - ligand_rings = [[101], [102], [103]] # Define sample ligand rings for testing - - result = gather_interactions(df, ligand_rings) - - # Assert that the result is a dictionary - - assert isinstance(result, dict) - - # Check specific values in the generated dictionary for known interactions based on the updated fixture - expected_result = { - 1: {0: "A_101_hydrophobic"}, - 2: {1: "B_202_Donor_hbond", 3: "A_102_hydrophobic"}, - 3: {2: "C_[5, 6]_Group3_NI_saltbridge"}, - 4: {4: "A_501_F_halogen"}, - 5: {5: "A_301_Acceptor_hbond"}, - 6: {6: "A_401_Acceptor_waterbridge"}, - 7: {7: "B_501_Donor_waterbridge"}, - 8: {8: "A_[7, 8]_pistacking"}, - 9: {9: "A_[9_ 10]_Group4_pication"}, - 10: {10: "A_401_Fe_site1_metal"}, - 11: {11: "A_[7, 8]_Group4_PI_saltbridge"}, - 12: {12: "C_104_hydrophobic"}, - } - # Check if the actual result matches the expected result - assert result == expected_result - - -def test_gather_interactions_with_peptides( - sample_dataframe_bindingmode_processing_with_peptides, -): - df = sample_dataframe_bindingmode_processing_with_peptides - ligand_rings = [[101], [102], [103]] # Define sample ligand rings for testing - - result = gather_interactions(df, ligand_rings, peptide=True) - - # Assert that the result is a dictionary - assert isinstance(result, dict) - - # Check specific values in the generated dictionary for known interactions based on the updated fixture - expected_result = { - 1: {0: "62VAL_101ILE_hydrophobic"}, - 2: {1: "SER144_202TYR_Donor_hbond", 3: "ILE432_102VAL_hydrophobic"}, - 3: {2: "GLU321_155ARG_ARG_NI_saltbridge"}, - 4: {4: "LEU248_501ILE_F_halogen"}, - 5: {5: "SER300_301HIS_Acceptor_hbond"}, - 6: {6: "TYR343_455SER_Acceptor_waterbridge"}, - 7: {7: "ILE178_467TYR_Donor_waterbridge"}, - 8: {8: "PHE344_398PHE_pistacking"}, - 9: {9: "PHE754_245ARG_ARG_pication"}, - 10: {10: "LYS567_228PHE_PHE_pication"}, - 11: {11: "LYS567_423GLU_GLU_PI_saltbridge"}, - 12: {12: "HIS_256HIS_Fe_site1_metal"}, - } - - # Check if the actual result matches the expected result - assert result == expected_result - - -@pytest.fixture -def test_remove_duplicates_data(): - input_data = {"a": {"x": 1, "y": 2, "z": 1}, "b": {"p": 3, "q": 3, "r": 4}} - expected_output = {"a": {"x": 1, "y": 2}, "b": {"p": 3, "r": 4}} - return input_data, expected_output - - -def test_unique_data_generation(): - # Test case 1: Check if the function returns an empty dictionary for an empty list - result = unique_data_generation([]) - assert result == {} - - # Test case 2: Check if the function correctly identifies and stores unique values - input_list = [1, 2, 2, 3, 3, 4, 5, 5] - result = unique_data_generation(input_list) - expected_result = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} - assert result == expected_result - - # Test case 3: Check if the function handles strings - input_list = ["apple", "banana", "apple", "cherry"] - result = unique_data_generation(input_list) - expected_result = {"apple": "apple", "banana": "banana", "cherry": "cherry"} - assert result == expected_result - - -# Define a test case that uses the fixture -def test_remove_duplicate_values(test_remove_duplicates_data): - input_data, expected_output = test_remove_duplicates_data - assert remove_duplicate_values(input_data) == expected_output - - -def test_combine_subdict_values(): - # Test case 1: Empty input dictionary - data = {} - result = combine_subdict_values(data) - assert result == {"all": []} - - # Test case 2: Input dictionary with sub-dictionaries - data = { - "dict1": {"a": 1, "b": 2}, - "dict2": {"c": 3, "d": 4}, - "dict3": {"e": 5, "f": 6}, - } - result = combine_subdict_values(data) - assert result == {"all": [1, 2, 3, 4, 5, 6]} - - # Test case 3: Input dictionary with empty sub-dictionaries - data = { - "dict1": {}, - "dict2": {}, - } - result = combine_subdict_values(data) - assert result == {"all": []} - - # Test case 4: Input dictionary with sub-dictionaries containing various data types - data = { - "dict1": {"a": 1, "b": "text", "c": [1, 2, 3]}, - "dict2": {"d": None, "e": 5.5}, - } - result = combine_subdict_values(data) - assert result == {"all": [1, "text", [1, 2, 3], None, 5.5]} - - -# Define a sample DataFrame for testing -sample_data = { - "A": [1, 2, 3, 4, 5], - "B": [2, 3, 4, 5, 6], - "C": [3, 4, 5, 6, 7], - "D": [4, 5, 6, 7, 8], -} -sample_df = pd.DataFrame(sample_data) - -# Define the provided 'unique_columns_rings_grouped' data for testing -unique_columns_rings_grouped = { - 1: {0: "A_101_hydrophobic"}, - 2: {1: "B_202_Donor_hbond", 3: "A_102_hydrophobic"}, - 3: {2: "C_[5, 6]_Group3_NI_saltbridge"}, - 4: {4: "A_501_F_halogen"}, - 5: {5: "A_301_Acceptor_hbond"}, - 6: {6: "A_401_Acceptor_waterbridge"}, - 7: {7: "B_501_Donor_waterbridge"}, - 8: {8: "A_[7, 8]_pistacking"}, - 9: {9: "A_[9_ 10]_Group4_pication"}, - 10: {10: "A_401_Fe_site1_metal"}, - 11: {11: "A_[7, 8]_Group4_PI_saltbridge"}, - 12: {12: "C_104_hydrophobic"}, -} - - -def test_filtering_values_with_provided_data(): - # Test case 1: Check if the function returns a list - threshold = 0.2 # 20% threshold - frames = 1000 # Some arbitrary number of frames - df = pd.DataFrame() # Create an empty DataFrame for testing - result = filtering_values(threshold, frames, df, unique_columns_rings_grouped) - - assert isinstance(result, list) # Check if the result is a list - - # Test case 2: Check if the filtered values are present in the DataFrame - assert all(col in df.columns for col in result) - - # Test case 3: Check if the DataFrame has the correct shape after filtering - expected_shape = (df.shape[0], df.shape[1] + len(result)) - assert df.shape == expected_shape - - # Test case 4: Check if the filtered values are not duplicated - assert len(set(result)) == len(result) - - # Test case 5: Check if the DataFrame values are initially set to None - assert df[result].isnull().all().all() - - # Test case 6: Check if the threshold calculation is correct - expected_threshold = threshold * frames - occurrences = {value: 5 for value in result} # Assume all values occur 5 times - assert all(count >= expected_threshold for count in occurrences.values()) - - -def test_df_iteration_numbering(): - # Sample DataFrame for testing - data = { - "Unnamed: 0": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - ], - "RESNR": [ - 98, - 63, - 162, - 161, - 166, - 165, - 125, - 166, - 211, - 227, - 223, - 165, - 100, - 59, - 98, - 207, - 164, - 155, - 228, - ], - "RESTYPE": [ - "PHE", - "ARG", - "ALA", - "PHE", - "ARG", - "ASP", - "TYR", - "ARG", - "PHE", - "LEU", - "THR", - "ASP", - "ASP", - "ARG", - "PHE", - "PHE", - "LYS", - "HEM", - "SER", - ], - "RESCHAIN": [ - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - ], - "RESNR_LIG": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "RESTYPE_LIG": [ - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "HEM", - "UNK", - ], - "RESCHAIN_LIG": [ - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - ], - "DIST": [ - 3.46, - 0.00, - 0.00, - 0.00, - 0.00, - 0.00, - 0.00, - 3.36, - 3.61, - 3.84, - 3.62, - 3.72, - 3.62, - 3.99, - 3.65, - 3.70, - 5.16, - 2.55, - 2.34, - ], - "LIGCARBONIDX": [ - 4196.0, - 0.0, - 4214.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4206.0, - 4207.0, - 4207.0, - 4215.0, - 4217.0, - 4217.0, - 4194.0, - 4208.0, - 0.0, - 0.0, - 0.0, - ], - "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "166ARGA_4220,4221_Carboxylate_NI_saltbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "98PHEA_4194_hydrophobic": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "63ARGA_4201_Acceptor_waterbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "166ARGA_4220_Acceptor_hbond": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "98PHEA_4225_Donor_hbond": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "207PHEA_4213,4214,4215,4216,4217,4218_pistacking": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "100ASPA_4005_Donor_waterbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "59ARGA_4222_Acceptor_waterbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "HEM_4255_Fe_4.0_metal": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "228SERA_4228_F_halogen": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - } - - df = pd.DataFrame(data) - - interactions = [ - "hbond", - "waterbridge", - "hydrophobic", - "hbond", - "hbond", - "hbond", - "hbond", - "saltbridge", - "hydrophobic", - "hydrophobic", - "hydrophobic", - "hydrophobic", - "waterbridge", - "waterbridge", - "hydrophobic", - "pistacking", - "pication", - "metal", - "halogen", - ] - df["INTERACTION"] = interactions - - # Define the values for the "PROTISDON" column - protisdon_values = [ - False, - True, - True, - True, - True, - True, - True, - 0, - 0, - 0, - 0, - 0, - False, - True, - 0, - 0, - 0, - 0, - 0, - ] - - # Update the "PROTISDON" column in the DataFrame - df["PROTISDON"] = protisdon_values - - # Define the values for the "Prot_partner" column - prot_partner_values = [ - "98PHEA", - "63ARGA", - "162ALAA", - "161PHEA", - "166ARGA", - "165ASPA", - "125TYRA", - "166ARGA", - "211PHEA", - "227LEUA", - "223THRA", - "165ASPA", - "100ASPA", - "59ARGA", - "98PHEA", - "207PHEA", - "164LYSA", - "105HEM", - "228SERA", - ] - - # Update the "Prot_partner" column in the DataFrame - df["Prot_partner"] = prot_partner_values - - # Define the values for the "ACCEPTORIDX" column - acceptoridx_values = [ - 0.0, - 0.0, - 4221.0, - 4221.0, - 4220.0, - 4220.0, - 4192.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - ] - - # Update the "ACCEPTORIDX" column in the DataFrame - df["ACCEPTORIDX"] = acceptoridx_values - - # Define the values for the "DONORIDX" column - donoridx_values = [ - 4225.0, - 0.0, - 2417.0, - 2397.0, - 2468.0, - 2456.0, - 1828.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - ] - - # Update the "DONORIDX" column in the DataFrame - df["DONORIDX"] = donoridx_values - - # Define the values for the "ACCEPTOR_IDX" column - acceptor_idx_values = [ - 0.0, - 4201.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4222.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - ] - - # Add the "ACCEPTOR_IDX" column to the DataFrame - df["ACCEPTOR_IDX"] = acceptor_idx_values - - # Define the values for the "DONOR_IDX" column - donor_idx_values = [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4005.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4228.0, - ] - - # Add the "DONOR_IDX" column to the DataFrame - df["DONOR_IDX"] = donor_idx_values - - # Define the values for the "DON_IDX" column - don_idx_values = [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4005.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4228.0, - ] - - # Add the "DON_IDX" column to the DataFrame - df["DON_IDX"] = don_idx_values - - # Define the values for the "LIG_IDX_LIST" column - lig_idx_list_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "4220,4221", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "4213,4214,4215,4216,4217,4218", - "4213,4214,4215,4216,4217,4218", - 0, - 0, - ] - - # Add the "LIG_IDX_LIST" column to the DataFrame - df["LIG_IDX_LIST"] = lig_idx_list_values - - # Define the values for the "LIG_GROUP" column - lig_group_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "Carboxylate", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "Aromatic", - "Aromatic", - 0, - 0, - ] - - # Add the "LIG_GROUP" column to the DataFrame - df["LIG_GROUP"] = lig_group_values - - # Define the values for the "TARGET_IDX" column - target_idx_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4255, 0] - - # Add the "TARGET_IDX" column to the DataFrame - df["TARGET_IDX"] = target_idx_values - - # Define the values for the "METAL_TYPE" column - metal_type_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "Fe", 0] - - # Add the "METAL_TYPE" column to the DataFrame - df["METAL_TYPE"] = metal_type_values - - # Define the values for the "COORDINATION" column - coordination_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0] - - # Add the "COORDINATION" column to the DataFrame - df["COORDINATION"] = coordination_values - - # Define the values for the "DONORTYPE" column - donor_type_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "F"] - - # Add the "DONORTYPE" column to the DataFrame - df["DONORTYPE"] = donor_type_values - - # Updated unique_data dictionary - unique_data = { - "63ARGA_4201_Acceptor_waterbridge": "63ARGA_4201_Acceptor_waterbridge", - "166ARGA_4220_Acceptor_hbond": "166ARGA_4220_Acceptor_hbond", - "166ARGA_4220,4221_Carboxylate_NI_saltbridge": "166ARGA_4220,4221_Carboxylate_NI_saltbridge", - "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic": "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic", - "98PHEA_4194_hydrophobic": "98PHEA_4194_hydrophobic", - "98PHEA_4225_Donor_hbond": "98PHEA_4225_Donor_hbond", - "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication": "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication", - "207PHEA_4213,4214,4215,4216,4217,4218_pistacking": "207PHEA_4213,4214,4215,4216,4217,4218_pistacking", - "59ARGA_4222_Acceptor_waterbridge": "59ARGA_4222_Acceptor_waterbridge", - "100ASPA_4005_Donor_waterbridge": "100ASPA_4005_Donor_waterbridge", - "HEM_4255_Fe_4.0_metal": "HEM_4255_Fe_4.0_metal", - "228SERA_4228_F_halogen": "228SERA_4228_F_halogen", - } - - # Call the function with the sample DataFrame and unique_data - df_iteration_numbering(df, unique_data) - - expected_162ALAA_values = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - df["162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic"] - == expected_162ALAA_values - ).all() - - expected_98PHEA_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] - assert (df["98PHEA_4194_hydrophobic"] == expected_98PHEA_values).all() - - expected_166ARGA_4220_Acceptor_hbond_values = [ - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["166ARGA_4220_Acceptor_hbond"] == expected_166ARGA_4220_Acceptor_hbond_values - ).all() - - expected_Carboxylate_NI_saltbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["166ARGA_4220,4221_Carboxylate_NI_saltbridge"] - == expected_Carboxylate_NI_saltbridge_values - ).all() - - expected_63ARGA_4201_Acceptor_waterbridge_values = [ - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["63ARGA_4201_Acceptor_waterbridge"] - == expected_63ARGA_4201_Acceptor_waterbridge_values - ).all() - - expected_98PHEA_4225_Donor_hbond_values = [ - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["98PHEA_4225_Donor_hbond"] == expected_98PHEA_4225_Donor_hbond_values - ).all() - - expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - ] - assert ( - df["164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication"] - == expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values - ).all() - - expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - ] - assert ( - df["207PHEA_4213,4214,4215,4216,4217,4218_pistacking"] - == expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values - ).all() - - expected_59ARGA_4222_Acceptor_waterbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["59ARGA_4222_Acceptor_waterbridge"] - == expected_59ARGA_4222_Acceptor_waterbridge_values - ).all() - - expected_100ASPA_4005_Donor_waterbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["100ASPA_4005_Donor_waterbridge"] - == expected_100ASPA_4005_Donor_waterbridge_values - ).all() - - expected_HEM_4255_Fe_4_metal_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - ] - assert (df["HEM_4255_Fe_4.0_metal"] == expected_HEM_4255_Fe_4_metal_values).all() - - expected_228SERA_4228_F_halogen_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - ] - assert ( - df["228SERA_4228_F_halogen"] == expected_228SERA_4228_F_halogen_values - ).all() - - -@pytest.fixture -def sample_dataframe_it_peptides(): - # Create a sample DataFrame for testing - data = { - "Unnamed: 0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - "Prot_partner": [ - "62VAL", - "SER144", - "GLU321", - "ILE432", - "LEU248", - "SER300", - "TYR343", - "ILE178", - "PHE344", - "PHE754", - "LYS567", - "LYS567", - "HIS", - ], - "LIGCARBONIDX": [ - 101, - 202, - 155, - 102, - 501, - 301, - 467, - 467, - 398, - 245, - 228, - 423, - 256, - ], - "INTERACTION": [ - "hydrophobic", - "hbond", - "saltbridge", - "hydrophobic", - "halogen", - "hbond", - "waterbridge", - "waterbridge", - "pistacking", - "pication", - "pication", - "saltbridge", - "metal", - ], - "PROTISDON": [ - None, - False, - None, - None, - None, - True, - True, - False, - None, - None, - None, - False, - None, - ], - "ACCEPTORIDX": [ - None, - 202, - None, - None, - None, - 301, - None, - None, - None, - None, - None, - None, - None, - ], - "RESNR_LIG": [101, 202, 155, 102, 501, 301, 455, 467, 398, 245, 228, 423, 256], - "DONORIDX": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - ], - "DONORTYPE": [ - None, - None, - None, - None, - "F", - None, - None, - None, - None, - None, - None, - None, - None, - ], - "LIG_IDX_LIST": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - ], - "RESTYPE_LIG": [ - "VAL", - "TYR", - "ARG", - "VAL", - "ILE", - "HIS", - "SER", - "TYR", - "PHE", - "ARG", - "PHE", - "GLU", - "HIS", - ], - "TARGET_IDX": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - ], - "METAL_TYPE": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - "Fe", - ], - "62VAL_101ILE_hydrophobic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "SER144_202TYR_Donor_hbond": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "GLU321_155ARG_ARG_NI_saltbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "ILE432_102VAL_hydrophobic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "LEU248_501ILE_F_halogen": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "SER300_301HIS_Acceptor_hbond": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "TYR343_455SER_Acceptor_waterbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "ILE178_467TYR_Donor_waterbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "PHE344_398PHE_pistacking": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "PHE754_245ARG_ARG_pication": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "LYS567_228PHE_PHE_pication": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "LYS567_423GLU_GLU_PI_saltbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "HIS_256HIS_Fe_site1_metal": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - } - - df = pd.DataFrame(data) - return df - - -def test_df_iteration_numbering_with_peptide(sample_dataframe_it_peptides): - unique_data = { - 0: "62VAL_101ILE_hydrophobic", - 1: "SER144_202TYR_Donor_hbond", - 2: "GLU321_155ARG_ARG_NI_saltbridge", - 3: "ILE432_102VAL_hydrophobic", - 4: "LEU248_501ILE_F_halogen", - 5: "SER300_301HIS_Acceptor_hbond", - 6: "TYR343_455SER_Acceptor_waterbridge", - 7: "ILE178_467TYR_Donor_waterbridge", - 8: "PHE344_398PHE_pistacking", - 9: "PHE754_245ARG_ARG_pication", - 10: "LYS567_228PHE_PHE_pication", - 11: "LYS567_423GLU_GLU_PI_saltbridge", - 12: "HIS_256HIS_Fe_site1_metal", - } - - df_iteration_numbering(sample_dataframe_it_peptides, unique_data, peptide=True) - - # Assertions similar to the provided ones - expected_101ILE_hydrophobic_values = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["62VAL_101ILE_hydrophobic"] - == expected_101ILE_hydrophobic_values - ).all() - - expected_202TYR_Donor_hbond_values = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["SER144_202TYR_Donor_hbond"] - == expected_202TYR_Donor_hbond_values - ).all() - - expected_155ARG_ARG_NI_saltbridge_values = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["GLU321_155ARG_ARG_NI_saltbridge"] - == expected_155ARG_ARG_NI_saltbridge_values - ).all() - - expected_102VAL_hydrophobic_values = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["ILE432_102VAL_hydrophobic"] - == expected_102VAL_hydrophobic_values - ).all() - - expected_501ILE_halogen_values = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["LEU248_501ILE_F_halogen"] - == expected_501ILE_halogen_values - ).all() - - expected_301HIS_Acceptor_hbond_values = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["SER300_301HIS_Acceptor_hbond"] - == expected_301HIS_Acceptor_hbond_values - ).all() - - expected_455SER_Acceptor_waterbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - sample_dataframe_it_peptides["TYR343_455SER_Acceptor_waterbridge"] - == expected_455SER_Acceptor_waterbridge_values - ).all() - - expected_467TYR_Donor_waterbridge_values = [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["ILE178_467TYR_Donor_waterbridge"] - == expected_467TYR_Donor_waterbridge_values - ).all() - - expected_398PHE_pistacking_values = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["PHE344_398PHE_pistacking"] - == expected_398PHE_pistacking_values - ).all() - - expected_245ARG_ARG_pication_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["PHE754_245ARG_ARG_pication"] - == expected_245ARG_ARG_pication_values - ).all() - - expected_228PHE_PHE_pication_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0] - assert ( - sample_dataframe_it_peptides["LYS567_228PHE_PHE_pication"] - == expected_228PHE_PHE_pication_values - ).all() - - expected_423GLU_GLU_PI_saltbridge_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0] - assert ( - sample_dataframe_it_peptides["LYS567_423GLU_GLU_PI_saltbridge"] - == expected_423GLU_GLU_PI_saltbridge_values - ).all() - - expected_256HIS_Fe_site1_metal_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] - assert ( - sample_dataframe_it_peptides["HIS_256HIS_Fe_site1_metal"] - == expected_256HIS_Fe_site1_metal_values - ).all() - - -@pytest.fixture -def sample_data(): - # Create sample data for testing - df = pd.DataFrame( - {"FRAME": [1, 2, 3], "Column1": [10, 20, 30], "Column2": [40, 50, 60]} - ) - - new_df = pd.DataFrame( - {"FRAME": [1, 2, 3], "Column1": [100, 200, 300], "Column2": [400, 500, 600]} - ) - - unique_data = {"Column1": "Column1", "Column2": "Column2"} - - return df, new_df, unique_data - - -def test_update_values(sample_data): - # Arrange - df, new_df, unique_data = sample_data - - # Set 'FRAME' as the index for new_df - new_df = new_df.set_index("FRAME") - - # Act - update_values(df, new_df, unique_data) - - # Assert - expected_df = pd.DataFrame( - {"FRAME": [1, 2, 3], "Column1": [100, 200, 300], "Column2": [400, 500, 600]} - ) - - # Check if the specific values are updated - assert df[["Column1", "Column2"]].equals(expected_df[["Column1", "Column2"]]) - - -def test_calculate_representative_frame(): - test_data_directory = "openmmdl/tests/data/in" - test_data_directory = Path("openmmdl/tests/data/in") - # load mdtraj trajectory - md = mda.Universe( - f"{test_data_directory}/interacting_waters.pdb", - f"{test_data_directory}/interacting_waters.dcd", - ) - dm = calculate_distance_matrix(md, "protein or resname UNK") - rep = calculate_representative_frame([i for i in range(1, 10)], dm) - assert rep == 4 diff --git a/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py b/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py deleted file mode 100644 index 07918a70..00000000 --- a/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py +++ /dev/null @@ -1,124 +0,0 @@ -import pytest -import subprocess -import os -import openmmdl - -from pathlib import Path - -from openmmdl.openmmdl_analysis.preprocessing import ( - process_pdb_file, - increase_ring_indices, - convert_ligand_to_smiles, -) -from openmmdl.openmmdl_analysis.rmsd_calculation import ( - rmsd_for_atomgroups, - RMSD_dist_frames, -) -from openmmdl.openmmdl_analysis.interaction_gathering import ( - characterize_complex, - retrieve_plip_interactions, - create_df_from_binding_site, - process_frame, - process_trajectory, - fill_missing_frames, -) -from openmmdl.openmmdl_analysis.binding_mode_processing import ( - gather_interactions, - remove_duplicate_values, - combine_subdict_values, - filtering_values, - unique_data_generation, - df_iteration_numbering, - update_values, -) -from openmmdl.openmmdl_analysis.markov_state_figure_generation import ( - min_transition_calculation, - binding_site_markov_network, -) -from openmmdl.openmmdl_analysis.rdkit_figure_generation import ( - split_interaction_data, - highlight_numbers, - generate_interaction_dict, - update_dict, - create_and_merge_images, - arranged_figure_generation, -) -from openmmdl.openmmdl_analysis.barcode_generation import ( - barcodegeneration, - plot_barcodes, - plot_waterbridge_piechart, -) -from openmmdl.openmmdl_analysis.visualization_functions import ( - interacting_water_ids, - save_interacting_waters_trajectory, - cloud_json_generation, -) -from openmmdl.openmmdl_analysis.pml_writer import ( - generate_md_pharmacophore_cloudcenters, - generate_bindingmode_pharmacophore, - generate_pharmacophore_centers_all_points, - generate_point_cloud_pml, -) - -# Print current working directory -print("Current working directory:", os.getcwd()) - -# Print the full path to the input file -input_pdb_filename = "openmmdl/tests/data/in/0_unk_hoh.pdb" -print("Full path to input file:", os.path.abspath(input_pdb_filename)) - -test_data_directory = Path("openmmdl/tests/data/in") - - -@pytest.fixture(scope="session") -def test_data_dir(tmp_path_factory): - data_dir = tmp_path_factory.mktemp("test_data") - return data_dir - - -# def test_script_execution(test_data_dir): -# # Define the root directory -# root_dir = "/home/runner/work/OpenMMDL/OpenMMDL" -# -# # Specify the relative path to the input data -# relative_input_path = "openmmdl/tests/data/in" -# -# # Combine the root directory and relative input path -# input_data_path = os.path.join(root_dir, relative_input_path) - -# # Ensure that the script runs successfully without errors -# script_path = "openmmdlanalysis.py" -# topology_file = os.path.join(input_data_path, "0_unk_hoh.pdb") -# trajectory_file = os.path.join(root_dir, "openmmdl/tests/data/in/all_50.dcd") -# ligand_sdf_file = os.path.join(input_data_path, "lig.sdf") -# ligand_name = "UNK" - -# cmd = f" openmmdl_analysis -t {topology_file} -d {trajectory_file} -l {ligand_sdf_file} -n {ligand_name} -b 10 -c 2" - -# result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=test_data_dir) - -# assert result.returncode == 0, f"Script execution failed with error:\n{result.stderr.decode()}" - -# # Check that expected output files are generated -# assert os.path.exists(os.path.join(test_data_dir, "complex.pdb")) -# assert os.path.exists(os.path.join(test_data_dir, "lig.pdb")) -# assert os.path.exists(os.path.join(test_data_dir, "df_all.csv")) - -# # Check for the presence of "Barcodes" folder -# barcodes_folder = os.path.join(test_data_dir, "Barcodes") -# assert os.path.exists(barcodes_folder), "The 'Barcodes' folder is missing." - -# # Check for the existence of "hydrophobic_barcodes.png" inside "Barcodes" -# hydro_file_path = os.path.join(barcodes_folder, "hydrophobic_barcodes.png") -# assert os.path.exists(hydro_file_path), "The 'hydrophobic_barcodes.png' file is missing inside 'Barcodes'." - - -# Check for the presence of "Binding_Modes_Markov_States" folder -# markov_states_folder = os.path.join(test_data_dir, "Binding_Modes_Markov_States") -# assert os.path.exists(markov_states_folder), "The 'Binding_Modes_Markov_States' folder is missing." - -# # Check for the existence of "all_binding_modes_arranged.png" inside "Binding_Modes_Markov_States" -# png_file_path = os.path.join(markov_states_folder, "all_binding_modes_arranged.png") -# assert os.path.exists(png_file_path), "The 'all_binding_modes_arranged.png' file is missing inside 'Binding_Modes_Markov_States'." - -# # Add more checks for other output files as needed diff --git a/openmmdl/tests/openmmdl_analysis/pml_writer_test.py b/openmmdl/tests/openmmdl_analysis/pml_writer_test.py deleted file mode 100644 index f68b0ce8..00000000 --- a/openmmdl/tests/openmmdl_analysis/pml_writer_test.py +++ /dev/null @@ -1,248 +0,0 @@ -import numpy as np -import pandas as pd -import re -import os -from pathlib import Path -import matplotlib.pyplot as plt -import xml.etree.ElementTree as ET -import pytest -from openmmdl.openmmdl_analysis.pml_writer import * - - -# pml_writer tests -@pytest.fixture -def sample_dataframe_generate_pharmacophore_centers(): - data = { - "Hydrophobic": [1, 1, 0, 1, 0], - "Ionic": [0, 1, 0, 0, 1], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - } - df = pd.DataFrame(data) - return df - - -@pytest.fixture -def sample_interactions_generate_pharmacophore_centers(): - return ["Hydrophobic", "Ionic"] - - -def test_generate_pharmacophore_centers( - sample_dataframe_generate_pharmacophore_centers, - sample_interactions_generate_pharmacophore_centers, -): - result = generate_pharmacophore_centers( - sample_dataframe_generate_pharmacophore_centers, - sample_interactions_generate_pharmacophore_centers, - ) - - expected_pharmacophore = { - "Hydrophobic": [2.333, 3.333, 4.333], - "Ionic": [3.5, 4.5, 5.5], - } - - assert result == expected_pharmacophore - - -@pytest.fixture -def sample_dataframe_generate_pharmacophore_vectors(): - # Create a sample dataframe for testing - data = { - "HBDonors": [1, 0, 1, 0, 1], - "HBAcceptors": [0, 1, 0, 1, 0], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - "PROTCOO": [ - "(0.5, 1.5, 2.5)", - "(1.5, 2.5, 3.5)", - "(2.5, 3.5, 4.5)", - "(3.5, 4.5, 5.5)", - "(4.5, 5.5, 6.5)", - ], - } - df = pd.DataFrame(data) - return df - - -@pytest.fixture -def sample_interactions_generate_pharmacophore_vectors(): - return ["HBDonors", "HBAcceptors"] - - -def test_generate_pharmacophore_vectors( - sample_dataframe_generate_pharmacophore_vectors, - sample_interactions_generate_pharmacophore_vectors, -): - result = generate_pharmacophore_vectors( - sample_dataframe_generate_pharmacophore_vectors, - sample_interactions_generate_pharmacophore_vectors, - ) - - expected_pharmacophore = { - "HBDonors": [[3.0, 4.0, 5.0], [2.5, 3.5, 4.5]], - "HBAcceptors": [[3.0, 4.0, 5.0], [2.5, 3.5, 4.5]], - } - - assert result == expected_pharmacophore - - -def test_generate_md_pharmacophore_cloudcenters(tmp_path): - # Sample data for the DataFrame - data = { - "Acceptor_hbond_1": [1, 0, 1, 0, 1], - "Donor_hbond_1": [0, 1, 0, 1, 0], - "pistacking_1": [1, 0, 0, 1, 1], - "hydrophobic_1": [0, 1, 0, 1, 0], - "PI_saltbridge_1": [1, 0, 1, 0, 1], - "NI_saltbridge_1": [0, 1, 0, 1, 0], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - "PROTCOO": [ - "(7.0, 6.0, 5.0)", - "(6.0, 5.0, 4.0)", - "(5.0, 4.0, 3.0)", - "(4.0, 3.0, 2.0)", - "(3.0, 2.0, 1.0)", - ], - } - - df = pd.DataFrame(data) - - # Output file paths - output_filename = tmp_path / "test_output.pml" - - # Call the function - generate_md_pharmacophore_cloudcenters( - df, "core_compound", output_filename, "system_name", id_num=0 - ) - - # Check if the output file is created - assert os.path.isfile(output_filename), f"File {output_filename} not found." - - # Check if the generated XML is valid - try: - ET.parse(output_filename) - except ET.ParseError: - pytest.fail(f"Invalid XML in {output_filename}") - - -def test_generate_pharmacophore_centers_all_points(): - # Sample data for the DataFrame - data = { - "interaction1": [1, 0, 1, 0, 1], - "interaction2": [0, 1, 0, 1, 0], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - } - - df = pd.DataFrame(data) - - # Sample interactions - interactions = ["interaction1", "interaction2"] - - # Call the function - pharmacophore = generate_pharmacophore_centers_all_points(df, interactions) - - # Check if the generated pharmacophore has the expected structure - assert isinstance(pharmacophore, dict), "Pharmacophore should be a dictionary." - - for interaction in interactions: - assert ( - interaction in pharmacophore - ), f"{interaction} not found in the generated pharmacophore." - - points = pharmacophore[interaction] - assert isinstance( - points, list - ), f"Pharmacophore points for {interaction} should be a list." - - # Check if the points have the expected structure - for point in points: - assert ( - isinstance(point, list) and len(point) == 3 - ), "Each point should be a list of three coordinates." - - -def test_generate_point_cloud_pml(tmp_path): - # Sample data for the cloud_dict - cloud_dict = { - "feature1": { - "interaction1": [(1.0, 2.0, 3.0), (1.5, 2.5, 3.5), (2.0, 3.0, 4.0)], - "interaction2": [(2.0, 3.0, 4.0), (2.5, 3.5, 4.5), (3.0, 4.0, 5.0)], - }, - "feature2": { - "interaction3": [(3.0, 4.0, 5.0), (3.5, 4.5, 5.5), (4.0, 5.0, 6.0)], - }, - } - - # Output file paths - outname = tmp_path / "test_output" - outname_pml = f"{outname}.pml" - - # Call the function - generate_point_cloud_pml(cloud_dict, "system_name", outname) - - # Check if the output file is created - assert os.path.isfile(outname_pml), f"File {outname_pml} not found." - - # Check if the generated XML is valid - try: - ET.parse(outname_pml) - except ET.ParseError: - pytest.fail(f"Invalid XML in {outname_pml}") - - -def test_generate_bindingmode_pharmacophore(tmp_path): - # Prepare inputs - dict_bindingmode = { - "Acceptor_hbond": {"PROTCOO": [[1, 2, 3]], "LIGCOO": [[4, 5, 6]]} - } - core_compound = "ligand" - sysname = "system" - id_num = 0 - - # Create a symbolic link in the temporary directory - os.symlink( - os.path.abspath("./Binding_Modes_Markov_States"), - f"{tmp_path}/Binding_Modes_Markov_States", - ) - - # Prepare the output filename - outname = "test_output" - - # Call the function - generate_bindingmode_pharmacophore( - dict_bindingmode, core_compound, sysname, outname, id_num - ) - - # Prepare the full output path - outname_pml = f"{tmp_path}/Binding_Modes_Markov_States/{outname}.pml" - - # Check if the output file is created - assert os.path.isfile(outname_pml), f"File {outname_pml} not found." - - # Check if the generated XML is valid - try: - ET.parse(outname_pml) - except ET.ParseError: - pytest.fail(f"Invalid XML in {outname_pml}") diff --git a/openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py b/openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py deleted file mode 100644 index 283bc74b..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py +++ /dev/null @@ -1,261 +0,0 @@ -import MDAnalysis as mda -import pytest -import pandas as pd -import re -import os -import shutil -from Bio.PDB.Structure import Structure -from Bio.PDB import PDBParser -from pathlib import Path -from unittest.mock import patch, mock_open -from openmmdl.openmmdl_analysis.find_stable_waters import ( - perform_clustering_and_writing, - stable_waters_pipeline, - trace_waters, - filter_and_parse_pdb, - write_pdb_clusters_and_representatives, - find_interacting_residues, - read_pdb_as_dataframe, - analyze_protein_and_water_interaction, -) - -# Fixtures and mock data setup - -test_data_directory = Path("openmmdl/tests/data/in") -topology_file = f"{test_data_directory}/metal_top.pdb" -trajectory_file = f"{test_data_directory}/metal_traj_25.dcd" -csv_file_path = f"{test_data_directory}/stable_waters.csv" -repwat_file_path = f"{test_data_directory}/representative_waters.pdb" -output_dirs = [] - - -def test_stable_waters_pipeline(): - water_eps_values = [0.5, 1.0, 2.0] # Example epsilon values - - for water_eps in water_eps_values: - output_directory = f"./test_output" - stable_waters_pipeline( - topology_file, trajectory_file, water_eps, output_directory - ) - - strEps = str(water_eps).replace(".", "") - output_directory = f"./test_output_clusterEps_{strEps}" - # Check if the expected output directory is created - assert os.path.isdir( - output_directory - ), f"Directory {output_directory} was not created" - output_dirs.append(output_directory) - - # Check if stable_waters.csv is created - csv_file = os.path.join(output_directory, "stable_waters.csv") - assert os.path.isfile(csv_file) - - # Load and verify the data in stable_waters.csv - stable_waters_df = pd.read_csv(csv_file) - assert not stable_waters_df.empty - assert set(stable_waters_df.columns) == { - "Frame", - "Residue", - "Oxygen_X", - "Oxygen_Y", - "Oxygen_Z", - } - - # Cleanup: remove created directories and files - for dir in output_dirs: - shutil.rmtree(dir) - - -def test_perform_clustering(): - # Load the stable_waters data from the CSV file - stable_waters_df = pd.read_csv(csv_file_path) - - # Define test parameters - cluster_eps = 2 - - u = mda.Universe(topology_file, trajectory_file) - # Get the total number of frames for the progress bar - total_frames = len(u.trajectory) - output_directory = "./test_output_clustering" - - # Run the function - perform_clustering_and_writing( - stable_waters_df, cluster_eps, total_frames, output_directory - ) - - # Define the regular expression pattern for matching the line - pattern = re.compile( - r"ATOM\s+\d+\s+O\s+WAT\s+A\s+\d+\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+1\.00\s+0\.00\s+O" - ) - - # Assert subdirectory creation and file creation - percentage_values = [25, 50, 75, 90, 99] - for percent in percentage_values: - min_samples = int((percent / 100) * total_frames) - sub_directory = os.path.join(output_directory, f"clusterSize{min_samples}") - - assert os.path.isdir(sub_directory), f"Subdirectory for {percent}% not created" - - # Assuming the names of files created, adjust as necessary - expected_files = [ - "cluster_0.pdb", - "cluster_1.pdb", - ] # Replace with actual expected file names - for file_name in expected_files: - file_path = os.path.join(sub_directory, file_name) - assert os.path.isfile( - file_path - ), f"File {file_name} was not created in {sub_directory}" - - # Check the contents of the files - for file_name in expected_files: - file_path = os.path.join(sub_directory, file_name) - with open(file_path, "r") as file: - # Read file and search for the pattern - if not any(pattern.match(line) for line in file): - assert ( - False - ), f"File {file_name} does not contain the required line format" - - # Cleanup - shutil.rmtree(output_directory) - - -def test_write_pdb_clusters_and_representatives(): - # Mock data setup - data = { - "Oxygen_X": [1.0, 2.0, 3.0], - "Oxygen_Y": [4.0, 5.0, 6.0], - "Oxygen_Z": [7.0, 8.0, 9.0], - "Cluster_Label": [0, 0, 1], - } - clustered_waters = pd.DataFrame(data) - min_samples = 2 - output_sub_directory = "test_write_representatives" - - if os.path.exists(output_sub_directory): - shutil.rmtree(output_sub_directory) - os.makedirs(output_sub_directory, exist_ok=True) - - # Run the function - write_pdb_clusters_and_representatives( - clustered_waters, min_samples, output_sub_directory - ) - - # Assert file creation - unique_labels = clustered_waters["Cluster_Label"].unique() - for label in unique_labels: - filename = os.path.join(output_sub_directory, f"cluster_{label}.pdb") - assert os.path.isfile(filename), f"File {filename} not created" - - # Assert representative_waters.pdb creation and contents - rep_file = os.path.join(output_sub_directory, "representative_waters.pdb") - assert os.path.isfile(rep_file), "representative_waters.pdb not created" - - # Cleanup - shutil.rmtree(output_sub_directory) - - -def test_filter_and_parse_pdb(): - # Call the function with the sample PDB file - structure = filter_and_parse_pdb(topology_file) - - # Check if the returned object is a Structure - assert isinstance(structure, Structure), "The returned object is not a Structure" - - -def test_find_interacting_residues(): - representative_waters_file = test_data_directory / "representative_waters.pdb" - distance_threshold = 2.0 # Example threshold - - # Parse structure.pdb - parser = PDBParser(QUIET=True) - structure = parser.get_structure("protein", str(topology_file)) - - # Read representative_waters.pdb into a DataFrame - waters_data = [] - with open(representative_waters_file, "r") as file: - for line in file: - if line.startswith("ATOM"): - parts = line.split() - x, y, z = map(float, parts[5:8]) - waters_data.append([x, y, z]) - representative_waters = pd.DataFrame( - waters_data, columns=["Oxygen_X", "Oxygen_Y", "Oxygen_Z"] - ) - - # Run find_interacting_residues - interacting_residues = find_interacting_residues( - structure, representative_waters, distance_threshold - ) - - # Assert the results - assert isinstance(interacting_residues, dict) - # Example: Check if a specific water molecule interacts with any residues. We now fromthe test data that water 17 should interact. - assert 17 in interacting_residues - - -def test_read_pdb_as_dataframe(): - # Mock PDB file content - mock_pdb_content = ( - "ATOM 1 O WAT A 1 26.091 60.495 24.828 1.00 0.00 O\n" - "ATOM 2 O WAT A 2 30.000 50.000 40.000 1.00 0.00 O\n" - ) - - # Expected data - expected_data = [[26.091, 60.495, 24.828], [30.000, 50.000, 40.000]] - expected_df = pd.DataFrame( - expected_data, columns=["Oxygen_X", "Oxygen_Y", "Oxygen_Z"] - ) - - # Mock open function - with patch("builtins.open", mock_open(read_data=mock_pdb_content)): - # Call the function - result_df = read_pdb_as_dataframe("dummy_path.pdb") - - # Assert DataFrame content - pd.testing.assert_frame_equal(result_df, expected_df) - - -def test_analyze_protein_and_water_interaction(): - # Paths to the real PDB files - - protein_pdb_file = topology_file - representative_waters_file = ( - "representative_waters.pdb" # Assuming this is the correct name - ) - - # Setup output directory - cluster_eps = 1.0 # Example value, adjust as needed - strEps = str(cluster_eps).replace(".", "") - output_directory = Path("testprotwatint/output_clusterEps_" + strEps) - if output_directory.exists(): - shutil.rmtree(output_directory) - os.makedirs(output_directory, exist_ok=True) - - # Create subdirectories and copy representative_waters.pdb into each - mock_subdirectories = ["subdir1", "subdir2"] - for subdir in mock_subdirectories: - sub_path = output_directory / subdir - os.makedirs(sub_path, exist_ok=True) - shutil.copy(test_data_directory / representative_waters_file, sub_path) - - test_output_directory = Path("testprotwatint/output") - os.makedirs(test_output_directory, exist_ok=True) - # Run the function - analyze_protein_and_water_interaction( - str(protein_pdb_file), - representative_waters_file, - cluster_eps, - str(test_output_directory), - distance_threshold=5.0, - ) - - # Assert file creation in each subdirectory - for subdir in mock_subdirectories: - result_file = output_directory / subdir / "interacting_residues.csv" - assert result_file.is_file(), f"File {result_file} not created" - - # Cleanup - shutil.rmtree(output_directory) - shutil.rmtree(test_output_directory) diff --git a/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py b/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py deleted file mode 100644 index 889bf850..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py +++ /dev/null @@ -1,360 +0,0 @@ -import os -import pytest -import shutil -import tempfile -from pathlib import Path -import pandas as pd -import numpy as np -import mdtraj as md -import MDAnalysis as mda -import unittest -from unittest.mock import Mock, patch -from plip.structure.preparation import PDBComplex, LigandFinder, Mol, PLInteraction - -from openmmdl.openmmdl_analysis.interaction_gathering import * - - -test_data_directory = Path("openmmdl/tests/data/in") -topology_file = f"{test_data_directory}/complex.pdb" -frame_file = f"{test_data_directory}/processing_frame_1.pdb" -topology_metal = f"{test_data_directory}/metal_top.pdb" -trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" -ligand_special = f"{test_data_directory}/ligand_special.pdb" - -binding_site_id = "UNK:X:0" -lig_name = "UNK" -peptide = "X" - - -# Test the function -def test_characterize_complex(): - # Call the function - interaction_set = characterize_complex(topology_file, binding_site_id) - - # Check if the function returns a PLInteraction object - assert isinstance(interaction_set, PLInteraction) - - -def test_retrieve_plip_interactions(): - # Call the function - interactions = retrieve_plip_interactions(topology_file, lig_name) - - # Check if the function returns a dictionary - assert isinstance(interactions, dict) - - -def test_retrieve_plip_interactions_peptide(): - # Call the function - interactions = retrieve_plip_interactions_peptide(topology_file, peptide) - - # Check if the function returns a dictionary - assert isinstance(interactions, dict) - - -# Define test data -sample_interactions = { - "hydrophobic": [["Column1", "Column2"], [1, 2], [3, 4]], - "hbond": [["ColumnA", "ColumnB"], ["A", "B"], ["C", "D"]], -} - - -def test_create_df_from_binding_site(): - # Test with valid interaction type - df = create_df_from_binding_site( - sample_interactions, interaction_type="hydrophobic" - ) - assert isinstance(df, pd.DataFrame) - assert df.shape == (2, 2) - assert list(df.columns) == ["Column1", "Column2"] - - # Test with default interaction type - df_default = create_df_from_binding_site(sample_interactions) - assert isinstance(df_default, pd.DataFrame) - assert df_default.shape == (2, 2) - assert list(df_default.columns) == ["ColumnA", "ColumnB"] - - # Test with an invalid interaction type (should default to 'hbond') - df_invalid = create_df_from_binding_site( - sample_interactions, interaction_type="invalid_type" - ) - assert isinstance(df_invalid, pd.DataFrame) - assert df_invalid.shape == (2, 2) - assert list(df_invalid.columns) == ["ColumnA", "ColumnB"] - - -@pytest.fixture -def input_pdb_filename(tmp_path): - input_pdb_filename = tmp_path / "input.pdb" - - # Create a mock PDB file with 10 atoms - input_pdb_content = """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C""" - - input_pdb_filename.write_text(input_pdb_content) - return input_pdb_filename - - -def test_change_lig_to_residue(): - topology_file = f"{test_data_directory}/complex.pdb" - shutil.copy(str(topology_file), ".") - topology_file = "complex.pdb" - - # Change ligand to residue - change_lig_to_residue(str(topology_file), "UNK", "NEW") - - # Read the output PDB file and check if residues are modified - with open(topology_file, "r") as output_file: - modified_lines = output_file.readlines() - assert any("NEW" in line for line in modified_lines) - assert all("UNK" not in line for line in modified_lines) - - -def test_process_frame_with_sample_data(): - # Define a sample frame number - frame_number = 1 - - destination_file = "processing_frame_1.pdb" - - shutil.copy(frame_file, destination_file) - - # Load the sample PDB file into an MDAnalysis Universe - sample_universe = mda.Universe(topology_file) - - # Call the process_frame function with the sample data - result = process_frame(frame_number, sample_universe, lig_name) - - # Define the expected columns you want to check - expected_columns = [ - "FRAME", - "INTERACTION", - ] # Add the specific columns you want to validate - - # Check if the result is a Pandas DataFrame - assert isinstance(result, pd.DataFrame) - - # Check if all expected columns are present in the result - for column in expected_columns: - assert column in result.columns - - -def test_process_frame_with_sample_data_special(): - # Define a sample frame number - frame_number = 1 - special = "HEM" - - destination_file = "processing_frame_1.pdb" - destination_file_complex = "complex.pdb" - - shutil.copy(frame_file, destination_file) - shutil.copy(str(ligand_special), ".") - shutil.copy(str(topology_metal), ".") - shutil.copy(topology_metal, destination_file_complex) - - # Load the sample PDB file into an MDAnalysis Universe - sample_universe = mda.Universe(topology_metal, trajetory_metal) - - # Call the process_frame function with the sample data for special ligand 'HEM' - result_special = process_frame( - frame_number, sample_universe, lig_name, special="HEM" - ) - - # Define the expected columns you want to check for special ligand 'HEM' - expected_columns_special = [ - "FRAME", - "INTERACTION", - "TARGET_IDX", - "RESTYPE", - "LOCATION", - ] # Add specific columns for special ligand 'HEM' - - # Check if the result is a Pandas DataFrame for special ligand 'HEM' - assert isinstance(result_special, pd.DataFrame) - - # Check if all expected columns are present in the result for special ligand 'HEM' - for column in expected_columns_special: - assert column in result_special.columns - - shutil.copy(topology_file, destination_file_complex) - - -def test_process_frame_with_sample_data_peptide(): - # Define a sample frame number - frame_number = 1 - - # Define paths and filenames - peptide_destination_file = f"processing_frame_1.pdb" - - # Copy the frame file to the destination file for testing purposes - shutil.copy(frame_file, peptide_destination_file) - - # Load the sample PDB file into an MDAnalysis Universe - sample_universe = mda.Universe(topology_file) - - # Call the process_frame function with the sample data for peptide - result_peptide = process_frame( - frame_number, sample_universe, lig_name, peptide="X", special=None - ) - - # Define the expected columns you want to check for peptide - expected_columns_peptide = [ - "FRAME", - "INTERACTION", - "TARGET_IDX", - ] # Add specific columns for peptide - - # Check if the result is a Pandas DataFrame for peptide - assert isinstance(result_peptide, pd.DataFrame) - - # Check if all expected columns are present in the result for peptide - for column in expected_columns_peptide: - assert column in result_peptide.columns - - -def test_process_trajectory(): - topology_file = f"{test_data_directory}/0_unk_hoh.pdb" - trajectory_file = f"{test_data_directory}/all_50.dcd" - pdb_md = mda.Universe(topology_file, trajectory_file) - dataframe = None - num_processes = 2 - lig_name = "UNK" - - interaction_list = pd.DataFrame( - columns=[ - "RESNR", - "RESTYPE", - "RESCHAIN", - "RESNR_LIG", - "RESTYPE_LIG", - "RESCHAIN_LIG", - "DIST", - "LIGCARBONIDX", - "PROTCARBONIDX", - "LIGCOO", - "PROTCOO", - ] - ) - - interaction_list = process_trajectory( - pdb_md, dataframe, num_processes, lig_name, special_ligand=None, peptide=None - ) - - assert interaction_list is not None - assert len(interaction_list) > 10 - - -def test_process_frame_special_with_files(): - test_data_directory = "openmmdl/tests/data/in" # Replace with the actual path to your test data directory - topology_metal = f"{test_data_directory}/metal_top.pdb" - trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" - - # Load PDB and DCD files using mdanalysis.Universe - import MDAnalysis as mda - - u = mda.Universe(topology_metal, trajetory_metal) - - lig_name = "UNK" # Replace with the actual ligand name - special = "HEM" # Replace with the actual special residue name - frame = 0 - - result = process_frame_special(frame, u, lig_name, special) - - assert isinstance(result, list) - assert all(isinstance(df, pd.DataFrame) for df in result) - - # Add more specific assertions based on the expected behavior of the function - # For example, check if the columns in the DataFrame are as expected, or if certain conditions hold - - # Clean up any temporary files created during the test - for frame in range(len(u.trajectory)): - temp_file = f"processing_frame_{frame}.pdb" - if os.path.exists(temp_file): - os.remove(temp_file) - - -def test_process_frame_wrapper(): - - test_data_directory = "openmmdl/tests/data/in" # Replace with the actual path to your test data directory - topology_metal = f"{test_data_directory}/metal_top.pdb" - trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" - ligand_special = f"{test_data_directory}/ligand_special.pdb" - shutil.copy(str(topology_metal), ".") - shutil.copy(str(trajetory_metal), ".") - shutil.copy(str(ligand_special), ".") - topology_metal = "metal_top.pdb" - trajetory_metal = "metal_traj_25.dcd" - - # Load PDB and DCD files using MDAnalysis - pdb_md = mda.Universe(topology_metal, trajetory_metal) - lig_name = "UNK" # Replace with the actual ligand name - special_ligand = "HEM" # Replace with the actual special ligand name - peptide = None # Replace with the actual peptide name - frame_idx = 2 - - args = (frame_idx, pdb_md, lig_name, special_ligand, peptide) - result = process_frame_wrapper(args) - - # Perform assertions based on the expected behavior of the process_frame_special function - assert isinstance(result, tuple) - assert len(result) == 2 - assert isinstance(result[0], int) - - -def test_fill_missing_frames(): - # Test Case 1: Basic functionality - data = {"FRAME": [1, 2, 4, 5], "Value1": ["A", "B", "C", "D"]} - df = pd.DataFrame(data) - md_len = 6 - filled_df = fill_missing_frames(df, md_len) - assert all(filled_df["FRAME"] == [1, 2, 3, 4, 5]) - assert all(filled_df.loc[filled_df["FRAME"] == 3, "Value1"] == "skip") - - # Test Case 4: No missing frames - no_missing_frames_data = { - "FRAME": [1, 2, 3, 4, 5, 6], - "Value1": ["A", "B", "C", "D", "E", "F"], - } - no_missing_frames_df = pd.DataFrame(no_missing_frames_data) - filled_no_missing_frames_df = fill_missing_frames(no_missing_frames_df, md_len=6) - assert all( - filled_no_missing_frames_df["FRAME"] == [1, 2, 3, 4, 5, 6] - ) # Should remain unchanged - - # Test Case 5: DataFrame with additional columns - data_with_extra_columns = { - "FRAME": [1, 2, 4, 5], - "Value1": ["A", "B", "C", "D"], - "Value2": [10, 20, 30, 40], - } - df_with_extra_columns = pd.DataFrame(data_with_extra_columns) - - # Ensure the original DataFrame has unique frame numbers - assert df_with_extra_columns["FRAME"].nunique() == len(df_with_extra_columns) - - filled_df_extra_columns = fill_missing_frames(df_with_extra_columns, md_len=6) - expected_frames = [1, 2, 3, 4, 5] - - # Debugging prints - print(f"Original DataFrame length: {len(df_with_extra_columns)}") - print(f"Filled DataFrame length: {len(filled_df_extra_columns)}") - print(f"Expected frames: {expected_frames}") - - # Assert that the resulting DataFrame has unique frame numbers - assert filled_df_extra_columns["FRAME"].nunique() == len(filled_df_extra_columns) - - # Assert that the resulting DataFrame has the expected frames - assert all(filled_df_extra_columns["FRAME"] == expected_frames) - - # Assert that the length of the resulting DataFrame is equal to the length of expected frames - assert len(filled_df_extra_columns) == len(expected_frames) - - -if __name__ == "__main": - pytest.main() diff --git a/openmmdl/tests/openmmdl_analysis/test_preprocessing.py b/openmmdl/tests/openmmdl_analysis/test_preprocessing.py deleted file mode 100644 index 16a39d39..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_preprocessing.py +++ /dev/null @@ -1,229 +0,0 @@ -import os -import pytest -import tempfile -import shutil -from Bio import PDB -import numpy as np -import mdtraj as md -from pathlib import Path -import MDAnalysis as mda -from openmmdl.openmmdl_analysis.preprocessing import * - -pdb_file_path = "openmmdl/tests/data/in/0_unk_hoh.pdb" - -# Define test data paths -test_data_directory = Path("openmmdl/tests/data/in") -pdb_file = test_data_directory / "0_unk_hoh.pdb" -topology_metal = f"{test_data_directory}/metal_top.pdb" -ligand_resname = "UNK" - - -@pytest.fixture -def sample_pdb_data(): - # Provide sample PDB data for testing - return """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C -ATOM 11 C9 UNK A 454 43.171 52.151 33.897 1.00 0.00 A C -ATOM 12 C13 UNK A 454 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 C11 UNK A 454 41.393 52.671 35.378 1.00 0.00 A C -ATOM 14 C6 UNK A 454 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 H4 UNK A 454 41.220 51.358 37.148 1.00 0.00 A H -ATOM 16 H9 UNK A 454 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C16 UNK A 454 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N4 UNK A 454 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H7 UNK A 454 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H2 UNK A 454 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H UNK A 454 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C1 UNK A 454 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C5 UNK A 454 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C10 UNK A 454 41.859 48.278 39.998 1.00 0.00 A C -ATOM 25 H8 UNK A 454 42.284 48.099 40.981 1.00 0.00 A H -ATOM 26 H3 UNK A 454 43.752 48.806 39.135 1.00 0.00 A H -ATOM 27 C3 UNK A 454 40.774 48.885 37.463 1.00 0.00 A C -ATOM 28 H1 UNK A 454 40.310 49.079 36.500 1.00 0.00 A H -ATOM 29 C8 UNK A 454 39.907 48.435 38.509 1.00 0.00 A C -ATOM 30 H6 UNK A 454 38.833 48.310 38.406 1.00 0.00 A H -ATOM 31 C12 UNK A 454 40.466 48.125 39.823 1.00 0.00 A C -ATOM 32 C15 UNK A 454 39.627 47.605 40.833 1.00 0.00 A C -ATOM 33 N3 UNK A 454 38.981 47.235 41.740 1.00 0.00 A N """ - - -@pytest.fixture -def input_pdb_filename(tmp_path): - input_pdb_filename = tmp_path / "input.pdb" - with open(input_pdb_filename, "w") as f: - f.write( - """ATOM 1 N SPC A 101 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 O TIP3 A 102 44.740 47.862 35.697 1.00 0.00 A O -ATOM 3 C * A 103 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 H * A 104 43.265 46.644 34.450 1.00 0.00 A H -ATOM 5 O WAT A 105 42.607 47.556 35.077 1.00 0.00 A O -ATOM 6 H1 SPC A 106 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H2 * A 107 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C T3P A 108 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 O T4P A 109 42.743 50.705 35.818 1.00 0.00 A O -ATOM 10 H T5P A 110 43.545 51.052 34.671 1.00 0.00 A H -ATOM 11 N * A 111 43.171 52.151 33.897 1.00 0.00 A N -ATOM 12 C SPC A 112 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 O * A 113 41.393 52.671 35.378 1.00 0.00 A O -ATOM 14 C TIP4 A 114 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 O * A 115 41.220 51.358 37.148 1.00 0.00 A O -ATOM 16 H * A 116 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C * A 117 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N * A 118 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H * A 119 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H * A 120 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H * A 121 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C * A 122 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C * A 123 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C * A 124 41.859 48.278 39.998 1.00 0.00 A C """ - ) - - -def test_process_pdb_file(): - # Define the input and output file paths - original_cwd = Path(os.getcwd()) - input_pdb_filename = test_data_directory / "0_unk_hoh.pdb" - - shutil.copy(str(input_pdb_filename), ".") - - # Process the provided PDB file - process_pdb_file(input_pdb_filename) - - # Read the modified output PDB file - with open(input_pdb_filename, "r") as f: - modified_data = f.read() - - # Check if the modified data contains the expected residues - assert "HOH" in modified_data - assert "UNK" in modified_data - - -def test_renumber_atoms_in_residues(sample_pdb_data, tmp_path): - input_pdb_filename = tmp_path / "input.pdb" - output_pdb_filename = tmp_path / "output.pdb" - - # Create a mock PDB file - input_pdb_filename.write_text( - """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C -ATOM 11 C9 UNK A 454 43.171 52.151 33.897 1.00 0.00 A C -ATOM 12 C13 UNK A 454 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 C11 UNK A 454 41.393 52.671 35.378 1.00 0.00 A C -ATOM 14 C6 UNK A 454 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 H4 UNK A 454 41.220 51.358 37.148 1.00 0.00 A H -ATOM 16 H9 UNK A 454 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C16 UNK A 454 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N4 UNK A 454 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H7 UNK A 454 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H2 UNK A 454 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H UNK A 454 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C1 UNK A 454 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C5 UNK A 454 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C10 UNK A 454 41.859 48.278 39.998 1.00 0.00 A C -ATOM 25 H8 UNK A 454 42.284 48.099 40.981 1.00 0.00 A H -ATOM 26 H3 UNK A 454 43.752 48.806 39.135 1.00 0.00 A H -ATOM 27 C3 UNK A 454 40.774 48.885 37.463 1.00 0.00 A C -ATOM 28 H1 UNK A 454 40.310 49.079 36.500 1.00 0.00 A H -ATOM 29 C8 UNK A 454 39.907 48.435 38.509 1.00 0.00 A C -ATOM 30 H6 UNK A 454 38.833 48.310 38.406 1.00 0.00 A H -ATOM 31 C12 UNK A 454 40.466 48.125 39.823 1.00 0.00 A C -ATOM 32 C15 UNK A 454 39.627 47.605 40.833 1.00 0.00 A C -ATOM 33 N3 UNK A 454 38.981 47.235 41.740 1.00 0.00 A N""" - ) - - renumber_atoms_in_residues(str(input_pdb_filename), str(output_pdb_filename), "UNK") - assert output_pdb_filename.exists() - - -@pytest.fixture -def sample_pdb_info(): - return """ -ATOM 741 N UNK A 454 43.056 48.258 36.260 1.00 0.00 LIG X -ATOM 742 N1 UNK A 454 44.324 47.906 35.996 1.00 0.00 LIG X -ATOM 743 C14 UNK A 454 44.132 46.990 35.061 1.00 0.00 LIG X - """ - - -def test_process_pdb(sample_pdb_info): - with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file: - temp_filename = temp_file.name - temp_file.write(sample_pdb_info) - - print("Temp Data:") - print(temp_filename) - output_filename = "output_pdb_test.pdb" - process_pdb(temp_filename, output_filename) - - with open(output_filename, "r") as f: - modified_data = f.read() - - print("Modified Data:") - print(modified_data) - - assert " LIG N" in modified_data - assert " LIG C" in modified_data - assert " LIG X" not in modified_data - - # Clean up temporary and output files - os.remove(temp_filename) - os.remove(output_filename) - - -def test_extract_and_save_ligand_as_sdf(): - input_pdb_filename = topology_metal - output_filename = "ligand_changed.sdf" - target_resname = ligand_resname - - extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_resname) - - assert output_filename is not None - os.remove("ligand_changed.sdf") - - -test_data_directory = Path("openmmdl/tests/data/in") -TEST_LIGAND_FILE = f"{test_data_directory}/CVV.sdf" -TEST_OUTPUT_FILE = "CVV.smi" - - -def test_increase_ring_indices(): - # Test case 1: Check if ring indices are correctly increased - ring = [1, 2, 3] - lig_index = 10 - result = increase_ring_indices(ring, lig_index) - assert result == [11, 12, 13] - - # Test case 2: Check with a different lig_index - ring = [3, 4, 5] - lig_index = 20 - result = increase_ring_indices(ring, lig_index) - assert result == [23, 24, 25] - - -def test_convert_ligand_to_smiles(): - # Convert the ligand structure to SMILES in the same directory as the input SDF file - convert_ligand_to_smiles(TEST_LIGAND_FILE, TEST_OUTPUT_FILE) - - # Verify that the output SMILES file was created in the same directory as the input file - assert os.path.exists(TEST_OUTPUT_FILE) - - # Optionally, you can also read and validate the content of the output SMILES file - with open(TEST_OUTPUT_FILE, "r") as smi_file: - smiles_lines = smi_file.readlines() - assert len(smiles_lines) > 0 # Check that there are SMILES representations diff --git a/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py b/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py deleted file mode 100644 index 574f8a7f..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py +++ /dev/null @@ -1,288 +0,0 @@ -import pytest -import os -import time -import shutil -from PIL import Image -from pathlib import Path -from openmmdl.openmmdl_analysis.rdkit_figure_generation import * - -test_data_directory = Path( - "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation" -) -test_data_directory_files = Path("openmmdl/tests/data/in") -lig_no_h = test_data_directory_files / "lig_no_h.pdb" -complex = test_data_directory_files / "complex.pdb" -smi_file = test_data_directory_files / "lig_no_h.smi" -current_directory = os.getcwd() -output_path = "all_binding_modes_arranged.png" - -shutil.copy(str(lig_no_h), ".") -shutil.copy(str(complex), ".") - - -@pytest.mark.parametrize( - "input_data, expected_output", - [ - ( - ["60GLUA_4206_4207_4216_4217_4218_4205_hydrophobic"], - ["60GLUA 4206 4207 4216 4217 4218 4205 hydrophobic"], - ), - (["165ASPA_4203_Acceptor_hbond"], ["165ASPA 4203 Acceptor hbond"]), - (["125TYRA_4192_Acceptor_waterbridge"], ["125TYRA 4192 Acceptor waterbridge"]), - ], -) -def test_split_interaction_data(input_data, expected_output): - result = split_interaction_data(input_data) - assert result == expected_output - - -def test_highlight_numbers(): - # Input data - split_data = [ - "163GLYA 4202 Acceptor hbond", - "165ASPA 4203 Donor hbond", - "165ASPA 4222 Donor hbond", - "165ASPA 4203 Acceptor hbond", - "125TYRA 4192 Acceptor waterbridge", - "165ASPA 4222 Donor waterbridge", - "161PHEA 4211 4212 4213 4214 4215 4210 hydrophobic", - "59ARGA 4205 4206 4207 4216 4217 4218 Aromatic pication", - "155PHEA 4205 4206 4207 4216 4217 4218 pistacking", - "59ARGA 4194 F halogen", - "166ARGA 4202,4203 Carboxylate NI saltbridge", - "165ASPA 4202 Amine PI saltbridge", - "HEM 4202 FE 4 metal", - ] - - starting_idx = 1 # Updated starting index - - result = highlight_numbers(split_data, starting_idx) - - ( - highlighted_hbond_donor, - highlighted_hbond_acceptor, - highlighted_hbond_both, - highlighted_hydrophobic, - highlighted_waterbridge, - highlighted_pistacking, - highlighted_halogen, - highlighted_ni, - highlighted_pi, - highlighted_pication, - highlighted_metal, - ) = result - - assert highlighted_hbond_donor is not None - assert highlighted_hbond_acceptor is not None - assert highlighted_hbond_both is not None - assert highlighted_hydrophobic is not None - assert highlighted_waterbridge is not None - assert highlighted_halogen is not None - assert highlighted_ni is not None - assert highlighted_pi is not None and len(highlighted_pi) > 0 - assert highlighted_pication is not None - assert highlighted_metal is not None - - -def test_update_dict(): - # Test case 1: Check if the target dictionary is updated correctly - target_dict = {1: "1", 2: "2"} - source_dict = {3: "3", 4: "4"} - update_dict(target_dict, source_dict) - assert target_dict == {1: "1", 2: "2", 3: "3", 4: "4"} - - # Test case 2: Check if the function handles multiple source dictionaries - target_dict = {} - source_dict1 = {1: "1"} - source_dict2 = {2: "2", 3: "3"} - update_dict(target_dict, source_dict1, source_dict2) - assert target_dict == {1: "1", 2: "2", 3: "3"} - - # Test case 3: Check if the function handles empty source dictionaries - target_dict = {1: "1", 2: "2"} - update_dict(target_dict) # No source dictionaries provided - assert target_dict == {1: "1", 2: "2"} - - -def test_generate_interaction_dict(): - # Test with a known interaction type 'hydrophobic' - interaction_type = "hydrophobic" - keys = [1, 2, 3] - expected_result = {1: (1.0, 1.0, 0.0), 2: (1.0, 1.0, 0.0), 3: (1.0, 1.0, 0.0)} - result = generate_interaction_dict(interaction_type, keys) - assert result == expected_result - - -def test_create_and_merge_images_with_split_data(): - # Define test data - binding_mode = "Binding_Mode_1" - occurrence_percent = 92 - split_data = [ - "166ARGA 4220,4221 Carboxylate NI saltbridge", - "161PHEA 4221 Acceptor hbond", - "207ILEA 4205 4206 4207 4208 4209 4204 hydrophobic", - ] - merged_image_paths = [] - - # Define source image paths - source_image_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.png" - source_svg_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.svg" - source_merged_image_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png" - - # Copy source image files to the working directory - working_directory = os.getcwd() - destination_image_path = os.path.join( - working_directory, os.path.basename(source_image_path) - ) - destination_svg_path = os.path.join( - working_directory, os.path.basename(source_svg_path) - ) - destination_merged_image_path = os.path.join( - working_directory, os.path.basename(source_merged_image_path) - ) - shutil.copy(source_image_path, destination_image_path) - shutil.copy(source_svg_path, destination_svg_path) - shutil.copy(source_merged_image_path, destination_merged_image_path) - - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory before:", files_in_working_directory) - - # Run the function - merged_image_paths = create_and_merge_images( - binding_mode, occurrence_percent, split_data, merged_image_paths - ) - - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory after:", files_in_working_directory) - - # Check if the merged image file was created - assert len(merged_image_paths) == 1 - - # Check if the merged image file is a valid image - merged_image_path = merged_image_paths[0] - try: - with Image.open(merged_image_path) as img: - img.verify() - except Exception as e: - pytest.fail(f"Merged image file is not a valid image: {e}") - - -def test_max_width_and_height_calculation(): - # Create some example images with different sizes - image1 = Image.new("RGB", (100, 200), (255, 255, 255)) - image2 = Image.new("RGB", (150, 250), (255, 255, 255)) - merged_images = [image1, image2] - - # Calculate the maximum width and height - max_width = max(image.size[0] for image in merged_images) - max_height = max(image.size[1] for image in merged_images) - - # Assert the calculated max_width and max_height - assert max_width == 150 - assert max_height == 250 - - -def test_big_figure_creation(): - # Create example merged images - image1 = Image.new("RGB", (100, 200), (255, 255, 255)) - image2 = Image.new("RGB", (150, 250), (255, 255, 255)) - merged_images = [image1, image2] - - # Calculate the maximum width and height - max_width = max(image.size[0] for image in merged_images) - max_height = max(image.size[1] for image in merged_images) - - # Determine the number of images per row (in your case, 2 images per row) - images_per_row = 2 - - # Calculate the number of rows and columns required - num_rows = (len(merged_images) + images_per_row - 1) // images_per_row - total_width = max_width * images_per_row - total_height = max_height * num_rows - - # Create a new image with the calculated width and height - big_figure = Image.new( - "RGB", (total_width, total_height), (255, 255, 255) - ) # Set background to white - - # Assert the dimensions of the created big_figure - assert big_figure.size == (300, 250) # Width should be 300, height should be 250 - - -def test_arranged_figure_generation(): - binding_mode1_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png" - binding_mode2_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_2_merged.png" - all_modes_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/all_binding_modes_arranged.png" - working_directory = os.getcwd() - - # Print the working directory to verify it's as expected - print("Working Directory:", working_directory) - - destination_path_1 = os.path.join( - working_directory, os.path.basename(binding_mode1_path) - ) - destination_path_2 = os.path.join( - working_directory, os.path.basename(binding_mode2_path) - ) - destination_path_all = os.path.join( - working_directory, os.path.basename(all_modes_path) - ) - - # Print the destination paths to verify they are constructed correctly - print("Destination Path 1:", destination_path_1) - print("Destination Path 2:", destination_path_2) - print("Destination Path All:", destination_path_all) - - shutil.copy(binding_mode1_path, destination_path_1) - shutil.copy(binding_mode2_path, destination_path_2) - shutil.copy(all_modes_path, destination_path_all) - - merged_image_paths = ["Binding_Mode_1_merged.png", "Binding_Mode_2_merged.png"] - output_path = "all_binding_modes_arranged.png" - output_path = os.path.join(working_directory, output_path) - print(output_path) - - # Run the function - arranged_figure_generation(merged_image_paths, output_path) - print(output_path) - - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory:", files_in_working_directory) - - output_path = os.path.join( - working_directory, - "Binding_Modes_Markov_States", - "all_binding_modes_arranged.png", - ) - print(output_path) - - # Check if the output file was created - - assert output_path is not None - - -output_image_file = "output_image.png" - -# Copy the files to the current folder -shutil.copy(complex, Path.cwd()) -shutil.copy(lig_no_h, Path.cwd()) -shutil.copy(smi_file, Path.cwd()) - - -# Test the generate_ligand_image function -def test_generate_ligand_image(): - ligand_name = "UNK" - generate_ligand_image( - ligand_name, "complex.pdb", "lig_no_h.pdb", "lig_no_h.smi", output_image_file - ) - - # Assert that the output image file exists - assert os.path.exists(output_image_file) - - -# Run the tests -if __name__ == "__main__": - pytest.main() diff --git a/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py b/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py deleted file mode 100644 index 3bae0d7d..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -import pytest -import contextlib -from pathlib import Path -import pandas as pd -import numpy as np -import mdtraj as md - -from openmmdl.openmmdl_analysis.rmsd_calculation import ( - rmsd_for_atomgroups, - RMSD_dist_frames, -) - -test_data_directory = Path("openmmdl/tests/data/in") -topology_file = f"{test_data_directory}/0_unk_hoh.pdb" -trajectory_file = f"{test_data_directory}/all_50.dcd" -fig_type = "png" -selection1 = "protein" -selection2 = ("resname UNK", "") -ligand_name = "UNK" - - -def test_rmsd_for_atomgroups(): - - # Call the function - rmsd_df = rmsd_for_atomgroups( - topology_file, trajectory_file, fig_type, selection1, selection2 - ) - - # Check if the output DataFrame has the correct structure - assert isinstance(rmsd_df, pd.DataFrame) - assert rmsd_df.index.name == "frame" - - # Define file paths - csv_path = os.path.join("RMSD", "RMSD_over_time.csv") - plot_path = os.path.join("RMSD", "RMSD_over_time.png") - - print("Checking CSV file:", csv_path) - # Check if the CSV file exists - assert os.path.exists(csv_path), f"CSV file does not exist at {csv_path}" - - print("Checking plot file:", plot_path) - # Check if the plot file exists - assert os.path.exists(plot_path), f"Plot file does not exist at {plot_path}" - - # Cleanup created files after the test - os.remove(csv_path) - os.remove(plot_path) - - -def test_rmsd_dist_frames(): - - # Call the function - pairwise_rmsd_prot, pairwise_rmsd_lig = RMSD_dist_frames( - topology_file, trajectory_file, fig_type, ligand_name - ) - - # Check if the function returns numpy arrays for pairwise RMSD - assert isinstance(pairwise_rmsd_prot, np.ndarray) - assert isinstance(pairwise_rmsd_lig, np.ndarray) - - # Define file paths - plot_path = "./RMSD/RMSD_between_the_frames.png" - - print("Checking plot file:", plot_path) - # Check if the plot file exists - assert os.path.exists(plot_path), f"Plot file does not exist at {plot_path}" - - # Cleanup created files after the test - with contextlib.suppress(FileNotFoundError): - os.remove(plot_path) diff --git a/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py b/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py deleted file mode 100644 index 0b5e2aec..00000000 --- a/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py +++ /dev/null @@ -1,239 +0,0 @@ -import numpy as np -import pandas as pd -import re -import shutil -import subprocess -import os -from pathlib import Path -import matplotlib.pyplot as plt -from unittest.mock import patch, Mock -import pytest -from openmmdl.openmmdl_analysis.visualization_functions import * - -test_data_directory_files = Path("openmmdl/tests/data/in") -clouds = test_data_directory_files / "clouds.json" -waters_pdb = test_data_directory_files / "interacting_waters.pdb" -waters_dcd = test_data_directory_files / "interacting_waters.dcd" -waters_pkl = test_data_directory_files / "interacting_waters.pkl" - - -# visualization_functions tests -@pytest.fixture -def sample_dataframe_interacting_water_ids(): - data = { - "Interaction1": [0, 1, 0, 1, 0], - "Interaction2": [1, 0, 0, 0, 1], - "WATER_IDX": [101, 102, None, 104, 105], - "FRAME": [1, 2, 3, 4, 5], - } - df_all = pd.DataFrame(data) - return df_all - - -def test_interacting_water_ids(sample_dataframe_interacting_water_ids): - waterbridge_interactions = ["Interaction1", "Interaction2"] - - result = interacting_water_ids( - sample_dataframe_interacting_water_ids, waterbridge_interactions - ) - - expected_interacting_waters = [101, 102, 104, 105] - - assert sorted(result) == sorted(expected_interacting_waters) - - -@pytest.fixture -def sample_dataframe_cloud_json_generation(): - data = { - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(4.0, 5.0, 6.0)", - "(7.0, 8.0, 9.0)", - ], - "INTERACTION": [ - "hydrophobic", - "acceptor", - "donor", - ], - "PROTISDON": [ - "False", - "True", - "False", - ], - "PROTISPOS": [ - "False", - "False", - "True", - ], - } - df_all = pd.DataFrame(data) - return df_all - - -def test_run_visualization(): - # Set up the paths - package_path = Path("openmmdl/openmmdl_analysis") - notebook_path = package_path / "visualization.ipynb" - - # Run the visualization function - # run_visualization() - - # Check if the notebook was copied to the current directory with the correct name - copied_notebook_path = os.path.join(os.getcwd(), "visualization.ipynb") - shutil.copy(str(notebook_path), ".") - new_notebook_path = "visualization.ipynb" - assert os.path.isfile(copied_notebook_path) - - # Check if the content of the copied notebook is the same as the original notebook - with open(new_notebook_path, "r") as copied_notebook: - with open(notebook_path, "r") as original_notebook: - assert copied_notebook.read() == original_notebook.read() - - -@pytest.fixture -def sample_dataframe(): - # Create a sample dataframe for testing - data = { - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(4.0, 5.0, 6.0)", - "(13.0, 14.0, 15.0)", - "(16.0, 17.0, 18.0)", - "(19.0, 20.0, 21.0)", - ], - "INTERACTION": ["hydrophobic", "acceptor", "donor", "pistacking", "pication"], - "PROTISDON": ["False", "True", "True", "False", "True"], - "PROTISPOS": ["False", "True", "False", "False", "False"], - "TARGETCOO": [ - "(7.0, 8.0, 9.0)", - "(10.0, 11.0, 12.0)", - "(22.0, 23.0, 24.0)", - "(25.0, 26.0, 27.0)", - "(28.0, 29.0, 30.0)", - ], - } - - # Extend the existing dataframe with examples for additional interactions - additional_data = [ - ("(31.0, 32.0, 33.0)", "waterbridge", "True", "False", "(34.0, 35.0, 36.0)"), - ( - "(37.0, 38.0, 39.0)", - "negative_ionizable", - "False", - "True", - "(40.0, 41.0, 42.0)", - ), - ( - "(43.0, 44.0, 45.0)", - "positive_ionizable", - "False", - "True", - "(46.0, 47.0, 48.0)", - ), - ("(49.0, 50.0, 51.0)", "halogen", "False", "False", "(52.0, 53.0, 54.0)"), - ("(55.0, 56.0, 57.0)", "metal", "False", "False", "(58.0, 59.0, 60.0)"), - ("(61.0, 62.0, 63.0)", "hydrophobic", "False", "False", "(64.0, 65.0, 66.0)"), - ("(61.0, 62.0, 63.0)", "saltbridge", "False", "True", "(64.0, 65.0, 66.0)"), - ("(61.0, 62.0, 63.0)", "saltbridge", "False", "False", "(64.0, 65.0, 66.0)"), - ("(67.0, 68.0, 69.0)", "donor", "True", "False", "(70.0, 71.0, 72.0)"), - ("(73.0, 74.0, 75.0)", "acceptor", "False", "False", "(76.0, 77.0, 78.0)"), - ( - "(79.0, 80.0, 81.0)", - "negative_ionizable", - "False", - "True", - "(82.0, 83.0, 84.0)", - ), - ] - - for row in additional_data: - data["LIGCOO"].append(row[0]) - data["INTERACTION"].append(row[1]) - data["PROTISDON"].append(row[2]) - data["PROTISPOS"].append(row[3]) - data["TARGETCOO"].append(row[4]) - - return pd.DataFrame(data) - - -def test_cloud_json_generation(sample_dataframe): - result = cloud_json_generation(sample_dataframe) - - assert "hydrophobic" in result - assert "acceptor" in result - assert "donor" in result - assert "waterbridge" in result - assert "negative_ionizable" in result - assert "positive_ionizable" in result - assert "pistacking" in result - assert "pication" in result - assert "halogen" in result - assert "metal" in result - - # Add more specific assertions based on your expectations for the output - # For example, you might want to check the structure of the generated dictionary - assert isinstance(result["hydrophobic"], dict) - assert "coordinates" in result["hydrophobic"] - assert "color" in result["hydrophobic"] - assert "radius" in result["hydrophobic"] - - # Add more tests based on your specific requirements and expected results - - -@pytest.fixture -def input_paths(): - test_data_directory = Path("openmmdl/tests/data/in") - topology_file = f"{test_data_directory}/metal_top.pdb" - frame_file = f"{test_data_directory}/processing_frame_1.pdb" - topology_metal = f"{test_data_directory}/metal_top.pdb" - trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" - return topology_metal, trajetory_metal - - -def test_save_interacting_waters_trajectory(input_paths): - topology_metal, trajetory_metal = input_paths - interacting_waters = [588, 733, 1555, 2000, 1266] - ligname = "UNK" - special = "HEM" - outputpath = "./" - - save_interacting_waters_trajectory( - topology_metal, - trajetory_metal, - interacting_waters, - ligname, - special, - outputpath, - ) - - interacting_water_pdb = "interacting_waters.pdb" - interacting_water_dcd = "interacting_waters.dcd" - assert interacting_water_pdb is not None - assert interacting_water_dcd is not None - assert os.path.exists(f"{outputpath}interacting_waters.pdb") - assert os.path.exists(f"{outputpath}interacting_waters.dcd") - - # Add additional assertions or checks as needed - # For example, you can use MDAnalysis to check if the saved files contain the expected number of atoms. - - # Cleanup: Remove the created files after the test - os.remove(f"{outputpath}interacting_waters.pdb") - os.remove(f"{outputpath}interacting_waters.dcd") - - -def test_visualization(): - shutil.copy(str(clouds), ".") - shutil.copy(str(waters_pdb), ".") - shutil.copy(str(waters_dcd), ".") - shutil.copy(str(waters_pkl), ".") - ligand_name = "LET" - receptor_type = "protein" - height = "1000px" - width = "1000px" - - # Call the function with sample data - result = visualization(ligand_name, receptor_type, height, width) - - # Perform assertions based on the expected outcome - assert result is not None - # Add more assertions based on your specific requirements