From c63bb99bb72b1754b5cbc8252946c1c263095db8 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:46:54 +0100 Subject: [PATCH 1/8] Delete openmmdl/tests/analysis directory --- .../markov_state_figure_generation_test.py | 35 ------------------- 1 file changed, 35 deletions(-) delete mode 100644 openmmdl/tests/analysis/markov_state_figure_generation_test.py diff --git a/openmmdl/tests/analysis/markov_state_figure_generation_test.py b/openmmdl/tests/analysis/markov_state_figure_generation_test.py deleted file mode 100644 index 98465aa3..00000000 --- a/openmmdl/tests/analysis/markov_state_figure_generation_test.py +++ /dev/null @@ -1,35 +0,0 @@ -import networkx as nx -import os -from openmmdl.openmmdl_analysis.markov_state_figure_generation import ( - min_transition_calculation, - binding_site_markov_network, -) - - -# Create a test for min_transition_calculation -def test_min_transition_calculation(): - min_transition = 10 - expected_output = [10, 20, 50, 100] - result = min_transition_calculation(min_transition) - assert result == expected_output - - -# Create a test for binding_site_markov_network -def test_binding_site_markov_network(): - # Define test data - total_frames = 1000 - min_transitions = [5, 10] - combined_dict = {"all": ["A", "B", "A", "C", "B", "A", "C", "A", "A", "B"]} - fig_type = "png" - - # Run the function - binding_site_markov_network(total_frames, min_transitions, combined_dict, fig_type) - - # Check if the output file exists for each min_transition - for min_transition_percent in min_transitions: - plot_filename = f"markov_chain_plot_{min_transition_percent}.png" - plot_path = os.path.join("Binding_Modes_Markov_States", plot_filename) - assert os.path.exists(plot_path) - - -# Optionally, you can include more test cases to cover different scenarios and edge cases. From 245a4446205a1b2d613292aa82f8b423293470a4 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:47:05 +0100 Subject: [PATCH 2/8] Delete openmmdl/tests/openmmdl_simulation directory --- .../cleaning_procedures_test.py | 187 --------------- .../forcefield_water_test.py | 223 ------------------ .../protein_ligand_prep_test.py | 173 -------------- .../test_post_md_conversions.py | 115 --------- 4 files changed, 698 deletions(-) delete mode 100644 openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py delete mode 100644 openmmdl/tests/openmmdl_simulation/forcefield_water_test.py delete mode 100644 openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py delete mode 100644 openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py diff --git a/openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py b/openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py deleted file mode 100644 index a3967ff3..00000000 --- a/openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py +++ /dev/null @@ -1,187 +0,0 @@ -import os -import shutil -import pytest -from pathlib import Path -from unittest.mock import mock_open, patch -from openmmdl.openmmdl_simulation.scripts.cleaning_procedures import ( - cleanup, - create_directory_if_not_exists, - post_md_file_movement, - copy_file, - create_directory_if_not_exists, - organize_files, -) - - -@pytest.fixture -def test_protein_name(): - return "test_protein" - - -@pytest.fixture -def test_directory_path(): - return "test_directory" - - -def test_cleanup(test_protein_name): - # Create a dummy file to be removed - with open(f"output_{test_protein_name}", "w") as dummy_file: - dummy_file.write("Dummy content") - - # Call the cleanup function - cleanup(test_protein_name) - - # Check if the file has been removed - assert not os.path.exists(f"output_{test_protein_name}") - - -def test_create_directory_if_not_exists(test_directory_path): - # Create a test directory - create_directory_if_not_exists(test_directory_path) - - # Check if the directory exists - assert os.path.exists(test_directory_path) - - # Call the function again, it should not raise an error - create_directory_if_not_exists(test_directory_path) - - # Cleanup: Remove the test directory - shutil.rmtree(test_directory_path) - assert not os.path.exists(test_directory_path) - - -@patch("os.path.exists") -@patch("shutil.copy") -def test_copy_file(mock_copy, mock_exists): - - src = "source_file.txt" - dest = "destination_directory" - - # Mock the os.path.exists to return True, indicating the source file exists - mock_exists.return_value = True - - # Call the copy_file function - copy_file(src, dest) - - # Check that os.path.exists was called with the source file - mock_exists.assert_called_with(src) - - # Check that shutil.copy was called with the source file and destination directory - mock_copy.assert_called_with(src, dest) - - -# Mock the os.path.exists and os.rename functions -@patch("os.path.exists") -@patch("os.rename") -def test_organize_files(mock_rename, mock_exists): - source = ["file1.txt", "file2.txt", "file3.txt"] - destination = "destination_directory" - - # Mock os.path.exists to return True for all source files - mock_exists.side_effect = [True] * len(source) - - # Call the organize_files function - organize_files(source, destination) - - # Print the calls made to os.rename - for call in mock_rename.call_args_list: - print(call) - - -# def test_post_md_file_movement(): -# # Get the absolute path to the test data directory -# test_data_directory = Path("openmmdl/tests/data/in") -# -# # Define the full path to the input files -# ligand = test_data_directory / 'CVV.sdf' -# protein_name = test_data_directory / '6b73.pdb' -# prmtop = test_data_directory / '6b73.prmtop' -# inpcrd = test_data_directory / '6b73.inpcrd' -# -# # Assert that the input files exist before moving -# assert os.path.exists(ligand) -# assert os.path.exists(protein_name) -# assert os.path.exists(prmtop) -# assert os.path.exists(inpcrd) -# -# # Call the post_md_file_movement function -# post_md_file_movement(protein_name, prmtop, inpcrd, ligand) -# -# # Check if the files have been organized and moved to the correct directories -# input_files_dir = Path("Input_Files") -# -# assert os.path.exists(input_files_dir) -# assert os.path.exists(input_files_dir / "6b73.pdb") -# assert os.path.exists(input_files_dir / "6b73.prmtop") -# assert os.path.exists(input_files_dir / "6b73.inpcrd") -# assert os.path.exists(input_files_dir / "CVV.sdf") - - -def test_post_md_file_movement(): - # Get the absolute path to the test data directory - test_data_directory = Path("openmmdl/tests/data/in") - - # Define the full path to the input files - ligand = test_data_directory / "CVV.sdf" - protein_name = test_data_directory / "6b73.pdb" - prmtop = test_data_directory / "6b73.prmtop" - inpcrd = test_data_directory / "6b73.inpcrd" - protein_no_solvent = test_data_directory / "prepared_no_solvent_6b73.pdb" - protein_solvent = test_data_directory / "solvent_padding_6b73.pdb" - protein_equilibration = test_data_directory / "Equilibration_6b73.pdb" - protein_minimization = test_data_directory / "Energyminimization_6b73.pdb" - output_pdb = test_data_directory / "output_6b73.pdb" - mdtraj_top = test_data_directory / "centered_old_coordinates_top.pdb" - prot_lig_top = test_data_directory / "prot_lig_top.pdb" - checkpoint = test_data_directory / "checkpoint.chk" - checkpoint_10x = test_data_directory / "10x_checkpoint.chk" - - # Assert that the input files exist before moving - assert os.path.exists(ligand) - assert os.path.exists(protein_name) - assert os.path.exists(prmtop) - assert os.path.exists(inpcrd) - assert os.path.exists(protein_no_solvent) - - shutil.copy(str(protein_no_solvent), ".") - shutil.copy(str(protein_solvent), ".") - shutil.copy(str(protein_equilibration), ".") - shutil.copy(str(protein_minimization), ".") - shutil.copy(str(output_pdb), ".") - shutil.copy(str(mdtraj_top), ".") - shutil.copy(str(prot_lig_top), ".") - shutil.copy(str(checkpoint), ".") - shutil.copy(str(checkpoint_10x), ".") - shutil.copy(str(protein_name), ".") - protein_name = "6b73.pdb" - - # Call the post_md_file_movement function - post_md_file_movement(str(protein_name), str(prmtop), str(inpcrd), [str(ligand)]) - - # Check if the files have been organized and moved to the correct directories - input_files_dir = Path("Input_Files") - md_files_dir = Path("MD_Files") - md_postprocessing_dir = Path("MD_Postprocessing") - final_output_dir = Path("Final_Output") - checkpoints_dir = Path("Checkpoints") - - assert os.path.exists(input_files_dir) - assert os.path.exists(md_files_dir / "Pre_MD") - assert os.path.exists(md_files_dir / "Pre_MD" / "prepared_no_solvent_6b73.pdb") - assert os.path.exists(md_files_dir / "Pre_MD" / "solvent_padding_6b73.pdb") - assert os.path.exists( - md_files_dir / "Minimization_Equilibration" / "Equilibration_6b73.pdb" - ) - assert os.path.exists( - md_files_dir / "Minimization_Equilibration" / "Energyminimization_6b73.pdb" - ) - assert os.path.exists(md_files_dir / "MD_Output" / "output_6b73.pdb") - assert os.path.exists(md_postprocessing_dir / "centered_old_coordinates_top.pdb") - assert os.path.exists(final_output_dir / "Prot_Lig" / "prot_lig_top.pdb") - assert os.path.exists(checkpoints_dir / "checkpoint.chk") - assert os.path.exists(checkpoints_dir / "10x_checkpoint.chk") - - -# Run the tests -if __name__ == "__main__": - pytest.main() diff --git a/openmmdl/tests/openmmdl_simulation/forcefield_water_test.py b/openmmdl/tests/openmmdl_simulation/forcefield_water_test.py deleted file mode 100644 index cd0967f9..00000000 --- a/openmmdl/tests/openmmdl_simulation/forcefield_water_test.py +++ /dev/null @@ -1,223 +0,0 @@ -import pytest -import simtk.openmm.app as app -from openff.toolkit.topology import Molecule -from openmmforcefields.generators import GAFFTemplateGenerator -from openmmdl.openmmdl_simulation.scripts.forcefield_water import ( - ff_selection, - water_forcefield_selection, - water_model_selection, - generate_forcefield, - generate_transitional_forcefield, -) - -# Replace 'your_module' with the actual name of the module containing your functions. - - -@pytest.fixture -def sample_rdkit_molecule(): - """ - A sample RDKit molecule for testing. - """ - from rdkit import Chem - - mol = Chem.MolFromSmiles("CCO") - return mol - - -def test_ff_selection(): - assert ff_selection("AMBER14") == "amber14-all.xml" - assert ff_selection("AMBER99SB") == "amber99sb.xml" - assert ff_selection("AMBER99SB-ILDN") == "amber99sbildn.xml" - assert ff_selection("AMBER03") == "amber03.xml" - assert ff_selection("AMBER10") == "amber10.xml" - assert ff_selection("CHARMM36") == "charmm36.xml" - assert ff_selection("NonexistentFF") is None - - -def test_water_forcefield_selection(): - # Test cases for 'amber14-all.xml' force field - assert water_forcefield_selection("TIP3P", "amber14-all.xml") == "amber14/tip3p.xml" - assert ( - water_forcefield_selection("TIP3P-FB", "amber14-all.xml") - == "amber14/tip3pfb.xml" - ) - assert water_forcefield_selection("SPC/E", "amber14-all.xml") == "amber14/spce.xml" - assert ( - water_forcefield_selection("TIP4P-Ew", "amber14-all.xml") - == "amber14/tip4pew.xml" - ) - assert ( - water_forcefield_selection("TIP4P-FB", "amber14-all.xml") - == "amber14/tip4pfb.xml" - ) - assert water_forcefield_selection("TIP5P", "amber14-all.xml") is None - assert water_forcefield_selection("NonexistentWater", "amber14-all.xml") is None - assert water_forcefield_selection("TIP3P", "NonexistentFF") is None - - # Test cases for 'charmm36.xml' force field - assert ( - water_forcefield_selection("CHARMM default", "charmm36.xml") - == "charmm36/water.xml" - ) - assert ( - water_forcefield_selection("TIP3P-PME-B", "charmm36.xml") - == "charmm36/tip3p-pme-b.xml" - ) - assert ( - water_forcefield_selection("TIP3P-PME-F", "charmm36.xml") - == "charmm36/tip3p-pme-f.xml" - ) - assert water_forcefield_selection("SPC/E", "charmm36.xml") == "charmm36/spce.xml" - assert ( - water_forcefield_selection("TIP4P-Ew", "charmm36.xml") == "charmm36/tip4pew.xml" - ) - assert ( - water_forcefield_selection("TIP4P-2005", "charmm36.xml") - == "charmm36/tip4p2005.xml" - ) - assert water_forcefield_selection("TIP5P", "charmm36.xml") == "charmm36/tip5p.xml" - assert ( - water_forcefield_selection("TIP5P-Ew", "charmm36.xml") == "charmm36/tip5pew.xml" - ) - assert water_forcefield_selection("NonexistentWater", "charmm36.xml") is None - assert water_forcefield_selection("NonexistentFF", "charmm36.xml") is None - - -def test_water_model_selection(): - assert water_model_selection("TIP3P", "amber99sb.xml") == "tip3p" - assert water_model_selection("TIP3P", "amber99sbildn.xml") == "tip3p" - assert water_model_selection("TIP3P", "amber03.xml") == "tip3p" - assert water_model_selection("TIP3P", "amber10.xml") == "tip3p" - - assert water_model_selection("SPC/E", "amber99sb.xml") == "spce" - assert water_model_selection("SPC/E", "amber99sbildn.xml") == "spce" - assert water_model_selection("SPC/E", "amber03.xml") == "spce" - assert water_model_selection("SPC/E", "amber10.xml") == "spce" - - assert water_model_selection("TIP4P-Ew", "amber99sb.xml") == "tip4pew" - assert water_model_selection("TIP4P-Ew", "amber99sbildn.xml") == "tip4pew" - assert water_model_selection("TIP4P-Ew", "amber03.xml") == "tip4pew" - assert water_model_selection("TIP4P-Ew", "amber10.xml") == "tip4pew" - - assert water_model_selection("TIP4P-FB", "amber99sb.xml") == "tip4pfb" - assert water_model_selection("TIP4P-FB", "amber99sbildn.xml") == "tip4pfb" - assert water_model_selection("TIP4P-FB", "amber03.xml") == "tip4pfb" - assert water_model_selection("TIP4P-FB", "amber10.xml") == "tip4pfb" - - assert water_model_selection("TIP5P", "amber99sb.xml") is None - assert water_model_selection("TIP5P", "amber99sbildn.xml") is None - assert water_model_selection("TIP5P", "amber03.xml") is None - assert water_model_selection("TIP5P", "amber10.xml") is None - assert ( - water_model_selection("TIP5P", "amber14-all.xml") is None - ) # Missing in the initial version - - assert water_model_selection("TIP3P", "amber14-all.xml") == "tip3p" - - assert water_model_selection("CHARMM default", "charmm36.xml") == "charmm" - assert water_model_selection("TIP3P-PME-B", "charmm36.xml") == "charmm" - assert water_model_selection("TIP3P-PME-F", "charmm36.xml") == "charmm" - assert water_model_selection("SPC/E", "charmm36.xml") == "charmm" - assert water_model_selection("TIP4P-Ew", "charmm36.xml") == "tip4pew" - assert water_model_selection("TIP4P-2005", "charmm36.xml") == "tip4pew" - assert water_model_selection("TIP5P", "charmm36.xml") == "tip5p" - assert water_model_selection("TIP5P-Ew", "charmm36.xml") == "tip5p" - - assert water_model_selection("TIP3P", "NonexistentFF") is None - - -def test_generate_forcefield_with_membrane(sample_rdkit_molecule): - forcefield = generate_forcefield( - "amber14-all.xml", "amber14/tip3p.xml", True, sample_rdkit_molecule - ) - assert isinstance(forcefield, app.ForceField) - # Add additional assertions specific to the case with a membrane - - -def test_generate_forcefield_without_membrane(sample_rdkit_molecule): - forcefield = generate_forcefield( - "amber14-all.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule - ) - assert isinstance(forcefield, app.ForceField) - # Add additional assertions specific to the case without a membrane - - -def test_generate_forcefield_with_old_amber_forcefield(sample_rdkit_molecule): - forcefield = generate_forcefield( - "amber99sb.xml", "amber14/tip3p.xml", True, sample_rdkit_molecule - ) - assert isinstance(forcefield, app.ForceField) - # Add additional assertions specific to the case with an old Amber forcefield - - -def test_generate_forcefield_without_small_molecule(): - forcefield = generate_forcefield("amber14-all.xml", "amber14/tip3p.xml", False) - assert isinstance(forcefield, app.ForceField) - # Add additional assertions specific to the case without a small molecule - - -def test_generate_forcefield_membrane_logic(sample_rdkit_molecule): - forcefield_1 = generate_forcefield( - "amber10.xml", "tip3p.xml", True, sample_rdkit_molecule - ) - forcefield_2 = generate_forcefield( - "amber14-all.xml", "amber14/tip3p.xml", True, sample_rdkit_molecule - ) - forcefield_3 = generate_forcefield( - "amber14-all.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule - ) - forcefield_4 = generate_forcefield( - "amber03.xml", "tip3p.xml", False, sample_rdkit_molecule - ) - - assert isinstance(forcefield_1, app.ForceField) - assert isinstance(forcefield_2, app.ForceField) - assert isinstance(forcefield_3, app.ForceField) - assert isinstance(forcefield_4, app.ForceField) - - # Additional tests for different force field combinations - forcefield_5 = generate_forcefield( - "amber14-all.xml", "tip3p.xml", True, sample_rdkit_molecule - ) - forcefield_6 = generate_forcefield( - "amber03.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule - ) - - assert isinstance(forcefield_5, app.ForceField) - assert isinstance(forcefield_6, app.ForceField) - - # Additional tests for membrane flag logic - forcefield_7 = generate_forcefield( - "amber10.xml", "tip3p.xml", True, sample_rdkit_molecule - ) - forcefield_8 = generate_forcefield( - "amber14-all.xml", "tip3p.xml", False, sample_rdkit_molecule - ) - - assert isinstance(forcefield_7, app.ForceField) - assert isinstance(forcefield_8, app.ForceField) - - -def test_generate_transitional_forcefield(sample_rdkit_molecule): - transitional_forcefield = generate_transitional_forcefield( - "amber14-all.xml", "tip3p.xml", True, sample_rdkit_molecule - ) - assert isinstance(transitional_forcefield, app.ForceField) - - # Additional tests for different force field combinations - transitional_forcefield_2 = generate_transitional_forcefield( - "amber03.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule - ) - assert isinstance(transitional_forcefield_2, app.ForceField) - - # Additional tests for membrane flag logic - transitional_forcefield_3 = generate_transitional_forcefield( - "amber14-all.xml", "tip3p.xml", False, sample_rdkit_molecule - ) - assert isinstance(transitional_forcefield_3, app.ForceField) - - # Additional tests for GAFF registration - transitional_forcefield_4 = generate_transitional_forcefield( - "amber14-all.xml", "tip3p.xml", True - ) - assert isinstance(transitional_forcefield_4, app.ForceField) diff --git a/openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py b/openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py deleted file mode 100644 index 78ae71f5..00000000 --- a/openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py +++ /dev/null @@ -1,173 +0,0 @@ -import pytest -import os -import rdkit -from rdkit import Chem -import simtk.openmm.app as app -from simtk.openmm.app import PDBFile, Modeller -from simtk.openmm import unit -from simtk.openmm import Vec3 -import mdtraj as md -import numpy as np -import simtk -from pathlib import Path -import pdbfixer -from openmm.app import PDBFile -from pdbfixer import PDBFixer -import simtk.openmm.app as app - - -from simtk.openmm.app import ( - PDBFile, - Modeller, - PDBReporter, - StateDataReporter, - DCDReporter, - CheckpointReporter, -) -from simtk.openmm import unit, Platform, MonteCarloBarostat, LangevinMiddleIntegrator -from simtk.openmm import Vec3 -import simtk.openmm as mm - - -from openmmdl.openmmdl_simulation.scripts.forcefield_water import ( - ff_selection, - water_forcefield_selection, - water_model_selection, - generate_forcefield, - generate_transitional_forcefield, -) -from openmmdl.openmmdl_simulation.scripts.protein_ligand_prep import ( - prepare_ligand, - rdkit_to_openmm, - merge_protein_and_ligand, - water_padding_solvent_builder, - water_absolute_solvent_builder, - membrane_builder, - water_conversion, -) -from openmmdl.openmmdl_simulation.scripts.post_md_conversions import ( - mdtraj_conversion, - MDanalysis_conversion, -) - - -protein = "6b73.pdb" -ligand = "CVV.sdf" -ligand_name = "UNK" -minimization = False -sanitization = False -ff = "AMBER14" -water = "SPC/E" -add_membrane = False -Water_Box = "Buffer" -water_padding_distance = 1.0 -water_boxShape = "cube" -water_ionicstrength = 0.15 -water_positive_ion = "Na+" -water_negative_ion = "Cl-" - -water_box_x = 6.873 -water_box_y = 7.0 -water_box_z = 9.132 - -# Print current working directory -print("Current working directory:", os.getcwd()) - -# Assuming that 'test_data_directory' is properly defined in your test setup -test_data_directory = "openmmdl/tests/data/in" - - -test_data_directory = Path("openmmdl/tests/data/in") - - -# Define the full path to the input SDF file -TEST_LIGAND_FILE = f"{test_data_directory}/CVV.sdf" -TEST_MOL_FILE = f"{test_data_directory}/CVV.mol" -TEST_MOL2_FILE = f"{test_data_directory}/CVV.mol2" -TEST_PROTEIN = f"{test_data_directory}/6b73.pdb" - -protein_pdb = pdbfixer.PDBFixer(str(TEST_PROTEIN)) - - -ligand_prepared = prepare_ligand(TEST_LIGAND_FILE, minimize_molecule=minimization) -omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) -forcefield_selected = ff_selection(ff) -water_selected = water_forcefield_selection( - water=water, forcefield_selection=ff_selection(ff) -) -model_water = water_model_selection(water=water, forcefield_selection=ff_selection(ff)) -forcefield = generate_forcefield( - protein_ff=forcefield_selected, - solvent_ff=water_selected, - add_membrane=add_membrane, - rdkit_mol=ligand_prepared, -) -complex_topology, complex_positions = merge_protein_and_ligand(protein_pdb, omm_ligand) -modeller = app.Modeller(complex_topology, complex_positions) - - -# Test the prepare_ligand function -def test_prepare_ligand(): - # Test the function with the sample ligand file. - rdkit_mol_sdf = prepare_ligand(TEST_LIGAND_FILE, minimize_molecule=False) - rdkit_mol_mol2_2 = prepare_ligand(TEST_MOL2_FILE, minimize_molecule=True) - rdkit_mol_mol = prepare_ligand(TEST_MOL_FILE, minimize_molecule=False) - rdkit_mol_mol2 = prepare_ligand(TEST_MOL2_FILE, minimize_molecule=False) - - # Add your assertions here to check if the preparation worked as expected - assert rdkit_mol_sdf is not None # Check if the result is not None - assert rdkit_mol_mol2_2 is not None # Check if the result is not None - assert rdkit_mol_mol is not None # Check if the result is not None - assert rdkit_mol_mol2 is not None # Check if the result is not None - - -def test_rdkit_to_openmm(): - omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) - assert isinstance(omm_ligand, simtk.openmm.app.Modeller) - - -def test_merge_protein_and_ligand(): - complex_topology, complex_positions = merge_protein_and_ligand( - protein_pdb, omm_ligand - ) - assert complex_topology is not None - assert complex_positions is not None - - -def test_water_padding_solvent_builder(): - protein_buffer_solved = water_padding_solvent_builder( - model_water, - forcefield, - water_padding_distance, - protein_pdb, - modeller, - water_positive_ion, - water_negative_ion, - water_ionicstrength, - protein, - ) - assert protein_buffer_solved is not None - - -def test_water_absolute_solvent_builder(): - test_data_directory = Path("openmmdl/tests/data/in") - TEST_PROTEIN = f"{test_data_directory}/6b73.pdb" - protein_pdb = pdbfixer.PDBFixer(str(TEST_PROTEIN)) - protein_absolute_solved = water_absolute_solvent_builder( - model_water, - forcefield, - water_box_x, - water_box_y, - water_box_z, - protein_pdb, - modeller, - water_positive_ion, - water_negative_ion, - water_ionicstrength, - protein, - ) - assert protein_absolute_solved is not None - - -if __name__ == "__main__": - pytest.main() diff --git a/openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py b/openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py deleted file mode 100644 index 030f7304..00000000 --- a/openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py +++ /dev/null @@ -1,115 +0,0 @@ -import pytest -import os -import shutil -from pathlib import Path -import mdtraj as md - -from openmmdl.openmmdl_simulation.scripts.post_md_conversions import ( - mdtraj_conversion, - MDanalysis_conversion, -) - -test_data_directory = Path("openmmdl/tests/data/in") -pdb_file = "0_unk_hoh.pdb" -dcd_file = "trajectory.dcd" -ligand_name = "UNK" - - -def test_mdtraj_conversion(): - original_cwd = os.getcwd() - os.chdir(test_data_directory) - # Create temporary directories to save the output files - output_file_dcd = "centered_old_coordinates.dcd" - output_file_xtc = "centered_old_coordinates.xtc" - output_file_pdb = "centered_old_coordinates_top.pdb" - output_file_gro = "centered_old_coordinates_top.gro" - - mdtraj_conversion(pdb_file, "gro_xtc") - mdtraj_conversion(pdb_file, "pdb_dcd") - - assert output_file_dcd is not None - assert output_file_xtc is not None - assert output_file_pdb is not None - assert output_file_gro is not None - os.chdir(original_cwd) - - -def test_mdanalysis_conversion(): - original_cwd = Path(os.getcwd()) - test_data_directory = Path("openmmdl/tests/data/in") - post_mdtraj_pdb_file = test_data_directory / "centered_old_coordinates_top.pdb" - post_mdtraj_dcd_file = test_data_directory / "centered_old_coordinates.dcd" - - # Create temporary directories to save the output files - all_file_dcd = "centered_traj.dcd" - all_file_dcd_unaligned = "centered_traj_unaligned.dcd" - all_file_pdb = "centered_top.pdb" - prot_lig_file_dcd = "prot_lig_traj.dcd" - prot_lig_file_dcd_unaligned = "prot_lig_traj_unaligned.dcd" - prot_lig_file_pdb = "prot_lig_top.pdb" - all_file_xtc = "centered_traj.xtc" - all_file_xtc_unaligned = "centered_traj_unaligned.xtc" - all_file_gro = "centered_top.gro" - prot_lig_file_xtc = "prot_lig_traj.xtc" - prot_lig_file_xtc_unaligned = "prot_lig_traj_unaligned.xtc" - prot_lig_file_gro = "prot_lig_top.gro" - - shutil.copy(str(post_mdtraj_pdb_file), ".") - shutil.copy(str(post_mdtraj_dcd_file), ".") - - post_mdtraj_pdb_file = "centered_old_coordinates_top.pdb" - post_mdtraj_dcd_file = "centered_old_coordinates.dcd" - ligand_name = "UNK" - mda_output = "pdb_dcd_gro_xtc" - output_selection = "mda_prot_lig_all" - - # MDanalysis_conversion(pdb_file, dcd_file, ligand_name, "pdb_dcd_gro_xtc", "mda_prot_lig_all") - MDanalysis_conversion( - post_mdtraj_pdb_file, - post_mdtraj_dcd_file, - mda_output, - output_selection, - ligand_name, - ) - - assert all_file_dcd is not None - assert all_file_dcd_unaligned is not None - assert all_file_pdb is not None - assert prot_lig_file_dcd is not None - assert prot_lig_file_dcd_unaligned is not None - assert prot_lig_file_pdb is not None - assert all_file_xtc is not None - assert all_file_xtc_unaligned is not None - assert all_file_gro is not None - assert prot_lig_file_xtc is not None - assert prot_lig_file_xtc_unaligned is not None - assert prot_lig_file_gro is not None - - # Assertions or checks to verify the correctness of the results - if "pdb" in mda_output: - if output_selection != "mda_all": - # Check if the expected PDB file exists - pdb_file_path = original_cwd / "prot_lig_top.pdb" - assert pdb_file_path.is_file() - - # Check if the expected DCD file exists - dcd_file_path = original_cwd / "prot_lig_traj.dcd" - assert dcd_file_path.is_file() - - # Check if the DCD file is not empty - traj = md.load(dcd_file_path, top=pdb_file_path) - assert traj.n_frames > 0 - - if "gro" in mda_output: - if output_selection != "mda_all": - # Check if the expected GRO file exists - gro_file_path = original_cwd / "prot_lig_top.gro" - assert gro_file_path.is_file() - - # Check if the expected XTC file exists - xtc_file_path = original_cwd / "prot_lig_traj.xtc" - assert xtc_file_path.is_file() - - # Check if the XTC file is not empty - traj = md.load(xtc_file_path, top=gro_file_path) - assert traj.n_frames > 0 From 7ea845216aa5f75e17f76077c2a1b54b543aff92 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:47:13 +0100 Subject: [PATCH 3/8] Delete openmmdl/tests/openmmdl_analysis directory --- .../barcode_generation_test.py | 325 --- .../binding_mode_processing_test.py | 1829 ----------------- .../openmmdlanalysis_test.py | 124 -- .../openmmdl_analysis/pml_writer_test.py | 248 --- .../test_find_stable_waters.py | 261 --- .../test_interaction_gathering.py | 360 ---- .../openmmdl_analysis/test_preprocessing.py | 229 --- .../test_rdkit_figure_generation.py | 288 --- .../test_rmsd_calculation.py | 71 - .../visualization_functions_test.py | 239 --- 10 files changed, 3974 deletions(-) delete mode 100644 openmmdl/tests/openmmdl_analysis/barcode_generation_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/pml_writer_test.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_preprocessing.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py delete mode 100644 openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py delete mode 100644 openmmdl/tests/openmmdl_analysis/visualization_functions_test.py diff --git a/openmmdl/tests/openmmdl_analysis/barcode_generation_test.py b/openmmdl/tests/openmmdl_analysis/barcode_generation_test.py deleted file mode 100644 index d8eee34a..00000000 --- a/openmmdl/tests/openmmdl_analysis/barcode_generation_test.py +++ /dev/null @@ -1,325 +0,0 @@ -import numpy as np -import pandas as pd -import re -import os -import matplotlib.pyplot as plt -import pytest -from openmmdl.openmmdl_analysis.barcode_generation import * - - -# Barcode generation tests -@pytest.fixture -def sample_dataframe_barcode_generation(): - data = { - "FRAME": [1, 1, 2, 2, 3], - "Interaction1": [1, 0, 1, 0, 0], - "Interaction2": [0, 0, 0, 1, 1], - "WATER_IDX": [101, 102, 103, 104, 105], - } - return pd.DataFrame(data) - - -def test_barcodegeneration(sample_dataframe_barcode_generation): - interaction = "Interaction1" - barcode = barcodegeneration(sample_dataframe_barcode_generation, interaction) - - assert isinstance(barcode, np.ndarray) - - expected_barcode = np.array([1, 1, 0]) - assert np.array_equal(barcode, expected_barcode) - - -def test_waterids_barcode_generator(sample_dataframe_barcode_generation): - interaction = "Interaction2" - waterid_barcode = waterids_barcode_generator( - sample_dataframe_barcode_generation, interaction - ) - - # Test if the output is a list - assert isinstance(waterid_barcode, list) - - # Test the expected waterid barcode for the sample dataframe and interaction - expected_waterid_barcode = [0, 104, 105] - assert waterid_barcode == expected_waterid_barcode - - -def test_plot_barcodes(): - # create barcode data - working_directory = os.getcwd() - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory before:", files_in_working_directory) - - # Test case 1: No barcode - plot_barcodes({}, "no_barcodes.png") - assert not os.path.isfile("no_barcodes.png") - - # Test case 2: Single barcode - barcode_data = { - "166ARGA_4220,4221_Carboxylate_NI_saltbridge": np.array( - [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - ] - ) - } - plot_barcodes(barcode_data, "single_barcode.png") - single_barcode = "single_barcode.png" - assert single_barcode is not None - - barcodes = { - "Barcode 1": np.array( - [ - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - ] - ), - "Barcode 2": np.array( - [ - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - 0, - 1, - ] - ), - # Include more barcodes as needed - } - plot_barcodes(barcodes, "multiple_barcodes.png") - - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory after:", files_in_working_directory) - save_path = "multiple_barcodes.png" - - assert save_path is not None - - -def test_plot_waterbridge_piechart(tmp_path): - # Prepare inputs - df_all = pd.DataFrame( - { - "interaction1": [1, 0, 1, 0], - "interaction2": [0, 1, 0, 1], - "WATER_IDX": [1, 2, 1, 2], # changed 'waterid' to 'WATER_IDX' - "FRAME": [0, 1, 2, 3], # added 'FRAME' column - } - ) - waterbridge_barcodes = [np.array([1, 0, 1, 0]), np.array([0, 1, 0, 1])] - waterbridge_interactions = ["interaction1", "interaction2"] - fig_type = "png" - - # Change the current working directory to tmp_path - - # Use os.makedirs - os.makedirs(f"{tmp_path}/Barcodes/Waterbridge_Piecharts/", exist_ok=True) - - # Call the function - plot_waterbridge_piechart( - df_all, waterbridge_barcodes, waterbridge_interactions, fig_type - ) - - # Check if the output files are created - for interaction in waterbridge_interactions: - outname_png = f"./Barcodes/Waterbridge_Piecharts/{interaction}.png" - assert os.path.isfile(outname_png), f"File {outname_png} not found." - - # Additional assertions for content or specific properties of the generated files - img = plt.imread(outname_png) - assert img is not None, f"Unable to read image file {outname_png}." - - # Retrieve the percentage directly from the Axes object - percentage_text = plt.gca().texts[0].get_text() - assert percentage_text is not None, "Percentage text is None." - - # Retrieve the title directly from the Axes object - title_text = plt.gca().get_title() - assert title_text is not None, "Title text is None." - - # You can add more assertions based on your specific requirements - # For example, check if the file size is greater than zero, etc. - assert os.path.getsize(outname_png) > 0, f"File {outname_png} is empty." - - -def test_plot_bacodes_grouped(tmp_path): - # Create a mock dataframe with all necessary columns - df_all = pd.DataFrame( - { - "column1": [1, 2, 3], - "column2": ["a", "b", "c"], - "FRAME": [0, 1, 2], - "atom1_atom2_interaction": [1, 0, 1], - "atom3_atom4_interaction": [0, 1, 1], - } - ) - - # Define interactions and interaction_type - interactions = ["atom1_atom2_interaction", "atom3_atom4_interaction"] - interaction_type = "interaction" - fig_type = "png" - - working_directory = os.getcwd() - plot_barcodes_grouped(interactions, df_all, interaction_type, fig_type) - # Check if the output files were created - assert os.path.exists( - os.path.join( - working_directory, - "Barcodes", - "atom2", - f"atom2_{interaction_type}_barcodes.png", - ) - ) - assert os.path.exists( - os.path.join( - working_directory, - "Barcodes", - "atom4", - f"atom4_{interaction_type}_barcodes.png", - ) - ) - assert os.path.exists( - os.path.join( - working_directory, "Barcodes", f"{interaction_type}_interactions.png" - ) - ) diff --git a/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py b/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py deleted file mode 100644 index db3797e9..00000000 --- a/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py +++ /dev/null @@ -1,1829 +0,0 @@ -import numpy as np -import pandas as pd -import rdkit -import MDAnalysis as mda -import re -import os -import matplotlib.pyplot as plt -import pytest - -from openmmdl.openmmdl_analysis.binding_mode_processing import * - - -# binding_mode_processing tests -@pytest.fixture -def sample_dataframe_bindingmode_processing(): - data = { - "FRAME": {0: 1, 1: 2, 2: 3, 3: 2}, - "Prot_partner": {0: "A", 1: "B", 2: "C", 3: "A"}, - "INTERACTION": { - 0: "hydrophobic", - 1: "hbond", - 2: "saltbridge", - 3: "hydrophobic", - }, - "LIGCARBONIDX": {0: 101, 1: 102, 2: 103, 3: 102}, - "DONORIDX": {0: 201, 1: 202, 2: 203, 3: 202}, - "ACCEPTORIDX": {0: 301, 1: 302, 2: 303, 3: 302}, - "PROTISDON": {0: True, 1: False, 2: True, 3: False}, - "LIG_IDX_LIST": {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, - "LIG_GROUP": {0: "Group1", 1: "Group2", 2: "Group3", 3: "Group1"}, - "PROTISPOS": {0: True, 1: False, 2: True, 3: True}, - "DON_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONORTYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "ACCEPTOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "LOCATION": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_TYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "RESTYPE_LIG": {0: 0, 1: 0, 2: 0, 3: 0}, - "TARGET_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "COORDINATION": {0: 0, 1: 0, 2: 0, 3: 0}, - } - - # Add 'halogen' and 'hbond' data to the existing DataFrame - data["FRAME"][4] = 4 # Add a new 'FRAME' value - data["Prot_partner"][4] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][4] = "halogen" # Add 'halogen' interaction - data["DON_IDX"][4] = 501 # DON_IDX for 'halogen' - data["DONORTYPE"][4] = "F" # Halogen type - data["ACCEPTOR_IDX"][4] = 0 - data["DONOR_IDX"][4] = 0 - data["LIG_IDX_LIST"][4] = 0 - data["LIG_GROUP"][4] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][4] = 0 - data["TARGET_IDX"][4] = 0 - - data["FRAME"][5] = 5 # Add a new 'FRAME' value - data["Prot_partner"][5] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][5] = "hbond" # Add 'hbond' interaction - data["ACCEPTORIDX"][5] = 301 # ACCEPTORIDX for 'hbond' - data["DON_IDX"][5] = 0 # DON_IDX - data["DONORTYPE"][5] = 0 # DON_IDX - data["PROTISDON"][5] = True # PROTISDON is True for 'hbond' - data["ACCEPTOR_IDX"][5] = 0 - data["LIG_IDX_LIST"][5] = 0 - data["DONOR_IDX"][5] = 0 - data["LIG_GROUP"][5] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][5] = 0 - data["TARGET_IDX"][5] = 0 - - # Add 'waterbridge' cases where PROTISDON is both True and False - data["FRAME"][6] = 6 # Add a new 'FRAME' value - data["Prot_partner"][6] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][6] = "waterbridge" # Add 'waterbridge' interaction - data["ACCEPTOR_IDX"][6] = 401 # ACCEPTOR_IDX for 'waterbridge' - data["DON_IDX"][6] = 0 # DON_IDX - data["DONORTYPE"][6] = 0 # DON_IDX - data["DONOR_IDX"][6] = 0 - data["LIG_IDX_LIST"][6] = 0 - data["PROTISDON"][6] = True # PROTISDON is True for 'waterbridge' - data["LIG_GROUP"][6] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][6] = 0 - data["TARGET_IDX"][6] = 0 - - data["FRAME"][7] = 7 # Add a new 'FRAME' value - data["Prot_partner"][7] = "B" # Add a new 'Prot_partner' value - data["INTERACTION"][7] = "waterbridge" # Add 'waterbridge' interaction - data["DONOR_IDX"][7] = 501 # DONOR_IDX for 'waterbridge' - data["DON_IDX"][7] = 0 # DON_IDX - data["DONORTYPE"][7] = 0 # DON_IDX - data["PROTISDON"][7] = False # PROTISDON is False for 'waterbridge' - data["ACCEPTOR_IDX"][7] = 0 - data["LIG_IDX_LIST"][7] = 0 # LIG_IDX_LIST for 'pication' - data["LIG_GROUP"][7] = 0 # LIG_GROUP for 'pication - data["RESTYPE_LIG"][7] = 0 - data["TARGET_IDX"][7] = 0 - - # Add 'pistacking' case - data["FRAME"][8] = 8 # Add a new 'FRAME' value - data["Prot_partner"][8] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][8] = "pistacking" # Add 'pistacking' interaction - data["LIG_IDX_LIST"][8] = [7, 8] # LIG_IDX_LIST for 'pistacking' - data["LIG_GROUP"][8] = 0 # LIG_GROUP for 'pication - data["ACCEPTOR_IDX"][8] = 0 - data["DON_IDX"][8] = 0 # DON_IDX - data["DONOR_IDX"][8] = 0 - data["PROTISDON"][8] = False - data["DONORTYPE"][8] = 0 # DON_IDX - data["RESTYPE_LIG"][8] = 0 - data["TARGET_IDX"][8] = 0 - - # Add 'pication' case - data["FRAME"][9] = 9 # Add a new 'FRAME' value - data["Prot_partner"][9] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][9] = "pication" # Add 'pication' interaction - data["LIG_IDX_LIST"][9] = [9, 10] # LIG_IDX_LIST for 'pication' - data["LIG_GROUP"][9] = "Group4" # LIG_GROUP for 'pication' - data["ACCEPTOR_IDX"][9] = 0 - data["DON_IDX"][9] = 0 # DON_IDX - data["PROTISDON"][9] = False - data["DONOR_IDX"][9] = 0 - data["DONORTYPE"][9] = 0 # DON_IDX - data["RESTYPE_LIG"][9] = 0 - data["TARGET_IDX"][9] = 0 - - # Add 'metal' interaction case - data["FRAME"][10] = 10 # Add a new 'FRAME' value - data["Prot_partner"][10] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][10] = "metal" # Add 'metal' interaction - data["METAL_IDX"][10] = 401 # METAL_IDX for 'metal' - data["METAL_TYPE"][10] = "Fe" # Metal type - data["LOCATION"][10] = "site1" # Location - data["ACCEPTOR_IDX"][10] = 0 - data["DONOR_IDX"][10] = 0 - data["RESTYPE_LIG"][10] = "A" - data["TARGET_IDX"][10] = 401 - data["COORDINATION"][10] = "site1" - - data["FRAME"][11] = 11 # Add a new 'FRAME' value - data["Prot_partner"][11] = "A" # Add a new 'Prot_partner' value - data["INTERACTION"][11] = "saltbridge" # Add 'saltbridge' interaction - data["LIG_IDX_LIST"][11] = [7, 8] # Ligand index list for 'saltbridge PI' - data["LIG_GROUP"][11] = "Group4" # Ligand group for 'saltbridge PI' - data["PROTISPOS"][11] = False # PROTISPOS is False for 'saltbridge PI' - data["RESTYPE_LIG"][11] = 0 - data["TARGET_IDX"][11] = 0 - - # Add 'hydrophobic' case where 'ring_found' is False - data["FRAME"][12] = 12 # Add a new 'FRAME' value - data["Prot_partner"][12] = "C" # Add a new 'Prot_partner' value - data["INTERACTION"][12] = "hydrophobic" # Add 'hydrophobic' interaction - data["LIGCARBONIDX"][12] = 104 # LIGCARBONIDX for 'hydrophobic' (not in any ring) - data["RESTYPE_LIG"][12] = 0 - data["TARGET_IDX"][12] = 0 - - return pd.DataFrame(data) - - -@pytest.fixture -def sample_dataframe_bindingmode_processing_with_peptides(): - data = { - "FRAME": {0: 1, 1: 2, 2: 3, 3: 2}, - "Prot_partner": {0: "62VAL", 1: "SER144", 2: "GLU321", 3: "ILE432"}, - "INTERACTION": { - 0: "hydrophobic", - 1: "hbond", - 2: "saltbridge", - 3: "hydrophobic", - }, - "LIGCARBONIDX": {0: 101, 1: 102, 2: 103, 3: 102}, - "DONORIDX": {0: 201, 1: 202, 2: 203, 3: 202}, - "ACCEPTORIDX": {0: 301, 1: 302, 2: 303, 3: 302}, - "PROTISDON": {0: True, 1: False, 2: True, 3: False}, - "LIG_IDX_LIST": {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, - "LIG_GROUP": {0: "Group1", 1: "Group2", 2: "Group3", 3: "Group1"}, - "PROTISPOS": {0: True, 1: False, 2: True, 3: True}, - "DON_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONORTYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "ACCEPTOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "DONOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "LOCATION": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "METAL_TYPE": {0: 0, 1: 0, 2: 0, 3: 0}, - "RESTYPE_LIG": {0: "ILE", 1: "TYR", 2: "ARG", 3: "VAL"}, - "TARGET_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, - "COORDINATION": {0: 0, 1: 0, 2: 0, 3: 0}, - "RESNR_LIG": {0: "101", 1: "202", 2: "155", 3: "102"}, - } - - # Additional data for peptide interactions - data["FRAME"][4] = 4 - data["Prot_partner"][4] = "LEU248" - data["INTERACTION"][4] = "halogen" - data["DON_IDX"][4] = 501 - data["DONORTYPE"][4] = "F" - data["ACCEPTOR_IDX"][4] = 0 - data["DONOR_IDX"][4] = 0 - data["LIG_IDX_LIST"][4] = 0 - data["LIG_GROUP"][4] = 0 - data["RESTYPE_LIG"][4] = "ILE" - data["TARGET_IDX"][4] = 0 - data["RESNR_LIG"][4] = "501" - - data["FRAME"][5] = 5 - data["Prot_partner"][5] = "SER300" - data["INTERACTION"][5] = "hbond" - data["ACCEPTORIDX"][5] = 301 - data["DON_IDX"][5] = 0 - data["DONORTYPE"][5] = 0 - data["PROTISDON"][5] = True - data["ACCEPTOR_IDX"][5] = 0 - data["LIG_IDX_LIST"][5] = 0 - data["DONOR_IDX"][5] = 0 - data["LIG_GROUP"][5] = 0 - data["RESTYPE_LIG"][5] = "HIS" - data["TARGET_IDX"][5] = 0 - data["RESNR_LIG"][5] = "301" - - data["FRAME"][6] = 6 - data["Prot_partner"][6] = "TYR343" - data["INTERACTION"][6] = "waterbridge" - data["ACCEPTOR_IDX"][6] = 401 - data["DON_IDX"][6] = 0 - data["DONORTYPE"][6] = 0 - data["DONOR_IDX"][6] = 0 - data["LIG_IDX_LIST"][6] = 0 - data["PROTISDON"][6] = True - data["LIG_GROUP"][6] = 0 - data["RESTYPE_LIG"][6] = "SER" - data["TARGET_IDX"][6] = 0 - data["RESNR_LIG"][6] = "455" - - data["FRAME"][7] = 7 - data["Prot_partner"][7] = "ILE178" - data["INTERACTION"][7] = "waterbridge" - data["DONOR_IDX"][7] = 501 - data["DON_IDX"][7] = 0 - data["DONORTYPE"][7] = 0 - data["PROTISDON"][7] = False - data["ACCEPTOR_IDX"][7] = 0 - data["LIG_IDX_LIST"][7] = 0 - data["LIG_GROUP"][7] = 0 - data["RESTYPE_LIG"][7] = "TYR" - data["TARGET_IDX"][7] = 0 - data["RESNR_LIG"][7] = "467" - - data["FRAME"][8] = 8 - data["Prot_partner"][8] = "PHE344" - data["INTERACTION"][8] = "pistacking" - data["LIG_IDX_LIST"][8] = [7, 8] - data["LIG_GROUP"][8] = 0 - data["ACCEPTOR_IDX"][8] = 0 - data["DON_IDX"][8] = 0 - data["DONOR_IDX"][8] = 0 - data["PROTISDON"][8] = False - data["DONORTYPE"][8] = 0 - data["RESTYPE_LIG"][8] = "PHE" - data["TARGET_IDX"][8] = 0 - data["RESNR_LIG"][8] = "398" - - data["FRAME"][9] = 9 - data["Prot_partner"][9] = "PHE754" - data["INTERACTION"][9] = "pication" - data["LIG_IDX_LIST"][9] = [9, 10] - data["LIG_GROUP"][9] = "B" - data["ACCEPTOR_IDX"][9] = 0 - data["DON_IDX"][9] = 0 - data["PROTISDON"][9] = False - data["DONOR_IDX"][9] = 0 - data["DONORTYPE"][9] = 0 - data["RESTYPE_LIG"][9] = "ARG" - data["TARGET_IDX"][9] = 0 - data["RESNR_LIG"][9] = "245" - - data["FRAME"][10] = 10 - data["Prot_partner"][10] = "LYS567" - data["INTERACTION"][10] = "pication" - data["LIG_IDX_LIST"][10] = [9, 10] - data["LIG_GROUP"][10] = "B" - data["RESTYPE_LIG"][10] = "PHE" - data["TARGET_IDX"][10] = 501 - data["RESNR_LIG"][10] = "228" - - data["FRAME"][11] = 11 - data["Prot_partner"][11] = "LYS567" - data["INTERACTION"][11] = "saltbridge" - data["LIG_IDX_LIST"][11] = [7, 8] - data["LIG_GROUP"][11] = "Group4" - data["PROTISPOS"][11] = False - data["RESTYPE_LIG"][11] = "GLU" - data["TARGET_IDX"][11] = 0 - data["RESNR_LIG"][11] = "423" - - data["FRAME"][12] = 12 - data["Prot_partner"][12] = "HEM144" - data["INTERACTION"][12] = "metal" - data["METAL_IDX"][12] = 401 # METAL_IDX for 'metal' - data["METAL_TYPE"][12] = "Fe" # Metal type - data["LOCATION"][12] = "site1" # Location - data["ACCEPTOR_IDX"][12] = 0 - data["DON_IDX"][12] = 0 - data["RESTYPE_LIG"][ - 12 - ] = "HIS" # Assuming 'A' as the RESTYPE_LIG for the metal interaction - data["TARGET_IDX"][12] = 401 - data["COORDINATION"][12] = "site1" - data["RESNR_LIG"][12] = "256" - - return pd.DataFrame(data) - - -def test_gather_interactions(sample_dataframe_bindingmode_processing): - df = sample_dataframe_bindingmode_processing - ligand_rings = [[101], [102], [103]] # Define sample ligand rings for testing - - result = gather_interactions(df, ligand_rings) - - # Assert that the result is a dictionary - - assert isinstance(result, dict) - - # Check specific values in the generated dictionary for known interactions based on the updated fixture - expected_result = { - 1: {0: "A_101_hydrophobic"}, - 2: {1: "B_202_Donor_hbond", 3: "A_102_hydrophobic"}, - 3: {2: "C_[5, 6]_Group3_NI_saltbridge"}, - 4: {4: "A_501_F_halogen"}, - 5: {5: "A_301_Acceptor_hbond"}, - 6: {6: "A_401_Acceptor_waterbridge"}, - 7: {7: "B_501_Donor_waterbridge"}, - 8: {8: "A_[7, 8]_pistacking"}, - 9: {9: "A_[9_ 10]_Group4_pication"}, - 10: {10: "A_401_Fe_site1_metal"}, - 11: {11: "A_[7, 8]_Group4_PI_saltbridge"}, - 12: {12: "C_104_hydrophobic"}, - } - # Check if the actual result matches the expected result - assert result == expected_result - - -def test_gather_interactions_with_peptides( - sample_dataframe_bindingmode_processing_with_peptides, -): - df = sample_dataframe_bindingmode_processing_with_peptides - ligand_rings = [[101], [102], [103]] # Define sample ligand rings for testing - - result = gather_interactions(df, ligand_rings, peptide=True) - - # Assert that the result is a dictionary - assert isinstance(result, dict) - - # Check specific values in the generated dictionary for known interactions based on the updated fixture - expected_result = { - 1: {0: "62VAL_101ILE_hydrophobic"}, - 2: {1: "SER144_202TYR_Donor_hbond", 3: "ILE432_102VAL_hydrophobic"}, - 3: {2: "GLU321_155ARG_ARG_NI_saltbridge"}, - 4: {4: "LEU248_501ILE_F_halogen"}, - 5: {5: "SER300_301HIS_Acceptor_hbond"}, - 6: {6: "TYR343_455SER_Acceptor_waterbridge"}, - 7: {7: "ILE178_467TYR_Donor_waterbridge"}, - 8: {8: "PHE344_398PHE_pistacking"}, - 9: {9: "PHE754_245ARG_ARG_pication"}, - 10: {10: "LYS567_228PHE_PHE_pication"}, - 11: {11: "LYS567_423GLU_GLU_PI_saltbridge"}, - 12: {12: "HIS_256HIS_Fe_site1_metal"}, - } - - # Check if the actual result matches the expected result - assert result == expected_result - - -@pytest.fixture -def test_remove_duplicates_data(): - input_data = {"a": {"x": 1, "y": 2, "z": 1}, "b": {"p": 3, "q": 3, "r": 4}} - expected_output = {"a": {"x": 1, "y": 2}, "b": {"p": 3, "r": 4}} - return input_data, expected_output - - -def test_unique_data_generation(): - # Test case 1: Check if the function returns an empty dictionary for an empty list - result = unique_data_generation([]) - assert result == {} - - # Test case 2: Check if the function correctly identifies and stores unique values - input_list = [1, 2, 2, 3, 3, 4, 5, 5] - result = unique_data_generation(input_list) - expected_result = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} - assert result == expected_result - - # Test case 3: Check if the function handles strings - input_list = ["apple", "banana", "apple", "cherry"] - result = unique_data_generation(input_list) - expected_result = {"apple": "apple", "banana": "banana", "cherry": "cherry"} - assert result == expected_result - - -# Define a test case that uses the fixture -def test_remove_duplicate_values(test_remove_duplicates_data): - input_data, expected_output = test_remove_duplicates_data - assert remove_duplicate_values(input_data) == expected_output - - -def test_combine_subdict_values(): - # Test case 1: Empty input dictionary - data = {} - result = combine_subdict_values(data) - assert result == {"all": []} - - # Test case 2: Input dictionary with sub-dictionaries - data = { - "dict1": {"a": 1, "b": 2}, - "dict2": {"c": 3, "d": 4}, - "dict3": {"e": 5, "f": 6}, - } - result = combine_subdict_values(data) - assert result == {"all": [1, 2, 3, 4, 5, 6]} - - # Test case 3: Input dictionary with empty sub-dictionaries - data = { - "dict1": {}, - "dict2": {}, - } - result = combine_subdict_values(data) - assert result == {"all": []} - - # Test case 4: Input dictionary with sub-dictionaries containing various data types - data = { - "dict1": {"a": 1, "b": "text", "c": [1, 2, 3]}, - "dict2": {"d": None, "e": 5.5}, - } - result = combine_subdict_values(data) - assert result == {"all": [1, "text", [1, 2, 3], None, 5.5]} - - -# Define a sample DataFrame for testing -sample_data = { - "A": [1, 2, 3, 4, 5], - "B": [2, 3, 4, 5, 6], - "C": [3, 4, 5, 6, 7], - "D": [4, 5, 6, 7, 8], -} -sample_df = pd.DataFrame(sample_data) - -# Define the provided 'unique_columns_rings_grouped' data for testing -unique_columns_rings_grouped = { - 1: {0: "A_101_hydrophobic"}, - 2: {1: "B_202_Donor_hbond", 3: "A_102_hydrophobic"}, - 3: {2: "C_[5, 6]_Group3_NI_saltbridge"}, - 4: {4: "A_501_F_halogen"}, - 5: {5: "A_301_Acceptor_hbond"}, - 6: {6: "A_401_Acceptor_waterbridge"}, - 7: {7: "B_501_Donor_waterbridge"}, - 8: {8: "A_[7, 8]_pistacking"}, - 9: {9: "A_[9_ 10]_Group4_pication"}, - 10: {10: "A_401_Fe_site1_metal"}, - 11: {11: "A_[7, 8]_Group4_PI_saltbridge"}, - 12: {12: "C_104_hydrophobic"}, -} - - -def test_filtering_values_with_provided_data(): - # Test case 1: Check if the function returns a list - threshold = 0.2 # 20% threshold - frames = 1000 # Some arbitrary number of frames - df = pd.DataFrame() # Create an empty DataFrame for testing - result = filtering_values(threshold, frames, df, unique_columns_rings_grouped) - - assert isinstance(result, list) # Check if the result is a list - - # Test case 2: Check if the filtered values are present in the DataFrame - assert all(col in df.columns for col in result) - - # Test case 3: Check if the DataFrame has the correct shape after filtering - expected_shape = (df.shape[0], df.shape[1] + len(result)) - assert df.shape == expected_shape - - # Test case 4: Check if the filtered values are not duplicated - assert len(set(result)) == len(result) - - # Test case 5: Check if the DataFrame values are initially set to None - assert df[result].isnull().all().all() - - # Test case 6: Check if the threshold calculation is correct - expected_threshold = threshold * frames - occurrences = {value: 5 for value in result} # Assume all values occur 5 times - assert all(count >= expected_threshold for count in occurrences.values()) - - -def test_df_iteration_numbering(): - # Sample DataFrame for testing - data = { - "Unnamed: 0": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - ], - "RESNR": [ - 98, - 63, - 162, - 161, - 166, - 165, - 125, - 166, - 211, - 227, - 223, - 165, - 100, - 59, - 98, - 207, - 164, - 155, - 228, - ], - "RESTYPE": [ - "PHE", - "ARG", - "ALA", - "PHE", - "ARG", - "ASP", - "TYR", - "ARG", - "PHE", - "LEU", - "THR", - "ASP", - "ASP", - "ARG", - "PHE", - "PHE", - "LYS", - "HEM", - "SER", - ], - "RESCHAIN": [ - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - "A", - ], - "RESNR_LIG": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "RESTYPE_LIG": [ - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "UNK", - "HEM", - "UNK", - ], - "RESCHAIN_LIG": [ - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - "X", - ], - "DIST": [ - 3.46, - 0.00, - 0.00, - 0.00, - 0.00, - 0.00, - 0.00, - 3.36, - 3.61, - 3.84, - 3.62, - 3.72, - 3.62, - 3.99, - 3.65, - 3.70, - 5.16, - 2.55, - 2.34, - ], - "LIGCARBONIDX": [ - 4196.0, - 0.0, - 4214.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4206.0, - 4207.0, - 4207.0, - 4215.0, - 4217.0, - 4217.0, - 4194.0, - 4208.0, - 0.0, - 0.0, - 0.0, - ], - "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "166ARGA_4220,4221_Carboxylate_NI_saltbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "98PHEA_4194_hydrophobic": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "63ARGA_4201_Acceptor_waterbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "166ARGA_4220_Acceptor_hbond": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "98PHEA_4225_Donor_hbond": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "207PHEA_4213,4214,4215,4216,4217,4218_pistacking": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "100ASPA_4005_Donor_waterbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "59ARGA_4222_Acceptor_waterbridge": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "HEM_4255_Fe_4.0_metal": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "228SERA_4228_F_halogen": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - } - - df = pd.DataFrame(data) - - interactions = [ - "hbond", - "waterbridge", - "hydrophobic", - "hbond", - "hbond", - "hbond", - "hbond", - "saltbridge", - "hydrophobic", - "hydrophobic", - "hydrophobic", - "hydrophobic", - "waterbridge", - "waterbridge", - "hydrophobic", - "pistacking", - "pication", - "metal", - "halogen", - ] - df["INTERACTION"] = interactions - - # Define the values for the "PROTISDON" column - protisdon_values = [ - False, - True, - True, - True, - True, - True, - True, - 0, - 0, - 0, - 0, - 0, - False, - True, - 0, - 0, - 0, - 0, - 0, - ] - - # Update the "PROTISDON" column in the DataFrame - df["PROTISDON"] = protisdon_values - - # Define the values for the "Prot_partner" column - prot_partner_values = [ - "98PHEA", - "63ARGA", - "162ALAA", - "161PHEA", - "166ARGA", - "165ASPA", - "125TYRA", - "166ARGA", - "211PHEA", - "227LEUA", - "223THRA", - "165ASPA", - "100ASPA", - "59ARGA", - "98PHEA", - "207PHEA", - "164LYSA", - "105HEM", - "228SERA", - ] - - # Update the "Prot_partner" column in the DataFrame - df["Prot_partner"] = prot_partner_values - - # Define the values for the "ACCEPTORIDX" column - acceptoridx_values = [ - 0.0, - 0.0, - 4221.0, - 4221.0, - 4220.0, - 4220.0, - 4192.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - ] - - # Update the "ACCEPTORIDX" column in the DataFrame - df["ACCEPTORIDX"] = acceptoridx_values - - # Define the values for the "DONORIDX" column - donoridx_values = [ - 4225.0, - 0.0, - 2417.0, - 2397.0, - 2468.0, - 2456.0, - 1828.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - ] - - # Update the "DONORIDX" column in the DataFrame - df["DONORIDX"] = donoridx_values - - # Define the values for the "ACCEPTOR_IDX" column - acceptor_idx_values = [ - 0.0, - 4201.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4222.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - ] - - # Add the "ACCEPTOR_IDX" column to the DataFrame - df["ACCEPTOR_IDX"] = acceptor_idx_values - - # Define the values for the "DONOR_IDX" column - donor_idx_values = [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4005.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4228.0, - ] - - # Add the "DONOR_IDX" column to the DataFrame - df["DONOR_IDX"] = donor_idx_values - - # Define the values for the "DON_IDX" column - don_idx_values = [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4005.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 4228.0, - ] - - # Add the "DON_IDX" column to the DataFrame - df["DON_IDX"] = don_idx_values - - # Define the values for the "LIG_IDX_LIST" column - lig_idx_list_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "4220,4221", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "4213,4214,4215,4216,4217,4218", - "4213,4214,4215,4216,4217,4218", - 0, - 0, - ] - - # Add the "LIG_IDX_LIST" column to the DataFrame - df["LIG_IDX_LIST"] = lig_idx_list_values - - # Define the values for the "LIG_GROUP" column - lig_group_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "Carboxylate", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "Aromatic", - "Aromatic", - 0, - 0, - ] - - # Add the "LIG_GROUP" column to the DataFrame - df["LIG_GROUP"] = lig_group_values - - # Define the values for the "TARGET_IDX" column - target_idx_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4255, 0] - - # Add the "TARGET_IDX" column to the DataFrame - df["TARGET_IDX"] = target_idx_values - - # Define the values for the "METAL_TYPE" column - metal_type_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "Fe", 0] - - # Add the "METAL_TYPE" column to the DataFrame - df["METAL_TYPE"] = metal_type_values - - # Define the values for the "COORDINATION" column - coordination_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0] - - # Add the "COORDINATION" column to the DataFrame - df["COORDINATION"] = coordination_values - - # Define the values for the "DONORTYPE" column - donor_type_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "F"] - - # Add the "DONORTYPE" column to the DataFrame - df["DONORTYPE"] = donor_type_values - - # Updated unique_data dictionary - unique_data = { - "63ARGA_4201_Acceptor_waterbridge": "63ARGA_4201_Acceptor_waterbridge", - "166ARGA_4220_Acceptor_hbond": "166ARGA_4220_Acceptor_hbond", - "166ARGA_4220,4221_Carboxylate_NI_saltbridge": "166ARGA_4220,4221_Carboxylate_NI_saltbridge", - "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic": "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic", - "98PHEA_4194_hydrophobic": "98PHEA_4194_hydrophobic", - "98PHEA_4225_Donor_hbond": "98PHEA_4225_Donor_hbond", - "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication": "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication", - "207PHEA_4213,4214,4215,4216,4217,4218_pistacking": "207PHEA_4213,4214,4215,4216,4217,4218_pistacking", - "59ARGA_4222_Acceptor_waterbridge": "59ARGA_4222_Acceptor_waterbridge", - "100ASPA_4005_Donor_waterbridge": "100ASPA_4005_Donor_waterbridge", - "HEM_4255_Fe_4.0_metal": "HEM_4255_Fe_4.0_metal", - "228SERA_4228_F_halogen": "228SERA_4228_F_halogen", - } - - # Call the function with the sample DataFrame and unique_data - df_iteration_numbering(df, unique_data) - - expected_162ALAA_values = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - df["162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic"] - == expected_162ALAA_values - ).all() - - expected_98PHEA_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] - assert (df["98PHEA_4194_hydrophobic"] == expected_98PHEA_values).all() - - expected_166ARGA_4220_Acceptor_hbond_values = [ - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["166ARGA_4220_Acceptor_hbond"] == expected_166ARGA_4220_Acceptor_hbond_values - ).all() - - expected_Carboxylate_NI_saltbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["166ARGA_4220,4221_Carboxylate_NI_saltbridge"] - == expected_Carboxylate_NI_saltbridge_values - ).all() - - expected_63ARGA_4201_Acceptor_waterbridge_values = [ - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["63ARGA_4201_Acceptor_waterbridge"] - == expected_63ARGA_4201_Acceptor_waterbridge_values - ).all() - - expected_98PHEA_4225_Donor_hbond_values = [ - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["98PHEA_4225_Donor_hbond"] == expected_98PHEA_4225_Donor_hbond_values - ).all() - - expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - ] - assert ( - df["164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication"] - == expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values - ).all() - - expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - ] - assert ( - df["207PHEA_4213,4214,4215,4216,4217,4218_pistacking"] - == expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values - ).all() - - expected_59ARGA_4222_Acceptor_waterbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["59ARGA_4222_Acceptor_waterbridge"] - == expected_59ARGA_4222_Acceptor_waterbridge_values - ).all() - - expected_100ASPA_4005_Donor_waterbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - df["100ASPA_4005_Donor_waterbridge"] - == expected_100ASPA_4005_Donor_waterbridge_values - ).all() - - expected_HEM_4255_Fe_4_metal_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - ] - assert (df["HEM_4255_Fe_4.0_metal"] == expected_HEM_4255_Fe_4_metal_values).all() - - expected_228SERA_4228_F_halogen_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - ] - assert ( - df["228SERA_4228_F_halogen"] == expected_228SERA_4228_F_halogen_values - ).all() - - -@pytest.fixture -def sample_dataframe_it_peptides(): - # Create a sample DataFrame for testing - data = { - "Unnamed: 0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - "Prot_partner": [ - "62VAL", - "SER144", - "GLU321", - "ILE432", - "LEU248", - "SER300", - "TYR343", - "ILE178", - "PHE344", - "PHE754", - "LYS567", - "LYS567", - "HIS", - ], - "LIGCARBONIDX": [ - 101, - 202, - 155, - 102, - 501, - 301, - 467, - 467, - 398, - 245, - 228, - 423, - 256, - ], - "INTERACTION": [ - "hydrophobic", - "hbond", - "saltbridge", - "hydrophobic", - "halogen", - "hbond", - "waterbridge", - "waterbridge", - "pistacking", - "pication", - "pication", - "saltbridge", - "metal", - ], - "PROTISDON": [ - None, - False, - None, - None, - None, - True, - True, - False, - None, - None, - None, - False, - None, - ], - "ACCEPTORIDX": [ - None, - 202, - None, - None, - None, - 301, - None, - None, - None, - None, - None, - None, - None, - ], - "RESNR_LIG": [101, 202, 155, 102, 501, 301, 455, 467, 398, 245, 228, 423, 256], - "DONORIDX": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - ], - "DONORTYPE": [ - None, - None, - None, - None, - "F", - None, - None, - None, - None, - None, - None, - None, - None, - ], - "LIG_IDX_LIST": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - ], - "RESTYPE_LIG": [ - "VAL", - "TYR", - "ARG", - "VAL", - "ILE", - "HIS", - "SER", - "TYR", - "PHE", - "ARG", - "PHE", - "GLU", - "HIS", - ], - "TARGET_IDX": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - ], - "METAL_TYPE": [ - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - "Fe", - ], - "62VAL_101ILE_hydrophobic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "SER144_202TYR_Donor_hbond": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "GLU321_155ARG_ARG_NI_saltbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "ILE432_102VAL_hydrophobic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "LEU248_501ILE_F_halogen": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "SER300_301HIS_Acceptor_hbond": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "TYR343_455SER_Acceptor_waterbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "ILE178_467TYR_Donor_waterbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "PHE344_398PHE_pistacking": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "PHE754_245ARG_ARG_pication": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "LYS567_228PHE_PHE_pication": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "LYS567_423GLU_GLU_PI_saltbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "HIS_256HIS_Fe_site1_metal": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - } - - df = pd.DataFrame(data) - return df - - -def test_df_iteration_numbering_with_peptide(sample_dataframe_it_peptides): - unique_data = { - 0: "62VAL_101ILE_hydrophobic", - 1: "SER144_202TYR_Donor_hbond", - 2: "GLU321_155ARG_ARG_NI_saltbridge", - 3: "ILE432_102VAL_hydrophobic", - 4: "LEU248_501ILE_F_halogen", - 5: "SER300_301HIS_Acceptor_hbond", - 6: "TYR343_455SER_Acceptor_waterbridge", - 7: "ILE178_467TYR_Donor_waterbridge", - 8: "PHE344_398PHE_pistacking", - 9: "PHE754_245ARG_ARG_pication", - 10: "LYS567_228PHE_PHE_pication", - 11: "LYS567_423GLU_GLU_PI_saltbridge", - 12: "HIS_256HIS_Fe_site1_metal", - } - - df_iteration_numbering(sample_dataframe_it_peptides, unique_data, peptide=True) - - # Assertions similar to the provided ones - expected_101ILE_hydrophobic_values = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["62VAL_101ILE_hydrophobic"] - == expected_101ILE_hydrophobic_values - ).all() - - expected_202TYR_Donor_hbond_values = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["SER144_202TYR_Donor_hbond"] - == expected_202TYR_Donor_hbond_values - ).all() - - expected_155ARG_ARG_NI_saltbridge_values = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["GLU321_155ARG_ARG_NI_saltbridge"] - == expected_155ARG_ARG_NI_saltbridge_values - ).all() - - expected_102VAL_hydrophobic_values = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["ILE432_102VAL_hydrophobic"] - == expected_102VAL_hydrophobic_values - ).all() - - expected_501ILE_halogen_values = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["LEU248_501ILE_F_halogen"] - == expected_501ILE_halogen_values - ).all() - - expected_301HIS_Acceptor_hbond_values = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["SER300_301HIS_Acceptor_hbond"] - == expected_301HIS_Acceptor_hbond_values - ).all() - - expected_455SER_Acceptor_waterbridge_values = [ - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - ] - assert ( - sample_dataframe_it_peptides["TYR343_455SER_Acceptor_waterbridge"] - == expected_455SER_Acceptor_waterbridge_values - ).all() - - expected_467TYR_Donor_waterbridge_values = [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["ILE178_467TYR_Donor_waterbridge"] - == expected_467TYR_Donor_waterbridge_values - ).all() - - expected_398PHE_pistacking_values = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["PHE344_398PHE_pistacking"] - == expected_398PHE_pistacking_values - ).all() - - expected_245ARG_ARG_pication_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0] - assert ( - sample_dataframe_it_peptides["PHE754_245ARG_ARG_pication"] - == expected_245ARG_ARG_pication_values - ).all() - - expected_228PHE_PHE_pication_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0] - assert ( - sample_dataframe_it_peptides["LYS567_228PHE_PHE_pication"] - == expected_228PHE_PHE_pication_values - ).all() - - expected_423GLU_GLU_PI_saltbridge_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0] - assert ( - sample_dataframe_it_peptides["LYS567_423GLU_GLU_PI_saltbridge"] - == expected_423GLU_GLU_PI_saltbridge_values - ).all() - - expected_256HIS_Fe_site1_metal_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] - assert ( - sample_dataframe_it_peptides["HIS_256HIS_Fe_site1_metal"] - == expected_256HIS_Fe_site1_metal_values - ).all() - - -@pytest.fixture -def sample_data(): - # Create sample data for testing - df = pd.DataFrame( - {"FRAME": [1, 2, 3], "Column1": [10, 20, 30], "Column2": [40, 50, 60]} - ) - - new_df = pd.DataFrame( - {"FRAME": [1, 2, 3], "Column1": [100, 200, 300], "Column2": [400, 500, 600]} - ) - - unique_data = {"Column1": "Column1", "Column2": "Column2"} - - return df, new_df, unique_data - - -def test_update_values(sample_data): - # Arrange - df, new_df, unique_data = sample_data - - # Set 'FRAME' as the index for new_df - new_df = new_df.set_index("FRAME") - - # Act - update_values(df, new_df, unique_data) - - # Assert - expected_df = pd.DataFrame( - {"FRAME": [1, 2, 3], "Column1": [100, 200, 300], "Column2": [400, 500, 600]} - ) - - # Check if the specific values are updated - assert df[["Column1", "Column2"]].equals(expected_df[["Column1", "Column2"]]) - - -def test_calculate_representative_frame(): - test_data_directory = "openmmdl/tests/data/in" - test_data_directory = Path("openmmdl/tests/data/in") - # load mdtraj trajectory - md = mda.Universe( - f"{test_data_directory}/interacting_waters.pdb", - f"{test_data_directory}/interacting_waters.dcd", - ) - dm = calculate_distance_matrix(md, "protein or resname UNK") - rep = calculate_representative_frame([i for i in range(1, 10)], dm) - assert rep == 4 diff --git a/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py b/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py deleted file mode 100644 index 07918a70..00000000 --- a/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py +++ /dev/null @@ -1,124 +0,0 @@ -import pytest -import subprocess -import os -import openmmdl - -from pathlib import Path - -from openmmdl.openmmdl_analysis.preprocessing import ( - process_pdb_file, - increase_ring_indices, - convert_ligand_to_smiles, -) -from openmmdl.openmmdl_analysis.rmsd_calculation import ( - rmsd_for_atomgroups, - RMSD_dist_frames, -) -from openmmdl.openmmdl_analysis.interaction_gathering import ( - characterize_complex, - retrieve_plip_interactions, - create_df_from_binding_site, - process_frame, - process_trajectory, - fill_missing_frames, -) -from openmmdl.openmmdl_analysis.binding_mode_processing import ( - gather_interactions, - remove_duplicate_values, - combine_subdict_values, - filtering_values, - unique_data_generation, - df_iteration_numbering, - update_values, -) -from openmmdl.openmmdl_analysis.markov_state_figure_generation import ( - min_transition_calculation, - binding_site_markov_network, -) -from openmmdl.openmmdl_analysis.rdkit_figure_generation import ( - split_interaction_data, - highlight_numbers, - generate_interaction_dict, - update_dict, - create_and_merge_images, - arranged_figure_generation, -) -from openmmdl.openmmdl_analysis.barcode_generation import ( - barcodegeneration, - plot_barcodes, - plot_waterbridge_piechart, -) -from openmmdl.openmmdl_analysis.visualization_functions import ( - interacting_water_ids, - save_interacting_waters_trajectory, - cloud_json_generation, -) -from openmmdl.openmmdl_analysis.pml_writer import ( - generate_md_pharmacophore_cloudcenters, - generate_bindingmode_pharmacophore, - generate_pharmacophore_centers_all_points, - generate_point_cloud_pml, -) - -# Print current working directory -print("Current working directory:", os.getcwd()) - -# Print the full path to the input file -input_pdb_filename = "openmmdl/tests/data/in/0_unk_hoh.pdb" -print("Full path to input file:", os.path.abspath(input_pdb_filename)) - -test_data_directory = Path("openmmdl/tests/data/in") - - -@pytest.fixture(scope="session") -def test_data_dir(tmp_path_factory): - data_dir = tmp_path_factory.mktemp("test_data") - return data_dir - - -# def test_script_execution(test_data_dir): -# # Define the root directory -# root_dir = "/home/runner/work/OpenMMDL/OpenMMDL" -# -# # Specify the relative path to the input data -# relative_input_path = "openmmdl/tests/data/in" -# -# # Combine the root directory and relative input path -# input_data_path = os.path.join(root_dir, relative_input_path) - -# # Ensure that the script runs successfully without errors -# script_path = "openmmdlanalysis.py" -# topology_file = os.path.join(input_data_path, "0_unk_hoh.pdb") -# trajectory_file = os.path.join(root_dir, "openmmdl/tests/data/in/all_50.dcd") -# ligand_sdf_file = os.path.join(input_data_path, "lig.sdf") -# ligand_name = "UNK" - -# cmd = f" openmmdl_analysis -t {topology_file} -d {trajectory_file} -l {ligand_sdf_file} -n {ligand_name} -b 10 -c 2" - -# result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=test_data_dir) - -# assert result.returncode == 0, f"Script execution failed with error:\n{result.stderr.decode()}" - -# # Check that expected output files are generated -# assert os.path.exists(os.path.join(test_data_dir, "complex.pdb")) -# assert os.path.exists(os.path.join(test_data_dir, "lig.pdb")) -# assert os.path.exists(os.path.join(test_data_dir, "df_all.csv")) - -# # Check for the presence of "Barcodes" folder -# barcodes_folder = os.path.join(test_data_dir, "Barcodes") -# assert os.path.exists(barcodes_folder), "The 'Barcodes' folder is missing." - -# # Check for the existence of "hydrophobic_barcodes.png" inside "Barcodes" -# hydro_file_path = os.path.join(barcodes_folder, "hydrophobic_barcodes.png") -# assert os.path.exists(hydro_file_path), "The 'hydrophobic_barcodes.png' file is missing inside 'Barcodes'." - - -# Check for the presence of "Binding_Modes_Markov_States" folder -# markov_states_folder = os.path.join(test_data_dir, "Binding_Modes_Markov_States") -# assert os.path.exists(markov_states_folder), "The 'Binding_Modes_Markov_States' folder is missing." - -# # Check for the existence of "all_binding_modes_arranged.png" inside "Binding_Modes_Markov_States" -# png_file_path = os.path.join(markov_states_folder, "all_binding_modes_arranged.png") -# assert os.path.exists(png_file_path), "The 'all_binding_modes_arranged.png' file is missing inside 'Binding_Modes_Markov_States'." - -# # Add more checks for other output files as needed diff --git a/openmmdl/tests/openmmdl_analysis/pml_writer_test.py b/openmmdl/tests/openmmdl_analysis/pml_writer_test.py deleted file mode 100644 index f68b0ce8..00000000 --- a/openmmdl/tests/openmmdl_analysis/pml_writer_test.py +++ /dev/null @@ -1,248 +0,0 @@ -import numpy as np -import pandas as pd -import re -import os -from pathlib import Path -import matplotlib.pyplot as plt -import xml.etree.ElementTree as ET -import pytest -from openmmdl.openmmdl_analysis.pml_writer import * - - -# pml_writer tests -@pytest.fixture -def sample_dataframe_generate_pharmacophore_centers(): - data = { - "Hydrophobic": [1, 1, 0, 1, 0], - "Ionic": [0, 1, 0, 0, 1], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - } - df = pd.DataFrame(data) - return df - - -@pytest.fixture -def sample_interactions_generate_pharmacophore_centers(): - return ["Hydrophobic", "Ionic"] - - -def test_generate_pharmacophore_centers( - sample_dataframe_generate_pharmacophore_centers, - sample_interactions_generate_pharmacophore_centers, -): - result = generate_pharmacophore_centers( - sample_dataframe_generate_pharmacophore_centers, - sample_interactions_generate_pharmacophore_centers, - ) - - expected_pharmacophore = { - "Hydrophobic": [2.333, 3.333, 4.333], - "Ionic": [3.5, 4.5, 5.5], - } - - assert result == expected_pharmacophore - - -@pytest.fixture -def sample_dataframe_generate_pharmacophore_vectors(): - # Create a sample dataframe for testing - data = { - "HBDonors": [1, 0, 1, 0, 1], - "HBAcceptors": [0, 1, 0, 1, 0], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - "PROTCOO": [ - "(0.5, 1.5, 2.5)", - "(1.5, 2.5, 3.5)", - "(2.5, 3.5, 4.5)", - "(3.5, 4.5, 5.5)", - "(4.5, 5.5, 6.5)", - ], - } - df = pd.DataFrame(data) - return df - - -@pytest.fixture -def sample_interactions_generate_pharmacophore_vectors(): - return ["HBDonors", "HBAcceptors"] - - -def test_generate_pharmacophore_vectors( - sample_dataframe_generate_pharmacophore_vectors, - sample_interactions_generate_pharmacophore_vectors, -): - result = generate_pharmacophore_vectors( - sample_dataframe_generate_pharmacophore_vectors, - sample_interactions_generate_pharmacophore_vectors, - ) - - expected_pharmacophore = { - "HBDonors": [[3.0, 4.0, 5.0], [2.5, 3.5, 4.5]], - "HBAcceptors": [[3.0, 4.0, 5.0], [2.5, 3.5, 4.5]], - } - - assert result == expected_pharmacophore - - -def test_generate_md_pharmacophore_cloudcenters(tmp_path): - # Sample data for the DataFrame - data = { - "Acceptor_hbond_1": [1, 0, 1, 0, 1], - "Donor_hbond_1": [0, 1, 0, 1, 0], - "pistacking_1": [1, 0, 0, 1, 1], - "hydrophobic_1": [0, 1, 0, 1, 0], - "PI_saltbridge_1": [1, 0, 1, 0, 1], - "NI_saltbridge_1": [0, 1, 0, 1, 0], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - "PROTCOO": [ - "(7.0, 6.0, 5.0)", - "(6.0, 5.0, 4.0)", - "(5.0, 4.0, 3.0)", - "(4.0, 3.0, 2.0)", - "(3.0, 2.0, 1.0)", - ], - } - - df = pd.DataFrame(data) - - # Output file paths - output_filename = tmp_path / "test_output.pml" - - # Call the function - generate_md_pharmacophore_cloudcenters( - df, "core_compound", output_filename, "system_name", id_num=0 - ) - - # Check if the output file is created - assert os.path.isfile(output_filename), f"File {output_filename} not found." - - # Check if the generated XML is valid - try: - ET.parse(output_filename) - except ET.ParseError: - pytest.fail(f"Invalid XML in {output_filename}") - - -def test_generate_pharmacophore_centers_all_points(): - # Sample data for the DataFrame - data = { - "interaction1": [1, 0, 1, 0, 1], - "interaction2": [0, 1, 0, 1, 0], - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(2.0, 3.0, 4.0)", - "(3.0, 4.0, 5.0)", - "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)", - ], - } - - df = pd.DataFrame(data) - - # Sample interactions - interactions = ["interaction1", "interaction2"] - - # Call the function - pharmacophore = generate_pharmacophore_centers_all_points(df, interactions) - - # Check if the generated pharmacophore has the expected structure - assert isinstance(pharmacophore, dict), "Pharmacophore should be a dictionary." - - for interaction in interactions: - assert ( - interaction in pharmacophore - ), f"{interaction} not found in the generated pharmacophore." - - points = pharmacophore[interaction] - assert isinstance( - points, list - ), f"Pharmacophore points for {interaction} should be a list." - - # Check if the points have the expected structure - for point in points: - assert ( - isinstance(point, list) and len(point) == 3 - ), "Each point should be a list of three coordinates." - - -def test_generate_point_cloud_pml(tmp_path): - # Sample data for the cloud_dict - cloud_dict = { - "feature1": { - "interaction1": [(1.0, 2.0, 3.0), (1.5, 2.5, 3.5), (2.0, 3.0, 4.0)], - "interaction2": [(2.0, 3.0, 4.0), (2.5, 3.5, 4.5), (3.0, 4.0, 5.0)], - }, - "feature2": { - "interaction3": [(3.0, 4.0, 5.0), (3.5, 4.5, 5.5), (4.0, 5.0, 6.0)], - }, - } - - # Output file paths - outname = tmp_path / "test_output" - outname_pml = f"{outname}.pml" - - # Call the function - generate_point_cloud_pml(cloud_dict, "system_name", outname) - - # Check if the output file is created - assert os.path.isfile(outname_pml), f"File {outname_pml} not found." - - # Check if the generated XML is valid - try: - ET.parse(outname_pml) - except ET.ParseError: - pytest.fail(f"Invalid XML in {outname_pml}") - - -def test_generate_bindingmode_pharmacophore(tmp_path): - # Prepare inputs - dict_bindingmode = { - "Acceptor_hbond": {"PROTCOO": [[1, 2, 3]], "LIGCOO": [[4, 5, 6]]} - } - core_compound = "ligand" - sysname = "system" - id_num = 0 - - # Create a symbolic link in the temporary directory - os.symlink( - os.path.abspath("./Binding_Modes_Markov_States"), - f"{tmp_path}/Binding_Modes_Markov_States", - ) - - # Prepare the output filename - outname = "test_output" - - # Call the function - generate_bindingmode_pharmacophore( - dict_bindingmode, core_compound, sysname, outname, id_num - ) - - # Prepare the full output path - outname_pml = f"{tmp_path}/Binding_Modes_Markov_States/{outname}.pml" - - # Check if the output file is created - assert os.path.isfile(outname_pml), f"File {outname_pml} not found." - - # Check if the generated XML is valid - try: - ET.parse(outname_pml) - except ET.ParseError: - pytest.fail(f"Invalid XML in {outname_pml}") diff --git a/openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py b/openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py deleted file mode 100644 index 283bc74b..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_find_stable_waters.py +++ /dev/null @@ -1,261 +0,0 @@ -import MDAnalysis as mda -import pytest -import pandas as pd -import re -import os -import shutil -from Bio.PDB.Structure import Structure -from Bio.PDB import PDBParser -from pathlib import Path -from unittest.mock import patch, mock_open -from openmmdl.openmmdl_analysis.find_stable_waters import ( - perform_clustering_and_writing, - stable_waters_pipeline, - trace_waters, - filter_and_parse_pdb, - write_pdb_clusters_and_representatives, - find_interacting_residues, - read_pdb_as_dataframe, - analyze_protein_and_water_interaction, -) - -# Fixtures and mock data setup - -test_data_directory = Path("openmmdl/tests/data/in") -topology_file = f"{test_data_directory}/metal_top.pdb" -trajectory_file = f"{test_data_directory}/metal_traj_25.dcd" -csv_file_path = f"{test_data_directory}/stable_waters.csv" -repwat_file_path = f"{test_data_directory}/representative_waters.pdb" -output_dirs = [] - - -def test_stable_waters_pipeline(): - water_eps_values = [0.5, 1.0, 2.0] # Example epsilon values - - for water_eps in water_eps_values: - output_directory = f"./test_output" - stable_waters_pipeline( - topology_file, trajectory_file, water_eps, output_directory - ) - - strEps = str(water_eps).replace(".", "") - output_directory = f"./test_output_clusterEps_{strEps}" - # Check if the expected output directory is created - assert os.path.isdir( - output_directory - ), f"Directory {output_directory} was not created" - output_dirs.append(output_directory) - - # Check if stable_waters.csv is created - csv_file = os.path.join(output_directory, "stable_waters.csv") - assert os.path.isfile(csv_file) - - # Load and verify the data in stable_waters.csv - stable_waters_df = pd.read_csv(csv_file) - assert not stable_waters_df.empty - assert set(stable_waters_df.columns) == { - "Frame", - "Residue", - "Oxygen_X", - "Oxygen_Y", - "Oxygen_Z", - } - - # Cleanup: remove created directories and files - for dir in output_dirs: - shutil.rmtree(dir) - - -def test_perform_clustering(): - # Load the stable_waters data from the CSV file - stable_waters_df = pd.read_csv(csv_file_path) - - # Define test parameters - cluster_eps = 2 - - u = mda.Universe(topology_file, trajectory_file) - # Get the total number of frames for the progress bar - total_frames = len(u.trajectory) - output_directory = "./test_output_clustering" - - # Run the function - perform_clustering_and_writing( - stable_waters_df, cluster_eps, total_frames, output_directory - ) - - # Define the regular expression pattern for matching the line - pattern = re.compile( - r"ATOM\s+\d+\s+O\s+WAT\s+A\s+\d+\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+1\.00\s+0\.00\s+O" - ) - - # Assert subdirectory creation and file creation - percentage_values = [25, 50, 75, 90, 99] - for percent in percentage_values: - min_samples = int((percent / 100) * total_frames) - sub_directory = os.path.join(output_directory, f"clusterSize{min_samples}") - - assert os.path.isdir(sub_directory), f"Subdirectory for {percent}% not created" - - # Assuming the names of files created, adjust as necessary - expected_files = [ - "cluster_0.pdb", - "cluster_1.pdb", - ] # Replace with actual expected file names - for file_name in expected_files: - file_path = os.path.join(sub_directory, file_name) - assert os.path.isfile( - file_path - ), f"File {file_name} was not created in {sub_directory}" - - # Check the contents of the files - for file_name in expected_files: - file_path = os.path.join(sub_directory, file_name) - with open(file_path, "r") as file: - # Read file and search for the pattern - if not any(pattern.match(line) for line in file): - assert ( - False - ), f"File {file_name} does not contain the required line format" - - # Cleanup - shutil.rmtree(output_directory) - - -def test_write_pdb_clusters_and_representatives(): - # Mock data setup - data = { - "Oxygen_X": [1.0, 2.0, 3.0], - "Oxygen_Y": [4.0, 5.0, 6.0], - "Oxygen_Z": [7.0, 8.0, 9.0], - "Cluster_Label": [0, 0, 1], - } - clustered_waters = pd.DataFrame(data) - min_samples = 2 - output_sub_directory = "test_write_representatives" - - if os.path.exists(output_sub_directory): - shutil.rmtree(output_sub_directory) - os.makedirs(output_sub_directory, exist_ok=True) - - # Run the function - write_pdb_clusters_and_representatives( - clustered_waters, min_samples, output_sub_directory - ) - - # Assert file creation - unique_labels = clustered_waters["Cluster_Label"].unique() - for label in unique_labels: - filename = os.path.join(output_sub_directory, f"cluster_{label}.pdb") - assert os.path.isfile(filename), f"File {filename} not created" - - # Assert representative_waters.pdb creation and contents - rep_file = os.path.join(output_sub_directory, "representative_waters.pdb") - assert os.path.isfile(rep_file), "representative_waters.pdb not created" - - # Cleanup - shutil.rmtree(output_sub_directory) - - -def test_filter_and_parse_pdb(): - # Call the function with the sample PDB file - structure = filter_and_parse_pdb(topology_file) - - # Check if the returned object is a Structure - assert isinstance(structure, Structure), "The returned object is not a Structure" - - -def test_find_interacting_residues(): - representative_waters_file = test_data_directory / "representative_waters.pdb" - distance_threshold = 2.0 # Example threshold - - # Parse structure.pdb - parser = PDBParser(QUIET=True) - structure = parser.get_structure("protein", str(topology_file)) - - # Read representative_waters.pdb into a DataFrame - waters_data = [] - with open(representative_waters_file, "r") as file: - for line in file: - if line.startswith("ATOM"): - parts = line.split() - x, y, z = map(float, parts[5:8]) - waters_data.append([x, y, z]) - representative_waters = pd.DataFrame( - waters_data, columns=["Oxygen_X", "Oxygen_Y", "Oxygen_Z"] - ) - - # Run find_interacting_residues - interacting_residues = find_interacting_residues( - structure, representative_waters, distance_threshold - ) - - # Assert the results - assert isinstance(interacting_residues, dict) - # Example: Check if a specific water molecule interacts with any residues. We now fromthe test data that water 17 should interact. - assert 17 in interacting_residues - - -def test_read_pdb_as_dataframe(): - # Mock PDB file content - mock_pdb_content = ( - "ATOM 1 O WAT A 1 26.091 60.495 24.828 1.00 0.00 O\n" - "ATOM 2 O WAT A 2 30.000 50.000 40.000 1.00 0.00 O\n" - ) - - # Expected data - expected_data = [[26.091, 60.495, 24.828], [30.000, 50.000, 40.000]] - expected_df = pd.DataFrame( - expected_data, columns=["Oxygen_X", "Oxygen_Y", "Oxygen_Z"] - ) - - # Mock open function - with patch("builtins.open", mock_open(read_data=mock_pdb_content)): - # Call the function - result_df = read_pdb_as_dataframe("dummy_path.pdb") - - # Assert DataFrame content - pd.testing.assert_frame_equal(result_df, expected_df) - - -def test_analyze_protein_and_water_interaction(): - # Paths to the real PDB files - - protein_pdb_file = topology_file - representative_waters_file = ( - "representative_waters.pdb" # Assuming this is the correct name - ) - - # Setup output directory - cluster_eps = 1.0 # Example value, adjust as needed - strEps = str(cluster_eps).replace(".", "") - output_directory = Path("testprotwatint/output_clusterEps_" + strEps) - if output_directory.exists(): - shutil.rmtree(output_directory) - os.makedirs(output_directory, exist_ok=True) - - # Create subdirectories and copy representative_waters.pdb into each - mock_subdirectories = ["subdir1", "subdir2"] - for subdir in mock_subdirectories: - sub_path = output_directory / subdir - os.makedirs(sub_path, exist_ok=True) - shutil.copy(test_data_directory / representative_waters_file, sub_path) - - test_output_directory = Path("testprotwatint/output") - os.makedirs(test_output_directory, exist_ok=True) - # Run the function - analyze_protein_and_water_interaction( - str(protein_pdb_file), - representative_waters_file, - cluster_eps, - str(test_output_directory), - distance_threshold=5.0, - ) - - # Assert file creation in each subdirectory - for subdir in mock_subdirectories: - result_file = output_directory / subdir / "interacting_residues.csv" - assert result_file.is_file(), f"File {result_file} not created" - - # Cleanup - shutil.rmtree(output_directory) - shutil.rmtree(test_output_directory) diff --git a/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py b/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py deleted file mode 100644 index 889bf850..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py +++ /dev/null @@ -1,360 +0,0 @@ -import os -import pytest -import shutil -import tempfile -from pathlib import Path -import pandas as pd -import numpy as np -import mdtraj as md -import MDAnalysis as mda -import unittest -from unittest.mock import Mock, patch -from plip.structure.preparation import PDBComplex, LigandFinder, Mol, PLInteraction - -from openmmdl.openmmdl_analysis.interaction_gathering import * - - -test_data_directory = Path("openmmdl/tests/data/in") -topology_file = f"{test_data_directory}/complex.pdb" -frame_file = f"{test_data_directory}/processing_frame_1.pdb" -topology_metal = f"{test_data_directory}/metal_top.pdb" -trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" -ligand_special = f"{test_data_directory}/ligand_special.pdb" - -binding_site_id = "UNK:X:0" -lig_name = "UNK" -peptide = "X" - - -# Test the function -def test_characterize_complex(): - # Call the function - interaction_set = characterize_complex(topology_file, binding_site_id) - - # Check if the function returns a PLInteraction object - assert isinstance(interaction_set, PLInteraction) - - -def test_retrieve_plip_interactions(): - # Call the function - interactions = retrieve_plip_interactions(topology_file, lig_name) - - # Check if the function returns a dictionary - assert isinstance(interactions, dict) - - -def test_retrieve_plip_interactions_peptide(): - # Call the function - interactions = retrieve_plip_interactions_peptide(topology_file, peptide) - - # Check if the function returns a dictionary - assert isinstance(interactions, dict) - - -# Define test data -sample_interactions = { - "hydrophobic": [["Column1", "Column2"], [1, 2], [3, 4]], - "hbond": [["ColumnA", "ColumnB"], ["A", "B"], ["C", "D"]], -} - - -def test_create_df_from_binding_site(): - # Test with valid interaction type - df = create_df_from_binding_site( - sample_interactions, interaction_type="hydrophobic" - ) - assert isinstance(df, pd.DataFrame) - assert df.shape == (2, 2) - assert list(df.columns) == ["Column1", "Column2"] - - # Test with default interaction type - df_default = create_df_from_binding_site(sample_interactions) - assert isinstance(df_default, pd.DataFrame) - assert df_default.shape == (2, 2) - assert list(df_default.columns) == ["ColumnA", "ColumnB"] - - # Test with an invalid interaction type (should default to 'hbond') - df_invalid = create_df_from_binding_site( - sample_interactions, interaction_type="invalid_type" - ) - assert isinstance(df_invalid, pd.DataFrame) - assert df_invalid.shape == (2, 2) - assert list(df_invalid.columns) == ["ColumnA", "ColumnB"] - - -@pytest.fixture -def input_pdb_filename(tmp_path): - input_pdb_filename = tmp_path / "input.pdb" - - # Create a mock PDB file with 10 atoms - input_pdb_content = """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C""" - - input_pdb_filename.write_text(input_pdb_content) - return input_pdb_filename - - -def test_change_lig_to_residue(): - topology_file = f"{test_data_directory}/complex.pdb" - shutil.copy(str(topology_file), ".") - topology_file = "complex.pdb" - - # Change ligand to residue - change_lig_to_residue(str(topology_file), "UNK", "NEW") - - # Read the output PDB file and check if residues are modified - with open(topology_file, "r") as output_file: - modified_lines = output_file.readlines() - assert any("NEW" in line for line in modified_lines) - assert all("UNK" not in line for line in modified_lines) - - -def test_process_frame_with_sample_data(): - # Define a sample frame number - frame_number = 1 - - destination_file = "processing_frame_1.pdb" - - shutil.copy(frame_file, destination_file) - - # Load the sample PDB file into an MDAnalysis Universe - sample_universe = mda.Universe(topology_file) - - # Call the process_frame function with the sample data - result = process_frame(frame_number, sample_universe, lig_name) - - # Define the expected columns you want to check - expected_columns = [ - "FRAME", - "INTERACTION", - ] # Add the specific columns you want to validate - - # Check if the result is a Pandas DataFrame - assert isinstance(result, pd.DataFrame) - - # Check if all expected columns are present in the result - for column in expected_columns: - assert column in result.columns - - -def test_process_frame_with_sample_data_special(): - # Define a sample frame number - frame_number = 1 - special = "HEM" - - destination_file = "processing_frame_1.pdb" - destination_file_complex = "complex.pdb" - - shutil.copy(frame_file, destination_file) - shutil.copy(str(ligand_special), ".") - shutil.copy(str(topology_metal), ".") - shutil.copy(topology_metal, destination_file_complex) - - # Load the sample PDB file into an MDAnalysis Universe - sample_universe = mda.Universe(topology_metal, trajetory_metal) - - # Call the process_frame function with the sample data for special ligand 'HEM' - result_special = process_frame( - frame_number, sample_universe, lig_name, special="HEM" - ) - - # Define the expected columns you want to check for special ligand 'HEM' - expected_columns_special = [ - "FRAME", - "INTERACTION", - "TARGET_IDX", - "RESTYPE", - "LOCATION", - ] # Add specific columns for special ligand 'HEM' - - # Check if the result is a Pandas DataFrame for special ligand 'HEM' - assert isinstance(result_special, pd.DataFrame) - - # Check if all expected columns are present in the result for special ligand 'HEM' - for column in expected_columns_special: - assert column in result_special.columns - - shutil.copy(topology_file, destination_file_complex) - - -def test_process_frame_with_sample_data_peptide(): - # Define a sample frame number - frame_number = 1 - - # Define paths and filenames - peptide_destination_file = f"processing_frame_1.pdb" - - # Copy the frame file to the destination file for testing purposes - shutil.copy(frame_file, peptide_destination_file) - - # Load the sample PDB file into an MDAnalysis Universe - sample_universe = mda.Universe(topology_file) - - # Call the process_frame function with the sample data for peptide - result_peptide = process_frame( - frame_number, sample_universe, lig_name, peptide="X", special=None - ) - - # Define the expected columns you want to check for peptide - expected_columns_peptide = [ - "FRAME", - "INTERACTION", - "TARGET_IDX", - ] # Add specific columns for peptide - - # Check if the result is a Pandas DataFrame for peptide - assert isinstance(result_peptide, pd.DataFrame) - - # Check if all expected columns are present in the result for peptide - for column in expected_columns_peptide: - assert column in result_peptide.columns - - -def test_process_trajectory(): - topology_file = f"{test_data_directory}/0_unk_hoh.pdb" - trajectory_file = f"{test_data_directory}/all_50.dcd" - pdb_md = mda.Universe(topology_file, trajectory_file) - dataframe = None - num_processes = 2 - lig_name = "UNK" - - interaction_list = pd.DataFrame( - columns=[ - "RESNR", - "RESTYPE", - "RESCHAIN", - "RESNR_LIG", - "RESTYPE_LIG", - "RESCHAIN_LIG", - "DIST", - "LIGCARBONIDX", - "PROTCARBONIDX", - "LIGCOO", - "PROTCOO", - ] - ) - - interaction_list = process_trajectory( - pdb_md, dataframe, num_processes, lig_name, special_ligand=None, peptide=None - ) - - assert interaction_list is not None - assert len(interaction_list) > 10 - - -def test_process_frame_special_with_files(): - test_data_directory = "openmmdl/tests/data/in" # Replace with the actual path to your test data directory - topology_metal = f"{test_data_directory}/metal_top.pdb" - trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" - - # Load PDB and DCD files using mdanalysis.Universe - import MDAnalysis as mda - - u = mda.Universe(topology_metal, trajetory_metal) - - lig_name = "UNK" # Replace with the actual ligand name - special = "HEM" # Replace with the actual special residue name - frame = 0 - - result = process_frame_special(frame, u, lig_name, special) - - assert isinstance(result, list) - assert all(isinstance(df, pd.DataFrame) for df in result) - - # Add more specific assertions based on the expected behavior of the function - # For example, check if the columns in the DataFrame are as expected, or if certain conditions hold - - # Clean up any temporary files created during the test - for frame in range(len(u.trajectory)): - temp_file = f"processing_frame_{frame}.pdb" - if os.path.exists(temp_file): - os.remove(temp_file) - - -def test_process_frame_wrapper(): - - test_data_directory = "openmmdl/tests/data/in" # Replace with the actual path to your test data directory - topology_metal = f"{test_data_directory}/metal_top.pdb" - trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" - ligand_special = f"{test_data_directory}/ligand_special.pdb" - shutil.copy(str(topology_metal), ".") - shutil.copy(str(trajetory_metal), ".") - shutil.copy(str(ligand_special), ".") - topology_metal = "metal_top.pdb" - trajetory_metal = "metal_traj_25.dcd" - - # Load PDB and DCD files using MDAnalysis - pdb_md = mda.Universe(topology_metal, trajetory_metal) - lig_name = "UNK" # Replace with the actual ligand name - special_ligand = "HEM" # Replace with the actual special ligand name - peptide = None # Replace with the actual peptide name - frame_idx = 2 - - args = (frame_idx, pdb_md, lig_name, special_ligand, peptide) - result = process_frame_wrapper(args) - - # Perform assertions based on the expected behavior of the process_frame_special function - assert isinstance(result, tuple) - assert len(result) == 2 - assert isinstance(result[0], int) - - -def test_fill_missing_frames(): - # Test Case 1: Basic functionality - data = {"FRAME": [1, 2, 4, 5], "Value1": ["A", "B", "C", "D"]} - df = pd.DataFrame(data) - md_len = 6 - filled_df = fill_missing_frames(df, md_len) - assert all(filled_df["FRAME"] == [1, 2, 3, 4, 5]) - assert all(filled_df.loc[filled_df["FRAME"] == 3, "Value1"] == "skip") - - # Test Case 4: No missing frames - no_missing_frames_data = { - "FRAME": [1, 2, 3, 4, 5, 6], - "Value1": ["A", "B", "C", "D", "E", "F"], - } - no_missing_frames_df = pd.DataFrame(no_missing_frames_data) - filled_no_missing_frames_df = fill_missing_frames(no_missing_frames_df, md_len=6) - assert all( - filled_no_missing_frames_df["FRAME"] == [1, 2, 3, 4, 5, 6] - ) # Should remain unchanged - - # Test Case 5: DataFrame with additional columns - data_with_extra_columns = { - "FRAME": [1, 2, 4, 5], - "Value1": ["A", "B", "C", "D"], - "Value2": [10, 20, 30, 40], - } - df_with_extra_columns = pd.DataFrame(data_with_extra_columns) - - # Ensure the original DataFrame has unique frame numbers - assert df_with_extra_columns["FRAME"].nunique() == len(df_with_extra_columns) - - filled_df_extra_columns = fill_missing_frames(df_with_extra_columns, md_len=6) - expected_frames = [1, 2, 3, 4, 5] - - # Debugging prints - print(f"Original DataFrame length: {len(df_with_extra_columns)}") - print(f"Filled DataFrame length: {len(filled_df_extra_columns)}") - print(f"Expected frames: {expected_frames}") - - # Assert that the resulting DataFrame has unique frame numbers - assert filled_df_extra_columns["FRAME"].nunique() == len(filled_df_extra_columns) - - # Assert that the resulting DataFrame has the expected frames - assert all(filled_df_extra_columns["FRAME"] == expected_frames) - - # Assert that the length of the resulting DataFrame is equal to the length of expected frames - assert len(filled_df_extra_columns) == len(expected_frames) - - -if __name__ == "__main": - pytest.main() diff --git a/openmmdl/tests/openmmdl_analysis/test_preprocessing.py b/openmmdl/tests/openmmdl_analysis/test_preprocessing.py deleted file mode 100644 index 16a39d39..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_preprocessing.py +++ /dev/null @@ -1,229 +0,0 @@ -import os -import pytest -import tempfile -import shutil -from Bio import PDB -import numpy as np -import mdtraj as md -from pathlib import Path -import MDAnalysis as mda -from openmmdl.openmmdl_analysis.preprocessing import * - -pdb_file_path = "openmmdl/tests/data/in/0_unk_hoh.pdb" - -# Define test data paths -test_data_directory = Path("openmmdl/tests/data/in") -pdb_file = test_data_directory / "0_unk_hoh.pdb" -topology_metal = f"{test_data_directory}/metal_top.pdb" -ligand_resname = "UNK" - - -@pytest.fixture -def sample_pdb_data(): - # Provide sample PDB data for testing - return """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C -ATOM 11 C9 UNK A 454 43.171 52.151 33.897 1.00 0.00 A C -ATOM 12 C13 UNK A 454 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 C11 UNK A 454 41.393 52.671 35.378 1.00 0.00 A C -ATOM 14 C6 UNK A 454 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 H4 UNK A 454 41.220 51.358 37.148 1.00 0.00 A H -ATOM 16 H9 UNK A 454 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C16 UNK A 454 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N4 UNK A 454 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H7 UNK A 454 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H2 UNK A 454 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H UNK A 454 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C1 UNK A 454 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C5 UNK A 454 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C10 UNK A 454 41.859 48.278 39.998 1.00 0.00 A C -ATOM 25 H8 UNK A 454 42.284 48.099 40.981 1.00 0.00 A H -ATOM 26 H3 UNK A 454 43.752 48.806 39.135 1.00 0.00 A H -ATOM 27 C3 UNK A 454 40.774 48.885 37.463 1.00 0.00 A C -ATOM 28 H1 UNK A 454 40.310 49.079 36.500 1.00 0.00 A H -ATOM 29 C8 UNK A 454 39.907 48.435 38.509 1.00 0.00 A C -ATOM 30 H6 UNK A 454 38.833 48.310 38.406 1.00 0.00 A H -ATOM 31 C12 UNK A 454 40.466 48.125 39.823 1.00 0.00 A C -ATOM 32 C15 UNK A 454 39.627 47.605 40.833 1.00 0.00 A C -ATOM 33 N3 UNK A 454 38.981 47.235 41.740 1.00 0.00 A N """ - - -@pytest.fixture -def input_pdb_filename(tmp_path): - input_pdb_filename = tmp_path / "input.pdb" - with open(input_pdb_filename, "w") as f: - f.write( - """ATOM 1 N SPC A 101 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 O TIP3 A 102 44.740 47.862 35.697 1.00 0.00 A O -ATOM 3 C * A 103 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 H * A 104 43.265 46.644 34.450 1.00 0.00 A H -ATOM 5 O WAT A 105 42.607 47.556 35.077 1.00 0.00 A O -ATOM 6 H1 SPC A 106 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H2 * A 107 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C T3P A 108 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 O T4P A 109 42.743 50.705 35.818 1.00 0.00 A O -ATOM 10 H T5P A 110 43.545 51.052 34.671 1.00 0.00 A H -ATOM 11 N * A 111 43.171 52.151 33.897 1.00 0.00 A N -ATOM 12 C SPC A 112 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 O * A 113 41.393 52.671 35.378 1.00 0.00 A O -ATOM 14 C TIP4 A 114 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 O * A 115 41.220 51.358 37.148 1.00 0.00 A O -ATOM 16 H * A 116 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C * A 117 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N * A 118 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H * A 119 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H * A 120 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H * A 121 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C * A 122 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C * A 123 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C * A 124 41.859 48.278 39.998 1.00 0.00 A C """ - ) - - -def test_process_pdb_file(): - # Define the input and output file paths - original_cwd = Path(os.getcwd()) - input_pdb_filename = test_data_directory / "0_unk_hoh.pdb" - - shutil.copy(str(input_pdb_filename), ".") - - # Process the provided PDB file - process_pdb_file(input_pdb_filename) - - # Read the modified output PDB file - with open(input_pdb_filename, "r") as f: - modified_data = f.read() - - # Check if the modified data contains the expected residues - assert "HOH" in modified_data - assert "UNK" in modified_data - - -def test_renumber_atoms_in_residues(sample_pdb_data, tmp_path): - input_pdb_filename = tmp_path / "input.pdb" - output_pdb_filename = tmp_path / "output.pdb" - - # Create a mock PDB file - input_pdb_filename.write_text( - """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C -ATOM 11 C9 UNK A 454 43.171 52.151 33.897 1.00 0.00 A C -ATOM 12 C13 UNK A 454 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 C11 UNK A 454 41.393 52.671 35.378 1.00 0.00 A C -ATOM 14 C6 UNK A 454 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 H4 UNK A 454 41.220 51.358 37.148 1.00 0.00 A H -ATOM 16 H9 UNK A 454 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C16 UNK A 454 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N4 UNK A 454 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H7 UNK A 454 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H2 UNK A 454 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H UNK A 454 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C1 UNK A 454 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C5 UNK A 454 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C10 UNK A 454 41.859 48.278 39.998 1.00 0.00 A C -ATOM 25 H8 UNK A 454 42.284 48.099 40.981 1.00 0.00 A H -ATOM 26 H3 UNK A 454 43.752 48.806 39.135 1.00 0.00 A H -ATOM 27 C3 UNK A 454 40.774 48.885 37.463 1.00 0.00 A C -ATOM 28 H1 UNK A 454 40.310 49.079 36.500 1.00 0.00 A H -ATOM 29 C8 UNK A 454 39.907 48.435 38.509 1.00 0.00 A C -ATOM 30 H6 UNK A 454 38.833 48.310 38.406 1.00 0.00 A H -ATOM 31 C12 UNK A 454 40.466 48.125 39.823 1.00 0.00 A C -ATOM 32 C15 UNK A 454 39.627 47.605 40.833 1.00 0.00 A C -ATOM 33 N3 UNK A 454 38.981 47.235 41.740 1.00 0.00 A N""" - ) - - renumber_atoms_in_residues(str(input_pdb_filename), str(output_pdb_filename), "UNK") - assert output_pdb_filename.exists() - - -@pytest.fixture -def sample_pdb_info(): - return """ -ATOM 741 N UNK A 454 43.056 48.258 36.260 1.00 0.00 LIG X -ATOM 742 N1 UNK A 454 44.324 47.906 35.996 1.00 0.00 LIG X -ATOM 743 C14 UNK A 454 44.132 46.990 35.061 1.00 0.00 LIG X - """ - - -def test_process_pdb(sample_pdb_info): - with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file: - temp_filename = temp_file.name - temp_file.write(sample_pdb_info) - - print("Temp Data:") - print(temp_filename) - output_filename = "output_pdb_test.pdb" - process_pdb(temp_filename, output_filename) - - with open(output_filename, "r") as f: - modified_data = f.read() - - print("Modified Data:") - print(modified_data) - - assert " LIG N" in modified_data - assert " LIG C" in modified_data - assert " LIG X" not in modified_data - - # Clean up temporary and output files - os.remove(temp_filename) - os.remove(output_filename) - - -def test_extract_and_save_ligand_as_sdf(): - input_pdb_filename = topology_metal - output_filename = "ligand_changed.sdf" - target_resname = ligand_resname - - extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_resname) - - assert output_filename is not None - os.remove("ligand_changed.sdf") - - -test_data_directory = Path("openmmdl/tests/data/in") -TEST_LIGAND_FILE = f"{test_data_directory}/CVV.sdf" -TEST_OUTPUT_FILE = "CVV.smi" - - -def test_increase_ring_indices(): - # Test case 1: Check if ring indices are correctly increased - ring = [1, 2, 3] - lig_index = 10 - result = increase_ring_indices(ring, lig_index) - assert result == [11, 12, 13] - - # Test case 2: Check with a different lig_index - ring = [3, 4, 5] - lig_index = 20 - result = increase_ring_indices(ring, lig_index) - assert result == [23, 24, 25] - - -def test_convert_ligand_to_smiles(): - # Convert the ligand structure to SMILES in the same directory as the input SDF file - convert_ligand_to_smiles(TEST_LIGAND_FILE, TEST_OUTPUT_FILE) - - # Verify that the output SMILES file was created in the same directory as the input file - assert os.path.exists(TEST_OUTPUT_FILE) - - # Optionally, you can also read and validate the content of the output SMILES file - with open(TEST_OUTPUT_FILE, "r") as smi_file: - smiles_lines = smi_file.readlines() - assert len(smiles_lines) > 0 # Check that there are SMILES representations diff --git a/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py b/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py deleted file mode 100644 index 574f8a7f..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py +++ /dev/null @@ -1,288 +0,0 @@ -import pytest -import os -import time -import shutil -from PIL import Image -from pathlib import Path -from openmmdl.openmmdl_analysis.rdkit_figure_generation import * - -test_data_directory = Path( - "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation" -) -test_data_directory_files = Path("openmmdl/tests/data/in") -lig_no_h = test_data_directory_files / "lig_no_h.pdb" -complex = test_data_directory_files / "complex.pdb" -smi_file = test_data_directory_files / "lig_no_h.smi" -current_directory = os.getcwd() -output_path = "all_binding_modes_arranged.png" - -shutil.copy(str(lig_no_h), ".") -shutil.copy(str(complex), ".") - - -@pytest.mark.parametrize( - "input_data, expected_output", - [ - ( - ["60GLUA_4206_4207_4216_4217_4218_4205_hydrophobic"], - ["60GLUA 4206 4207 4216 4217 4218 4205 hydrophobic"], - ), - (["165ASPA_4203_Acceptor_hbond"], ["165ASPA 4203 Acceptor hbond"]), - (["125TYRA_4192_Acceptor_waterbridge"], ["125TYRA 4192 Acceptor waterbridge"]), - ], -) -def test_split_interaction_data(input_data, expected_output): - result = split_interaction_data(input_data) - assert result == expected_output - - -def test_highlight_numbers(): - # Input data - split_data = [ - "163GLYA 4202 Acceptor hbond", - "165ASPA 4203 Donor hbond", - "165ASPA 4222 Donor hbond", - "165ASPA 4203 Acceptor hbond", - "125TYRA 4192 Acceptor waterbridge", - "165ASPA 4222 Donor waterbridge", - "161PHEA 4211 4212 4213 4214 4215 4210 hydrophobic", - "59ARGA 4205 4206 4207 4216 4217 4218 Aromatic pication", - "155PHEA 4205 4206 4207 4216 4217 4218 pistacking", - "59ARGA 4194 F halogen", - "166ARGA 4202,4203 Carboxylate NI saltbridge", - "165ASPA 4202 Amine PI saltbridge", - "HEM 4202 FE 4 metal", - ] - - starting_idx = 1 # Updated starting index - - result = highlight_numbers(split_data, starting_idx) - - ( - highlighted_hbond_donor, - highlighted_hbond_acceptor, - highlighted_hbond_both, - highlighted_hydrophobic, - highlighted_waterbridge, - highlighted_pistacking, - highlighted_halogen, - highlighted_ni, - highlighted_pi, - highlighted_pication, - highlighted_metal, - ) = result - - assert highlighted_hbond_donor is not None - assert highlighted_hbond_acceptor is not None - assert highlighted_hbond_both is not None - assert highlighted_hydrophobic is not None - assert highlighted_waterbridge is not None - assert highlighted_halogen is not None - assert highlighted_ni is not None - assert highlighted_pi is not None and len(highlighted_pi) > 0 - assert highlighted_pication is not None - assert highlighted_metal is not None - - -def test_update_dict(): - # Test case 1: Check if the target dictionary is updated correctly - target_dict = {1: "1", 2: "2"} - source_dict = {3: "3", 4: "4"} - update_dict(target_dict, source_dict) - assert target_dict == {1: "1", 2: "2", 3: "3", 4: "4"} - - # Test case 2: Check if the function handles multiple source dictionaries - target_dict = {} - source_dict1 = {1: "1"} - source_dict2 = {2: "2", 3: "3"} - update_dict(target_dict, source_dict1, source_dict2) - assert target_dict == {1: "1", 2: "2", 3: "3"} - - # Test case 3: Check if the function handles empty source dictionaries - target_dict = {1: "1", 2: "2"} - update_dict(target_dict) # No source dictionaries provided - assert target_dict == {1: "1", 2: "2"} - - -def test_generate_interaction_dict(): - # Test with a known interaction type 'hydrophobic' - interaction_type = "hydrophobic" - keys = [1, 2, 3] - expected_result = {1: (1.0, 1.0, 0.0), 2: (1.0, 1.0, 0.0), 3: (1.0, 1.0, 0.0)} - result = generate_interaction_dict(interaction_type, keys) - assert result == expected_result - - -def test_create_and_merge_images_with_split_data(): - # Define test data - binding_mode = "Binding_Mode_1" - occurrence_percent = 92 - split_data = [ - "166ARGA 4220,4221 Carboxylate NI saltbridge", - "161PHEA 4221 Acceptor hbond", - "207ILEA 4205 4206 4207 4208 4209 4204 hydrophobic", - ] - merged_image_paths = [] - - # Define source image paths - source_image_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.png" - source_svg_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.svg" - source_merged_image_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png" - - # Copy source image files to the working directory - working_directory = os.getcwd() - destination_image_path = os.path.join( - working_directory, os.path.basename(source_image_path) - ) - destination_svg_path = os.path.join( - working_directory, os.path.basename(source_svg_path) - ) - destination_merged_image_path = os.path.join( - working_directory, os.path.basename(source_merged_image_path) - ) - shutil.copy(source_image_path, destination_image_path) - shutil.copy(source_svg_path, destination_svg_path) - shutil.copy(source_merged_image_path, destination_merged_image_path) - - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory before:", files_in_working_directory) - - # Run the function - merged_image_paths = create_and_merge_images( - binding_mode, occurrence_percent, split_data, merged_image_paths - ) - - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory after:", files_in_working_directory) - - # Check if the merged image file was created - assert len(merged_image_paths) == 1 - - # Check if the merged image file is a valid image - merged_image_path = merged_image_paths[0] - try: - with Image.open(merged_image_path) as img: - img.verify() - except Exception as e: - pytest.fail(f"Merged image file is not a valid image: {e}") - - -def test_max_width_and_height_calculation(): - # Create some example images with different sizes - image1 = Image.new("RGB", (100, 200), (255, 255, 255)) - image2 = Image.new("RGB", (150, 250), (255, 255, 255)) - merged_images = [image1, image2] - - # Calculate the maximum width and height - max_width = max(image.size[0] for image in merged_images) - max_height = max(image.size[1] for image in merged_images) - - # Assert the calculated max_width and max_height - assert max_width == 150 - assert max_height == 250 - - -def test_big_figure_creation(): - # Create example merged images - image1 = Image.new("RGB", (100, 200), (255, 255, 255)) - image2 = Image.new("RGB", (150, 250), (255, 255, 255)) - merged_images = [image1, image2] - - # Calculate the maximum width and height - max_width = max(image.size[0] for image in merged_images) - max_height = max(image.size[1] for image in merged_images) - - # Determine the number of images per row (in your case, 2 images per row) - images_per_row = 2 - - # Calculate the number of rows and columns required - num_rows = (len(merged_images) + images_per_row - 1) // images_per_row - total_width = max_width * images_per_row - total_height = max_height * num_rows - - # Create a new image with the calculated width and height - big_figure = Image.new( - "RGB", (total_width, total_height), (255, 255, 255) - ) # Set background to white - - # Assert the dimensions of the created big_figure - assert big_figure.size == (300, 250) # Width should be 300, height should be 250 - - -def test_arranged_figure_generation(): - binding_mode1_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png" - binding_mode2_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_2_merged.png" - all_modes_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/all_binding_modes_arranged.png" - working_directory = os.getcwd() - - # Print the working directory to verify it's as expected - print("Working Directory:", working_directory) - - destination_path_1 = os.path.join( - working_directory, os.path.basename(binding_mode1_path) - ) - destination_path_2 = os.path.join( - working_directory, os.path.basename(binding_mode2_path) - ) - destination_path_all = os.path.join( - working_directory, os.path.basename(all_modes_path) - ) - - # Print the destination paths to verify they are constructed correctly - print("Destination Path 1:", destination_path_1) - print("Destination Path 2:", destination_path_2) - print("Destination Path All:", destination_path_all) - - shutil.copy(binding_mode1_path, destination_path_1) - shutil.copy(binding_mode2_path, destination_path_2) - shutil.copy(all_modes_path, destination_path_all) - - merged_image_paths = ["Binding_Mode_1_merged.png", "Binding_Mode_2_merged.png"] - output_path = "all_binding_modes_arranged.png" - output_path = os.path.join(working_directory, output_path) - print(output_path) - - # Run the function - arranged_figure_generation(merged_image_paths, output_path) - print(output_path) - - # Print the current files in the working directory for debugging - files_in_working_directory = os.listdir(working_directory) - print("Files in Working Directory:", files_in_working_directory) - - output_path = os.path.join( - working_directory, - "Binding_Modes_Markov_States", - "all_binding_modes_arranged.png", - ) - print(output_path) - - # Check if the output file was created - - assert output_path is not None - - -output_image_file = "output_image.png" - -# Copy the files to the current folder -shutil.copy(complex, Path.cwd()) -shutil.copy(lig_no_h, Path.cwd()) -shutil.copy(smi_file, Path.cwd()) - - -# Test the generate_ligand_image function -def test_generate_ligand_image(): - ligand_name = "UNK" - generate_ligand_image( - ligand_name, "complex.pdb", "lig_no_h.pdb", "lig_no_h.smi", output_image_file - ) - - # Assert that the output image file exists - assert os.path.exists(output_image_file) - - -# Run the tests -if __name__ == "__main__": - pytest.main() diff --git a/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py b/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py deleted file mode 100644 index 3bae0d7d..00000000 --- a/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -import pytest -import contextlib -from pathlib import Path -import pandas as pd -import numpy as np -import mdtraj as md - -from openmmdl.openmmdl_analysis.rmsd_calculation import ( - rmsd_for_atomgroups, - RMSD_dist_frames, -) - -test_data_directory = Path("openmmdl/tests/data/in") -topology_file = f"{test_data_directory}/0_unk_hoh.pdb" -trajectory_file = f"{test_data_directory}/all_50.dcd" -fig_type = "png" -selection1 = "protein" -selection2 = ("resname UNK", "") -ligand_name = "UNK" - - -def test_rmsd_for_atomgroups(): - - # Call the function - rmsd_df = rmsd_for_atomgroups( - topology_file, trajectory_file, fig_type, selection1, selection2 - ) - - # Check if the output DataFrame has the correct structure - assert isinstance(rmsd_df, pd.DataFrame) - assert rmsd_df.index.name == "frame" - - # Define file paths - csv_path = os.path.join("RMSD", "RMSD_over_time.csv") - plot_path = os.path.join("RMSD", "RMSD_over_time.png") - - print("Checking CSV file:", csv_path) - # Check if the CSV file exists - assert os.path.exists(csv_path), f"CSV file does not exist at {csv_path}" - - print("Checking plot file:", plot_path) - # Check if the plot file exists - assert os.path.exists(plot_path), f"Plot file does not exist at {plot_path}" - - # Cleanup created files after the test - os.remove(csv_path) - os.remove(plot_path) - - -def test_rmsd_dist_frames(): - - # Call the function - pairwise_rmsd_prot, pairwise_rmsd_lig = RMSD_dist_frames( - topology_file, trajectory_file, fig_type, ligand_name - ) - - # Check if the function returns numpy arrays for pairwise RMSD - assert isinstance(pairwise_rmsd_prot, np.ndarray) - assert isinstance(pairwise_rmsd_lig, np.ndarray) - - # Define file paths - plot_path = "./RMSD/RMSD_between_the_frames.png" - - print("Checking plot file:", plot_path) - # Check if the plot file exists - assert os.path.exists(plot_path), f"Plot file does not exist at {plot_path}" - - # Cleanup created files after the test - with contextlib.suppress(FileNotFoundError): - os.remove(plot_path) diff --git a/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py b/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py deleted file mode 100644 index 0b5e2aec..00000000 --- a/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py +++ /dev/null @@ -1,239 +0,0 @@ -import numpy as np -import pandas as pd -import re -import shutil -import subprocess -import os -from pathlib import Path -import matplotlib.pyplot as plt -from unittest.mock import patch, Mock -import pytest -from openmmdl.openmmdl_analysis.visualization_functions import * - -test_data_directory_files = Path("openmmdl/tests/data/in") -clouds = test_data_directory_files / "clouds.json" -waters_pdb = test_data_directory_files / "interacting_waters.pdb" -waters_dcd = test_data_directory_files / "interacting_waters.dcd" -waters_pkl = test_data_directory_files / "interacting_waters.pkl" - - -# visualization_functions tests -@pytest.fixture -def sample_dataframe_interacting_water_ids(): - data = { - "Interaction1": [0, 1, 0, 1, 0], - "Interaction2": [1, 0, 0, 0, 1], - "WATER_IDX": [101, 102, None, 104, 105], - "FRAME": [1, 2, 3, 4, 5], - } - df_all = pd.DataFrame(data) - return df_all - - -def test_interacting_water_ids(sample_dataframe_interacting_water_ids): - waterbridge_interactions = ["Interaction1", "Interaction2"] - - result = interacting_water_ids( - sample_dataframe_interacting_water_ids, waterbridge_interactions - ) - - expected_interacting_waters = [101, 102, 104, 105] - - assert sorted(result) == sorted(expected_interacting_waters) - - -@pytest.fixture -def sample_dataframe_cloud_json_generation(): - data = { - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(4.0, 5.0, 6.0)", - "(7.0, 8.0, 9.0)", - ], - "INTERACTION": [ - "hydrophobic", - "acceptor", - "donor", - ], - "PROTISDON": [ - "False", - "True", - "False", - ], - "PROTISPOS": [ - "False", - "False", - "True", - ], - } - df_all = pd.DataFrame(data) - return df_all - - -def test_run_visualization(): - # Set up the paths - package_path = Path("openmmdl/openmmdl_analysis") - notebook_path = package_path / "visualization.ipynb" - - # Run the visualization function - # run_visualization() - - # Check if the notebook was copied to the current directory with the correct name - copied_notebook_path = os.path.join(os.getcwd(), "visualization.ipynb") - shutil.copy(str(notebook_path), ".") - new_notebook_path = "visualization.ipynb" - assert os.path.isfile(copied_notebook_path) - - # Check if the content of the copied notebook is the same as the original notebook - with open(new_notebook_path, "r") as copied_notebook: - with open(notebook_path, "r") as original_notebook: - assert copied_notebook.read() == original_notebook.read() - - -@pytest.fixture -def sample_dataframe(): - # Create a sample dataframe for testing - data = { - "LIGCOO": [ - "(1.0, 2.0, 3.0)", - "(4.0, 5.0, 6.0)", - "(13.0, 14.0, 15.0)", - "(16.0, 17.0, 18.0)", - "(19.0, 20.0, 21.0)", - ], - "INTERACTION": ["hydrophobic", "acceptor", "donor", "pistacking", "pication"], - "PROTISDON": ["False", "True", "True", "False", "True"], - "PROTISPOS": ["False", "True", "False", "False", "False"], - "TARGETCOO": [ - "(7.0, 8.0, 9.0)", - "(10.0, 11.0, 12.0)", - "(22.0, 23.0, 24.0)", - "(25.0, 26.0, 27.0)", - "(28.0, 29.0, 30.0)", - ], - } - - # Extend the existing dataframe with examples for additional interactions - additional_data = [ - ("(31.0, 32.0, 33.0)", "waterbridge", "True", "False", "(34.0, 35.0, 36.0)"), - ( - "(37.0, 38.0, 39.0)", - "negative_ionizable", - "False", - "True", - "(40.0, 41.0, 42.0)", - ), - ( - "(43.0, 44.0, 45.0)", - "positive_ionizable", - "False", - "True", - "(46.0, 47.0, 48.0)", - ), - ("(49.0, 50.0, 51.0)", "halogen", "False", "False", "(52.0, 53.0, 54.0)"), - ("(55.0, 56.0, 57.0)", "metal", "False", "False", "(58.0, 59.0, 60.0)"), - ("(61.0, 62.0, 63.0)", "hydrophobic", "False", "False", "(64.0, 65.0, 66.0)"), - ("(61.0, 62.0, 63.0)", "saltbridge", "False", "True", "(64.0, 65.0, 66.0)"), - ("(61.0, 62.0, 63.0)", "saltbridge", "False", "False", "(64.0, 65.0, 66.0)"), - ("(67.0, 68.0, 69.0)", "donor", "True", "False", "(70.0, 71.0, 72.0)"), - ("(73.0, 74.0, 75.0)", "acceptor", "False", "False", "(76.0, 77.0, 78.0)"), - ( - "(79.0, 80.0, 81.0)", - "negative_ionizable", - "False", - "True", - "(82.0, 83.0, 84.0)", - ), - ] - - for row in additional_data: - data["LIGCOO"].append(row[0]) - data["INTERACTION"].append(row[1]) - data["PROTISDON"].append(row[2]) - data["PROTISPOS"].append(row[3]) - data["TARGETCOO"].append(row[4]) - - return pd.DataFrame(data) - - -def test_cloud_json_generation(sample_dataframe): - result = cloud_json_generation(sample_dataframe) - - assert "hydrophobic" in result - assert "acceptor" in result - assert "donor" in result - assert "waterbridge" in result - assert "negative_ionizable" in result - assert "positive_ionizable" in result - assert "pistacking" in result - assert "pication" in result - assert "halogen" in result - assert "metal" in result - - # Add more specific assertions based on your expectations for the output - # For example, you might want to check the structure of the generated dictionary - assert isinstance(result["hydrophobic"], dict) - assert "coordinates" in result["hydrophobic"] - assert "color" in result["hydrophobic"] - assert "radius" in result["hydrophobic"] - - # Add more tests based on your specific requirements and expected results - - -@pytest.fixture -def input_paths(): - test_data_directory = Path("openmmdl/tests/data/in") - topology_file = f"{test_data_directory}/metal_top.pdb" - frame_file = f"{test_data_directory}/processing_frame_1.pdb" - topology_metal = f"{test_data_directory}/metal_top.pdb" - trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" - return topology_metal, trajetory_metal - - -def test_save_interacting_waters_trajectory(input_paths): - topology_metal, trajetory_metal = input_paths - interacting_waters = [588, 733, 1555, 2000, 1266] - ligname = "UNK" - special = "HEM" - outputpath = "./" - - save_interacting_waters_trajectory( - topology_metal, - trajetory_metal, - interacting_waters, - ligname, - special, - outputpath, - ) - - interacting_water_pdb = "interacting_waters.pdb" - interacting_water_dcd = "interacting_waters.dcd" - assert interacting_water_pdb is not None - assert interacting_water_dcd is not None - assert os.path.exists(f"{outputpath}interacting_waters.pdb") - assert os.path.exists(f"{outputpath}interacting_waters.dcd") - - # Add additional assertions or checks as needed - # For example, you can use MDAnalysis to check if the saved files contain the expected number of atoms. - - # Cleanup: Remove the created files after the test - os.remove(f"{outputpath}interacting_waters.pdb") - os.remove(f"{outputpath}interacting_waters.dcd") - - -def test_visualization(): - shutil.copy(str(clouds), ".") - shutil.copy(str(waters_pdb), ".") - shutil.copy(str(waters_dcd), ".") - shutil.copy(str(waters_pkl), ".") - ligand_name = "LET" - receptor_type = "protein" - height = "1000px" - width = "1000px" - - # Call the function with sample data - result = visualization(ligand_name, receptor_type, height, width) - - # Perform assertions based on the expected outcome - assert result is not None - # Add more assertions based on your specific requirements From 4b4d751e3abc928a5cd2dead4184d1cf4c82e9b5 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:51:41 +0100 Subject: [PATCH 4/8] Added setup tests --- .../test_amberscript_creator.py | 576 ++++++++++++++++++ .../openmmdl_setup/test_configfile_creator.py | 565 +++++++++++++++++ .../openmmdl_setup/test_file_operator.py | 100 +++ .../openmmdl_setup/test_setup_options.py | 198 ++++++ 4 files changed, 1439 insertions(+) create mode 100644 openmmdl/tests/openmmdl_setup/test_amberscript_creator.py create mode 100644 openmmdl/tests/openmmdl_setup/test_configfile_creator.py create mode 100644 openmmdl/tests/openmmdl_setup/test_file_operator.py create mode 100644 openmmdl/tests/openmmdl_setup/test_setup_options.py diff --git a/openmmdl/tests/openmmdl_setup/test_amberscript_creator.py b/openmmdl/tests/openmmdl_setup/test_amberscript_creator.py new file mode 100644 index 00000000..2a797311 --- /dev/null +++ b/openmmdl/tests/openmmdl_setup/test_amberscript_creator.py @@ -0,0 +1,576 @@ +import pytest +from openmmdl.openmmdl_setup.amberscript_creator import AmberScriptGenerator # replace 'your_module' with the actual module name + +@pytest.fixture +def mock_data(): + """Fixture to provide mock session and uploadedFiles data.""" + return { + "session": { + "rcpType": "protRcp", + "prot_ff": "ff14SB", + "other_prot_ff_input": "other_prot_ff_input", + "dna_ff": "ff99bsc1", + "other_dna_ff_input": "other_dna_ff_input", + "rna_ff": "ff99", + "other_rna_ff_input": "other_rna_ff_input", + "carbo_ff": "glycam", + "other_carbo_ff_input": "other_carbo_ff_input", + "charge_method": "bcc", + "charge_value": "-1", + "lig_ff": "gaff", + "nmLig": True, + "spLig": True, + "addType": "addWater", + "boxType": "cube", + "dist": "10.0", + "addType": "addMembrane", + "lipid_tp": "other_lipid_tp", + "other_lipid_tp_input": "custom_lipid", + "lipid_ratio": "1", + "lipid_ff": "other_lipid_ff", + "other_lipid_ff_input": "custom_lipid_ff", + "dist2Border": "10.0", + "padDist": "5.0", + "water_ff": "tip3p", + "other_water_ff_input": "custom_water_ff" + }, + "uploadedFiles": { + "protFile": [("file1", "protein.pdb")], + "dnaFile": [("file2", "dna.pdb")], + "rnaFile": [("file3", "rna.pdb")], + "carboFile": [("file4", "carbo.pdb")], + "nmLigFile": [("file5", "ligand.pdb")], + "spLigFile": [("file6", "ligand.pdb")], + "prepcFile": [("file7", "ligand.prepc")], + "frcmodFile": [("file8", "ligand.frcmod")] + } + } + +@pytest.fixture +def base_mock_data(): + """Fixture providing mock data for different receptor types.""" + return { + "protRcp": { + "session": { + "rcpType": "protRcp", + "prot_ff": "ff14SB", + "other_prot_ff_input": "custom_ff" + }, + "uploadedFiles": { + "protFile": [["file1", "protein.pdb"]] + } + }, + "dnaRcp": { + "session": { + "rcpType": "dnaRcp", + "dna_ff": "bsc1", + "other_dna_ff_input": "custom_dna_ff" + }, + "uploadedFiles": { + "dnaFile": [["file2", "dna.pdb"]] + } + }, + "rnaRcp": { + "session": { + "rcpType": "rnaRcp", + "rna_ff": "ff99SB", + "other_rna_ff_input": "custom_rna_ff" + }, + "uploadedFiles": { + "rnaFile": [["file3", "rna.pdb"]] + } + }, + "carboRcp": { + "session": { + "rcpType": "carboRcp", + "carbo_ff": "GLYCAM", + "other_carbo_ff_input": "custom_carbo_ff" + }, + "uploadedFiles": { + "carboFile": [["file4", "carbo.pdb"]] + } + } + } + +def test_add_openmmdl_logo(mock_data): + """Test if add_openmmdl_logo correctly appends the logo to the amber_script list.""" + session, uploadedFiles = mock_data + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + # Prepare an empty amber_script list + amber_script = [] + + # Call the method + amber_script_gen.add_openmmdl_logo(amber_script) + + # Define the expected logo output + expected_logo = """ +# ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. +# .' .-, '. \\ _(`)_ \\ .'_ _ \\ | \\ | || \\ / || \\ / || _ `''. | ,_| +# / ,-.| \\ _ \\ | (_ o._)| / ( ` ) '| , \\ | || , \\/ , || , \\/ , || _ | ) _ \\,-./ ) +# ; \\ '_ / | :| (_,_) /. (_ o _) || |\\_ \\| || |\\_ /| || |\\_ /| ||( ''_' ) |\\ '_ '`) +# | _`,/ \\ _/ || '-.-' | (_,_)___|| _( )_\\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) +# : ( '\\_/ \\ ;| | ' \\ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' +# \\ `"/ \\ ) / | | \\ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\\.' / `-'`-'|___ +# '. \\_/``".' / ) \\ / | | | || | | || | | || .' | \\ +# '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` + """ + + # Verify the logo is appended + assert len(amber_script) == 1 + assert amber_script[0] == expected_logo + +def test_add_openmmdl_amber_logo(mock_data): + """Test if add_openmmdl_amber_logo correctly appends the full amber logo to the amber_script list.""" + session, uploadedFiles = mock_data + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + # Prepare an empty amber_script list + amber_script = [] + + # Call the method + amber_script_gen.add_openmmdl_amber_logo(amber_script) + + # Define the expected Amber logo output + expected_logo = """ +# _ _ +# / \\ _ __ ___ | |__ ___ _ __ +# / _ \\ | '_ ` _ \\| '_ \\ / _ \\ '__| +# / ___ \\| | | | | | |_) | __/ | +# /_/ \\_\\_| |_| |_|_.__/ \\___|_| + """ + + # Verify that the full expected logo is appended + assert len(amber_script) == 1 + assert amber_script[0] == expected_logo + + +def test_add_prot_receptor_type(base_mock_data): + """Test if add_receptor_type correctly appends commands for protein receptor.""" + data = base_mock_data["protRcp"] + amber_script_gen = AmberScriptGenerator(data["session"], data["uploadedFiles"]) + + amber_script = [] + amber_script_gen.add_receptor_type(amber_script) + + expected_output = [ + "#!/bin/bash\n", + "################################## Receptor ######################################\n", + "rcp_nm=protein # the file name of ligand without suffix `pdb`", + "rcp_ff=ff14SB", + "\n" + ] + assert amber_script == expected_output + +def test_add_dna_receptor_type(base_mock_data): + """Test if add_receptor_type correctly appends commands for DNA receptor.""" + data = base_mock_data["dnaRcp"] + amber_script_gen = AmberScriptGenerator(data["session"], data["uploadedFiles"]) + + amber_script = [] + amber_script_gen.add_receptor_type(amber_script) + + expected_output = [ + "#!/bin/bash\n", + "################################## Receptor ######################################\n", + "rcp_nm=dna # the file name of ligand without suffix `pdb`", + "rcp_ff=bsc1", + "\n" + ] + assert amber_script == expected_output + +def test_add_rna_receptor_type(base_mock_data): + """Test if add_receptor_type correctly appends commands for RNA receptor.""" + data = base_mock_data["rnaRcp"] + amber_script_gen = AmberScriptGenerator(data["session"], data["uploadedFiles"]) + + amber_script = [] + amber_script_gen.add_receptor_type(amber_script) + + expected_output = [ + "#!/bin/bash\n", + "################################## Receptor ######################################\n", + "rcp_nm=rna # the file name of ligand without suffix `pdb`", + "rcp_ff=ff99SB", + "\n" + ] + assert amber_script == expected_output + +def test_add_carbo_receptor_type(base_mock_data): + """Test if add_receptor_type correctly appends commands for carbohydrate receptor.""" + data = base_mock_data["carboRcp"] + amber_script_gen = AmberScriptGenerator(data["session"], data["uploadedFiles"]) + + amber_script = [] + amber_script_gen.add_receptor_type(amber_script) + + expected_output = [ + "#!/bin/bash\n", + "################################## Receptor ######################################\n", + "rcp_nm=carbo # the file name of ligand without suffix `pdb`", + "rcp_ff=GLYCAM", + "\n" + ] + assert amber_script == expected_output + +def test_add_clean_pdb_commands(mock_data): + """Test if add_clean_pdb_commands correctly appends commands to clean the PDB file.""" + session, uploadedFiles = mock_data["session"], mock_data["uploadedFiles"] + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + amber_script = [] + amber_script_gen.add_clean_pdb_commands(amber_script) + + expected_output = [ + "## Clean the PDB file by pdb4amber", + "pdb4amber -i ${rcp_nm}.pdb -o ${rcp_nm}_amber.pdb", + """ +## `tleap` requires that all residues and atoms have appropriate types to ensure compatibility with the specified force field. +## To avoid `tleap` failing, we delete non-essential atoms, such as hydrogens, but preserve important atoms like carbon and nitrogen within the caps residues. +## Don' worry about the missing atoms as tleap has the capability to reconstruct them automatically.""", + """awk '! ($2 ~ "(CH3|HH31|HH32|HH33)" || $3 ~ "(CH3|HH31|HH32|HH33)" )' ${rcp_nm}_amber.pdb > ${rcp_nm}_amber_f.pdb""", + "grep -v '^CONECT' ${rcp_nm}_amber_f.pdb > ${rcp_nm}_cnt_rmv.pdb\n" + ] + + assert amber_script == expected_output + + +def test_add_ligand_commands_nmLig(mock_data): + """Test if add_ligand_commands correctly appends commands for a normal ligand (nmLig).""" + session = { + "nmLig": True, + "charge_method": "bcc", + "charge_value": "-1", + "lig_ff": "gaff" + } + uploadedFiles = { + "nmLigFile": [("file5", "ligand.pdb")] + } + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + amber_script = [] + amber_script_gen.add_ligand_commands(amber_script) + + expected_output = [ + "################################## Ligand ######################################", + "# Normal Ligand that is compatible with GAFF force field", + "nmLigFile=ligand # the file name of ligand without suffix `.pdb` or `.sdf`", + "obabel ${nmLigFile}.pdb -O ${nmLigFile}.sdf -p # convert to sdf file for openmmdl_analysis, -p: add hydrogens appropriate for pH7.4", + "charge_method=bcc # refers to the charge method that antechamber will adopt", + "charge_value=-1 # Enter the net molecular charge of the ligand as integer (e.g. 1 or -2)", + "lig_ff=gaff # Ligand force field\n", + "## Clean the PDB file by pdb4amber", + "pdb4amber -i ${nmLigFile}.pdb -o ${nmLigFile}_amber.pdb\n", + "## Generate a prepc file and an additional frcmod file by `antechamber`", + "antechamber -fi pdb -fo prepc -i ${nmLigFile}_amber.pdb -o ${nmLigFile}.prepc -c ${charge_method} -at ${lig_ff} -nc ${charge_value} -pf y", + "parmchk2 -f prepc -i ${nmLigFile}.prepc -o ${nmLigFile}.frcmod\n", + "## Rename ligand pdb", + "antechamber -i ${nmLigFile}.prepc -fi prepc -o rename_${nmLigFile}.pdb -fo pdb\n" + ] + + assert amber_script == expected_output + + +def test_add_ligand_commands_spLig(mock_data): + """Test if add_ligand_commands correctly appends commands for a special ligand (spLig).""" + session = { + "spLig": True + } + uploadedFiles = { + "spLigFile": [("file6", "ligand.pdb")], + "prepcFile": [("file7", "ligand.prepc")], + "frcmodFile": [("file8", "ligand.frcmod")] + } + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + amber_script = [] + amber_script_gen.add_ligand_commands(amber_script) + + expected_output = [ + "################################## Ligand ######################################", + "# Special Ligand that is incompatible with GAFF force field", + "spLigFile=ligand # the file name of ligand without suffix `.pdb`", + "prepc=ligand # the file name without suffix `prepc`", + "frcmod=ligand # the file name without suffix `frcmod`\n", + "## Clean the PDB file by pdb4amber", + "pdb4amber -i ${spLigFile}.pdb -o ${spLigFile}_amber.pdb\n", + "spLigName=$(awk 'NR==1 {print $4}' ${spLigFile}_amber.pdb)\n" + ] + + assert amber_script == expected_output + +def test_add_combine_components_commands(mock_data): + """Test if add_combine_components_commands correctly appends commands to combine all components.""" + session, uploadedFiles = mock_data["session"], mock_data["uploadedFiles"] + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + amber_script = [] + amber_script_gen.add_combine_components_commands(amber_script) + + expected_output = [ + "###################### Combine All Components to Be Modelled ####################", + "cat > tleap.combine.in < tleap.combine.out", + "grep -v '^CONECT' comp.pdb > comp_cnt_rmv.pdb\n" + ] + + assert amber_script == expected_output + + +def test_add_solvation_commands(mock_data): + """Test if add_solvation_commands correctly appends commands for solvation settings.""" + session = { + "addType": "addWater", + "boxType": "cube", + "dist": "10.0" + } + uploadedFiles = {} + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + amber_script = [] + amber_script_gen.add_solvation_commands(amber_script) + + expected_output = [ + "boxType=solvatebox # `solvatebox`, a command in tleap, creates a cubic box ", + "dist=10.0 # the minimum distance between any atom originally present in solute and the edge of the periodic box." + ] + + assert amber_script == expected_output + + +@pytest.fixture +def mock_data_membrane(): + """Fixture to provide mock session and uploadedFiles data for membrane commands.""" + return { + "session": { + "addType": "addMembrane", + "lipid_tp": "custom_lipid", + "other_lipid_tp_input": "custom_lipid", + "lipid_ratio": "1", + "lipid_ff": "custom_lipid_ff", + "other_lipid_ff_input": "custom_lipid_ff", + "dist2Border": "10.0", + "padDist": "5.0" + }, + "uploadedFiles": {} + } + +def test_add_membrane_commands(mock_data_membrane): + """Test if add_membrane_commands correctly appends commands for membrane settings.""" + session, uploadedFiles = mock_data_membrane["session"], mock_data_membrane["uploadedFiles"] + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + amber_script = [] + amber_script_gen.add_membrane_commands(amber_script) + + # Define the expected output lines + expected_output = [ + "lipid_tp=custom_lipid", + "lipid_ratio=1", + "lipid_ff=custom_lipid_ff", + "dist2Border=10.0 # The minimum distance between the maxmin values for x y and z to the box boundaries. Flag --dist", + "padDist=5.0 # The width of the water layer over the membrane or protein in the z axis. Flag --dist_wat" + ] + + # Print the actual amber_script for debugging + print("Actual amber_script content:") + for line in amber_script: + print(line) + + # Check that the length of the amber_script matches the expected output + assert len(amber_script) == len(expected_output), "The number of lines in amber_script does not match the expected output." + + # Verify each line in amber_script matches the expected output + for i, (actual, expected) in enumerate(zip(amber_script, expected_output)): + assert actual == expected, f"Line {i} does not match: {actual}" + +def test_add_water_ff_commands(): + """Test if add_water_ff_commands correctly appends commands for water force field settings.""" + # Define a mock session and uploadedFiles + session = { + "water_ff": "tip3p", # Change this value to test different water force fields + "addType": "addWater", + # "other_water_ff_input" can be defined if water_ff is "other_water_ff" + } + uploadedFiles = {} # No uploaded files needed for this test + + # Instantiate the generator + amber_script_gen = AmberScriptGenerator(session, uploadedFiles) + + # Initialize the amber_script list + amber_script = [] + amber_script_gen.add_water_ff_commands(amber_script) + + # Define the expected output lines + expected_output = [ + "water_ff=tip3p", + "solvent=TIP3PBOX # set the water box" + ] + + # Print the actual amber_script for debugging + print("Actual amber_script content:") + for line in amber_script: + print(line) + + # Check that the length of the amber_script matches the expected output + assert len(amber_script) == len(expected_output), "The number of lines in amber_script does not match the expected output." + + # Verify each line in amber_script matches the expected output + for i, (actual, expected) in enumerate(zip(amber_script, expected_output)): + assert actual == expected, f"Line {i} does not match: {actual}" + +def test_add_ion_commands(): + """Test if add_ion_commands correctly appends commands for ion settings.""" + # Define mock sessions for different scenarios + sessions = [ + { + "pos_ion": "Na+", + "neg_ion": "Cl-", + "addType": "addWater", + "other_pos_ion_input": "custom_pos_ion", + "other_neg_ion_input": "custom_neg_ion", + "ionConc": "0.15" + }, + { + "pos_ion": "other_pos_ion", + "neg_ion": "other_neg_ion", + "other_pos_ion_input": "custom_pos_ion", + "other_neg_ion_input": "custom_neg_ion", + "addType": "addMembrane", + "ionConc": "0.15" + } + ] + + expected_outputs = [ + [ + "pos_ion=Na+", + "neg_ion=Cl-", + "numIon=0 # `numIon` is the flag for `addions` in tleap. When set to 0, the system will be neutralized", + "\n" + ], + [ + "pos_ion=custom_pos_ion # In development!", + "neg_ion=custom_neg_ion # In development!", + "ionConc=0.15", + "\n" + ] + ] + + for session, expected_output in zip(sessions, expected_outputs): + # Instantiate the generator + amber_script_gen = AmberScriptGenerator(session, {}) + + # Initialize the amber_script list + amber_script = [] + amber_script_gen.add_ion_commands(amber_script) + + # Print the actual amber_script for debugging + print("Actual amber_script content:") + for line in amber_script: + print(line) + + # Check that the length of the amber_script matches the expected output + assert len(amber_script) == len(expected_output), "The number of lines in amber_script does not match the expected output." + + # Verify each line in amber_script matches the expected output + for i, (actual, expected) in enumerate(zip(amber_script, expected_output)): + assert actual == expected, f"Line {i} does not match: {actual}" + +def test_add_membrane_building_commands(): + """Test if add_membrane_building_commands correctly appends commands for membrane settings.""" + + # Define mock sessions for different scenarios + sessions = [ + { + "addType": "addMembrane", + "nmLig": False, + "spLig": False, + "lipid_tp": "custom_lipid", + "lipid_ratio": "1", + "dist2Border": "10.0", + "padDist": "5.0", + "pos_ion": "Na+", + "ionConc": "0.15" + }, + { + "addType": "addMembrane", + "nmLig": True, + "spLig": False, + "lipid_tp": "custom_lipid", + "lipid_ratio": "1", + "dist2Border": "10.0", + "padDist": "5.0", + "pos_ion": "Na+", + "ionConc": "0.15" + }, + { + "addType": "addMembrane", + "nmLig": False, + "spLig": True, + "lipid_tp": "custom_lipid", + "lipid_ratio": "1", + "dist2Border": "10.0", + "padDist": "5.0", + "pos_ion": "Na+", + "ionConc": "0.15" + } + ] + + expected_outputs = [ + [ + "## Build the membrane", + "packmol-memgen --pdb ${rcp_nm}_cnt_rmv.pdb --lipids ${lipid_tp} --ratio ${lipid_ratio} --preoriented --dist ${dist2Border} --dist_wat ${padDist} --salt --salt_c ${pos_ion} --saltcon ${ionConc} --nottrim --overwrite --notprotonate\n", + "## Clean the complex pdb by `pdb4amber` for further `tleap` process", + "pdb4amber -i bilayer_${rcp_nm}_cnt_rmv.pdb -o clean_bilayer_${rcp_nm}.pdb", + "grep -v '^CONECT' clean_bilayer_${rcp_nm}.pdb > clean_bilayer_${rcp_nm}_cnt_rmv.pdb", + "\n" + ], + [ + "## Build the membrane", + "packmol-memgen --pdb comp.pdb --lipids ${lipid_tp} --ratio ${lipid_ratio} --preoriented --dist ${dist2Border} --dist_wat ${padDist} --salt --salt_c ${pos_ion} --saltcon ${ionConc} --nottrim --overwrite --notprotonate\n", + "## Clean the complex pdb by `pdb4amber` for further `tleap` process", + "pdb4amber -i bilayer_comp.pdb -o clean_bilayer_comp.pdb", + "grep -v '^CONECT' clean_bilayer_comp.pdb > clean_bilayer_comp_cnt_rmv.pdb", + "\n" + ], + [ + "## Build the membrane", + "packmol-memgen --pdb comp.pdb --lipids ${lipid_tp} --ratio ${lipid_ratio} --preoriented --dist ${dist2Border} --dist_wat ${padDist} --salt --salt_c ${pos_ion} --saltcon ${ionConc} --nottrim --overwrite --notprotonate\n", + "## Clean the complex pdb by `pdb4amber` for further `tleap` process", + "pdb4amber -i bilayer_comp.pdb -o clean_bilayer_comp.pdb", + "grep -v '^CONECT' clean_bilayer_comp.pdb > clean_bilayer_comp_cnt_rmv.pdb", + "\n" + ] + ] + + for session, expected_output in zip(sessions, expected_outputs): + # Instantiate the generator + amber_script_gen = AmberScriptGenerator(session, {}) + + # Initialize the amber_script list + amber_script = [] + amber_script_gen.add_membrane_building_commands(amber_script) + + # Print the actual amber_script for debugging + print("Actual amber_script content:") + for line in amber_script: + print(line) + + # Check that the length of the amber_script matches the expected output + assert len(amber_script) == len(expected_output), "The number of lines in amber_script does not match the expected output." + + # Verify each line in amber_script matches the expected output + for i, (actual, expected) in enumerate(zip(amber_script, expected_output)): + assert actual == expected, f"Line {i} does not match: {actual}" diff --git a/openmmdl/tests/openmmdl_setup/test_configfile_creator.py b/openmmdl/tests/openmmdl_setup/test_configfile_creator.py new file mode 100644 index 00000000..4c348b1c --- /dev/null +++ b/openmmdl/tests/openmmdl_setup/test_configfile_creator.py @@ -0,0 +1,565 @@ +import pytest +from openmmdl.openmmdl_setup.configfile_creator import ConfigCreator, ConfigWriter # Adjust import as necessary + +def test_add_openmmdl_ascii_art_logo(): + """Test if add_openmmdl_ascii_art_logo correctly adds the ASCII art logo.""" + + # Create a ConfigCreator instance + session = {} + uploadedFiles = {} + config_creator = ConfigCreator(session, uploadedFiles) + + # Initialize the script list + script = [] + config_creator.add_openmmdl_ascii_art_logo(script) + + # Expected ASCII art + expected_logo = """ + ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. + .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. | ,_| + / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) + ; \ '_ / | :| (_,_) /. (_ o _) || |\_ \| || |\_ /| || |\_ /| ||( ''_' ) |\ '_ '`) + | _`,/ \ _/ || '-.-' | (_,_)___|| _( )_\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) + : ( '\_/ \ ;| | ' \ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' + \ `"/ \ ) / | | \ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\.' / `-'`-'|___ + '. \_/``".' / ) \ / | | | || | | || | | || .' | \ + '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` + """ + + # Compare script content with expected output + assert script[0].strip() == expected_logo.strip() + +def test_add_ascii_config_art(): + """Test if add_ascii_config_art correctly adds the configuration file ASCII art header.""" + + # Create a ConfigCreator instance + session = {} + uploadedFiles = {} + config_creator = ConfigCreator(session, uploadedFiles) + + # Initialize the script list + script = [] + config_creator.add_ascii_config_art(script) + + # Expected ASCII art + expected_art = """ + __ __ ___ __ ___ ___ + / ` / \ |\ | |__ | / _` |__ | | |__ + \__, \__/ | \| | | \__> | | |___ |___ + """ + + # Compare script content with expected output + assert script[0].strip() == expected_art.strip() + +@pytest.fixture +def config_creator_pdb(): + session = { + "fileType": "pdb", + "pdbType": "pdb", + "sdfFile": "ligand.sdf", + "ligandMinimization": "minimization_method", + "smallMoleculeForceField": "force_field", + "ligandSanitization": "sanitization_method", + "waterModel": "spce" + } + uploadedFiles = { + "file": [("protein.pdb", "path/to/protein.pdb")] + } + return ConfigCreator(session, uploadedFiles) + +def test_add_pdb_input_files_configuration(config_creator_pdb): + """Test if add_pdb_input_files_configuration correctly configures PDB input files.""" + + # Initialize the script list + script = [] + config_creator_pdb.add_pdb_input_files_configuration(script) + + # Expected script content + expected_lines = [ + "\n# Input Files", + "############# Ligand and Protein Data ###################", + "input_file_type = pdb", + "protein = path/to/protein.pdb", + "ligand = ligand.sdf", + "ligand_name = UNK", + "minimization = minimization_method", + "smallMoleculeForceField = force_field", + "sanitization = sanitization_method" + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_amber(): + session = { + "fileType": "amber", + "has_files": "yes", + "nmLig": True, + "spLig": False, + "nmLigName": "nmLig", + "spLigName": None, + "water_ff": "tip3p" + } + uploadedFiles = { + "prmtopFile": [("prmtop.prmtop", "path/to/prmtop.prmtop")], + "inpcrdFile": [("inpcrd.inpcrd", "path/to/inpcrd.inpcrd")], + "nmLigFile": [("nmLig.sdf", "path/to/nmLig.sdf")] + } + return ConfigCreator(session, uploadedFiles) + +def test_add_amber_file_configuration(config_creator_amber): + """Test if add_amber_file_configuration correctly configures Amber files.""" + + # Initialize the script list + script = [] + config_creator_amber.add_amber_file_configuration(script) + + # Expected script content + expected_lines = [ + """####### Add the Amber Files in the Folder with this Script ####### \n""", + "input_file_type = amber", + "prmtop_file = path/to/prmtop.prmtop", + "inpcrd_file = path/to/inpcrd.inpcrd", + "prmtop = AmberPrmtopFile(prmtop_file)", + "inpcrd = AmberInpcrdFile(inpcrd_file)" + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_ff(): + session = { + "fileType": "pdb", + "forcefield": "ff99SB", + "waterModel": "spce" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_forcefield_and_water_model_configuration(config_creator_ff): + """Test if add_forcefield_and_water_model_configuration correctly configures forcefield and water model.""" + + # Initialize the script list + script = [] + config_creator_ff.add_forcefield_and_water_model_configuration(script) + + # Expected script content + expected_lines = [ + "\n############# Forcefield, Water and Membrane Model Selection ###################\n", + "forcefield = ff99SB", + "water = spce" + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_solvent(): + session = { + "fileType": "pdb", + "solvent": True, + "add_membrane": False, + "water_padding": True, + "water_padding_distance": "10.0", + "water_boxShape": "cubic", + "box_x": None, + "box_y": None, + "box_z": None, + "water_ionicstrength": "0.15", + "water_positive": "Na+", + "water_negative": "Cl-" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_solvent_configuration(config_creator_solvent): + """Test if add_solvent_configuration correctly configures solvent or membrane settings.""" + + # Initialize the script list + script = [] + config_creator_solvent.add_solvent_configuration(script) + + # Expected script content + expected_lines = [ + "\n############# Water Box Settings ###################\n", + "add_membrane = False", + "Water_Box = Buffer", + "water_padding_distance = 10.0", + "water_boxShape = cubic", + "water_ionicstrength = 0.15", + "water_positive_ion = Na+", + "water_negative_ion = Cl-" + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_system(): + session = { + "nonbondedMethod": "PME", + "cutoff": "1.0", + "ewaldTol": "0.0005", + "hmr": True, + "hmrMass": "1.008", + "constraints": "hbonds", + "constraintTol": "0.0001" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_system_configuration(config_creator_system): + """Test if add_system_configuration correctly configures system settings.""" + + # Initialize the script list + script = [] + config_creator_system.add_system_configuration(script) + print(script) + + # Expected script content + expected_lines = [ + "\n# System Configuration\n", + "nonbondedMethod = app.PME", + "nonbondedCutoff = 1.0*unit.nanometers", + "ewaldErrorTolerance = 0.0005", + "hmrOptions = ', hydrogenMass=hydrogenMass'", + "constraints = app.HBonds", + "rigidWater = True", + "constraintTolerance = 0.0001", + "hydrogenMass = 1.008*unit.amu" + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_integrator(): + session = { + "dt": "0.002", + "temperature": "300", + "friction": "1.0", + "ensemble": "npt", + "pressure": "1.0", + "barostatInterval": "100" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_integration_configuration(config_creator_integrator): + """Test if add_integration_configuration correctly configures integration settings.""" + + # Initialize the script list + script = [] + config_creator_integrator.add_integration_configuration(script) + + # Expected script content + expected_lines = [ + "\n# Integration Configuration\n", + "step_time = 0.002", + "dt = 0.002*unit.picoseconds", + "temperature = 300*unit.kelvin", + "friction = 1.0/unit.picosecond", + "pressure = 1.0*unit.atmospheres", + "barostatInterval = 100" + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_sim_time(): + session = { + "sim_length": "1000", # Simulation length in ns + "dt": "0.002", # Time step in ps + "dcdFrames": "5000", # Number of frames for DCD output + "pdbInterval_ns": "10" # Interval for PDB output in ns + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_simulation_time_and_steps_configuration(config_creator_sim_time): + """Test if add_simulation_time_and_steps_configuration correctly configures simulation time and steps.""" + + # Initialize the script list + script = [] + config_creator_sim_time.add_simulation_time_and_steps_configuration(script) + + # Calculate expected values + steps = int(float("1000") / float("0.002") * 1000) # Total steps + dcdinterval = int(steps / int("5000")) # DCD interval + pdbInterval = int( + steps * (float("10") / float("1000")) # PDB interval + ) + + # Expected script content + expected_lines = [ + "\n# Simulation Time and Steps Configuration\n", + "sim_length = 1000", + "steps = %s" % steps, + "\n# Frames and Interval Configuration\n", + "dcdFrames = 5000", + "dcdInterval = %s" % dcdinterval, + "pdbInterval_ns = 10", + "pdbInterval = %s" % pdbInterval + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_equil(): + session = { + "equilibration": "equilibration" # Change to "minimization" for testing that case + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_equilibration_configuration(config_creator_equil): + """Test if add_equilibration_configuration correctly configures equilibration or minimization settings.""" + + # Initialize the script list + script = [] + config_creator_equil.add_equilibration_configuration(script) + + # Expected script content + expected_lines = [ + "\n# Equilibration & Minimization Configuration\n", + "preparation_type = equilibration" + ] + + # Compare script content with expected output + assert script == expected_lines + + # Change the session parameter to test "minimization" + config_creator_equil.session["equilibration"] = "minimization" + script = [] + config_creator_equil.add_equilibration_configuration(script) + + # Expected script content for minimization + expected_lines = [ + "\n# Equilibration & Minimization Configuration\n", + "preparation_type = minimization" + ] + + # Compare script content with expected output + assert script == expected_lines + +@pytest.fixture +def config_creator_simulation(): + session = { + "platform": "CUDA", + "precision": "mixed", + "writeDCD": True, + "dcdFilename": "simulation", + "writeData": True, + "dataFields": ["energy", "temperature"], + "dataInterval": "1000", + "restart_checkpoint": "yes", + "restart_step": "step1", + "dataFilename": "data_reporter" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_simulation_configuration(config_creator_simulation): + """Test if add_simulation_configuration correctly configures simulation platform, precision, and file outputs.""" + + # Initialize the script list + script = [] + config_creator_simulation.add_simulation_configuration(script) + + # Expected script content + expected_lines = [ + "\n# Simulation Configuration\n", + "platform = CUDA", + "platformProperties = {'Precision': 'mixed'}", + "dcd_name = step1_simulation", + "dataReporter = StateDataReporter('step1_data_reporter', 1000, totalSteps=steps,", + " energy=True, temperature=True, separator='\\t')" + ] + + # Compare script content with expected output + assert script == expected_lines + + # Test case with no data reporting + config_creator_simulation.session["writeData"] = False + script = [] + config_creator_simulation.add_simulation_configuration(script) + + # Expected script content without dataReporter + expected_lines = [ + "\n# Simulation Configuration\n", + "platform = CUDA", + "platformProperties = {'Precision': 'mixed'}", + "dcd_name = step1_simulation" + ] + + assert script == expected_lines + + +@pytest.fixture +def config_creator_checkpoint(): + session = { + "writeCheckpoint": True, + "checkpointInterval_ns": "500", # Checkpoint interval in ns + "dt": "0.002", # Time step in ps + "checkpointFilename": "checkpoint", + "restart_checkpoint": "yes", + "restart_step": "step1" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_checkpoint_configuration(config_creator_checkpoint): + """Test if add_checkpoint_configuration correctly configures checkpoint and restart settings.""" + + # Initialize the script list + script = [] + config_creator_checkpoint.add_checkpoint_configuration(script) + + # Calculate expected checkpoint interval + checkpointInterval = int( + 1000 * float("500") / float("0.002") + ) + + # Expected script content + expected_lines = [ + "\n# Checkpoint and Restart Configuration\n", + "checkpointInterval = %s" % checkpointInterval, + "checkpoint_name = checkpoint", + "restart_step = step1" + ] + + # Compare script content with expected output + assert script == expected_lines + + # Test case with no checkpoint + config_creator_checkpoint.session["writeCheckpoint"] = False + script = [] + config_creator_checkpoint.add_checkpoint_configuration(script) + + # Expected script content without checkpoint configuration + expected_lines = [] + + assert script == expected_lines + +@pytest.fixture +def config_creator_xml(): + session = { + "writeSimulationXml": True, + "systemXmlFilename": "system.xml", + "integratorXmlFilename": "integrator.xml" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_xml_serialization_configuration(config_creator_xml): + """Test if add_xml_serialization_configuration correctly configures XML serialization settings.""" + + # Initialize the script list + script = [] + config_creator_xml.add_xml_serialization_configuration(script) + + # Expected script content + expected_lines = [ + "\n# Write XML Serialized Objects\n", + "xmlsystem_filename = system.xml", + "xmlintegrator_filename = integrator.xml" + ] + + # Compare script content with expected output + assert script == expected_lines + + # Test case with no XML serialization + config_creator_xml.session["writeSimulationXml"] = False + script = [] + config_creator_xml.add_xml_serialization_configuration(script) + + # Expected script content without XML serialization configuration + expected_lines = [] + + assert script == expected_lines + +@pytest.fixture +def config_creator_postprocessing(): + session = { + "md_postprocessing": "enabled", + "mdtraj_output": "mdtraj_output.pdb", + "mdtraj_removal": "mdtraj_removal.pdb", + "mda_output": "mda_output.h5", + "mda_selection": "resname LIG" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_postprocessing_configuration(config_creator_postprocessing): + """Test if add_postprocessing_configuration correctly configures MD post-processing settings.""" + + # Initialize the script list + script = [] + config_creator_postprocessing.add_postprocessing_configuration(script) + + # Expected script content + expected_lines = [ + "\n# Post-Processing Configuration\n", + "postprocessing = enabled", + "old_output = mdtraj_output.pdb", + "old_removal = mdtraj_removal.pdb", + "mda_output = mda_output.h5", + "mda_selection = resname LIG" + ] + + # Compare script content with expected output + assert script == expected_lines + + +@pytest.fixture +def config_creator_analysis(): + session = { + "openmmdl_analysis": "Yes", + "analysis_selection": "all", + "binding_mode": "flexible", + "min_transition": "5.0", + "rmsd_diff": "0.1", + "pml_generation": "enabled" + } + uploadedFiles = {} + return ConfigCreator(session, uploadedFiles) + +def test_add_openmmdl_analysis_configuration(config_creator_analysis): + """Test if add_openmmdl_analysis_configuration correctly configures OpenMMDL Analysis settings.""" + + # Initialize the script list + script = [] + config_creator_analysis.add_openmmdl_analysis_configuration(script) + + # Expected script content + expected_lines = [ + "\n# OpenMMDL Analysis Configuration\n", + "openmmdl_analysis = Yes", + "analysis_selection = all", + "binding_mode = flexible", + "min_transition = 5.0", + "rmsd_diff = 0.1", + "pml_generation = enabled" + ] + + # Compare script content with expected output + assert script == expected_lines + + # Test case with OpenMMDL Analysis disabled + config_creator_analysis.session["openmmdl_analysis"] = "No" + script = [] + config_creator_analysis.add_openmmdl_analysis_configuration(script) + + # Expected script content without OpenMMDL Analysis configuration + expected_lines = [ + "\n# OpenMMDL Analysis Configuration\n", + "openmmdl_analysis = No" + ] + + assert script == expected_lines + + diff --git a/openmmdl/tests/openmmdl_setup/test_file_operator.py b/openmmdl/tests/openmmdl_setup/test_file_operator.py new file mode 100644 index 00000000..3a8b8425 --- /dev/null +++ b/openmmdl/tests/openmmdl_setup/test_file_operator.py @@ -0,0 +1,100 @@ +import pytest +from werkzeug.datastructures import FileStorage +from io import BytesIO +from openmmdl.openmmdl_setup.file_operator import LigandExtractor, FileUploader # Replace with the actual module name +import tempfile + +# Test cases for LigandExtractor +class TestLigandExtractor: + def test_extract_ligand_name_pdb(self): + lig_file_name = "ligand.pdb" + expected_output = "ligand" + assert LigandExtractor.extract_ligand_name(lig_file_name) == expected_output + + def test_extract_ligand_name_sdf(self): + lig_file_name = "ligand.sdf" + expected_output = "UNL" + assert LigandExtractor.extract_ligand_name(lig_file_name) == expected_output + + def test_extract_ligand_name_invalid_extension(self): + lig_file_name = "ligand.txt" + with pytest.raises(ValueError, match="Unsupported file format. Only .sdf and .pdb are supported."): + LigandExtractor.extract_ligand_name(lig_file_name) + + def test_extract_ligand_name_non_string(self): + lig_file_name = 12345 + with pytest.raises(TypeError, match="lig_file_name must be a string"): + LigandExtractor.extract_ligand_name(lig_file_name) + + + +# Custom class to mock the behavior of request.files in Flask +class MockFileMultiDict: + """ + This class mimics the behavior of Flask's request.files MultiDict for testing purposes. + """ + def __init__(self, file_dict): + self.file_dict = file_dict + + def getlist(self, key): + return self.file_dict.get(key, []) + + def __iter__(self): + return iter(self.file_dict) + + def items(self): + return self.file_dict.items() + +# Test cases for FileUploader +class TestFileUploader: + @pytest.fixture + def fake_request(self): + """ + Simulates a fake request with files using FileStorage and provides the getlist() method. + """ + file1 = FileStorage( + stream=BytesIO(b"dummy content 1"), filename="file1.txt", content_type="text/plain" + ) + file2 = FileStorage( + stream=BytesIO(b"dummy content 2"), filename="file2.txt", content_type="text/plain" + ) + + class FakeRequest: + def __init__(self): + # Mimic request.files using the MockFileMultiDict + self.files = MockFileMultiDict({ + "file_field_1": [file1], + "file_field_2": [file2], + }) + + return FakeRequest() + + def test_save_uploaded_files_success(self, fake_request): + uploadedFiles = {} + FileUploader.save_uploaded_files(uploadedFiles, fake_request) + + # Verify that files were saved correctly + assert "file_field_1" in uploadedFiles + assert "file_field_2" in uploadedFiles + assert len(uploadedFiles["file_field_1"]) == 1 + assert uploadedFiles["file_field_1"][0][1] == "file1.txt" + assert len(uploadedFiles["file_field_2"]) == 1 + assert uploadedFiles["file_field_2"][0][1] == "file2.txt" + + def test_save_uploaded_files_invalid_dict(self, fake_request): + uploadedFiles = [] + with pytest.raises(TypeError, match="uploadedFiles must be a dictionary"): + FileUploader.save_uploaded_files(uploadedFiles, fake_request) + + def test_save_uploaded_files_invalid_request(self): + uploadedFiles = {} + invalid_request = object() # Request without 'files' attribute + with pytest.raises(TypeError, match="request object must have a 'files' attribute"): + FileUploader.save_uploaded_files(uploadedFiles, invalid_request) + + def test_save_uploaded_files_non_filestorage(self, fake_request): + # Modify fake_request to include an invalid file type (non-FileStorage instance) + fake_request.files.file_dict["file_field_1"] = [object()] # Invalid file type + uploadedFiles = {} + with pytest.raises(TypeError, match="file must be a FileStorage instance"): + FileUploader.save_uploaded_files(uploadedFiles, fake_request) diff --git a/openmmdl/tests/openmmdl_setup/test_setup_options.py b/openmmdl/tests/openmmdl_setup/test_setup_options.py new file mode 100644 index 00000000..7202921c --- /dev/null +++ b/openmmdl/tests/openmmdl_setup/test_setup_options.py @@ -0,0 +1,198 @@ +import pytest +from typing import List, Dict +from flask import session +from werkzeug.datastructures import ImmutableMultiDict +from openmmdl.openmmdl_setup.setup_options import SetupOptionsConfigurator, SessionDict, RequestSessionManager + +@pytest.fixture +def default_session() -> SessionDict: + """ + Provides a default mock session with minimal required data for testing. + """ + return { + "fileType": "pdb", + "waterModel": "explicit", + } + +@pytest.fixture +def configurator(default_session: SessionDict) -> SetupOptionsConfigurator: + """ + Provides a SetupOptionsConfigurator instance initialized with the default session. + """ + return SetupOptionsConfigurator(session=default_session) + +def test_configure_default_options(configurator: SetupOptionsConfigurator, default_session: SessionDict): + """ + Test the `configure_default_options` method to ensure default values are properly set. + """ + configurator.configure_default_options() + + assert default_session["restart_checkpoint"] is False + assert default_session["mdtraj_output"] == "mdtraj_pdb_dcd" + assert default_session["mda_output"] == "mda_pdb_dcd" + assert default_session["analysis_selection"] == "analysis_all" + assert default_session["binding_mode"] == "40" + assert default_session["ensemble"] == "npt" # since waterModel is 'explicit' + assert default_session["platform"] == "CUDA" + assert default_session["precision"] == "mixed" + assert default_session["cutoff"] == "1.0" # as waterModel is not 'implicit' + assert default_session["hmr"] is True + assert default_session["writeDCD"] is True + assert default_session["dataFields"] == ["step", "speed", "progress", "potentialEnergy", "temperature"] + assert default_session["writeCheckpoint"] is True + +def test_configure_default_options_with_implicit_water(configurator: SetupOptionsConfigurator, default_session: SessionDict): + """ + Test `configure_default_options` when the water model is set to implicit. + """ + default_session["waterModel"] = "implicit" + configurator.configure_default_options() + + assert default_session["ensemble"] == "nvt" # should switch to nvt due to implicit water + assert default_session["cutoff"] == "2.0" # cutoff should change + assert default_session["nonbondedMethod"] == "CutoffNonPeriodic" + +def test_configure_default_amber_options(configurator: SetupOptionsConfigurator, default_session: SessionDict): + """ + Test the `configureDefaultAmberOptions` method to ensure Amber options are set correctly. + """ + configurator.configureDefaultAmberOptions() + + assert default_session["lig_ff"] == "gaff2" + assert default_session["charge_value"] == "0" + assert default_session["charge_method"] == "bcc" + assert default_session["prot_ff"] == "ff19SB" + assert default_session["dna_ff"] == "OL15" + assert default_session["rna_ff"] == "OL3" + assert default_session["carbo_ff"] == "GLYCAM_06j" + assert default_session["addType"] == "addWater" + assert default_session["boxType"] == "cube" + assert default_session["lipid_tp"] == "POPC" + assert default_session["dist2Border"] == "15" + assert default_session["water_ff"] == "opc" + assert default_session["pos_ion"] == "Na+" + assert default_session["neg_ion"] == "Cl-" + assert default_session["ionConc"] == "0.15" + + +@pytest.fixture +def app_context(): + """ + Provides a Flask test request context for session management. + """ + from flask import Flask + + app = Flask(__name__) + app.secret_key = 'test_secret_key' # Required to use sessions + with app.test_request_context(): + yield + +@pytest.fixture +def default_form() -> ImmutableMultiDict: + """ + Provides default mock form data for testing. + """ + return ImmutableMultiDict({ + "rcpType": "protein", + "prot_ff": "ff14SB", + "other_prot_ff_input": "custom_prot_ff", + "dna_ff": "OL15", + "other_dna_ff_input": "custom_dna_ff", + "rna_ff": "OL3", + "other_rna_ff_input": "custom_rna_ff", + "carbo_ff": "GLYCAM_06j", + "addType": "addWater", + "boxType": "geometry", + "geomPadding": "10", + "ionicstrength": "0.15", + "positiveion": "Na+", + "negativeion": "Cl-", + "forcefield": "amber14", + "ml_forcefield": "openff", + "waterModel": "tip3p", + "smallMoleculeForceField": "gaff", + "ligandMinimization": "Yes", + "ligandSanitization": "Yes", + "writeDCD": "True", + "writeData": "True", + "writeCheckpoint": "True", + "dataFields": "step,speed,temperature", + "hmr": "True", + }) + +@pytest.fixture +def request_manager(default_form) -> RequestSessionManager: + """ + Provides a RequestSessionManager instance initialized with the default form data. + """ + return RequestSessionManager(form=default_form) + +def test_set_amber_options_rcp_session(request_manager: RequestSessionManager, app_context): + """ + Test the `setAmberOptions_rcp_session` method. + """ + request_manager.setAmberOptions_rcp_session() + + assert session["rcpType"] == "protein" + assert session["prot_ff"] == "ff14SB" + assert session["other_prot_ff_input"] == "custom_prot_ff" + assert session["dna_ff"] == "OL15" + assert session["other_dna_ff_input"] == "custom_dna_ff" + assert session["rna_ff"] == "OL3" + assert session["other_rna_ff_input"] == "custom_rna_ff" + assert session["carbo_ff"] == "GLYCAM_06j" + +def test_set_amber_options_water_membrane_session(request_manager: RequestSessionManager, app_context): + """ + Test the `setAmberOptions_water_membrane_session` method. + """ + request_manager.setAmberOptions_water_membrane_session() + + assert session["addType"] == "addWater" + assert session["boxType"] == "geometry" + assert session["dist"] == "" + assert session["lipid_tp"] == "" + assert session["other_lipid_tp_input"] == "" + assert session["lipid_ratio"] == "" + assert session["lipid_ff"] == "" + assert session["dist2Border"] == "" + assert session["padDist"] == "" + +def test_simulationoptions_add_general_settings(request_manager: RequestSessionManager, app_context): + """ + Test the `simulationoptions_add_general_settings` method to ensure general simulation settings are correctly added to the session. + """ + request_manager.simulationoptions_add_general_settings() + + assert session["forcefield"] == "amber14" + assert session["ml_forcefield"] == "openff" + assert session["waterModel"] == "tip3p" + assert session["smallMoleculeForceField"] == "gaff" + assert session["ligandMinimization"] == "Yes" + assert session["ligandSanitization"] == "Yes" + assert session["writeDCD"] is True + assert session["writeData"] is True + assert session["writeCheckpoint"] is True + assert session["dataFields"] == ["step,speed,temperature"] + assert session["hmr"] is True + +def test_configure_files_add_forcefield_ligand_settings(request_manager: RequestSessionManager, app_context): + """ + Test the `configureFiles_add_forcefield_ligand_settings` method to ensure forcefield and ligand settings are added to the session. + """ + request_manager.configureFiles_add_forcefield_ligand_settings() + + assert session["forcefield"] == "amber14" + assert session["ml_forcefield"] == "openff" + assert session["waterModel"] == "tip3p" + assert session["smallMoleculeForceField"] == "gaff" + assert session["ligandMinimization"] == "Yes" + assert session["ligandSanitization"] == "Yes" + +def test_parse_float(request_manager: RequestSessionManager): + """ + Test the `_parse_float` helper function. + """ + assert request_manager._parse_float("10.5") == 10.5 + assert request_manager._parse_float(None) is None + assert request_manager._parse_float("invalid") is None From 16f79881cd5a0928a1676fa4416dcd062fd45c51 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:57:20 +0100 Subject: [PATCH 5/8] Update CI-CD.yml --- .github/workflows/CI-CD.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI-CD.yml b/.github/workflows/CI-CD.yml index 96fe8081..c2a104b9 100644 --- a/.github/workflows/CI-CD.yml +++ b/.github/workflows/CI-CD.yml @@ -1,6 +1,11 @@ name: CI-CD - -on: [push] +on: + push: + branches: + - '**' + pull_request: + branches: + - '**' jobs: build-linux: From 97a48879ea1006bb2c051bcaf4ae1d3727d37446 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 18:03:49 +0100 Subject: [PATCH 6/8] Update CI-CD.yml --- .github/workflows/CI-CD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI-CD.yml b/.github/workflows/CI-CD.yml index c2a104b9..9beca257 100644 --- a/.github/workflows/CI-CD.yml +++ b/.github/workflows/CI-CD.yml @@ -43,7 +43,7 @@ jobs: - name: Run black formatter run: | - black -l 99 . + black -l 99 --exclude '\.ipynb$' . - name: Install pytest and run tests run: | From 161a5c0fc1baa5b2c2f22aa4cbe78b203dad7f7a Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 18:14:14 +0100 Subject: [PATCH 7/8] Update CI-CD.yml --- .github/workflows/CI-CD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI-CD.yml b/.github/workflows/CI-CD.yml index 9beca257..ae34abd9 100644 --- a/.github/workflows/CI-CD.yml +++ b/.github/workflows/CI-CD.yml @@ -48,4 +48,4 @@ jobs: - name: Install pytest and run tests run: | pip install pytest pytest-cov - pytest -vv --cov=openmmdl --cov-report=xml --cov-report=html --color=yes openmmdl/tests/ + pytest -vv --cov=openmmdl --cov-branch --cov-report=xml --cov-report=html --color=yes openmmdl/tests/ From 37dd18c2c66a97e5ef131738f0cdad87a0f210f3 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Dec 2024 18:20:42 +0100 Subject: [PATCH 8/8] Update CI-CD.yml --- .github/workflows/CI-CD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI-CD.yml b/.github/workflows/CI-CD.yml index ae34abd9..b0024ddc 100644 --- a/.github/workflows/CI-CD.yml +++ b/.github/workflows/CI-CD.yml @@ -48,4 +48,4 @@ jobs: - name: Install pytest and run tests run: | pip install pytest pytest-cov - pytest -vv --cov=openmmdl --cov-branch --cov-report=xml --cov-report=html --color=yes openmmdl/tests/ + coverage run -m pytest -vv --cov=openmmdl --cov-branch --cov-report=xml --cov-report=html --color=yes openmmdl/tests/