Skip to content

Commit 3e26557

Browse files
kierandidi and a-r-j authored
[feat] added get_model and get_models fct to mmcif (#145)
* [feat] added get_model and get_models fct to mmcif * [docs] add CHANGELOG.md * [feat] added tests for multiple models * [feat] added tests for multiple models * [feat] rename df to biopandas_structure * bump changelog + changelog workflow * Delete docs/sources/CHANGELOG.md --------- Co-authored-by: Arian Jamasb <[email protected]>
1 parent 67aa2f2 commit 3e26557

File tree

7 files changed

+143
-50
lines changed

7 files changed

+143
-50
lines changed

.github/workflows/changelog-enforcer.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ jobs:
1313
- uses: actions/checkout@v3
1414
- uses: dangoslen/changelog-enforcer@v3
1515
with:
16-
skipLabels: 'skip-changelog'
16+
skipLabels: 'skip-changelog'
17+
changeLogPath: 'docs/CHANGELOG.md'

biopandas/mmcif/pandas_mmcif.py

+63-2
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
# License: BSD 3 clause
77
# Project Website: http://rasbt.github.io/biopandas/
88
# Code Repository: https://github.com/rasbt/biopandas
9-
9+
from __future__ import annotations
1010
import gzip
1111
import sys
12+
import copy
1213
import warnings
1314
from typing import Dict, List, Optional
1415
from urllib.error import HTTPError, URLError
@@ -69,6 +70,66 @@ def read_mmcif(self, path):
6970
# self.header, self.code = self._parse_header_code() #TODO: implement
7071
self.code = self.data["entry"]["id"][0].lower()
7172
return self
73+
74+
def label_models(self):
    """Adds a column ("model_id") to the underlying
    DataFrames containing the model number.

    The values are copied from the "pdbx_PDB_model_num" column of the
    "ATOM" and "HETATM" DataFrames. A record type that is not present in
    ``self.df`` is skipped.

    Returns
    ---------
    self : The same object (modified in place), so calls can be chained.
    """
    # Both record types are handled identically, so loop instead of
    # repeating the branch; `in self.df` tests the keys directly.
    for record in ("ATOM", "HETATM"):
        if record in self.df:
            self.df[record]["model_id"] = self.df[record]["pdbx_PDB_model_num"]
    return self
82+
83+
def get_model(self, model_index: int) -> PandasMmcif:
    """Returns a new PandasMmcif object with the dataframes subset to the
    given model index.

    Parameters
    ----------
    model_index : int
        An integer representing the model index to subset to. It is
        compared against the "pdbx_PDB_model_num" column.

    Returns
    ---------
    PandasMmcif : A new PandasMmcif object containing the
        structure subsetted to the given model. The object this method
        is called on is left unchanged.
    """
    # Work on a deep copy so the caller's DataFrames are never modified.
    biopandas_structure = copy.deepcopy(self)

    # ATOM and HETATM are filtered the same way; absent records are skipped.
    for record in ("ATOM", "HETATM"):
        if record in biopandas_structure.df:
            frame = biopandas_structure.df[record]
            biopandas_structure.df[record] = frame.loc[
                frame["pdbx_PDB_model_num"] == model_index
            ]
    return biopandas_structure
106+
107+
def get_models(self, model_indices: List[int]) -> PandasMmcif:
    """Returns a new PandasMmcif object with the dataframes subset to the
    given model indices.

    Parameters
    ----------
    model_indices : List[int]
        A list representing the model indexes to subset to. Rows are kept
        when their "pdbx_PDB_model_num" value is one of these indexes.

    Returns
    ---------
    PandasMmcif : A new PandasMmcif object containing the
        structure subsetted to the given models. The object this method
        is called on is left unchanged.
    """
    # Work on a deep copy so the caller's DataFrames are never modified.
    biopandas_structure = copy.deepcopy(self)

    # Materialize once so any iterable of indices works with `isin`.
    wanted = list(model_indices)

    # Vectorized membership test instead of a Python-level
    # `x in model_indices` scan for every row.
    for record in ("ATOM", "HETATM"):
        if record in biopandas_structure.df:
            frame = biopandas_structure.df[record]
            biopandas_structure.df[record] = frame.loc[
                frame["pdbx_PDB_model_num"].isin(wanted)
            ]
    return biopandas_structure
72133

73134
def fetch_mmcif(
74135
self,
@@ -583,4 +644,4 @@ def convert_to_pandas_pdb(
583644
pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset
584645
)
585646

586-
return pandaspdb
647+
return pandaspdb
525 KB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# BioPandas
2+
# Author: Sebastian Raschka <[email protected]>
3+
# Author: Arian Jamasb <[email protected]>
4+
# License: BSD 3 clause
5+
# Project Website: http://rasbt.github.io/biopandas/
6+
# Code Repository: https://github.com/rasbt/biopandas
7+
import os
8+
9+
from biopandas.mmcif import PandasMmcif
10+
11+
TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "2jyf.cif.gz")
12+
13+
def test_label_models():
    """label_models() adds a "model_id" column to the ATOM DataFrame."""
    structure = PandasMmcif().read_mmcif(TESTDATA_FILENAME)
    structure.label_models()
    atom_columns = structure.df["ATOM"].columns
    assert "model_id" in atom_columns
17+
18+
def test_get_model():
    """get_model() keeps only ATOM rows belonging to the requested model."""
    biopandas_structure = PandasMmcif().read_mmcif(TESTDATA_FILENAME)
    MODEL_INDEX = 1
    new_biopandas_structure = biopandas_structure.get_model(MODEL_INDEX)
    model_numbers = new_biopandas_structure.df["ATOM"]["pdbx_PDB_model_num"]
    # Guard against a vacuous pass on an empty selection
    # (assumes the test structure contains model 1 -- TODO confirm).
    assert not model_numbers.empty
    # Compare element-wise first, then reduce: `series.all() == MODEL_INDEX`
    # would compare a single bool with the index and only pass for index 1.
    assert (model_numbers == MODEL_INDEX).all()
23+
24+
25+
def test_get_models():
    """get_models() keeps only ATOM rows belonging to the requested models."""
    biopandas_structure = PandasMmcif().read_mmcif(TESTDATA_FILENAME)
    MODEL_INDICES = [1, 3, 5]

    new_biopandas_structure = biopandas_structure.get_models(MODEL_INDICES)
    model_numbers = new_biopandas_structure.df["ATOM"]["pdbx_PDB_model_num"]
    # Guard against a vacuous pass on an empty selection
    # (assumes the test structure contains at least one of these models
    # -- TODO confirm).
    assert not model_numbers.empty
    # Check every row: `series.all() in MODEL_INDICES` would only test
    # whether the bool True is in the list, which holds because True == 1.
    assert model_numbers.isin(MODEL_INDICES).all()

biopandas/mmtf/pandas_mmtf.py

+22-22
Original file line numberDiff line numberDiff line change
@@ -438,21 +438,21 @@ def get_model(self, model_index: int) -> PandasMmtf:
438438
structure subsetted to the given model.
439439
"""
440440

441-
df = copy.deepcopy(self)
441+
biopandas_structure = copy.deepcopy(self)
442442

443-
if "ATOM" in df.df.keys():
444-
df.df["ATOM"] = df.df["ATOM"].loc[
445-
df.df["ATOM"]["model_id"] == model_index
443+
if "ATOM" in biopandas_structure.df.keys():
444+
biopandas_structure.df["ATOM"] = biopandas_structure.df["ATOM"].loc[
445+
biopandas_structure.df["ATOM"]["model_id"] == model_index
446446
]
447-
if "HETATM" in df.df.keys():
448-
df.df["HETATM"] = df.df["HETATM"].loc[
449-
df.df["HETATM"]["model_id"] == model_index
447+
if "HETATM" in biopandas_structure.df.keys():
448+
biopandas_structure.df["HETATM"] = biopandas_structure.df["HETATM"].loc[
449+
biopandas_structure.df["HETATM"]["model_id"] == model_index
450450
]
451-
if "ANISOU" in df.df.keys():
452-
df.df["ANISOU"] = df.df["ANISOU"].loc[
453-
df.df["ANISOU"]["model_id"] == model_index
451+
if "ANISOU" in biopandas_structure.df.keys():
452+
biopandas_structure.df["ANISOU"] = biopandas_structure.df["ANISOU"].loc[
453+
biopandas_structure.df["ANISOU"]["model_id"] == model_index
454454
]
455-
return df
455+
return biopandas_structure
456456

457457
def get_models(self, model_indices: List[int]) -> PandasMmtf:
458458
"""Returns a new PandasMmtf object with the dataframes subset to the
@@ -469,30 +469,30 @@ def get_models(self, model_indices: List[int]) -> PandasMmtf:
469469
containing the structure subsetted to the given model.
470470
"""
471471

472-
df = copy.deepcopy(self)
472+
biopandas_structure = copy.deepcopy(self)
473473

474-
if "ATOM" in df.df.keys():
475-
df.df["ATOM"] = df.df["ATOM"].loc[
474+
if "ATOM" in biopandas_structure.df.keys():
475+
biopandas_structure.df["ATOM"] = biopandas_structure.df["ATOM"].loc[
476476
[
477477
x in model_indices
478-
for x in df.df["ATOM"]["model_id"].tolist()
478+
for x in biopandas_structure.df["ATOM"]["model_id"].tolist()
479479
]
480480
]
481-
if "HETATM" in df.df.keys():
482-
df.df["HETATM"] = df.df["HETATM"].loc[
481+
if "HETATM" in biopandas_structure.df.keys():
482+
biopandas_structure.df["HETATM"] = biopandas_structure.df["HETATM"].loc[
483483
[
484484
x in model_indices
485-
for x in df.df["HETATM"]["model_id"].tolist()
485+
for x in biopandas_structure.df["HETATM"]["model_id"].tolist()
486486
]
487487
]
488-
if "ANISOU" in df.df.keys():
489-
df.df["ANISOU"] = df.df["ANISOU"].loc[
488+
if "ANISOU" in biopandas_structure.df.keys():
489+
biopandas_structure.df["ANISOU"] = biopandas_structure.df["ANISOU"].loc[
490490
[
491491
x in model_indices
492-
for x in df.df["ANISOU"]["model_id"].tolist()
492+
for x in biopandas_structure.df["ANISOU"]["model_id"].tolist()
493493
]
494494
]
495-
return df
495+
return biopandas_structure
496496

497497

498498
def fetch_mmtf(pdb_code: str) -> pd.DataFrame:

biopandas/pdb/pandas_pdb.py

+24-24
Original file line numberDiff line numberDiff line change
@@ -843,20 +843,20 @@ def get_model(self, model_index: int) -> PandasPdb:
843843
structure subsetted to the given model.
844844
"""
845845

846-
df = deepcopy(self)
847-
df.label_models()
848-
849-
if "ATOM" in df.df.keys():
850-
df.df["ATOM"] = df.df["ATOM"].loc[df.df["ATOM"]["model_id"] == model_index]
851-
if "HETATM" in df.df.keys():
852-
df.df["HETATM"] = df.df["HETATM"].loc[
853-
df.df["HETATM"]["model_id"] == model_index
846+
biopandas_structure = deepcopy(self)
847+
biopandas_structure.label_models()
848+
849+
if "ATOM" in biopandas_structure.df.keys():
850+
biopandas_structure.df["ATOM"] = biopandas_structure.df["ATOM"].loc[biopandas_structure.df["ATOM"]["model_id"] == model_index]
851+
if "HETATM" in biopandas_structure.df.keys():
852+
biopandas_structure.df["HETATM"] = biopandas_structure.df["HETATM"].loc[
853+
biopandas_structure.df["HETATM"]["model_id"] == model_index
854854
]
855-
if "ANISOU" in df.df.keys():
856-
df.df["ANISOU"] = df.df["ANISOU"].loc[
857-
df.df["ANISOU"]["model_id"] == model_index
855+
if "ANISOU" in biopandas_structure.df.keys():
856+
biopandas_structure.df["ANISOU"] = biopandas_structure.df["ANISOU"].loc[
857+
biopandas_structure.df["ANISOU"]["model_id"] == model_index
858858
]
859-
return df
859+
return biopandas_structure
860860

861861
def get_models(self, model_indices: List[int]) -> PandasPdb:
862862
"""Returns a new PandasPDB object with the dataframes subset to the given model index.
@@ -872,22 +872,22 @@ def get_models(self, model_indices: List[int]) -> PandasPdb:
872872
containing the structure subsetted to the given model.
873873
"""
874874

875-
df = deepcopy(self)
876-
df.label_models()
875+
biopandas_structure = deepcopy(self)
876+
biopandas_structure.label_models()
877877

878-
if "ATOM" in df.df.keys():
879-
df.df["ATOM"] = df.df["ATOM"].loc[
880-
[x in model_indices for x in df.df["ATOM"]["model_id"].tolist()]
878+
if "ATOM" in biopandas_structure.df.keys():
879+
biopandas_structure.df["ATOM"] = biopandas_structure.df["ATOM"].loc[
880+
[x in model_indices for x in biopandas_structure.df["ATOM"]["model_id"].tolist()]
881881
]
882-
if "HETATM" in df.df.keys():
883-
df.df["HETATM"] = df.df["HETATM"].loc[
884-
[x in model_indices for x in df.df["HETATM"]["model_id"].tolist()]
882+
if "HETATM" in biopandas_structure.df.keys():
883+
biopandas_structure.df["HETATM"] = biopandas_structure.df["HETATM"].loc[
884+
[x in model_indices for x in biopandas_structure.df["HETATM"]["model_id"].tolist()]
885885
]
886-
if "ANISOU" in df.df.keys():
887-
df.df["ANISOU"] = df.df["ANISOU"].loc[
888-
[x in model_indices for x in df.df["ANISOU"]["model_id"].tolist()]
886+
if "ANISOU" in biopandas_structure.df.keys():
887+
biopandas_structure.df["ANISOU"] = biopandas_structure.df["ANISOU"].loc[
888+
[x in model_indices for x in biopandas_structure.df["ANISOU"]["model_id"].tolist()]
889889
]
890-
return df
890+
return biopandas_structure
891891

892892
def to_pdb_stream(self, records: tuple[str] = ("ATOM", "HETATM")) -> StringIO:
893893
"""Writes a PDB dataframe to a stream.

docs/CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ The CHANGELOG for the current development version is available at
66

77
### 0.5.1dev1 (UNRELEASED)
88

9-
- Dev: switched testing framework entirely to pytest. Drops nose dependency due to version conflicts with Python 3.12 (`nose`) and 3.8 (`nose`)
9+
- Feature: added methods to `PandasMmcif` that allow selecting by model IDs. (PR #[145](https://github.com/BioPandas/biopandas/pull/145))
10+
- Dev: switched testing framework entirely to pytest. Drops nose dependency due to version conflicts with Python 3.12 (`nose`) and 3.8 (`nose`). (PR #[146](https://github.com/BioPandas/biopandas/pull/146))
1011

1112

1213
### 0.5.0dev1 (31/7/2023)

0 commit comments

Comments
 (0)