Skip to content

Commit 67aa2f2

Browse files
a-r-j (Arian Jamasb)
and
Arian Jamasb
authored
fix: switch pytest to unittest (#146) [NOTE(review): the title appears inverted — the diff replaces nose/unittest with pytest (`nosetests -s -v` → `pytest -s -v`; conda dep `nose` → `pytest`), so the change is "switch from nose/unittest to pytest"]
* switch pytest to unittest * switch testing over to pytest from nose/unittest * fix pytest expected failures * fix lingering expected fail * linting * linting * linting * linting * remove unused variables * fix type comparison * bump changelog * add changelog enforcer test * fix type comparison * clean up unused variable * fix whitespace * remove unused mmtf * remove whitespace * remove whitespace * rename ambiguous variable * reduce whitespace * reduce whitespace --------- Co-authored-by: Arian Jamasb <[email protected]>
1 parent 7a1517d commit 67aa2f2

36 files changed

+1783
-505
lines changed

.appveyor.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ install:
1414
- conda config --set always_yes yes --set changeps1 no
1515
- conda update -q conda
1616
- conda info -a
17-
- conda create -q -n test-environment --channel=conda-forge mmtf-python numpy scipy pandas nose looseversion python=%PYTHON_VERSION%
17+
- conda create -q -n test-environment --channel=conda-forge mmtf-python numpy scipy pandas pytest looseversion python=%PYTHON_VERSION%
1818
- activate test-environment
1919

2020
test_script:
21-
- nosetests -s -v
21+
- pytest -s -v
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name: Changelog Enforcer
2+
3+
on: # yamllint disable-line rule:truthy
4+
pull_request:
5+
types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]
6+
7+
jobs:
8+
9+
changelog:
10+
runs-on: ubuntu-latest
11+
12+
steps:
13+
- uses: actions/checkout@v3
14+
- uses: dangoslen/changelog-enforcer@v3
15+
with:
16+
skipLabels: 'skip-changelog'

biopandas/constants.py

+1,039-174
Large diffs are not rendered by default.

biopandas/mmcif/mmcif_parser.py

+24-13
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,28 @@ def __init__(self, parser_obj):
2222
self.names_defined = False
2323

2424
def add_name(self, name):
25-
cat_name = type(name) == str and partition_string(name, ".") or ["", "", ""]
25+
cat_name = (
26+
isinstance(name, str) and partition_string(name, ".") or ["", "", ""]
27+
)
2628
if cat_name[1]:
2729
if cat_name[0] not in self.parser_obj.current_target[-2]:
2830
self.parser_obj.current_target[-2][cat_name[0]] = {}
29-
if cat_name[2] not in self.parser_obj.current_target[-2][cat_name[0]]:
30-
self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]] = []
31+
if (
32+
cat_name[2]
33+
not in self.parser_obj.current_target[-2][cat_name[0]]
34+
):
35+
self.parser_obj.current_target[-2][cat_name[0]][
36+
cat_name[2]
37+
] = []
3138
self.ref_list.append(
3239
self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]]
3340
)
3441
else:
3542
if cat_name[0] not in self.parser_obj.current_target[-2]:
3643
self.parser_obj.current_target[-2][cat_name[0]] = []
37-
self.ref_list.append(self.parser_obj.current_target[-2][cat_name[0]])
44+
self.ref_list.append(
45+
self.parser_obj.current_target[-2][cat_name[0]]
46+
)
3847
self.length = len(self.ref_list)
3948

4049
def push_value(self, value):
@@ -218,16 +227,16 @@ def __repr__(self):
218227
def __cif_float_range__(inp):
219228
try:
220229
pos = inp.index("-", 1)
221-
return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1 :]))
222-
except:
230+
return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1:]))
231+
except Exception:
223232
return (__CIFFloat__(inp),)
224233

225234

226235
def __cif_int_range__(inp):
227236
try:
228237
pos = inp.index("-", 1)
229-
return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1 :]))
230-
except:
238+
return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1:]))
239+
except Exception:
231240
return (__CIFInt__(inp),)
232241

233242

@@ -239,12 +248,12 @@ def __load_cif_dic__(dic_file, force=False):
239248
if force:
240249
throw
241250
dic = json.loads(open(jsf).read())
242-
except:
251+
except Exception:
243252
parser = CIFParser()
244253
parser.parse(open(dic_file))
245254
json.dump(parser.data, open(jsf_dic, "w"))
246255
for k, v in parser.data["data_mmcif_pdbx.dic"].items():
247-
if type(v) != dict or "item_type" not in v:
256+
if not isinstance(v, dict) or "item_type" not in v:
248257
continue
249258
name = partition_string(k[6:], ".")
250259
if name[0] not in dic:
@@ -285,11 +294,13 @@ def __dump_cif__(jso):
285294
def __dump_str__(inp):
286295
if inp is None:
287296
return "?"
288-
if type(inp) is not str:
297+
if not isinstance(inp, str):
289298
return str(inp)
290299
if re.search(__CIF_STR_NL_CHECK__, inp) is not None:
291300
return "\n;%s\n;" % inp
292-
return "'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp
301+
return (
302+
"'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp
303+
)
293304

294305

295306
def __pad_string__(inp, flength):
@@ -354,7 +365,7 @@ def __dump_part__(jso):
354365

355366
def load_cif_data(data, do_clean=True, do_type=True):
356367
parser = CIFParser()
357-
if type(data) == str:
368+
if isinstance(data, str):
358369
parser.parse_string(data)
359370
else:
360371
parser.parse(data) # fileobj

biopandas/mmcif/pandas_mmcif.py

+98-46
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Class for working with MMCIF files."""
2+
23
# BioPandas
34
# Authors: Arian Jamasb <[email protected]>,
45
# Authors: Sebastian Raschka <[email protected]>
@@ -69,56 +70,76 @@ def read_mmcif(self, path):
6970
self.code = self.data["entry"]["id"][0].lower()
7071
return self
7172

72-
def fetch_mmcif(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"):
73+
def fetch_mmcif(
74+
self,
75+
pdb_code: Optional[str] = None,
76+
uniprot_id: Optional[str] = None,
77+
source: str = "pdb",
78+
):
7379
"""Fetches mmCIF file contents from the Protein Databank at rcsb.org or AlphaFold database at https://alphafold.ebi.ac.uk/.
74-
.
80+
.
7581
76-
Parameters
77-
----------
78-
pdb_code : str, optional
79-
A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`.
82+
Parameters
83+
----------
84+
pdb_code : str, optional
85+
A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`.
8086
81-
uniprot_id : str, optional
82-
A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`.
87+
uniprot_id : str, optional
88+
A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`.
8389
84-
source : str
85-
The source to retrieve the structure from
86-
(`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`.
90+
source : str
91+
The source to retrieve the structure from
92+
(`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`.
8793
88-
Returns
89-
---------
90-
self
94+
Returns
95+
---------
96+
self
9197
9298
"""
9399
# Sanitize input
94100
invalid_input_identifier_1 = pdb_code is None and uniprot_id is None
95-
invalid_input_identifier_2 = pdb_code is not None and uniprot_id is not None
96-
invalid_input_combination_1 = uniprot_id is not None and source == "pdb"
101+
invalid_input_identifier_2 = (
102+
pdb_code is not None and uniprot_id is not None
103+
)
104+
invalid_input_combination_1 = (
105+
uniprot_id is not None and source == "pdb"
106+
)
97107
invalid_input_combination_2 = pdb_code is not None and source in {
98-
"alphafold2-v3", "alphafold2-v4"}
108+
"alphafold2-v3",
109+
"alphafold2-v4",
110+
}
99111

100112
if invalid_input_identifier_1 or invalid_input_identifier_2:
101113
raise ValueError(
102-
"Please provide either a PDB code or a UniProt ID.")
114+
"Please provide either a PDB code or a UniProt ID."
115+
)
103116

104117
if invalid_input_combination_1:
105118
raise ValueError(
106-
"Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.")
119+
"Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'."
120+
)
107121
elif invalid_input_combination_2:
108122
raise ValueError(
109-
f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.")
123+
f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}."
124+
)
110125

111126
if source == "pdb":
112127
self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code)
113128
elif source == "alphafold2-v3":
114129
af2_version = 3
115-
self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version)
130+
self.mmcif_path, self.mmcif_text = self._fetch_af2(
131+
uniprot_id, af2_version
132+
)
116133
elif source == "alphafold2-v4":
117134
af2_version = 4
118-
self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version)
135+
self.mmcif_path, self.mmcif_text = self._fetch_af2(
136+
uniprot_id, af2_version
137+
)
119138
else:
120-
raise ValueError(f"Invalid source: {source}."
121-
" Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'.")
139+
raise ValueError(
140+
f"Invalid source: {source}."
141+
" Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'."
142+
)
122143

123144
self._df = self._construct_df(text=self.mmcif_text)
124145
return self
@@ -129,7 +150,8 @@ def _construct_df(self, text: str):
129150
self.data = data
130151
df: Dict[str, pd.DataFrame] = {}
131152
full_df = pd.DataFrame.from_dict(
132-
data["atom_site"], orient="index").transpose()
153+
data["atom_site"], orient="index"
154+
).transpose()
133155
full_df = full_df.astype(mmcif_col_types, errors="ignore")
134156
df["ATOM"] = pd.DataFrame(full_df[full_df.group_PDB == "ATOM"])
135157
df["HETATM"] = pd.DataFrame(full_df[full_df.group_PDB == "HETATM"])
@@ -148,8 +170,9 @@ def _fetch_mmcif(pdb_code):
148170
response = urlopen(url)
149171
txt = response.read()
150172
txt = (
151-
txt.decode(
152-
"utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii")
173+
txt.decode("utf-8")
174+
if sys.version_info[0] >= 3
175+
else txt.encode("ascii")
153176
)
154177
except HTTPError as e:
155178
print(f"HTTP Error {e.code}")
@@ -166,11 +189,15 @@ def _fetch_af2(uniprot_id: str, af2_version: int = 3):
166189
try:
167190
response = urlopen(url)
168191
txt = response.read()
169-
txt = txt.decode('utf-8') if sys.version_info[0] >= 3 else txt.encode('ascii')
192+
txt = (
193+
txt.decode("utf-8")
194+
if sys.version_info[0] >= 3
195+
else txt.encode("ascii")
196+
)
170197
except HTTPError as e:
171-
print(f'HTTP Error {e.code}')
198+
print(f"HTTP Error {e.code}")
172199
except URLError as e:
173-
print(f'URL Error {e.args}')
200+
print(f"URL Error {e.args}")
174201
return url, txt
175202

176203
@staticmethod
@@ -184,7 +211,8 @@ def _read_mmcif(path):
184211
openf = gzip.open
185212
else:
186213
allowed_formats = ", ".join(
187-
(".cif", ".cif.gz", ".mmcif", ".mmcif.gz"))
214+
(".cif", ".cif.gz", ".mmcif", ".mmcif.gz")
215+
)
188216
raise ValueError(
189217
f"Wrong file format; allowed file formats are {allowed_formats}"
190218
)
@@ -194,8 +222,9 @@ def _read_mmcif(path):
194222

195223
if path.endswith(".gz"):
196224
txt = (
197-
txt.decode(
198-
"utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii")
225+
txt.decode("utf-8")
226+
if sys.version_info[0] >= 3
227+
else txt.encode("ascii")
199228
)
200229
return path, txt
201230

@@ -271,14 +300,19 @@ def _get_mainchain(
271300
def _get_hydrogen(df, invert):
272301
"""Return only hydrogen atom entries from a DataFrame"""
273302
return (
274-
df[(df["type_symbol"] != "H")] if invert else df[(
275-
df["type_symbol"] == "H")]
303+
df[(df["type_symbol"] != "H")]
304+
if invert
305+
else df[(df["type_symbol"] == "H")]
276306
)
277307

278308
@staticmethod
279309
def _get_heavy(df, invert):
280310
"""Return only heavy atom entries from a DataFrame"""
281-
return df[df["type_symbol"] == "H"] if invert else df[df["type_symbol"] != "H"]
311+
return (
312+
df[df["type_symbol"] == "H"]
313+
if invert
314+
else df[df["type_symbol"] != "H"]
315+
)
282316

283317
@staticmethod
284318
def _get_calpha(df, invert, atom_col: str = "auth_atom_id"):
@@ -288,7 +322,11 @@ def _get_calpha(df, invert, atom_col: str = "auth_atom_id"):
288322
@staticmethod
289323
def _get_carbon(df, invert):
290324
"""Return carbon atom entries from a DataFrame"""
291-
return df[df["type_symbol"] != "C"] if invert else df[df["type_symbol"] == "C"]
325+
return (
326+
df[df["type_symbol"] != "C"]
327+
if invert
328+
else df[df["type_symbol"] == "C"]
329+
)
292330

293331
def amino3to1(
294332
self,
@@ -339,8 +377,9 @@ def amino3to1(
339377
indices.append(ind)
340378
cmp = num
341379

342-
transl = tmp.iloc[indices][residue_col].map(
343-
amino3to1dict).fillna(fillna)
380+
transl = (
381+
tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna)
382+
)
344383

345384
return pd.concat((tmp.iloc[indices][chain_col], transl), axis=1)
346385

@@ -425,7 +464,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00), records=("ATOM", "HETATM")):
425464

426465
return np.sqrt(
427466
np.sum(
428-
df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1
467+
df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1)
468+
** 2,
469+
axis=1,
429470
)
430471
)
431472

@@ -451,7 +492,9 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):
451492
"""
452493
return np.sqrt(
453494
np.sum(
454-
df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1
495+
df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1)
496+
** 2,
497+
axis=1,
455498
)
456499
)
457500

@@ -485,7 +528,11 @@ def read_mmcif_from_list(self, mmcif_lines):
485528
self.code = self.data["entry"]["id"][0].lower()
486529
return self
487530

488-
def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] = ["ATOM", "HETATM"]) -> PandasPdb:
531+
def convert_to_pandas_pdb(
532+
self,
533+
offset_chains: bool = True,
534+
records: List[str] = ["ATOM", "HETATM"],
535+
) -> PandasPdb:
489536
"""Returns a PandasPdb object with the same data as the PandasMmcif
490537
object.
491538
@@ -525,10 +572,15 @@ def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] =
525572

526573
# Update atom numbers
527574
if offset_chains:
528-
offsets = pandaspdb.df["ATOM"]["chain_id"].astype(
529-
"category").cat.codes
530-
pandaspdb.df["ATOM"]["atom_number"] = pandaspdb.df["ATOM"]["atom_number"] + offsets
575+
offsets = (
576+
pandaspdb.df["ATOM"]["chain_id"].astype("category").cat.codes
577+
)
578+
pandaspdb.df["ATOM"]["atom_number"] = (
579+
pandaspdb.df["ATOM"]["atom_number"] + offsets
580+
)
531581
hetatom_offset = offsets.max() + 1
532-
pandaspdb.df["HETATM"]["atom_number"] = pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset
582+
pandaspdb.df["HETATM"]["atom_number"] = (
583+
pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset
584+
)
533585

534586
return pandaspdb

0 commit comments

Comments (0)