From f420b0d56c0bad55464cebdf842fcfae6ab0421c Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 19 Jul 2019 19:48:33 -0400 Subject: [PATCH] Reorganize, update code, and add travis/tox Closes #3. Begins #2 --- .travis.yml | 27 ++++ LICENSE | 2 +- setup.cfg | 8 +- src/guilty_phewas/__init__.py | 6 - src/guilty_phewas/__main__.py | 14 -- src/guilty_phewas/script.py | 22 --- src/guiltytargets_phewas/__init__.py | 3 + .../constants.py | 4 +- .../network_phewas.py | 31 ++-- .../pulearn.py | 25 ++-- src/guiltytargets_phewas/script.py | 28 ++++ .../string_ppi_assembler.py | 6 +- .../utils.py | 19 ++- tox.ini | 138 ++++++++++++++++++ 14 files changed, 255 insertions(+), 78 deletions(-) create mode 100644 .travis.yml delete mode 100644 src/guilty_phewas/__init__.py delete mode 100644 src/guilty_phewas/__main__.py delete mode 100644 src/guilty_phewas/script.py create mode 100644 src/guiltytargets_phewas/__init__.py rename src/{guilty_phewas => guiltytargets_phewas}/constants.py (58%) rename src/{guilty_phewas => guiltytargets_phewas}/network_phewas.py (92%) rename src/{guilty_phewas => guiltytargets_phewas}/pulearn.py (89%) create mode 100644 src/guiltytargets_phewas/script.py rename src/{guilty_phewas => guiltytargets_phewas}/string_ppi_assembler.py (99%) rename src/{guilty_phewas => guiltytargets_phewas}/utils.py (97%) create mode 100644 tox.ini diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..2c1558c --- /dev/null +++ b/.travis.yml @@ -0,0 +1,27 @@ +sudo: false +cache: pip +language: python +python: + - "3.6" +stages: + - lint + - docs +jobs: + include: + # lint stage + - stage: lint + env: TOXENV=manifest + - env: TOXENV=flake8 + - env: TOXENV=xenon + # docs stage + - stage: docs + env: TOXENV=doc8 + - env: TOXENV=readme + - env: TOXENV=docs +matrix: + allow_failures: + - env: TOXENV=xenon +install: + - pip install tox +script: + - tox diff --git a/LICENSE b/LICENSE index 8b23445..682d4b7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019 +Copyright (c) GuiltyTargets Developers, 2019 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/setup.cfg b/setup.cfg index 216a71d..f2fea4b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ # Setup.py Configuration # ########################## [metadata] -name = guilty_reloaded +name = guiltytargets_phewas version = 0.0.1-dev description = Change meeeeee long_description = file: README.rst @@ -13,15 +13,17 @@ author_email = mauriciopl@gmail.com maintainer = Mauricio Pio de Lacerda maintainer_email = mauriciopl@gmail.com +url = https://github.com/GuiltyTargets/phewas + # License Information -license = Apache 2.0 License +license = MIT license_file = LICENSE # Search tags classifiers = Development Status :: 1 - Planning Intended Audience :: Developers - License :: OSI Approved :: Apache Software License + License :: OSI Approved :: MIT License Operating System :: OS Independent Programming Language :: Python Programming Language :: Python :: 3.6 diff --git a/src/guilty_phewas/__init__.py b/src/guilty_phewas/__init__.py deleted file mode 100644 index e61fdc9..0000000 --- a/src/guilty_phewas/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -"""A dream come true.""" - -def get_version(): - return "" diff --git a/src/guilty_phewas/__main__.py b/src/guilty_phewas/__main__.py deleted file mode 100644 index 65944d6..0000000 --- a/src/guilty_phewas/__main__.py +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Entrypoint module, in case you use ``python3 -m module_name``. - -Why does this file exist, and why ``__main__``? For more info, read: - -- https://www.python.org/dev/peps/pep-0338/ -- https://docs.python.org/3/using/cmdline.html#cmdoption-m -""" - -from .cli import main - -if __name__ == '__main__': - main() diff --git a/src/guilty_phewas/script.py b/src/guilty_phewas/script.py deleted file mode 100644 index 7687fcd..0000000 --- a/src/guilty_phewas/script.py +++ /dev/null @@ -1,22 +0,0 @@ -from GAT2VEC.evaluation.classification import Classification -from GAT2VEC.gat2vec import Gat2Vec -from guiltytargets.constants import gat2vec_config - -dir_ = "C:/Users/Mauricio/Thesis/bel_data/alzh" # windows -# dir_ = "C:\\Users\\Mauricio\\Thesis\\git\\reproduction\\data\\lc" - -classifier = Classification(dir_, dir_, tr=gat2vec_config.training_ratio) - -g2v = Gat2Vec(dir_, dir_, label=False, tr=gat2vec_config.training_ratio) -model = g2v.train_gat2vec( - gat2vec_config.num_walks, - gat2vec_config.walk_length, - gat2vec_config.dimension, - gat2vec_config.window_size, - output=True, - ) - -classifier = Classification(dir_, dir_, tr=gat2vec_config.training_ratio) - -auc_df = classifier.evaluate(model, label=False, evaluation_scheme="cv") - diff --git a/src/guiltytargets_phewas/__init__.py b/src/guiltytargets_phewas/__init__.py new file mode 100644 index 0000000..cd0040a --- /dev/null +++ b/src/guiltytargets_phewas/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""GuiltyTargets PheWAS Extension.""" \ No newline at end of file diff --git a/src/guilty_phewas/constants.py b/src/guiltytargets_phewas/constants.py similarity index 58% rename from src/guilty_phewas/constants.py rename to src/guiltytargets_phewas/constants.py index e055fcc..69b83fd 100644 --- a/src/guilty_phewas/constants.py +++ b/src/guiltytargets_phewas/constants.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- +"""Constants for GuiltyTargets PheWAS.""" + data_dir = 'data' disease_abr = ['ad'] disease_ids_efo = ['EFO_0000249'] -ot_file = 'ot_entrez.txt' \ No newline at end of file +ot_file = 'ot_entrez.txt' diff --git a/src/guilty_phewas/network_phewas.py b/src/guiltytargets_phewas/network_phewas.py similarity index 92% rename from src/guilty_phewas/network_phewas.py rename to src/guiltytargets_phewas/network_phewas.py index b2a1d0c..1e421d5 100644 --- a/src/guilty_phewas/network_phewas.py +++ b/src/guiltytargets_phewas/network_phewas.py @@ -7,8 +7,13 @@ import networkx as nx import numpy as np -from ppi_network_annotation.model.gene import Gene -from pybel.dsl import protein, rna, gene +from pybel.dsl import gene, protein, rna + +from guiltytargets.ppi_network_annotation import Gene + +__all__ = [ + 'NetworkNx', +] logger = logging.getLogger(__name__) @@ -16,11 +21,13 @@ class NetworkNx: """Encapsulate a PPI network with differential gene expression, phenotypes and disease association annotation.""" - def __init__(self, - ppi_graph: nx.Graph, - max_adj_p: Optional[float] = None, - max_l2fc: Optional[float] = None, - min_l2fc: Optional[float] = None): + def __init__( + self, + ppi_graph: nx.Graph, + max_adj_p: Optional[float] = None, + max_l2fc: Optional[float] = None, + min_l2fc: Optional[float] = None, + ) -> None: """Initialize the network object. :param ppi_graph: A graph of protein interactions. @@ -63,8 +70,11 @@ def filter_genes(self, relevant_entrez: list) -> None: logger.info("In filter_genes()") raise Exception('Not ready to filter genes using NetworkX') - def _add_vertex_attributes(self, genes: List[Gene], - disease_associations: Optional[dict] = None) -> None: + def _add_vertex_attributes( + self, + genes: List[Gene], + disease_associations: Optional[dict] = None, + ) -> None: """Add attributes to vertices. :param genes: A list of genes containing attribute information. @@ -158,8 +168,7 @@ def get_attribute_from_indices(self, indices: list, attribute_name: str): return list(np.array(self.graph.vs[attribute_name])[indices]) def get_differentially_expressed_genes(self, diff_type: str) -> List: - """Get up regulated, down regulated, all differentially expressed or not - differentially expressed nodes. + """Get up regulated, down regulated, all differentially expressed or not differentially expressed nodes. :param diff_type: One of `not_diff_expressed`, `diff_expressed`, `up_regulated`, `down_regulated` :return: A list of nodes corresponding to diff_type. diff --git a/src/guilty_phewas/pulearn.py b/src/guiltytargets_phewas/pulearn.py similarity index 89% rename from src/guilty_phewas/pulearn.py rename to src/guiltytargets_phewas/pulearn.py index ca39f68..f4d4a1b 100644 --- a/src/guilty_phewas/pulearn.py +++ b/src/guiltytargets_phewas/pulearn.py @@ -3,21 +3,22 @@ from collections import defaultdict import pandas as pd -from GAT2VEC import parsers, paths -from GAT2VEC.evaluation.classification import Classification from sklearn import svm from sklearn.metrics import accuracy_score, f1_score, roc_auc_score from sklearn.model_selection import StratifiedKFold +from guiltytargets.gat2vec import Classification, Gat2Vec, gat2vec_parsers, gat2vec_paths -# TODO move to GAT2VEC (forked)? +__all__ = [ + 'PULearn', +] +# TODO move to GAT2VEC (forked)? class PULearn(Classification): - """ """ def evaluate(self, model, label=False, evaluation_scheme="tr", cost_p: float = None, cost_n: float = None): - """Evaluates the model according to the given evaluation scheme. + """Evaluate the model according to the given evaluation scheme. :param model: :param label: @@ -30,7 +31,7 @@ def evaluate(self, model, label=False, evaluation_scheme="tr", cost_p: float = N clf = self.get_classifier() if not label: - embedding = parsers.get_embeddingDF(model) + embedding = gat2vec_parsers.get_embeddingDF(model) if evaluation_scheme == "bsvm": if cost_n and cost_p and cost_n > 0 and cost_p > 0: @@ -44,9 +45,9 @@ def evaluate(self, model, label=False, evaluation_scheme="tr", cost_p: float = N for tr in self.TR: print("TR ... ", tr) if label: - model = paths.get_embedding_path_wl(self.dataset_dir, self.output_dir, tr) + model = gat2vec_paths.get_embedding_path_wl(self.dataset_dir, self.output_dir, tr) if isinstance(model, str): - embedding = parsers.get_embeddingDF(model) + embedding = gat2vec_parsers.get_embeddingDF(model) results.update(self.evaluate_tr(clf, embedding, tr)) print("Training Finished") @@ -89,9 +90,7 @@ def get_biased_predictions(clf, x_train, x_test, y_train, cost_p, cost_n): return clf.predict(x_test), clf.predict_proba(x_test) -if __name__ == '__main__': - from GAT2VEC.gat2vec import Gat2Vec - +def main(): dir_ = "C:/Users/Mauricio/Thesis/bel_data/alzh" g2v = Gat2Vec(dir_, dir_, label=False, tr=[0.1, 0.3, 0.5]) walk_length = 4 @@ -111,3 +110,7 @@ def get_biased_predictions(clf, x_train, x_test, y_train, cost_p, cost_n): print('PU created.') auc_df = pul.evaluate(model, label=False, evaluation_scheme="bsvm") print(auc_df) + + +if __name__ == '__main__': + main() diff --git a/src/guiltytargets_phewas/script.py b/src/guiltytargets_phewas/script.py new file mode 100644 index 0000000..b4ecbfb --- /dev/null +++ b/src/guiltytargets_phewas/script.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- + +from guiltytargets.constants import gat2vec_config +from guiltytargets.gat2vec import Classification, Gat2Vec + + +def main(): + dir_ = "C:/Users/Mauricio/Thesis/bel_data/alzh" # windows + # dir_ = "C:\\Users\\Mauricio\\Thesis\\git\\reproduction\\data\\lc" + + classifier = Classification(dir_, dir_, tr=gat2vec_config.training_ratio) + + g2v = Gat2Vec(dir_, dir_, label=False, tr=gat2vec_config.training_ratio) + model = g2v.train_gat2vec( + gat2vec_config.num_walks, + gat2vec_config.walk_length, + gat2vec_config.dimension, + gat2vec_config.window_size, + output=True, + ) + + classifier = Classification(dir_, dir_, tr=gat2vec_config.training_ratio) + + auc_df = classifier.evaluate(model, label=False, evaluation_scheme="cv") + + +if __name__ == '__main__': + main() diff --git a/src/guilty_phewas/string_ppi_assembler.py b/src/guiltytargets_phewas/string_ppi_assembler.py similarity index 99% rename from src/guilty_phewas/string_ppi_assembler.py rename to src/guiltytargets_phewas/string_ppi_assembler.py index 150fba7..1df7ea8 100644 --- a/src/guilty_phewas/string_ppi_assembler.py +++ b/src/guiltytargets_phewas/string_ppi_assembler.py @@ -68,6 +68,10 @@ def _get_ensembl_id_to_symbol_converter(self) -> Dict: return dict(data3[['protein_external_id', 'preferred_name']].values) -if __name__ == '__main__': +def main(): assembler = StringAssembler() assembler.create_adj_file() + + +if __name__ == '__main__': + main() diff --git a/src/guilty_phewas/utils.py b/src/guiltytargets_phewas/utils.py similarity index 97% rename from src/guilty_phewas/utils.py rename to src/guiltytargets_phewas/utils.py index e259e7e..814960c 100644 --- a/src/guilty_phewas/utils.py +++ b/src/guiltytargets_phewas/utils.py @@ -6,13 +6,13 @@ from typing import Dict, List, Optional import bio2bel_phewascatalog -from guiltytargets.ppi_network_annotation.model.gene import Gene import mygene -import pandas as pd import networkx as nx +import pandas as pd from opentargets import OpenTargetsClient from pybel.dsl import BaseEntity, gene, protein, rna +from guiltytargets.ppi_network_annotation.model.gene import Gene from .network_phewas import NetworkNx @@ -67,9 +67,11 @@ def get_significantly_differentiated(gene_list: List[Gene], max_adjp: float): """Returns a dictionary only with significantly differentially expressed genes from the gene list.""" max_adjp = max_adjp or 0.05 - dge = {g.entrez_id or g.symbol: g.log2_fold_change - for g in gene_list - if g.padj < max_adjp} + dge = { + g.entrez_id or g.symbol: g.log2_fold_change + for g in gene_list + if g.padj < max_adjp + } return {k: v for k, v in dge.items() if k} @@ -123,8 +125,10 @@ def get_association_scores(disease_id, outpath): fields=['association_scoreoverall', 'target.id'] ) assoc_simple = [ - {'id': a['target']['id'], - 'score': a['association_score']['overall']} + { + 'id': a['target']['id'], + 'score': a['association_score']['overall'] + } for a in assoc ] ensembl_list = [a['id'] for a in assoc_simple] @@ -305,4 +309,3 @@ def _add_attribute_values(value, att_mappings, indices): """ for i in indices: att_mappings[i].append(value) - diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..e8f702b --- /dev/null +++ b/tox.ini @@ -0,0 +1,138 @@ +[tox] +envlist = + coverage-clean + manifest + flake8 + readme + doc8 + docs + py + coverage-report + +[testenv] +commands = coverage run -p -m pytest tests {posargs} +passenv = CI TRAVIS TRAVIS_* +deps = + coverage + pytest +whitelist_externals = + /bin/cat + /bin/cp + /bin/mkdir + /usr/bin/git + /usr/local/bin/hub + +[testenv:coverage-clean] +deps = coverage +skip_install = true +commands = coverage erase + +[testenv:coverage-report] +deps = coverage +skip_install = true +commands = + coverage combine + coverage report + +[testenv:manifest] +deps = check-manifest +commands = check-manifest +skip_install = true + +[testenv:xenon] +deps = xenon +skip_install = true +commands = xenon --max-average A --max-modules A --max-absolute B . +description = Run the xenon tool to monitor code complexity. + +[testenv:mypy] +deps = mypy +skip_install = true +commands = mypy --ignore-missing-imports src/guiltytargets_phewas/ +description = Run the mypy tool to check static typing on the project. + +[testenv:pyroma] +deps = + pygments + pyroma +skip_install = true +commands = pyroma --min=10 . +description = Run the pyroma tool to check the project's package friendliness. + +[testenv:docs] +changedir = docs +deps = + sphinx + sphinx-rtd-theme + sphinx-autodoc-typehints +commands = + mkdir -p {envtmpdir} + cp -r source {envtmpdir}/source + sphinx-build -W -b html -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/html + sphinx-build -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage + cat {envtmpdir}/build/coverage/c.txt + cat {envtmpdir}/build/coverage/python.txt + +[testenv:readme] +commands = rst-lint README.rst +skip_install = true +deps = + restructuredtext_lint + pygments + +[testenv:flake8] +skip_install = true +deps = + flake8 + flake8-docstrings>=0.2.7 + flake8-import-order>=0.9 + pep8-naming + flake8-colors +commands = + flake8 src/guiltytargets_phewas/ tests/ setup.py + +[testenv:doc8] +skip_install = true +deps = + sphinx + doc8 +commands = + doc8 docs/source/ README.rst + +#################### +# Deployment tools # +#################### + +[testenv:bumpversion] +commands = bumpversion {posargs} +skip_install = true +deps = + bumpversion + +[testenv:build] +skip_install = true +deps = + wheel + setuptools +commands = + python setup.py -q sdist bdist_wheel + +[testenv:release] +skip_install = true +deps = + {[testenv:build]deps} + twine >= 1.5.0 +commands = + {[testenv:build]commands} + twine upload --skip-existing dist/* + +[testenv:finish] +skip_install = true +deps = + {[testenv:release]deps} + bumpversion +commands = + bumpversion release + {[testenv:release]commands} + git push + bumpversion patch