Skip to content

Commit

Permalink
Reorganize, update code, and add travis/tox
Browse files Browse the repository at this point in the history
Closes #3. Begins #2
  • Loading branch information
cthoyt committed Jul 19, 2019
1 parent a66bb8d commit f420b0d
Show file tree
Hide file tree
Showing 14 changed files with 255 additions and 78 deletions.
27 changes: 27 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
sudo: false
cache: pip
language: python
python:
- "3.6"
stages:
- lint
- docs
jobs:
include:
# lint stage
- stage: lint
env: TOXENV=manifest
- env: TOXENV=flake8
- env: TOXENV=xenon
# docs stage
- stage: docs
env: TOXENV=doc8
- env: TOXENV=readme
- env: TOXENV=docs
matrix:
allow_failures:
- env: TOXENV=xenon
install:
- pip install tox
script:
- tox
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2019
Copyright (c) GuiltyTargets Developers, 2019

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
8 changes: 5 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Setup.py Configuration #
##########################
[metadata]
name = guilty_reloaded
name = guiltytargets_phewas
version = 0.0.1-dev
description = Change meeeeee
long_description = file: README.rst
Expand All @@ -13,15 +13,17 @@ author_email = [email protected]
maintainer = Mauricio Pio de Lacerda
maintainer_email = [email protected]

url = https://github.com/GuiltyTargets/phewas

# License Information
license = Apache 2.0 License
license = MIT
license_file = LICENSE

# Search tags
classifiers =
Development Status :: 1 - Planning
Intended Audience :: Developers
License :: OSI Approved :: Apache Software License
License :: OSI Approved :: MIT License
Operating System :: OS Independent
Programming Language :: Python
Programming Language :: Python :: 3.6
Expand Down
6 changes: 0 additions & 6 deletions src/guilty_phewas/__init__.py

This file was deleted.

14 changes: 0 additions & 14 deletions src/guilty_phewas/__main__.py

This file was deleted.

22 changes: 0 additions & 22 deletions src/guilty_phewas/script.py

This file was deleted.

3 changes: 3 additions & 0 deletions src/guiltytargets_phewas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# -*- coding: utf-8 -*-

"""GuiltyTargets PheWAS Extension."""
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-

"""Constants for GuiltyTargets PheWAS."""

data_dir = 'data'
disease_abr = ['ad']
disease_ids_efo = ['EFO_0000249']
ot_file = 'ot_entrez.txt'
ot_file = 'ot_entrez.txt'
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,27 @@

import networkx as nx
import numpy as np
from ppi_network_annotation.model.gene import Gene
from pybel.dsl import protein, rna, gene
from pybel.dsl import gene, protein, rna

from guiltytargets.ppi_network_annotation import Gene

__all__ = [
'NetworkNx',
]

logger = logging.getLogger(__name__)


class NetworkNx:
"""Encapsulate a PPI network with differential gene expression, phenotypes and disease association annotation."""

def __init__(self,
ppi_graph: nx.Graph,
max_adj_p: Optional[float] = None,
max_l2fc: Optional[float] = None,
min_l2fc: Optional[float] = None):
def __init__(
self,
ppi_graph: nx.Graph,
max_adj_p: Optional[float] = None,
max_l2fc: Optional[float] = None,
min_l2fc: Optional[float] = None,
) -> None:
"""Initialize the network object.
:param ppi_graph: A graph of protein interactions.
Expand Down Expand Up @@ -63,8 +70,11 @@ def filter_genes(self, relevant_entrez: list) -> None:
logger.info("In filter_genes()")
raise Exception('Not ready to filter genes using NetworkX')

def _add_vertex_attributes(self, genes: List[Gene],
disease_associations: Optional[dict] = None) -> None:
def _add_vertex_attributes(
self,
genes: List[Gene],
disease_associations: Optional[dict] = None,
) -> None:
"""Add attributes to vertices.
:param genes: A list of genes containing attribute information.
Expand Down Expand Up @@ -158,8 +168,7 @@ def get_attribute_from_indices(self, indices: list, attribute_name: str):
return list(np.array(self.graph.vs[attribute_name])[indices])

def get_differentially_expressed_genes(self, diff_type: str) -> List:
"""Get up regulated, down regulated, all differentially expressed or not
differentially expressed nodes.
"""Get up regulated, down regulated, all differentially expressed or not differentially expressed nodes.
:param diff_type: One of `not_diff_expressed`, `diff_expressed`, `up_regulated`, `down_regulated`
:return: A list of nodes corresponding to diff_type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,22 @@
from collections import defaultdict

import pandas as pd
from GAT2VEC import parsers, paths
from GAT2VEC.evaluation.classification import Classification
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold

from guiltytargets.gat2vec import Classification, Gat2Vec, gat2vec_parsers, gat2vec_paths

# TODO move to GAT2VEC (forked)?
__all__ = [
'PULearn',
]


# TODO move to GAT2VEC (forked)?
class PULearn(Classification):
""" """

def evaluate(self, model, label=False, evaluation_scheme="tr", cost_p: float = None, cost_n: float = None):
"""Evaluates the model according to the given evaluation scheme.
"""Evaluate the model according to the given evaluation scheme.
:param model:
:param label:
Expand All @@ -30,7 +31,7 @@ def evaluate(self, model, label=False, evaluation_scheme="tr", cost_p: float = N
clf = self.get_classifier()

if not label:
embedding = parsers.get_embeddingDF(model)
embedding = gat2vec_parsers.get_embeddingDF(model)

if evaluation_scheme == "bsvm":
if cost_n and cost_p and cost_n > 0 and cost_p > 0:
Expand All @@ -44,9 +45,9 @@ def evaluate(self, model, label=False, evaluation_scheme="tr", cost_p: float = N
for tr in self.TR:
print("TR ... ", tr)
if label:
model = paths.get_embedding_path_wl(self.dataset_dir, self.output_dir, tr)
model = gat2vec_paths.get_embedding_path_wl(self.dataset_dir, self.output_dir, tr)
if isinstance(model, str):
embedding = parsers.get_embeddingDF(model)
embedding = gat2vec_parsers.get_embeddingDF(model)
results.update(self.evaluate_tr(clf, embedding, tr))

print("Training Finished")
Expand Down Expand Up @@ -89,9 +90,7 @@ def get_biased_predictions(clf, x_train, x_test, y_train, cost_p, cost_n):
return clf.predict(x_test), clf.predict_proba(x_test)


if __name__ == '__main__':
from GAT2VEC.gat2vec import Gat2Vec

def main():
dir_ = "C:/Users/Mauricio/Thesis/bel_data/alzh"
g2v = Gat2Vec(dir_, dir_, label=False, tr=[0.1, 0.3, 0.5])
walk_length = 4
Expand All @@ -111,3 +110,7 @@ def get_biased_predictions(clf, x_train, x_test, y_train, cost_p, cost_n):
print('PU created.')
auc_df = pul.evaluate(model, label=False, evaluation_scheme="bsvm")
print(auc_df)


if __name__ == '__main__':
main()
28 changes: 28 additions & 0 deletions src/guiltytargets_phewas/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-

from guiltytargets.constants import gat2vec_config
from guiltytargets.gat2vec import Classification, Gat2Vec


def main():
    """Train a GAT2VEC model and evaluate it with the ``"cv"`` scheme.

    The data directory is hard-coded and is passed as both directory
    arguments of ``Gat2Vec`` and ``Classification``. The training
    hyper-parameters are read from ``gat2vec_config``. The evaluation
    result is printed to standard output.
    """
    dir_ = "C:/Users/Mauricio/Thesis/bel_data/alzh"  # windows
    # dir_ = "C:\\Users\\Mauricio\\Thesis\\git\\reproduction\\data\\lc"

    g2v = Gat2Vec(dir_, dir_, label=False, tr=gat2vec_config.training_ratio)
    model = g2v.train_gat2vec(
        gat2vec_config.num_walks,
        gat2vec_config.walk_length,
        gat2vec_config.dimension,
        gat2vec_config.window_size,
        output=True,
    )

    # Build the classifier once, right before it is needed. The original code
    # also built an identical instance before training and then overwrote it.
    classifier = Classification(dir_, dir_, tr=gat2vec_config.training_ratio)

    auc_df = classifier.evaluate(model, label=False, evaluation_scheme="cv")
    # Report the result instead of silently discarding it.
    print(auc_df)


if __name__ == '__main__':
main()
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ def _get_ensembl_id_to_symbol_converter(self) -> Dict:
return dict(data3[['protein_external_id', 'preferred_name']].values)


if __name__ == '__main__':
def main():
    """Instantiate ``StringAssembler`` and call its ``create_adj_file`` method."""
    StringAssembler().create_adj_file()


if __name__ == '__main__':
main()
19 changes: 11 additions & 8 deletions src/guilty_phewas/utils.py → src/guiltytargets_phewas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
from typing import Dict, List, Optional

import bio2bel_phewascatalog
from guiltytargets.ppi_network_annotation.model.gene import Gene
import mygene
import pandas as pd
import networkx as nx
import pandas as pd
from opentargets import OpenTargetsClient
from pybel.dsl import BaseEntity, gene, protein, rna

from guiltytargets.ppi_network_annotation.model.gene import Gene
from .network_phewas import NetworkNx


Expand Down Expand Up @@ -67,9 +67,11 @@ def get_significantly_differentiated(gene_list: List[Gene], max_adjp: float):
"""Returns a dictionary only with significantly differentially expressed genes from the gene list."""
max_adjp = max_adjp or 0.05

dge = {g.entrez_id or g.symbol: g.log2_fold_change
for g in gene_list
if g.padj < max_adjp}
dge = {
g.entrez_id or g.symbol: g.log2_fold_change
for g in gene_list
if g.padj < max_adjp
}

return {k: v for k, v in dge.items() if k}

Expand Down Expand Up @@ -123,8 +125,10 @@ def get_association_scores(disease_id, outpath):
fields=['association_scoreoverall', 'target.id']
)
assoc_simple = [
{'id': a['target']['id'],
'score': a['association_score']['overall']}
{
'id': a['target']['id'],
'score': a['association_score']['overall']
}
for a in assoc
]
ensembl_list = [a['id'] for a in assoc_simple]
Expand Down Expand Up @@ -305,4 +309,3 @@ def _add_attribute_values(value, att_mappings, indices):
"""
for i in indices:
att_mappings[i].append(value)

Loading

0 comments on commit f420b0d

Please sign in to comment.