Skip to content

Commit

Permalink
Merge pull request typedb-osi#6 from Ariwonto/master
Browse files Browse the repository at this point in the history
path optimization
  • Loading branch information
tomassabat authored Sep 3, 2020
2 parents 7cb4962 + 39a4e75 commit f7fba1f
Show file tree
Hide file tree
Showing 11 changed files with 295 additions and 307 deletions.
488 changes: 245 additions & 243 deletions Migrators/CORD_NER/cord_ner_migrator.py

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions Migrators/Coronaviruses/CoronavirusMigrator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import csv

from grakn.client import GraknClient
import csv
import os


def coronavirusMigrator(uri, keyspace):
client = GraknClient(uri=uri)
Expand All @@ -16,7 +17,7 @@ def coronavirusMigrator(uri, keyspace):
tx.query(graql)
tx.commit()

with open('../biograkn-covid/Dataset/Coronaviruses/Genome identity.csv', 'rt', encoding='utf-8') as csvfile:
with open('Dataset/Coronaviruses/Genome identity.csv', 'rt', encoding='utf-8') as csvfile:
tx = session.transaction().write()
csvreader = csv.reader(csvfile, delimiter=',')
raw_file = []
Expand Down Expand Up @@ -63,7 +64,7 @@ def coronavirusMigrator(uri, keyspace):
tx.commit()


with open('../biograkn-covid/Dataset/Coronaviruses/Host proteins (potential drug targets).csv', 'rt', encoding='utf-8') as csvfile:
with open('Dataset/Coronaviruses/Host proteins (potential drug targets).csv', 'rt', encoding='utf-8') as csvfile:
tx = session.transaction().write()
csvreader = csv.reader(csvfile, delimiter=',')
raw_file = []
Expand Down
14 changes: 6 additions & 8 deletions Migrators/DGIdb/DGIdbMigrator.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

from grakn.client import GraknClient
import csv
import os
from inspect import cleandoc
import itertools

from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from Migrators.Helpers.batchLoader import batch_job
from Migrators.Helpers.open_file import openFile


def dgidbMigrator(uri, keyspace, num_dr, num_int, num_threads, ctn):
client = GraknClient(uri=uri)
session = client.session(keyspace=keyspace)
Expand All @@ -19,7 +17,7 @@ def dgidbMigrator(uri, keyspace, num_dr, num_int, num_threads, ctn):


def insertDrugs(uri, keyspace, num_dr, num_threads, ctn, session):
file = '../biograkn-covid/Dataset/DGIdb/dgidb_drugs.tsv'
file = 'Dataset/DGIdb/dgidb_drugs.tsv'
print(' ')
print('Opening DGIdb...')
print(' ')
Expand Down Expand Up @@ -58,7 +56,7 @@ def insertDrugs(uri, keyspace, num_dr, num_threads, ctn, session):
def insertInteractions(uri, keyspace, num_int, num_threads, ctn, session):
batches_pr = []

file = '../biograkn-covid/Dataset/DGIdb/dgidb_interactions.tsv'
file = 'Dataset/DGIdb/dgidb_interactions.tsv'
print(' ')
print('Opening DGIdb-Interactions...')
print(' ')
Expand Down
14 changes: 7 additions & 7 deletions Migrators/Disgenet/disgenetMigrator.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from grakn.client import GraknClient
import csv
import os
from inspect import cleandoc
import csv
import itertools

from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

from grakn.client import GraknClient

from Migrators.Helpers.batchLoader import batch_job


def disgenetMigrator(uri, keyspace, num, num_threads, ctn):

client = GraknClient(uri=uri)
Expand All @@ -19,7 +19,7 @@ def disgenetMigrator(uri, keyspace, num, num_threads, ctn):
print('Opening Disgenet dataset...')
print(' ')

with open('../biograkn-covid/Dataset/Disgenet/all_gene_disease_associations.tsv', 'rt', encoding='utf-8') as csvfile:
with open('Dataset/Disgenet/all_gene_disease_associations.tsv', 'rt', encoding='utf-8') as csvfile:
csvreader = csv.reader(csvfile, delimiter=' ')
raw_file = []
n = 0
Expand Down
12 changes: 6 additions & 6 deletions Migrators/HumanProteinAtlas/HumanProteinAtlasMigrator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import csv
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

from grakn.client import GraknClient
import csv
import os
from inspect import cleandoc

from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from Migrators.Helpers.batchLoader import batch_job


def proteinAtlasMigrator(uri, keyspace, num, num_threads, ctn):
client = GraknClient(uri=uri)
session = client.session(keyspace=keyspace)
Expand All @@ -16,7 +16,7 @@ def proteinAtlasMigrator(uri, keyspace, num, num_threads, ctn):
print(' ')
print('Opening HPA dataset...')
print(' ')
with open('../biograkn-covid/Dataset/HumanProteinAtlas/normal_tissue.tsv', 'rt', encoding='utf-8') as csvfile:
with open('Dataset/HumanProteinAtlas/normal_tissue.tsv', 'rt', encoding='utf-8') as csvfile:
csvreader = csv.reader(csvfile, delimiter=' ')
raw_file = []
n = 0
Expand Down
13 changes: 6 additions & 7 deletions Migrators/Reactome/reactomeMigrator.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from grakn.client import GraknClient
import csv
import os
from inspect import cleandoc
import itertools

from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

from grakn.client import GraknClient

from Migrators.Helpers.batchLoader import batch_job
from Migrators.Helpers.open_file import openFile


def reactomeMigrator(uri, keyspace, num_path, num_threads, ctn):
client = GraknClient(uri=uri)
session = client.session(keyspace=keyspace)
Expand Down Expand Up @@ -70,7 +69,7 @@ def insertPathwayInteractions(uri, keyspace, num_threads, ctn, session, pathway_


def filterHomoSapiens(num_path):
file = '../biograkn-covid/Dataset/Reactome/UniProt2Reactome_All_Levels.tsv'
file = 'Dataset/Reactome/UniProt2Reactome_All_Levels.tsv'
print(' ')
print('Opening Reactome...')
print(' ')
Expand Down
24 changes: 8 additions & 16 deletions Migrators/SemMed/semmed_migrator.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
from grakn.client import GraknClient
import os
import csv
import json
import untangle
import csv
# from multiprocessing.dummy import Pool as ThreadPool
import multiprocessing
from functools import partial
import datetime
from Migrators.Helpers.batchLoader import batch_job

import pandas as pd
import untangle
from grakn.client import GraknClient


def migrate_semmed(uri, keyspace, num_semmed, num_threads, ctn):

print("Migrate 'Subject_CORD_NER.csv'")

file_path = "../biograkn-covid/Dataset/SemMed/Subject_CORD_NER.csv"
file_path = "Dataset/SemMed/Subject_CORD_NER.csv"
raw_file = openFile(file_path, 1)[:num_semmed]
pmids_set = list(set([tupple[3] for tupple in raw_file])) #get set of pmids

Expand All @@ -24,7 +20,7 @@ def migrate_semmed(uri, keyspace, num_semmed, num_threads, ctn):
journal_names = get_journal_names(xml_articles_data)
author_names = get_authors_names(xml_articles_data)
publications_list = get_publication_data(xml_articles_data)
relationship_data = get_relationship_data('../biograkn-covid/Dataset/SemMed/Subject_CORD_NER.csv')[:num_semmed]
relationship_data = get_relationship_data('Dataset/SemMed/Subject_CORD_NER.csv')[:num_semmed]

print("--------Loading journals---------")
load_in_parallel(migrate_journals, journal_names, num_threads, ctn, uri, keyspace)
Expand All @@ -37,7 +33,7 @@ def migrate_semmed(uri, keyspace, num_semmed, num_threads, ctn):

print("Migrate 'Object_CORD_NER.csv'")

file_path = "../biograkn-covid/Dataset/SemMed/Object_CORD_NER.csv"
file_path = "Dataset/SemMed/Object_CORD_NER.csv"
raw_file = openFile(file_path, 1)[:num_semmed]
pmids_set = list(set([tupple[3] for tupple in raw_file])) #get set of pmids

Expand All @@ -46,7 +42,7 @@ def migrate_semmed(uri, keyspace, num_semmed, num_threads, ctn):
journal_names = get_journal_names(xml_articles_data)
author_names = get_authors_names(xml_articles_data)
publications_list = get_publication_data(xml_articles_data)
relationship_data = get_relationship_data('../biograkn-covid/Dataset/SemMed/Object_CORD_NER.csv')[:num_semmed]
relationship_data = get_relationship_data('Dataset/SemMed/Object_CORD_NER.csv')[:num_semmed]

print("--------Loading journals---------")
load_in_parallel(migrate_journals, journal_names, num_threads, ctn, uri, keyspace)
Expand Down Expand Up @@ -289,10 +285,6 @@ def fetch_articles_metadata(pmids):
function - function name to run in parallel\n
data - data to load by function running in parallel
'''
import urllib
import urllib.request
import ssl
import json
import requests

ids_param = ""
Expand Down
10 changes: 3 additions & 7 deletions Migrators/TissueNet/TissueNetMigrator.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import csv

from grakn.client import GraknClient
import csv
import os
from inspect import cleandoc

from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from Migrators.Helpers.batchLoader import batch_job

def tissueNetMigrator(uri, keyspace, num, num_threads, ctn):
client = GraknClient(uri=uri)
Expand All @@ -17,7 +13,7 @@ def tissueNetMigrator(uri, keyspace, num, num_threads, ctn):
print('Opening TissueNet dataset...')
print(' ')

with open('../biograkn-covid/Dataset/TissueNet/HPA-Protein.tsv', 'rt', encoding='utf-8') as csvfile:
with open('Dataset/TissueNet/HPA-Protein.tsv', 'rt', encoding='utf-8') as csvfile:
csvreader = csv.reader(csvfile, delimiter=' ')
raw_file = []
n = 0
Expand Down
12 changes: 6 additions & 6 deletions Migrators/Uniprot/UniprotMigrator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import csv
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

from grakn.client import GraknClient
import csv
import os
from inspect import cleandoc

from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from Migrators.Helpers.batchLoader import batch_job


def uniprotMigrate(uri, keyspace, num, num_threads, ctn):
client = GraknClient(uri=uri)
session = client.session(keyspace=keyspace)
Expand All @@ -22,7 +22,7 @@ def uniprotMigrate(uri, keyspace, num, num_threads, ctn):
tx.query(org)
tx.commit()

with open('../biograkn-covid/Dataset/Uniprot/uniprot-reviewed_yes+AND+proteome.tsv', 'rt', encoding='utf-8') as csvfile:
with open('Dataset/Uniprot/uniprot-reviewed_yes+AND+proteome.tsv', 'rt', encoding='utf-8') as csvfile:
csvreader = csv.reader(csvfile, delimiter=' ')
raw_file = []
n = 0
Expand Down
5 changes: 2 additions & 3 deletions Schema/schema_insert.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from grakn.client import GraknClient
import csv
import os


def insertSchema(uri, keyspace):
client = GraknClient(uri=uri)
session = client.session(keyspace=keyspace)
print('.....')
print('Inserting schema...')
print('.....')
with open("../biograkn-covid/Schema/biograkn-covid.gql", "r") as graql_file:
with open("Schema/biograkn-covid.gql", "r") as graql_file:
schema = graql_file.read()
with session.transaction().write() as write_transaction:
write_transaction.query(schema)
Expand Down
1 change: 1 addition & 0 deletions migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from Migrators.SemMed.semmed_migrator import migrate_semmed
from timeit import default_timer as timer

# for Windows URI = IP:port (127.0.0.1:48555)
URI = "insert IP or localhost"
KEYSPACE = "biograkn_covid"

Expand Down

0 comments on commit f7fba1f

Please sign in to comment.