Skip to content

Commit 30b20f4

Browse files
committed
paxdb-api v5.0 update
1 parent 045558a commit 30b20f4

File tree

3,948 files changed

+7481180
-3972743
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,948 files changed

+7481180
-3972743
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,4 @@ com_crashlytics_export_strings.xml
115115
crashlytics.properties
116116
crashlytics-build.properties
117117

118-
118+
lib/

Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
## $ docker service update --image docker-registry.meringlab.org:5443/paxdb/api:blue paxdb_api_species_4
1111
### see https://docs.docker.com/engine/swarm/swarm-tutorial/rolling-update/
1212
FROM node:10-alpine
13-
MAINTAINER Milan Simonovic <[email protected]>
13+
LABEL api-species.authors="Milan Simonovic, Qingyao Huang"
1414

1515
EXPOSE 3000
1616

@@ -32,6 +32,6 @@ RUN apk del build-dependencies
3232
COPY . .
3333

3434
ENV SERVICE_TAGS "paxdb,api"
35-
ENV SERVICE_NAME "species_v4.1"
35+
ENV SERVICE_NAME "species_v5.0"
3636

3737
CMD ["node", "--max-old-space-size=2048", "./bin/www"]

README.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ MIT. See "LICENSE.txt".
1919

2020
### Build the image
2121

22-
To create the image `paxdb/species`, execute the following command:
22+
To create the image `paxdb/api-species`, execute the following command:
2323

2424
```
2525
$ docker build -t paxdb/api-species .
@@ -36,8 +36,8 @@ $ docker run --restart=always -P -d --name paxdb_api_species paxdb/api-species
3636
The process of updating to a new version is as follows:
3737

3838
1. update ./data/abundances (computed by [data-pipeline](https://github.com/meringlab/paxdb-data-pipeline))
39-
2. update ./data/orthgroups (computed by Damian but should be in a repo of its own)
40-
3. update ./data/eggnog4_genome_linkout.txt (last time Kristoffer Forslund <[email protected]> did it)
41-
4. update ./data/paxdb_uniprot_linkins_ids.tsv (a pruned down version of string_uniprot_linkins computed by Damian)
42-
5. update lib/cladogram.js
39+
2. update lib/cladogram.js
40+
3. update ./data/orthgroups (IMPORTANT! Should not include any taxonomic levels beyond those required by cladogram.js)
41+
4. update ./data/eggnog5_genome_linkout.txt (linkout to ensembl when available, otherwise to ncbi taxon browser)
42+
5. update ./data/paxdb_uniprot_linkins_ids.tsv (generated from "BLAST_UniProt_ID" terms from STRING v11.5 protein alias file)
4343
6. increment PAYLOAD_VERSION and update connectionString in build.js, then run it to generate lib/species.js, lib/dataset and lib/proteins

build.js

+32-31
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,32 @@
22
* Created by milans on 9/8/16.
33
*/
44

5-
const PAYLOAD_VERSION = 17;
5+
const PAYLOAD_VERSION = 22;
66
const uniprotMappingFile = './data/paxdb_uniprot_linkins_ids.tsv';
77
const PAXDB_URL = 'https://pax-db.org/';
8+
const PAXDB_API_URL = 'https://beta-api.pax-db.org/';
89
const fs = require('fs');
910
const async = require('async');
1011
const pg = require('pg');
1112
const readline = require('readline');
1213

13-
const speciesIds = [882,1148,3055,3702,4081,4577,4896,4932,5061,5691,5833,6239,7165,7227,7460,7955,8364,9031,9598,9606,9615,9796,9823,9913,10090,10116,39947,44689,64091,83332,85962,99287,122586,158878,160490,169963,192222,198214,208964,211586,214092,214684,224308,226186,243159,260799,267671,272623,272624,283166,353153,449447,511145,546414,593117,722438];
14+
const speciesIds = [882,1148,3055,3702,4081,4577,4896,4932,5061,5691,5833,6239,7165,7227,7460,7955,8364,9031,9598,9606,9615,9796,9823,9913,10090,10116,39947,44689,64091,83332,85962,99287,122586,1280,1314,169963,192222,198214,208964,211586,214092,214684,224308,226186,243159,260799,189518,272623,272624,283166,353153,449447,511145,546414,593117,722438,73239,373153,224326,170187,5476,29760,246196,392499,284590,3708,67767,309800,7091,212042,6945,121845,8355,246200,547559,1286170,4113,7159,3847,4097,4565,8030,9544,4513,8022,3880,3218,272620,5811,9986,9685,65489,347256,89462,3635,9940,2711,160488,3827,100226,257313,1140,88036,109376,265311];
1415

15-
const connectionString = process.env.DATABASE_URL || 'postgres://postgres@atlas.meringlab.org:5432/string_10_5';
16+
const connectionString = process.env.DATABASE_URL || 'postgres://postgres:postgres@localhost:5434/paxdb';
1617
const client = new pg.Client(connectionString);
1718
client.connect();
1819

1920
function loadSpeciesInfo(callback) {
2021
console.log(`loading species info`);
21-
const sqlSpeciesInfo = `select species_id,official_name,compact_name from items.species where species_id in (${speciesIds.join(',')})`;
22-
const sqlNumProteins = `select species_id,count(protein_id) as c from items.proteins where species_id in (${speciesIds.join(',')}) group by species_id; `;
22+
const sqlSpeciesInfo = `select species_id,official_name,compact_name from paxdb5_0.species where species_id in (${speciesIds.join(',')})`;
23+
const sqlNumProteins = `select species_id,count(protein_id) as c from paxdb5_0.proteins where species_id in (${speciesIds.join(',')}) group by species_id; `;
2324
const species = {};
2425
client.query(sqlSpeciesInfo).then(res => {
25-
res.rows.forEach(function(r) {
26+
res.rows.forEach(function (r) {
2627
species[r.species_id] = { id: r.species_id, name: r.official_name, compact_name: r.compact_name };
2728
});
2829
client.query(sqlNumProteins).then(npres => {
29-
npres.rows.forEach(function(r) {
30+
npres.rows.forEach(function (r) {
3031
species[r.species_id]['num_proteins'] = parseInt(r.c);
3132
});
3233
console.log(`loading species info DONE`);
@@ -36,13 +37,13 @@ function loadSpeciesInfo(callback) {
3637
}
3738

3839
function parseOrthgroups(contents, familySet) {
39-
contents.split('\n').forEach(function(line) {
40+
contents.split('\n').forEach(function (line) {
4041
if (line.trim() == 0) {
4142
return
4243
}
4344
var rec = line.split('\t');
4445
//{"id": 9443, "name": "NOG21051", "clade": "PRIMATES", "members": [1803841, 1854701]},
45-
rec.slice(1, rec.length).forEach(function(el) {
46+
rec.slice(1, rec.length).forEach(function (el) {
4647
familySet.add(parseInt(el));
4748
});
4849
});
@@ -67,21 +68,21 @@ function loadProteins(cb, createProteinModules = false) {
6768
console.log(`loading proteins`);
6869
console.log("loading orthgroups");
6970
const familySet = new Set();
70-
fs.readdirSync('./data/orthgroups').forEach(function(file) {
71+
fs.readdirSync('./data/orthgroups').forEach(function (file) {
7172
parseOrthgroups(fs.readFileSync(`./data/orthgroups/${file}`, { 'encoding': 'utf8' }), familySet);
7273
});
7374

7475
const paxdbUniprotIdsMap = loadUniprotMapping();
7576
const uniprotPaxdbIdsMap = {};
7677
const speciesForProtein = {};
7778

78-
async.eachSeries(speciesIds, function(speciesId, callback) {
79+
async.eachSeries(speciesIds, function (speciesId, callback) {
7980
console.log(`loading proteins for ${speciesId}`);
8081
const proteins = {};
8182
const sql = `select protein_id, protein_external_id, preferred_name, annotation ` +
82-
` from items.proteins where species_id = ${speciesId}`;
83+
` from paxdb5_0.proteins where species_id = ${speciesId}`;
8384
client.query(sql).then(res => {
84-
res.rows.forEach(function(r) {
85+
res.rows.forEach(function (r) {
8586
proteins[r.protein_id] = {
8687
id: r.protein_id,
8788
externalId: r.protein_external_id,
@@ -92,7 +93,7 @@ function loadProteins(cb, createProteinModules = false) {
9293
if (Object.prototype.hasOwnProperty.call(paxdbUniprotIdsMap, r.protein_external_id)) {
9394
const ac = paxdbUniprotIdsMap[r.protein_external_id];
9495
proteins[r.protein_id].uniprotId = ac;
95-
if (! Object.prototype.hasOwnProperty.call(uniprotPaxdbIdsMap, ac)) {
96+
if (!Object.prototype.hasOwnProperty.call(uniprotPaxdbIdsMap, ac)) {
9697
uniprotPaxdbIdsMap[ac] = r.protein_id;
9798
} else {
9899
let prev = uniprotPaxdbIdsMap[ac];
@@ -117,7 +118,7 @@ function loadProteins(cb, createProteinModules = false) {
117118
}
118119
callback();
119120
});
120-
}, function(err) {
121+
}, function (err) {
121122
console.log(`loading proteins DONE`);
122123
if (err) throw err;
123124
cb(speciesForProtein, uniprotPaxdbIdsMap);
@@ -131,7 +132,7 @@ function loadDatasetInfo(cb) {
131132
const abundances_asc = {};
132133
const abundances_desc = {};
133134
const proteinsCovered = {}
134-
async.eachSeries(fs.readdirSync('./data/abundances'), function(d, callback) {
135+
async.eachSeries(fs.readdirSync('./data/abundances'), function (d, callback) {
135136
const dataset = {};
136137
const abundances = {};
137138
const peptideCounts = {};
@@ -145,7 +146,7 @@ function loadDatasetInfo(cb) {
145146
datasets[species].push(dataset);
146147
const input = fs.createReadStream(`./data/abundances/${d}`);
147148
const rl = readline.createInterface({ input })
148-
rl.on('close', function() {
149+
rl.on('close', function () {
149150

150151
//add ranks
151152
var abundancesSorted = []
@@ -180,7 +181,7 @@ function loadDatasetInfo(cb) {
180181
callback(null);
181182
});
182183

183-
rl.on('line', function(line) {
184+
rl.on('line', function (line) {
184185
if (!line.startsWith("#")) {
185186
var rec = line.split('\t');
186187
if (rec.length > 1) {
@@ -224,10 +225,10 @@ function loadDatasetInfo(cb) {
224225
}
225226
}
226227
})
227-
}, function(err) {
228+
}, function (err) {
228229
console.log(`loading dataset info DONE`);
229230
if (err) throw err;
230-
speciesIds.forEach(function(id) {
231+
speciesIds.forEach(function (id) {
231232
proteinsCovered[id] = proteinsCovered[id].size
232233
});
233234

@@ -236,16 +237,16 @@ function loadDatasetInfo(cb) {
236237
}
237238

238239
function loadGenomeSources(callback) {
239-
const input = fs.createReadStream('./data/eggnog4_genome_linkout.txt');
240+
const input = fs.createReadStream('./data/eggnog5_genome_linkout.txt');
240241
const rl = readline.createInterface({ input })
241242
const sources = {};
242243
const versions = {};
243244

244-
rl.on('close', function() {
245+
rl.on('close', function () {
245246
callback(sources, versions);
246247
});
247248

248-
rl.on('line', function(line) {
249+
rl.on('line', function (line) {
249250
const rec = line.split('\t');
250251
if (rec.length > 4) {
251252
sources[parseInt(rec[1])] = `<a href='${rec[4]}'>${rec[2]}</a>`;
@@ -420,7 +421,7 @@ function build_proteins_index(){
420421
uniprotIdsMap[rec[1]] = externalToInternalMap[rec[0]];
421422
//append linkout ids as well:
422423
let protein = proteins[externalToInternalMap[rec[0]]];
423-
if (protein.uniprotId && !(protein.uniprotId in uniprotIdsMap) ) {
424+
if (protein.uniprotId && !(protein.uniprotId in uniprotIdsMap)) {
424425
uniprotIdsMap[protein.uniprotId] = protein.id;
425426
}
426427
}
@@ -431,7 +432,7 @@ function build_proteins_index(){
431432
}
432433

433434
console.log('writing proteins_index.js');
434-
let writeStream = fs .createWriteStream('./lib/proteins_index.js');
435+
let writeStream = fs.createWriteStream('./lib/proteins_index.js');
435436
writeStream.write(`//FILE GENERATED BY build.js on ${new Date()}, DO NOT MODIFY!\n`);
436437
writeStream.write("const speciesForProtein = ");
437438
writeStream.write(JSON.stringify(speciesForProtein));
@@ -499,7 +500,7 @@ function buildPayload() {
499500
"legend_file": "${PAXDB_URL}images/payload_legend.png",
500501
"name" : "PaxDB"
501502
}`);
502-
payloadStream.end(e=> {
503+
payloadStream.end(e => {
503504
if (e) console.log(`error writing ${speciesId} payload: ${e.message}`); else console.log(`${speciesId} payload written`);
504505
});
505506

@@ -514,12 +515,12 @@ function buildPayload() {
514515
const abundance = dataset.abundances[proteinId];
515516
const hexColor = ranking.toRGB(abundance.r);
516517
nodesStream.write(`${proteins[proteinId].externalId}\t${hexColor}\tAbundance: ${datasetLib.formattedAbundance(abundance.a)}, rank: ${ranking.formatRank(abundance.r)}\t`);
517-
nodesStream.write(`${PAXDB_URL}protein/${proteinId}/${proteins[proteinId].name}\t`);
518-
nodesStream.write(`${PAXDB_URL}dataset/${d.id}/histogram?hightlightProteinId=${proteinId}\n`);
518+
nodesStream.write(`${PAXDB_API_URL}protein/${proteinId}/${proteins[proteinId].name}\t`);
519+
nodesStream.write(`${PAXDB_API_URL}dataset/${d.id}/histogram?hightlightProteinId=${proteinId}\n`);
519520

520521
});
521522

522-
nodesStream.end(e=> {
523+
nodesStream.end(e => {
523524
if (e) console.log(`error writing ${speciesId} payload nodes: ${e.message}`); else console.log(`${speciesId} payload nodes written`)
524525
});
525526

@@ -530,7 +531,7 @@ buildSpecies();
530531
buildDatasets();
531532
buildProteins();
532533
//TODO FIXME writing streams is async, so lib/species.js won't show up before buildPayload is called
533-
// buildPayload();
534-
// buildHistograms();
534+
buildPayload();
535+
buildHistograms();
535536

536537

0 commit comments

Comments
 (0)