2
2
* Created by milans on 9/8/16.
3
3
*/
4
4
5
- const PAYLOAD_VERSION = 17 ;
5
+ const PAYLOAD_VERSION = 22 ;
6
6
const uniprotMappingFile = './data/paxdb_uniprot_linkins_ids.tsv' ;
7
7
const PAXDB_URL = 'https://pax-db.org/' ;
8
+ const PAXDB_API_URL = 'https://beta-api.pax-db.org/' ;
8
9
const fs = require ( 'fs' ) ;
9
10
const async = require ( 'async' ) ;
10
11
const pg = require ( 'pg' ) ;
11
12
const readline = require ( 'readline' ) ;
12
13
13
- const speciesIds = [ 882 , 1148 , 3055 , 3702 , 4081 , 4577 , 4896 , 4932 , 5061 , 5691 , 5833 , 6239 , 7165 , 7227 , 7460 , 7955 , 8364 , 9031 , 9598 , 9606 , 9615 , 9796 , 9823 , 9913 , 10090 , 10116 , 39947 , 44689 , 64091 , 83332 , 85962 , 99287 , 122586 , 158878 , 160490 , 169963 , 192222 , 198214 , 208964 , 211586 , 214092 , 214684 , 224308 , 226186 , 243159 , 260799 , 267671 , 272623 , 272624 , 283166 , 353153 , 449447 , 511145 , 546414 , 593117 , 722438 ] ;
14
+ const speciesIds = [ 882 , 1148 , 3055 , 3702 , 4081 , 4577 , 4896 , 4932 , 5061 , 5691 , 5833 , 6239 , 7165 , 7227 , 7460 , 7955 , 8364 , 9031 , 9598 , 9606 , 9615 , 9796 , 9823 , 9913 , 10090 , 10116 , 39947 , 44689 , 64091 , 83332 , 85962 , 99287 , 122586 , 1280 , 1314 , 169963 , 192222 , 198214 , 208964 , 211586 , 214092 , 214684 , 224308 , 226186 , 243159 , 260799 , 189518 , 272623 , 272624 , 283166 , 353153 , 449447 , 511145 , 546414 , 593117 , 722438 , 73239 , 373153 , 224326 , 170187 , 5476 , 29760 , 246196 , 392499 , 284590 , 3708 , 67767 , 309800 , 7091 , 212042 , 6945 , 121845 , 8355 , 246200 , 547559 , 1286170 , 4113 , 7159 , 3847 , 4097 , 4565 , 8030 , 9544 , 4513 , 8022 , 3880 , 3218 , 272620 , 5811 , 9986 , 9685 , 65489 , 347256 , 89462 , 3635 , 9940 , 2711 , 160488 , 3827 , 100226 , 257313 , 1140 , 88036 , 109376 , 265311 ] ;
14
15
15
- const connectionString = process . env . DATABASE_URL || 'postgres://postgres@atlas.meringlab.org:5432/string_10_5 ' ;
16
+ const connectionString = process . env . DATABASE_URL || 'postgres://postgres:postgres@localhost:5434/paxdb ' ;
16
17
const client = new pg . Client ( connectionString ) ;
17
18
client . connect ( ) ;
18
19
19
20
function loadSpeciesInfo ( callback ) {
20
21
console . log ( `loading species info` ) ;
21
- const sqlSpeciesInfo = `select species_id,official_name,compact_name from items .species where species_id in (${ speciesIds . join ( ',' ) } )` ;
22
- const sqlNumProteins = `select species_id,count(protein_id) as c from items .proteins where species_id in (${ speciesIds . join ( ',' ) } ) group by species_id; ` ;
22
+ const sqlSpeciesInfo = `select species_id,official_name,compact_name from paxdb5_0 .species where species_id in (${ speciesIds . join ( ',' ) } )` ;
23
+ const sqlNumProteins = `select species_id,count(protein_id) as c from paxdb5_0 .proteins where species_id in (${ speciesIds . join ( ',' ) } ) group by species_id; ` ;
23
24
const species = { } ;
24
25
client . query ( sqlSpeciesInfo ) . then ( res => {
25
- res . rows . forEach ( function ( r ) {
26
+ res . rows . forEach ( function ( r ) {
26
27
species [ r . species_id ] = { id : r . species_id , name : r . official_name , compact_name : r . compact_name } ;
27
28
} ) ;
28
29
client . query ( sqlNumProteins ) . then ( npres => {
29
- npres . rows . forEach ( function ( r ) {
30
+ npres . rows . forEach ( function ( r ) {
30
31
species [ r . species_id ] [ 'num_proteins' ] = parseInt ( r . c ) ;
31
32
} ) ;
32
33
console . log ( `loading species info DONE` ) ;
@@ -36,13 +37,13 @@ function loadSpeciesInfo(callback) {
36
37
}
37
38
38
39
function parseOrthgroups ( contents , familySet ) {
39
- contents . split ( '\n' ) . forEach ( function ( line ) {
40
+ contents . split ( '\n' ) . forEach ( function ( line ) {
40
41
if ( line . trim ( ) == 0 ) {
41
42
return
42
43
}
43
44
var rec = line . split ( '\t' ) ;
44
45
//{"id": 9443, "name": "NOG21051", "clade": "PRIMATES", "members": [1803841, 1854701]},
45
- rec . slice ( 1 , rec . length ) . forEach ( function ( el ) {
46
+ rec . slice ( 1 , rec . length ) . forEach ( function ( el ) {
46
47
familySet . add ( parseInt ( el ) ) ;
47
48
} ) ;
48
49
} ) ;
@@ -67,21 +68,21 @@ function loadProteins(cb, createProteinModules = false) {
67
68
console . log ( `loading proteins` ) ;
68
69
console . log ( "loading orthgroups" ) ;
69
70
const familySet = new Set ( ) ;
70
- fs . readdirSync ( './data/orthgroups' ) . forEach ( function ( file ) {
71
+ fs . readdirSync ( './data/orthgroups' ) . forEach ( function ( file ) {
71
72
parseOrthgroups ( fs . readFileSync ( `./data/orthgroups/${ file } ` , { 'encoding' : 'utf8' } ) , familySet ) ;
72
73
} ) ;
73
74
74
75
const paxdbUniprotIdsMap = loadUniprotMapping ( ) ;
75
76
const uniprotPaxdbIdsMap = { } ;
76
77
const speciesForProtein = { } ;
77
78
78
- async . eachSeries ( speciesIds , function ( speciesId , callback ) {
79
+ async . eachSeries ( speciesIds , function ( speciesId , callback ) {
79
80
console . log ( `loading proteins for ${ speciesId } ` ) ;
80
81
const proteins = { } ;
81
82
const sql = `select protein_id, protein_external_id, preferred_name, annotation ` +
82
- ` from items .proteins where species_id = ${ speciesId } ` ;
83
+ ` from paxdb5_0 .proteins where species_id = ${ speciesId } ` ;
83
84
client . query ( sql ) . then ( res => {
84
- res . rows . forEach ( function ( r ) {
85
+ res . rows . forEach ( function ( r ) {
85
86
proteins [ r . protein_id ] = {
86
87
id : r . protein_id ,
87
88
externalId : r . protein_external_id ,
@@ -92,7 +93,7 @@ function loadProteins(cb, createProteinModules = false) {
92
93
if ( Object . prototype . hasOwnProperty . call ( paxdbUniprotIdsMap , r . protein_external_id ) ) {
93
94
const ac = paxdbUniprotIdsMap [ r . protein_external_id ] ;
94
95
proteins [ r . protein_id ] . uniprotId = ac ;
95
- if ( ! Object . prototype . hasOwnProperty . call ( uniprotPaxdbIdsMap , ac ) ) {
96
+ if ( ! Object . prototype . hasOwnProperty . call ( uniprotPaxdbIdsMap , ac ) ) {
96
97
uniprotPaxdbIdsMap [ ac ] = r . protein_id ;
97
98
} else {
98
99
let prev = uniprotPaxdbIdsMap [ ac ] ;
@@ -117,7 +118,7 @@ function loadProteins(cb, createProteinModules = false) {
117
118
}
118
119
callback ( ) ;
119
120
} ) ;
120
- } , function ( err ) {
121
+ } , function ( err ) {
121
122
console . log ( `loading proteins DONE` ) ;
122
123
if ( err ) throw err ;
123
124
cb ( speciesForProtein , uniprotPaxdbIdsMap ) ;
@@ -131,7 +132,7 @@ function loadDatasetInfo(cb) {
131
132
const abundances_asc = { } ;
132
133
const abundances_desc = { } ;
133
134
const proteinsCovered = { }
134
- async . eachSeries ( fs . readdirSync ( './data/abundances' ) , function ( d , callback ) {
135
+ async . eachSeries ( fs . readdirSync ( './data/abundances' ) , function ( d , callback ) {
135
136
const dataset = { } ;
136
137
const abundances = { } ;
137
138
const peptideCounts = { } ;
@@ -145,7 +146,7 @@ function loadDatasetInfo(cb) {
145
146
datasets [ species ] . push ( dataset ) ;
146
147
const input = fs . createReadStream ( `./data/abundances/${ d } ` ) ;
147
148
const rl = readline . createInterface ( { input } )
148
- rl . on ( 'close' , function ( ) {
149
+ rl . on ( 'close' , function ( ) {
149
150
150
151
//add ranks
151
152
var abundancesSorted = [ ]
@@ -180,7 +181,7 @@ function loadDatasetInfo(cb) {
180
181
callback ( null ) ;
181
182
} ) ;
182
183
183
- rl . on ( 'line' , function ( line ) {
184
+ rl . on ( 'line' , function ( line ) {
184
185
if ( ! line . startsWith ( "#" ) ) {
185
186
var rec = line . split ( '\t' ) ;
186
187
if ( rec . length > 1 ) {
@@ -224,10 +225,10 @@ function loadDatasetInfo(cb) {
224
225
}
225
226
}
226
227
} )
227
- } , function ( err ) {
228
+ } , function ( err ) {
228
229
console . log ( `loading dataset info DONE` ) ;
229
230
if ( err ) throw err ;
230
- speciesIds . forEach ( function ( id ) {
231
+ speciesIds . forEach ( function ( id ) {
231
232
proteinsCovered [ id ] = proteinsCovered [ id ] . size
232
233
} ) ;
233
234
@@ -236,16 +237,16 @@ function loadDatasetInfo(cb) {
236
237
}
237
238
238
239
function loadGenomeSources ( callback ) {
239
- const input = fs . createReadStream ( './data/eggnog4_genome_linkout .txt' ) ;
240
+ const input = fs . createReadStream ( './data/eggnog5_genome_linkout .txt' ) ;
240
241
const rl = readline . createInterface ( { input } )
241
242
const sources = { } ;
242
243
const versions = { } ;
243
244
244
- rl . on ( 'close' , function ( ) {
245
+ rl . on ( 'close' , function ( ) {
245
246
callback ( sources , versions ) ;
246
247
} ) ;
247
248
248
- rl . on ( 'line' , function ( line ) {
249
+ rl . on ( 'line' , function ( line ) {
249
250
const rec = line . split ( '\t' ) ;
250
251
if ( rec . length > 4 ) {
251
252
sources [ parseInt ( rec [ 1 ] ) ] = `<a href='${ rec [ 4 ] } '>${ rec [ 2 ] } </a>` ;
@@ -420,7 +421,7 @@ function build_proteins_index(){
420
421
uniprotIdsMap [ rec [ 1 ] ] = externalToInternalMap [ rec [ 0 ] ] ;
421
422
//append linkout ids as well:
422
423
let protein = proteins [ externalToInternalMap [ rec [ 0 ] ] ] ;
423
- if ( protein . uniprotId && ! ( protein . uniprotId in uniprotIdsMap ) ) {
424
+ if ( protein . uniprotId && ! ( protein . uniprotId in uniprotIdsMap ) ) {
424
425
uniprotIdsMap [ protein . uniprotId ] = protein . id ;
425
426
}
426
427
}
@@ -431,7 +432,7 @@ function build_proteins_index(){
431
432
}
432
433
433
434
console . log ( 'writing proteins_index.js' ) ;
434
- let writeStream = fs . createWriteStream ( './lib/proteins_index.js' ) ;
435
+ let writeStream = fs . createWriteStream ( './lib/proteins_index.js' ) ;
435
436
writeStream . write ( `//FILE GENERATED BY build.js on ${ new Date ( ) } , DO NOT MODIFY!\n` ) ;
436
437
writeStream . write ( "const speciesForProtein = " ) ;
437
438
writeStream . write ( JSON . stringify ( speciesForProtein ) ) ;
@@ -499,7 +500,7 @@ function buildPayload() {
499
500
"legend_file": "${ PAXDB_URL } images/payload_legend.png",
500
501
"name" : "PaxDB"
501
502
}` ) ;
502
- payloadStream . end ( e => {
503
+ payloadStream . end ( e => {
503
504
if ( e ) console . log ( `error writing ${ speciesId } payload: ${ e . message } ` ) ; else console . log ( `${ speciesId } payload written` ) ;
504
505
} ) ;
505
506
@@ -514,12 +515,12 @@ function buildPayload() {
514
515
const abundance = dataset . abundances [ proteinId ] ;
515
516
const hexColor = ranking . toRGB ( abundance . r ) ;
516
517
nodesStream . write ( `${ proteins [ proteinId ] . externalId } \t${ hexColor } \tAbundance: ${ datasetLib . formattedAbundance ( abundance . a ) } , rank: ${ ranking . formatRank ( abundance . r ) } \t` ) ;
517
- nodesStream . write ( `${ PAXDB_URL } protein/${ proteinId } /${ proteins [ proteinId ] . name } \t` ) ;
518
- nodesStream . write ( `${ PAXDB_URL } dataset/${ d . id } /histogram?hightlightProteinId=${ proteinId } \n` ) ;
518
+ nodesStream . write ( `${ PAXDB_API_URL } protein/${ proteinId } /${ proteins [ proteinId ] . name } \t` ) ;
519
+ nodesStream . write ( `${ PAXDB_API_URL } dataset/${ d . id } /histogram?hightlightProteinId=${ proteinId } \n` ) ;
519
520
520
521
} ) ;
521
522
522
- nodesStream . end ( e => {
523
+ nodesStream . end ( e => {
523
524
if ( e ) console . log ( `error writing ${ speciesId } payload nodes: ${ e . message } ` ) ; else console . log ( `${ speciesId } payload nodes written` )
524
525
} ) ;
525
526
@@ -530,7 +531,7 @@ buildSpecies();
530
531
buildDatasets ( ) ;
531
532
buildProteins ( ) ;
532
533
//TODO FIXME: stream writes are async, so lib/species.js won't show up on disk before buildPayload is called
533
- // buildPayload();
534
- // buildHistograms();
534
+ buildPayload ( ) ;
535
+ buildHistograms ( ) ;
535
536
536
537
0 commit comments