
Commit 8b6f2e1

added data directory and buildinfo
1 parent 989d078 commit 8b6f2e1

6 files changed: 212 additions, 31 deletions

src/input.json (+7, -1)
@@ -4,8 +4,14 @@
     {
       "pert_id": "BRD-U00134347",
       "pert_plate": "PMTS066",
+      "src_project": "MTS024_ANDREW_AGUIRRE",
+      "dest_project": "NEW_PROJECT"
+    },
+    {
+      "pert_id": "BRD-U00134320",
+      "pert_plate": "PMTS066",
       "src_project": "MTS024_ADAM_DURBIN",
-      "dest_project": "MTS024_ANDREW_AGUIRRE"
+      "dest_project": "NEW_PROJECT"
     }
   ]
 }
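
The launcher treats each object in this array as one compound-to-project move. Below is a minimal sketch of the entry shape implied by the fields this commit reads in src/launch.ts; the interface name and the optional typing of delete_from_src are assumptions for illustration, not definitions from the repo.

interface Project2Cpd {
    pert_id: string;           // compound id, e.g. "BRD-U00134347"
    pert_plate: string;        // plate, e.g. "PMTS066"
    src_project: string;       // project to copy data from
    dest_project: string;      // project to copy data into
    delete_from_src?: boolean; // when true, launch.ts queues SliceData.cleanUp on the source dir
}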

src/launch.ts (+72, -27)
@@ -13,6 +13,8 @@ let csvToJson = require('convert-csv-to-json');
     let promises = [];
     const cp_dirs = [];
     for (let project_2_cpd of project_2_cpds) {
+
+        //replace with an s3 ls | grep?
         let src_dir = "etl/" + project_2_cpd.src_project.toLowerCase() + "/" +
             project_2_cpd.src_project.toUpperCase() + "/" +
             project_2_cpd.pert_plate + "/";
@@ -23,7 +25,10 @@ let csvToJson = require('convert-csv-to-json');
             project_2_cpd.pert_plate + "/";
 
         const cp_dest_dir = dest_dir + project_2_cpd.pert_id;
-        cp_dirs.push({src: cp_src_dir, dest: cp_dest_dir});
+        cp_dirs.push({
+            src: cp_src_dir, dest: cp_dest_dir,
+            delete_from_src: project_2_cpd.delete_from_src
+        });
         let command = "aws s3 sync " + input.screen_root +
             project_2_cpd.src_project.toLowerCase() + "/" +
             project_2_cpd.src_project.toUpperCase() + "/" +
@@ -44,8 +49,6 @@ let csvToJson = require('convert-csv-to-json');
     }
     await Promise.all(promises);
 
-    //delete src directory
-
 
     const csvColumnFilesGlob = [];
     for (let project_2_cpd of project_2_cpds) {
@@ -68,46 +71,88 @@ let csvToJson = require('convert-csv-to-json');
         let dest_dir = "etl/" + project_2_cpd.dest_project.toLowerCase() + "/" +
             project_2_cpd.dest_project.toUpperCase() + "/" +
             project_2_cpd.pert_plate + "/" + project_2_cpd.pert_id + "/";
-        for (let rowFile of rowFiles) {
+
+        const cpdRowFiles = rowFiles.filter((f) => f.includes(project_2_cpd.pert_id));
+        for (let rowFile of cpdRowFiles) {
             const destFile = dest_dir + path.basename(rowFile);
             promises.push(SliceData.replaceProjectNames(project_2_cpd.dest_project,rowFile,destFile,","));
         }
     }
     await Promise.all(promises);
 
+    //delete the src directories
     promises = [];
     for (let cp_dir of cp_dirs) {
-        promises.push(SliceData.cleanUp(cp_dir.src));
+        if (cp_dir.delete_from_src) {
+            promises.push(SliceData.cleanUp(cp_dir.src));
+        }
     }
-    await Promise.all(promises);
+    // await Promise.all(promises);
 
-    //sync to s3
-    for (let project_2_cpd of project_2_cpds) {
-        const local_src_dir = "etl/" + project_2_cpd.src_project.toLowerCase() + "/" +
-            project_2_cpd.src_project.toUpperCase() + "/" +
-            project_2_cpd.pert_plate + "/";
-
-        const local_dest_dir = "etl/" + project_2_cpd.dest_project.toLowerCase() + "/" +
-            project_2_cpd.dest_project.toUpperCase() + "/" +
-            project_2_cpd.pert_plate + "/";
 
+    //go through each destination project and creat a data/ folder
+    const destProjects = _.uniq(_.pluck(project_2_cpds,"dest_project"));
+    promises=[];
+    for (const project of destProjects) {
+        console.log("project",project)
+        promises.push(SliceData.makeDataDirectory(project));
+    }
+    await Promise.all(promises);
 
-        let command = "aws s3 sync " + local_src_dir + " " +
-            input.screen_root +
-            project_2_cpd.src_project.toLowerCase() + "/" +
-            project_2_cpd.src_project.toUpperCase() + "/" +
-            project_2_cpd.pert_plate + "/ --delete";
+    //gzip each file in data folder
+    promises = [];
+    for (const project of destProjects) {
+        const dest_dir = "etl/" + project.toLowerCase() + "/" + project.toUpperCase() + "/";
+        promises.push(SliceData.gzipFiles(dest_dir + "data/"));
+    }
+    await Promise.all(promises);
+    console.log("foo")
+
+    //
+    // promises = [];
+    // //sync compounds folders to s3
+    // for (let project_2_cpd of project_2_cpds) {
+    //     const local_src_dir = "etl/" + project_2_cpd.src_project.toLowerCase() + "/" +
+    //         project_2_cpd.src_project.toUpperCase() + "/" +
+    //         project_2_cpd.pert_plate + "/";
+    //
+    //     const local_dest_dir = "etl/" + project_2_cpd.dest_project.toLowerCase() + "/" +
+    //         project_2_cpd.dest_project.toUpperCase() + "/" +
+    //         project_2_cpd.pert_plate + "/";
+    //
+    //
+    //     let command = "aws s3 sync " + local_src_dir + " " +
+    //         input.screen_root +
+    //         project_2_cpd.src_project.toLowerCase() + "/" +
+    //         project_2_cpd.src_project.toUpperCase() + "/" +
+    //         project_2_cpd.pert_plate + "/ --delete";
+    //
+    //     console.log("sync s3 src command",command)
+    //     promises.push(SliceConstants.execShellCommand(command));
+    //
+    //     command = "aws s3 sync " + local_dest_dir + " " +
+    //         input.screen_root +
+    //         project_2_cpd.dest_project.toLowerCase() + "/" +
+    //         project_2_cpd.dest_project.toUpperCase() + "/" +
+    //         project_2_cpd.pert_plate + "/ --delete";
+    //     console.log("sync s3 dest command",command)
+    //     promises.push(SliceConstants.execShellCommand(command));
+    // }
+    // await Promise.all(promises);
 
-        console.log("sync s3 src command",command)
-        promises.push(SliceConstants.execShellCommand(command));
+    promises = [];
+    for (let destProject of destProjects) {
+        const local_data_dir = "etl/" + destProject.toLowerCase() + "/" +
+            destProject.toUpperCase() + "/data/";
 
-        command = "aws s3 sync " + local_dest_dir + " " +
+        let command = "aws s3 sync " + local_data_dir + " " +
             input.screen_root +
-            project_2_cpd.dest_project.toLowerCase() + "/" +
-            project_2_cpd.dest_project.toUpperCase() + "/" +
-            project_2_cpd.pert_plate + "/ --delete";
-        console.log("sync s3 dest command",command)
+            destProject.toLowerCase() + "/" +
+            destProject.toUpperCase() + "/data/";
+
+        console.log("sync s3 command", command)
         promises.push(SliceConstants.execShellCommand(command));
     }
     await Promise.all(promises);
+
 })();
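
For orientation, the per-project sync loop added at the end of launch.ts assembles aws s3 sync commands of roughly the shape sketched below; the project name and screen_root value are hypothetical placeholders, not values from the repo.

// Hypothetical illustration of the data/ sync command the new loop builds.
const destProject = "NEW_PROJECT";              // hypothetical
const screen_root = "s3://example-bucket/etl/"; // hypothetical stand-in for input.screen_root
const local_data_dir = "etl/" + destProject.toLowerCase() + "/" +
    destProject.toUpperCase() + "/data/";
const command = "aws s3 sync " + local_data_dir + " " +
    screen_root + destProject.toLowerCase() + "/" +
    destProject.toUpperCase() + "/data/";
// => "aws s3 sync etl/new_project/NEW_PROJECT/data/ s3://example-bucket/etl/new_project/NEW_PROJECT/data/"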

src/slice_constants.ts (+17)
@@ -31,6 +31,23 @@ export class SliceConstants {
         '_model_table.csv'
         // '_RF_table.csv',
     ];
+
+    /**
+     * Files available from compound directories to be put into
+     * the project data directory
+     * @private
+     */
+    public static PROJECT_DATA_FILES = [
+        'continuous_associations.csv',
+        'discrete_associations.csv',
+        'DRC_TABLE.csv',
+        'LEVEL3_LMFI_*.csv',
+        'LEVEL4_LFC_COMBAT_*.csv',
+        'LEVEL5_LFC_COMBAT_*.csv',
+        'model_table.csv',
+        'RF_table.csv',
+    ];
+
     /**
      * Rows with pert plates in this project are retained
      * @type {string[]}
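
Entries containing * act as glob patterns. Below is a small sketch of how they are consumed, mirroring the pattern built by the new SliceData.makeDataDirectory in src/slice_data.ts further down; the helper name listProjectDataFiles and the import path are hypothetical.

import { glob } from 'glob';
import { SliceConstants } from './slice_constants'; // path assumed from this repo's layout

// Hypothetical helper: list the per-compound files that would be collated for one
// destination project, using the same glob shape as makeDataDirectory:
//   etl/<project>/<PROJECT>/<pert_plate>/<pert_id>/<data file>
async function listProjectDataFiles(project: string): Promise<string[]> {
    const destDir = "etl/" + project.toLowerCase() + "/" + project.toUpperCase() + "/";
    const matches: string[] = [];
    for (const name of SliceConstants.PROJECT_DATA_FILES) {
        matches.push(...await glob(destDir + "*/*/" + name));
    }
    return matches;
}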

src/slice_data.ts (+114, -1)
@@ -1,5 +1,6 @@
 import {Configs, ProjectConfigs} from "./types";
 import * as fs from 'fs'
+import * as zlib from 'zlib';
 import * as path from 'path';
 import {glob} from 'glob'
 import * as csv from 'csv';
@@ -152,10 +153,111 @@ export class SliceData {
                 console.log('error.....', err);
                 ws.end()
                 reject(err)
-
             });
         });
     };
+
+    public static async makeDataDirectory(project: string) {
+        const destDir = "etl/" + project.toLowerCase() + "/" + project.toUpperCase() + "/";
+        const promises = [];
+        fs.mkdirSync(destDir + '/data/', { recursive: true });
+        for (let name of SliceConstants.PROJECT_DATA_FILES) {
+            console.log("name",name);
+            let gl = destDir + "*/*/" + name;
+            console.log(gl);
+            const filesToCollate = await glob(gl);
+            console.log("filesToCollate",filesToCollate)
+            let outFile = destDir + '/data/' + `${project.toUpperCase()}_${name}`;
+            outFile = outFile.replace("_*.csv", ".csv");
+            promises.push(SliceData.concatFiles(filesToCollate,outFile, ','));
+        }
+
+        await Promise.all(promises);
+    }
+
+
+    public static async gzipFiles(data_dir: string) {
+        const files = await glob(data_dir + "*.*", { ignore: data_dir + "*.gz" });
+
+        const promises = [];
+        for (let file of files) {
+            promises.push(new Promise((resolve, reject) => {
+                const gzip = zlib.createGzip();
+                const inp = fs.createReadStream(file);
+                const out = fs.createWriteStream(file + '.gz');
+                out.on('finish', () => {
+                    console.log('Successfully compressed', file);
+                    resolve('Successfully compressed');
+                    fs.unlink(file, (err) => {
+                        if (err) {
+                            console.error('Error deleting file:', err);
+                            reject(err)
+                        } else {
+                            resolve('Deleted file')
+                        }
+                    });
+                })
+                out.on('error', (err) => {
+                    console.error('Error compressing file:', err);
+                    reject(err);
+                });
+                inp.pipe(gzip).pipe(out);
+            }));
+        }
+        return promises;
+    }
+
+    public static async concatFiles(filePaths: string[], destFile: string,delimiter:string) {
+        fs.rmSync(destFile, {force: true})
+        if (filePaths.length === 0) {
+            console.log('No files to concatenate');
+            return;
+        }
+
+        return new Promise((resolve, reject) => {
+            const parser = csv.parse({ delimiter: delimiter, columns: true });
+            const stringifier = csv.stringify({ header: true });
+            const outputStream = fs.createWriteStream(destFile);
+
+            outputStream.on('finish', () => {
+                console.log('All files have been concatenated successfully');
+                resolve('Concatenation completed');
+            });
+
+            outputStream.on('error', (err) => {
+                console.error('Error writing to the output file:', err);
+                reject(err);
+            });
+
+            stringifier.on('error', (err) => {
+                console.error('Error in CSV stringification:', err);
+                reject(err);
+            });
+
+            stringifier.pipe(outputStream);
+
+            let fileCount = filePaths.length;
+            filePaths.forEach((filePath, index) => {
+                fs.createReadStream(filePath)
+                    .pipe(csv.parse({ delimiter: delimiter, columns: true }))
+                    .on('data', (data) => {
+                        stringifier.write(data);
+                    })
+                    .on('end', () => {
+                        console.log(`Finished processing file: ${filePath}`);
+                        if (--fileCount === 0) {
+                            stringifier.end(); // Close the stringifier stream after the last file
+                        }
+                    })
+                    .on('error', (err) => {
+                        console.error('Error reading or parsing file:', err);
+                        reject(err);
+                    });
+            });
+        });
+
+    }
+
     public static async replaceProjectNames_old(project:string, srcFile: string, destFile: string,delimiter:string) {
         fs.rmSync(destFile, {force: true})
         const rs = fs.createReadStream(srcFile);
@@ -284,6 +386,17 @@ export class SliceData {
         zipper.sync.zip(folderToZip + "/").compress().save(zipFileName);
         console.log("Zip file", zipFileName)
     };
+
+    public static async createDataFolder(projectConfigs: ProjectConfigs) {
+        //go through the destination project folder
+        //glob all compound directory files
+
+
+
+        //for each type of file vertically concatenate them and write them to the dest-folder/data/
+    }
+
+
 }
 
 //sync to s3
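
Taken together, launch.ts drives the two new helpers roughly as in the condensed sketch below. The project name and import path are assumptions, and unlike the launcher this sketch awaits the per-file gzip promises directly.

import { SliceData } from './slice_data'; // path assumed

(async () => {
    const project = "NEW_PROJECT"; // hypothetical destination project
    // Collate each PROJECT_DATA_FILES pattern into <project>/data/ via concatFiles.
    await SliceData.makeDataDirectory(project);
    // gzipFiles returns an array of per-file promises that resolve once each .gz is written.
    const dataDir = "etl/" + project.toLowerCase() + "/" + project.toUpperCase() + "/data/";
    await Promise.all(await SliceData.gzipFiles(dataDir));
})();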

tsconfig.json (+1, -1)
@@ -8,7 +8,7 @@
     "strict": true,
     "strictPropertyInitialization": false,
     "useUnknownInCatchVariables": false,
-    "incremental": true,
+    "incremental": false,
     "moduleResolution": "node",
     "sourceMap": true,
     "declaration": true,

tsconfig.tsbuildinfo (+1, -1): large diff not rendered.
