Skip to content

Commit 989d078

Browse files
committed
ETL module
1 parent 31d5e41 commit 989d078

16 files changed

+1166
-20
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@ config.json
22
node_modules
33
package-lock.json
44
.idea
5+
local.json
6+
dist
7+
etl

README.md

+19-15
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,25 @@
22

33
A module for moving compounds/assets from project A to project B.
44

5+
Warning: This will modify existing files, so make sure to use a test directory
6+
on s3 (for example analysis.clue.io/etl/) while testing.
57

6-
1. Checkout this repository
7-
2. CD into the checked out repo and pull the reports folder from wherever it is hosted. Make sure the reports are deposited into the root of the checked out repo.
8-
3. Edit copy config.json.template as config.json.
9-
4. Edit config.json appropriately
10-
* PARENT_SRC_PROJECT_DIR - Should point to the name of the folder holding the reports for example mts001_validation_compounds
11-
* PARENT_DEST_PROJECT_DIR - This is the name of the destination folder that would hold the sliced reports
12-
* PERT_INAMES - This is an array of pert_inames associated with the pert_ids that you want to move
13-
* PERT_IDS - This is an array of pert ids that you want to slice out of the reports
14-
* PERT_PLATES - An array of pert plates that the pert_ids are on
15-
16-
5. Then run `node slice.js`
17-
This will generate the needed assets into the PARENT_DEST_PROJECT_DIR
18-
6. CD into that folder and edit index.html to keep only the perts that are needed. You may also want to change the title to reflect the new project
19-
20-
7. sync the PARENT_DEST_PROJECT_DIR to the host system
218

9+
1. Checkout this repository
10+
2. Edit src/input.json to reflect the changes you would like to make
11+
* screen_root - Should point to the S3 root of the screen (e.g. s3://analysis.clue.io/etl/)
12+
* src_project - The project that holds the pert that needs to be moved
13+
* dest_project - The project that the pert would be moved into
14+
* pert_id - The pert_id to move to destination project
15+
* pert_plate - The pert plate that the pert_id is on
16+
3. cd to the checkout directory and run
17+
* ``npm install``
18+
* ``tsc --outDir dist`` to compile the src
19+
* ``npm run start``
20+
4. The last command will do the following
21+
* Check out the src and destination projects into the etl folder
22+
* Copy the pert_id folder from the src project to the dest project
23+
* Update LEVEL4 and LEVEL5 csv combat files with the dest project name
24+
* Delete pert_id folder from the src project
25+
* Sync the src and dest projects back to S3 using the `aws s3 sync` `--delete` flag
2226

config/default.json

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"base_url": "https://dev-api.clue.io/api",
3+
"api_key": "xxxxxxx"
4+
}
5+

infrastructure/clean.js

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
const fs = require('fs');
2+
// Delete the ./dist directory (and everything under it) when it exists,
// and report the deletion on stdout. Does nothing when ./dist is absent.
const distDir = "./dist";
const distIsPresent = fs.existsSync(distDir);
if (distIsPresent) {
    fs.rmSync(distDir, { recursive: true });
    console.log(`deleted ./dist`);
}

js-legacy-files/appendFiles.js

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
const FILE_CONSTANTS = require("./file_constants");
2+
const fs = require('fs');
3+
const path = require("path")
4+
const fsPromises = fs.promises;
5+
const {
6+
glob
7+
} = require('glob')
8+
const csv = require('csv');
9+
const readline = require('readline')
10+
11+
/**
 * Append the lines of one CSV file to `outputFileName`, reading line by line.
 *
 * The first line of `file` is treated as its header row. It is copied to the
 * output when `index === 0`, or when the output file is still missing or empty
 * (for example because the files processed earlier were empty); otherwise it is
 * skipped so the merged output carries a single header. Every other line is
 * appended followed by "\n".
 *
 * @param {string} file - Path of the CSV file to read.
 * @param {string} outputFileName - Path of the file the lines are appended to.
 * @param {number} index - Zero-based position of `file` in the list being merged.
 * @returns {Promise<void>} Resolves once every line of `file` has been handled.
 */
const processLineByLine = async function (file, outputFileName, index) {
    // Decide up front whether this file's header must be kept. Checking the
    // output as well as the index guarantees a header even when the file at
    // index 0 contributed no lines at all.
    const outputIsEmpty = !fs.existsSync(outputFileName) ||
        fs.statSync(outputFileName).size === 0;
    const keepHeader = index === 0 || outputIsEmpty;

    const fileStream = fs.createReadStream(file);
    // Note: we use the crlfDelay option to recognize all instances of CR LF
    // ('\r\n') in the input as a single line break.
    const rl = readline.createInterface({
        input: fileStream,
        crlfDelay: Infinity
    });
    try {
        let lineNumber = 0;
        for await (const line of rl) {
            const isHeaderLine = lineNumber === 0;
            ++lineNumber;
            if (isHeaderLine && !keepHeader) {
                // A header is already present in the output; drop this one.
                continue;
            }
            fs.appendFileSync(outputFileName, line + "\n");
        }
    } finally {
        // Runs on success and on failure, so the reader is always released.
        console.log("Finished Processing", file)
        rl.close();
        fileStream.close();
    }
};
38+
// Merge every CSV matching data/continuous_associations/*continuous_associations.csv
// into a single file, data/continuous_associations_merge/continuous_associations.csv.
(async () => {
    const mergeDir = "data/continuous_associations_merge";
    const mergedCsv = mergeDir + "/continuous_associations.csv";
    const inputPattern = "data/continuous_associations/*continuous_associations.csv";

    // Recreate the output directory from scratch so a previous run's output
    // is not appended to.
    if (fs.existsSync(mergeDir)) {
        fs.rmSync(mergeDir, {recursive: true});
    }
    fs.mkdirSync(mergeDir, {recursive: true});

    const inputs = await glob(inputPattern);

    // Files are handled strictly one after another; each file's position in the
    // glob result is passed along as the index argument of processLineByLine.
    let position = 0;
    for (const input of inputs) {
        await processLineByLine(input, mergedCsv, position);
        position += 1;
    }
    console.log("Done");
})();
53+
54+
File renamed without changes.

slice.js js-legacy-files/slice.js

+11-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
const FILE_CONSTANTS = require("./file_constants");
22
const fs = require('fs');
3-
const path = require("path")
3+
const path = require("path");
44
const fsPromises = fs.promises;
55
const {
66
glob
7-
} = require('glob')
7+
} = require('glob');
88
const csv = require('csv');
9-
const readline = require('readline')
9+
const readline = require('readline');
10+
11+
const axios = require("axios");
12+
const _ = require("underscore");
13+
14+
1015

1116
const processLineByLine = async function (file, list_to_include) {
1217
let outputFileName = file.replace(FILE_CONSTANTS.SOURCE_DATA_DIR, FILE_CONSTANTS.DESTINATION_DATA_DIR);
@@ -151,8 +156,8 @@ const copyDirectory = function (source, destination) {
151156
}
152157
});
153158
}
154-
const copyPertDirectory = async function(){
155-
for(let pert_plate of FILE_CONSTANTS.PERT_PLATES) {
159+
const copyPertDirectory = async function () {
160+
for (let pert_plate of FILE_CONSTANTS.PERT_PLATES) {
156161
for (let pert_id of FILE_CONSTANTS.PERT_IDS) {
157162
//check if the source exists
158163
const srcFolder = FILE_CONSTANTS.SOURCE_DIR + "/" + pert_plate + "/" + pert_id;
@@ -189,6 +194,7 @@ const zipFiles = async function () {
189194
//sync to s3
190195

191196
(async () => {
197+
192198
const csvColumnFilesGlob = [];
193199
const csvRowFilesGlob = [];
194200
const csvPlateFilesGlob = [];

package.json

+40
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,48 @@
11
{
2+
"repository": {
3+
"type": "git",
4+
"url": "https://github.com/loopbackio/loopback-next.git",
5+
"directory": "examples/todo-jwt"
6+
},
7+
"engines": {
8+
"node": "16 || 18 || 20"
9+
},
10+
"scripts": {
11+
"build": "tsc --outDir dist",
12+
"build:watch": "tsc --watch",
13+
"clean": "node infrastructure/clean.js",
14+
"lint": "npm run prettier:check && npm run eslint",
15+
"lint:fix": "npm run eslint:fix && npm run prettier:fix",
16+
"prettier:cli": "prettier \"**/*.ts\"",
17+
"prettier:check": "npm run prettier:cli -- -l",
18+
"prettier:fix": "npm run prettier:cli -- --write",
19+
"eslint": "eslint --report-unused-disable-directives .",
20+
"eslint:fix": "npm run eslint -- --fix",
21+
"pretest": "npm run rebuild",
22+
"test": "mocha \"dist/__tests__/**/*.js\"",
23+
"test:dev": "mocha --allow-console-logs dist/__tests__/**/*.js && npm run posttest",
24+
"verify": "npm pack && tree package && npm run clean",
25+
"premigrate": "npm run build",
26+
"rebuild": "npm run clean && npm run build",
27+
"prestart": "npm run rebuild",
28+
"start": "node dist/launch.js"
29+
},
230
"dependencies": {
31+
"axios": "^1.6.5",
332
"cheerio": "^1.0.0-rc.12",
33+
"config": "^3.3.10",
34+
"convert-csv-to-json": "^2.0.0",
435
"csv": "^6.3.6",
536
"glob": "^10.3.10",
37+
"json-2-csv": "^5.0.1",
38+
"node-async-exec": "^1.2.0",
39+
"node-fetch": "^3.3.2",
40+
"underscore": "^1.13.6",
641
"zip-local": "^0.3.5"
42+
},
43+
"devDependencies": {
44+
"@types/node": "^20.11.6",
45+
"tslib": "^2.6.2",
46+
"typescript": "^5.3.3"
747
}
848
}

src/input.json

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"screen_root" : "s3://analysis.clue.io/etl/",
3+
"project_to_compounds": [
4+
{
5+
"pert_id": "BRD-U00134347",
6+
"pert_plate": "PMTS066",
7+
"src_project": "MTS024_ADAM_DURBIN",
8+
"dest_project": "MTS024_ANDREW_AGUIRRE"
9+
}
10+
]
11+
}

src/launch.ts

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import {SliceData} from "./slice_data";
2+
import {SliceConstants} from "./slice_constants";
3+
import {glob} from "glob";
4+
const path = require('path');
5+
6+
const _ = require('underscore');
7+
let csvToJson = require('convert-csv-to-json');
8+
9+
// Entry point. For every entry of input.json's `project_to_compounds` this script:
//   1. syncs the source and destination plate folders from S3 into ./etl,
//   2. copies the pert_id folder from the source plate to the destination plate,
//   3. re-writes the files matching SliceConstants.REPLACE_PROJECT_NAMES from the
//      source pert_id folder into the destination pert_id folder via
//      SliceData.replaceProjectNames, passing the destination project name,
//   4. calls SliceData.cleanUp on the source pert_id folder,
//   5. syncs both local plate folders back to S3 with `--delete`.
// Each step is awaited in full before the next one starts; any rejection aborts
// the run before later steps (in particular the `--delete` upload) are reached.
(async () => {
    const input = require("./input.json");
    const project_2_cpds = input.project_to_compounds;

    // Resolve every path once per entry so all steps below agree on them.
    // Layout (local and remote): <root><project lower>/<PROJECT UPPER>/<pert_plate>/
    const moves = [];
    for (const project_2_cpd of project_2_cpds) {
        const src_rel = project_2_cpd.src_project.toLowerCase() + "/" +
            project_2_cpd.src_project.toUpperCase() + "/" +
            project_2_cpd.pert_plate + "/";
        const dest_rel = project_2_cpd.dest_project.toLowerCase() + "/" +
            project_2_cpd.dest_project.toUpperCase() + "/" +
            project_2_cpd.pert_plate + "/";
        moves.push({
            dest_project: project_2_cpd.dest_project,
            local_src_dir: "etl/" + src_rel,
            local_dest_dir: "etl/" + dest_rel,
            remote_src_dir: input.screen_root + src_rel,
            remote_dest_dir: input.screen_root + dest_rel,
            // Source pert folder keeps its trailing slash, destination does not,
            // matching what SliceData.copyDirectory was called with originally.
            cp_src_dir: "etl/" + src_rel + project_2_cpd.pert_id + "/",
            cp_dest_dir: "etl/" + dest_rel + project_2_cpd.pert_id
        });
    }

    // 1. Pull both plate folders of every entry from S3.
    const downloads = [];
    for (const move of moves) {
        downloads.push(SliceConstants.execShellCommand(
            "aws s3 sync " + move.remote_src_dir + " " + move.local_src_dir));
        downloads.push(SliceConstants.execShellCommand(
            "aws s3 sync " + move.remote_dest_dir + " " + move.local_dest_dir));
    }
    await Promise.all(downloads);

    // 2. Copy each pert_id folder from its source plate to its destination plate.
    const copies = [];
    for (const move of moves) {
        copies.push(SliceData.copyDirectory(move.cp_src_dir, move.cp_dest_dir));
    }
    await Promise.all(copies);

    // 3. Find the files to rewrite, one glob per entry. Keeping the results per
    //    entry is what prevents files of one compound from being written into
    //    another compound's destination folder when several entries are given.
    const rewriteJobs = [];
    for (const move of moves) {
        const patterns = [];
        for (const name of SliceConstants.REPLACE_PROJECT_NAMES) {
            patterns.push(move.cp_src_dir + "*" + name);
        }
        const rowFiles = await glob(patterns);
        for (const rowFile of rowFiles) {
            rewriteJobs.push({
                dest_project: move.dest_project,
                rowFile: rowFile,
                destFile: move.cp_dest_dir + "/" + path.basename(rowFile)
            });
        }
    }
    // Start the rewrites only after all globbing is done, so every promise is
    // handed to Promise.all immediately after it is created.
    const rewrites = [];
    for (const job of rewriteJobs) {
        rewrites.push(SliceData.replaceProjectNames(job.dest_project, job.rowFile, job.destFile, ","));
    }
    await Promise.all(rewrites);

    // 4. Clean up the source pert_id folders (README: "Delete pert_id folder
    //    from the src project").
    const cleanups = [];
    for (const move of moves) {
        cleanups.push(SliceData.cleanUp(move.cp_src_dir));
    }
    await Promise.all(cleanups);

    // 5. Push both plate folders back to S3. `--delete` makes the remote side
    //    mirror the local folder, which removes the moved pert_id from the source.
    const uploads = [];
    for (const move of moves) {
        let command = "aws s3 sync " + move.local_src_dir + " " +
            move.remote_src_dir + " --delete";
        console.log("sync s3 src command",command)
        uploads.push(SliceConstants.execShellCommand(command));

        command = "aws s3 sync " + move.local_dest_dir + " " +
            move.remote_dest_dir + " --delete";
        console.log("sync s3 dest command",command)
        uploads.push(SliceConstants.execShellCommand(command));
    }
    await Promise.all(uploads);
})();

0 commit comments

Comments
 (0)