forked from cmap/dockerized_mts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqc.R
76 lines (61 loc) · 2.77 KB
/
qc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Script to run the QC step of the MTS pipeline
# creates SSMD_TABLE
# import necessary libraries and functions
suppressMessages(source("./src/qc_functions.R"))
#---- Read arguments ----
# initialize parser
parser <- ArgumentParser()
# specify our desired options
parser$add_argument("-b", "--base_dir", default="", help="Input Directory")
parser$add_argument("-o", "--out", default=getwd(), help = "Output path. Default is working directory")
parser$add_argument("-n", "--name", default="", help = "Build name. Default is none")
# get command line options, if help option encountered print help and exit
args <- parser$parse_args()
base_dir <- args$base_dir
out_dir <- args$out
build_name <- args$name
if (!dir.exists(out_dir)) {dir.create(out_dir, recursive = T)}
# paths to data (make sure directory of data has these files)
path_data <- paste0(base_dir, "/logMFI.csv")
#---- Load the data ----
print("Loading data")
logMFI_files <- list.files(base_dir, pattern = "LEVEL3_LMFI", full.names = T)
if (length(logMFI_files) == 1) {
logMFI_normalized <- data.table::fread(logMFI_files[[1]])
} else {
stop(paste("There are", length(logMFI_files), "level 3 tables in this directory. Please ensure there is one and try again."),
call. = FALSE)
}
#---- Calculate QC metrics ----
print("Calculating SSMDs")
# calculate SSMD and NNMD (allow for no QC)
qc_table <- calc_ssmd(logMFI_normalized %>% dplyr::filter(pool_id != "CTLBC"))
if (any(is.na(qc_table$ssmd))) {
message("Unable to calculate some QC metrics: control condition(s) may be missing")
}
# if there are positive controls
if ("trt_poscon_md" %in% colnames(qc_table)) {
# calculate error rate of normalized table (based on threshold classifier)
print("Calculating error rates")
error_table <- calc_er(logMFI_normalized)
# join with SSMD table
qc_table %<>%
dplyr::left_join(error_table, by = c("prism_replicate", "pert_plate", "ccle_name", "rid", "culture")) %>%
dplyr::mutate(dr = ctl_vehicle_md - trt_poscon_md,
pass = error_rate <= 0.05 & dr > -log2(0.3)) %>%
dplyr::group_by(rid, ccle_name, culture, pert_plate) %>%
dplyr::mutate(pass = pass & sum(pass, na.rm = T) / n_distinct(prism_replicate) > 0.5) %>%
dplyr::ungroup()
} else {
# add empty columns (so reports don't break)
qc_table %<>% dplyr::mutate(trt_poscon_md = NA,
trt_poscon_mad = NA,
error_rate = NA,
pert_plate = stringr::word(prism_replicate, 1,
sep = stringr::fixed("_")),
dr = NA,
pass = NA)
}
#---- Write data ----
# Write QC table
readr::write_csv(qc_table, paste0(out_dir, "/", build_name, "_QC_TABLE.csv"))