-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_management.R
92 lines (77 loc) · 2.68 KB
/
data_management.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Export the non-default metadata columns of a dataset as a CSV file.
#
# Arguments:
#   meta_data  Metadata table; its row names are written out as a leading
#              "cellIDs" column.
#   file_name  Name of the output file, created inside PATH_OUT_ANALYSIS.
#
# Relies on two globals defined outside this function: `defaultMDnames`
# (column names to leave out) and `PATH_OUT_ANALYSIS` (output directory).
export_metadata <- function(meta_data, file_name) {
  # Prepend the row names as a column so they survive `row.names = FALSE`
  with_ids <- cbind(cellIDs = rownames(meta_data), meta_data)

  # Keep every column whose name is not one of the default metadata names
  is_added <- !(names(with_ids) %in% defaultMDnames)
  added_cols <- names(with_ids)[is_added]

  destination <- file.path(PATH_OUT_ANALYSIS, file_name)
  write.table(
    x = with_ids[added_cols],
    file = destination,
    sep = ",",
    col.names = TRUE,
    row.names = FALSE
  )
}
# Save an object as an RDS file after trimming its metadata to the default
# columns.
#
# Arguments:
#   SO         Object exposing a `meta.data` slot (accessed with `@`).
#   file_name  Name of the output file, created inside PATH_RDS_OBJECTS.
#
# Relies on two globals defined outside this function: `defaultMDnames`
# (metadata columns to keep) and `PATH_RDS_OBJECTS` (output directory).
export_rds <- function(SO, file_name) {
  # drop = FALSE keeps the slot a data frame even when `defaultMDnames`
  # names a single column
  SO@meta.data <- SO@meta.data[, defaultMDnames, drop = FALSE]
  saveRDS(
    SO,
    file.path(PATH_RDS_OBJECTS, file_name)
  )
}
# Write the embeddings of one dimension reduction to a CSV file.
#
# Arguments:
#   SO         Object handed to Embeddings().
#   dr         Name of the reduction, passed to Embeddings() as `reduction`.
#   file_name  Name of the output file, created inside PATH_OUT_ANALYSIS.
#
# Row names and column names of the embedding table are both written.
# Relies on the global `PATH_OUT_ANALYSIS` defined outside this function.
export_dimred <- function(SO, dr, file_name) {
  embedding_table <- as.data.frame(Embeddings(SO, reduction = dr))
  target <- file.path(PATH_OUT_ANALYSIS, file_name)
  write.table(
    embedding_table,
    file = target,
    col.names = TRUE,
    row.names = TRUE,
    sep = ","
  )
}
# Stop with an error when any level of `df` is absent from `groups`.
#
# Arguments:
#   df      Object whose levels() list every expected group.
#           NOTE(review): despite the name this is presumably a factor, not
#           a data frame - confirm against the callers.
#   groups  Vector of the groups that are actually populated.
#
# Returns NULL invisibly when every level is present in `groups`; otherwise
# raises an error listing the missing levels.
check_empty_groups <- function(df, groups) {
  missing_levels <- setdiff(levels(df), groups)
  if (length(missing_levels) == 0) {
    return(invisible(NULL))
  }
  stop(paste0(
    "Error: the following groups are empty: ",
    toString(missing_levels),
    ". Change your group thresholds."
  ))
}
# Keep the `topn` rows with the highest `log_fc` from a table of features.
#
# Arguments:
#   df    Table with a `log_fc` column and either a `cluster` column or a
#         `padj` column.
#   topn  Number of rows requested from top_n(); applied per cluster when
#         `cluster` is present. Defaults to 20.
#
# Returns:
#   - With a `cluster` column: the top rows of each cluster, ordered by
#     cluster and then by decreasing `log_fc`. The result is still grouped
#     by `cluster`.
#   - Otherwise: the rows are first ordered by `padj`, the top rows by
#     `log_fc` are kept, and the result is ordered by increasing `log_fc`.
extract_top_features <- function(df, topn = 20) {
  has_clusters <- "cluster" %in% names(df)

  if (!has_clusters) {
    # No clusters: rank on padj first, then select and order on log_fc
    ranked <- dplyr::arrange(df, padj)
    selected <- top_n(ranked, n = topn, wt = log_fc)
    return(dplyr::arrange(selected, log_fc))
  }

  # Clusters present: select within each cluster, strongest log_fc first
  ranked <- dplyr::arrange(df, cluster, desc(log_fc))
  selected <- top_n(group_by(ranked, cluster), n = topn, wt = log_fc)
  dplyr::arrange(selected, cluster, desc(log_fc))
}
# Combine the feature tables of several datasets into one de-duplicated,
# gzip-compressed, tab-separated file.
#
# Arguments:
#   dataset_list  Dataset names. For each one, the file
#                 "globalParameters_<dataset>.toml" under PATH_REQUIREMENTS
#                 is parsed and its `path$input$PATH_INPUT_LABDATA` entry
#                 (relative to PATH_ROOT) locates that dataset's
#                 "features.tsv.gz".
#
# The result is written without header or row names to
# <PATH_ROOT>/00_rawdata/combined_<DATASET>_features.tsv.gz. When that file
# already exists nothing is read or written.
#
# Returns NULL invisibly. Relies on the globals PATH_ROOT, PATH_REQUIREMENTS
# and DATASET and on parseToml(), all defined outside this function.
merge_features <- function(dataset_list) {
  library(data.table)

  outfile.path <- file.path(
    PATH_ROOT,
    "00_rawdata",
    paste0("combined_", DATASET, "_features.tsv.gz")
  )
  if (file.exists(outfile.path)) {
    return(invisible(NULL))
  }

  features.list <- lapply(dataset_list, function(dataset) {
    config <- parseToml(
      paste0(PATH_REQUIREMENTS, "globalParameters_", dataset, ".toml")
    )
    features.path <- file.path(
      PATH_ROOT,
      config$path$input$PATH_INPUT_LABDATA,
      "features.tsv.gz"
    )
    as.data.frame(fread(
      features.path,
      header = FALSE,
      sep = "\t",
      col.names = c("ENSid", "GeneName", "Type")
    ))
  })

  # rbind() has no `all` argument: an `all = TRUE` passed to it is bound as
  # one more row, so it must not be used here. unname() keeps list names
  # from being mistaken for rbind() arguments.
  combined.features <- unique(do.call(rbind, unname(features.list)))

  outfile <- gzfile(outfile.path, "w")
  # Close the connection even when write.table() fails
  on.exit(close(outfile), add = TRUE)
  write.table(
    combined.features,
    outfile,
    col.names = FALSE,
    sep = "\t",
    row.names = FALSE
  )
  invisible(NULL)
}