Skip to content

Commit

Permalink
Merge pull request #92 from BIGslu/dev-kadm
Browse files Browse the repository at this point in the history
Various open kimma issues
  • Loading branch information
kdillmcfarland authored Aug 3, 2023
2 parents 9550bf3 + 7bb1f57 commit da62c3c
Show file tree
Hide file tree
Showing 14 changed files with 1,194 additions and 783 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: kimma
Type: Package
Title: Kinship In Mixed Model Analysis of RNA-seq
Version: 1.6.0
Version: 1.6.1
Author: Kim Dill-McFarland
Maintainer: Kim Dill-McFarland <[email protected]>
Description: Linear mixed effects models with pairwise kinship for RNA-seq data.
Expand Down
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@ export(extract_lmFit)
export(kmFit)
export(kmFit_eQTL)
export(summarise_kmFit)
export(summarise_lmFit)
export(summarize_kmFit)
export(summarize_lmFit)
importFrom(foreach,"%dopar%")
importFrom(magrittr,"%>%")
importFrom(tidyselect,vars_select_helpers)
67 changes: 45 additions & 22 deletions R/kimma_cleaning.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
kimma_cleaning <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libID",
counts=NULL, meta=NULL, genes=NULL, weights=NULL,
subset_var = NULL, subset_lvl = NULL, subset_genes = NULL,
model_lm = NULL, genotype_name = NULL){
model_lm = NULL, genotype_name = NULL,
run_lmerel = FALSE){
i <- rowname <- NULL
#If data are NOT a voom EList, create a mock version
if(is.null(dat)) {
Expand Down Expand Up @@ -169,7 +170,7 @@ kimma_cleaning <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libI
dat.subset$targets[,to.modelID] <- dat.subset$targets[,patientID]
} else{ to.modelID <- patientID }

if(!is.null(kin)){
if(!is.null(kin) & run_lmerel){
#Format kinship matrix if rownames inside matrix
if(!is.numeric(as.matrix(kin))){
name.col <- as.data.frame(kin) %>%
Expand All @@ -189,10 +190,17 @@ kimma_cleaning <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libI
#Combine expression data (E) and sample metadata (targets)
to.model <- dat.subset$E %>%
tidyr::pivot_longer(-rowname, names_to = libraryID, values_to = "expression") %>%
dplyr::inner_join(dat.subset$targets, by=libraryID) %>%
#Remove samples missing kinship
dplyr::filter(get(to.modelID) %in% colnames(kin.format)) %>%
dplyr::arrange(get(to.modelID))
dplyr::inner_join(dat.subset$targets, by=libraryID)

if(run_lmerel){
to.model <- to.model %>%
#Remove samples missing kinship
dplyr::filter(get(to.modelID) %in% colnames(kin.format)) %>%
dplyr::arrange(get(to.modelID))
} else{
to.model <- to.model %>%
dplyr::arrange(get(to.modelID))
}

#Add weights if available
if(!is.null(dat.subset$weights)){
Expand All @@ -207,13 +215,17 @@ kimma_cleaning <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libI

#Remove samples from kinship missing expression data
#Order kinship as in to.model
to.keep <- unique(unlist(to.model[,to.modelID]))
kin.subset <- as.data.frame(kin.format) %>%
tibble::rownames_to_column() %>%
dplyr::filter(rowname %in% to.keep) %>%
dplyr::select(rowname, tidyselect::all_of(to.keep)) %>%
dplyr::arrange(rowname) %>%
tibble::column_to_rownames()
if(run_lmerel){
to.keep <- unique(unlist(to.model[,to.modelID]))
kin.subset <- as.data.frame(kin.format) %>%
tibble::rownames_to_column() %>%
dplyr::filter(rowname %in% to.keep) %>%
dplyr::select(rowname, tidyselect::all_of(to.keep)) %>%
dplyr::arrange(rowname) %>%
tibble::column_to_rownames()
} else{
kin.subset <- NULL
}

#Total samples messages
##total libraries
Expand All @@ -223,19 +235,23 @@ kimma_cleaning <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libI
if(patientID %in% colnames(dat.subset$targets)){
pt.no <- dat.subset$targets %>%
dplyr::distinct(get(patientID)) %>% nrow()
message("Input: ", lib.no, " libraries from ", pt.no, " unique patients")
message("Input: ", lib.no, " libraries from ", pt.no, " unique individuals")
} else {
message("Input: ",lib.no, " libraries")
}

## Missing data
## Missing kinship
kin.no <- to.model %>%
dplyr::distinct(get(libraryID)) %>% nrow()
kin.miss <- lib.no-kin.no
if(run_lmerel){
kin.no <- to.model %>%
dplyr::distinct(get(libraryID)) %>% nrow()
kin.miss <- lib.no-kin.no

if(kin.miss > 0 ){
message("- ", kin.miss, " libraries missing kinship")
if(kin.miss > 0){
message("- ", kin.miss, " libraries missing kinship")
}
} else{
kin.miss <- 0
}

##Missing other variables
Expand All @@ -248,9 +264,16 @@ kimma_cleaning <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libI
dplyr::distinct() %>%
tidyr::drop_na() %>% nrow()

miss.no <- lib.no-kin.miss-complete
if(miss.no>0){
message("- ", miss.no, " libraries missing fixed effect variable(s)") }
if(run_lmerel){
miss.no <- lib.no-kin.miss-complete
if(miss.no>0){
message("- ", miss.no, " additional libraries missing fixed effect variable(s)") }
} else{
miss.no <- lib.no-complete
if(miss.no>0){
message("- ", miss.no, " libraries missing fixed effect variable(s)") }
}


message("Model: ",lib.no-kin.miss-miss.no, " libraries")
} else{
Expand Down
7 changes: 6 additions & 1 deletion R/kmFit.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ kmFit <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libID",
message("WARNING: To use weights provided in dat$weights or weights, set use_weights = TRUE\n")
}
}
if(run_lme | run_lmerel){
if(!grepl(patientID, model)){
stop("patientID value does not match variable used in model.")
}
}

###### Formulae #####
#Make formulae. as.formula does not work
Expand Down Expand Up @@ -263,7 +268,7 @@ kmFit <- function(dat=NULL, kin=NULL, patientID="ptID", libraryID="libID",
to.model.ls <- kimma_cleaning(dat, kin, patientID, libraryID,
counts, meta, genes, weights,
subset_var, subset_lvl, subset_genes,
model.lm, genotype_name)
model.lm, genotype_name, run_lmerel)

###### Run models ######
#create blank df to hold results
Expand Down
64 changes: 46 additions & 18 deletions R/summarise_kmFit.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
#' kin = example.kin,
#' run_lme = TRUE, run_lmerel=TRUE, run_contrast=TRUE,
#' subset_genes = c("ENSG00000250479","ENSG00000250510","ENSG00000255823"),
#' model = "~ virus + (1|ptID)")
#' model = "~ virus + asthma + (1|ptID)")
#'
#' # Or extract limma results
#' # design <- model.matrix(~ virus + asthma, data = example.voom$targets)
#' # fit <- limma::eBayes(limma::lmFit(example.voom$E, design))
#' # model_results <- extract_lmFit(design = design, fit = fit)
#'
#' # Summarise results
#' summarise_kmFit(fdr = model_results$lmerel, fdr_cutoff = c(0.01, 0.5),
Expand All @@ -42,9 +47,11 @@ summarise_kmFit <- function(fdr, fdr_cutoff = c(0.05,0.1,0.2,0.3,0.4,0.5),
if(!is.null(p.cutoff)){p_cutoff <- p.cutoff}

if(intercept | "contrast_ref" %in% colnames(fdr)){
fdr.filter <- fdr
fdr.filter <- fdr %>%
dplyr::mutate(variable = as.factor(variable))
} else{
fdr.filter <- dplyr::filter(fdr, variable != '(Intercept)') %>%
dplyr::mutate(variable = as.factor(variable)) %>%
droplevels()
}

Expand All @@ -58,7 +65,10 @@ summarise_kmFit <- function(fdr, fdr_cutoff = c(0.05,0.1,0.2,0.3,0.4,0.5),

if(FCgroup){
fdr.filter.FC <- fdr.filter %>%
dplyr::mutate(FCgroup = ifelse(estimate < 0, "down", "up"))
dplyr::mutate(FCgroup = dplyr::case_when(estimate < 0 ~ "down",
estimate > 0 ~ "up",
TRUE ~ "0"),
FCgroup = as.factor(FCgroup))
#Blank df for results
result <- data.frame()

Expand All @@ -67,15 +77,21 @@ summarise_kmFit <- function(fdr, fdr_cutoff = c(0.05,0.1,0.2,0.3,0.4,0.5),
#Calculate total, nonredundant signif genes at different levels
total.temp <- fdr.filter.FC %>%
dplyr::filter(get(fdr.var) <= FDR.i) %>%
droplevels() %>%
dplyr::distinct(gene, FCgroup) %>%
dplyr::count(FCgroup, .drop = FALSE) %>%
dplyr::mutate(variable = "total (nonredundant)")

#Summarize signif genes per variable at various levels
if("contrast_ref" %in% colnames(fdr.filter.FC)){
group.temp <- fdr.filter.FC %>%
dplyr::mutate(dplyr::across(c(contrast_ref, contrast_lvl),
~as.factor(.))) %>%
dplyr::filter(get(fdr.var) <= FDR.i) %>%
dplyr::count(variable, contrast_ref, contrast_lvl, FCgroup, .drop = FALSE)
dplyr::count(variable, contrast_ref, contrast_lvl, FCgroup, .drop = FALSE) %>%
dplyr::inner_join(dplyr::distinct(fdr.filter.FC,
variable, contrast_ref, contrast_lvl),
by = c("variable", "contrast_ref", "contrast_lvl"))

result.temp <- total.temp %>%
dplyr::bind_rows(group.temp) %>%
Expand All @@ -85,7 +101,8 @@ summarise_kmFit <- function(fdr, fdr_cutoff = c(0.05,0.1,0.2,0.3,0.4,0.5),
} else{
group.temp <- fdr.filter.FC %>%
dplyr::filter(get(fdr.var) <= FDR.i) %>%
dplyr::count(variable, FCgroup, .drop = FALSE)
dplyr::count(variable, FCgroup, .drop = FALSE) %>%
dplyr::filter(FCgroup != "0" | n > 0)

result.temp <- dplyr::bind_rows(total.temp, group.temp) %>%
dplyr::mutate(group = name.fdr)
Expand All @@ -102,15 +119,21 @@ summarise_kmFit <- function(fdr, fdr_cutoff = c(0.05,0.1,0.2,0.3,0.4,0.5),
#Calculate total, nonredundant signif genes at different levels
total.temp <- fdr.filter %>%
dplyr::filter(get(fdr.var) <= FDR.i) %>%
droplevels() %>%
dplyr::distinct(gene) %>%
dplyr::mutate(variable = "total (nonredundant)") %>%
dplyr::count(variable, .drop = FALSE)

#Summarize signif genes per variable at various levels
if("contrast_ref" %in% colnames(fdr.filter)){
group.temp <- fdr.filter %>%
dplyr::mutate(dplyr::across(c(contrast_ref, contrast_lvl),
~as.factor(.))) %>%
dplyr::filter(get(fdr.var) <= FDR.i) %>%
dplyr::count(variable, contrast_ref, contrast_lvl, .drop = FALSE)
dplyr::count(variable, contrast_ref, contrast_lvl, .drop = FALSE) %>%
dplyr::inner_join(dplyr::distinct(fdr.filter,
variable, contrast_ref, contrast_lvl),
by = c("variable", "contrast_ref", "contrast_lvl"))

result.temp <- total.temp %>%
dplyr::bind_rows(group.temp) %>%
Expand All @@ -131,27 +154,32 @@ summarise_kmFit <- function(fdr, fdr_cutoff = c(0.05,0.1,0.2,0.3,0.4,0.5),
}

#Stop if not signif
if(nrow(result) == 0){
if(sum(result$n) == 0){
stop("No significant genes. Please increase FDR or P-value cutoffs.")
}

#Format to wide output
result.format <- tidyr::pivot_wider(result, names_from = group,
values_from = n) %>%
dplyr::mutate(variable = forcats::fct_relevel(factor(variable),
"total (nonredundant)",
after=Inf)) %>%
dplyr::arrange(variable)

#rename for P-value if specified
if(!is.null(p_cutoff)) {
result.format <- result.format %>%
dplyr::rename_all(~gsub("fdr_","p_",.))
}
values_from = n) %>%
dplyr::mutate(variable = forcats::fct_relevel(factor(variable),
"total (nonredundant)",
after=Inf)) %>%
dplyr::arrange(variable) %>%
dplyr::mutate(dplyr::across(dplyr::contains("fdr"),
~tidyr::replace_na(., 0))) %>%
dplyr::rename_all(~gsub("fdr_","fdr<",.))

#rename for P-value if specified
if(!is.null(p_cutoff)) {
result.format <- result.format %>%
dplyr::rename_all(~gsub("fdr_","p_",.))
}

return(result.format)
}

#' @rdname summarise_kmFit
#' @export
summarize_kmFit <- summarise_kmFit
summarise_lmFit <- summarise_kmFit
summarize_lmFit <- summarise_kmFit
Loading

0 comments on commit da62c3c

Please sign in to comment.