Skip to content

Commit 08bd673

Browse files
authored
Merge pull request #246 from stemangiola/dev
improve docs for deconvolution
2 parents 9d162fe + 7fd7953 commit 08bd673

File tree

7 files changed

+80
-39
lines changed

7 files changed

+80
-39
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: tidybulk
33
Title: Brings transcriptomics to the tidyverse
4-
Version: 1.9.1
4+
Version: 1.9.2
55
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
66
role = c("aut", "cre")),
77
person("Maria", "Doyle", email = "[email protected]",

R/functions.R

+3
Original file line numberDiff line numberDiff line change
@@ -2917,6 +2917,9 @@ run_epic = function(mix, reference = NULL) {
29172917
)
29182918
} else { Y <- mix }
29192919

2920+
# Check if it is not matrix or data.frame, for example DelayedMatrix
2921+
if(!is(Y, "matrix") & !is(Y, "data.frame"))
2922+
Y = as.matrix(Y)
29202923

29212924
results <- EPIC(Y, reference = reference)$cellFractions %>% data.frame()
29222925
#results[results < 0] <- 0

R/methods.R

+43-15
Original file line numberDiff line numberDiff line change
@@ -175,28 +175,56 @@ setGeneric("as_SummarizedExperiment", function(.data,
175175
feature_cols = col_direction$vertical_cols
176176
counts_cols = col_direction$counts_cols
177177

178-
colData = .data %>% select(!!.sample, sample_cols) %>% distinct %>% arrange(!!.sample) %>% {
179-
S4Vectors::DataFrame((.) %>% select(-!!.sample),
180-
row.names = (.) %>% pull(!!.sample))
178+
colData =
179+
.data %>%
180+
select(!!.sample, sample_cols) %>%
181+
distinct() %>%
182+
183+
# Unite if multiple sample columns
184+
tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |>
185+
186+
arrange(!!sample__$symbol) %>% {
187+
S4Vectors::DataFrame(
188+
(.) %>% select(-!!sample__$symbol),
189+
row.names = (.) %>% pull(!!sample__$symbol)
190+
)
181191
}
182192

183-
rowData = .data %>% select(!!.transcript, feature_cols) %>% distinct %>% arrange(!!.transcript) %>% {
184-
S4Vectors::DataFrame((.) %>% select(-!!.transcript),
185-
row.names = (.) %>% pull(!!.transcript))
193+
rowData =
194+
.data %>%
195+
select(!!.transcript, feature_cols) %>%
196+
distinct() %>%
197+
198+
# Unite if multiple feature columns
199+
tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |>
200+
201+
arrange(!!feature__$symbol) %>% {
202+
S4Vectors::DataFrame(
203+
(.) %>% select(-!!feature__$symbol),
204+
row.names = (.) %>% pull(!!feature__$symbol)
205+
)
186206
}
187207

188208
my_assays =
189209
.data %>%
190-
select(!!.sample,
191-
!!.transcript,
192-
!!.abundance,
193-
!!.abundance_scaled,
194-
counts_cols) %>%
195-
distinct() %>%
196-
pivot_longer( cols=-c(!!.transcript,!!.sample), names_to="assay", values_to= ".a") %>%
210+
211+
# Unite if multiple sample columns
212+
tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |>
213+
214+
# Unite if multiple feature columns
215+
tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |>
216+
217+
select(!!sample__$symbol,
218+
!!feature__$symbol,
219+
!!.abundance,
220+
!!.abundance_scaled,
221+
counts_cols) %>%
222+
distinct() %>%
223+
224+
pivot_longer( cols=-c(!!feature__$symbol,!!sample__$symbol), names_to="assay", values_to= ".a") %>%
197225
nest(`data` = -`assay`) %>%
198226
mutate(`data` = `data` %>% map(
199-
~ .x %>% spread(!!.sample, .a) %>% as_matrix(rownames = quo_name(.transcript))
227+
~ .x %>% spread(!!sample__$symbol, .a) %>% as_matrix(rownames = feature__$name)
200228
))
201229

202230
# Build the object
@@ -1737,7 +1765,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates)
17371765
#' @param .sample The name of the sample column
17381766
#' @param .transcript The name of the transcript/gene column
17391767
#' @param .abundance The name of the transcript/gene abundance column
1740-
#' @param reference A data frame. A rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.
1768+
#' @param reference A data frame. The methods cibersort and llsr can accept a custom rectangular dataframe with genes as row names, cell types as column names and gene-transcript abundance as values. For example, tidybulk::X_cibersort. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr it will be LM22.
17411769
#' @param method A character string. The method to be used. At the moment Cibersort (default), epic and llsr (linear least squares regression) are available.
17421770
#' @param prefix A character string. The prefix you would like to add to the result columns. It is useful if you want to reshape data.
17431771
#' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).

R/methods_SE.R

+6-2
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,9 @@ setMethod("adjust_abundance",
825825
rownames(.x) = rowData(.data)[,quo_name(.transcript)]
826826

827827
# Combine
828+
if(rownames(.x) |> is.na() |> which() |> length() |> gt(0))
829+
stop(sprintf("tidybulk says: you have some %s that are NAs", quo_name(.transcript)))
830+
828831
.x = combineByRow(.x, aggregation_function)
829832
.x = .x[match(new_row_data[,quo_name(.transcript)], rownames(.x)),,drop=FALSE]
830833
rownames(.x) = rownames(new_row_data)
@@ -931,6 +934,7 @@ setMethod("aggregate_duplicates",
931934
...) {
932935

933936
.transcript = enquo(.transcript)
937+
.sample = s_(.data)$symbol
934938

935939
my_assay =
936940
.data %>%
@@ -1043,15 +1047,15 @@ setMethod("aggregate_duplicates",
10431047

10441048
# Parse results and return
10451049
setNames(c(
1046-
"sample",
1050+
quo_name(.sample),
10471051
(.) %>% select(-1) %>% colnames() %>% sprintf("%s%s", prefix, .)
10481052

10491053
))
10501054

10511055
# Attach proportions
10521056
colData(.data) = colData(.data) %>% cbind(
10531057
my_proportions %>%
1054-
as_matrix(rownames = "sample") %>%
1058+
as_matrix(rownames = .sample) %>%
10551059
.[match(rownames(colData(.data)), rownames(.)),]
10561060
)
10571061

R/utilities.R

+14-11
Original file line numberDiff line numberDiff line change
@@ -463,17 +463,17 @@ add_class = function(var, name) {
463463
#' @return A list of column enquo or error
464464
get_sample_transcript_counts = function(.data, .sample, .transcript, .abundance){
465465

466-
if( .sample %>% quo_is_symbol() ) .sample = .sample
466+
if( quo_is_symbolic(.sample) ) .sample = .sample
467467
else if(".sample" %in% (.data %>% get_tt_columns() %>% names))
468468
.sample = get_tt_columns(.data)$.sample
469469
else my_stop()
470470

471-
if( .transcript %>% quo_is_symbol() ) .transcript = .transcript
471+
if( quo_is_symbolic(.transcript) ) .transcript = .transcript
472472
else if(".transcript" %in% (.data %>% get_tt_columns() %>% names))
473473
.transcript = get_tt_columns(.data)$.transcript
474474
else my_stop()
475475

476-
if( .abundance %>% quo_is_symbolic() ) .abundance = .abundance
476+
if( quo_is_symbolic(.abundance) ) .abundance = .abundance
477477
else if(".abundance" %in% (.data %>% get_tt_columns() %>% names))
478478
.abundance = get_tt_columns(.data)$.abundance
479479
else my_stop()
@@ -894,8 +894,8 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
894894
.abundance_scaled = enquo(.abundance_scaled)
895895

896896
# x-annotation df
897-
n_x = .data %>% distinct(!!.horizontal) %>% nrow
898-
n_y = .data %>% distinct(!!.vertical) %>% nrow
897+
n_x = .data %>% select(!!.horizontal) |> distinct() |> nrow()
898+
n_y = .data %>% select(!!.vertical) |> distinct() |> nrow()
899899

900900
# Sample wise columns
901901
horizontal_cols=
@@ -907,8 +907,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
907907
.x %>%
908908
when(
909909
.data %>%
910-
distinct(!!.horizontal, !!as.symbol(.x)) %>%
911-
nrow %>%
910+
select(!!.horizontal, !!as.symbol(.x)) %>%
911+
distinct() |>
912+
nrow() %>%
912913
equals(n_x) ~ .x,
913914
~ NULL
914915
)
@@ -928,8 +929,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
928929
.x %>%
929930
ifelse_pipe(
930931
.data %>%
931-
distinct(!!.vertical, !!as.symbol(.x)) %>%
932-
nrow %>%
932+
select(!!.vertical, !!as.symbol(.x)) |>
933+
distinct() |>
934+
nrow() %>%
933935
equals(n_y),
934936
~ .x,
935937
~ NULL
@@ -963,8 +965,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
963965
.x %>%
964966
ifelse_pipe(
965967
.data %>%
966-
distinct(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>%
967-
nrow %>%
968+
select(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>%
969+
distinct() |>
970+
nrow() %>%
968971
equals(n_x * n_y),
969972
~ .x,
970973
~ NULL

man/deconvolve_cellularity-methods.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-bulk_methods.R

+12-9
Original file line numberDiff line numberDiff line change
@@ -1940,6 +1940,17 @@ test_that("gene over representation",{
19401940

19411941
})
19421942

1943+
test_that("as_SummarizedExperiment",{
1944+
input_df |>
1945+
as_SummarizedExperiment(
1946+
.sample = c(a, condition),
1947+
.transcript = c(b, entrez),
1948+
.abundance = c
1949+
) |>
1950+
nrow() |>
1951+
expect_equal(527)
1952+
1953+
})
19431954

19441955
# test_that("bibliography",{
19451956
#
@@ -1954,12 +1965,4 @@ test_that("gene over representation",{
19541965
#
19551966
# })
19561967
#
1957-
# test_that("as_SummarizedExperiment",{
1958-
# input_df |>
1959-
# as_SummarizedExperiment(
1960-
# .sample = a,
1961-
# .transcript = b,
1962-
# .abundance = c
1963-
# )
1964-
#
1965-
# })
1968+

0 commit comments

Comments
 (0)