diff --git a/DESCRIPTION b/DESCRIPTION index 5eb6c8a..4d98642 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: categoryCompare2 -Version: 0.200.3 +Version: 0.200.4 Title: Meta-Analysis of High-Throughput Experiments Using Feature Annotations Authors@R: c( diff --git a/R/managing_annotations.R b/R/managing_annotations.R index 1f68eb9..6336275 100644 --- a/R/managing_annotations.R +++ b/R/managing_annotations.R @@ -1,19 +1,19 @@ #' orgdb annotations -#' +#' #' Generate an annotation object for genes based on an "org.*.db" object, and pulling #' information from it. -#' +#' #' @param orgdb the name of the org.*.db object #' @param features which features to get annotations for #' @param feature_type which type of IDs to map (see details) #' @param annotation_type the type of annotation to grab (see details) -#' +#' #' @md -#' +#' #' @details This function generates a `categoryCompare2` annotation object #' from a Bioconductor "org.*.db" object. Even though different gene identifiers can #' be used, almost all of the mappings are via ENTREZID. -#' +#' #' The set of feature or gene keys that can be used to create the annotations include: #' * ENTREZID: ENTREZ gene ids #' * ACCNUM: genbank accession numbers @@ -25,7 +25,7 @@ #' * REFSEQ: reference sequence IDs, NM, NP, NR, XP, etc #' * UNIGENE: gene ids from UNIPROT eg Hs.88556 #' * UNIPROT: protein ids from UNIPROT eg P80404 -#' +#' #' The set of annotations that can be mapped to features include: #' * GO: annotations from gene ontology #' * PATH: KEGG Pathway identifiers (not updated since 2011!) @@ -35,54 +35,72 @@ #' * PROSITE #' * PFAM: protein family identifiers #' * IPI: protein-protein interactions -#' +#' #' For GO annotations, it is also possible to pass `GO` to use all 3 sub-ontologies simultaneously, #' or any combination of `BP`, `MF`, and `CC`. -#' +#' #' @export #' @return annotation object -#' -get_db_annotation <- function(orgdb = "org.Hs.eg.db", features = NULL, feature_type = "ENTREZID", - annotation_type = "GO"){ +#' +get_db_annotation <- function( + orgdb = "org.Hs.eg.db", + features = NULL, + feature_type = "ENTREZID", + annotation_type = "GO" +) { go_types <- c("GO", "BP", "MF", "CC") go_sub <- c("BP", "MF", "CC") check_package_installed(orgdb) suppressPackageStartupMessages(library(orgdb, character.only = TRUE)) - + annotation_src <- eval(parse(text = orgdb)) annotation_columns <- AnnotationDbi::columns(annotation_src) annotation_keytypes <- AnnotationDbi::keytypes(annotation_src) - + if (!(annotation_type %in% c(go_types, annotation_columns))) { stop("Unknown annotation type!") } - + if (!(feature_type %in% annotation_keytypes)) { stop("Unknown feature_type!") } - + if (is.null(features)) { features <- AnnotationDbi::keys(annotation_src, feature_type) } - + if (annotation_type %in% go_types) { check_package_installed("GO.db") requireNamespace("GO.db") godb = GO.db::GO.db - feature_ann_map <- suppressMessages(AnnotationDbi::select(annotation_src, keys = features, - keytype = feature_type, - columns = "GOALL")) - + feature_ann_map <- suppressMessages(AnnotationDbi::select( + annotation_src, + keys = features, + keytype = feature_type, + columns = "GOALL" + )) + if (annotation_type %in% go_sub) { - feature_ann_map <- feature_ann_map[feature_ann_map$ONTOLOGYALL %in% annotation_type, ] + feature_ann_map <- feature_ann_map[ + feature_ann_map$ONTOLOGYALL %in% annotation_type, + ] } - - - ann_feature_list <- split(feature_ann_map[[feature_type]], feature_ann_map[["GOALL"]]) + + ann_feature_list <- split( + feature_ann_map[[feature_type]], + feature_ann_map[["GOALL"]] + ) ann_feature_list <- lapply(ann_feature_list, unique) - ann_description <- suppressMessages(AnnotationDbi::select(godb, keys = names(ann_feature_list), columns = "TERM", keytype = "GOID")$TERM) + ann_description <- suppressMessages( + AnnotationDbi::select( + godb, + keys = names(ann_feature_list), + columns = "TERM", + keytype = "GOID" + )$TERM + ) names(ann_description) <- names(ann_feature_list) - + if (annotation_type %in% "GO") { go_ontology_map <- unique(feature_ann_map[, c("GOALL", "ONTOLOGYALL")]) go_ontology <- go_ontology_map$ONTOLOGYALL @@ -91,42 +109,51 @@ get_db_annotation <- function(orgdb = "org.Hs.eg.db", features = NULL, feature_t ann_description <- paste0(go_ontology, ":", ann_description) names(ann_description) <- names(go_ontology) } - - - annotation_obj <- categoryCompare2::annotation(annotation_features = ann_feature_list, - description = ann_description, - annotation_type = annotation_type, - feature_type = feature_type) + + annotation_obj <- categoryCompare2::annotation( + annotation_features = ann_feature_list, + description = ann_description, + annotation_type = annotation_type, + feature_type = feature_type + ) } else { - feature_ann_map <- suppressMessages(AnnotationDbi::select(annotation_src, keys = features, - keytype = feature_type, - columns = annotation_type)) - ann_feature_list <- split(feature_ann_map[[feature_type]], feature_ann_map[[annotation_type]]) + feature_ann_map <- suppressMessages(AnnotationDbi::select( + annotation_src, + keys = features, + keytype = feature_type, + columns = annotation_type + )) + ann_feature_list <- split( + feature_ann_map[[feature_type]], + feature_ann_map[[annotation_type]] + ) ann_feature_list <- lapply(ann_feature_list, unique) - - annotation_obj <- categoryCompare2::annotation(annotation_features = ann_feature_list, - annotation_type = annotation_type, - feature_type = feature_type) + + annotation_obj <- categoryCompare2::annotation( + annotation_features = ann_feature_list, + annotation_type = annotation_type, + feature_type = feature_type + ) } - + annotation_obj } #' annotation to json -#' +#' #' Given a `categoryCompare2` annotation object, generate a JSON representation #' that can be used with the command line executable -#' +#' #' @param annotation_obj the annotation object #' @param json_file the file to save it to -#' +#' #' @return the json string (invisibly) #' @export -annotation_2_json <- function(annotation_obj, json_file = NULL){ - obj_list <- purrr::map(slotNames(annotation_obj), function(x){ +annotation_2_json <- function(annotation_obj, json_file = NULL) { + obj_list <- purrr::map(slotNames(annotation_obj), function(x) { tmp_data <- slot(annotation_obj, x) - + if (length(tmp_data) != 0) { if (x %in% c("description", "links")) { out_data <- as.list(tmp_data) @@ -139,17 +166,17 @@ annotation_2_json <- function(annotation_obj, json_file = NULL){ out_data }) names(obj_list) <- slotNames(annotation_obj) - + obj_list <- obj_list[!purrr::map_lgl(obj_list, is.null)] - - obj_json <- jsonlite::toJSON(obj_list, pretty = TRUE, auto_unbox = TRUE) + + obj_json <- jsonlite::toJSON(obj_list, pretty = TRUE, auto_unbox = FALSE) if (!is.null(json_file)) { cat(obj_json, file = json_file, sep = "\n") - } + } invisible(obj_json) } -replace_null <- function(x){ +replace_null <- function(x) { if (is.null(x)) { NA } else { @@ -158,26 +185,32 @@ replace_null <- function(x){ } #' json to annotation -#' +#' #' Given a JSON based annotation object, read it in and create the `annotation` -#' for actually doing enrichment. -#' +#' for actually doing enrichment. +#' #' @param json_file the json annotation file -#' +#' #' @return annotation object #' @export -json_2_annotation <- function(json_file){ +json_2_annotation <- function(json_file) { stopifnot(file.exists(json_file)) annotation_list <- jsonlite::fromJSON(json_file, simplifyVector = TRUE) #print(names(annotation_list)) - + if (is.null(names(annotation_list))) { annotation_list <- annotation_list[[1]] } - + if (!is.null(annotation_list$description)) { - annotation_list$description <- purrr::map(annotation_list$description, replace_null) - annotation_list$description <- unlist(annotation_list$description, use.names = TRUE) + annotation_list$description <- purrr::map( + annotation_list$description, + replace_null + ) + annotation_list$description <- unlist( + annotation_list$description, + use.names = TRUE + ) } else { annotation_list$description <- character(0) } @@ -189,44 +222,54 @@ json_2_annotation <- function(json_file){ #print(names(annotation_list)) #print(annotation_list$description) - annotation(annotation_features = annotation_list$annotation_features, - annotation_type = annotation_list$annotation_type, - description = annotation_list$description, - links = annotation_list$links, - feature_type = annotation_list$feature_type) + annotation( + annotation_features = annotation_list$annotation_features, + annotation_type = annotation_list$annotation_type, + description = annotation_list$description, + links = annotation_list$links, + feature_type = annotation_list$feature_type + ) } #' annotation reversal -#' +#' #' Given a JSON file of features to annotations, reverse to turn it into #' annotations to features, and optionally add some meta-information about them. -#' +#' #' @param json_file the json file to use #' @param out_file the json file to write out to #' @param feature_type the type of features #' @param annotation_type the type of annotations -#' +#' #' @importFrom jsonlite fromJSON toJSON #' @export #' @return the json object, invisibly -#' -json_annotation_reversal <- function(json_file, out_file = "annotations.json", - feature_type = NULL, annotation_type = NULL){ +#' +json_annotation_reversal <- function( + json_file, + out_file = "annotations.json", + feature_type = NULL, + annotation_type = NULL +) { stopifnot(file.exists(json_file)) - - in_annotation <- jsonlite::fromJSON(json_file, simplifyVector = FALSE, flatten = TRUE) + + in_annotation <- jsonlite::fromJSON( + json_file, + simplifyVector = FALSE, + flatten = TRUE + ) if (length(in_annotation) == 1) { in_annotation <- in_annotation[[1]] } - + if (!is.null(in_annotation$Annotations)) { gene_annotations <- in_annotation$Annotations } else { gene_annotations <- in_annotation # we assume that if there is no Annotation - # specific entry, then it is probably just the - # gene annotations, and grab them all. + # specific entry, then it is probably just the + # gene annotations, and grab them all. } - + if (!is.null(in_annotation$Description)) { annotation_description <- in_annotation$Description if (is.list(annotation_description)) { @@ -234,20 +277,22 @@ json_annotation_reversal <- function(json_file, out_file = "annotations.json", } else { warning("Description must be a named list! Removing Descriptions!") annotation_description <- character(0) - } + } } else { annotation_description <- character(0) } - + rev_annotation <- Biobase::reverseSplit(gene_annotations) rev_annotation <- purrr::map(rev_annotation, unique) - - out_annotation <- annotation(annotation_features = rev_annotation, - description = annotation_description, - links = character(0), - annotation_type = annotation_type, - feature_type = feature_type) - + + out_annotation <- annotation( + annotation_features = rev_annotation, + description = annotation_description, + links = character(0), + annotation_type = annotation_type, + feature_type = feature_type + ) + out_json <- annotation_2_json(out_annotation, out_file) out_json } diff --git a/exec/feature_files_2_json.R b/exec/feature_files_2_json.R index 8b6daa0..cccc673 100755 --- a/exec/feature_files_2_json.R +++ b/exec/feature_files_2_json.R @@ -128,4 +128,7 @@ if (!("universe" %in% names(file_args))) { feature_lists <- get_significant_feature_lists(file_args) } -cat(jsonlite::toJSON(feature_lists, pretty = TRUE), file = script_options$json) +cat( + jsonlite::toJSON(feature_lists, pretty = TRUE, auto_unbox = FALSE), + file = script_options$json +) diff --git a/tests/testthat/test-gocats.R b/tests/testthat/test-gocats.R index bbc02f4..bc66c64 100644 --- a/tests/testthat/test-gocats.R +++ b/tests/testthat/test-gocats.R @@ -1,17 +1,18 @@ -test_that("gocats annotation importing works", { - ancestors_file = system.file( - "extdata", - "test_data", - "ancestors.json.gz", - package = "categoryCompare2" - ) - namespace_file = system.file( - "extdata", - "test_data", - "namespace.json.gz", - package = "categoryCompare2" - ) +ancestors_file = system.file( + "extdata", + "test_data", + "ancestors.json.gz", + package = "categoryCompare2" +) +namespace_file = system.file( + "extdata", + "test_data", + "namespace.json.gz", + package = "categoryCompare2" +) + +test_that("gocats annotation importing works", { ensembl_keys = AnnotationDbi::keys( org.Hs.eg.db::org.Hs.eg.db, keytype = "ENSEMBL" @@ -83,3 +84,29 @@ test_that("gocats annotation importing works", { expect_equal(with_translation@annotation_type, "whatever") expect_equal(with_translation@feature_type, "ENSEMBL") }) + +test_that("json exporting and importing works properly", { + without_namespace = gocats_to_annotation( + ancestors_file, + namespace_file = NULL + ) + withr::with_file("test_all.json", { + annotation_2_json(without_namespace, json_file = "test_all.json") + in_annotation = json_2_annotation("test_all.json") + expect_equal(without_namespace, in_annotation) + }) + + withr::with_file("test_small.json", { + without_namespace2 = without_namespace + without_namespace2@annotation_features[[ + 1 + ]] = without_namespace@annotation_features[[1]][1] + annotation_2_json(without_namespace2, json_file = "test_small.json") + line_data = base::readLines("test_small.json", 20) + expect_true(grepl("\\[.*\\]", line_data[3])) + + without_namespace2@counts[1] = 1 + in_annotation = json_2_annotation("test_small.json") + expect_equal(without_namespace2, in_annotation) + }) +})