Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: categoryCompare2
Version: 0.200.3
Version: 0.200.4
Title: Meta-Analysis of High-Throughput Experiments Using Feature
Annotations
Authors@R: c(
Expand Down
219 changes: 132 additions & 87 deletions R/managing_annotations.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
#' orgdb annotations
#'
#'
#' Generate an annotation object for genes from an "org.*.db" object by pulling
#' the requested information from it.
#'
#'
#' @param orgdb the name of the org.*.db object
#' @param features which features to get annotations for
#' @param feature_type which type of IDs to map (see details)
#' @param annotation_type the type of annotation to grab (see details)
#'
#'
#' @md
#'
#'
#' @details This function generates a `categoryCompare2` annotation object
#' from a Bioconductor "org.*.db" object. Even though different gene identifiers can
#' be used, almost all of the mappings are via ENTREZID.
#'
#'
#' The set of feature or gene keys that can be used to create the annotations include:
#' * ENTREZID: ENTREZ gene ids
#' * ACCNUM: genbank accession numbers
Expand All @@ -25,7 +25,7 @@
#' * REFSEQ: reference sequence IDs, NM, NP, NR, XP, etc
#' * UNIGENE: gene cluster ids from UniGene, e.g. Hs.88556
#' * UNIPROT: protein ids from UniProt, e.g. P80404
#'
#'
#' The set of annotations that can be mapped to features include:
#' * GO: annotations from gene ontology
#' * PATH: KEGG Pathway identifiers (not updated since 2011!)
Expand All @@ -35,54 +35,72 @@
#' * PROSITE
#' * PFAM: protein family identifiers
#' * IPI: International Protein Index accession numbers
#'
#'
#' For GO annotations, it is also possible to pass `GO` to use all 3 sub-ontologies simultaneously,
#' or any combination of `BP`, `MF`, and `CC`.
#'
#'
#' @export
#' @return annotation object
#'
get_db_annotation <- function(orgdb = "org.Hs.eg.db", features = NULL, feature_type = "ENTREZID",
annotation_type = "GO"){
#'
get_db_annotation <- function(
orgdb = "org.Hs.eg.db",
features = NULL,
feature_type = "ENTREZID",
annotation_type = "GO"
) {
go_types <- c("GO", "BP", "MF", "CC")
go_sub <- c("BP", "MF", "CC")
check_package_installed(orgdb)
suppressPackageStartupMessages(library(orgdb, character.only = TRUE))

annotation_src <- eval(parse(text = orgdb))
annotation_columns <- AnnotationDbi::columns(annotation_src)
annotation_keytypes <- AnnotationDbi::keytypes(annotation_src)

if (!(annotation_type %in% c(go_types, annotation_columns))) {
stop("Unknown annotation type!")
}

if (!(feature_type %in% annotation_keytypes)) {
stop("Unknown feature_type!")
}

if (is.null(features)) {
features <- AnnotationDbi::keys(annotation_src, feature_type)
}

if (annotation_type %in% go_types) {
check_package_installed("GO.db")
requireNamespace("GO.db")
godb = GO.db::GO.db
feature_ann_map <- suppressMessages(AnnotationDbi::select(annotation_src, keys = features,
keytype = feature_type,
columns = "GOALL"))

feature_ann_map <- suppressMessages(AnnotationDbi::select(
annotation_src,
keys = features,
keytype = feature_type,
columns = "GOALL"
))

if (annotation_type %in% go_sub) {
feature_ann_map <- feature_ann_map[feature_ann_map$ONTOLOGYALL %in% annotation_type, ]
feature_ann_map <- feature_ann_map[
feature_ann_map$ONTOLOGYALL %in% annotation_type,
]
}


ann_feature_list <- split(feature_ann_map[[feature_type]], feature_ann_map[["GOALL"]])

ann_feature_list <- split(
feature_ann_map[[feature_type]],
feature_ann_map[["GOALL"]]
)
ann_feature_list <- lapply(ann_feature_list, unique)
ann_description <- suppressMessages(AnnotationDbi::select(godb, keys = names(ann_feature_list), columns = "TERM", keytype = "GOID")$TERM)
ann_description <- suppressMessages(
AnnotationDbi::select(
godb,
keys = names(ann_feature_list),
columns = "TERM",
keytype = "GOID"
)$TERM
)
names(ann_description) <- names(ann_feature_list)

if (annotation_type %in% "GO") {
go_ontology_map <- unique(feature_ann_map[, c("GOALL", "ONTOLOGYALL")])
go_ontology <- go_ontology_map$ONTOLOGYALL
Expand All @@ -91,42 +109,51 @@ get_db_annotation <- function(orgdb = "org.Hs.eg.db", features = NULL, feature_t
ann_description <- paste0(go_ontology, ":", ann_description)
names(ann_description) <- names(go_ontology)
}


annotation_obj <- categoryCompare2::annotation(annotation_features = ann_feature_list,
description = ann_description,
annotation_type = annotation_type,
feature_type = feature_type)

annotation_obj <- categoryCompare2::annotation(
annotation_features = ann_feature_list,
description = ann_description,
annotation_type = annotation_type,
feature_type = feature_type
)
} else {
feature_ann_map <- suppressMessages(AnnotationDbi::select(annotation_src, keys = features,
keytype = feature_type,
columns = annotation_type))
ann_feature_list <- split(feature_ann_map[[feature_type]], feature_ann_map[[annotation_type]])
feature_ann_map <- suppressMessages(AnnotationDbi::select(
annotation_src,
keys = features,
keytype = feature_type,
columns = annotation_type
))
ann_feature_list <- split(
feature_ann_map[[feature_type]],
feature_ann_map[[annotation_type]]
)
ann_feature_list <- lapply(ann_feature_list, unique)

annotation_obj <- categoryCompare2::annotation(annotation_features = ann_feature_list,
annotation_type = annotation_type,
feature_type = feature_type)

annotation_obj <- categoryCompare2::annotation(
annotation_features = ann_feature_list,
annotation_type = annotation_type,
feature_type = feature_type
)
}

annotation_obj
}


#' annotation to json
#'
#'
#' Given a `categoryCompare2` annotation object, generate a JSON representation
#' that can be used with the command line executable
#'
#'
#' @param annotation_obj the annotation object
#' @param json_file the file to save it to
#'
#'
#' @return the json string (invisibly)
#' @export
annotation_2_json <- function(annotation_obj, json_file = NULL){
obj_list <- purrr::map(slotNames(annotation_obj), function(x){
annotation_2_json <- function(annotation_obj, json_file = NULL) {
obj_list <- purrr::map(slotNames(annotation_obj), function(x) {
tmp_data <- slot(annotation_obj, x)

if (length(tmp_data) != 0) {
if (x %in% c("description", "links")) {
out_data <- as.list(tmp_data)
Expand All @@ -139,17 +166,17 @@ annotation_2_json <- function(annotation_obj, json_file = NULL){
out_data
})
names(obj_list) <- slotNames(annotation_obj)

obj_list <- obj_list[!purrr::map_lgl(obj_list, is.null)]
obj_json <- jsonlite::toJSON(obj_list, pretty = TRUE, auto_unbox = TRUE)

obj_json <- jsonlite::toJSON(obj_list, pretty = TRUE, auto_unbox = FALSE)
if (!is.null(json_file)) {
cat(obj_json, file = json_file, sep = "\n")
}
}
invisible(obj_json)
}

replace_null <- function(x){
replace_null <- function(x) {
if (is.null(x)) {
NA
} else {
Expand All @@ -158,26 +185,32 @@ replace_null <- function(x){
}

#' json to annotation
#'
#'
#' Given a JSON based annotation object, read it in and create the `annotation`
#' for actually doing enrichment.
#'
#' for actually doing enrichment.
#'
#' @param json_file the json annotation file
#'
#'
#' @return annotation object
#' @export
json_2_annotation <- function(json_file){
json_2_annotation <- function(json_file) {
stopifnot(file.exists(json_file))
annotation_list <- jsonlite::fromJSON(json_file, simplifyVector = TRUE)
#print(names(annotation_list))

if (is.null(names(annotation_list))) {
annotation_list <- annotation_list[[1]]
}

if (!is.null(annotation_list$description)) {
annotation_list$description <- purrr::map(annotation_list$description, replace_null)
annotation_list$description <- unlist(annotation_list$description, use.names = TRUE)
annotation_list$description <- purrr::map(
annotation_list$description,
replace_null
)
annotation_list$description <- unlist(
annotation_list$description,
use.names = TRUE
)
} else {
annotation_list$description <- character(0)
}
Expand All @@ -189,65 +222,77 @@ json_2_annotation <- function(json_file){

#print(names(annotation_list))
#print(annotation_list$description)
annotation(annotation_features = annotation_list$annotation_features,
annotation_type = annotation_list$annotation_type,
description = annotation_list$description,
links = annotation_list$links,
feature_type = annotation_list$feature_type)
annotation(
annotation_features = annotation_list$annotation_features,
annotation_type = annotation_list$annotation_type,
description = annotation_list$description,
links = annotation_list$links,
feature_type = annotation_list$feature_type
)
}

#' annotation reversal
#'
#'
#' Given a JSON file of features to annotations, reverse to turn it into
#' annotations to features, and optionally add some meta-information about them.
#'
#'
#' @param json_file the json file to use
#' @param out_file the json file to write out to
#' @param feature_type the type of features
#' @param annotation_type the type of annotations
#'
#'
#' @importFrom jsonlite fromJSON toJSON
#' @export
#' @return the json object, invisibly
#'
json_annotation_reversal <- function(json_file, out_file = "annotations.json",
feature_type = NULL, annotation_type = NULL){
#'
json_annotation_reversal <- function(
json_file,
out_file = "annotations.json",
feature_type = NULL,
annotation_type = NULL
) {
stopifnot(file.exists(json_file))

in_annotation <- jsonlite::fromJSON(json_file, simplifyVector = FALSE, flatten = TRUE)

in_annotation <- jsonlite::fromJSON(
json_file,
simplifyVector = FALSE,
flatten = TRUE
)
if (length(in_annotation) == 1) {
in_annotation <- in_annotation[[1]]
}

if (!is.null(in_annotation$Annotations)) {
gene_annotations <- in_annotation$Annotations
} else {
gene_annotations <- in_annotation # we assume that if there is no Annotation
# specific entry, then it is probably just the
# gene annotations, and grab them all.
# specific entry, then it is probably just the
# gene annotations, and grab them all.
}

if (!is.null(in_annotation$Description)) {
annotation_description <- in_annotation$Description
if (is.list(annotation_description)) {
annotation_description <- unlist(annotation_description, use.names = TRUE)
} else {
warning("Description must be a named list! Removing Descriptions!")
annotation_description <- character(0)
}
}
} else {
annotation_description <- character(0)
}

rev_annotation <- Biobase::reverseSplit(gene_annotations)
rev_annotation <- purrr::map(rev_annotation, unique)

out_annotation <- annotation(annotation_features = rev_annotation,
description = annotation_description,
links = character(0),
annotation_type = annotation_type,
feature_type = feature_type)


out_annotation <- annotation(
annotation_features = rev_annotation,
description = annotation_description,
links = character(0),
annotation_type = annotation_type,
feature_type = feature_type
)

out_json <- annotation_2_json(out_annotation, out_file)
out_json
}
5 changes: 4 additions & 1 deletion exec/feature_files_2_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,7 @@ if (!("universe" %in% names(file_args))) {
feature_lists <- get_significant_feature_lists(file_args)
}

cat(jsonlite::toJSON(feature_lists, pretty = TRUE), file = script_options$json)
cat(
jsonlite::toJSON(feature_lists, pretty = TRUE, auto_unbox = FALSE),
file = script_options$json
)
Loading
Loading