Skip to content

Commit

Permalink
Remove KEGG ID to gene name conversion from get_kegg_gsets (#206)
Browse files Browse the repository at this point in the history
* remove conversion functionality from `get_kegg_gsets()` and return KEGG IDs

* update NEWS; bump dev version
  • Loading branch information
egeulgen authored May 2, 2024
1 parent 3aa79ae commit e171685
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 34 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: pathfindR
Type: Package
Title: Enrichment Analysis Utilizing Active Subnetworks
Version: 2.4.0.9000
Version: 2.4.0.9001
Authors@R: c(person("Ege", "Ulgen",
role = c("cre", "cph"),
email = "[email protected]",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# pathfindR (development version)

## Minor Changes and Bug Fixes

- fixed a bug regarding KEGG gene set fetching: removed the KEGG ID to gene name conversion functionality from `get_kegg_gsets()`, which now returns KEGG IDs. Users who wish to convert the returned identifiers can do so with a more appropriate tool (e.g. BioMart)

# pathfindR 2.4.0

## Major Changes
Expand Down
29 changes: 5 additions & 24 deletions R/data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,26 +175,13 @@ gset_list_from_gmt <- function(path2gmt, descriptions_idx = 2) {
#' of all available organisms, see \url{https://www.genome.jp/kegg/catalog/org_list.html}
#'
#' @return list containing 2 elements: \itemize{
#' \item{gene_sets - A list containing the genes involved in each KEGG pathway}
#' \item{gene_sets - A list containing KEGG IDs for the genes involved in each KEGG pathway}
#' \item{descriptions - A named vector containing the descriptions for each KEGG pathway}
#' }
get_kegg_gsets <- function(org_code = "hsa") {

message("Grab a cup of coffee, this will take a while...")

gene_table_url <- paste0("https://rest.kegg.jp/list/", org_code)
gene_table_result <- httr::GET(gene_table_url)
gene_table_result <- httr::content(gene_table_result, "text")

parsed_gene_table_result <- strsplit(gene_table_result, "\n")[[1]]
kegg_gene_table <- data.frame(
kegg_id = unname(vapply(parsed_gene_table_result, function(x) unlist(strsplit(x, "\t"))[1], "org:123")),
symbol = unname(vapply(parsed_gene_table_result, function(x) unlist(strsplit(unlist(strsplit(x, "\t"))[4], ";"))[1], "symbol"))
)
# remove mistaken lines
kegg_gene_table <- kegg_gene_table[grep("^((,\\s)?[A-Za-z0-9_-]+(\\@)?)+$", kegg_gene_table$symbol), ]


all_pathways_url <- paste0("https://rest.kegg.jp/list/pathway/", org_code)
all_pathways_result <- httr::GET(all_pathways_url)
all_pathways_result <- httr::content(all_pathways_result, "text")
Expand All @@ -205,16 +192,10 @@ get_kegg_gsets <- function(org_code = "hsa") {

genes_by_pathway <- lapply(pathway_ids, function(pw_id) {
pathways_graph <- ggkegg::pathway(pid = pw_id, directory = tempdir(), use_cache = FALSE, return_tbl_graph = FALSE)
all_pw_gene_ids <- igraph::V(pathways_graph)$name[igraph::V(pathways_graph)$type == "gene"]
all_pw_gene_ids <- unlist(strsplit(all_pw_gene_ids, " "))
all_pw_gene_ids <- unique(all_pw_gene_ids)

all_pw_gene_symbols <- kegg_gene_table$symbol[match(all_pw_gene_ids, kegg_gene_table$kegg_id)]
all_pw_gene_symbols <- all_pw_gene_symbols[!is.na(all_pw_gene_symbols)]
all_pw_gene_symbols <- unname(vapply(all_pw_gene_symbols, function(x) unlist(strsplit(x, ", "))[1], "symbol"))
all_pw_gene_symbols <- unique(all_pw_gene_symbols)

return(all_pw_gene_symbols)
all_pw_kegg_ids <- igraph::V(pathways_graph)$name[igraph::V(pathways_graph)$type == "gene"]
all_pw_kegg_ids <- unlist(strsplit(all_pw_kegg_ids, " "))
all_pw_kegg_ids <- unique(all_pw_kegg_ids)
return(all_pw_kegg_ids)
})

names(genes_by_pathway) <- pathway_ids
Expand Down
2 changes: 1 addition & 1 deletion man/get_kegg_gsets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions tests/testthat/test-data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,11 @@ test_that("`gset_list_from_gmt()` -- works as expected", {

test_that("`get_kegg_gsets()` -- works as expected", {
skip_on_cran()
mock_responses <- c(
httr::content(httr::GET(paste0("https://rest.kegg.jp/list/eco")), "text"),
"eco00010\tdescription\neco00071\tdescription2"
)

call_count <- 0
mock_response <- "eco00010\tdescription\neco00071\tdescription2"

# function returning the single mocked pathway-list response
mock_content <- function(...) {
call_count <<- call_count + 1
return(mock_responses[call_count])
return(mock_response)
}


Expand Down

0 comments on commit e171685

Please sign in to comment.