Skip to content

Commit

Permalink
Fix/implement non hsa id conversion in get kegg gsets (#201)
Browse files Browse the repository at this point in the history
* fix kegg id to symbol conversion in `get_kegg_gsets()`

* bump dev version
  • Loading branch information
egeulgen authored Apr 26, 2024
1 parent 5bb9800 commit 9d2d51a
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 24 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: pathfindR
Type: Package
Title: Enrichment Analysis Utilizing Active Subnetworks
Version: 2.3.1.9001
Version: 2.3.1.9002
Authors@R: c(person("Ege", "Ulgen",
role = c("cre", "cph"),
email = "[email protected]",
Expand Down
35 changes: 22 additions & 13 deletions R/data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,28 +174,37 @@ get_kegg_gsets <- function(org_code = "hsa") {

message("Grab a cup of coffee, this will take a while...")

url <- paste0("https://rest.kegg.jp/list/pathway/", org_code)
result <- httr::GET(url)
result <- httr::content(result, "text")

parsed_result <- strsplit(result, "\n")[[1]]
pathway_ids <- vapply(parsed_result, function(x) unlist(strsplit(x, "\t"))[1], "id")
pathway_descriptons <- vapply(parsed_result, function(x) unlist(strsplit(x, "\t"))[2], "description")
gene_table_url <- paste0("https://rest.kegg.jp/list/", org_code)
gene_table_result <- httr::GET(gene_table_url)
gene_table_result <- httr::content(gene_table_result, "text")

parsed_gene_table_result <- strsplit(gene_table_result, "\n")[[1]]
kegg_gene_table <- data.frame(
kegg_id = unname(vapply(parsed_gene_table_result, function(x) unlist(strsplit(x, "\t"))[1], "org:123")),
symbol = unname(vapply(parsed_gene_table_result, function(x) unlist(strsplit(unlist(strsplit(x, "\t"))[4], ";"))[1], "symbol"))
)
# remove mistaken lines
kegg_gene_table <- kegg_gene_table[grep("^((,\\s)?[A-Za-z0-9_-]+(\\@)?)+$", kegg_gene_table$symbol), ]


all_pathways_url <- paste0("https://rest.kegg.jp/list/pathway/", org_code)
all_pathways_result <- httr::GET(all_pathways_url)
all_pathways_result <- httr::content(all_pathways_result, "text")
parsed_all_pathways_result <- strsplit(all_pathways_result, "\n")[[1]]
pathway_ids <- vapply(parsed_all_pathways_result, function(x) unlist(strsplit(x, "\t"))[1], "id")
pathway_descriptons <- vapply(parsed_all_pathways_result, function(x) unlist(strsplit(x, "\t"))[2], "description")
names(pathway_descriptons) <- pathway_ids

genes_by_pathway <- lapply(pathway_ids, function(pw_id) {
pathways_graph <- ggkegg::pathway(pid = pw_id, directory = tempdir(), use_cache = FALSE, return_tbl_graph = FALSE)
all_pw_gene_ids <- igraph::V(pathways_graph)$name[igraph::V(pathways_graph)$type == "gene"]
all_pw_gene_ids <- unlist(strsplit(all_pw_gene_ids, " "))
all_pw_gene_ids <- unique(all_pw_gene_ids)
all_pw_gene_ids <- sub("^hsa:", "", all_pw_gene_ids)

all_pw_gene_symbols <- AnnotationDbi::mget(
all_pw_gene_ids, org.Hs.eg.db::org.Hs.egSYMBOL, ifnotfound = NA
)

all_pw_gene_symbols <- unique(unname(unlist(all_pw_gene_symbols)))
all_pw_gene_symbols <- kegg_gene_table$symbol[match(all_pw_gene_ids, kegg_gene_table$kegg_id)]
all_pw_gene_symbols <- all_pw_gene_symbols[!is.na(all_pw_gene_symbols)]
all_pw_gene_symbols <- unname(vapply(all_pw_gene_symbols, function(x) unlist(strsplit(x, ", "))[1], "symbol"))
all_pw_gene_symbols <- unique(all_pw_gene_symbols)

return(all_pw_gene_symbols)
})
Expand Down
35 changes: 25 additions & 10 deletions tests/testthat/test-data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,35 @@ test_that("`gset_list_from_gmt()` -- works as expected", {

test_that("`get_kegg_gsets()` -- works as expected", {
skip_on_cran()
mock_content <- "hsa04972\tdescription\nhsa04962\tdescription2"
with_mock(`httr::content` = function(...) mock_content, {
expect_is(hsa_kegg <- pathfindR:::get_kegg_gsets(), "list")
mock_responses <- c(
httr::content(httr::GET(paste0("https://rest.kegg.jp/list/eco")), "text"),
"eco00010\tdescription\neco00071\tdescription2"
)

call_count <- 0

# function to manage sequential responses
mock_content <- function(...) {
call_count <<- call_count + 1
return(mock_responses[call_count])
}


with_mock(`httr::content` = mock_content, {
expect_is(toy_eco_kegg <- pathfindR:::get_kegg_gsets(), "list")
})

expect_length(hsa_kegg, 2)
expect_true(all(names(hsa_kegg) == c("gene_sets", "descriptions")))
expect_true(all(names(hsa_kegg[["gene_sets"]]) %in% names(hsa_kegg[["descriptions"]])))
expect_length(toy_eco_kegg, 2)
expect_true(all(names(toy_eco_kegg) == c("gene_sets", "descriptions")))
expect_true(all(names(toy_eco_kegg[["gene_sets"]]) %in% names(toy_eco_kegg[["descriptions"]])))
expect_length(toy_eco_kegg[["gene_sets"]], 2)
expect_length(toy_eco_kegg[["descriptions"]], 2)

expect_true(hsa_kegg[["descriptions"]]["hsa04972"] == "description")
expect_true(hsa_kegg[["descriptions"]]["hsa04962"] == "description2")
expect_true(toy_eco_kegg[["descriptions"]]["eco00010"] == "description")
expect_true(toy_eco_kegg[["descriptions"]]["eco00071"] == "description2")

expect_length(hsa_kegg[["gene_sets"]][["hsa04972"]], 102)
expect_length(hsa_kegg[["gene_sets"]][["hsa04962"]], 44)
expect_length(toy_eco_kegg[["gene_sets"]][["eco00010"]], 47)
expect_length(toy_eco_kegg[["gene_sets"]][["eco00071"]], 15)
})

test_that("`get_reactome_gsets()` -- works as expected", {
Expand Down

0 comments on commit 9d2d51a

Please sign in to comment.