Skip to content

Commit

Permalink
Feature/ add ability to fetch latest version of BioGRID (#203)
Browse files Browse the repository at this point in the history
* update `get_biogrid_pin()` so that it can determine the latest version and download/process it from BioGRID

* update NEWS; bump dev version
  • Loading branch information
egeulgen authored Apr 27, 2024
1 parent fb21312 commit 6269316
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 6 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: pathfindR
Type: Package
Title: Enrichment Analysis Utilizing Active Subnetworks
Version: 2.3.1.9002
Version: 2.3.1.9003
Authors@R: c(person("Ege", "Ulgen",
role = c("cre", "cph"),
email = "[email protected]",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
- updated the `get_kegg_gsets()` function to also use `ggkegg` for fetching genes per pathway data
- removed unneeded dependencies: `magick`, `KEGGgraph` and `KEGGREST`

## Minor Changes and Bug Fixes

- updated the `get_biogrid_pin()` function so that it can now determine the latest version and download/process it from BioGRID (via setting `release = "latest"`, which is now the default behavior)

# pathfindR 2.3.1

## Minor Changes and Bug Fixes
Expand Down
12 changes: 10 additions & 2 deletions R/data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ process_pin <- function(pin_df) {
#' list of available organisms (default = 'Homo_sapiens')
#' @param path2pin the path of the file to save the PIN data. By default, the
#' PIN data is saved in a temporary file
#' @param release the requested BioGRID release (default = '4.4.224')
#' @param release the requested BioGRID release (default = 'latest')
#'
#' @return the path of the file in which the PIN data was saved. If
#' \code{path2pin} was not supplied by the user, the PIN data is saved in a
#' temporary file
get_biogrid_pin <- function(org = "Homo_sapiens", path2pin, release = "4.4.224") {
get_biogrid_pin <- function(org = "Homo_sapiens", path2pin, release = "latest") {
# check organism name
all_org_names <- c("Anopheles_gambiae_PEST", "Apis_mellifera", "Arabidopsis_thaliana_Columbia",
"Bacillus_subtilis_168", "Bos_taurus", "Caenorhabditis_elegans", "Candida_albicans_SC5314",
Expand All @@ -55,6 +55,14 @@ get_biogrid_pin <- function(org = "Homo_sapiens", path2pin, release = "4.4.224")
stop(paste(org, "is not a valid Biogrid organism.", "Available organisms are listed on: https://wiki.thebiogrid.org/doku.php/statistics"))
}

if (release == "latest") {
result <- httr::GET("https://downloads.thebiogrid.org/BioGRID/Latest-Release/")
result <- httr::content(result, "text")

h2_matches <- regexpr("(?<=<h2>BioGRID Release\\s)(\\d\\.\\d\\.\\d+)", result, perl = TRUE)
release <- regmatches(result, h2_matches)
}

# release directory for download
rel_dir <- paste0("BIOGRID-", release)

Expand Down
4 changes: 2 additions & 2 deletions man/get_biogrid_pin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 19 additions & 1 deletion tests/testthat/test-data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,31 @@ test_that("`get_biogrid_pin()` -- returns a path to a valid PIN file", {
expected_biogrid_pin_df <- data.frame(V1 = expected_biogrid_pin_df$Interactor_A,
V2 = "pp", V3 = expected_biogrid_pin_df$Interactor_B)

pin_path <- get_biogrid_pin()
pin_path <- get_biogrid_pin(release = "4.4.211")
pin_df <- read.delim(pin_path, header = FALSE)
expect_true(ncol(pin_df) == 3)
expect_true(all(pin_df[, 2] == "pp"))
expect_identical(pin_df, expected_biogrid_pin_df)
})

test_that("`get_biogrid_pin()` -- determines and downloads the latest version", {
  # Stub the download / unzip / read steps used inside `get_biogrid_pin()` so
  # that the toy PIN stands in for the table read from the BioGRID archive.
  # The stubs are applied directly in the test body (not via a helper).
  # NOTE(review): no stub is installed for the release lookup itself, so this
  # test presumably still needs network access -- confirm.
  mockery::stub(get_biogrid_pin, "utils::download.file", NULL)
  mockery::stub(
    get_biogrid_pin,
    "utils::unzip",
    list(Name = "BIOGRID-ORGANISM-Homo_sapiens-X.X.X.tab3.txt")
  )
  mockery::stub(get_biogrid_pin, "utils::read.delim", toy_biogrid_pin)

  # Expected output: the toy PIN run through `process_pin()`, laid out as the
  # three columns V1 / V2 / V3 with the constant "pp" in the middle column.
  toy_interactions <- toy_biogrid_pin
  colnames(toy_interactions) <- c("Interactor_A", "Interactor_B")
  processed_toy <- process_pin(toy_interactions)
  expected_pin <- data.frame(
    V1 = processed_toy$Interactor_A,
    V2 = "pp",
    V3 = processed_toy$Interactor_B
  )

  # Call with all arguments left at their defaults, then read back the file
  # whose path is returned.
  written_path <- get_biogrid_pin()
  observed_pin <- read.delim(written_path, header = FALSE)

  expect_true(ncol(observed_pin) == 3)
  expect_true(all(observed_pin[, 2] == "pp"))
  expect_identical(observed_pin, expected_pin)
})

test_that("`get_biogrid_pin()` -- error check works", {
# invalid organism error
expect_error(get_biogrid_pin(org = "Hsapiens"), paste("Hsapiens is not a valid Biogrid organism.",
Expand Down

0 comments on commit 6269316

Please sign in to comment.