Skip to content

Commit

Permalink
Merge pull request #24 from ruralinnovation/dev/fix_new_api
Browse files Browse the repository at this point in the history
add referer
  • Loading branch information
defuneste authored Dec 26, 2024
2 parents cefe089 + c403ece commit 6d28263
Show file tree
Hide file tree
Showing 24 changed files with 140 additions and 196 deletions.
16 changes: 7 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: cori.data.fcc
Title: Process FCC data
Version: 0.1.0
Version: 0.1.2
Authors@R:
person(given="Olivier", family="Leroy", email="[email protected]", role = c("aut", "cre"))
Description: Functions to get and process FCC data.
Expand All @@ -11,20 +11,18 @@ RoxygenNote: 7.3.2
Depends:
R (>= 2.10)
LazyData: true
Suggests:
testthat (>= 3.0.0),
pkgdown,
Suggests:
dplyr,
DT,
knitr,
rmarkdown
pkgdown,
rmarkdown,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Imports:
curl,
Imports:
duckdb,
jsonlite,
utils,
DBI,
duckdb,
stringi
URL: https://ruralinnovation.github.io/cori.data.fcc/
Config/Needs/website: rmarkdown
Expand Down
3 changes: 0 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,5 @@ export(get_frn_nbm_bl)
export(get_nbm_available)
export(get_nbm_bl)
export(get_nbm_release)
export(set_user_agent)
export(user_agent)
import(DBI)
import(duckdb)
importFrom(stringi,stri_pad_left)
31 changes: 25 additions & 6 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,28 @@
# cori.data.fcc 0.1.2

## Minor improvements

* Use CORI-brewed `download_file` function; global user_agent no longer needed


# cori.data.fcc 0.1.1

## Minor improvements

* correct typos in data stories. Thanks @camdenblatchly

* update for the new FCC API, whose requests need a referer

* bring back the changelog that you are reading


# cori.data.fcc 0.1.0

## Major Changes

### New datasets

* Add NBM Block: CORI opinionted version designed at the Census block level
* Add NBM Block: CORI opinionated version designed at the Census block level

* Add NBM raws, past 4 releases

Expand All @@ -14,7 +32,7 @@

* `get_nbm_bl()` allows you to get all blocks from one county

* `get_county_nbm_raws()` allows you to get raws NBM data for a specific county and for a release, by default the last one.
* `get_county_nbm_raws()` allows you to get raw NBM data for a specific county and for a release, by default the last one.

### Updated functions

Expand All @@ -28,7 +46,7 @@

## Major Changes

* Provides way to acess Form 477
* Provides way to access Form 477

* Provides data story on Form 477

Expand All @@ -39,8 +57,9 @@
* Organize reference of function by themes


# cori.data.fcc (first release)
# cori.data.fcc 0.0.0

* First functions released.
* First release!
- FCC BDC download functions

* Improve functions with a user_agent defined in aaa.R #9
* Improve functions with a user_agent defined in aaa.R #9
40 changes: 0 additions & 40 deletions R/aaa.R

This file was deleted.

17 changes: 11 additions & 6 deletions R/dl_nbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#' @param release_date a string can be "December 31, 2023" or "June 30, 2023"
#' @param data_type a string "Fixed Broadband"
#' @param data_category a string "Nationwide"
#' @param user_agent a string set up by default
#' @param ... additional parameters for download.file()
#'
#' @return Zipped csv
Expand All @@ -23,8 +22,7 @@
dl_nbm <- function(path_to_dl = "~/data_swamp",
release_date = "June 30, 2023",
data_type = "Fixed Broadband",
data_category = "Nationwide",
user_agent = the$user_agent, ...) {
data_category = "Nationwide", ...) {
# clean my mess
prev_timeout <- getOption("timeout")
on.exit(options(timeout = prev_timeout), add = TRUE)
Expand Down Expand Up @@ -52,8 +50,15 @@ dl_nbm <- function(path_to_dl = "~/data_swamp",
next
}

try(utils::download.file(url = paste0(base_url, one_release_to_dl$id[i], "/1"),
destfile = dest_file,
headers = c("User-Agent" = user_agent), ...))
get_data_url <- paste0(base_url, one_release_to_dl$id[i], "/1")

res <- download_file(get_data_url, dest_file)

# Check res
if (!(dest_file %in% res)) {
message(paste0("Error in download result: ", res))
stop(sprintf("Downloading %s failed", get_data_url))
}

}
}
38 changes: 38 additions & 0 deletions R/download_file.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' Download file function (replacement for download.file)
#'
#' Shells out to the `curl` command-line tool, sending browser-like request
#' headers (including a `Referer` header pointing at the FCC data download
#' page), and writes the response body to `local_file_path`.
#'
#' @param remote_file_url URL to download file from (a single string)
#' @param local_file_path Local path to save file to (a single string); a
#'   leading `~` is expanded before the path is handed to `curl`
#' @return `local_file_path`, invisibly and exactly as supplied, when `curl`
#'   exits with status 0; otherwise the non-zero exit status returned by
#'   `system()`
#'
#' @examples
#' \dontrun{
#' dir.create("~/data_swamp", showWarnings = FALSE, recursive = TRUE)
#' retrieved_file <- download_file(
#' "https://archive.org/offshoot_assets/assets/ia-logo-2c2c2c.03bd7e88c8814d63d0fc..svg",
#' "~/data_swamp/archive.svg")
#' }
#'
#'
download_file <- function(remote_file_url, local_file_path) {
  stopifnot(
    is.character(remote_file_url), length(remote_file_url) == 1L,
    !is.na(remote_file_url), nzchar(remote_file_url),
    is.character(local_file_path), length(local_file_path) == 1L,
    !is.na(local_file_path), nzchar(local_file_path)
  )

  request_headers <- c(
    "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0",
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8",
    "Accept-Language: en-US,en;q=0.5",
    "Accept-Encoding: gzip, deflate, br, zstd",
    "Connection: keep-alive",
    "Upgrade-Insecure-Requests: 1",
    "Referer: https://broadbandmap.fcc.gov/data-download",
    "Sec-Fetch-Dest: document",
    "Sec-Fetch-Mode: navigate",
    "Sec-Fetch-Site: none",
    "Sec-Fetch-User: ?1",
    "Sec-GPC: 1",
    "Priority: u=0, i",
    "Pragma: no-cache",
    "Cache-Control: no-cache",
    "TE: trailers"
  )

  # Every value is passed through shQuote() so that spaces, quotes or other
  # shell metacharacters in the URL or the destination path cannot split or
  # alter the command. Quoting disables the shell's own `~` expansion, hence
  # the explicit path.expand() on the destination.
  curl_args <- c(
    shQuote(remote_file_url),
    "--compressed",
    # Without --fail curl exits 0 on HTTP errors (4xx/5xx) and saves the
    # error page as if it were the requested file.
    "--fail",
    paste("-H", shQuote(request_headers)),
    "-o", shQuote(path.expand(local_file_path))
  )

  status <- system(paste(c("curl", curl_args), collapse = " "))

  if (is.null(status) || status > 0) {
    return(status)
  }

  # Return the path as supplied (not expanded): callers compare it against
  # the value they passed in.
  invisible(local_file_path)
}
5 changes: 2 additions & 3 deletions R/f477.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#'\dontrun{
#'NC <- get_f477(state_abbr = "NC")
#' NC <- get_f477(state_abbr = "NC")
#'}

get_f477 <- function(state_abbr, frn = "all") {

state_abbr <- state_abbr_lookup(state_abbr)

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
5 changes: 2 additions & 3 deletions R/get_county_nbm_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#'\dontrun{
#'guilford_cty <- get_county_nbm_raw(geoid_co = "37081")
#' guilford_cty <- get_county_nbm_raw(geoid_co = "37081")
#'}

get_county_nbm_raw <- function(geoid_co, frn = "all", release = "2023-12-01") {

# do I need a look up for county?

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
5 changes: 2 additions & 3 deletions R/get_frn_nbm_bl.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#'\dontrun{
#' skymesh <- get_frn_nbm_bl("0027136753")
#' skymesh <- get_frn_nbm_bl("0027136753")
#'}

get_frn_nbm_bl <- function(frn) {

if (nchar(frn) != 10L) stop("frn should be a 10-digit string")

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
45 changes: 18 additions & 27 deletions R/get_nbm_available.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
#' paste0("https://broadbandmap.fcc.gov/nbm/",
#' "map/api/national_map_process/nbm_get_data_download/")
#' ```
#' @param get_data_url a string providing NBM filing API.
#' @param user_agent a string set up by default
#' @param get_root_url a string providing NBM filing API.
#'
#' @return A data frame.
#' @export
Expand All @@ -16,40 +15,32 @@
#' head(nbm_data)

get_nbm_available <- function(
get_data_url = paste0("https://broadbandmap.fcc.gov/nbm/map/",
"api/national_map_process/nbm_get_data_download/"),
user_agent = the$user_agent) {
get_root_url = paste0("https://broadbandmap.fcc.gov/nbm/map/",
"api/national_map_process/nbm_get_data_download/")
) {

# get csv to dl only get a table with all link to be downloaded
get_csv_to_dl <- function(release_file, release_nb) {
get_data_url <- paste0(get_data_url,
get_data_url <- paste0(get_root_url,
release_file[release_nb, "process_uuid"])
# h <- curl::new_handle()
# curl::handle_setheaders(h, "User-Agent" = user_agent)
#
# raw_dat <- curl::curl_fetch_memory(get_data_url)
#
# csv_to_dl <- jsonlite::fromJSON(rawToChar(raw_dat$content))$data

res <- system(
sprintf(
paste0("curl '%s' --compressed ",
"-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0' ",
"-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8' ",
"-H 'Accept-Language: en-US,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' ",
"-H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' ",
"-H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' ",
"-H 'Sec-GPC: 1' -H 'Priority: u=0, i' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: trailers'"
),
get_data_url
),
intern = TRUE)
dest_file <- paste0(tempdir(), "/", release_file[release_nb, "process_uuid"], ".json")

res <- download_file(get_data_url, dest_file)

# Check res
if (!(dest_file %in% res)) {
message(paste0("Error in download result: ", res))
stop(sprintf("Downloading %s failed", get_data_url))
}

csv_to_dl <- jsonlite::fromJSON(res)[["data"]]

csv_to_dl[["release"]] <- release_file[release_nb, "filing_subtype"]
return(csv_to_dl)
}

release <- cori.data.fcc::get_nbm_release()
release <- get_nbm_release()

release

Expand All @@ -63,4 +54,4 @@ get_nbm_available <- function(

slim_all_data <- all_data[, col_to_keep]
return(slim_all_data)
}
}
9 changes: 5 additions & 4 deletions R/get_nbm_bl.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#' nbm_bl <- get_nbm_bl(geoid_co = "47051")
#' @examples
#' \dontrun{
#' nbm_bl <- get_nbm_bl(geoid_co = "47051")
#' }

get_nbm_bl <- function(geoid_co) {

if (nchar(geoid_co) != 5L) stop("geoid_co should be a 5-digit string")

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
Loading

0 comments on commit 6d28263

Please sign in to comment.