diff --git a/DESCRIPTION b/DESCRIPTION index 5f94a2c4..27162325 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cfbfastR Title: Access College Football Play by Play Data -Version: 2.0.0 +Version: 2.1.0 Authors@R: c( person("Saiem", "Gilani", , "saiem.gilani@gmail.com", role = c("cre", "aut")), person("Akshay", "Easwaran", , "akeaswaran@me.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 07073b7c..fe8a074d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# **cfbfastR v2.1.0** + +* Fixes a bug in `cfbd_pbp_data()` where play-by-play data for some games were not as expected. +* Improves `add_yardage()` where plays with missing yardage values were not being handled correctly. + + # **cfbfastR v2.0.0** ### Breaking Changes to Loading Functions diff --git a/R/cfbd_pbp_data.R b/R/cfbd_pbp_data.R index 4aa39f4d..dd9fc0c5 100644 --- a/R/cfbd_pbp_data.R +++ b/R/cfbd_pbp_data.R @@ -586,8 +586,15 @@ cfbd_pbp_data <- function(year, play_df <- purrr::map_dfr( g_ids, function(x){ - play_df <- play_df %>% - dplyr::filter(.data$game_id == x) %>% + # Note: this should be changed to a complete data validation test in the future + # filter out games with fewer than 20 plays to avoid issues with EPA/WPA models + game_plays <- play_df %>% + dplyr::filter(.data$game_id == x) + if (nrow(game_plays) < 20) { + cli::cli_alert_danger(glue::glue("Skipping game_id {x} with only {nrow(game_plays)} plays")) + return(NULL) + } + game_plays <- game_plays %>% penalty_detection() %>% add_play_counts() %>% clean_pbp_dat() %>% @@ -599,7 +606,7 @@ cfbd_pbp_data <- function(year, # create_wpa_betting() %>% create_wpa_naive(wp_model = wp_model) p(sprintf("x=%s", as.integer(x))) - return(play_df) + return(game_plays) }, ...) 
# } else{ # play_df <- purrr::map_dfr( diff --git a/R/cfbd_stats.R b/R/cfbd_stats.R index bcdda963..597a4399 100644 --- a/R/cfbd_stats.R +++ b/R/cfbd_stats.R @@ -266,6 +266,8 @@ cfbd_stats_game_advanced <- function(year, colnames(df) <- gsub("_Start", "_start", colnames(df)) colnames(df) <- gsub(".db", "_db", colnames(df)) colnames(df) <- gsub("Id", "_id", colnames(df)) + colnames(df) <- gsub("seasonType", "season_type", colnames(df)) + df <- df %>% diff --git a/R/helper_pbp_add_yardage.R b/R/helper_pbp_add_yardage.R index d2b72af1..9d83cb51 100644 --- a/R/helper_pbp_add_yardage.R +++ b/R/helper_pbp_add_yardage.R @@ -85,12 +85,24 @@ add_yardage <- function(play_df) { -1 * as.numeric(stringr::str_extract( stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+" )), + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("pass to", ignore_case = TRUE)) & + stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~ + -1 * as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+" + )), .data$pass == 1 & stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) & stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~ as.numeric(stringr::str_extract( stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" )), + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("pass to", ignore_case = TRUE)) & + stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~ + as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + )), .data$pass == 1 & stringr::str_detect(.data$play_text, regex("Yd pass", ignore_case = TRUE)) ~ as.numeric(stringr::str_extract( @@ -99,6 +111,32 @@ add_yardage <- function(play_df) { .data$pass == 1 & stringr::str_detect(.data$play_text, regex("pass complete 
to", ignore_case = TRUE)) ~ yards_gained, # 2024 has games that don't have yards in the PBP text but do have them in the yards_gained field. + + # 2025 has some plays that list "PASSER pass" at the very end of the play_text + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("pass \\(\\w", ignore_case = TRUE)) & + stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + )), + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("pass$", ignore_case = TRUE)) & + stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + )), + # 2025 has some plays that have yards in the PBP but no listed passer. The format is the same, though. NOTE(review): assuming this is a case_when() (first TRUE condition wins), this catch-all `^to ` branch shadows the `^to ` loss / no-gain branches that follow it, so they can never match -- confirm the intended order + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + )), + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) & + stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~ + -1 * as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+" + )), + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) & + stringr::str_detect(.data$play_text, regex("for no gain", ignore_case = TRUE)) ~ 0, TRUE ~ NA_real_ ) ) diff --git a/cran-comments.md b/cran-comments.md index 5ad601ea..3c92bb45 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,30 +1,10 @@ ## Release summary -This is a major release that: - -* Addresses the noted documentation issues which caused the previous release to be archived by CRAN. 
-* Addresses the error from the previous CRAN submission today though does not change official version number. -* Addresses the missing documentation for the `update_cfb_pbp()` function noted in the previous CRAN response. -* Addresses minor functionality issues in `cfbd_play_stats_player()` function. -* Updates the `cfbd_*()` functions to use the new College Football Data API v2. -* Addresses the most recent CRAN comments from the previous submission. - -The following functions were added: - * `cfbd_metrics_fg_ep()` - * `cfbd_metrics_wepa_team_season()` - * `cfbd_metrics_wepa_players_passing()` - * `cfbd_metrics_wepa_players_rushing()` - * `cfbd_metrics_wepa_players_kicking()` - * `cfbd_ratings_fpi()` - * `cfbd_live_scoreboard()` - * `cfbd_live_plays()` - * `cfbd_api_key_info()` - -There are minor changes to the existing `cfbd_*()` functions under the hood. See `NEWS.md` for more details. - -While I believe I updated all twitter links in the `README.md` to non-redirecting links, they do give status 403 -when you try to access them without authentication. If this behavior is too problematic and against policy, please let me know and I will -make the changes to the `README.md`. +This is a minor release that: + +* Fixes a bug in `cfbd_pbp_data()` where play-by-play data for some games were not as expected. +* Improves `add_yardage()` where plays with missing yardage values were not being handled correctly. 
+ ## R CMD check results diff --git a/tests/testthat/test-cfbd_betting_lines.R b/tests/testthat/test-cfbd_betting_lines.R index 4480f85d..e7d7dab2 100644 --- a/tests/testthat/test-cfbd_betting_lines.R +++ b/tests/testthat/test-cfbd_betting_lines.R @@ -19,16 +19,16 @@ test_that("CFB Betting Lines", { cols <- c( "game_id", "season", "season_type", "week", "start_date", - "home_team", "home_conference", "home_classification", "home_score", - "away_team", "away_conference", "away_classification", "away_score", + "home_team_id", "home_team", "home_conference", "home_classification", "home_score", + "away_team_id", "away_team", "away_conference", "away_classification", "away_score", "provider", "spread", "formatted_spread", "spread_open", "over_under", "over_under_open", "home_moneyline", "away_moneyline" ) expect_equal(nrow(x), 4) expect_equal(nrow(y), 4) - expect_setequal(colnames(x), cols) - expect_setequal(colnames(y), cols) + expect_in(cols, colnames(x)) + expect_in(cols, colnames(y)) expect_s3_class(x, "data.frame") expect_s3_class(y, "data.frame") }) diff --git a/tests/testthat/test-cfbd_live_scoreboard.R b/tests/testthat/test-cfbd_live_scoreboard.R index dd8f7144..a835fdfc 100644 --- a/tests/testthat/test-cfbd_live_scoreboard.R +++ b/tests/testthat/test-cfbd_live_scoreboard.R @@ -51,8 +51,8 @@ test_that("CFB Live Scoreboard", { x <- cfbd_live_scoreboard(division='fbs', conference = "B12") y <- cfbd_live_scoreboard(division='fbs') - expect_in(colnames(x), cols) - expect_in(colnames(y), cols) + expect_in(cols, colnames(x)) + expect_in(cols, colnames(y)) expect_s3_class(x, "data.frame") expect_s3_class(y, "data.frame") }) diff --git a/tests/testthat/test-cfbd_stats_game_advanced.R b/tests/testthat/test-cfbd_stats_game_advanced.R index 7b11314f..4912c226 100644 --- a/tests/testthat/test-cfbd_stats_game_advanced.R +++ b/tests/testthat/test-cfbd_stats_game_advanced.R @@ -33,9 +33,9 @@ test_that("CFB Stats Game - Advanced", { y <- cfbd_stats_game_advanced(2019, team = 
"LSU") z <- cfbd_stats_game_advanced(2013, team = "Florida State") - expect_setequal(colnames(x), cols) - expect_setequal(colnames(y), cols) - expect_setequal(colnames(z), cols) + expect_in(cols, colnames(x)) + expect_in(cols, colnames(y)) + expect_in(cols, colnames(z)) expect_s3_class(x, "data.frame") expect_s3_class(y, "data.frame") expect_s3_class(z, "data.frame")