diff --git a/DESCRIPTION b/DESCRIPTION index 27162325..8cbf7c6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cfbfastR Title: Access College Football Play by Play Data -Version: 2.1.0 +Version: 2.2.0 Authors@R: c( person("Saiem", "Gilani", , "saiem.gilani@gmail.com", role = c("cre", "aut")), person("Akshay", "Easwaran", , "akeaswaran@me.com", role = "aut"), @@ -58,6 +58,7 @@ Suggests: DBI, ggplot2, ggrepel, + patrick, qs (>= 0.25.1), rmarkdown, RSQLite, diff --git a/NAMESPACE b/NAMESPACE index 16590601..1e82b08e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,6 +67,7 @@ export(cfbd_venues) export(clean_drive_dat) export(clean_drive_info) export(clean_pbp_dat) +export(clean_play_text) export(create_epa) export(create_wpa_naive) export(epa_fg_probs) @@ -148,6 +149,7 @@ importFrom(stringr,str_detect) importFrom(stringr,str_extract) importFrom(stringr,str_length) importFrom(stringr,str_remove) +importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(stringr,str_sub) importFrom(stringr,str_trim) diff --git a/NEWS.md b/NEWS.md index fe8a074d..842d87f3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# **cfbfastR v2.2.0** + +* Fixes a bug in the `validate_week()` utility function where some inputs were not being handled correctly (i.e. week 16). The fix carries over to `cfbd_pbp_data()` and other functions. +* The default value of the `season_type` parameter in the `cfbd_game_info()` and `cfbd_play_stats_player()` functions changed from "regular" to "both" to align with other functions in the package. + # **cfbfastR v2.1.0** * Fixes a bug in `cfbd_pbp_data()` where play-by-play data for some games were not as expected. 
diff --git a/R/cfbd_games.R b/R/cfbd_games.R index 3760ab65..715bf479 100644 --- a/R/cfbd_games.R +++ b/R/cfbd_games.R @@ -67,7 +67,7 @@ NULL #' **Get results information from games.** #' @param year (*Integer* required): Year, 4 digit format(*YYYY*) #' @param week (*Integer* optional): Week - values from 1-15, 1-14 for seasons pre-playoff (i.e. 2013 or earlier) -#' @param season_type (*String* default regular): Select Season Type: regular, postseason, both, allstar, spring_regular, spring_postseason +#' @param season_type (*String* default both): Select Season Type: regular, postseason, both, allstar, spring_regular, spring_postseason #' @param team (*String* optional): D-I Team #' @param home_team (*String* optional): Home D-I Team #' @param away_team (*String* optional): Away D-I Team @@ -129,7 +129,7 @@ NULL cfbd_game_info <- function(year, week = NULL, - season_type = "regular", + season_type = "both", team = NULL, home_team = NULL, away_team = NULL, diff --git a/R/cfbd_pbp_data.R b/R/cfbd_pbp_data.R index dd9fc0c5..991da2ff 100644 --- a/R/cfbd_pbp_data.R +++ b/R/cfbd_pbp_data.R @@ -595,6 +595,7 @@ cfbd_pbp_data <- function(year, return(NULL) } game_plays <- game_plays %>% + clean_play_text() %>% penalty_detection() %>% add_play_counts() %>% clean_pbp_dat() %>% @@ -801,8 +802,6 @@ cfbd_pbp_data <- function(year, return(play_df) } - - #' **Series of functions to help clean the play-by-play data for analysis** #' @name helpers_pbp NULL @@ -2146,3 +2145,35 @@ clean_drive_info <- function(drive_df) { return(clean_drive) } + + +#' @rdname helpers_pbp +#' +#' @param play_df (*data.frame* required) Plays dataframe pulled from API via the `cfbd_play()` or within the `cfbd_pbp_data()` function. +#' @details Cleans CFB play-by-play text to be compliant with existing play-by-play parsing. Generally not recommended for standalone use. This method exists due to ESPN PBP changes midway through the 2025 season. 
+#' \describe{
+#'   \item{`play_text`: Returned as `play_text`}{.}
+#' }
+#' @return The original `play_df` with the following columns appended to it:
+#' \describe{
+#'   \item{`cleaned_text`: `play_text` with miscellaneous items removed: pass depth/location, clock timestamps, No Huddle/Shotgun status, etc.}{.}
+#' }
+#' @keywords internal
+#' @importFrom rlang .data
+#' @importFrom stringr str_replace str_replace_all
+#' @importFrom dplyr mutate
+#' @export
+#'
+
+clean_play_text <- function(play_df) {
+  play_df %>% # the pipeline is the last expression, so the result is returned visibly
+    dplyr::mutate(
+      cleaned_text = stringr::str_replace(.data$play_text, "^\\(\\d{1,2}:\\d{2}\\)\\s+", ""), # leading "(M:SS) " / "(MM:SS) " clock stamp
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s(short|deep)\\s", " "), # first pass-depth word only
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s(left|middle|right)\\s", " "), # first location word only
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s*No Huddle-Shotgun\\s+", ""), # combined tag first, before its two parts
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "No Huddle-?", ""), # stand-alone "No Huddle" tag, optional hyphen
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s*Shotgun\\s+", ""), # stand-alone "Shotgun" tag
+      cleaned_text = stringr::str_replace_all(.data$cleaned_text, "\\s+", " ") # collapse every whitespace run, not just the first one
+    )
+}
diff --git a/R/cfbd_play.R b/R/cfbd_play.R index 1feb1507..d2d560e4 100644 --- a/R/cfbd_play.R +++ b/R/cfbd_play.R @@ -189,7 +189,7 @@ cfbd_plays <- function(year = 2020, #' Can be found using the [cfbd_player_info()] function.
#' @param stat_type_id (*Integer* optional): Stat Type ID filter for querying a single stat type #' Can be found using the [cfbd_play_stats_types()] function -#' @param season_type (*String* default regular): Season type - regular, postseason, both, allstar, spring_regular, spring_postseason +#' @param season_type (*String* default both): Season type - regular, postseason, both, allstar, spring_regular, spring_postseason #' @return [cfbd_play_stats_player()] - A data frame with 54 variables: #' \describe{ #' \item{`play_id`: character.}{Referencing play id.} @@ -279,7 +279,7 @@ cfbd_play_stats_player <- function(year = NULL, game_id = NULL, athlete_id = NULL, stat_type_id = NULL, - season_type = "regular") { + season_type = "both") { # Validation ---- validate_api_key() diff --git a/R/helper_pbp_add_yardage.R b/R/helper_pbp_add_yardage.R index 9d83cb51..2600dc53 100644 --- a/R/helper_pbp_add_yardage.R +++ b/R/helper_pbp_add_yardage.R @@ -52,91 +52,133 @@ add_yardage <- function(play_df) { play_df$yds_sacked <- NA_real_ play_df$yds_penalty <- NA_real_ + if (!("cleaned_text" %in% colnames(play_df))) { + play_df <- play_df %>% + clean_play_text() + } + play_df <- play_df %>% dplyr::mutate( yds_rushed = dplyr::case_when( - .data$rush == 1 & stringr::str_detect(.data$play_text, regex("run for no gain", ignore_case = TRUE)) ~ 0, + .data$rush == 1 & stringr::str_detect(.data$cleaned_text, regex("run for no gain", ignore_case = TRUE)) ~ 0, .data$rush == 1 & - stringr::str_detect(.data$play_text, regex("run for a loss of", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("run for a loss of", ignore_case = TRUE)) ~ -1 * as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= run for a loss of)[^,]+"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= run for a loss of)[^,]+"), "\\d+" )), .data$rush == 1 & - stringr::str_detect(.data$play_text, regex("run for", ignore_case = TRUE)) ~ + 
stringr::str_detect(.data$cleaned_text, regex("run for \\d+ y.*ds? loss", ignore_case = TRUE)) ~ + -1 * as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= run for)[^,]+"), "\\d+" + )), + .data$rush == 1 & + stringr::str_detect(.data$cleaned_text, regex("run for", ignore_case = TRUE)) ~ as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= run for)[^,]+"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= run for)[^,]+"), "\\d+" )), .data$rush == 1 & - stringr::str_detect(.data$play_text, regex("yd run", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("yd run", ignore_case = TRUE)) ~ as.numeric( stringr::str_remove( - stringr::str_extract(.data$play_text, regex("\\d{0,2} Yd Run", ignore_case = TRUE)), + stringr::str_extract(.data$cleaned_text, regex("\\d{0,2} yd run", ignore_case = TRUE)), regex("yd run", ignore_case = TRUE) ) ), + + # same regexes with rush instead + .data$rush == 1 & stringr::str_detect(.data$cleaned_text, regex("rush for no gain", ignore_case = TRUE)) ~ 0, + .data$rush == 1 & + stringr::str_detect(.data$cleaned_text, regex("rush for a loss of", ignore_case = TRUE)) ~ + -1 * as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= rush for a loss of)[^,]+"), "\\d+" + )), + .data$rush == 1 & + stringr::str_detect(.data$cleaned_text, regex("rush for \\d+ y.*ds? 
loss", ignore_case = TRUE)) ~ + -1 * as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= rush for)[^,]+"), "\\d+" + )), + .data$rush == 1 & + stringr::str_detect(.data$cleaned_text, regex("rush for", ignore_case = TRUE)) ~ + as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= rush for)[^,]+"), "\\d+" + )), + .data$rush == 1 & + stringr::str_detect(.data$cleaned_text, regex("yd rush", ignore_case = TRUE)) ~ + as.numeric( + stringr::str_remove( + stringr::str_extract(.data$cleaned_text, regex("\\d{0,2} yd rush", ignore_case = TRUE)), + regex("yd rush", ignore_case = TRUE) + ) + ), TRUE ~ NA_real_ ), yds_receiving = dplyr::case_when( - .data$pass == 1 & stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) & - stringr::str_detect(.data$play_text, regex("for no gain", ignore_case = TRUE)) ~ 0, + .data$pass == 1 & stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) & + stringr::str_detect(.data$cleaned_text, regex("for no gain", ignore_case = TRUE)) ~ 0, .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) & - stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) & + stringr::str_detect(.data$cleaned_text, regex("for a loss of", ignore_case = TRUE)) ~ -1 * as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for a loss of)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass to", ignore_case = TRUE)) & - stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("pass to", ignore_case = TRUE)) & + 
stringr::str_detect(.data$cleaned_text, regex("for a loss of", ignore_case = TRUE)) ~ -1 * as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for a loss of)[^,]+"), "\\d+" + )), + .data$pass == 1 & + stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) & + stringr::str_detect(.data$cleaned_text, regex(" for \\d+ y\\w*ds? loss", ignore_case = TRUE)) ~ + -1 * as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) & - stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) & + stringr::str_detect(.data$cleaned_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~ as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass to", ignore_case = TRUE)) & - stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("pass to", ignore_case = TRUE)) & + stringr::str_detect(.data$cleaned_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~ as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("Yd pass", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("Yd pass", ignore_case = TRUE)) ~ 
as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(\\d+)\\s+Yd\\s+pass"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(\\d+)\\s+Yd\\s+pass"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) ~ yards_gained, # 2024 has games that don't have yards in the PBP text but do have them in the yards_gained field. # 2025 has some plays list "PASSER pass" at the very end of the play_text .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass \\(\\w", ignore_case = TRUE)) & - stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + stringr::str_detect(.data$cleaned_text, regex("pass \\(\\w", ignore_case = TRUE)) & + stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass$", ignore_case = TRUE)) & - stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + stringr::str_detect(.data$cleaned_text, regex("pass$", ignore_case = TRUE)) & + stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+" )), # 2025 has some plays that have yards in the PBP but no listed passer. 
the format is the same though .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" + stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract( + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) & - stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~ + stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) & + stringr::str_detect(.data$cleaned_text, regex("for a loss of", ignore_case = TRUE)) ~ -1 * as.numeric(stringr::str_extract( - stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+" + stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for a loss of)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) & - stringr::str_detect(.data$play_text, regex("for no gain", ignore_case = TRUE)) ~ 0, + stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) & + stringr::str_detect(.data$cleaned_text, regex("for no gain", ignore_case = TRUE)) ~ 0, TRUE ~ NA_real_ ) ) diff --git a/R/utils.R b/R/utils.R index ba8704ba..aac9f4ca 100644 --- a/R/utils.R +++ b/R/utils.R @@ -254,7 +254,7 @@ validate_week <- function(week = NULL){ if(!is.null(week)){ checks <- c( num_check = is.numeric(week), - range_check = dplyr::between(as.numeric(week), 1, 15) + range_check = dplyr::between(as.numeric(week), 1, 16) ) if(!all(checks)){ cli::cli_abort(glue::glue("Enter valid {deparse(substitute(week))} 1-15\n(14 for seasons pre-playoff, i.e. 
2014 or earlier)")) diff --git a/cfbfastR.Rproj b/cfbfastR.Rproj index 6daccaaa..270314b8 100644 --- a/cfbfastR.Rproj +++ b/cfbfastR.Rproj @@ -1,21 +1,21 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/cran-comments.md b/cran-comments.md index 3c92bb45..0afd6fd7 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -4,6 +4,8 @@ This is a minor release that: * Fixes a bug in `cfbd_pbp_data()` where play-by-play data for some games were not as expected. * Improves `add_yardage()` where plays with missing yardage values were not being handled correctly. +* Fixes a small bug in an underlying helper function, `validate_week()`, used throughout the package. +* Updates documentation to reflect a handful of default parameter changes made. ## R CMD check results diff --git a/man/cfbd_game_info.Rd b/man/cfbd_game_info.Rd index d3a58456..fb612b90 100644 --- a/man/cfbd_game_info.Rd +++ b/man/cfbd_game_info.Rd @@ -7,7 +7,7 @@ cfbd_game_info( year, week = NULL, - season_type = "regular", + season_type = "both", team = NULL, home_team = NULL, away_team = NULL, @@ -22,7 +22,7 @@ cfbd_game_info( \item{week}{(\emph{Integer} optional): Week - values from 1-15, 1-14 for seasons pre-playoff (i.e. 
2013 or earlier)} -\item{season_type}{(\emph{String} default regular): Select Season Type: regular, postseason, both, allstar, spring_regular, spring_postseason} +\item{season_type}{(\emph{String} default both): Select Season Type: regular, postseason, both, allstar, spring_regular, spring_postseason} \item{team}{(\emph{String} optional): D-I Team} diff --git a/man/cfbd_play_stats_player.Rd b/man/cfbd_play_stats_player.Rd index 21f1b79f..ae8bd6d0 100644 --- a/man/cfbd_play_stats_player.Rd +++ b/man/cfbd_play_stats_player.Rd @@ -11,7 +11,7 @@ cfbd_play_stats_player( game_id = NULL, athlete_id = NULL, stat_type_id = NULL, - season_type = "regular" + season_type = "both" ) } \arguments{ @@ -30,7 +30,7 @@ Can be found using the \code{\link[=cfbd_player_info]{cfbd_player_info()}} funct \item{stat_type_id}{(\emph{Integer} optional): Stat Type ID filter for querying a single stat type Can be found using the \code{\link[=cfbd_play_stats_types]{cfbd_play_stats_types()}} function} -\item{season_type}{(\emph{String} default regular): Season type - regular, postseason, both, allstar, spring_regular, spring_postseason} +\item{season_type}{(\emph{String} default both): Season type - regular, postseason, both, allstar, spring_regular, spring_postseason} } \value{ \code{\link[=cfbd_play_stats_player]{cfbd_play_stats_player()}} - A data frame with 54 variables: diff --git a/man/helpers_pbp.Rd b/man/helpers_pbp.Rd index a75cf340..cd52613d 100644 --- a/man/helpers_pbp.Rd +++ b/man/helpers_pbp.Rd @@ -8,6 +8,7 @@ \alias{clean_drive_dat} \alias{prep_epa_df_after} \alias{clean_drive_info} +\alias{clean_play_text} \alias{add_player_cols} \alias{add_yardage} \alias{clean_pbp_dat} @@ -22,6 +23,8 @@ prep_epa_df_after(dat) clean_drive_info(drive_df) +clean_play_text(play_df) + add_player_cols(pbp) add_yardage(play_df) @@ -175,6 +178,11 @@ The original \code{drive_df} with the following columns appended to it: \item{\code{scoring}: Logical flag for if drive was a scoring drive updated}{.} } 
+The original \code{play_df} with the following columns appended to it: +\describe{ +\item{\code{cleaned_text}: \code{play_text} with miscellaneous items removed: pass depth/location, clock timestamps, No Huddle/Shotgun status, etc.}{.} +} + The original \code{pbp} with the following columns appended to it: \describe{ \item{\code{rusher_player_name}}{.} @@ -331,6 +339,11 @@ Cleans CFB (D-I) Drive-By-Drive Data to create \code{pts_drive} column. Requires \item{\code{game_id}: Unique game identifier}{.} } +Cleans CFB play-by-play text to be compliant with existing play-by-play parsing. Generally not recommended for standalone use. This method exists due to ESPN PBP changes midway through the 2025 season. +\describe{ +\item{\code{play_text}: Returned as \code{play_text}}{.} +} + Cleans CFB (D-I) player Data to create player name columns. Requires the following columns be present: \describe{ \item{\code{rush}}{.} diff --git a/tests/testthat/test-cfbd_pbp_data.R b/tests/testthat/test-cfbd_pbp_data.R index 8780a161..4289b776 100644 --- a/tests/testthat/test-cfbd_pbp_data.R +++ b/tests/testthat/test-cfbd_pbp_data.R @@ -40,3 +40,37 @@ test_that("base case 2023 pbp are already properly handled", { testthat::expect_equal(sum(completions$same_same), nrow(completions)) })
+
+
+patrick::with_parameters_test_that( # the braced code below is run once per entry in patrick::cases()
+  "[2025 new PBP] Yardage is successfully calculated",
+  {
+    skip_on_cran()
+    plays <- cfbd_pbp_data(
+      year = year,
+      season_type = season_type,
+      week = week,
+      team = team,
+      epa_wpa = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
+    )
+
+    target_plays <- plays[which(plays$play_text == play_text), ] # exact match on the original play_text
+    testthat::expect_equal(nrow(target_plays), 1)
+    testthat::expect_equal(target_plays[1, yards_field][[1]], expected_yards)
+  },
+  patrick::cases(
+    "401754571-yds_receiving-1" = list(year = 2025, season_type = "regular", week = 9, team = "Georgia Tech", play_text = "(14:46) Shotgun #10 H.King pass complete short right to #1 J.Haynes caught at GT27, for 15 yards to the GT40 (#13 G.Bryant III), 1ST DOWN", yards_field = "yds_receiving", expected_yards = 15),
+    "401754571-yds_receiving-2" = list(year = 2025, season_type = "regular", week = 9, team = "Georgia Tech", play_text = "(14:17) No Huddle-Shotgun #10 H.King pass complete short right to #4 I.Canion caught at GT46, for 2 yards to the GT42 fumbled by #4 I.Canion at GT46 forced by #16 C.Peal recovered by SU #8 D.Reese at GT42, End Of Play", yards_field = "yds_receiving", expected_yards = 2),
+    "401754571-yds_receiving-3" = list(year = 2025, season_type = "regular", week = 9, team = "Georgia Tech", play_text = "(06:15) Shotgun #10 H.King pass incomplete short left to #17 J.Beetham thrown to SU01", yards_field = "yds_receiving", expected_yards = NA_integer_),
+    "401754571-yds_rushed-1" = list(year = 2025, season_type = "regular", week = 9, team = "Georgia Tech", play_text = "(13:31) Shotgun #10 H.King rush right for 7 yards gain to the SU30, out of bounds at SU30, 1ST DOWN", yards_field = "yds_rushed", expected_yards = 7),
+    "401754571-yds_rushed-2" = list(year = 2025, season_type = "regular", week = 9, team = "Georgia Tech", play_text = "(07:16) No Huddle-Shotgun #1 J.Haynes rush left for 4 yards loss to the SU35 (#6 J.Heard Jr.; #3 K.Singleton)", yards_field = "yds_rushed", expected_yards = -4),
+    "401754571-yds_receiving-4" = list(year = 2025, season_type = "regular", week = 9, team = "Syracuse", play_text = "(15:00) No Huddle-Shotgun #10 R.Collins pass complete deep right to #2 J.Cook II caught at GT37, for 41 yards to the GT34 (#6 R.Shelley), 1ST DOWN", yards_field = "yds_receiving", expected_yards = 41),
+    "401754571-yds_receiving-5" = list(year = 2025, season_type = "regular", week = 9, team = "Syracuse", play_text = "(15:00) No Huddle-Shotgun #10 R.Collins pass complete deep right to #2 J.Cook II caught at GT37, for 41 yards to the GT34 (#6 R.Shelley), 1ST DOWN", yards_field = "yds_receiving", expected_yards = 41), # NOTE(review): identical to case "...yds_receiving-4" - confirm whether a different play was intended
+    "401754571-yds_receiving-6" = list(year = 2025, season_type = "regular", week = 9, team = "Syracuse", play_text = "(09:25) No Huddle-Shotgun #10 R.Collins pass complete short left to #2 J.Cook II caught at SU31, for 4 yards to the SU34 (#2 E.Lightsey)", yards_field = "yds_receiving", expected_yards = 4),
+    "401754571-yds_receiving-7" = list(year = 2025, season_type = "regular", week = 9, team = "Georgia Tech", play_text = "(05:49) Shotgun #10 H.King pass complete short middle to #85 J.Allen caught at SU33, for 19 yards to the SU09 (#0 B.Long Jr.)", yards_field = "yds_receiving", expected_yards = 19),
+    "401777353-yds_receiving-1" = list(year = 2025, season_type = "regular", week = 15, team = "Ohio State", play_text = "(07:37) Shotgun #10 J.Sayin pass complete short left to #4 J.Smith caught at OSU29, for 5 yards loss to the OSU32 (#12 D.Boykin)", yards_field = "yds_receiving", expected_yards = -5),
+    "401778302-yds_receiving-1" = list(year = 2025, season_type = "postseason", week = 1, team = "Boise State", play_text = "Shotgun #14 M.Cutforth pass complete deep middle to #3 L.Caples caught at WAS06, for 22 yards to the WAS06 (#18 R.Dillard-Allen), 1ST DOWN", yards_field = "yds_receiving", expected_yards = 22),
+    "401634169-base-case-old-pbp-yds_receiving" = list(year = 2024, season_type = "regular", week = 1, team = "Purdue", play_text = "Hudson Card pass complete to Drew Biber for 2 yds fumbled, forced by Maddix Blackwell, recovered by INST Garret Ollendieck G. Ollendieck return for 0 yds", yards_field = "yds_receiving", expected_yards = 2),
+    "401634169-base-case-old-pbp-yds_rushed" = list(year = 2024, season_type = "regular", week = 1, team = "Purdue", play_text = "Devin Mockobee run for 11 yds to the INST 21 for a 1ST down", yards_field = "yds_rushed", expected_yards = 11)
+  )
+)
diff --git a/tests/testthat/test-cfbd_play_stats_types.R b/tests/testthat/test-cfbd_play_stats_types.R index 0584fd29..a3eb157f 100644 --- a/tests/testthat/test-cfbd_play_stats_types.R +++ b/tests/testthat/test-cfbd_play_stats_types.R @@ -5,7 +5,7 @@ cols <- c("play_stat_type_id", "name") test_that("CFB Play Stats Types", { skip_on_cran() x <- cfbd_play_stats_types() - expect_equal(nrow(x), 25) + expect_equal(nrow(x), 26) expect_equal(ncol(x), 2) expect_setequal(colnames(x), cols) expect_s3_class(x, "data.frame") diff --git a/tests/testthat/test-cfbd_play_types.R b/tests/testthat/test-cfbd_play_types.R index 6b14f933..49d19f45 100644 --- a/tests/testthat/test-cfbd_play_types.R +++ b/tests/testthat/test-cfbd_play_types.R @@ -5,7 +5,7 @@ cols <- c("play_type_id", "text", "abbreviation") test_that("CFB Play Types", { skip_on_cran() x <- cfbd_play_types() - expect_equal(nrow(x), 48) + expect_equal(nrow(x), 49) expect_equal(ncol(x), 3) expect_setequal(colnames(x), cols) expect_s3_class(x, "data.frame")