sportsdataverse · saiemgilani · Jan 12, 2026 · Sep 3, 2025 · Sep 4, 2025 · Sep 4, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cfbfastR
 Title: Access College Football Play by Play Data
-Version: 2.1.0
+Version: 2.2.0
 Authors@R: c(
     person("Saiem", "Gilani", , "[email protected]", role = c("cre", "aut")),
     person("Akshay", "Easwaran", , "[email protected]", role = "aut"),
@@ -58,6 +58,7 @@ Suggests:
     DBI,
     ggplot2,
     ggrepel,
+    patrick,
     qs (>= 0.25.1),
     rmarkdown,
     RSQLite,

diff --git a/NAMESPACE b/NAMESPACE
@@ -67,6 +67,7 @@ export(cfbd_venues)
 export(clean_drive_dat)
 export(clean_drive_info)
 export(clean_pbp_dat)
+export(clean_play_text)
 export(create_epa)
 export(create_wpa_naive)
 export(epa_fg_probs)
@@ -148,6 +149,7 @@ importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_length)
 importFrom(stringr,str_remove)
+importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_sub)
 importFrom(stringr,str_trim)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# **cfbfastR v2.2.0**
+
+* Fixes a bug in `validate_week()` utility function where some inputs were not being handled correctly (i.e. week 16). Fixes trickle down to `cfbd_pbp_data()` and other functions.
+* Default value for the `season_type` parameter in the `cfbd_game_info()` and `cfbd_play_stats_player()` functions changed from "regular" to "both" to align with other functions in the package.
+
 # **cfbfastR v2.1.0**
 
 * Fixes a bug in `cfbd_pbp_data()` where play-by-play data for some games were not as expected.

diff --git a/R/cfbd_games.R b/R/cfbd_games.R
@@ -67,7 +67,7 @@ NULL
 #' **Get results information from games.**
 #' @param year (*Integer* required): Year, 4 digit format(*YYYY*)
 #' @param week (*Integer* optional): Week - values from 1-15, 1-14 for seasons pre-playoff (i.e. 2013 or earlier)
-#' @param season_type (*String* default regular): Select Season Type: regular, postseason, both, allstar, spring_regular, spring_postseason
+#' @param season_type (*String* default both): Select Season Type: regular, postseason, both, allstar, spring_regular, spring_postseason
 #' @param team (*String* optional): D-I Team
 #' @param home_team (*String* optional): Home D-I Team
 #' @param away_team (*String* optional): Away D-I Team
@@ -129,7 +129,7 @@ NULL
 
 cfbd_game_info <- function(year,
                            week = NULL,
-                           season_type = "regular",
+                           season_type = "both",
                            team = NULL,
                            home_team = NULL,
                            away_team = NULL,

diff --git a/R/cfbd_pbp_data.R b/R/cfbd_pbp_data.R
@@ -595,6 +595,7 @@ cfbd_pbp_data <- function(year,
           return(NULL)
         }
         game_plays <- game_plays %>%
+          clean_play_text() %>%
           penalty_detection() %>%
           add_play_counts() %>%
           clean_pbp_dat() %>%
@@ -801,8 +802,6 @@ cfbd_pbp_data <- function(year,
   return(play_df)
 }
 
-
-
 #' **Series of functions to help clean the play-by-play data for analysis**
 #' @name helpers_pbp
 NULL
@@ -2146,3 +2145,35 @@ clean_drive_info <- function(drive_df) {
 
   return(clean_drive)
 }
+
+
+#' @rdname helpers_pbp
+#'
+#' @param play_df (*data.frame* required) Plays dataframe pulled from API via `cfbd_plays()` or within the `cfbd_pbp_data()` function.
+#' @details Cleans CFB play-by-play text to be compliant with existing play-by-play parsing. Generally not recommended for standalone use. This method exists due to ESPN PBP changes midway through the 2025 season. Only the first occurrence of each marker is removed; whitespace is then collapsed to single spaces and trimmed at both ends.
+#' \describe{
+#' \item{`play_text`: character.}{Required input column holding the raw play description; it is left unchanged.}
+#' }
+#' @return The original `play_df` with the following columns appended to it:
+#' \describe{
+#' \item{`cleaned_text`: character.}{`play_text` with miscellaneous items removed: leading clock timestamp, pass depth/location, No Huddle/Shotgun status, and surplus whitespace.}
+#' }
+#' @keywords internal
+#' @importFrom rlang .data
+#' @importFrom stringr str_replace str_replace_all str_trim
+#' @importFrom dplyr mutate
+#' @export

+clean_play_text <- function(play_df) {
+  # Formation markers become " " (not "") so adjacent words never fuse; the last step squeezes every whitespace run and trims.
+  play_df %>%
+    dplyr::mutate(
+      cleaned_text = stringr::str_replace(.data$play_text, "^\\(\\d{1,2}:\\d{2}\\)\\s+", ""),
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s(short|deep)\\s", " "),
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s(left|middle|right)\\s", " "),
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s*No Huddle-Shotgun\\s+", " "),
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "No Huddle-?", ""),
+      cleaned_text = stringr::str_replace(.data$cleaned_text, "\\s*Shotgun\\s+", " "),
+      cleaned_text = stringr::str_trim(stringr::str_replace_all(.data$cleaned_text, "\\s+", " "))
+    )
+}
diff --git a/R/cfbd_play.R b/R/cfbd_play.R
@@ -189,7 +189,7 @@ cfbd_plays <- function(year = 2020,
 #' Can be found using the [cfbd_player_info()] function.
 #' @param stat_type_id (*Integer* optional): Stat Type ID filter for querying a single stat type
 #' Can be found using the [cfbd_play_stats_types()] function
-#' @param season_type (*String* default regular): Season type - regular, postseason, both, allstar, spring_regular, spring_postseason
+#' @param season_type (*String* default both): Season type - regular, postseason, both, allstar, spring_regular, spring_postseason
 #' @return [cfbd_play_stats_player()] - A data frame with 54 variables:
 #' \describe{
 #'   \item{`play_id`: character.}{Referencing play id.}
@@ -279,7 +279,7 @@ cfbd_play_stats_player <- function(year = NULL,
                                    game_id = NULL,
                                    athlete_id = NULL,
                                    stat_type_id = NULL,
-                                   season_type = "regular") {
+                                   season_type = "both") {
 
   # Validation ----
   validate_api_key()

diff --git a/R/helper_pbp_add_yardage.R b/R/helper_pbp_add_yardage.R
@@ -52,91 +52,133 @@ add_yardage <- function(play_df) {
   play_df$yds_sacked <- NA_real_
   play_df$yds_penalty <- NA_real_
 
+  if (!("cleaned_text" %in% colnames(play_df))) {
+    play_df <- play_df %>%
+      clean_play_text()
+  }
+
   play_df <- play_df %>%
     dplyr::mutate(
       yds_rushed = dplyr::case_when(
-        .data$rush == 1 & stringr::str_detect(.data$play_text, regex("run for no gain", ignore_case = TRUE)) ~ 0,
+        .data$rush == 1 & stringr::str_detect(.data$cleaned_text, regex("run for no gain", ignore_case = TRUE)) ~ 0,
         .data$rush == 1 &
-          stringr::str_detect(.data$play_text, regex("run for a loss of", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("run for a loss of", ignore_case = TRUE)) ~
         -1 * as.numeric(stringr::str_extract(
-          stringi::stri_extract_first_regex(.data$play_text, "(?<= run for a loss of)[^,]+"), "\\d+"
+          stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= run for a loss of)[^,]+"), "\\d+"
         )),
         .data$rush == 1 &
-          stringr::str_detect(.data$play_text, regex("run for", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("run for \\d+ y.*ds? loss", ignore_case = TRUE)) ~
+          -1 * as.numeric(stringr::str_extract(
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= run for)[^,]+"), "\\d+"
+          )),
+        .data$rush == 1 &
+          stringr::str_detect(.data$cleaned_text, regex("run for", ignore_case = TRUE)) ~
         as.numeric(stringr::str_extract(
-          stringi::stri_extract_first_regex(.data$play_text, "(?<= run for)[^,]+"), "\\d+"
+          stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= run for)[^,]+"), "\\d+"
         )),
         .data$rush == 1 &
-          stringr::str_detect(.data$play_text, regex("yd run", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("yd run", ignore_case = TRUE)) ~
         as.numeric(
           stringr::str_remove(
-            stringr::str_extract(.data$play_text, regex("\\d{0,2} Yd Run", ignore_case = TRUE)),
+            stringr::str_extract(.data$cleaned_text, regex("\\d{0,2} yd run", ignore_case = TRUE)),
             regex("yd run", ignore_case = TRUE)
           )
         ),
+
+        # same regexes with rush instead
+        .data$rush == 1 & stringr::str_detect(.data$cleaned_text, regex("rush for no gain", ignore_case = TRUE)) ~ 0,
+        .data$rush == 1 &
+          stringr::str_detect(.data$cleaned_text, regex("rush for a loss of", ignore_case = TRUE)) ~
+        -1 * as.numeric(stringr::str_extract(
+          stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= rush for a loss of)[^,]+"), "\\d+"
+        )),
+        .data$rush == 1 &
+          stringr::str_detect(.data$cleaned_text, regex("rush for \\d+ y.*ds? loss", ignore_case = TRUE)) ~
+          -1 * as.numeric(stringr::str_extract(
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= rush for)[^,]+"), "\\d+"
+          )),
+        .data$rush == 1 &
+          stringr::str_detect(.data$cleaned_text, regex("rush for", ignore_case = TRUE)) ~
+        as.numeric(stringr::str_extract(
+          stringi::stri_extract_first_regex(.data$cleaned_text, "(?<= rush for)[^,]+"), "\\d+"
+        )),
+        .data$rush == 1 &
+          stringr::str_detect(.data$cleaned_text, regex("yd rush", ignore_case = TRUE)) ~
+        as.numeric(
+          stringr::str_remove(
+            stringr::str_extract(.data$cleaned_text, regex("\\d{0,2} yd rush", ignore_case = TRUE)),
+            regex("yd rush", ignore_case = TRUE)
+          )
+        ),
         TRUE ~ NA_real_
       ),
       yds_receiving = dplyr::case_when(
-        .data$pass == 1 & stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) &
-          stringr::str_detect(.data$play_text, regex("for no gain", ignore_case = TRUE)) ~ 0,
+        .data$pass == 1 & stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex("for no gain", ignore_case = TRUE)) ~ 0,
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) &
-          stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex("for a loss of", ignore_case = TRUE)) ~
         -1 * as.numeric(stringr::str_extract(
-          stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+"
+          stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for a loss of)[^,]+"), "\\d+"
         )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("pass to", ignore_case = TRUE)) &
-          stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("pass to", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex("for a loss of", ignore_case = TRUE)) ~
           -1 * as.numeric(stringr::str_extract(
-            stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+"
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for a loss of)[^,]+"), "\\d+"
+          )),
+        .data$pass == 1 &
+          stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex(" for \\d+ y\\w*ds? loss", ignore_case = TRUE)) ~
+          -1 * as.numeric(stringr::str_extract(
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+"
           )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) &
-          stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~
         as.numeric(stringr::str_extract(
-          stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+"
+          stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+"
         )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("pass to", ignore_case = TRUE)) &
-          stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("pass to", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~
           as.numeric(stringr::str_extract(
-            stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+"
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+"
           )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("Yd pass", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("Yd pass", ignore_case = TRUE)) ~
           as.numeric(stringr::str_extract(
-            stringi::stri_extract_first_regex(.data$play_text, "(\\d+)\\s+Yd\\s+pass"), "\\d+"
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(\\d+)\\s+Yd\\s+pass"), "\\d+"
           )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("pass complete to", ignore_case = TRUE)) ~
           yards_gained, # 2024 has games that don't have yards in the PBP text but do have them in the yards_gained field.
 
         # 2025 has some plays list "PASSER pass" at the very end of the play_text
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("pass \\(\\w", ignore_case = TRUE)) &
-          stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract(
-            stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+"
+          stringr::str_detect(.data$cleaned_text, regex("pass \\(\\w", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract(
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+"
           )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("pass$", ignore_case = TRUE)) &
-          stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract(
-            stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+"
+          stringr::str_detect(.data$cleaned_text, regex("pass$", ignore_case = TRUE)) &
+          stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract(
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+"
           )),
         # 2025 has some plays that have yards in the PBP but no listed passer. the format is the same though
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract(
-            stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+"
+          stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) ~ as.numeric(stringr::str_extract(
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for)[^,]+"), "\\d+"
           )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) &
-          stringr::str_detect(.data$play_text, regex("for a loss of", ignore_case = TRUE)) ~
+          stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) &
+          stringr::str_detect(.data$cleaned_text, regex("for a loss of", ignore_case = TRUE)) ~
           -1 * as.numeric(stringr::str_extract(
-            stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+"
+            stringi::stri_extract_first_regex(.data$cleaned_text, "(?<=[\\s,]for a loss of)[^,]+"), "\\d+"
           )),
         .data$pass == 1 &
-          stringr::str_detect(.data$play_text, regex("^to ", ignore_case = FALSE)) &
-          stringr::str_detect(.data$play_text, regex("for no gain", ignore_case = TRUE)) ~ 0,
+          stringr::str_detect(.data$cleaned_text, regex("^to ", ignore_case = FALSE)) &
+          stringr::str_detect(.data$cleaned_text, regex("for no gain", ignore_case = TRUE)) ~ 0,
         TRUE ~ NA_real_
       )
     )

diff --git a/R/utils.R b/R/utils.R
@@ -254,7 +254,7 @@ validate_week <- function(week = NULL){
   if(!is.null(week)){
     checks <- c(
       num_check = is.numeric(week),
-      range_check = dplyr::between(as.numeric(week), 1, 15)
+      range_check = dplyr::between(as.numeric(week), 1, 16)
     )
     if(!all(checks)){
       cli::cli_abort(glue::glue("Enter valid {deparse(substitute(week))} 1-15\n(14 for seasons pre-playoff, i.e. 2014 or earlier)"))

diff --git a/cfbfastR.Rproj b/cfbfastR.Rproj
@@ -1,21 +1,21 @@
-Version: 1.0
-
-RestoreWorkspace: Default
-SaveWorkspace: Default
-AlwaysSaveHistory: Default
-
-EnableCodeIndexing: Yes
-UseSpacesForTab: Yes
-NumSpacesForTab: 2
-Encoding: UTF-8
-
-RnwWeave: Sweave
-LaTeX: pdfLaTeX
-
-AutoAppendNewline: Yes
-StripTrailingWhitespace: Yes
-
-BuildType: Package
-PackageUseDevtools: Yes
-PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
diff --git a/cran-comments.md b/cran-comments.md
@@ -4,6 +4,8 @@ This is a minor release that:
 
 * Fixes a bug in `cfbd_pbp_data()` where play-by-play data for some games were not as expected.
 * Improves `add_yardage()` where plays with missing yardage values were not being handled correctly.
+* Fixes a small bug in an underlying helper function, `validate_week()`, used throughout the package.
+* Updates documentation to reflect a handful of default parameter changes made.
 
 
 ## R CMD check results

diff --git a/man/cfbd_game_info.Rd b/man/cfbd_game_info.Rd
diff --git a/man/cfbd_play_stats_player.Rd b/man/cfbd_play_stats_player.Rd