diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c8841c356..e7e5fa890 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -14,6 +14,11 @@ Please: (backwards-incompatible changes to the documented interface) are noted. Collect the changes under the next release number (e.g. if you are on 1.7.2, then write your changes under the 1.8 heading). +- [ ] Styling and documentation checks. Make a PR comment with: + - `/style` to check the style and fix any issues. + - `/document` to check the package documentation and fix any issues. + - `/preview-docs` to preview the docs. + - See Actions GitHub tab to track progress of these commands. - See [DEVELOPMENT.md](DEVELOPMENT.md) for more information on the development process. diff --git a/.github/workflows/doc-preview.yaml b/.github/workflows/doc-preview.yaml deleted file mode 100644 index 068184225..000000000 --- a/.github/workflows/doc-preview.yaml +++ /dev/null @@ -1,65 +0,0 @@ -on: - issue_comment: - types: [created] - -name: doc-preview.yaml - -permissions: read-all - -jobs: - preview: - if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/preview-docs') }} - - runs-on: ubuntu-latest - permissions: - # Needed to write a comment on the PR - pull-requests: write - # Needed to read the PR branch - contents: read - steps: - - uses: actions/checkout@v4 - with: - # Checkout the PR branch - ref: refs/pull/${{ github.event.issue.number }}/head - - - uses: r-lib/actions/setup-pandoc@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::pkgdown, local::. - needs: website - - - name: Build site - run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) - shell: Rscript {0} - - - name: Deploy to Netlify - uses: nwtgck/actions-netlify@v3.0 - with: - # Standard config - github-token: ${{ secrets.GITHUB_TOKEN }} - deploy-message: "Deploy from GitHub Actions" - # 'docs/' is the default directory for pkgdown::build_site() - # we add 'dev' because _pkgdown.yml has 'development: mode: devel' - publish-dir: './docs/dev' - # Development deploys only - production-deploy: false - # Enable pull request comment (default) - enable-pull-request-comment: true - # Overwrite the pull request comment with updated link (default) - overwrites-pull-request-comment: true - # Don't deploy to GitHub - enable-github-deployment: false - # Don't update the status of the commit - enable-commit-status: false - # Don't comment on the commit - enable-commit-comment: false - env: - # Netlify credentials (currently from Dmitry's account) - NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} - timeout-minutes: 1 diff --git a/.github/workflows/document.yaml b/.github/workflows/document.yaml deleted file mode 100644 index bfc3c43da..000000000 --- a/.github/workflows/document.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help -# -# Modifications: -# - devtools::build_readme() -# - workflow_dispatch added to allow manual triggering of the workflow -# - trigger branches changed -# - API key secrets.DELPHI_GITHUB_ACTIONS_EPIDATA_API_KEY -on: - push: - paths: ["R/**", "README.Rmd"] - workflow_dispatch: - -name: Document - -jobs: - document: - runs-on: ubuntu-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - DELPHI_EPIDATA_KEY: ${{ secrets.DELPHI_GITHUB_ACTIONS_EPIDATA_API_KEY }} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - name: Install dependencies - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: | - any::devtools - any::roxygen2 - needs: | - devtools - roxygen2 - - - name: Document - run: roxygen2::roxygenise() - shell: Rscript {0} - - - name: Build README.md from README.Rmd - run: Rscript -e 'if (file.exists("README.Rmd")) devtools::build_readme()' - - - name: Commit and push changes - run: | - git config --local user.name "$GITHUB_ACTOR" - git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" - git add README.md - git add man/\* NAMESPACE DESCRIPTION - git commit -m "docs: document (GHA)" || echo "No changes to commit" - git pull --rebase - git push origin diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 21d030c94..78eae465f 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -1,10 +1,5 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -# -# Modifications: -# * workflow_dispatch added to allow manual triggering of the workflow -# * trigger branches changed -# * API key secrets.DELPHI_GITHUB_ACTIONS_EPIDATA_API_KEY on: push: branches: [main, dev] @@ -12,16 +7,17 @@ on: branches: [main, dev] workflow_dispatch: -name: Lint +name: lint.yaml + +permissions: read-all jobs: lint: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - DELPHI_EPIDATA_KEY: ${{ secrets.DELPHI_GITHUB_ACTIONS_EPIDATA_API_KEY }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml new file mode 100644 index 000000000..6cf91e878 --- /dev/null +++ b/.github/workflows/pr-commands.yaml @@ -0,0 +1,149 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +# Modifications: +# - Allow more roles to trigger each PR command +# - Document builds README.md from README.Rmd with devtools::build_readme() +# - Include a doc-preview command (uses Netlify to preview the docs) +on: + issue_comment: + types: [created] + +name: pr-commands.yaml + +permissions: read-all + +jobs: + document: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} + name: document + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::roxygen2 + needs: pr-document + + - name: Document + run: roxygen2::roxygenise() + shell: Rscript {0} + + - name: Build README.md from README.Rmd + run: Rscript -e 'if (file.exists("README.Rmd")) devtools::build_readme()' + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add man/\* NAMESPACE + git commit -m 'Document' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + style: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} + name: style + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + + - name: Install dependencies + run: install.packages("styler") + shell: Rscript {0} + + - name: Style + run: styler::style_pkg() + shell: Rscript {0} + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add \*.R + git commit -m 'Style' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + preview: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/preview-docs') }} + + runs-on: ubuntu-latest + permissions: + # Needed to write a comment on the PR + pull-requests: write + # Needed to read the PR branch + contents: read + steps: + - uses: actions/checkout@v4 + with: + # Checkout the PR branch + ref: refs/pull/${{ github.event.issue.number }}/head + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. 
+ needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to Netlify + uses: nwtgck/actions-netlify@v3.0 + with: + # Standard config + github-token: ${{ secrets.GITHUB_TOKEN }} + deploy-message: "Deploy from GitHub Actions" + # 'docs/' is the default directory for pkgdown::build_site() + # we add 'dev' because _pkgdown.yml has 'development: mode: devel' + publish-dir: './docs/dev' + # Development deploys only + production-deploy: false + # Enable pull request comment (default) + enable-pull-request-comment: true + # Overwrite the pull request comment with updated link (default) + overwrites-pull-request-comment: true + # Don't deploy to GitHub + enable-github-deployment: false + # Don't update the status of the commit + enable-commit-status: false + # Don't comment on the commit + enable-commit-comment: false + env: + # Netlify credentials (currently from Dmitry's account) + NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} + NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} + timeout-minutes: 1 diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml deleted file mode 100644 index 396200a5f..000000000 --- a/.github/workflows/style.yml +++ /dev/null @@ -1,87 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -# -# Modifications: -# * workflow_dispatch added to allow manual triggering of the workflow -# * trigger branches changed -# * API key secrets.DELPHI_GITHUB_ACTIONS_EPIDATA_API_KEY -on: - push: - paths: - [ - "**.[rR]", - "**.[qrR]md", - "**.[rR]markdown", - "**.[rR]nw", - "**.[rR]profile", - ] - workflow_dispatch: - -name: Style - -jobs: - style: - runs-on: ubuntu-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - DELPHI_EPIDATA_KEY: ${{ secrets.DELPHI_GITHUB_ACTIONS_EPIDATA_API_KEY }} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - name: Install dependencies - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::styler, any::roxygen2 - needs: styler - - - name: Enable styler cache - run: styler::cache_activate() - shell: Rscript {0} - - - name: Determine cache location - id: styler-location - run: | - cat( - "location=", - styler::cache_info(format = "tabular")$location, - "\n", - file = Sys.getenv("GITHUB_OUTPUT"), - append = TRUE, - sep = "" - ) - shell: Rscript {0} - - - name: Cache styler - uses: actions/cache@v3 - with: - path: ${{ steps.styler-location.outputs.location }} - key: ${{ runner.os }}-styler-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-styler- - ${{ runner.os }}- - - - name: Style - run: styler::style_pkg() - shell: Rscript {0} - - - name: Commit and push changes - run: | - if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \ - | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$')) - then - git config --local user.name "$GITHUB_ACTOR" - git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" - git commit ${FILES_TO_COMMIT[*]} -m "style: styler (GHA)" - git pull --rebase - git push origin - else - echo "No changes to commit." 
- fi diff --git a/DESCRIPTION b/DESCRIPTION index 57981a195..decdb2802 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: epiprocess Title: Tools for basic signal processing in epidemiology -Version: 0.11.0 +Version: 0.12.0 Authors@R: c( person("Jacob", "Bien", role = "ctb"), person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")), @@ -35,7 +35,8 @@ Description: This package introduces common data structures for working package is designed to be used in conjunction with `epipredict` for building and evaluating epidemiological models. License: MIT + file LICENSE -URL: https://cmu-delphi.github.io/epiprocess/ +URL: https://github.com/cmu-delphi/epiprocess, + https://cmu-delphi.github.io/epiprocess/ Depends: epidatasets, R (>= 3.6) @@ -46,9 +47,10 @@ Imports: dplyr (>= 1.1.0), ggplot2, glue, - lifecycle (>= 1.0.1), + lifecycle, lubridate, magrittr, + methods, pkgconfig, purrr, rlang, @@ -66,12 +68,13 @@ Suggests: distributional, epidatr, epipredict, + hardhat, here, knitr, outbreaks, readr, rmarkdown, - testthat (>= 3.1.5), + testthat, trendfilter, withr VignetteBuilder: @@ -102,6 +105,7 @@ Collate: 'methods-epi_archive.R' 'grouped_epi_archive.R' 'growth_rate.R' + 'inline-roxygen.R' 'key_colnames.R' 'methods-epi_df.R' 'outliers.R' diff --git a/NAMESPACE b/NAMESPACE index 2f97e5b3d..a77fca828 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ S3method(as_epi_df,tbl_df) S3method(as_epi_df,tbl_ts) S3method(as_tibble,epi_df) S3method(as_tsibble,epi_df) +S3method(autoplot,epi_archive) S3method(autoplot,epi_df) S3method(clone,epi_archive) S3method(clone,grouped_epi_archive) @@ -31,6 +32,7 @@ S3method(epix_slide,epi_archive) S3method(epix_slide,grouped_epi_archive) S3method(epix_truncate_versions_after,epi_archive) S3method(epix_truncate_versions_after,grouped_epi_archive) +S3method(filter,epi_archive) S3method(group_by,epi_archive) S3method(group_by,epi_df) S3method(group_by,grouped_epi_archive) @@ -46,10 +48,13 @@ S3method(key_colnames,epi_archive) S3method(key_colnames,epi_df) S3method(key_colnames,tbl_ts) S3method(mean,epi_df) +S3method(plot,epi_archive) +S3method(plot,epi_df) S3method(print,epi_archive) S3method(print,epi_df) S3method(print,grouped_epi_archive) S3method(print,growth_rate_params) +S3method(print,revision_analysis) S3method(summary,epi_df) S3method(ungroup,epi_df) S3method(ungroup,grouped_epi_archive) @@ -73,6 +78,7 @@ export(epi_slide_mean) export(epi_slide_opt) export(epi_slide_sum) export(epix_as_of) +export(epix_as_of_current) export(epix_fill_through_version) export(epix_merge) export(epix_slide) @@ -86,6 +92,7 @@ export(group_modify) export(growth_rate) export(growth_rate_params) export(guess_period) +export(is_epi_archive) export(is_epi_df) export(is_grouped_epi_archive) export(key_colnames) @@ -94,7 +101,9 @@ export(new_epi_archive) export(new_epi_df) export(relocate) export(rename) +export(revision_analysis) export(revision_summary) +export(set_versions_end) export(slice) export(sum_groups_epi_df) export(time_column_names) diff --git a/NEWS.md b/NEWS.md index 3ac814aa2..ba277bb49 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,8 +2,28 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicate PR's. +# epiprocess 0.12 + +## New features + +- `is_epi_archive` function has been reintroduced. +- `epix_as_of_current()` introduced as an alias for `epix_as_of(.$versions_end)`. +- Added `dplyr::filter` implementation for `epi_archive`s. 
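A minimal sketch of the three additions above, assuming the `archive_cases_dv_subset` example archive from `{epidatasets}` is attached (as in this package's other examples):

```r
library(epiprocess)
library(dplyr)

# Reintroduced class test:
is_epi_archive(archive_cases_dv_subset) # TRUE

# Snapshot of the last known version; shorthand for epix_as_of(x, x$versions_end):
latest_snapshot <- epix_as_of_current(archive_cases_dv_subset)

# dplyr::filter() now works directly on archives:
ca_history <- archive_cases_dv_subset %>% filter(geo_value == "ca")
```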
+ +## Improvements + +- Various documentation has been updated, simplified, and improved with better + examples. + # epiprocess 0.11 +## Highlights + +`{epiprocess}` should once again install without requiring Rtools or a +compiler! We've also updated some function interfaces to be more consistent +throughout the package and with the tidyverse, and improved the generality of, +and fixed bugs in, various functions and documentation. + ## Breaking changes - `growth_rate()` argument order and names have changed. You will need to @@ -15,7 +35,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat for users without a compiler, we have placed `{trendfilter}` in Suggests:; if you want to use `method = "trendfilter"` you will need to manually install this dependency (e.g., with `remotes::install_github("glmgen/trendfilter")`). -- In `revision_summary()`: +- In `revision_summary()` (aliased to `revision_analysis()`): - The `should_compactify` argument is now called `compactify`. To migrate, change any calls with `should_compactify =` to `compactify =`. - Output now uses the name `lag_near_latest` instead of `time_near_latest`. To @@ -29,6 +49,11 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat - `min_waiting_period` now defines a nonstrict inequality instead of a strict one. To obtain the old bounds, bump the `min_waiting_period` up to the next possible value for your `time_type`. + - Added a `print()` method to take the place of `print_inform`. + - Removed the argument `print_inform`; informative printing is now handled by + the `print()` method. Added an option, `return_only_tibble`, to return only + the tibble of results rather than the full S3 object. + - In `key_colnames()`: - On regular (non-`epi_df`) data frames, now requires manual specification of `geo_keys`, `other_keys`, and `time_keys`. @@ -42,12 +67,16 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat `new_epi_archive()`. - `validate_epi_archive()` now follows the validator convention of operating on an "unvalidated" `epi_archive` (from `new_epi_archive`) rather than arguments. +- `autoplot.epi_df()` argument `.max_facets` has been deprecated in favor of + `.facet_filter`, which allows explicit selection of the facets to show. ## Improvements -- `revision_summary()` now supports all `time_type`s. +- `revision_summary()` now supports all `time_type`s. The printed summary is also more attractive. - The compactification tolerance setting now works with integer-type columns. - Various functions are now faster, using faster variants of core operations and avoiding reconstructing grouped `epi_df`s when unnecessary. +- Added `autoplot.epi_archive()` to display revision patterns. +- `sum_groups_epi_df()` now supports tidyselect syntax in its second argument (#655). ## Bug fixes diff --git a/R/archive.R b/R/archive.R index 922f77835..44058f8b2 100644 --- a/R/archive.R +++ b/R/archive.R @@ -52,13 +52,6 @@ validate_version_bound <- function(version_bound, x, na_ok = FALSE, class = "epiprocess__version_bound_mismatched_class" ) } - if (!identical(typeof(version_bound), typeof(x[["version"]]))) { - cli_abort( - "{version_bound_arg} must have the same `typeof` as x$version, - which has a `typeof` of {typeof(x$version)}", - class = "epiprocess__version_bound_mismatched_typeof" - ) - } } return(invisible(NULL)) @@ -207,38 +200,35 @@ next_after.Date <- function(x) x + 1L #' undergo tiny nonmeaningful revisions and the archive object with the #' default setting is too large.
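To ground the version-bound arguments documented just below, here is a toy sketch of supplying them to `as_epi_archive()`; the data and values are hypothetical, not from the package:

```r
library(epiprocess)

update_tbl <- tibble::tibble(
  geo_value = "ca",
  time_value = as.Date("2020-01-01") + 0:2,
  version = as.Date("2020-01-02") + 0:2,
  cases = 1:3
)

# Treat data published at the latest observed version as still subject to
# being overwritten, and record that we observed (empty) updates through
# 2020-01-10:
arc <- as_epi_archive(
  update_tbl,
  clobberable_versions_start = max(update_tbl$version),
  versions_end = as.Date("2020-01-10")
)
```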
#' @param clobberable_versions_start Optional; `length`-1; either a value of the -#' same `class` and `typeof` as `x$version`, or an `NA` of any `class` and -#' `typeof`: specifically, either (a) the earliest version that could be -#' subject to "clobbering" (being overwritten with different update data, but -#' using the *same* version tag as the old update data), or (b) `NA`, to -#' indicate that no versions are clobberable. There are a variety of reasons -#' why versions could be clobberable under routine circumstances, such as (a) -#' today's version of one/all of the columns being published after initially -#' being filled with `NA` or LOCF, (b) a buggy version of today's data being -#' published but then fixed and republished later in the day, or (c) data -#' pipeline delays (e.g., publisher uploading, periodic scraping, database -#' syncing, periodic fetching, etc.) that make events (a) or (b) reflected -#' later in the day (or even on a different day) than expected; potential -#' causes vary between different data pipelines. The default value is `NA`, -#' which doesn't consider any versions to be clobberable. Another setting that -#' may be appropriate for some pipelines is `max_version_with_row_in(x)`. -#' @param versions_end Optional; length-1, same `class` and `typeof` as -#' `x$version`: what is the last version we have observed? The default is +#' same `class` as `x$version`, or an `NA` of any `class`: specifically, +#' either (a) the earliest version that could be subject to "clobbering" +#' (being overwritten with different update data, but using the *same* version +#' tag as the old update data), or (b) `NA`, to indicate that no versions are +#' clobberable. There are a variety of reasons why versions could be +#' clobberable under routine circumstances, such as (a) today's version of +#' one/all of the columns being published after initially being filled with +#' `NA` or LOCF, (b) a buggy version of today's data being published but then +#' fixed and republished later in the day, or (c) data pipeline delays (e.g., +#' publisher uploading, periodic scraping, database syncing, periodic +#' fetching, etc.) that make events (a) or (b) reflected later in the day (or +#' even on a different day) than expected; potential causes vary between +#' different data pipelines. The default value is `NA`, which doesn't consider +#' any versions to be clobberable. Another setting that may be appropriate for +#' some pipelines is `max_version_with_row_in(x)`. +#' @param versions_end Optional; length-1, same `class` as `x$version`: what is +#' the last version we have observed? The default is #' `max_version_with_row_in(x)`, but values greater than this could also be #' valid, and would indicate that we observed additional versions of the data #' beyond `max(x$version)`, but they all contained empty updates. (The default #' value of `clobberable_versions_start` does not fully trust these empty #' updates, and assumes that any version `>= max(x$version)` could be #' clobbered.) If `nrow(x) == 0`, then this argument is mandatory. -#' @return An `epi_archive` object. +#' @return * Of `new_epi_archive`: an (unvalidated) `epi_archive` #' #' @seealso [`epix_as_of`] [`epix_merge`] [`epix_slide`] #' @importFrom dplyr if_any if_all everything #' @importFrom utils capture.output #' -#' @name epi_archive -#' @export -#' #' @examples #' # Simple ex. 
with necessary keys #' tib <- tibble::tibble( @@ -277,6 +267,9 @@ next_after.Date <- function(x) x + 1L #' #' x <- df %>% as_epi_archive(other_keys = "county") #' +#' @name epi_archive +#' @order 3 +#' @export new_epi_archive <- function( x, geo_type, @@ -329,7 +322,14 @@ new_epi_archive <- function( #' Perform second (costly) round of validation that `x` is a proper `epi_archive` #' +#' Does not validate `geo_type` or `time_type` against `geo_value` and +#' `time_value` columns. These are assumed to have been set compatibly. +#' +#' @return * Of `validate_epi_archive`: an `epi_archive`, +#' [invisibly][base::invisible] (or raises an error if `x` was invalid) +#' #' @rdname epi_archive +#' @order 4 #' @export validate_epi_archive <- function(x) { assert_class(x, "epi_archive") @@ -515,8 +515,10 @@ is_locf <- function(vec, abs_tol, is_key) { # nolint: object_usage_linter #' @param .versions_end location based versions_end, used to avoid prefix #' `version = issue` from being assigned to `versions_end` instead of being #' used to rename columns. +#' @return * Of `as_epi_archive`: an `epi_archive` object #' #' @rdname epi_archive +#' @order 1 #' #' @export as_epi_archive <- function( @@ -808,3 +810,16 @@ clone.epi_archive <- function(x) { x$DT <- data.table::copy(x$DT) x } + +#' Test for `epi_archive` format +#' +#' @param x An object. +#' @return * Of `is_epi_archive`: `TRUE` if the object inherits from `epi_archive`, +#' otherwise `FALSE`. +#' +#' @rdname epi_archive +#' @order 2 +#' @export +is_epi_archive <- function(x) { + inherits(x, "epi_archive") +} diff --git a/R/autoplot.R b/R/autoplot.R index ecfe5f1c9..2644f3ba4 100644 --- a/R/autoplot.R +++ b/R/autoplot.R @@ -1,7 +1,7 @@ -#' Automatically plot an epi_df +#' Automatically plot an epi_df or epi_archive #' -#' @param object An `epi_df` -#' @param ... <[`tidy-select`][dplyr_tidy_select]> One or more unquoted +#' @param object,x An `epi_df` or `epi_archive` +#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted #' expressions separated by commas. Variable names can be used as if they #' were positions in the data frame, so expressions like `x:y` can #' be used to select a range of variables. @@ -16,16 +16,27 @@ #' * `none` - no coloring aesthetic is applied #' @param .facet_by Similar to `.color_by` except that the default is to display #' each numeric variable on a separate facet -#' @param .base_color Lines will be shown with this color. For example, with a -#' single numeric variable and faceting by `geo_value`, all locations would -#' share the same color line. -#' @param .max_facets Cut down of the number of facets displayed. Especially -#' useful for testing when there are many `geo_value`'s or keys. +#' @param .base_color Lines will be shown with this color if `.color_by == "none"`. +#' For example, with a single numeric variable and faceting by `geo_value`, all +#' locations would share the same color line. +#' @param .max_facets `r lifecycle::badge("deprecated")` +#' @param .facet_filter Select which facets will be displayed. Especially +#' useful when there are many `geo_value`s or keys. This is a +#' <[`rlang`][rlang::args_data_masking]> expression along the lines of [dplyr::filter()]. +#' However, it must be a single expression combined with the `&` operator. This +#' contrasts with the typical use case, which allows multiple comma-separated expressions +#' that are implicitly combined with `&`.
When multiple variables are selected +#' with `...`, their names can be filtered in combination with other factors +#' by using `.response_name`. See the examples below. #' -#' @return A ggplot object +#' +#' +#' @return A [ggplot2::ggplot] object #' @export +#' @name autoplot-epi #' #' @examples +#' # -- Use it on an `epi_df` #' autoplot(cases_deaths_subset, cases, death_rate_7d_av) #' autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value") #' autoplot(cases_deaths_subset, case_rate_7d_av, @@ -41,16 +52,39 @@ #' autoplot(cases_deaths_subset, case_rate_7d_av, #' .base_color = "red", .facet_by = "geo_value" #' ) +#' +#' # filter to only some facets, must be explicitly combined +#' autoplot(cases_deaths_subset, cases, death_rate_7d_av, +#' .facet_by = "all", +#' .facet_filter = (.response_name == "cases" & geo_value %in% c("tx", "pa")) | +#' (.response_name == "death_rate_7d_av" & +#' geo_value %in% c("ca", "fl", "ga", "ny")) +#' ) +#' # Just an alias for convenience +#' plot(cases_deaths_subset, cases, death_rate_7d_av, +#' .facet_by = "all", +#' .facet_filter = (.response_name == "cases" & geo_value %in% c("tx", "pa")) | +#' (.response_name == "death_rate_7d_av" & +#' geo_value %in% c("ca", "fl", "ga", "ny")) +#' ) autoplot.epi_df <- function( object, ..., .color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"), .facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"), .base_color = "#3A448F", - .max_facets = Inf) { + .facet_filter = NULL, + .max_facets = deprecated()) { .color_by <- rlang::arg_match(.color_by) .facet_by <- rlang::arg_match(.facet_by) + .facet_filter <- rlang::enquo(.facet_filter) - assert(anyInfinite(.max_facets), checkInt(.max_facets), combine = "or") + if (lifecycle::is_present(.max_facets)) { + lifecycle::deprecate_warn( + "0.11.1", + "autoplot.epi_df(.max_facets = )", + "autoplot.epi_df(.facet_filter = )" + ) + } assert_character(.base_color, len = 1) key_cols <- key_colnames(object) @@ -58,44 +92,14 @@ autoplot.epi_df <- function( geo_and_other_keys <- key_colnames(object, exclude = "time_value") # --- check for numeric variables - allowed <- purrr::map_lgl(object[non_key_cols], is.numeric) - allowed <- allowed[allowed] - if (length(allowed) == 0 && rlang::dots_n(...) == 0L) { - cli::cli_abort("No numeric variables were available to plot automatically.", - class = "epiprocess__no_numeric_vars_available" - ) - } - vars <- tidyselect::eval_select(rlang::expr(c(...)), object) - if (rlang::is_empty(vars)) { # find them automatically if unspecified - vars <- tidyselect::eval_select(names(allowed)[1], object) - cli::cli_warn( - "Plot variable was unspecified. Automatically selecting {.var {names(allowed)[1]}}.", - class = "epiprocess__unspecified_plot_var" - ) - } else { # if variables were specified, ensure that they are numeric - ok <- names(vars) %in% names(allowed) - if (!any(ok)) { - cli::cli_abort( - "None of the requested variables {.var {names(vars)}} are numeric.", - class = "epiprocess__all_requested_vars_not_numeric" - ) - } else if (!all(ok)) { - cli::cli_warn( - c( - "Only the requested variables {.var {names(vars)[ok]}} are numeric.", - i = "`autoplot()` cannot display {.var {names(vars)[!ok]}}." 
- ), - class = "epiprocess__some_requested_vars_not_numeric" - ) - vars <- vars[ok] - } - } + vars <- autoplot_check_viable_response_vars(object, ..., non_key_cols = non_key_cols) + nvars <- length(vars) # --- create a viable df to plot pos <- tidyselect::eval_select( rlang::expr(c("time_value", tidyselect::all_of(geo_and_other_keys), names(vars))), object ) - if (length(vars) > 1) { + if (nvars > 1) { object <- tidyr::pivot_longer( object[pos], tidyselect::all_of(names(vars)), values_to = ".response", @@ -106,35 +110,34 @@ autoplot.epi_df <- function( } all_keys <- rlang::syms(as.list(geo_and_other_keys)) other_keys <- rlang::syms(as.list(setdiff(geo_and_other_keys, "geo_value"))) - all_avail <- rlang::syms(as.list(c(geo_and_other_keys, ".response_name"))) + all_avail <- rlang::syms(as.list(c( + geo_and_other_keys, + if (nvars > 1) ".response_name" else NULL + ))) object <- object %>% dplyr::mutate( .colours = switch(.color_by, - all_keys = interaction(!!!all_keys, sep = "/"), + all_keys = interaction(!!!all_keys, sep = " / "), geo_value = .data$geo_value, - other_keys = interaction(!!!other_keys, sep = "/"), - all = interaction(!!!all_avail, sep = "/"), + other_keys = interaction(!!!other_keys, sep = " / "), + all = interaction(!!!all_avail, sep = " / "), NULL ), .facets = switch(.facet_by, - all_keys = interaction(!!!all_keys, sep = "/"), + all_keys = interaction(!!!all_keys, sep = " / "), geo_value = as.factor(.data$geo_value), - other_keys = interaction(!!!other_keys, sep = "/"), - all = interaction(!!!all_avail, sep = "/"), + other_keys = interaction(!!!other_keys, sep = " / "), + all = interaction(!!!all_avail, sep = " / "), NULL ) ) - if (.max_facets < Inf && ".facets" %in% names(object)) { - n_facets <- nlevels(object$.facets) - if (n_facets > .max_facets) { - top_n <- levels(as.factor(object$.facets))[seq_len(.max_facets)] - object <- dplyr::filter(object, .data$.facets %in% top_n) %>% - dplyr::mutate(.facets = droplevels(.data$.facets)) - if (".colours" %in% names(object)) { - object <- dplyr::mutate(object, .colours = droplevels(.data$.colours)) - } + if (!rlang::quo_is_null(.facet_filter) && ".facets" %in% names(object)) { + object <- dplyr::filter(object, !!.facet_filter) %>% + dplyr::mutate(.facets = droplevels(.data$.facets)) + if (".colours" %in% names(object)) { + object <- dplyr::mutate(object, .colours = droplevels(.data$.colours)) } } @@ -170,3 +173,197 @@ autoplot.epi_df <- function( } p } + +autoplot_check_viable_response_vars <- function( + object, ..., non_key_cols, call = caller_env()) { + allowed <- purrr::map_lgl(object[non_key_cols], is.numeric) + allowed <- allowed[allowed] + if (length(allowed) == 0 && rlang::dots_n(...) == 0L) { + cli::cli_abort("No numeric variables were available to plot automatically.", + class = "epiprocess__no_numeric_vars_available", + call = call + ) + } + vars <- tidyselect::eval_select(rlang::expr(c(...)), object) + if (rlang::is_empty(vars)) { # find them automatically if unspecified + vars <- tidyselect::eval_select(names(allowed)[1], object) + cli::cli_warn( + "Plot variable was unspecified. 
Automatically selecting {.var {names(allowed)[1]}}.", + class = "epiprocess__unspecified_plot_var", + call = call + ) + } else { # if variables were specified, ensure that they are numeric + ok <- names(vars) %in% names(allowed) + if (!any(ok)) { + cli::cli_abort( + "None of the requested variables {.var {names(vars)}} are numeric.", + class = "epiprocess__all_requested_vars_not_numeric", + call = call + ) + } else if (!all(ok)) { + cli::cli_warn( + c( + "Only the requested variables {.var {names(vars)[ok]}} are numeric.", + i = "`autoplot()` cannot display {.var {names(vars)[!ok]}}." + ), + class = "epiprocess__some_requested_vars_not_numeric", + call = call + ) + vars <- vars[ok] + } + } + vars +} + + + +#' @param .versions Select which versions will be displayed. By default, +#' a separate line will be shown with the data as it would have appeared on +#' every day in the archive. This can sometimes become overwhelming. For +#' example, daily data would display a line for what the data would have looked +#' like on every single day. To override this, you can select specific dates, +#' by passing a vector of values here. Alternatively, a sequence can be +#' automatically created by passing a string like `"2 weeks"` or `"month"`. +#' For time types where the `time_value` is a date object, any string that +#' is interpretable by [base::seq.Date()] is allowed. +#' +#' For `time_type = "integer"`, an integer larger than 1 will give a subset +#' of versions. +#' @param .mark_versions Logical. Indicate whether to mark each version with +#' a vertical line. Note that displaying many versions can become busy. +#' +#' @export +#' @rdname autoplot-epi +#' +#' @examples +#' +#' # -- Use it on an archive +#' +#' autoplot(archive_cases_dv_subset, percent_cli, .versions = "week") +#' autoplot(archive_cases_dv_subset_all_states, percent_cli, +#' .versions = "week", +#' .facet_filter = geo_value %in% c("or", "az", "vt", "ms") +#' ) +#' autoplot(archive_cases_dv_subset, percent_cli, +#' .versions = "month", +#' .facet_filter = geo_value == "ca" +#' ) +#' autoplot(archive_cases_dv_subset_all_states, percent_cli, +#' .versions = "1 month", +#' .facet_filter = geo_value %in% c("or", "az", "vt", "ms"), +#' .mark_versions = TRUE +#' ) +#' # Just an alias for convenience +#' plot(archive_cases_dv_subset_all_states, percent_cli, +#' .versions = "1 month", +#' .facet_filter = geo_value %in% c("or", "az", "vt", "ms"), +#' .mark_versions = TRUE +#' ) +autoplot.epi_archive <- function(object, ..., + .base_color = "black", + .versions = NULL, + .mark_versions = FALSE, + .facet_filter = NULL) { + time_type <- object$time_type + checkmate::assert_logical(.mark_versions, len = 1L) + if (time_type == "custom") { + cli_abort( + "This `epi_archive` has custom `time_type`. 
This is currently unsupported.", + class = "epiprocess__autoplot_archive_custom_time_type" + ) + } + + max_version <- max(object$DT$version) + min_version <- min(object$DT$version) + + tt_lookup <- c("day" = "day", "week" = "week", "yearmonth" = "month") + .versions <- .versions %||% ifelse(time_type == "integer", 1L, unname(tt_lookup[time_type])) + if (is.character(.versions) || length(.versions) == 1L) { + if (is.numeric(.versions)) .versions <- round(abs(.versions)) + .versions <- seq(min_version, max_version - 1, by = .versions) + } else if (methods::is(.versions, "Date") || is.numeric(.versions)) { + .versions <- .versions[min_version <= .versions & .versions <= max_version] + } else { + cli_abort( + "Requested `.versions` don't appear to match the available `time_type`.", + class = "epiprocess__autoplot_archive_bad_versions" + ) + } + + + finalized <- epix_as_of(object, max_version) + key_cols <- key_colnames(finalized) + non_key_cols <- setdiff(names(finalized), key_cols) + vars <- autoplot_check_viable_response_vars(finalized, ..., non_key_cols = non_key_cols) + nvars <- length(vars) + + bp <- autoplot.epi_df( + finalized, ..., + .base_color = .base_color, .facet_by = "all", + .facet_filter = {{ .facet_filter }}, .color_by = "none" + ) + ggplot2::xlab("Date") + geo_and_other_keys <- key_colnames(object, exclude = c("time_value", "version")) + all_avail <- rlang::syms(as.list(c( + geo_and_other_keys, + if (nvars > 1) ".response_name" else NULL + ))) + + snapshots <- purrr::map( + .versions, + function(v) { + dplyr::mutate(epix_as_of(object, v), version = v) + } + ) %>% + purrr::list_rbind() %>% + dplyr::mutate(.facets = interaction(!!!all_avail, sep = " / ")) + + if (nvars > 1) { + snapshots <- tidyr::pivot_longer( + snapshots, tidyselect::all_of(names(vars)), + values_to = ".response", + names_to = ".response_name" + ) + } else { + snapshots <- dplyr::rename(snapshots, .response := !!names(vars)) # nolint: object_usage_linter + } + snapshots <- snapshots %>% + dplyr::filter(!is.na(.response), .data$.facets %in% unique(bp$data$.facets)) + + bp <- bp + + ggplot2::geom_line( + data = snapshots, + mapping = ggplot2::aes(y = .response, color = version, group = factor(version)) + ) + + if (methods::is(.versions, "Date")) { + bp <- bp + ggplot2::scale_color_viridis_c(name = "Version", trans = "date") + } else { + bp <- bp + ggplot2::scale_color_viridis_c(name = "Version") + } + + if (.mark_versions) { + bp <- bp + + ggplot2::geom_vline( + data = snapshots, + ggplot2::aes(color = version, xintercept = version), + linewidth = .5, + linetype = 3, + show.legend = FALSE + ) + } + # make the finalized layer last + bp$layers <- rev(bp$layers) + bp +} + +#' @export +#' @rdname autoplot-epi +plot.epi_df <- function(x, ...) { + autoplot(x, ...) +} + +#' @export +#' @rdname autoplot-epi +plot.epi_archive <- function(x, ...) { + autoplot(x, ...) +} diff --git a/R/epi_df.R b/R/epi_df.R index 4955ab08d..b9d999d97 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -6,7 +6,8 @@ #' which can be seen as measured variables at each key. In brief, an `epi_df` #' represents a snapshot of an epidemiological data set at a point in time. #' -#' @details An `epi_df` is a tibble with (at least) the following columns: +#' @details An `epi_df` is a kind of tibble with (at least) the following +#' columns: #' #' - `geo_value`: A character vector representing the geographical unit of #' observation. 
This could be a country code, a state name, a county code, @@ -14,10 +15,11 @@ #' - `time_value`: A date or integer vector representing the time of observation. #' #' Other columns can be considered as measured variables, which we also refer to -#' as signal variables. An `epi_df` object also has metadata with (at least) -#' the following fields: +#' as indicators or signals. An `epi_df` object also has metadata with (at +#' least) the following fields: #' #' * `geo_type`: the type for the geo values. +#' * `time_type`: the type for the time values. #' * `as_of`: the time value at which the given data were available. #' #' Most users should use `as_epi_df`. The input tibble `x` to the constructor diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index 378ea13bc..5a64e3ff1 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -203,13 +203,12 @@ ungroup.grouped_epi_archive <- function(x, ...) { } -#' @rdname epix_slide -#' #' @importFrom data.table key address rbindlist setDF copy #' @importFrom tibble as_tibble new_tibble validate_tibble #' @importFrom dplyr group_by groups #' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms #' env missing_arg +#' #' @export epix_slide.grouped_epi_archive <- function( .x, @@ -437,9 +436,13 @@ epix_slide.grouped_epi_archive <- function( out <- lapply(.versions, function(.version) { # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`, # `epi_archive` if `all_versions` is `TRUE`: + min_time_value <- .version - .before + if (is.na(min_time_value)) { + min_time_value <- -Inf + } as_of_raw <- .x$private$ungrouped %>% epix_as_of( .version, - min_time_value = .version - .before, + min_time_value = min_time_value, all_versions = .all_versions ) diff --git a/R/inline-roxygen.R b/R/inline-roxygen.R new file mode 100644 index 000000000..e6fb4cd11 --- /dev/null +++ b/R/inline-roxygen.R @@ -0,0 +1,16 @@ +# Helpers here are meant to be used inside inline R expressions within roxygen2 +# documentation when @template is inappropriate. + +#' Description of a single arg that tidyselects value variables +#' +#' Not meant for when describing tidyselect `...`. +#' +#' @keywords internal +tidyselect_arg_roxygen <- ' + <[`tidy-select`][dplyr::dplyr_tidy_select]> An unquoted column + name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`), + [other tidy-select expression][tidyselect::language], or a vector of + characters (e.g. `c("cases", "deaths")`). Variable names can be used as if + they were positions in the data frame, so expressions like `x:y` can be + used to select a range of variables. 
+' diff --git a/R/key_colnames.R b/R/key_colnames.R index eeecce05b..f685b7210 100644 --- a/R/key_colnames.R +++ b/R/key_colnames.R @@ -91,8 +91,8 @@ key_colnames.epi_df <- function(x, ..., if (!identical(other_keys, expected_other_keys)) { cli_abort(c( "The provided `other_keys` argument didn't match the `other_keys` of `x`", - "*" = "`other_keys` was {format_chr_with_quotes(other_keys)}", - "*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}", + "*" = "`other_keys` was {format_chr_deparse(other_keys)}", + "*" = "`expected_other_keys` was {format_chr_deparse(expected_other_keys)}", "i" = "If you know that `x` will always be an `epi_df` and resolve this discrepancy by adjusting the metadata of `x`, you shouldn't have to pass `other_keys =` here anymore, diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 362fd4eaa..6c372d9bf 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -58,6 +58,7 @@ #' epix_as_of(archive_cases_dv_subset2, max(archive_cases_dv_subset$DT$version)) #' #' @importFrom data.table between key +#' @importFrom checkmate assert_scalar assert_logical assert_class #' @export epix_as_of <- function(x, version, min_time_value = -Inf, all_versions = FALSE, max_version = deprecated()) { @@ -79,15 +80,17 @@ epix_as_of <- function(x, version, min_time_value = -Inf, all_versions = FALSE, "`version` must have the same `class` vector as `epi_archive$DT$version`." ) } - if (!identical(typeof(version), typeof(x$DT$version))) { - cli_abort( - "`version` must have the same `typeof` as `epi_archive$DT$version`." - ) - } assert_scalar(version, na.ok = FALSE) if (version > x$versions_end) { cli_abort("`version` must be at most `epi_archive$versions_end`.") } + assert_scalar(min_time_value, na.ok = FALSE) + min_time_value_inf <- is.infinite(min_time_value) && min_time_value < 0 + min_time_value_same_type <- identical(class(min_time_value), class(x$DT$time_value)) + if (!min_time_value_inf && !min_time_value_same_type) { + cli_abort("`min_time_value` must be either -Inf or a time_value of the same type and + class as `epi_archive$time_value`.") + } assert_logical(all_versions, len = 1) if (!is.na(x$clobberable_versions_start) && version >= x$clobberable_versions_start) { cli_warn( @@ -100,39 +103,63 @@ epix_as_of <- function(x, version, min_time_value = -Inf, all_versions = FALSE, ) } - # We can't disable nonstandard evaluation nor use the `..` feature in the `i` - # argument of `[.data.table` below; try to avoid problematic names and abort - # if we fail to do so: - .min_time_value <- min_time_value - .version <- version - if (any(c(".min_time_value", ".version") %in% names(x$DT))) { - cli_abort("epi_archives can't contain a `.min_time_value` or `.version` column") - } - # Filter by version and return if (all_versions) { # epi_archive is copied into result, so we can modify result directly result <- epix_truncate_versions_after(x, version) - result$DT <- result$DT[time_value >= .min_time_value, ] # nolint: object_usage_linter + if (!min_time_value_inf) { + # See below for why we need this branch. 
+ filter_mask <- result$DT$time_value >= min_time_value + result$DT <- result$DT[filter_mask, ] # nolint: object_usage_linter + } return(result) } # Make sure to use data.table ways of filtering and selecting - as_of_epi_df <- x$DT[time_value >= .min_time_value & version <= .version, ] %>% # nolint: object_usage_linter - unique( - by = c("geo_value", "time_value", other_keys), - fromLast = TRUE - ) %>% + if (min_time_value_inf) { + # This branch is needed for `epix_as_of` to work with `yearmonth` time type + # to avoid time_value > .min_time_value, which is NA for `yearmonth`. + filter_mask <- x$DT$version <= version + } else { + filter_mask <- x$DT$time_value >= min_time_value & x$DT$version <= version + } + as_of_epi_df <- x$DT[filter_mask, ] %>% + unique(by = c("geo_value", "time_value", other_keys), fromLast = TRUE) %>% + as.data.frame() %>% tibble::as_tibble() %>% dplyr::select(-"version") %>% - as_epi_df( - as_of = version, - other_keys = other_keys - ) + as_epi_df(as_of = version, other_keys = other_keys) return(as_of_epi_df) } +#' Get the latest snapshot from an `epi_archive` object. +#' +#' The latest snapshot is the snapshot of the last known version. +#' +#' @param x An `epi_archive` object +#' @return The latest snapshot from an `epi_archive` object +#' @export +epix_as_of_current <- function(x) { + assert_class(x, "epi_archive") + x %>% epix_as_of(.$versions_end) +} + +#' Set the `versions_end` attribute of an `epi_archive` object +#' +#' An escape hatch for epix_as_of, which does not allow version > +#' `$versions_end`. +#' +#' @param x An `epi_archive` object +#' @param versions_end The new `versions_end` value +#' @return An `epi_archive` object with the updated `versions_end` attribute +#' @export +set_versions_end <- function(x, versions_end) { + assert_class(x, "epi_archive") + validate_version_bound(versions_end, x$DT, na_ok = FALSE) + x$versions_end <- versions_end + x +} #' Fill `epi_archive` unobserved history #' @@ -450,7 +477,8 @@ epix_merge <- function(x, y, y_nonby_colnames <- setdiff(names(y_dt), by) if (length(intersect(x_nonby_colnames, y_nonby_colnames)) != 0L) { cli_abort(" - `x` and `y` DTs have overlapping non-by column names; + `x` and `y` DTs both have measurement columns named + {format_chr_with_quotes(intersect(x_nonby_colnames, y_nonby_colnames))}; this is currently not supported; please manually fix up first: any overlapping columns that can are key-like should be incorporated into the key, and other columns should be renamed. @@ -622,72 +650,89 @@ epix_detailed_restricted_mutate <- function(.data, ...) { } -#' Slide a function over variables in an `epi_archive` or `grouped_epi_archive` +#' Take each requested (group and) version in an archive, run a computation (e.g., forecast) #' -#' Slides a given function over variables in an `epi_archive` object. This -#' behaves similarly to `epi_slide()`, with the key exception that it is -#' version-aware: the sliding computation at any given reference time t is -#' performed on **data that would have been available as of t**. This function -#' is intended for use in accurate backtesting of models; see -#' `vignette("backtesting", package="epipredict")` for a walkthrough. +#' ... and collect the results. 
This is useful for more accurately simulating +#' how a forecaster, nowcaster, or other algorithm would have behaved in real +#' time, factoring in reporting latency and data revisions; see +#' \href{https://cmu-delphi.github.io/epipredict/articles/backtesting.html}{`vignette("backtesting", +#' package="epipredict")`} for a walkthrough. +#' +#' This is similar to looping over versions and calling [`epix_as_of`], but has +#' some conveniences such as working naturally with [`grouped_epi_archive`]s, +#' optional time windowing, and syntactic sugar to make things shorter to write. #' #' @param .x An [`epi_archive`] or [`grouped_epi_archive`] object. If ungrouped, #' all data in `x` will be treated as part of a single data group. #' @param .f Function, formula, or missing; together with `...` specifies the -#' computation to slide. To "slide" means to apply a computation over a -#' sliding (a.k.a. "rolling") time window for each data group. The window is -#' determined by the `.before` parameter (see details for more). If a -#' function, `.f` must have the form `function(x, g, t, ...)`, where -#' -#' - "x" is an epi_df with the same column names as the archive's `DT`, minus -#' the `version` column -#' - "g" is a one-row tibble containing the values of the grouping variables -#' for the associated group -#' - "t" is the ref_time_value for the current window -#' - "..." are additional arguments +#' computation. The computation will be run on each requested group-version +#' combination, with a time window filter applied if `.before` is supplied. +#' +#' If `.f` is a function, it must have the form `function(x, g, v)` or +#' `function(x, g, v, <additional configuration args>)`, where +#' +#' - `x` is an `epi_df` with the same column names as the archive's `DT`, +#' minus the `version` column. (Or, if `.all_versions = TRUE`, an +#' `epi_archive` with the requested partial version history.) +#' +#' - `g` is a one-row tibble containing the values of the grouping variables +#' for the associated group. +#' +#' - `v` (length-1) is the associated `version` (one of the requested +#' `.versions`) +#' +#' - `<additional configuration args>` are optional; you can add such +#' arguments to your function and set them by passing them through the +#' `...` argument to `epix_slide()`. #' #' If a formula, `.f` can operate directly on columns accessed via `.x$var` or #' `.$var`, as in `~ mean(.x$var)` to compute a mean of a column `var` for #' each group-`ref_time_value` combination. The group key can be accessed via -#' `.y` or `.group_key`, and the reference time value can be accessed via `.z` -#' or `.ref_time_value`. If `.f` is missing, then `...` will specify the -#' computation. +#' `.y` or `.group_key`, and the reference time value can be accessed via +#' `.z`, `.version`, or `.ref_time_value`. If `.f` is missing, then `...` will +#' specify the computation. #' @param ... Additional arguments to pass to the function or formula specified #' via `.f`. Alternatively, if `.f` is missing, then the `...` is interpreted #' as a ["data-masking"][rlang::args_data_masking] expression or expressions #' for tidy evaluation; in addition to referring to columns directly by name, the #' expressions have access to `.data` and `.env` pronouns as in `dplyr` verbs, #' and can also refer to `.x` (not the same as the input epi_archive), -#' `.group_key`, and `.ref_time_value`. See details for more. -#' @param .before How many time values before the `.ref_time_value` -#' should each snapshot handed to the function `.f` contain?
If provided, it -#' should be a single value that is compatible with the time_type of the -#' time_value column (more below), but most commonly an integer. This window -#' endpoint is inclusive. For example, if `.before = 7`, `time_type` -#' in the archive is "day", and the `.ref_time_value` is January 8, then the -#' smallest time_value in the snapshot will be January 1. If missing, then the -#' default is no limit on the time values, so the full snapshot is given. -#' @param .versions Reference time values / versions for sliding -#' computations; each element of this vector serves both as the anchor point -#' for the `time_value` window for the computation and the `max_version` -#' `epix_as_of` which we fetch data in this window. If missing, then this will -#' set to a regularly-spaced sequence of values set to cover the range of -#' `version`s in the `DT` plus the `versions_end`; the spacing of values will -#' be guessed (using the GCD of the skips between values). +#' `.group_key` and `.version`/`.ref_time_value`. See details for more. +#' @param .before Optional; applies a `time_value` filter before running each +#' computation. The default is not to apply a `time_value` filter. If +#' provided, it should be a single integer or difftime that is compatible with +#' the time_type of the time_value column. If an integer, then the minimum +#' possible `time_value` included will be that many time steps (according to +#' the `time_type`) before each requested `.version`. This window endpoint is +#' inclusive. For example, if `.before = 14`, the `time_type` in the archive +#' is "day", and the requested `.version` is January 15, then the smallest +#' `time_value` possible in the snapshot will be January 1. Note that +#' this does not mean that there will be 14 or 15 distinct `time_value`s +#' actually appearing in the data; for most reporting streams, reporting as of +#' January 15 won't include `time_value`s all the way through January 14, due +#' to reporting latency. Unlike `epi_slide()`, `epix_slide()` won't fill in +#' any missing `time_value`s in this window. +#' @param .versions Requested versions on which to run the computation. Each +#' requested `.version` also serves as the anchor point from which +#' the `time_value` window specified by `.before` is drawn. If `.versions` is +#' missing, it will be set to a regularly-spaced sequence of values covering +#' the range of `version`s in the `DT` plus the `versions_end`; the +#' spacing of values will be guessed (using the GCD of the skips between +#' values). #' @param .new_col_name Either `NULL` or a string indicating the name of the new #' column that will contain the derived values. The default, `NULL`, will use #' the name "slide_value" unless your slide computations output data frames, -#' in which case they will be unpacked into the constituent columns and those -#' names used. If the resulting column name(s) overlap with the column names -#' used for labeling the computations, which are `group_vars(x)` and -#' `"version"`, then the values for these columns must be identical to the -#' labels we assign. +#' in which case they will be unpacked into the constituent columns and the +#' data frame's column names will be used instead. If the resulting column +#' name(s) overlap with the column names used for labeling the computations, +#' which are `group_vars(x)` and `"version"`, then the values for these +#' columns must be identical to the labels we assign.
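A small sketch of the `.before` windowing described above, again assuming the `archive_cases_dv_subset` example archive; the counts returned depend on that archive's actual reporting latency:

```r
library(epiprocess)
library(dplyr)

# With `.before = 14` and requested version 2021-01-15, each computation sees
# only time_values on or after 2021-01-01 (the window endpoint is inclusive),
# and typically fewer than 15 of them because of reporting latency.
archive_cases_dv_subset %>%
  group_by(geo_value) %>%
  epix_slide(
    n_time_values_seen = dplyr::n_distinct(time_value),
    .before = 14,
    .versions = as.Date(c("2021-01-15", "2021-02-15"))
  )
```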
#' @param .all_versions (Not the same as `.all_rows` parameter of `epi_slide`.) #' If `.all_versions = TRUE`, then the slide computation will be passed the -#' version history (all `version <= .version` where `.version` is one of the -#' requested `.versions`) for rows having a `time_value` of at least `.version -#' - before`. Otherwise, the slide computation will be passed only the most -#' recent `version` for every unique `time_value`. Default is `FALSE`. +#' version history (all versions `<= .version` where `.version` is one of the +#' requested `.version`s), in `epi_archive` format. Otherwise, the slide +#' computation will be passed only the most recent `version` for every unique +#' `time_value`, in `epi_df` format. Default is `FALSE`. #' @return A tibble whose columns are: the grouping variables (if any), #' `time_value`, containing the reference time values for the slide #' computation, and a column named according to the `.new_col_name` argument, @@ -696,16 +741,17 @@ epix_detailed_restricted_mutate <- function(.data, ...) { #' @details A few key distinctions between the current function and `epi_slide()`: #' 1. In `.f` functions for `epix_slide`, one should not assume that the input #' data contains any rows with `time_value` matching the computation's -#' `.ref_time_value` (accessible via `attributes()$metadata$as_of`); for -#' typical epidemiological surveillance data, observations pertaining to a -#' particular time period (`time_value`) are first reported `as_of` some -#' instant after that time period has ended. +#' `.version`, due to reporting latency; for typical epidemiological +#' surveillance data, observations pertaining to a particular time period +#' (`time_value`) are first reported `as_of` some instant after that time +#' period has ended. No time window completion is performed as in +#' `epi_slide()`. #' 2. The input class and columns are similar but different: `epix_slide` #' (with the default `.all_versions=FALSE`) keeps all columns and the #' `epi_df`-ness of the first argument to each computation; `epi_slide` only #' provides the grouping variables in the second input, and will convert the #' first input into a regular tibble if the grouping variables include the -#' essential `geo_value` column. (With .all_versions=TRUE`, `epix_slide` will +#' essential `geo_value` column. (With `.all_versions=TRUE`, `epix_slide` #' will provide an `epi_archive` rather than an `epi_df` to each #' computation.) #' 3. The output class and columns are similar but different: `epix_slide()` @@ -719,81 +765,60 @@ epix_detailed_restricted_mutate <- function(.data, ...) { #' 4. There are no size stability checks or element/row recycling to maintain #' size stability in `epix_slide`, unlike in `epi_slide`. (`epix_slide` is #' roughly analogous to [`dplyr::group_modify`], while `epi_slide` is roughly -#' analogous to `dplyr::mutate` followed by `dplyr::arrange`) This is detailed -#' in the "advanced" vignette. +#' analogous to [`dplyr::mutate`].) #' 5. `.all_rows` is not supported in `epix_slide`; since the slide #' computations are allowed more flexibility in their outputs than in #' `epi_slide`, we can't guess a good representation for missing computations #' for excluded group-`.ref_time_value` pairs. -#' 76. The `.versions` default for `epix_slide` is based on making an +#' 6. The `.versions` default for `epix_slide` is based on making an #' evenly-spaced sequence out of the `version`s in the `DT` plus the -#' `versions_end`, rather than the `time_value`s.
+#' `versions_end`, rather than all unique `time_value`s. +#' 7. `epix_slide()` computations can refer to the current element of +#' `.versions` as either `.version` or `.ref_time_value`, while `epi_slide()` +#' computations refer to the current element of `.ref_time_values` with +#' `.ref_time_value`. #' #' Apart from the above distinctions, the interfaces between `epix_slide()` and #' `epi_slide()` are the same. #' -#' Furthermore, the current function can be considerably slower than -#' `epi_slide()`, for two reasons: (1) it must repeatedly fetch -#' properly-versioned snapshots from the data archive (via `epix_as_of()`), -#' and (2) it performs a "manual" sliding of sorts, and does not benefit from -#' the highly efficient `slider` package. For this reason, it should never be -#' used in place of `epi_slide()`, and only used when version-aware sliding is -#' necessary (as it its purpose). -#' #' @examples #' library(dplyr) #' -#' # Reference time points for which we want to compute slide values: -#' versions <- seq(as.Date("2020-06-02"), -#' as.Date("2020-06-15"), -#' by = "1 day" -#' ) +#' # Request only a small set of versions, for example's sake: +#' requested_versions <- +#' seq(as.Date("2020-09-02"), as.Date("2020-09-15"), by = "1 day") #' -#' # A simple (but not very useful) example (see the archive vignette for a more -#' # realistic one): +#' # Investigate reporting lag of `percent_cli` signal (though normally we'd +#' # probably work off of the dedicated `revision_summary()` function instead): #' archive_cases_dv_subset %>% -#' group_by(geo_value) %>% #' epix_slide( -#' .f = ~ mean(.x$case_rate_7d_av), -#' .before = 2, -#' .versions = versions, -#' .new_col_name = "case_rate_7d_av_recent_av" -#' ) %>% -#' ungroup() -#' # We requested time windows that started 2 days before the corresponding time -#' # values. The actual number of `time_value`s in each computation depends on -#' # the reporting latency of the signal and `time_value` range covered by the -#' # archive (2020-06-01 -- 2021-11-30 in this example). In this case, we have -#' # * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically -#' # discarded -#' # * 1 `time_value`, for ref time 2020-06-02 -#' # * 2 `time_value`s, for the rest of the results -#' # * never the 3 `time_value`s we would get from `epi_slide`, since, because -#' # of data latency, we'll never have an observation -#' # `time_value == .ref_time_value` as of `.ref_time_value`. -#' # The example below shows this type of behavior in more detail. -#' -#' # Examining characteristics of the data passed to each computation with -#' # `all_versions=FALSE`. 
+#' geowide_percent_cli_max_time = max(time_value[!is.na(percent_cli)]), +#' geowide_percent_cli_rpt_lag = .version - geowide_percent_cli_max_time, +#' .versions = requested_versions +#' ) #' archive_cases_dv_subset %>% #' group_by(geo_value) %>% #' epix_slide( -#' function(x, gk, rtv) { -#' tibble( -#' time_range = if (nrow(x) == 0L) { -#' "0 `time_value`s" -#' } else { -#' sprintf("%s -- %s", min(x$time_value), max(x$time_value)) -#' }, -#' n = nrow(x), -#' class1 = class(x)[[1L]] -#' ) -#' }, -#' .before = 5, .all_versions = FALSE, -#' .versions = versions -#' ) %>% -#' ungroup() %>% -#' arrange(geo_value, version) +#' percent_cli_max_time = max(time_value[!is.na(percent_cli)]), +#' percent_cli_rpt_lag = .version - percent_cli_max_time, +#' .versions = requested_versions +#' ) +#' +#' # Backtest a forecaster "pseudoprospectively" (i.e., faithfully with respect +#' # to the data version history): +#' case_death_rate_archive %>% +#' epix_slide( +#' .versions = as.Date(c("2021-10-01", "2021-10-08")), +#' function(x, g, v) { +#' epipredict::arx_forecaster( +#' x, +#' outcome = "death_rate", +#' predictors = c("death_rate_7d_av", "case_rate_7d_av") +#' )$predictions +#' } +#' ) +#' # See `vignette("backtesting", package="epipredict")` for a full walkthrough +#' # on backtesting forecasters, including plots, etc. #' #' # --- Advanced: --- #' @@ -825,7 +850,7 @@ epix_detailed_restricted_mutate <- function(.data, ...) { #' ) #' }, #' .before = 5, .all_versions = TRUE, -#' .versions = versions +#' .versions = requested_versions #' ) %>% #' ungroup() %>% #' # Focus on one geo_value so we can better see the columns above: @@ -845,7 +870,6 @@ epix_slide <- function( } -#' @rdname epix_slide #' @export epix_slide.epi_archive <- function( .x, @@ -879,10 +903,13 @@ epix_slide.epi_archive <- function( #' @noRd epix_slide_versions_default <- function(ea) { versions_with_updates <- c(ea$DT$version, ea$versions_end) - tidyr::full_seq(versions_with_updates, guess_period(versions_with_updates)) + if (ea$time_type == "yearmonth") { + min(versions_with_updates) + seq(0, max(versions_with_updates) - min(versions_with_updates), by = 1) + } else { + tidyr::full_seq(versions_with_updates, guess_period(versions_with_updates)) + } } - #' Filter an `epi_archive` object to keep only older versions #' #' Generates a filtered `epi_archive` from an `epi_archive` object, keeping @@ -904,9 +931,6 @@ epix_truncate_versions_after.epi_archive <- function(x, max_version) { if (!identical(class(max_version), class(x$DT$version))) { cli_abort("`max_version` must have the same `class` as `epi_archive$DT$version`.") } - if (!identical(typeof(max_version), typeof(x$DT$version))) { - cli_abort("`max_version` must have the same `typeof` as `epi_archive$DT$version`.") - } assert_scalar(max_version, na.ok = FALSE) if (max_version > x$versions_end) { cli_abort("`max_version` must be at most `epi_archive$versions_end`.") @@ -983,3 +1007,163 @@ dplyr_col_modify.col_modify_recorder_df <- function(data, cols) { attr(data, "epiprocess::col_modify_recorder_df::cols") <- cols data } + + + +#' [`dplyr::filter`] for `epi_archive`s +#' +#' @param .data an `epi_archive` +#' @param ... as in [`dplyr::filter`]; using the `version` column is not allowed +#' unless you use `.format_aware = TRUE`; see details. +#' @param .by as in [`dplyr::filter`] +#' @param .format_aware optional, `TRUE` or `FALSE`; default `FALSE`. See +#' details. 
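A quick sketch of the guardrail that this default enables; the exact error messages and classes are the ones raised in the implementation further below:

```r
library(dplyr)
library(epiprocess)

# Filtering on key columns other than `version` is allowed as usual:
archive_cases_dv_subset %>%
  filter(geo_value == "ca")

# Filtering on `version` (or on measurement columns) errors unless you opt in
# with `.format_aware = TRUE`:
try(
  archive_cases_dv_subset %>%
    filter(version >= as.Date("2020-06-02"))
)
```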
+#'
+#' @details
+#'
+#' By default, using the `version` column or measurement columns is disabled,
+#' as it's easy to get unexpected results. See if either [`epix_as_of`] or
+#' [`epix_slide`] works for any version selection you have in mind: for version
+#' selection, see the `version` or `.versions` args, respectively; for
+#' measurement column-based filtering, try `filter`ing after `epix_as_of` or
+#' inside the `.f` in `epix_slide()`. If they don't cover your use case, then
+#' you can set `.format_aware = TRUE` to enable usage of these columns, but be
+#' careful to:
+#' * Factor in that `.data$DT` may have been converted into a compact format
+#' based on diffing consecutive versions, and the last version of each
+#' observation in `.data$DT` will always be carried forward to future
+#' `version`s; see details of [`as_epi_archive`].
+#' * Set `clobberable_versions_start` and `versions_end` of the result
+#' appropriately after the `filter` call. They will be initialized with the
+#' same values as in `.data`.
+#'
+#' `dplyr::filter` also has an optional argument `.preserve`, which should not
+#' have an impact on (ungrouped) `epi_archive`s, and `grouped_epi_archive`s do
+#' not currently support `dplyr::filter`.
+#'
+#' @examples
+#'
+#' # Filter to one location and a particular time range:
+#' archive_cases_dv_subset %>%
+#'   filter(geo_value == "fl", time_value >= as.Date("2020-10-01"))
+#'
+#' # Convert to weekly by taking the Saturday data for each week, so that
+#' # `case_rate_7d_av` represents a Sun--Sat average:
+#' archive_cases_dv_subset %>%
+#'   filter(as.POSIXlt(time_value)$wday == 6L)
+#'
+#' # Filtering involving the `version` column or measurement columns requires
+#' # extra care. See epix_as_of and epix_slide instead for some common
+#' # operations. One semi-common operation that ends up being fairly simple is
+#' # treating observations as finalized after some amount of time, and ignoring
+#' # any revisions that were made after that point:
+#' archive_cases_dv_subset %>%
+#'   filter(
+#'     version <= time_value + as.difftime(60, units = "days"),
+#'     .format_aware = TRUE
+#'   )
+#'
+#' @export
+filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
+  in_tbl <- tibble::as_tibble(as.list(.data$DT), .name_repair = "minimal")
+  if (.format_aware) {
+    out_tbl <- in_tbl %>%
+      filter(..., .by = {{ .by }})
+  } else {
+    measurement_colnames <- setdiff(names(.data$DT), key_colnames(.data))
+    forbidden_colnames <- c("version", measurement_colnames)
+    out_tbl <- in_tbl %>%
+      filter(
+        # Add our own fake filter arg to the user's ..., to update the data mask
+        # to prevent `version` column usage.
+        {
+          # We should be evaluating inside the data mask. To disable both
+          # `version` and `.data$version` etc., we need to go to the ancestor
+          # environment containing the data mask's column bindings. This is
+          # likely just the parent env, but search to make sure, in a way akin
+          # to `<<-`:
+          e <- environment()
+          while (!identical(e, globalenv()) && !identical(e, emptyenv())) { # nolint:vector_logic_linter
+            if ("version" %in% names(e)) {
+              # This is where the column bindings are. Replace the forbidden ones.
+              # They are expected to be active bindings, so directly
+              # assigning has issues; `rm` first.
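+              # (For intuition: the `delayedAssign()` calls below create
+              # promises, so the `cli_abort()`s only fire if a user's filter
+              # expression actually forces one of the forbidden bindings;
+              # filtering on, say, `geo_value` or `time_value` alone never
+              # touches them and proceeds normally.)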
+ rm(list = forbidden_colnames, envir = e) + eval_env <- new.env(parent = asNamespace("epiprocess")) # see (2) below + delayedAssign( + "version", + cli_abort(c( + "Using `version` in `filter.epi_archive` may produce unexpected results.", + ">" = "See if `epix_as_of` or `epix_slide` would work instead.", + ">" = "If not, see `?filter.epi_archive` details for how to proceed." + ), class = "epiprocess__filter_archive__used_version"), + eval.env = eval_env, + assign.env = e + ) + for (measurement_colname in measurement_colnames) { + # Record current `measurement_colname` and set up execution for + # the promise for the error in its own dedicated environment, so + # that (1) `for` loop updating its value and `rm` cleanup don't + # mess things up. We can also (2) prevent changes to data mask + # ancestry (to involve user's quosure env rather than our + # quosure env) or contents (from edge case of user binding + # functions inside the mask) from potentially interfering by + # setting the promise's execution environment to skip over the + # data mask. + eval_env <- new.env(parent = asNamespace("epiprocess")) + eval_env[["local_measurement_colname"]] <- measurement_colname + delayedAssign( + measurement_colname, + cli_abort(c( + "Using `{format_varname(local_measurement_colname)}` + in `filter.epi_archive` may produce unexpected results.", + ">" = "See `?filter.epi_archive` details for how to proceed." + ), class = "epiprocess__filter_archive__used_measurement"), + eval.env = eval_env, + assign.env = e + ) + } + break + } + e <- parent.env(e) + } + # Don't mask similarly-named user objects in ancestor envs: + rm(list = c("e", "measurement_colname", "eval_env")) + TRUE + }, + ..., + .by = {{ .by }} + ) + } + # We could try to re-infer the geo_type, e.g., when filtering from + # national+state to just state. However, we risk inference failures such as + # "hrr" -> "hhs" from filtering to hrr 10, or "custom" -> USA-related when + # working with non-USA data: + out_geo_type <- .data$geo_type + if (.data$time_type == "day") { + # We might be going from daily to weekly; re-infer: + out_time_type <- guess_time_type(out_tbl$time_value) + } else { + # We might be filtering weekly to a single time_value; avoid re-inferring to + # stay "week". Or in other cases, just skip inferring, as re-inferring is + # expected to match the input time_type: + out_time_type <- .data$time_type + } + # Even if they narrow down to just a single value of an other_keys column, + # it's probably still better (& simpler) to treat it as an other_keys column + # since it still exists in the result: + out_other_keys <- .data$other_keys + # `filter` makes no guarantees about not aliasing columns in its result when + # the filter condition is all TRUE, so don't setDT. + out_dtbl <- as.data.table(out_tbl, key = c("geo_value", "time_value", out_other_keys, "version")) + result <- new_epi_archive( + out_dtbl, + out_geo_type, out_time_type, out_other_keys, + # Assume version-related metadata unchanged; part of why we want to push + # back on filter expressions like `.data$version <= .env$as_of`: + .data$clobberable_versions_start, .data$versions_end + ) + # Filtering down rows while keeping all (ukey) columns should preserve ukey + # uniqueness. + result +} diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 1191521c9..51f6cf33f 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -499,34 +499,40 @@ group_epi_df <- function(x, exclude = character()) { #' the resulting `epi_df` will have `geo_value` set to `"total"`. 
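For orientation before the argument details: a minimal sketch of the aggregation this performs, using the `grad_employ_subset` dataset referenced in the examples below (collapsing over the `edu_qual` key while keeping `age_group`):

```r
library(dplyr)
library(epiprocess)

grad_employ_subset %>%
  sum_groups_epi_df(num_graduates, group_cols = c("geo_value", "age_group"))
```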
#' #' @param .x an `epi_df` -#' @param sum_cols character vector of the columns to aggregate +#' @param sum_cols `r tidyselect_arg_roxygen` #' @param group_cols character vector of column names to group by. "time_value" is -#' included by default. +#' included by default. #' @return an `epi_df` object #' +#' @examples +#' # This data has other_keys age_group and edu_qual: +#' grad_employ_subset +#' +#' # Aggregate num_graduates within each geo_value (and time_value): +#' grad_employ_subset %>% +#' sum_groups_epi_df(num_graduates, group_cols = "geo_value") +#' #' @export -sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character()) { +sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") { assert_class(.x, "epi_df") - assert_character(sum_cols) assert_character(group_cols) - checkmate::assert_subset(sum_cols, setdiff(names(.x), key_colnames(.x))) checkmate::assert_subset(group_cols, key_colnames(.x)) if (!"time_value" %in% group_cols) { group_cols <- c("time_value", group_cols) } - - out <- .x %>% - group_by(across(all_of(group_cols))) %>% - dplyr::summarize(across(all_of(sum_cols), sum), .groups = "drop") + # Attempt tidyselection ourselves to get "Error in `sum_groups_epi_df()`" + # rather than "in `dplyr::summarize()`", before forwarding: + sum_cols <- rlang::enquo(sum_cols) + tidyselect::eval_select(sum_cols, .x) + out <- group_by(.x, across(all_of(group_cols))) %>% + dplyr::summarize(across(!!sum_cols, sum), .groups = "drop") # To preserve epi_df-ness, we need to ensure that the `geo_value` column is # present. - out <- if (!"geo_value" %in% group_cols) { - out %>% + if (!"geo_value" %in% group_cols) { + out <- out %>% mutate(geo_value = "total") %>% - relocate(geo_value, .before = 1) - } else { - out + relocate("geo_value", .before = 1) } # The `geo_type` will be correctly inherited here by the following logic: @@ -535,10 +541,10 @@ sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character()) # - if `geo_value` is not in `group_cols`, then the constructor will see # the unrecognizeable "total" value and will correctly infer the "custom" # geo_type. - out %>% - as_epi_df( - as_of = attr(.x, "metadata")$as_of, - other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols) - ) %>% + as_epi_df( + out, + as_of = attr(.x, "metadata")$as_of, + other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols) + ) %>% arrange_canonical() } diff --git a/R/revision_analysis.R b/R/revision_analysis.R index f36dcc16a..4e12fb77c 100644 --- a/R/revision_analysis.R +++ b/R/revision_analysis.R @@ -26,7 +26,7 @@ #' the window afterwards at 150. #' #' @param epi_arch an epi_archive to be analyzed -#' @param ... <[`tidyselect`][dplyr_tidy_select]>, used to choose the column to +#' @param ... <[`tidyselect`][dplyr::dplyr_tidy_select]>, used to choose the column to #' summarize. If empty and there is only one value/measurement column (i.e., #' not in [`key_colnames`]) in the archive, it will automatically select it. #' If supplied, `...` must select exactly one column. @@ -34,31 +34,16 @@ #' `NA`'s compactify is run again if `compactify` is `TRUE` to make #' sure there are no duplicate values from occasions when the signal is #' revised to `NA`, and then back to its immediately-preceding value. -#' @param print_inform bool, determines whether to print summary information, or -#' only return the full summary tibble #' @param min_waiting_period `difftime`, integer or `NULL`. 
Sets a cutoff: any
 #' time_values that have not had at least `min_waiting_period` to stabilize as
 #' of the `versions_end` are removed. `min_waiting_period` should characterize
 #' the typical time during which most significant revisions occur. The default
 #' of 60 days corresponds to a typical near-final value for case counts as
 #' reported in the context of insurance. To avoid this filtering, either set
-#' to `NULL` or 0.
+#' to `NULL` or 0. A `difftime` will be rounded up to the appropriate `time_type` if
+#' necessary (that is, 5 days will be rounded to 1 week if the data is weekly).
 #' @param within_latest double between 0 and 1. Determines the threshold
 #' used for the `lag_to`
-#' @param quick_revision difftime or integer (integer is treated as days), for
-#' the printed summary, the amount of time between the final revision and the
-#' actual time_value to consider the revision quickly resolved. Default of 3
-#' days
-#' @param few_revisions integer, for the printed summary, the upper bound on the
-#' number of revisions to consider "few". Default is 3.
-#' @param abs_spread_threshold length-1 numeric, for the printed summary, the
-#' maximum spread used to characterize revisions which don't actually change
-#' very much. Default is 5% of the maximum value in the dataset, but this is
-#' the most unit dependent of values, and likely needs to be chosen
-#' appropriate for the scale of the dataset.
-#' @param rel_spread_threshold length-1 double between 0 and 1, for the printed
-#' summary, the relative spread fraction used to characterize revisions which
-#' don't actually change very much. Default is .1, or 10% of the final value
 #' @param compactify bool. If `TRUE`, we will compactify after the signal
 #' requested in `...` has been selected on its own and the `drop_nas` step.
 #' This helps, for example, to give similar results when called on
@@ -67,6 +52,8 @@
 #' requested signal. The default is `TRUE`.
 #' @param compactify_abs_tol length-1 double, used if `compactify` is `TRUE`, it
 #' determines the threshold for when two doubles are considered identical.
+#' @param return_only_tibble boolean to return only the simple `tibble` of
+#' computational results rather than the complete S3 object.
 #'
 #' @details Applies to `epi_archive`s with `time_type`s of `"day"`, `"week"`,
 #' and `"yearmonth"`. It can also work with a `time_type` of `"integer"` if
@@ -76,9 +63,15 @@
 #' produce incorrect results for some calculations, since week numbering
 #' contains jumps at year boundaries.
 #'
+#' @return An S3 object with class `revision_analysis`. This function is typically
+#' called for the purpose of inspecting the printed output. The
+#' results of the computations are available in
+#' `revision_analysis(...)$revision_behavior`. If you only want to access
+#' the internal computations, use `return_only_tibble = TRUE`.
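To make the return-value description concrete, a short sketch of the two access patterns it implies:

```r
library(epiprocess)

ra <- revision_analysis(archive_cases_dv_subset, percent_cli)
ra                    # inspect the printed summary (see the print method below)
ra$revision_behavior  # the underlying per-key-and-time_value tibble

# Or skip the S3 wrapper entirely:
revision_analysis(archive_cases_dv_subset, percent_cli, return_only_tibble = TRUE)
```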
+#' #' @examples -#' revision_example <- revision_summary(archive_cases_dv_subset, percent_cli) -#' revision_example %>% arrange(desc(spread)) +#' revision_example <- revision_analysis(archive_cases_dv_subset, percent_cli) +#' revision_example$revision_behavior %>% arrange(desc(spread)) #' #' @export #' @importFrom cli cli_inform cli_abort cli_li @@ -86,21 +79,19 @@ #' @importFrom vctrs vec_cast #' @importFrom dplyr mutate group_by arrange filter if_any all_of across pull pick c_across #' everything ungroup summarize if_else %>% -revision_summary <- function(epi_arch, - ..., - drop_nas = TRUE, - print_inform = TRUE, - min_waiting_period = as.difftime(60, units = "days") %>% - difftime_approx_ceiling_time_delta(epi_arch$time_type), - within_latest = 0.2, - quick_revision = as.difftime(3, units = "days") %>% - difftime_approx_ceiling_time_delta(epi_arch$time_type), - few_revisions = 3, - abs_spread_threshold = NULL, - rel_spread_threshold = 0.1, - compactify = TRUE, - compactify_abs_tol = 0) { +revision_analysis <- function(epi_arch, + ..., + drop_nas = TRUE, + min_waiting_period = as.difftime(60, units = "days"), + within_latest = 0.2, + compactify = TRUE, + compactify_abs_tol = 0, + return_only_tibble = FALSE) { assert_class(epi_arch, "epi_archive") + if (methods::is(min_waiting_period, "difftime")) { + min_waiting_period <- min_waiting_period %>% + difftime_approx_ceiling_time_delta(epi_arch$time_type) + } # if the column to summarize isn't specified, use the only one if there is only one if (dots_n(...) == 0) { # Choose the first column that's not a key: @@ -126,11 +117,6 @@ revision_summary <- function(epi_arch, cli_abort("Not currently implementing more than one column at a time. Run each separately.") } } - if (is.null(abs_spread_threshold)) { - abs_spread_threshold <- .05 * epi_arch$DT %>% - pull(!!arg) %>% - max(na.rm = TRUE) - } # for each time_value, get # the number of revisions # the maximum spread in value (both absolute and relative) @@ -193,63 +179,113 @@ revision_summary <- function(epi_arch, time_value, geo_value, all_of(epikey_names), n_revisions, min_lag, max_lag, # nolint: object_usage_linter lag_near_latest, spread, rel_spread, min_value, max_value, median_value # nolint: object_usage_linter ) - if (print_inform) { - cli_inform("Min lag (time to first version):") - time_delta_summary(revision_behavior$min_lag, time_type) %>% print() - if (!drop_nas) { - total_na <- epi_arch$DT %>% - filter(is.na(c_across(!!arg))) %>% # nolint: object_usage_linter - nrow() - cli_inform("Fraction of all versions that are `NA`:") - cli_li(num_percent(total_na, nrow(epi_arch$DT), "")) - cli_inform("") - } - cli_inform("Fraction of epi_key+time_values with") - total_num <- nrow(revision_behavior) # nolint: object_usage_linter - total_num_unrevised <- sum(revision_behavior$n_revisions == 0) # nolint: object_usage_linter - cli_inform("No revisions:") - cli_li(num_percent(total_num_unrevised, total_num, "")) - total_quickly_revised <- sum( # nolint: object_usage_linter - time_delta_to_n_steps(revision_behavior$max_lag, time_type) <= - time_delta_to_n_steps(quick_revision, time_type) - ) - cli_inform("Quick revisions (last revision within {format_time_delta(quick_revision, time_type)} - of the `time_value`):") - cli_li(num_percent(total_quickly_revised, total_num, "")) - total_barely_revised <- sum( # nolint: object_usage_linter - revision_behavior$n_revisions <= - few_revisions - ) - cli_inform("Few revisions (At most {few_revisions} revisions for that `time_value`):") - 
cli_li(num_percent(total_barely_revised, total_num, ""))
-    cli_inform("")
-    cli_inform("Fraction of revised epi_key+time_values which have:")
+  total_na <- epi_arch$DT %>%
+    filter(is.na(c_across(!!arg))) %>% # nolint: object_usage_linter
+    nrow()
+  if (!return_only_tibble) {
+    revision_behavior <- structure(list(
+      revision_behavior = revision_behavior,
+      range_time_values = range(epi_arch$DT$time_value),
+      signal_variable = arg,
+      drop_nas = drop_nas,
+      time_type = time_type,
+      total_na = total_na,
+      max_val = max(epi_arch$DT[[arg]], na.rm = TRUE),
+      n_obs = nrow(epi_arch$DT),
+      within_latest = within_latest
+    ), class = "revision_analysis")
+  }
+  return(revision_behavior)
+}
+
+

-    real_revisions <- revision_behavior %>% filter(n_revisions > 0) # nolint: object_usage_linter
-    n_real_revised <- nrow(real_revisions) # nolint: object_usage_linter
-    rel_spread <- sum( # nolint: object_usage_linter
-      real_revisions$rel_spread <
-        rel_spread_threshold,
-      na.rm = TRUE
-    ) + sum(is.na(real_revisions$rel_spread))
-    cli_inform("Less than {rel_spread_threshold} spread in relative value:")
-    cli_li(num_percent(rel_spread, n_real_revised, ""))
-    abs_spread <- sum( # nolint: object_usage_linter
-      real_revisions$spread >
-        abs_spread_threshold
-    ) # nolint: object_usage_linter
-    cli_inform("Spread of more than {abs_spread_threshold} in actual value (when revised):")
-    cli_li(num_percent(abs_spread, n_real_revised, ""))
-    # time_type_unit_pluralizer[[time_type]] is a format string controlled by us
-    # and/or downstream devs, so we can paste it onto our format string safely:
-    units_plural <- pluralize(paste0("{qty(2)}", time_type_unit_pluralizer[[time_type]])) # nolint: object_usage_linter
-    cli_inform("{toTitleCase(units_plural)} until within {within_latest*100}% of the latest value:")
-    time_delta_summary(revision_behavior[["lag_near_latest"]], time_type) %>% print()
+#' Print a `revision_analysis` object
+#'
+#' @param x a `revision_analysis` object
+#' @param quick_revision Difftime or integer (integer is treated as days).
+#' The amount of time between the final revision and the
+#' actual time_value to consider the revision quickly resolved. Default of 3
+#' days. This will be rounded up to the appropriate `time_type` if
+#' necessary (that is, 5 days will be rounded to 1 week if the data is weekly).
+#' @param few_revisions Integer. The upper bound on the
+#' number of revisions to consider "few". Default is 3.
+#' @param abs_spread_threshold Scalar numeric. The
+#' maximum spread used to characterize revisions which don't actually change
+#' very much. Default is 5% of the maximum value in the dataset, but this is
+#' the most unit-dependent of these values, and likely needs to be chosen
+#' appropriately for the scale of the dataset.
+#' @param rel_spread_threshold Scalar between 0 and 1. The relative spread
+#' fraction used to characterize revisions which don't actually change very
+#' much. Default is .1, or 10% of the final value.
+#'
+#' @rdname revision_analysis
+#' @export
+print.revision_analysis <- function(x,
+                                    quick_revision = as.difftime(3, units = "days"),
+                                    few_revisions = 3,
+                                    abs_spread_threshold = NULL,
+                                    rel_spread_threshold = 0.1,
+                                    ...)
{ + if (methods::is(quick_revision, "difftime")) { + quick_revision <- quick_revision %>% + difftime_approx_ceiling_time_delta(x$time_type) } - return(revision_behavior) + if (is.null(abs_spread_threshold)) abs_spread_threshold <- .05 * x$max_val + rev_beh <- x$revision_behavior + cli::cli_h2("An epi_archive spanning {.val {x$range_time_values[1]}} to {.val {x$range_time_values[2]}}.") + cli::cli_h3("Min lag (time to first version):") + time_delta_summary(rev_beh$min_lag, x$time_type) %>% print() + if (!x$drop_nas) { + cli_inform("Fraction of all versions that are `NA`:") + cli_li(num_percent(x$total_na, x$n_obs, "")) + cli_inform("") + } + cli::cli_h3("Fraction of epi_key + time_values with") + total_num <- nrow(rev_beh) # nolint: object_usage_linter + total_num_unrevised <- sum(rev_beh$n_revisions == 0) # nolint: object_usage_linter + cli_inform("No revisions:") + cli_li(num_percent(total_num_unrevised, total_num, "")) + total_quickly_revised <- sum( # nolint: object_usage_linter + time_delta_to_n_steps(rev_beh$max_lag, x$time_type) <= + time_delta_to_n_steps(quick_revision, x$time_type) + ) + cli_inform("Quick revisions (last revision within {format_time_delta(quick_revision, x$time_type)} + of the `time_value`):") + cli_li(num_percent(total_quickly_revised, total_num, "")) + total_barely_revised <- sum(rev_beh$n_revisions <= few_revisions) + cli_inform("Few revisions (At most {.val {few_revisions}} revisions for that `time_value`):") + cli_li(num_percent(total_barely_revised, total_num, "")) + + cli::cli_h3("Fraction of revised epi_key + time_values which have:") + + real_revisions <- rev_beh %>% filter(n_revisions > 0) # nolint: object_usage_linter + n_real_revised <- nrow(real_revisions) # nolint: object_usage_linter + rel_spread <- sum( # nolint: object_usage_linter + real_revisions$rel_spread < rel_spread_threshold, + na.rm = TRUE + ) + sum(is.na(real_revisions$rel_spread)) + cli_inform("Less than {.val {rel_spread_threshold}} spread in relative value:") + cli_li(num_percent(rel_spread, n_real_revised, "")) + abs_spread <- sum( # nolint: object_usage_linter + real_revisions$spread > abs_spread_threshold + ) # nolint: object_usage_linter + divid <- cli::cli_div(theme = list(.val = list(digits = 3))) + cli_inform("Spread of more than {.val {abs_spread_threshold}} in actual value (when revised):") + cli::cli_end(divid) + cli_li(num_percent(abs_spread, n_real_revised, "")) + + # time_type_unit_pluralizer[[time_type]] is a format string controlled by us + # and/or downstream devs, so we can paste it onto our format string safely: + units_plural <- pluralize(paste0("{qty(2)}", time_type_unit_pluralizer[[x$time_type]])) # nolint: object_usage_linter + cli::cli_h3("{toTitleCase(units_plural)} until within {.val {x$within_latest*100}}% of the latest value:") + time_delta_summary(rev_beh[["lag_near_latest"]], x$time_type) %>% print() } +#' @export +#' @rdname revision_analysis +revision_summary <- revision_analysis + #' pull the value from lags when values starts indefinitely being within prop of its latest value. #' @param lags vector of lags; should be sorted #' @param values this should be a vector (e.g., a column) with length matching that of `lags` diff --git a/R/slide.R b/R/slide.R index abc7c3b77..bc993bf4a 100644 --- a/R/slide.R +++ b/R/slide.R @@ -1,47 +1,82 @@ -#' Slide a function over variables in an `epi_df` object +#' More general form of [`epi_slide_opt`] for rolling/running computations #' -#' @description Slides a given function over variables in an `epi_df` object. 
-#' This is useful for computations like rolling averages. The function supports -#' many ways to specify the computation, but by far the most common use case is -#' as follows: +#' Most rolling/running computations can be handled by [`epi_slide_mean`], +#' [`epi_slide_sum`], or the medium-generality [`epi_slide_opt`] functions, +#' which have been specialized to run more quickly. `epi_slide()` is a slower +#' but even more general function for rolling/running computations, and uses a +#' different interface to specify the computations; you typically only need to +#' consider using `epi_slide()` if you have a computation that depends on +#' multiple columns simultaneously, outputs multiple columns simultaneously, or +#' produces non-numeric output. For example, this computation depends on +#' multiple columns: #' #' ``` -#' # Create new column `cases_7dmed` that contains a 7-day trailing median of cases -#' epi_slide(edf, cases_7dmed = median(cases), .window_size = 7) +#' cases_deaths_subset %>% +#' epi_slide( +#' cfr_estimate_v0 = death_rate_7d_av[[22]]/case_rate_7d_av[[1]], +#' .window_size = 22 +#' ) %>% +#' print(n = 30) #' ``` #' -#' For two very common use cases, we provide optimized functions that are much -#' faster than `epi_slide`: `epi_slide_mean()` and `epi_slide_sum()`. We -#' recommend using these functions when possible. +#' (Here, the value 22 was selected using `epi_cor()` and averaging across +#' `geo_value`s. See +#' \href{https://www.medrxiv.org/content/10.1101/2024.12.27.24319518v1}{this +#' manuscript} for some warnings & information using similar types of CFR +#' estimators.) #' #' See `vignette("epi_df")` for more examples. #' #' @template basic-slide-params -#' @param .f Function, formula, or missing; together with `...` specifies the -#' computation to slide. The return of the computation should either be a -#' scalar or a 1-row data frame. Data frame returns will be -#' `tidyr::unpack()`-ed, if named, and will be [`tidyr::pack`]-ed columns, if -#' not named. See examples. +#' @param .f,... The computation to slide. The input will be a time window of +#' the data for a single subpopulation (i.e., a single `geo_value` and single +#' value for any [`other_keys`][as_epi_df] you set up, such as age groups, race, etc.). +#' The input will always have the same size, determined by `.window_size`, and +#' will fill in any missing `time_values`, using `NA` values for missing +#' measurements. The output should be a scalar value or a 1-row data frame; +#' these outputs will be collected into a new column or columns in the +#' `epi_slide()` result. Data frame outputs will be unpacked into multiple +#' columns in the result by default, or [`tidyr::pack`]ed into a single +#' data-frame-type column if you provide a name for such a column (e.g., via +#' `.new_col_name`). +#' +#' You can specify the computation in one of the following ways: +#' +#' - Don't provide `.f`, and instead use one or more +#' [`dplyr::summarize`]-esque ["data-masking"][rlang::args_data_masking] +#' expressions in `...`, e.g., `cfr_estimate_v0 = +#' death_rate_7d_av[[22]]/case_rate_7d_av[[1]]`. This way is sometimes more +#' convenient, but also has the most computational overhead. +#' +#' - Provide a formula in `.f`, e.g., `~ +#' .x$death_rate_7d_av[[22]]/.x$case_rate_7d_av[[1]]`. In this formula, `.x` +#' is an `epi_df` containing data for a single time window as described above, +#' taken from the original `.x` fed into `epi_slide()`. 
+#'
+#' - Provide a function in `.f`, e.g., `function(x, g, t)
+#' x$death_rate_7d_av[[22]]/x$case_rate_7d_av[[1]]`. The function should be of
+#' the form `function(x, g, t)` or `function(x, g, t, <additional arguments>)`,
+#' where:
 #'
-#' - If `.f` is missing, then `...` will specify the computation via
-#' tidy-evaluation. This is usually the most convenient way to use
-#' `epi_slide`. See examples.
-#' - If `.f` is a formula, then the formula should use `.x` (not the same as
-#' the input `epi_df`) to operate on the columns of the input `epi_df`, e.g.
-#' `~mean(.x$var)` to compute a mean of `var`.
-#' - If a function, `.f` must have the form `function(x, g, t, ...)`, where:
 #' - `x` is a data frame with the same column names as the original object,
-#' minus any grouping variables, with only the windowed data for one
-#' group-`.ref_time_value` combination
-#' - `g` is a one-row tibble containing the values of the grouping variables
-#' for the associated group
+#' minus any grouping variables, with only the windowed data for one
+#' group-`.ref_time_value` combination
+#'
+#' - `g` is a one-row tibble specifying the `geo_value` and value of any
+#' `other_keys` for this computation
+#'
 #' - `t` is the `.ref_time_value` for the current window
-#' - `...` are additional arguments
 #'
-#' @param ... Additional arguments to pass to the function or formula specified
-#' via `.f`. Alternatively, if `.f` is missing, then the `...` is interpreted
-#' as a ["data-masking"][rlang::args_data_masking] expression or expressions
-#' for tidy evaluation.
+#' - If you have a complex `.f` containing `<additional arguments>`, you can
+#' provide values for those arguments in the `...` argument to `epi_slide()`.
+#'
+#' The values of `g` and `t` are also available to data-masking-expression and
+#' formula-based computations as `.group_key` and `.ref_time_value`,
+#' respectively. Formula computations also let you use `.y` or `.z`,
+#' respectively, as additional names for these same quantities (similar to
+#' [`dplyr::group_modify`]).
+#'
 #' @param .new_col_name Name for the new column that will contain the computed
 #' values. The default is "slide_value" unless your slide computations output
 #' data frames, in which case they will be unpacked (as in `tidyr::unpack()`)
@@ -49,14 +84,34 @@
 #' be given names that clash with the existing columns of `.x`.
 #'
 #' @details
+#'
+#' ## Motivation and lower-level alternatives
+#'
+#' `epi_slide()` is focused on preventing errors and providing a convenient
+#' interface. If you need computational speed, many computations can be
+#' optimized by one of the following:
+#'
+#' * Performing core sliding operations with `epi_slide_opt()` and
+#' `frollapply`, and using potentially-grouped `mutate()`s to transform or
+#' combine the results.
+#'
+#' * Grouping by `geo_value` and any `other_keys`; [`complete()`]ing with
+#' `full_seq()` to fill in time gaps; `arrange()`ing by `time_value`s within
+#' each group; using `mutate()` with vectorized operations and shift operators
+#' like `dplyr::lead()` and `dplyr::lag()` to perform the core operations,
+#' being careful to give the desired results for the least and most recent
+#' `time_value`s (often `NA`s for the least recent); ungrouping; and
+#' `filter()`ing back down to only rows that existed before the `complete()`
+#' stage if necessary. See the sketch below.
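+#'
+#' For example, a rough sketch of the second approach, assuming daily data
+#' with no `other_keys` and a hypothetical derived column `cases_7d_ago`:
+#'
+#' ```
+#' library(dplyr)
+#' cases_deaths_subset %>%
+#'   group_by(geo_value) %>%
+#'   tidyr::complete(time_value = tidyr::full_seq(time_value, period = 1)) %>%
+#'   arrange(time_value, .by_group = TRUE) %>%
+#'   mutate(cases_7d_ago = lag(cases, 7)) %>%
+#'   ungroup() %>%
+#'   # crude: this also drops any pre-existing rows with `NA` cases:
+#'   filter(!is.na(cases))
+#' ```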
+#' #' ## Advanced uses of `.f` via tidy evaluation #' -#' If specifying `.f` via tidy evaluation, in addition to the standard [`.data`] -#' and [`.env`], we make some additional "pronoun"-like bindings available: +#' If specifying `.f` via tidy evaluation, in addition to the standard [`.data`][rlang::.data] +#' and [`.env`][rlang::.env], we make some additional "pronoun"-like bindings available: #' #' - .x, which is like `.x` in [`dplyr::group_modify`]; an ordinary object #' like an `epi_df` rather than an rlang [pronoun][rlang::as_data_pronoun] -#' like [`.data`]; this allows you to use additional `dplyr`, `tidyr`, and +#' like `.data`; this allows you to use additional `dplyr`, `tidyr`, and #' `epiprocess` operations. If you have multiple expressions in `...`, this #' won't let you refer to the output of the earlier expressions, but `.data` #' will. @@ -72,34 +127,43 @@ #' @examples #' library(dplyr) #' -#' # Get the 7-day trailing standard deviation of cases and the 7-day trailing mean of cases -#' cases_deaths_subset %>% +#' # Generate some simple time-varying CFR estimates: +#' with_cfr_estimates <- cases_deaths_subset %>% #' epi_slide( -#' cases_7sd = sd(cases, na.rm = TRUE), -#' cases_7dav = mean(cases, na.rm = TRUE), -#' .window_size = 7 -#' ) %>% -#' select(geo_value, time_value, cases, cases_7sd, cases_7dav) -#' # Note that epi_slide_mean could be used to more quickly calculate cases_7dav. +#' cfr_estimate_v0 = death_rate_7d_av[[22]] / case_rate_7d_av[[1]], +#' .window_size = 22 +#' ) +#' with_cfr_estimates %>% +#' print(n = 30) +#' # (Here, the value 22 was selected using `epi_cor()` and averaging across +#' # `geo_value`s. See +#' # https://www.medrxiv.org/content/10.1101/2024.12.27.24319518v1 for some +#' # warnings & information using CFR estimators along these lines.) #' -#' # In addition to the [`dplyr::mutate`]-like syntax, you can feed in a function or -#' # formula in a way similar to [`dplyr::group_modify`]: -#' my_summarizer <- function(window_data) { -#' window_data %>% -#' summarize( -#' cases_7sd = sd(cases, na.rm = TRUE), -#' cases_7dav = mean(cases, na.rm = TRUE) -#' ) +#' # In addition to the [`dplyr::mutate`]-like syntax, you can feed in a +#' # function or formula in a way similar to [`dplyr::group_modify`]; these +#' # often run much more quickly: +#' my_computation <- function(window_data) { +#' tibble( +#' cfr_estimate_v0 = window_data$death_rate_7d_av[[nrow(window_data)]] / +#' window_data$case_rate_7d_av[[1]] +#' ) #' } -#' cases_deaths_subset %>% +#' with_cfr_estimates2 <- cases_deaths_subset %>% #' epi_slide( -#' ~ my_summarizer(.x), -#' .window_size = 7 -#' ) %>% -#' select(geo_value, time_value, cases, cases_7sd, cases_7dav) -#' -#' -#' +#' ~ my_computation(.x), +#' .window_size = 22 +#' ) +#' with_cfr_estimates3 <- cases_deaths_subset %>% +#' epi_slide( +#' function(window_data, g, t) { +#' tibble( +#' cfr_estimate_v0 = window_data$death_rate_7d_av[[nrow(window_data)]] / +#' window_data$case_rate_7d_av[[1]] +#' ) +#' }, +#' .window_size = 22 +#' ) #' #' #' #### Advanced: #### @@ -549,20 +613,25 @@ get_before_after_from_window <- function(window_size, align, time_type) { list(before = before, after = after) } -#' Optimized slide functions for common cases +#' Calculate rolling or running means, sums, etc., or custom calculations #' -#' @description `epi_slide_opt` allows sliding an n-timestep [data.table::froll] -#' or [slider::summary-slide] function over variables in an `epi_df` object. -#' These functions tend to be much faster than `epi_slide()`. 
See
-#' `vignette("epi_df")` for more examples.
+#' @description These methods take each subpopulation (i.e., a single
+#' `geo_value` and combination of any `other_keys` you set up for age groups,
+#' etc.) and perform a `.window_size`-width time window rolling/sliding
+#' computation, or alternatively, a running/cumulative computation (with
+#' `.window_size = Inf`) on the requested columns. Explicit `NA` measurements
+#' are temporarily added to fill in any time gaps, and, for rolling
+#' computations, to pad the time series to ensure that the first & last
+#' computations use exactly `.window_size` values.
+#'
+#' `epi_slide_opt` allows you to use any [data.table::froll] or
+#' [slider::summary-slide] function. If none of those specialized functions fit
+#' your use case, you can use `data.table::frollapply` together with a
+#' non-rolling function (e.g., `median`). See [`epi_slide`] if you need to work
+#' with multiple columns at once or output a custom type.
 #'
 #' @template basic-slide-params
-#' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
-#' name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
-#' [other tidy-select expression][tidyselect::language], or a vector of
-#' characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
-#' they were positions in the data frame, so expressions like `x:y` can be
-#' used to select a range of variables.
+#' @param .col_names `r tidyselect_arg_roxygen`
 #'
 #' The tidy-selection renaming interface is not supported, and cannot be used
 #' to provide output column names; if you want to customize the output column
@@ -1108,7 +1177,7 @@ epi_slide_sum <- function(
 #' `before` and `after` args are assumed to have been validated by the calling
 #' function (using `validate_slide_window_arg`).
 #'
-#' @importFrom checkmate assert_function
+#' @importFrom checkmate assert_function anyInfinite
 #' @keywords internal
 full_date_seq <- function(x, before, after, time_type) {
   if (!time_type %in% c("day", "week", "yearmonth", "integer")) {
@@ -1126,7 +1195,7 @@ full_date_seq <- function(x, before, after, time_type) {
 
   if (time_type %in% c("yearmonth", "integer")) {
     all_dates <- seq(min(x$time_value), max(x$time_value), by = 1L)
 
-    if (before != 0 && before != Inf) {
+    if (before != 0 && !anyInfinite(before)) {
       pad_early_dates <- all_dates[1L] - before:1
     }
     if (after != 0) {
@@ -1139,7 +1208,7 @@ full_date_seq <- function(x, before, after, time_type) {
     )
     all_dates <- seq(min(x$time_value), max(x$time_value), by = by)
 
-    if (before != 0 && before != Inf) {
+    if (before != 0 && !anyInfinite(before)) {
       # The behavior is analogous to the branch with tsibble types above. For
       # more detail, note that the function `seq.Date(from, ..., length.out =
       # n)` returns `from + 0:n`. Since we want `from + 1:n`, we drop the first
diff --git a/R/utils.R b/R/utils.R
index 66270d69c..972170998 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -98,26 +98,29 @@ format_chr_deparse <- function(x) {
   paste(collapse = "", deparse(x))
 }
 
-#' Format a character vector as a string via deparsing/quoting each
+#' Format each entry in a character vector via quoting; special replacement for length 0
+#'
+#' Performs no escaping within the strings; if you want something that a reader
+#' could copy-paste to debug, look into `format_deparse` (note that this
+#' collapses into a single string).
+#'
+#' @param x chr; e.g., `colnames` of some data frame
+#' @param empty chr, likely string; what should be output if `x` is of length 0?
+#' @return chr; same `length` as `x` if `x` had nonzero length; value of `empty` otherwise +#' +#' @examples +#' cli::cli_inform('{epiprocess:::format_chr_with_quotes("x")}') +#' cli::cli_inform('{epiprocess:::format_chr_with_quotes(c("x","y"))}') +#' nms <- c("x", "\"Total Cases\"") +#' cli::cli_inform("{epiprocess:::format_chr_with_quotes(nms)}") +#' cli::cli_inform("{epiprocess:::format_chr_with_quotes(character())}") #' -#' @param x `chr`; e.g., `colnames` of some data frame -#' @param empty string; what should be output if `x` is of length 0? -#' @return string #' @keywords internal format_chr_with_quotes <- function(x, empty = "*none*") { if (length(x) == 0L) { empty } else { - # Deparse to get quoted + escape-sequenced versions of varnames; collapse to - # single line (assuming no newlines in `x`). Though if we hand this to cli - # it may insert them (even in middle of quotes) while wrapping lines. - deparsed_collapsed <- paste(collapse = "", deparse(x)) - if (length(x) == 1L) { - deparsed_collapsed - } else { - # remove surrounding `c()`: - substr(deparsed_collapsed, 3L, nchar(deparsed_collapsed) - 1L) - } + paste0('"', x, '"') } } diff --git a/_pkgdown.yml b/_pkgdown.yml index 3742bd416..ebd73dc84 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -37,20 +37,12 @@ articles: contents: - compactify -repo: - url: - home: https://github.com/cmu-delphi/epiprocess/ - source: https://github.com/cmu-delphi/epiprocess/ - issue: https://github.com/cmu-delphi/epiprocess/issues - reference: - title: "`epi_df` basics" desc: Constructors and information for `epi_df` objects. - contents: - epi_df - - print.epi_df - group_epi_df - - autoplot.epi_df - title: "`epi_df` manipulation" desc: Functions operating on `epi_df` objects. @@ -71,7 +63,6 @@ reference: desc: Constructors and information for `epi_archive` objects. - contents: - epi_archive - - print.epi_archive - clone - group_by.epi_archive @@ -79,11 +70,20 @@ reference: desc: Functions operating on `epi_archive` objects. - contents: - epix_as_of + - epix_as_of_current - epix_slide - - epix_merge - revision_summary + - epix_merge + - filter.epi_archive - epix_fill_through_version - epix_truncate_versions_after + - set_versions_end + + - title: Basic analysis and visualization + - contents: + - starts_with("autoplot") + - starts_with("print") + - revision_summary - title: Example data - contents: diff --git a/man/autoplot-epi.Rd b/man/autoplot-epi.Rd new file mode 100644 index 000000000..c7c8a7c4f --- /dev/null +++ b/man/autoplot-epi.Rd @@ -0,0 +1,149 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/autoplot.R +\name{autoplot-epi} +\alias{autoplot-epi} +\alias{autoplot.epi_df} +\alias{autoplot.epi_archive} +\alias{plot.epi_df} +\alias{plot.epi_archive} +\title{Automatically plot an epi_df or epi_archive} +\usage{ +\method{autoplot}{epi_df}( + object, + ..., + .color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"), + .facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"), + .base_color = "#3A448F", + .facet_filter = NULL, + .max_facets = deprecated() +) + +\method{autoplot}{epi_archive}( + object, + ..., + .base_color = "black", + .versions = NULL, + .mark_versions = FALSE, + .facet_filter = NULL +) + +\method{plot}{epi_df}(x, ...) + +\method{plot}{epi_archive}(x, ...) +} +\arguments{ +\item{object, x}{An \code{epi_df} or \code{epi_archive}} + +\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted +expressions separated by commas. 
Variable names can be used as if they +were positions in the data frame, so expressions like \code{x:y} can +be used to select a range of variables.} + +\item{.color_by}{Which variables should determine the color(s) used to plot +lines. Options include: +\itemize{ +\item \code{all_keys} - the default uses the interaction of any key variables +including the \code{geo_value} +\item \code{geo_value} - \code{geo_value} only +\item \code{other_keys} - any available keys that are not \code{geo_value} +\item \code{.response} - the numeric variables (same as the y-axis) +\item \code{all} - uses the interaction of all keys and numeric variables +\item \code{none} - no coloring aesthetic is applied +}} + +\item{.facet_by}{Similar to \code{.color_by} except that the default is to display +each numeric variable on a separate facet} + +\item{.base_color}{Lines will be shown with this color if \code{.color_by == "none"}. +For example, with a single numeric variable and faceting by \code{geo_value}, all +locations would share the same color line.} + +\item{.facet_filter}{Select which facets will be displayed. Especially +useful for when there are many \code{geo_value}'s or keys. This is a +<\code{\link[rlang:args_data_masking]{rlang}}> expression along the lines of \code{\link[dplyr:filter]{dplyr::filter()}}. +However, it must be a single expression combined with the \code{&} operator. This +contrasts to the typical use case which allows multiple comma-separated expressions +which are implicitly combined with \code{&}. When multiple variables are selected +with \code{...}, their names can be filtered in combination with other factors +by using \code{.response_name}. See the examples below.} + +\item{.max_facets}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} + +\item{.versions}{Select which versions will be displayed. By default, +a separate line will be shown with the data as it would have appeared on +every day in the archive. This can sometimes become overwhelming. For +example, daily data would display a line for what the data would have looked +like on every single day. To override this, you can select specific dates, +by passing a vector of values here. Alternatively, a sequence can be +automatically created by passing a string like \code{"2 weeks"} or \code{"month"}. +For time types where the \code{time_value} is a date object, any string that +is interpretable by \code{\link[base:seq.Date]{base::seq.Date()}} is allowed. + +For \code{time_type = "integer"}, an integer larger than 1 will give a subset +of versions.} + +\item{.mark_versions}{Logical. Indicate whether to mark each version with +a vertical line. 
Note that displaying many versions can become busy.} +} +\value{ +A \link[ggplot2:ggplot]{ggplot2::ggplot} object +} +\description{ +Automatically plot an epi_df or epi_archive +} +\examples{ +# -- Use it on an `epi_df` +autoplot(cases_deaths_subset, cases, death_rate_7d_av) +autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value") +autoplot(cases_deaths_subset, case_rate_7d_av, + .color_by = "none", + .facet_by = "geo_value" +) +autoplot(cases_deaths_subset, case_rate_7d_av, + .color_by = "none", + .base_color = "red", .facet_by = "geo_value" +) + +# .base_color specification won't have any effect due .color_by default +autoplot(cases_deaths_subset, case_rate_7d_av, + .base_color = "red", .facet_by = "geo_value" +) + +# filter to only some facets, must be explicitly combined +autoplot(cases_deaths_subset, cases, death_rate_7d_av, + .facet_by = "all", + .facet_filter = (.response_name == "cases" & geo_value \%in\% c("tx", "pa")) | + (.response_name == "death_rate_7d_av" & + geo_value \%in\% c("ca", "fl", "ga", "ny")) +) +# Just an alias for convenience +plot(cases_deaths_subset, cases, death_rate_7d_av, + .facet_by = "all", + .facet_filter = (.response_name == "cases" & geo_value \%in\% c("tx", "pa")) | + (.response_name == "death_rate_7d_av" & + geo_value \%in\% c("ca", "fl", "ga", "ny")) +) + +# -- Use it on an archive + +autoplot(archive_cases_dv_subset, percent_cli, .versions = "week") +autoplot(archive_cases_dv_subset_all_states, percent_cli, + .versions = "week", + .facet_filter = geo_value \%in\% c("or", "az", "vt", "ms") +) +autoplot(archive_cases_dv_subset, percent_cli, + .versions = "month", + .facet_filter = geo_value == "ca" +) +autoplot(archive_cases_dv_subset_all_states, percent_cli, + .versions = "1 month", + .facet_filter = geo_value \%in\% c("or", "az", "vt", "ms"), + .mark_versions = TRUE +) +# Just an alias for convenience +plot(archive_cases_dv_subset_all_states, percent_cli, + .versions = "1 month", + .facet_filter = geo_value \%in\% c("or", "az", "vt", "ms"), + .mark_versions = TRUE +) +} diff --git a/man/autoplot.epi_df.Rd b/man/autoplot.epi_df.Rd deleted file mode 100644 index d53335c14..000000000 --- a/man/autoplot.epi_df.Rd +++ /dev/null @@ -1,68 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/autoplot.R -\name{autoplot.epi_df} -\alias{autoplot.epi_df} -\title{Automatically plot an epi_df} -\usage{ -\method{autoplot}{epi_df}( - object, - ..., - .color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"), - .facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"), - .base_color = "#3A448F", - .max_facets = Inf -) -} -\arguments{ -\item{object}{An \code{epi_df}} - -\item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> One or more unquoted -expressions separated by commas. Variable names can be used as if they -were positions in the data frame, so expressions like \code{x:y} can -be used to select a range of variables.} - -\item{.color_by}{Which variables should determine the color(s) used to plot -lines. 
Options include: -\itemize{ -\item \code{all_keys} - the default uses the interaction of any key variables -including the \code{geo_value} -\item \code{geo_value} - \code{geo_value} only -\item \code{other_keys} - any available keys that are not \code{geo_value} -\item \code{.response} - the numeric variables (same as the y-axis) -\item \code{all} - uses the interaction of all keys and numeric variables -\item \code{none} - no coloring aesthetic is applied -}} - -\item{.facet_by}{Similar to \code{.color_by} except that the default is to display -each numeric variable on a separate facet} - -\item{.base_color}{Lines will be shown with this color. For example, with a -single numeric variable and faceting by \code{geo_value}, all locations would -share the same color line.} - -\item{.max_facets}{Cut down of the number of facets displayed. Especially -useful for testing when there are many \code{geo_value}'s or keys.} -} -\value{ -A ggplot object -} -\description{ -Automatically plot an epi_df -} -\examples{ -autoplot(cases_deaths_subset, cases, death_rate_7d_av) -autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value") -autoplot(cases_deaths_subset, case_rate_7d_av, - .color_by = "none", - .facet_by = "geo_value" -) -autoplot(cases_deaths_subset, case_rate_7d_av, - .color_by = "none", - .base_color = "red", .facet_by = "geo_value" -) - -# .base_color specification won't have any effect due .color_by default -autoplot(cases_deaths_subset, case_rate_7d_av, - .base_color = "red", .facet_by = "geo_value" -) -} diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index f98666e03..32cba5b5c 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -1,23 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/archive.R -\name{epi_archive} +\name{as_epi_archive} +\alias{as_epi_archive} +\alias{is_epi_archive} \alias{epi_archive} \alias{new_epi_archive} \alias{validate_epi_archive} -\alias{as_epi_archive} -\title{\code{epi_archive} object} +\title{\code{as_epi_archive} converts a data frame, data table, or tibble into an +\code{epi_archive} object.} \usage{ -new_epi_archive( - x, - geo_type, - time_type, - other_keys, - clobberable_versions_start, - versions_end -) - -validate_epi_archive(x) - as_epi_archive( x, geo_type = deprecated(), @@ -30,6 +21,19 @@ as_epi_archive( ..., versions_end = .versions_end ) + +is_epi_archive(x) + +new_epi_archive( + x, + geo_type, + time_type, + other_keys, + clobberable_versions_start, + versions_end +) + +validate_epi_archive(x) } \arguments{ \item{x}{A data.frame, data.table, or tibble, with columns \code{geo_value}, @@ -47,32 +51,6 @@ that should be considered key variables (in the language of \code{data.table}) apart from "geo_value", "time_value", and "version". Typical examples are "age" or more granular geographies.} -\item{clobberable_versions_start}{Optional; \code{length}-1; either a value of the -same \code{class} and \code{typeof} as \code{x$version}, or an \code{NA} of any \code{class} and -\code{typeof}: specifically, either (a) the earliest version that could be -subject to "clobbering" (being overwritten with different update data, but -using the \emph{same} version tag as the old update data), or (b) \code{NA}, to -indicate that no versions are clobberable. 
There are a variety of reasons -why versions could be clobberable under routine circumstances, such as (a) -today's version of one/all of the columns being published after initially -being filled with \code{NA} or LOCF, (b) a buggy version of today's data being -published but then fixed and republished later in the day, or (c) data -pipeline delays (e.g., publisher uploading, periodic scraping, database -syncing, periodic fetching, etc.) that make events (a) or (b) reflected -later in the day (or even on a different day) than expected; potential -causes vary between different data pipelines. The default value is \code{NA}, -which doesn't consider any versions to be clobberable. Another setting that -may be appropriate for some pipelines is \code{max_version_with_row_in(x)}.} - -\item{versions_end}{Optional; length-1, same \code{class} and \code{typeof} as -\code{x$version}: what is the last version we have observed? The default is -\code{max_version_with_row_in(x)}, but values greater than this could also be -valid, and would indicate that we observed additional versions of the data -beyond \code{max(x$version)}, but they all contained empty updates. (The default -value of \code{clobberable_versions_start} does not fully trust these empty -updates, and assumes that any version \verb{>= max(x$version)} could be -clobbered.) If \code{nrow(x) == 0}, then this argument is mandatory.} - \item{compactify}{Optional; \code{TRUE}, \code{FALSE}, or \code{"message"}. \code{TRUE} will remove some redundant rows, \code{FALSE} will not. \code{"message"} is like \code{TRUE} but will emit a message if anything was changed. Default is \code{TRUE}. See @@ -84,15 +62,57 @@ corresponds to exact equality. Consider using this if your value columns undergo tiny nonmeaningful revisions and the archive object with the default setting is too large.} +\item{clobberable_versions_start}{Optional; \code{length}-1; either a value of the +same \code{class} as \code{x$version}, or an \code{NA} of any \code{class}: specifically, +either (a) the earliest version that could be subject to "clobbering" +(being overwritten with different update data, but using the \emph{same} version +tag as the old update data), or (b) \code{NA}, to indicate that no versions are +clobberable. There are a variety of reasons why versions could be +clobberable under routine circumstances, such as (a) today's version of +one/all of the columns being published after initially being filled with +\code{NA} or LOCF, (b) a buggy version of today's data being published but then +fixed and republished later in the day, or (c) data pipeline delays (e.g., +publisher uploading, periodic scraping, database syncing, periodic +fetching, etc.) that make events (a) or (b) reflected later in the day (or +even on a different day) than expected; potential causes vary between +different data pipelines. The default value is \code{NA}, which doesn't consider +any versions to be clobberable. Another setting that may be appropriate for +some pipelines is \code{max_version_with_row_in(x)}.} + \item{.versions_end}{location based versions_end, used to avoid prefix \code{version = issue} from being assigned to \code{versions_end} instead of being used to rename columns.} \item{...}{used for specifying column names, as in \code{\link[dplyr:rename]{dplyr::rename}}. For example \code{version = release_date}} + +\item{versions_end}{Optional; length-1, same \code{class} as \code{x$version}: what is +the last version we have observed? 
The default is +\code{max_version_with_row_in(x)}, but values greater than this could also be +valid, and would indicate that we observed additional versions of the data +beyond \code{max(x$version)}, but they all contained empty updates. (The default +value of \code{clobberable_versions_start} does not fully trust these empty +updates, and assumes that any version \verb{>= max(x$version)} could be +clobbered.) If \code{nrow(x) == 0}, then this argument is mandatory.} } \value{ -An \code{epi_archive} object. +\itemize{ +\item Of \code{as_epi_archive}: an \code{epi_archive} object +} + +\itemize{ +\item Of \code{is_epi_archive}: \code{TRUE} if the object inherits from \code{epi_archive}, +otherwise \code{FALSE}. +} + +\itemize{ +\item Of \code{new_epi_archive}: an (unvalidated) \code{epi_archive} +} + +\itemize{ +\item Of \code{validate_epi_archive}: an \code{epi_archive}, +\link[base:invisible]{invisibly} (or raises an error if \code{x} was invalid) +} } \description{ The second main data structure for storing time series in @@ -109,6 +129,9 @@ only performs some fast, basic checks on the inputs. \code{validate_epi_archive} can perform more costly validation checks on its output. But most users should use \code{as_epi_archive}, which performs all necessary checks and has some additional features. + +Does not validate \code{geo_type} or \code{time_type} against \code{geo_value} and +\code{time_value} columns. These are assumed to have been set compatibly. } \details{ An \code{epi_archive} contains a \code{data.table} object \code{DT} (from the diff --git a/man/epi_df.Rd b/man/epi_df.Rd index a67827187..0ec5830ca 100644 --- a/man/epi_df.Rd +++ b/man/epi_df.Rd @@ -89,7 +89,8 @@ which can be seen as measured variables at each key. In brief, an \code{epi_df} represents a snapshot of an epidemiological data set at a point in time. } \details{ -An \code{epi_df} is a tibble with (at least) the following columns: +An \code{epi_df} is a kind of tibble with (at least) the following +columns: \itemize{ \item \code{geo_value}: A character vector representing the geographical unit of observation. This could be a country code, a state name, a county code, @@ -98,10 +99,11 @@ etc. } Other columns can be considered as measured variables, which we also refer to -as signal variables. An \code{epi_df} object also has metadata with (at least) -the following fields: +as indicators or signals. An \code{epi_df} object also has metadata with (at +least) the following fields: \itemize{ \item \code{geo_type}: the type for the geo values. +\item \code{time_type}: the type for the time values. \item \code{as_of}: the time value at which the given data were available. } diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index f053909db..92c1fa914 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/slide.R \name{epi_slide} \alias{epi_slide} -\title{Slide a function over variables in an \code{epi_df} object} +\title{More general form of \code{\link{epi_slide_opt}} for rolling/running computations} \usage{ epi_slide( .x, @@ -20,34 +20,46 @@ and any columns in \code{other_keys}. If grouped, we make sure the grouping is by \code{geo_value} and \code{other_keys}.} -\item{.f}{Function, formula, or missing; together with \code{...} specifies the -computation to slide. The return of the computation should either be a -scalar or a 1-row data frame.
Data frame returns will be -\code{tidyr::unpack()}-ed, if named, and will be \code{\link[tidyr:pack]{tidyr::pack}}-ed columns, if -not named. See examples. +\item{.f, ...}{The computation to slide. The input will be a time window of +the data for a single subpopulation (i.e., a single \code{geo_value} and single +value for any \code{\link[=as_epi_df]{other_keys}} you set up, such as age groups, race, etc.). +The input will always have the same size, determined by \code{.window_size}, and +will fill in any missing \code{time_values}, using \code{NA} values for missing +measurements. The output should be a scalar value or a 1-row data frame; +these outputs will be collected into a new column or columns in the +\code{epi_slide()} result. Data frame outputs will be unpacked into multiple +columns in the result by default, or \code{\link[tidyr:pack]{tidyr::pack}}ed into a single +data-frame-type column if you provide a name for such a column (e.g., via +\code{.new_col_name}). + +You can specify the computation in one of the following ways: \itemize{ -\item If \code{.f} is missing, then \code{...} will specify the computation via -tidy-evaluation. This is usually the most convenient way to use -\code{epi_slide}. See examples. -\item If \code{.f} is a formula, then the formula should use \code{.x} (not the same as -the input \code{epi_df}) to operate on the columns of the input \code{epi_df}, e.g. -\code{~mean(.x$var)} to compute a mean of \code{var}. -\item If a function, \code{.f} must have the form \verb{function(x, g, t, ...)}, where: +\item Don't provide \code{.f}, and instead use one or more +\code{\link[dplyr:summarise]{dplyr::summarize}}-esque \link[rlang:args_data_masking]{"data-masking"} +expressions in \code{...}, e.g., \code{cfr_estimate_v0 = death_rate_7d_av[[22]]/case_rate_7d_av[[1]]}. This way is sometimes more +convenient, but also has the most computational overhead. +\item Provide a formula in \code{.f}, e.g., \code{~ .x$death_rate_7d_av[[22]]/.x$case_rate_7d_av[[1]]}. In this formula, \code{.x} +is an \code{epi_df} containing data for a single time window as described above, +taken from the original \code{.x} fed into \code{epi_slide()}. +\item Provide a function in \code{.f}, e.g., \code{function(x, g, t) x$death_rate_7d_av[[22]]/x$case_rate_7d_av[[1]]}. The function should be of +the form \verb{function(x, g, t)} or \verb{function(x, g, t, <additional configuration args>)}, where: \itemize{ \item \code{x} is a data frame with the same column names as the original object, minus any grouping variables, with only the windowed data for one group-\code{.ref_time_value} combination -\item \code{g} is a one-row tibble containing the values of the grouping variables -for the associated group +\item \code{g} is a one-row tibble specifying the \code{geo_value} and value of any +\code{other_keys} for this computation \item \code{t} is the \code{.ref_time_value} for the current window -\item \code{...} are additional arguments +\item If you have a complex \code{.f} containing \verb{<additional configuration args>}, you can provide values for those arguments in the \code{...} +argument to \code{epi_slide()}. } -}} -\item{...}{Additional arguments to pass to the function or formula specified -via \code{.f}.
Alternatively, if \code{.f} is missing, then the \code{...} is interpreted -as a \link[rlang:args_data_masking]{"data-masking"} expression or expressions -for tidy evaluation.} +The values of \code{g} and \code{t} are also available to data-masking expression and +formula-based computations as \code{.group_key} and \code{.ref_time_value}, +respectively. Formula computations also let you use \code{.y} or \code{.z}, +respectively, as additional names for these same quantities (similar to +\code{\link[dplyr:group_map]{dplyr::group_modify}}). +}} \item{.window_size}{The size of the sliding window. The accepted values depend on the type of the \code{time_value} column in \code{.x}: @@ -95,30 +107,60 @@ added. It will be ungrouped if \code{.x} was ungrouped, and have the same groups as \code{.x} if \code{.x} was grouped. } \description{ -Slides a given function over variables in an \code{epi_df} object. -This is useful for computations like rolling averages. The function supports -many ways to specify the computation, but by far the most common use case is -as follows: - -\if{html}{\out{
}}\preformatted{# Create new column `cases_7dmed` that contains a 7-day trailing median of cases -epi_slide(edf, cases_7dmed = median(cases), .window_size = 7) +Most rolling/running computations can be handled by \code{\link{epi_slide_mean}}, +\code{\link{epi_slide_sum}}, or the medium-generality \code{\link{epi_slide_opt}} functions, +which have been specialized to run more quickly. \code{epi_slide()} is a slower +but even more general function for rolling/running computations, and uses a +different interface to specify the computations; you typically only need to +consider using \code{epi_slide()} if you have a computation that depends on +multiple columns simultaneously, outputs multiple columns simultaneously, or +produces non-numeric output. For example, this computation depends on +multiple columns: +} +\details{ +\if{html}{\out{
}}\preformatted{cases_deaths_subset \%>\% + epi_slide( + cfr_estimate_v0 = death_rate_7d_av[[22]]/case_rate_7d_av[[1]], + .window_size = 22 + ) \%>\% + print(n = 30) }\if{html}{\out{
}} -For two very common use cases, we provide optimized functions that are much -faster than \code{epi_slide}: \code{epi_slide_mean()} and \code{epi_slide_sum()}. We -recommend using these functions when possible. +(Here, the value 22 was selected using \code{epi_cor()} and averaging across +\code{geo_value}s. See +\href{https://www.medrxiv.org/content/10.1101/2024.12.27.24319518v1}{this +manuscript} for some warnings & information using similar types of CFR +estimators.) See \code{vignette("epi_df")} for more examples. +\subsection{Motivation and lower-level alternatives}{ + +\code{epi_slide()} is focused on preventing errors and providing a convenient +interface. If you need computational speed, many computations can be optimized +by one of the following: +\itemize{ +\item Performing core sliding operations with \code{epi_slide_opt()} with +\code{frollapply}, and using potentially-grouped \code{mutate()}s to transform or +combine the results. +\item Grouping by \code{geo_value} and any \code{other_keys}; \code{\link[=complete]{complete()}}ing with +\code{full_seq()} to fill in time gaps; \code{arrange()}ing by \code{time_value}s within +each group; using \code{mutate()} with vectorized operations and shift operators +like \code{dplyr::lead()} and \code{dplyr::lag()} to perform the core operations, +being careful to give the desired results for the least and most recent +\code{time_value}s (often \code{NA}s for the least recent); ungrouping; and +\code{filter()}ing back down to only rows that existed before the \code{complete()} +stage if necessary. } -\details{ +} + \subsection{Advanced uses of \code{.f} via tidy evaluation}{ -If specifying \code{.f} via tidy evaluation, in addition to the standard \code{\link{.data}} -and \code{\link{.env}}, we make some additional "pronoun"-like bindings available: +If specifying \code{.f} via tidy evaluation, in addition to the standard \code{\link[rlang:dot-data]{.data}} +and \code{\link[rlang:dot-data]{.env}}, we make some additional "pronoun"-like bindings available: \itemize{ \item .x, which is like \code{.x} in \code{\link[dplyr:group_map]{dplyr::group_modify}}; an ordinary object like an \code{epi_df} rather than an rlang \link[rlang:as_data_mask]{pronoun} -like \code{\link{.data}}; this allows you to use additional \code{dplyr}, \code{tidyr}, and +like \code{.data}; this allows you to use additional \code{dplyr}, \code{tidyr}, and \code{epiprocess} operations. If you have multiple expressions in \code{...}, this won't let you refer to the output of the earlier expressions, but \code{.data} will. @@ -131,34 +173,43 @@ determined the time window for the current computation. \examples{ library(dplyr) -# Get the 7-day trailing standard deviation of cases and the 7-day trailing mean of cases -cases_deaths_subset \%>\% +# Generate some simple time-varying CFR estimates: +with_cfr_estimates <- cases_deaths_subset \%>\% epi_slide( - cases_7sd = sd(cases, na.rm = TRUE), - cases_7dav = mean(cases, na.rm = TRUE), - .window_size = 7 - ) \%>\% - select(geo_value, time_value, cases, cases_7sd, cases_7dav) -# Note that epi_slide_mean could be used to more quickly calculate cases_7dav. 
- -# In addition to the [`dplyr::mutate`]-like syntax, you can feed in a function or -# formula in a way similar to [`dplyr::group_modify`]: -my_summarizer <- function(window_data) { - window_data \%>\% - summarize( - cases_7sd = sd(cases, na.rm = TRUE), - cases_7dav = mean(cases, na.rm = TRUE) - ) + cfr_estimate_v0 = death_rate_7d_av[[22]] / case_rate_7d_av[[1]], + .window_size = 22 + ) +with_cfr_estimates \%>\% + print(n = 30) +# (Here, the value 22 was selected using `epi_cor()` and averaging across +# `geo_value`s. See +# https://www.medrxiv.org/content/10.1101/2024.12.27.24319518v1 for some +# warnings & information using CFR estimators along these lines.) + +# In addition to the [`dplyr::mutate`]-like syntax, you can feed in a +# function or formula in a way similar to [`dplyr::group_modify`]; these +# often run much more quickly: +my_computation <- function(window_data) { + tibble( + cfr_estimate_v0 = window_data$death_rate_7d_av[[nrow(window_data)]] / + window_data$case_rate_7d_av[[1]] + ) } -cases_deaths_subset \%>\% +with_cfr_estimates2 <- cases_deaths_subset \%>\% epi_slide( - ~ my_summarizer(.x), - .window_size = 7 - ) \%>\% - select(geo_value, time_value, cases, cases_7sd, cases_7dav) - - - + ~ my_computation(.x), + .window_size = 22 + ) +with_cfr_estimates3 <- cases_deaths_subset \%>\% + epi_slide( + function(window_data, g, t) { + tibble( + cfr_estimate_v0 = window_data$death_rate_7d_av[[nrow(window_data)]] / + window_data$case_rate_7d_av[[1]] + ) + }, + .window_size = 22 + ) #### Advanced: #### diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd index 4b75e9ffb..e1cb20924 100644 --- a/man/epi_slide_opt.Rd +++ b/man/epi_slide_opt.Rd @@ -4,7 +4,7 @@ \alias{epi_slide_opt} \alias{epi_slide_mean} \alias{epi_slide_sum} -\title{Optimized slide functions for common cases} +\title{Calculate rolling or running means, sums, etc., or custom calculations} \usage{ epi_slide_opt( .x, @@ -51,7 +51,7 @@ epi_slide_sum( and any columns in \code{other_keys}. If grouped, we make sure the grouping is by \code{geo_value} and \code{other_keys}.} -\item{.col_names}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column +\item{.col_names}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> An unquoted column name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}), \link[tidyselect:language]{other tidy-select expression}, or a vector of characters (e.g. \code{c("cases", "deaths")}). Variable names can be used as if @@ -134,10 +134,20 @@ added. It will be ungrouped if \code{.x} was ungrouped, and have the same groups as \code{.x} if \code{.x} was grouped. } \description{ -\code{epi_slide_opt} allows sliding an n-timestep \link[data.table:froll]{data.table::froll} -or \link[slider:summary-slide]{slider::summary-slide} function over variables in an \code{epi_df} object. -These functions tend to be much faster than \code{epi_slide()}. See -\code{vignette("epi_df")} for more examples. +These methods take each subpopulation (i.e., a single +\code{geo_value} and combination of any \code{other_keys} you set up for age groups, +etc.) and perform a \code{.window_size}-width time window rolling/sliding +computation, or alternatively, a running/cumulative computation (with +\code{.window_size = Inf}) on the requested columns. Explicit \code{NA} measurements +are temporarily added to fill in any time gaps, and, for rolling +computations, to pad the time series to ensure that the first & last +computations use exactly \code{.window_size} values. 
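A minimal sketch of the interface described above (assuming the `cases_deaths_subset` dataset bundled with the package and a daily `time_type`; output column naming follows the package defaults):

library(epiprocess)
library(dplyr)

# 7-day trailing mean of `cases` within each geo_value; any gaps in
# `time_value` are temporarily filled with NA measurements as described above:
cases_deaths_subset %>%
  epi_slide_mean(cases, .window_size = 7)

# The same computation, spelled out via the more general epi_slide_opt():
cases_deaths_subset %>%
  epi_slide_opt(cases, .f = data.table::frollmean, .window_size = 7)

# A running/cumulative sum instead of a rolling one:
cases_deaths_subset %>%
  epi_slide_sum(cases, .window_size = Inf)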
+ +\code{epi_slide_opt} allows you to use any \link[data.table:froll]{data.table::froll} or +\link[slider:summary-slide]{slider::summary-slide} function. If none of those specialized functions fit +your usecase, you can use \code{data.table::frollapply} together with a non-rolling +function (e.g., \code{median}). See \code{\link{epi_slide}} if you need to work with +multiple columns at once or output a custom type. \code{epi_slide_mean} is a wrapper around \code{epi_slide_opt} with \code{.f = data.table::frollmean}. diff --git a/man/epiprocess-package.Rd b/man/epiprocess-package.Rd index fe79c01e8..12e8a3c42 100644 --- a/man/epiprocess-package.Rd +++ b/man/epiprocess-package.Rd @@ -11,6 +11,7 @@ This package introduces common data structures for working with epidemiological \seealso{ Useful links: \itemize{ + \item \url{https://github.com/cmu-delphi/epiprocess} \item \url{https://cmu-delphi.github.io/epiprocess/} } diff --git a/man/epix_as_of_current.Rd b/man/epix_as_of_current.Rd new file mode 100644 index 000000000..8dcb53afc --- /dev/null +++ b/man/epix_as_of_current.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-epi_archive.R +\name{epix_as_of_current} +\alias{epix_as_of_current} +\title{Get the latest snapshot from an \code{epi_archive} object.} +\usage{ +epix_as_of_current(x) +} +\arguments{ +\item{x}{An \code{epi_archive} object} +} +\value{ +The latest snapshot from an \code{epi_archive} object +} +\description{ +The latest snapshot is the snapshot of the last known version. +} diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index cadb19833..ad6be18b5 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -1,10 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/methods-epi_archive.R, R/grouped_epi_archive.R +% Please edit documentation in R/methods-epi_archive.R \name{epix_slide} \alias{epix_slide} -\alias{epix_slide.epi_archive} -\alias{epix_slide.grouped_epi_archive} -\title{Slide a function over variables in an \code{epi_archive} or \code{grouped_epi_archive}} +\title{Take each requested (group and) version in an archive, run a computation (e.g., forecast)} \usage{ epix_slide( .x, @@ -15,51 +13,39 @@ epix_slide( .new_col_name = NULL, .all_versions = FALSE ) - -\method{epix_slide}{epi_archive}( - .x, - .f, - ..., - .before = Inf, - .versions = NULL, - .new_col_name = NULL, - .all_versions = FALSE -) - -\method{epix_slide}{grouped_epi_archive}( - .x, - .f, - ..., - .before = Inf, - .versions = NULL, - .new_col_name = NULL, - .all_versions = FALSE -) } \arguments{ \item{.x}{An \code{\link{epi_archive}} or \code{\link{grouped_epi_archive}} object. If ungrouped, all data in \code{x} will be treated as part of a single data group.} \item{.f}{Function, formula, or missing; together with \code{...} specifies the -computation to slide. To "slide" means to apply a computation over a -sliding (a.k.a. "rolling") time window for each data group. The window is -determined by the \code{.before} parameter (see details for more). If a -function, \code{.f} must have the form \verb{function(x, g, t, ...)}, where -\itemize{ -\item "x" is an epi_df with the same column names as the archive's \code{DT}, minus -the \code{version} column -\item "g" is a one-row tibble containing the values of the grouping variables -for the associated group -\item "t" is the ref_time_value for the current window -\item "..." are additional arguments -} +computation. 
The computation will be run on each requested group-version +combination, with a time window filter applied if \code{.before} is supplied. + +If \code{.f} is a function, it must have the form \verb{function(x, g, v)} or +\verb{function(x, g, v, <additional configuration args>)}, where + +\if{html}{\out{
}}\preformatted{- `x` is an `epi_df` with the same column names as the archive's `DT`, + minus the `version` column. (Or, if `.all_versions = TRUE`, an + `epi_archive` with the requested partial version history.) + +- `g` is a one-row tibble containing the values of the grouping variables + for the associated group. + +- `v` (length-1) is the associated `version` (one of the requested + `.versions`) + +- `<additional configuration args>` are optional; you can add such + arguments to your function and set them by passing them through the + `...` argument to `epix_slide()`. }\if{html}{\out{
}} If a formula, \code{.f} can operate directly on columns accessed via \code{.x$var} or \code{.$var}, as in \code{~ mean (.x$var)} to compute a mean of a column \code{var} for each group-\code{ref_time_value} combination. The group key can be accessed via -\code{.y} or \code{.group_key}, and the reference time value can be accessed via \code{.z} -or \code{.ref_time_value}. If \code{.f} is missing, then \code{...} will specify the -computation.} +\code{.y} or \code{.group_key}, and the reference time value can be accessed via +\code{.z}, \code{.version}, or \code{.ref_time_value}. If \code{.f} is missing, then \code{...} will +specify the computation.} \item{...}{Additional arguments to pass to the function or formula specified via \code{f}. Alternatively, if \code{.f} is missing, then the \code{...} is interpreted @@ -67,41 +53,46 @@ as a \link[rlang:args_data_masking]{"data-masking"} expression or expressions for tidy evaluation; in addition to referring columns directly by name, the expressions have access to \code{.data} and \code{.env} pronouns as in \code{dplyr} verbs, and can also refer to \code{.x} (not the same as the input epi_archive), -\code{.group_key}, and \code{.ref_time_value}. See details for more.} - -\item{.before}{How many time values before the \code{.ref_time_value} -should each snapshot handed to the function \code{.f} contain? If provided, it -should be a single value that is compatible with the time_type of the -time_value column (more below), but most commonly an integer. This window -endpoint is inclusive. For example, if \code{.before = 7}, \code{time_type} -in the archive is "day", and the \code{.ref_time_value} is January 8, then the -smallest time_value in the snapshot will be January 1. If missing, then the -default is no limit on the time values, so the full snapshot is given.} - -\item{.versions}{Reference time values / versions for sliding -computations; each element of this vector serves both as the anchor point -for the \code{time_value} window for the computation and the \code{max_version} -\code{epix_as_of} which we fetch data in this window. If missing, then this will -set to a regularly-spaced sequence of values set to cover the range of -\code{version}s in the \code{DT} plus the \code{versions_end}; the spacing of values will -be guessed (using the GCD of the skips between values).} +\code{.group_key} and \code{.version}/\code{.ref_time_value}. See details for more.} + +\item{.before}{Optional; applies a \code{time_value} filter before running each +computation. The default is not to apply a \code{time_value} filter. If +provided, it should be a single integer or difftime that is compatible with +the time_type of the time_value column. If an integer, then the minimum +possible \code{time_value} included will be that many time steps (according to +the \code{time_type}) before each requested \code{.version}. This window endpoint is +inclusive. For example, if \code{.before = 14}, the \code{time_type} in the archive +is "day", and the requested \code{.version} is January 15, then the smallest +possible \code{time_value} in the snapshot will be January 1. Note that +this does not mean that there will be 14 or 15 distinct \code{time_value}s +actually appearing in the data; for most reporting streams, reporting as of +January 15 won't include \code{time_value}s all the way through January 14, due +to reporting latency.
Unlike \code{epi_slide()}, \code{epix_slide()} won't fill in +any missing \code{time_values} in this window.} + +\item{.versions}{Requested versions on which to run the computation. Each +requested \code{.version} also serves as the anchor point from which +the \code{time_value} window specified by \code{.before} is drawn. If \code{.versions} is +missing, it will be set to a regularly-spaced sequence of values set to +cover the range of \code{version}s in the \code{DT} plus the \code{versions_end}; the +spacing of values will be guessed (using the GCD of the skips between +values).} \item{.new_col_name}{Either \code{NULL} or a string indicating the name of the new column that will contain the derived values. The default, \code{NULL}, will use the name "slide_value" unless your slide computations output data frames, -in which case they will be unpacked into the constituent columns and those -names used. If the resulting column name(s) overlap with the column names -used for labeling the computations, which are \code{group_vars(x)} and -\code{"version"}, then the values for these columns must be identical to the -labels we assign.} +in which case they will be unpacked into the constituent columns and the +data frame's column names will be used instead. If the resulting column +name(s) overlap with the column names used for labeling the computations, +which are \code{group_vars(x)} and \code{"version"}, then the values for these +columns must be identical to the labels we assign.} \item{.all_versions}{(Not the same as \code{.all_rows} parameter of \code{epi_slide}.) If \code{.all_versions = TRUE}, then the slide computation will be passed the -version history (all \code{version <= .version} where \code{.version} is one of the -requested \code{.versions}) for rows having a \code{time_value} of at least `.version -\itemize{ -\item before\verb{. Otherwise, the slide computation will be passed only the most recent }version\verb{for every unique}time_value\verb{. Default is }FALSE`. -}} +version history (all versions \verb{<= .version} where \code{.version} is one of the +requested \code{.version}s), in \code{epi_archive} format. Otherwise, the slide +computation will be passed only the most recent \code{version} for every unique +\code{time_value}, in \code{epi_df} format. Default is \code{FALSE}.} } \value{ A tibble whose columns are: the grouping variables (if any), @@ -110,28 +101,32 @@ computation, and a column named according to the \code{.new_col_name} argument, containing the slide values. It will be grouped by the grouping variables. } \description{ -Slides a given function over variables in an \code{epi_archive} object. This -behaves similarly to \code{epi_slide()}, with the key exception that it is -version-aware: the sliding computation at any given reference time t is -performed on \strong{data that would have been available as of t}. This function -is intended for use in accurate backtesting of models; see -\code{vignette("backtesting", package="epipredict")} for a walkthrough. +... and collect the results. This is useful for more accurately simulating +how a forecaster, nowcaster, or other algorithm would have behaved in real +time, factoring in reporting latency and data revisions; see +\href{https://cmu-delphi.github.io/epipredict/articles/backtesting.html}{\code{vignette("backtesting", package="epipredict")}} for a walkthrough. 
} \details{ +This is similar to looping over versions and calling \code{\link{epix_as_of}}, but has +some conveniences such as working naturally with \code{\link{grouped_epi_archive}}s, +optional time windowing, and syntactic sugar to make things shorter to write. + A few key distinctions between the current function and \code{epi_slide()}: \enumerate{ \item In \code{.f} functions for \code{epix_slide}, one should not assume that the input data to contain any rows with \code{time_value} matching the computation's -\code{.ref_time_value} (accessible via \verb{attributes()$metadata$as_of}); for -typical epidemiological surveillance data, observations pertaining to a -particular time period (\code{time_value}) are first reported \code{as_of} some -instant after that time period has ended. +\code{.version}, due to reporting latency; for typical epidemiological +surveillance data, observations pertaining to a particular time period +(\code{time_value}) are first reported \code{as_of} some instant after that time +period has ended. No time window completion is performed as in +\code{epi_slide()}. \item The input class and columns are similar but different: \code{epix_slide} (with the default \code{.all_versions=FALSE}) keeps all columns and the \code{epi_df}-ness of the first argument to each computation; \code{epi_slide} only provides the grouping variables in the second input, and will convert the first input into a regular tibble if the grouping variables include the -essential \code{geo_value} column. (With .all_versions=TRUE\verb{, }epix_slide\verb{will will provide an}epi_archive\verb{rather than an}epi-df` to each +essential \code{geo_value} column. (With \code{.all_versions=TRUE}, \code{epix_slide} +will provide an \code{epi_archive} rather than an \code{epi_df} to each computation.) \item The output class and columns are similar but different: \code{epix_slide()} returns a tibble containing only the grouping variables, \code{time_value}, and @@ -144,82 +139,61 @@ results as they are not supported by tibbles.) \item There are no size stability checks or element/row recycling to maintain size stability in \code{epix_slide}, unlike in \code{epi_slide}. (\code{epix_slide} is roughly analogous to \code{\link[dplyr:group_map]{dplyr::group_modify}}, while \code{epi_slide} is roughly -analogous to \code{dplyr::mutate} followed by \code{dplyr::arrange}) This is detailed -in the "advanced" vignette. +analogous to \code{\link[dplyr:mutate]{dplyr::mutate}}.) \item \code{.all_rows} is not supported in \code{epix_slide}; since the slide computations are allowed more flexibility in their outputs than in \code{epi_slide}, we can't guess a good representation for missing computations for excluded group-\code{.ref_time_value} pairs. \item The \code{.versions} default for \code{epix_slide} is based on making an evenly-spaced sequence out of the \code{version}s in the \code{DT} plus the -\code{versions_end}, rather than the \code{time_value}s. +\code{versions_end}, rather than all unique \code{time_value}s. +\item \code{epix_slide()} computations can refer to the current element of +\code{.versions} as either \code{.version} or \code{.ref_time_value}, while \code{epi_slide()} +computations refer to the current element of \code{.ref_time_values} with +\code{.ref_time_value}. } Apart from the above distinctions, the interfaces between \code{epix_slide()} and \code{epi_slide()} are the same.
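A rough sketch of the "looping over versions and calling `epix_as_of`" equivalence mentioned above (the version range, summary statistic, and object names are invented for illustration; `epix_slide()` additionally handles grouping, result labeling, and row-binding for you):

library(dplyr)

versions <- seq(as.Date("2020-09-02"), as.Date("2020-09-08"), by = "1 day")
manual <- bind_rows(lapply(versions, function(v) {
  # Data exactly as it would have appeared on day v:
  snapshot <- epix_as_of(archive_cases_dv_subset, v)
  snapshot %>%
    group_by(geo_value) %>%
    summarize(mean_percent_cli = mean(percent_cli, na.rm = TRUE), .groups = "drop") %>%
    mutate(version = v)
}))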
- -Furthermore, the current function can be considerably slower than -\code{epi_slide()}, for two reasons: (1) it must repeatedly fetch -properly-versioned snapshots from the data archive (via \code{epix_as_of()}), -and (2) it performs a "manual" sliding of sorts, and does not benefit from -the highly efficient \code{slider} package. For this reason, it should never be -used in place of \code{epi_slide()}, and only used when version-aware sliding is -necessary (as it its purpose). } \examples{ library(dplyr) -# Reference time points for which we want to compute slide values: -versions <- seq(as.Date("2020-06-02"), - as.Date("2020-06-15"), - by = "1 day" -) +# Request only a small set of versions, for example's sake: +requested_versions <- + seq(as.Date("2020-09-02"), as.Date("2020-09-15"), by = "1 day") -# A simple (but not very useful) example (see the archive vignette for a more -# realistic one): +# Investigate reporting lag of `percent_cli` signal (though normally we'd +# probably work off of the dedicated `revision_summary()` function instead): archive_cases_dv_subset \%>\% - group_by(geo_value) \%>\% epix_slide( - .f = ~ mean(.x$case_rate_7d_av), - .before = 2, - .versions = versions, - .new_col_name = "case_rate_7d_av_recent_av" - ) \%>\% - ungroup() -# We requested time windows that started 2 days before the corresponding time -# values. The actual number of `time_value`s in each computation depends on -# the reporting latency of the signal and `time_value` range covered by the -# archive (2020-06-01 -- 2021-11-30 in this example). In this case, we have -# * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically -# discarded -# * 1 `time_value`, for ref time 2020-06-02 -# * 2 `time_value`s, for the rest of the results -# * never the 3 `time_value`s we would get from `epi_slide`, since, because -# of data latency, we'll never have an observation -# `time_value == .ref_time_value` as of `.ref_time_value`. -# The example below shows this type of behavior in more detail. - -# Examining characteristics of the data passed to each computation with -# `all_versions=FALSE`. + geowide_percent_cli_max_time = max(time_value[!is.na(percent_cli)]), + geowide_percent_cli_rpt_lag = .version - geowide_percent_cli_max_time, + .versions = requested_versions + ) archive_cases_dv_subset \%>\% group_by(geo_value) \%>\% epix_slide( - function(x, gk, rtv) { - tibble( - time_range = if (nrow(x) == 0L) { - "0 `time_value`s" - } else { - sprintf("\%s -- \%s", min(x$time_value), max(x$time_value)) - }, - n = nrow(x), - class1 = class(x)[[1L]] - ) - }, - .before = 5, .all_versions = FALSE, - .versions = versions - ) \%>\% - ungroup() \%>\% - arrange(geo_value, version) + percent_cli_max_time = max(time_value[!is.na(percent_cli)]), + percent_cli_rpt_lag = .version - percent_cli_max_time, + .versions = requested_versions + ) + +# Backtest a forecaster "pseudoprospectively" (i.e., faithfully with respect +# to the data version history): +case_death_rate_archive \%>\% + epix_slide( + .versions = as.Date(c("2021-10-01", "2021-10-08")), + function(x, g, v) { + epipredict::arx_forecaster( + x, + outcome = "death_rate", + predictors = c("death_rate_7d_av", "case_rate_7d_av") + )$predictions + } + ) +# See `vignette("backtesting", package="epipredict")` for a full walkthrough +# on backtesting forecasters, including plots, etc. 
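The `.before` windowing documented above composes with the same data-masking style; a hedged sketch reusing `requested_versions` from the examples above (the output column name is invented for illustration):

# Average `percent_cli` over at most the 15 most recent days of data available
# as of each requested version (inclusive endpoints; reporting latency may
# leave fewer time_values in practice):
archive_cases_dv_subset %>%
  group_by(geo_value) %>%
  epix_slide(
    recent_percent_cli_av = mean(percent_cli, na.rm = TRUE),
    .before = 14,
    .versions = requested_versions
  )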
# --- Advanced: --- @@ -251,7 +225,7 @@ archive_cases_dv_subset \%>\% ) }, .before = 5, .all_versions = TRUE, - .versions = versions + .versions = requested_versions ) \%>\% ungroup() \%>\% # Focus on one geo_value so we can better see the columns above: diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 000000000..b61c57c3f --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 000000000..5d88fc2c6 --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1,21 @@ + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 000000000..9bf21e76b --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1,29 @@ + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 000000000..db8d757f7 --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1,21 @@ + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + diff --git a/man/filter.epi_archive.Rd b/man/filter.epi_archive.Rd new file mode 100644 index 000000000..5f9d72db4 --- /dev/null +++ b/man/filter.epi_archive.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-epi_archive.R +\name{filter.epi_archive} +\alias{filter.epi_archive} +\title{\code{\link[dplyr:filter]{dplyr::filter}} for \code{epi_archive}s} +\usage{ +\method{filter}{epi_archive}(.data, ..., .by = NULL, .format_aware = FALSE) +} +\arguments{ +\item{.data}{an \code{epi_archive}} + +\item{...}{as in \code{\link[dplyr:filter]{dplyr::filter}}; using the \code{version} column is not allowed +unless you use \code{.format_aware = TRUE}; see details.} + +\item{.by}{as in \code{\link[dplyr:filter]{dplyr::filter}}} + +\item{.format_aware}{optional, \code{TRUE} or \code{FALSE}; default \code{FALSE}. See +details.} +} +\description{ +\code{\link[dplyr:filter]{dplyr::filter}} for \code{epi_archive}s +} +\details{ +By default, using the \code{version} column or measurement columns is disabled as +it's easy to get unexpected results. See if either \code{\link{epix_as_of}} or +\code{\link{epix_slide}} works for any version selection you have in mind: for version +selection, see the \code{version} or \code{.versions} args, respectively; for +measurement column-based filtering, try \code{filter}ing after \code{epix_as_of} or +inside the \code{.f} in \code{epix_slide()}. If they don't cover your use case, then +you can set \code{.format_aware = TRUE} to enable usage of these columns, but be +careful to: +\itemize{ +\item Factor in that \code{.data$DT} may have been converted into a compact format +based on diffing consecutive versions, and the last version of each +observation in \code{.data$DT} will always be carried forward to future +\code{version}s\verb{; see details of [}as_epi_archive`]. +\item Set \code{clobberable_versions_start} and \code{versions_end} of the result +appropriately after the \code{filter} call. They will be initialized with the +same values as in \code{.data}. 
+} + +\code{dplyr::filter} also has an optional argument \code{.preserve}, which should not +have an impact on (ungrouped) \code{epi_archive}s, and \code{grouped_epi_archive}s do +not currently support \code{dplyr::filter}. +} +\examples{ + +# Filter to one location and a particular time range: +archive_cases_dv_subset \%>\% + filter(geo_value == "fl", time_value >= as.Date("2020-10-01")) + +# Convert to weekly by taking the Saturday data for each week, so that +# `case_rate_7d_av` represents a Sun--Sat average: +archive_cases_dv_subset \%>\% + filter(as.POSIXlt(time_value)$wday == 6L) + +# Filtering involving the `version` column or measurement columns requires +# extra care. See epix_as_of and epix_slide instead for some common +# operations. One semi-common operation that ends up being fairly simple is +# treating observations as finalized after some amount of time, and ignoring +# any revisions that were made after that point: +archive_cases_dv_subset \%>\% + filter( + version <= time_value + as.difftime(60, units = "days"), + .format_aware = TRUE + ) + +} diff --git a/man/format_chr_with_quotes.Rd b/man/format_chr_with_quotes.Rd index 49beffb00..555dd402c 100644 --- a/man/format_chr_with_quotes.Rd +++ b/man/format_chr_with_quotes.Rd @@ -2,19 +2,29 @@ % Please edit documentation in R/utils.R \name{format_chr_with_quotes} \alias{format_chr_with_quotes} -\title{Format a character vector as a string via deparsing/quoting each} +\title{Format each entry in a character vector via quoting; special replacement for length 0} \usage{ format_chr_with_quotes(x, empty = "*none*") } \arguments{ -\item{x}{\code{chr}; e.g., \code{colnames} of some data frame} +\item{x}{chr; e.g., \code{colnames} of some data frame} -\item{empty}{string; what should be output if \code{x} is of length 0?} +\item{empty}{chr, likely string; what should be output if \code{x} is of length 0?} } \value{ -string +chr; same \code{length} as \code{x} if \code{x} had nonzero length; value of \code{empty} otherwise } \description{ -Format a character vector as a string via deparsing/quoting each +Performs no escaping within the strings; if you want something that a reader +could copy-paste to debug, look into \code{format_deparse} (note that this +collapses into a single string).
+} +\examples{ +cli::cli_inform('{epiprocess:::format_chr_with_quotes("x")}') +cli::cli_inform('{epiprocess:::format_chr_with_quotes(c("x","y"))}') +nms <- c("x", "\"Total Cases\"") +cli::cli_inform("{epiprocess:::format_chr_with_quotes(nms)}") +cli::cli_inform("{epiprocess:::format_chr_with_quotes(character())}") + } \keyword{internal} diff --git a/man/revision_summary.Rd b/man/revision_analysis.Rd similarity index 70% rename from man/revision_summary.Rd rename to man/revision_analysis.Rd index 54abf098c..59f0c9593 100644 --- a/man/revision_summary.Rd +++ b/man/revision_analysis.Rd @@ -1,30 +1,46 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/revision_analysis.R -\name{revision_summary} +\name{revision_analysis} +\alias{revision_analysis} +\alias{print.revision_analysis} \alias{revision_summary} \title{A function to describe revision behavior for an archive.} \usage{ -revision_summary( +revision_analysis( epi_arch, ..., drop_nas = TRUE, - print_inform = TRUE, - min_waiting_period = as.difftime(60, units = "days") \%>\% - difftime_approx_ceiling_time_delta(epi_arch$time_type), + min_waiting_period = as.difftime(60, units = "days"), within_latest = 0.2, - quick_revision = as.difftime(3, units = "days") \%>\% - difftime_approx_ceiling_time_delta(epi_arch$time_type), + compactify = TRUE, + compactify_abs_tol = 0, + return_only_tibble = FALSE +) + +\method{print}{revision_analysis}( + x, + quick_revision = as.difftime(3, units = "days"), few_revisions = 3, abs_spread_threshold = NULL, rel_spread_threshold = 0.1, + ... +) + +revision_summary( + epi_arch, + ..., + drop_nas = TRUE, + min_waiting_period = as.difftime(60, units = "days"), + within_latest = 0.2, compactify = TRUE, - compactify_abs_tol = 0 + compactify_abs_tol = 0, + return_only_tibble = FALSE ) } \arguments{ \item{epi_arch}{an epi_archive to be analyzed} -\item{...}{<\code{\link[=dplyr_tidy_select]{tidyselect}}>, used to choose the column to +\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidyselect}}>, used to choose the column to summarize. If empty and there is only one value/measurement column (i.e., not in \code{\link{key_colnames}}) in the archive, it will automatically select it. If supplied, \code{...} must select exactly one column.} @@ -34,47 +50,57 @@ If supplied, \code{...} must select exactly one column.} sure there are no duplicate values from occasions when the signal is revised to \code{NA}, and then back to its immediately-preceding value.} -\item{print_inform}{bool, determines whether to print summary information, or -only return the full summary tibble} - \item{min_waiting_period}{\code{difftime}, integer or \code{NULL}. Sets a cutoff: any time_values that have not had at least \code{min_waiting_period} to stabilize as of the \code{versions_end} are removed. \code{min_waiting_period} should characterize the typical time during which most significant revisions occur. The default of 60 days corresponds to a typical near-final value for case counts as reported in the context of insurance. To avoid this filtering, either set -to \code{NULL} or 0.} +to \code{NULL} or 0. A \code{difftime} will be rounded up to the appropriate \code{time_type} if +necessary (that is 5 days will be rounded to 1 week if the data is weekly).} \item{within_latest}{double between 0 and 1. 
Determines the threshold used for the \code{lag_to}} -\item{quick_revision}{difftime or integer (integer is treated as days), for -the printed summary, the amount of time between the final revision and the +\item{compactify}{bool. If \code{TRUE}, we will compactify after the signal +requested in \code{...} has been selected on its own and the \code{drop_nas} step. +This helps, for example, to give similar results when called on +\link[=epix_merge]{merged} and single-signal archives, since merged archives +record an update when any of the other signals change, not just the +requested signal. The default is \code{TRUE}.} + +\item{compactify_abs_tol}{length-1 double, used if \code{compactify} is \code{TRUE}, it +determines the threshold for when two doubles are considered identical.} + +\item{return_only_tibble}{boolean to return only the simple \code{tibble} of +computational results rather than the complete S3 object.} + +\item{x}{a \code{revision_analysis} object} + +\item{quick_revision}{Difftime or integer (integer is treated as days). +The amount of time between the final revision and the actual time_value to consider the revision quickly resolved. Default of 3 -days} +days. This will be rounded up to the appropriate \code{time_type} if +necessary (that is 5 days will be rounded to 1 week if the data is weekly).} -\item{few_revisions}{integer, for the printed summary, the upper bound on the +\item{few_revisions}{Integer. The upper bound on the number of revisions to consider "few". Default is 3.} -\item{abs_spread_threshold}{length-1 numeric, for the printed summary, the +\item{abs_spread_threshold}{Scalar numeric. The maximum spread used to characterize revisions which don't actually change very much. Default is 5\% of the maximum value in the dataset, but this is the most unit dependent of values, and likely needs to be chosen appropriate for the scale of the dataset.} -\item{rel_spread_threshold}{length-1 double between 0 and 1, for the printed -summary, the relative spread fraction used to characterize revisions which +\item{rel_spread_threshold}{Scalar between 0 and 1. The relative spread fraction used to characterize revisions which don't actually change very much. Default is .1, or 10\% of the final value} - -\item{compactify}{bool. If \code{TRUE}, we will compactify after the signal -requested in \code{...} has been selected on its own and the \code{drop_nas} step. -This helps, for example, to give similar results when called on -\link[=epix_merge]{merged} and single-signal archives, since merged archives -record an update when any of the other signals change, not just the -requested signal. The default is \code{TRUE}.} - -\item{compactify_abs_tol}{length-1 double, used if \code{compactify} is \code{TRUE}, it -determines the threshold for when two doubles are considered identical.} +} +\value{ +An S3 object with class \code{revision_analysis}. This function is typically +called for the purposes of inspecting the printed output. The +results of the computations are available in +\code{revision_analysis(...)$revision_behavior}. If you only want to access +the internal computations, use \code{return_only_tibble = TRUE}. } \description{ \code{revision_summary} removes all missing values (if requested), and then @@ -113,7 +139,7 @@ produce incorrect results for some calculations, since week numbering contains jumps at year boundaries.
} \examples{ -revision_example <- revision_summary(archive_cases_dv_subset, percent_cli) -revision_example \%>\% arrange(desc(spread)) +revision_example <- revision_analysis(archive_cases_dv_subset, percent_cli) +revision_example$revision_behavior \%>\% arrange(desc(spread)) } diff --git a/man/set_versions_end.Rd b/man/set_versions_end.Rd new file mode 100644 index 000000000..3d38f23f2 --- /dev/null +++ b/man/set_versions_end.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-epi_archive.R +\name{set_versions_end} +\alias{set_versions_end} +\title{Set the \code{versions_end} attribute of an \code{epi_archive} object} +\usage{ +set_versions_end(x, versions_end) +} +\arguments{ +\item{x}{An \code{epi_archive} object} + +\item{versions_end}{The new \code{versions_end} value} +} +\value{ +An \code{epi_archive} object with the updated \code{versions_end} attribute +} +\description{ +An escape hatch for epix_as_of, which does not allow version > +\verb{$versions_end}. +} diff --git a/man/sum_groups_epi_df.Rd b/man/sum_groups_epi_df.Rd index f1ba84745..62eecf29d 100644 --- a/man/sum_groups_epi_df.Rd +++ b/man/sum_groups_epi_df.Rd @@ -4,12 +4,17 @@ \alias{sum_groups_epi_df} \title{Aggregate an \code{epi_df} object} \usage{ -sum_groups_epi_df(.x, sum_cols = "value", group_cols = character()) +sum_groups_epi_df(.x, sum_cols, group_cols = "time_value") } \arguments{ \item{.x}{an \code{epi_df}} -\item{sum_cols}{character vector of the columns to aggregate} +\item{sum_cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> An unquoted column +name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}), +\link[tidyselect:language]{other tidy-select expression}, or a vector of +characters (e.g. \code{c("cases", "deaths")}). Variable names can be used as if +they were positions in the data frame, so expressions like \code{x:y} can be +used to select a range of variables.} \item{group_cols}{character vector of column names to group by. "time_value" is included by default.} @@ -22,3 +27,12 @@ Aggregates an \code{epi_df} object by the specified group columns, summing the \code{value} column, and returning an \code{epi_df}. If aggregating over \code{geo_value}, the resulting \code{epi_df} will have \code{geo_value} set to \code{"total"}. } +\examples{ +# This data has other_keys age_group and edu_qual: +grad_employ_subset + +# Aggregate num_graduates within each geo_value (and time_value): +grad_employ_subset \%>\% + sum_groups_epi_df(num_graduates, group_cols = "geo_value") + +} diff --git a/man/tidyselect_arg_roxygen.Rd b/man/tidyselect_arg_roxygen.Rd new file mode 100644 index 000000000..27cb264d9 --- /dev/null +++ b/man/tidyselect_arg_roxygen.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inline-roxygen.R +\docType{data} +\name{tidyselect_arg_roxygen} +\alias{tidyselect_arg_roxygen} +\title{Description of a single arg that tidyselects value variables} +\format{ +An object of class \code{character} of length 1. +} +\usage{ +tidyselect_arg_roxygen +} +\description{ +Not meant for when describing tidyselect \code{...}. 
+} +\keyword{internal} diff --git a/tests/testthat.R b/tests/testthat.R index b26d274b5..3c4cbff24 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,6 @@ library(testthat) library(epiprocess) +stopifnot(packageVersion("testthat") >= "3.1.5") + test_check("epiprocess") diff --git a/tests/testthat/_snaps/methods-epi_archive.md b/tests/testthat/_snaps/methods-epi_archive.md new file mode 100644 index 000000000..200e4202e --- /dev/null +++ b/tests/testthat/_snaps/methods-epi_archive.md @@ -0,0 +1,35 @@ +# filter.epi_archive works as expected + + Code + ea2 %>% filter(version <= as.Date("2020-06-02")) + Condition + Error in `filter()`: + i In argument: `version <= as.Date("2020-06-02")`. + Caused by error: + ! Using `version` in `filter.epi_archive` may produce unexpected results. + > See if `epix_as_of` or `epix_slide` would work instead. + > If not, see `?filter.epi_archive` details for how to proceed. + +--- + + Code + ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L) + Condition + Error in `filter()`: + i In argument: `cases >= 2L`. + Caused by error: + ! Using `cases` in `filter.epi_archive` may produce unexpected results. + > See `?filter.epi_archive` details for how to proceed. + +--- + + Code + ea2p %>% filter(cases >= median(cases), .by = geo_value) + Condition + Error in `filter()`: + i In argument: `cases >= median(cases)`. + i In group 1: `geo_value = "ca"`. + Caused by error: + ! Using `cases` in `filter.epi_archive` may produce unexpected results. + > See `?filter.epi_archive` details for how to proceed. + diff --git a/tests/testthat/_snaps/revision-latency-functions.md b/tests/testthat/_snaps/revision-latency-functions.md index 4f2bfe269..424dff11e 100644 --- a/tests/testthat/_snaps/revision-latency-functions.md +++ b/tests/testthat/_snaps/revision-latency-functions.md @@ -1,29 +1,41 @@ # revision_summary works for dummy datasets Code - dummy_ex %>% revision_summary() %>% print(n = 10, width = 300) + rs1 Message - Min lag (time to first version): + + -- An epi_archive spanning 2020-01-01 to 2020-01-04. -- + + -- Min lag (time to first version): Output min median mean max 0 days 1 days 1.6 days 4 days Message - Fraction of epi_key+time_values with + + -- Fraction of epi_key + time_values with No revisions: * 3 out of 7 (42.86%) Quick revisions (last revision within 3 days of the `time_value`): * 4 out of 7 (57.14%) Few revisions (At most 3 revisions for that `time_value`): * 6 out of 7 (85.71%) - Fraction of revised epi_key+time_values which have: + + -- Fraction of revised epi_key + time_values which have: Less than 0.1 spread in relative value: * 1 out of 4 (25%) Spread of more than 5.1 in actual value (when revised): * 3 out of 4 (75%) - Days until within 20% of the latest value: + + -- Days until within 20% of the latest value: Output min median mean max 0 days 3 days 6.9 days 19 days + +--- + + Code + rs1$revision_behavior %>% print(n = 10, width = 300) + Output # A tibble: 7 x 11 time_value geo_value n_revisions min_lag max_lag lag_near_latest spread @@ -47,31 +59,43 @@ --- Code - dummy_ex %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300) + rs2 Message - Min lag (time to first version): + + -- An epi_archive spanning 2020-01-01 to 2020-01-04. 
-- + + -- Min lag (time to first version): Output min median mean max 0 days 1 days 1.4 days 4 days Message Fraction of all versions that are `NA`: * 2 out of 19 (10.53%) - Fraction of epi_key+time_values with + + -- Fraction of epi_key + time_values with No revisions: * 2 out of 7 (28.57%) Quick revisions (last revision within 3 days of the `time_value`): * 4 out of 7 (57.14%) Few revisions (At most 3 revisions for that `time_value`): * 6 out of 7 (85.71%) - Fraction of revised epi_key+time_values which have: + + -- Fraction of revised epi_key + time_values which have: Less than 0.1 spread in relative value: * 2 out of 5 (40%) Spread of more than 5.1 in actual value (when revised): * 3 out of 5 (60%) - Days until within 20% of the latest value: + + -- Days until within 20% of the latest value: Output min median mean max 0 days 3 days 6.9 days 19 days + +--- + + Code + rs2$revision_behavior %>% print(n = 10, width = 300) + Output # A tibble: 7 x 11 time_value geo_value n_revisions min_lag max_lag lag_near_latest spread @@ -95,31 +119,43 @@ --- Code - dummy_ex_weekly %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300) + rs3 Message - Min lag (time to first version): + + -- An epi_archive spanning 2020-01-01 to 2020-01-22. -- + + -- Min lag (time to first version): Output min median mean max 0 weeks 1 weeks 1.4 weeks 4 weeks Message Fraction of all versions that are `NA`: * 2 out of 19 (10.53%) - Fraction of epi_key+time_values with + + -- Fraction of epi_key + time_values with No revisions: * 2 out of 7 (28.57%) Quick revisions (last revision within 1 week of the `time_value`): * 2 out of 7 (28.57%) Few revisions (At most 3 revisions for that `time_value`): * 6 out of 7 (85.71%) - Fraction of revised epi_key+time_values which have: + + -- Fraction of revised epi_key + time_values which have: Less than 0.1 spread in relative value: * 2 out of 5 (40%) Spread of more than 5.1 in actual value (when revised): * 3 out of 5 (60%) - Weeks until within 20% of the latest value: + + -- Weeks until within 20% of the latest value: Output min median mean max 0 weeks 3 weeks 6.9 weeks 19 weeks + +--- + + Code + rs3$revision_behavior %>% print(n = 10, width = 300) + Output # A tibble: 7 x 11 time_value geo_value n_revisions min_lag max_lag lag_near_latest spread @@ -143,32 +179,43 @@ --- Code - dummy_ex_yearmonthly %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, - width = 300) + rs4 Message - Min lag (time to first version): + + -- An epi_archive spanning 2020 Jan to 2020 Apr. 
-- + + -- Min lag (time to first version): Output min median mean max 0 1 1.4 4 Message Fraction of all versions that are `NA`: * 2 out of 19 (10.53%) - Fraction of epi_key+time_values with + + -- Fraction of epi_key + time_values with No revisions: * 2 out of 7 (28.57%) Quick revisions (last revision within 1 month of the `time_value`): * 2 out of 7 (28.57%) Few revisions (At most 3 revisions for that `time_value`): * 6 out of 7 (85.71%) - Fraction of revised epi_key+time_values which have: + + -- Fraction of revised epi_key + time_values which have: Less than 0.1 spread in relative value: * 2 out of 5 (40%) Spread of more than 5.1 in actual value (when revised): * 3 out of 5 (60%) - Months until within 20% of the latest value: + + -- Months until within 20% of the latest value: Output min median mean max 0 3 6.9 19 + +--- + + Code + rs4$revision_behavior %>% print(n = 10, width = 300) + Output # A tibble: 7 x 11 time_value geo_value n_revisions min_lag max_lag lag_near_latest spread @@ -192,32 +239,43 @@ --- Code - dummy_ex_integerly %>% revision_summary(min_waiting_period = 60, - quick_revision = 3, drop_nas = FALSE) %>% print(n = 10, width = 300) + print(rs5, quick_revision = 3) Message - Min lag (time to first version): + + -- An epi_archive spanning 1 to 4. -- + + -- Min lag (time to first version): Output min median mean max 0 1 1.4 4 Message Fraction of all versions that are `NA`: * 2 out of 19 (10.53%) - Fraction of epi_key+time_values with + + -- Fraction of epi_key + time_values with No revisions: * 2 out of 7 (28.57%) Quick revisions (last revision within 3 time steps of the `time_value`): * 4 out of 7 (57.14%) Few revisions (At most 3 revisions for that `time_value`): * 6 out of 7 (85.71%) - Fraction of revised epi_key+time_values which have: + + -- Fraction of revised epi_key + time_values which have: Less than 0.1 spread in relative value: * 2 out of 5 (40%) Spread of more than 5.1 in actual value (when revised): * 3 out of 5 (60%) - Time Steps until within 20% of the latest value: + + -- Time Steps until within 20% of the latest value: Output min median mean max 0 3 6.9 19 + +--- + + Code + rs5$revision_behavior %>% print(n = 10, width = 300) + Output # A tibble: 7 x 11 time_value geo_value n_revisions min_lag max_lag lag_near_latest spread diff --git a/tests/testthat/test-archive-version-bounds.R b/tests/testthat/test-archive-version-bounds.R index ee500d308..878cde1c0 100644 --- a/tests/testthat/test-archive-version-bounds.R +++ b/tests/testthat/test-archive-version-bounds.R @@ -71,14 +71,21 @@ test_that("`validate_version_bound` validate and class checks together allow and # Bad: expect_error(validate_version_bound(3.5, x_int, TRUE, "vb"), regexp = "must have the same `class`") expect_error(validate_version_bound(.Machine$integer.max, x_dbl, TRUE, "vb"), regexp = "must have the same `class`") - expect_error(validate_version_bound( - `class<-`(list(2), "clazz"), - tibble::tibble(version = `class<-`(5L, "clazz")), TRUE, "vb" - ), regexp = "must have the same `typeof`", class = "epiprocess__version_bound_mismatched_typeof") # Maybe questionable: expect_error(validate_version_bound(3, x_int, TRUE, "vb")) expect_error(validate_version_bound(3L, x_dbl, TRUE, "vb")) + # Maybe questionable, but accept to relax things a bit, as this is happening + # with Dates in some R(?) versions. 
diff --git a/tests/testthat/test-compactify.R b/tests/testthat/test-compactify.R
index 2eed5025f..229af8453 100644
--- a/tests/testthat/test-compactify.R
+++ b/tests/testthat/test-compactify.R
@@ -120,37 +120,54 @@ test_that("compactify does not alter the default clobberable and observed versio
   expect_identical(ea_true$versions_end, ea_false$versions_end)
 })
 
+quantile_pred_once <- function(estimates_vec, levels_vec) {
+  hardhat::quantile_pred(t(as.matrix(estimates_vec)), levels_vec)
+}
 test_that("compactify works on distributions", {
+  skip("Until #611 is merged or hardhat/epipredict is patched")
   forecasts <- tibble(
     ahead = 2L,
     geo_value = "ak",
     target_end_date = as.Date("2020-01-19"),
-    forecast_date = as.Date("2020-01-17") + 1:8,
+    forecast_date = as.Date("2020-01-17") + 1:6,
     actual = 25,
     .pred_distn = c(
-      epipredict::dist_quantiles(c(1, 5, 9), c(0.1, 0.5, 0.9)),
-      epipredict::dist_quantiles(c(1, NA, 9), c(0.1, 0.5, 0.9)), # single NA in quantiles
-      epipredict::dist_quantiles(c(NA, NA, NA), c(0.1, 0.5, 0.9)), # all NAs in quantiles
-      distributional::dist_missing(1), # the actual `NA` for distributions
-      epipredict::dist_quantiles(c(1, 5, 9), c(0.1, 0.5, 0.9)), # and back
-      epipredict::dist_quantiles(c(3, 5, 9), c(0.1, 0.5, 0.9)), # change quantile
-      epipredict::dist_quantiles(c(3, 5, 9), c(0.2, 0.5, 0.8)), # change level
-      epipredict::dist_quantiles(c(3, 5, 9), c(0.2, 0.5, 0.8)) # LOCF
+      quantile_pred_once(c(1, 5, 9), c(0.1, 0.5, 0.9)),
+      quantile_pred_once(c(1, NA, 9), c(0.1, 0.5, 0.9)), # single NA in quantiles
+      quantile_pred_once(c(NA, NA, NA), c(0.1, 0.5, 0.9)), # all NAs in quantiles
+      quantile_pred_once(c(1, 5, 9), c(0.1, 0.5, 0.9)), # and back
+      quantile_pred_once(c(3, 5, 9), c(0.1, 0.5, 0.9)), # change quantile
+      quantile_pred_once(c(3, 5, 9), c(0.1, 0.5, 0.9)) # LOCF
     )
   )
   expect_equal(
     forecasts %>%
-      as_epi_archive(
-        other_keys = "ahead", time_value = target_end_date, version = forecast_date,
-        compactify = TRUE
-      ) %>%
+      as_epi_archive(other_keys = "ahead", time_value = target_end_date, version = forecast_date) %>%
       .$DT %>%
       as.data.frame() %>%
       as_tibble(),
-    forecasts[-8, ] %>%
+    forecasts[-6, ] %>%
       rename(time_value = target_end_date, version = forecast_date)
   )
 })
+test_that("epix_merge works with distributions", {
+  skip("Until hardhat/epipredict is patched")
+  forecasts1ea <- tibble(
+    ahead = 2L,
+    geo_value = "ak",
+    target_end_date = as.Date("2020-01-19"),
+    forecast_date = as.Date("2020-01-17") + 1,
+    .pred_distn1 = quantile_pred_once(c(1, 5, 9), c(0.1, 0.5, 0.9))
+  ) %>% as_epi_archive(other_keys = "ahead", time_value = target_end_date, version = forecast_date)
+  forecasts2ea <- tibble(
+    ahead = 2L,
+    geo_value = "ak",
+    target_end_date = as.Date("2020-01-19"),
+    forecast_date = as.Date("2020-01-17") + 2,
+    .pred_distn2 = quantile_pred_once(c(2, 4, 8), c(0.1, 0.5, 0.9))
+  ) %>% as_epi_archive(other_keys = "ahead", time_value = target_end_date, version = forecast_date)
+  forecasts12ea <- epix_merge(forecasts1ea, forecasts2ea, sync = "locf")
+})
 test_that("Large compactify_abs_tol does not drop edf keys", {
   # several epikeytimes, each with a single version
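For context, `quantile_pred_once()` above packages one forecast's quantile estimates for `hardhat::quantile_pred()`, which takes a row-per-observation matrix of estimates plus the matching quantile levels. A usage sketch (assuming a hardhat version that exports `quantile_pred()`, as the helper relies on):

```r
# One forecast's quantile estimates as a 1 x 3 matrix, with matching levels:
qp <- hardhat::quantile_pred(
  t(as.matrix(c(1, 5, 9))), # rows = observations, columns = quantile levels
  c(0.1, 0.5, 0.9)
)
qp # a length-1 quantile_pred vector, usable as a tibble column
```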
diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
index 0aa4aca7f..b9be125df 100644
--- a/tests/testthat/test-epi_slide.R
+++ b/tests/testthat/test-epi_slide.R
@@ -610,7 +610,7 @@ test_that("epi_slide_opt helper `full_date_seq` returns expected date values", {
   before <- 2L
   after <- 1L
 
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = days) %>%
@@ -627,7 +627,7 @@
       pad_late_dates = as.Date(c("2022-01-08"))
     )
   )
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = weeks) %>%
@@ -677,7 +677,7 @@
   before <- 5L
   after <- 0L
 
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = days) %>%
@@ -701,7 +701,7 @@
   before <- 0L
   after <- 3L
 
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = days) %>%
diff --git a/tests/testthat/test-epix_merge.R b/tests/testthat/test-epix_merge.R
index 5b3de284f..ded143531 100644
--- a/tests/testthat/test-epix_merge.R
+++ b/tests/testthat/test-epix_merge.R
@@ -175,7 +175,7 @@ test_that("epix_merge forbids and warns on metadata and naming issues", {
       as_epi_archive(tibble::tibble(geo_value = "ak", time_value = test_date, version = test_date + 1L, value = 1L)),
       as_epi_archive(tibble::tibble(geo_value = "ak", time_value = test_date, version = test_date + 1L, value = 2L))
     ),
-    regexp = "overlapping.*names"
+    regexp = 'both have measurement columns named "value"'
   )
 })
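The `expect_identical()` → `expect_equal()` swaps above (and in `test-time-utils.R` further down) trade byte-for-byte exactness for tolerance of representation differences; roughly:

```r
# expect_equal() compares with all.equal()-style tolerance, so values that
# differ only in storage mode (e.g., integer- vs. double-backed) still pass:
identical(1L, 1)         # FALSE: integer vs. double
isTRUE(all.equal(1L, 1)) # TRUE: numerically equal
```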
diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R
index 45ba6ea12..c544368a5 100644
--- a/tests/testthat/test-methods-epi_archive.R
+++ b/tests/testthat/test-methods-epi_archive.R
@@ -128,3 +128,179 @@ test_that("group_vars works as expected", {
     "geo_value"
   )
 })
+
+test_that("epix_as_of_now works as expected", {
+  expect_equal(
+    attr(ea2_data %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
+    as.Date("2020-06-04")
+  )
+  time_value <- as.Date("2020-06-01")
+  df <- dplyr::tribble(
+    ~geo_value, ~time_value, ~version, ~cases,
+    "ca", time_value, time_value, 1,
+    "ca", time_value + 7, time_value + 7, 2,
+  )
+  expect_equal(
+    attr(df %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
+    as.Date("2020-06-08")
+  )
+  time_value <- tsibble::yearmonth(as.Date("2020-06-01") - lubridate::month(1))
+  df <- dplyr::tribble(
+    ~geo_value, ~time_value, ~version, ~cases,
+    "ca", time_value, time_value, 1,
+    "ca", time_value + lubridate::month(1), time_value + lubridate::month(1), 2,
+  )
+  expect_equal(
+    attr(df %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
+    tsibble::yearmonth("2020-06")
+  )
+  time_value <- 2020
+  df <- dplyr::tribble(
+    ~geo_value, ~time_value, ~version, ~cases,
+    "ca", time_value, time_value, 1,
+    "ca", time_value + 7, time_value + 7, 2,
+  )
+  expect_equal(
+    attr(df %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
+    2027
+  )
+})
+
+test_that("filter.epi_archive works as expected", {
+  ea2 <- ea2_data %>%
+    as_epi_archive()
+
+  # Some basic output value checks:
+
+  expect_equal(
+    ea2 %>% filter(geo_value == "tn"),
+    new_epi_archive(
+      ea2$DT[FALSE],
+      ea2$geo_type, ea2$time_type, ea2$other_keys,
+      ea2$clobberable_versions_start, ea2$versions_end
+    )
+  )
+
+  expect_equal(
+    ea2 %>% filter(geo_value == "ca", time_value == as.Date("2020-06-02")),
+    new_epi_archive(
+      data.table::data.table(
+        geo_value = "ca", time_value = as.Date("2020-06-02"),
+        version = as.Date("2020-06-02") + 0:2, cases = 0:2
+      ),
+      ea2$geo_type, ea2$time_type, ea2$other_keys,
+      ea2$clobberable_versions_start, ea2$versions_end
+    )
+  )
+
+  # Output geo_type and time_type behavior:
+
+  hrr_day_ea <- tibble(
+    geo_value = c(rep(1, 14), 100),
+    time_value = as.Date("2020-01-01") - 1 + c(1:14, 14),
+    version = time_value + 3,
+    value = 1:15
+  ) %>%
+    as_epi_archive()
+
+  expect_equal(hrr_day_ea$geo_type, "hrr")
+  expect_equal(hrr_day_ea$time_type, "day")
+
+  hrr_week_ea <- hrr_day_ea %>%
+    filter(geo_value == 1, as.POSIXlt(time_value)$wday == 6L)
+
+  expect_equal(hrr_week_ea$geo_type, "hrr")
+  expect_equal(hrr_week_ea$time_type, "week")
+
+  hrr_one_week_ea <- hrr_week_ea %>%
+    filter(time_value == time_value[[1]])
+
+  expect_equal(hrr_one_week_ea$time_type, "week")
+
+  intcustom_day_ea <- hrr_day_ea
+  intcustom_day_ea$geo_type <- "custom"
+
+  intcustom_week_ea <- intcustom_day_ea %>%
+    filter(geo_value == 1, as.POSIXlt(time_value)$wday == 6L)
+
+  expect_equal(intcustom_week_ea$geo_type, "custom")
+  expect_equal(intcustom_week_ea$time_type, "week")
+
+  # Environment variables should be fine:
+  version <- as.Date("2020-06-02") + 1
+  e <- version
+  expected <- ea2 %>% filter(geo_value == "ca", as.Date("2020-06-02") + 1 <= time_value)
+  expect_equal(ea2 %>% filter(geo_value == "ca", .env$version <= time_value), expected)
+  expect_equal(ea2 %>% filter(geo_value == "ca", e <= time_value), expected)
+  expect_equal(ea2 %>% filter(geo_value == "ca", .env$e <= time_value), expected)
+
+  # Error-raising:
+  expect_error(
+    ea2 %>% filter(version == as.Date("2020-06-02")),
+    class = "epiprocess__filter_archive__used_version"
+  )
+  expect_error(
+    ea2 %>% filter(version <= as.Date("2020-06-02")),
+    class = "epiprocess__filter_archive__used_version"
+  )
+  expect_snapshot(
+    ea2 %>% filter(version <= as.Date("2020-06-02")),
+    error = TRUE, cnd_class = TRUE
+  )
+  expect_error(
+    ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
+    class = "epiprocess__filter_archive__used_measurement"
+  )
+  expect_snapshot(
+    ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
+    error = TRUE, cnd_class = TRUE
+  )
+  expect_error(
+    ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
+    class = "epiprocess__filter_archive__used_measurement"
+  )
+  # Check for `for` + `delayedAssign` mishap in `expect_snapshot` (we should say
+  # something about `cases` (the relevant colname), not `deaths` (the last
+  # measurement colname)):
+  ea2p <- ea2_data %>%
+    mutate(deaths = 0) %>%
+    as_epi_archive()
+  expect_error(
+    ea2p %>% filter(cases >= median(cases), .by = geo_value),
+    class = "epiprocess__filter_archive__used_measurement"
+  )
+  expect_snapshot(
+    ea2p %>% filter(cases >= median(cases), .by = geo_value),
+    error = TRUE, cnd_class = TRUE
+  )
+  # Check that we are insulated from other lazy eval traps:
+  expected <- rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))
+  expect_class(expected$parent, "epiprocess__filter_archive__used_measurement")
+  with(list(cli_abort = function(...) stop("now, pretend user didn't have cli attached")), {
+    expect_equal(
+      rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))$parent$message,
+      expected$parent$message
+    )
+  })
+  expect_equal(
+    rlang::catch_cnd(ea2p %>% filter(
+      {
+        c <- function(...) stop("and that they overwrote `c` to try to debug their own code")
+        cases >= median(cases)
+      },
+      .by = geo_value
+    ))$parent$message,
+    expected$parent$message
+  )
+
+  # Escape hatch:
+  expect_equal(
+    ea2 %>%
+      filter(version <= time_value + as.difftime(1, units = "days"),
+        .format_aware = TRUE
+      ) %>%
+      .$DT,
+    ea2$DT[version <= time_value + as.difftime(1, units = "days"), ]
+  )
+})
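Taken together, the tests above pin down the `filter.epi_archive` contract: key columns (`geo_value`, `time_value`, other keys) may be filtered on, `version` and measurement columns are rejected, and `.format_aware = TRUE` is the escape hatch. A condensed sketch (hypothetical mini-archive; argument and condition-class names follow the tests):

```r
library(epiprocess)
library(dplyr)

ea <- as_epi_archive(tibble::tibble(
  geo_value = "ca",
  time_value = as.Date("2020-06-01") + 0:2,
  version = as.Date("2020-06-02") + 0:2,
  cases = 1:3
))
ea %>% filter(geo_value == "ca")                  # key columns: allowed
# ea %>% filter(version <= as.Date("2020-06-02")) # version: error
# ea %>% filter(cases >= 2L)                      # measurement column: error
ea %>% filter(                                    # escape hatch from the tests:
  version <= time_value + as.difftime(1, units = "days"),
  .format_aware = TRUE
)
```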
stop("now, pretend user didn't have cli attached")), { + expect_equal( + rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))$parent$message, + expected$parent$message + ) + }) + expect_equal( + rlang::catch_cnd(ea2p %>% filter( + { + c <- function(...) stop("and that they overwrote `c` to try to debug their own code") + cases >= median(cases) + }, + .by = geo_value + ))$parent$message, + expected$parent$message + ) + + + # Escape hatch: + expect_equal( + ea2 %>% + filter(version <= time_value + as.difftime(1, units = "days"), + .format_aware = TRUE + ) %>% + .$DT, + ea2$DT[version <= time_value + as.difftime(1, units = "days"), ] + ) +}) diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index 3e5c180b0..bc9f1e35d 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -311,20 +311,25 @@ test_that("complete.epi_df works", { }) test_that("sum_groups_epi_df works", { - out <- toy_epi_df %>% sum_groups_epi_df(sum_cols = "x") + out <- toy_epi_df %>% sum_groups_epi_df("x") expected_out <- toy_epi_df %>% group_by(time_value) %>% summarize(x = sum(x)) %>% mutate(geo_value = "total") %>% as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of) expect_equal(out, expected_out) + out <- toy_epi_df %>% sum_groups_epi_df(x) + expect_equal(out, expected_out) out <- toy_epi_df %>% - sum_groups_epi_df(sum_cols = c("x", "y"), group_cols = c("time_value", "geo_value", "indic_var1")) + sum_groups_epi_df(c(x, y), group_cols = c("time_value", "geo_value", "indic_var1")) expected_out <- toy_epi_df %>% group_by(time_value, geo_value, indic_var1) %>% summarize(x = sum(x), y = sum(y), .groups = "drop") %>% as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of, other_keys = "indic_var1") %>% arrange_canonical() expect_equal(out, expected_out) + out <- toy_epi_df %>% + sum_groups_epi_df(x:y, group_cols = c("time_value", "geo_value", "indic_var1")) + expect_equal(out, expected_out) }) diff --git a/tests/testthat/test-revision-latency-functions.R b/tests/testthat/test-revision-latency-functions.R index b636bf32e..1a61e8d88 100644 --- a/tests/testthat/test-revision-latency-functions.R +++ b/tests/testthat/test-revision-latency-functions.R @@ -64,27 +64,36 @@ dummy_ex_integerly <- dummy_ex$DT %>% stopifnot(dummy_ex_integerly$time_type == "integer") test_that("revision_summary works for dummy datasets", { - expect_snapshot(dummy_ex %>% revision_summary() %>% print(n = 10, width = 300)) - expect_snapshot(dummy_ex %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300)) + rs1 <- dummy_ex %>% revision_summary() + rs2 <- dummy_ex %>% revision_summary(drop_nas = FALSE) + expect_snapshot(rs1) + expect_snapshot(rs1$revision_behavior %>% print(n = 10, width = 300)) + expect_snapshot(rs2) + expect_snapshot(rs2$revision_behavior %>% print(n = 10, width = 300)) # Weekly dummy is mostly just "day" -> "week", but quick-revision summary changes: - expect_snapshot(dummy_ex_weekly %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300)) + rs3 <- dummy_ex_weekly %>% revision_summary(drop_nas = FALSE) + expect_snapshot(rs3) + expect_snapshot(rs3$revision_behavior %>% print(n = 10, width = 300)) # Yearmonthly has the same story. 
diff --git a/tests/testthat/test-revision-latency-functions.R b/tests/testthat/test-revision-latency-functions.R
index b636bf32e..1a61e8d88 100644
--- a/tests/testthat/test-revision-latency-functions.R
+++ b/tests/testthat/test-revision-latency-functions.R
@@ -64,27 +64,36 @@ dummy_ex_integerly <- dummy_ex$DT %>%
 stopifnot(dummy_ex_integerly$time_type == "integer")
 
 test_that("revision_summary works for dummy datasets", {
-  expect_snapshot(dummy_ex %>% revision_summary() %>% print(n = 10, width = 300))
-  expect_snapshot(dummy_ex %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300))
+  rs1 <- dummy_ex %>% revision_summary()
+  rs2 <- dummy_ex %>% revision_summary(drop_nas = FALSE)
+  expect_snapshot(rs1)
+  expect_snapshot(rs1$revision_behavior %>% print(n = 10, width = 300))
+  expect_snapshot(rs2)
+  expect_snapshot(rs2$revision_behavior %>% print(n = 10, width = 300))
   # Weekly dummy is mostly just "day" -> "week", but quick-revision summary changes:
-  expect_snapshot(dummy_ex_weekly %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300))
+  rs3 <- dummy_ex_weekly %>% revision_summary(drop_nas = FALSE)
+  expect_snapshot(rs3)
+  expect_snapshot(rs3$revision_behavior %>% print(n = 10, width = 300))
   # Yearmonthly has the same story. It would have been close to encountering
   # min_waiting_period-based filtering but we actually set its versions_end to
   # sometime in 2080 rather than 2022:
-  expect_snapshot(dummy_ex_yearmonthly %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300))
+  rs4 <- dummy_ex_yearmonthly %>% revision_summary(drop_nas = FALSE)
+  expect_snapshot(rs4)
+  expect_snapshot(rs4$revision_behavior %>% print(n = 10, width = 300))
   # Integer is very much like daily. We have to provide some of the
   # configuration arguments since we have no idea about what the integers
   # represent. If the possible integers being used have large jumps like
   # YYYYww-as-integer epiweek labels (e.g., 200053 jumps to 200101) or are
   # regularly spaced apart but by more than 1, we'll still be producing
   # something nonsensical, but we tried.
-  expect_snapshot(dummy_ex_integerly %>%
+  rs5 <- dummy_ex_integerly %>%
     revision_summary(
-      min_waiting_period = 60, quick_revision = 3,
+      min_waiting_period = 60,
       drop_nas = FALSE
-    ) %>%
-    print(n = 10, width = 300))
+    )
+  expect_snapshot(print(rs5, quick_revision = 3))
+  expect_snapshot(rs5$revision_behavior %>% print(n = 10, width = 300))
 })
 
 test_that("tidyselect is functional", {
@@ -133,7 +142,7 @@ test_that("revision_summary default min_waiting_period works as expected", {
       value = 1:2
     ) %>%
       as_epi_archive(versions_end = as.Date("2020-01-01") + 1 + 59) %>%
-      revision_summary(print_inform = FALSE) %>%
+      revision_summary(return_only_tibble = TRUE) %>%
       pull(time_value),
     as.Date("2020-01-01")
   )
@@ -146,7 +155,7 @@
       value = 1:2
     ) %>%
       as_epi_archive(versions_end = as.Date("2020-01-01") + 7 + 56) %>%
-      revision_summary(print_inform = FALSE) %>%
+      revision_summary(return_only_tibble = TRUE) %>%
       pull(time_value),
     as.Date("2020-01-01")
   )
@@ -159,7 +168,7 @@
       value = 1:2
     ) %>%
      as_epi_archive(versions_end = tsibble::make_yearmonth(2000, 3)) %>%
-      revision_summary(print_inform = FALSE) %>%
+      revision_summary(return_only_tibble = TRUE) %>%
       pull(time_value),
     tsibble::make_yearmonth(2000, 1)
   )
@@ -172,7 +181,7 @@
       value = 1:2
     ) %>%
       as_epi_archive(versions_end = 1 + 1 + 59) %>%
-      revision_summary(print_inform = FALSE),
+      revision_summary(return_only_tibble = TRUE),
     regexp = "Unsupported time_type"
   )
 })
diff --git a/tests/testthat/test-time-utils.R b/tests/testthat/test-time-utils.R
index 6fe8d78ad..7ddd70c04 100644
--- a/tests/testthat/test-time-utils.R
+++ b/tests/testthat/test-time-utils.R
@@ -17,11 +17,11 @@ test_that("guess_period works", {
   # On Dates:
   daily_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "day")
   weekly_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "week")
-  expect_identical(
+  expect_equal(
     daily_dates[[1L]] + guess_period(daily_dates) * (seq_along(daily_dates) - 1L),
     daily_dates
   )
-  expect_identical(
+  expect_equal(
     weekly_dates[[1L]] + guess_period(weekly_dates) * (seq_along(weekly_dates) - 1L),
     weekly_dates
   )
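The `guess_period()` assertions check a reconstruction identity: the first element plus the guessed gap times `0, 1, 2, ...` recovers the whole sequence. The switch to `expect_equal()` presumably allows the reconstruction to differ from the input in representation only. A sketch using epiprocess's `guess_period()` helper, as the tests do:

```r
weekly_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "week")
p <- guess_period(weekly_dates) # a 7-day gap, in some numeric/difftime form
# Rebuild the sequence from its start; equal to weekly_dates, though possibly
# not byte-identical:
weekly_dates[[1L]] + p * (seq_along(weekly_dates) - 1L)
```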
 Here is a sample of the output:
 
 ```{r}
-revision_details <- revision_summary(dv_archive, print_inform = TRUE)
+revision_details <- revision_summary(dv_archive)
+revision_details
 ```
 
 We can see from the output that, as mentioned above, this data set has a lot of
@@ -174,9 +175,9 @@ inspect the returned `revision_details` tibble. Here we collect a number of
 statistics for each state:
 
 ```{r}
-revision_details %>%
+revision_details$revision_behavior %>%
   group_by(geo_value) %>%
-  summarise(
+  summarize(
     n_rev = mean(n_revisions),
     min_lag = min(min_lag),
     max_lag = max(max_lag),
@@ -234,7 +235,8 @@ observation carried forward (LOCF). For more information, see `epix_merge()`.
 
 ## Backtesting forecasting models
 
 One of the most common use cases of the `epiprocess::epi_archive()` object is for
-accurate model backtesting. See `vignette("backtesting", package="epipredict")`
+accurate model backtesting. See [`vignette("backtesting",
+package="epipredict")`](https://cmu-delphi.github.io/epipredict/articles/backtesting.html)
 for an in-depth demo, using a pre-built forecaster in that package.
 
 ## Attribution
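Finally, a sketch of the `epix_merge()` LOCF sync the vignette points to, with hypothetical archives `x_archive` and `y_archive` (the `sync = "locf"` option also appears in the new `test-compactify.R` merge test):

```r
# Column-wise merge of two archives; where one archive's versions end earlier,
# its last observed values are carried forward to align the version axes:
merged <- epix_merge(x_archive, y_archive, sync = "locf")
```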