-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
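Add more documentation clarifying which sampling methods output a selection probability and which output expected hits. Also add more detail on how expected hits are calculated. In the reprex below, the two `select_sample()` calls differ only in whether the frame contains a column named `n` (set to 50), yet the reported `ExpectedHits` values differ by a factor of 5 (= 50 / 10); only the values from the frame without `n` match the hand-computed `ExpHits_man`.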
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(SampleSelectR)
set.seed(8675309)
# Build the sampling frame: keep only the ID, stratum, and size-measure
# columns, then add two columns computed within each Region:
#   n           - a constant column (50) that shares its name with the `n`
#                 argument passed to select_sample() further down
#   ExpHits_man - hand-computed expected hits for a sample of 10 per Region,
#                 i.e. 10 * Pop_Tot / (Region total of Pop_Tot)
county_2023_slim_n <- county_2023 |>
  select(GEOID, Region, Pop_Tot) |>
  mutate(
    n = 50,
    ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
    .by = "Region"
  )
# Per-stratum sample sizes: one row per Region, each with sample_size = 10.
sampsizes <- county_2023_slim_n |>
  distinct(Region) |>
  mutate(sample_size = 10)
# Draw a "sys_pps" sample stratified by Region, with Pop_Tot as the measure
# of size, from the frame that still contains the extra `n` column.
samp1 <- county_2023_slim_n |>
  select_sample(
    "sys_pps",
    n = sampsizes,
    strata = "Region",
    mos = "Pop_Tot",
    outall = TRUE
  )
#> No sorting variables are provided so frame is assumed to be already sorted for systematic sampling.
#> Stratum: Region = South
#> --Frame size: 1422
#> --Sample size: 10
#> --Sampling interval (k): 12763101
#> --Random start (r): 2035506
#> Stratum: Region = West
#> --Frame size: 449
#> --Sample size: 10
#> --Sampling interval (k): 7864612
#> --Random start (r): 3760766
#> Stratum: Region = Northeast
#> --Frame size: 218
#> --Sample size: 10
#> --Sampling interval (k): 5722245
#> --Random start (r): 4376366
#> Stratum: Region = Midwest
#> --Frame size: 1055
#> --Sample size: 10
#> --Sampling interval (k): 6888795
#> --Random start (r): 5302221
# Same select_sample() arguments as for samp1, but the `n` column is removed
# from the frame before sampling.
samp2 <- county_2023_slim_n |>
  select(-n) |>
  select_sample(
    "sys_pps",
    n = sampsizes,
    strata = "Region",
    mos = "Pop_Tot",
    outall = TRUE
  )
#> No sorting variables are provided so frame is assumed to be already sorted for systematic sampling.
#> Stratum: Region = South
#> --Frame size: 1422
#> --Sample size: 10
#> --Sampling interval (k): 12763101
#> --Random start (r): 3427512
#> Stratum: Region = West
#> --Frame size: 449
#> --Sample size: 10
#> --Sampling interval (k): 7864612
#> --Random start (r): 5293245
#> Stratum: Region = Northeast
#> --Frame size: 218
#> --Sample size: 10
#> --Sampling interval (k): 5722245
#> --Random start (r): 5600881
#> Stratum: Region = Midwest
#> --Frame size: 1055
#> --Sample size: 10
#> --Sampling interval (k): 6888795
#> --Random start (r): 5830173
samp1
#> # A tidytable: 3,144 × 9
#> Region GEOID Pop_Tot n ExpHits_man SelectionIndicator SamplingWeight
#> <fct> <chr> <dbl> <dbl> <dbl> <lgl> <dbl>
#> 1 South 01001 59285 50 0.00465 FALSE NA
#> 2 South 01003 239945 50 0.0188 FALSE NA
#> 3 South 01005 24757 50 0.00194 FALSE NA
#> 4 South 01007 22152 50 0.00174 FALSE NA
#> 5 South 01009 59292 50 0.00465 FALSE NA
#> 6 South 01011 10157 50 0.000796 FALSE NA
#> 7 South 01013 18807 50 0.00147 FALSE NA
#> 8 South 01015 116141 50 0.00910 FALSE NA
#> 9 South 01017 34450 50 0.00270 FALSE NA
#> 10 South 01019 25224 50 0.00198 FALSE NA
#> # ℹ 3,134 more rows
#> # ℹ 2 more variables: NumberHits <int>, ExpectedHits <dbl>
samp2
#> # A tidytable: 3,144 × 8
#> Region GEOID Pop_Tot ExpHits_man SelectionIndicator SamplingWeight NumberHits
#> <fct> <chr> <dbl> <dbl> <lgl> <dbl> <int>
#> 1 South 01001 59285 0.00465 FALSE NA 0
#> 2 South 01003 239945 0.0188 FALSE NA 0
#> 3 South 01005 24757 0.00194 FALSE NA 0
#> 4 South 01007 22152 0.00174 FALSE NA 0
#> 5 South 01009 59292 0.00465 FALSE NA 0
#> 6 South 01011 10157 0.000796 FALSE NA 0
#> 7 South 01013 18807 0.00147 FALSE NA 0
#> 8 South 01015 116141 0.00910 FALSE NA 0
#> 9 South 01017 34450 0.00270 FALSE NA 0
#> 10 South 01019 25224 0.00198 FALSE NA 0
#> # ℹ 3,134 more rows
#> # ℹ 1 more variable: ExpectedHits <dbl>
# Compare the two results after dropping the selection-result columns
# (SelectionIndicator, SamplingWeight, NumberHits) from both, plus the extra
# `n` column from samp1, so that only the remaining columns, including
# ExpectedHits, are checked.
waldo::compare(
  samp1 |>
    select(-c(SelectionIndicator, SamplingWeight, NumberHits, n)),
  samp2 |>
    select(-c(SelectionIndicator, SamplingWeight, NumberHits))
)
#> old vs new
#> ExpectedHits
#> - old[1, ] 2.322515e-02
#> + new[1, ] 4.645031e-03
#> - old[2, ] 9.399949e-02
#> + new[2, ] 1.879990e-02
#> - old[3, ] 9.698662e-03
#> + new[3, ] 1.939732e-03
#> - old[4, ] 8.678142e-03
#> + new[4, ] 1.735628e-03
#> - old[5, ] 2.322790e-02
#> + new[5, ] 4.645579e-03
#> - old[6, ] 3.979049e-03
#> + new[6, ] 7.958097e-04
#> - old[7, ] 7.367723e-03
#> + new[7, ] 1.473545e-03
#> - old[8, ] 4.549874e-02
#> + new[8, ] 9.099748e-03
#> - old[9, ] 1.349594e-02
#> + new[9, ] 2.699187e-03
#> - old[10, ] 9.881611e-03
#> + new[10, ] 1.976322e-03
#> and 3134 more ...
#>
#> old$ExpectedHits | new$ExpectedHits
#> [1] 0.023225 - 0.004645 [1]
#> [2] 0.093999 - 0.018800 [2]
#> [3] 0.009699 - 0.001940 [3]
#> [4] 0.008678 - 0.001736 [4]
#> [5] 0.023228 - 0.004646 [5]
#> [6] 0.003979 - 0.000796 [6]
#> [7] 0.007368 - 0.001474 [7]
#> [8] 0.045499 - 0.009100 [8]
#> [9] 0.013496 - 0.002699 [9]
#> [10] 0.009882 - 0.001976 [10]
#> ... ... ... and 3134 more ...

Created on 2025-11-19 with reprex v2.1.1
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.5.1 (2025-06-13 ucrt)
#> os Windows 11 x64 (build 22631)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/New_York
#> date 2025-11-19
#> pandoc 3.6.3 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#> quarto 1.8.24 @ C:\\Users\\sazimmer\\AppData\\Local\\Programs\\Quarto\\bin\\quarto.exe
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
#> crayon 1.5.3 2024-06-20 [1] CRAN (R 4.5.0)
#> data.table 1.17.8 2025-07-10 [1] RSPM (R 4.5.0)
#> diffobj 0.3.6 2025-04-21 [1] CRAN (R 4.5.0)
#> digest 0.6.37 2024-08-19 [1] CRAN (R 4.5.0)
#> dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.5.0)
#> evaluate 1.0.5 2025-08-27 [1] RSPM (R 4.5.0)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.0)
#> fs 1.6.6 2025-04-12 [1] CRAN (R 4.5.0)
#> generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.0)
#> glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.5.0)
#> knitr 1.50 2025-03-16 [1] CRAN (R 4.5.0)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.5.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.5.0)
#> pillar 1.11.0 2025-07-04 [1] RSPM (R 4.5.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
#> R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
#> reprex 2.1.1 2024-07-06 [1] CRAN (R 4.5.0)
#> rlang 1.1.6 2025-04-11 [1] CRAN (R 4.5.0)
#> rmarkdown 2.29 2024-11-04 [1] CRAN (R 4.5.0)
#> rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.5.0)
#> SampleSelectR * 1.0.0 2025-09-22 [1] Github (rti-international/SampleSelectR@2f7d23c)
#> sessioninfo 1.2.3.9000 2025-09-18 [1] Github (r-lib/sessioninfo@ec4dd0c)
#> tibble 3.3.0 2025-06-08 [1] RSPM (R 4.5.0)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.0)
#> tidytable 0.11.2 2024-12-11 [1] CRAN (R 4.5.0)
#> utf8 1.2.6 2025-06-08 [1] RSPM (R 4.5.0)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.5.0)
#> waldo 0.6.2 2025-07-11 [1] RSPM
#> withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.0)
#> xfun 0.53 2025-08-19 [1] RSPM (R 4.5.0)
#> yaml 2.3.10 2024-07-26 [1] CRAN (R 4.5.0)
#>
#> [1] C:/Users/sazimmer/AppData/Local/R/win-library/4.5
#> [2] C:/Program Files/R/R-4.5.1/library
#> * ── Packages attached to the search path.
#>
#> ──────────────────────────────────────────────────────────────────────────────
Metadata
Metadata
Assignees
Labels
No labels