Skip to content

Commit 3814899

Browse files
committed
updated Facebook DUA line 1008 with clarification
Clarification: API users won't need a DUA, because they won't have access to raw Facebook data. What is available to them is data aggregated by Delphi. (Source: @alexreinhart and the survey documentation, which states: "Geographically aggregated data from this survey is publicly available through the COVIDcast API as the fb-survey data source"; see <https://cmu-delphi.github.io/delphi-epidata/symptom-survey/>.)
1 parent b49fe31 commit 3814899

File tree

1 file changed

+22
-20
lines changed

1 file changed

+22
-20
lines changed

scripts/signal_spreadsheet_updater.R

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ source_updated[, col] <- cadence_map[source_updated$data_source]
502502
# # Not available for all indicators. Try nation. Avoid smaller geos because
503503
# # processing later will take a while.
504504
# geo_type <- "state"
505-
#
505+
#
506506
# # Consider a range of issues. About 2 weeks is probably fine. Not all indicators
507507
# # are available in this time range, so you may need to make another range of
508508
# # dates that is years or months different.
@@ -523,8 +523,8 @@ source_updated[, col] <- cadence_map[source_updated$data_source]
523523
# "2021-02-15",
524524
# "2021-02-16"
525525
# )
526-
#
527-
#
526+
#
527+
#
528528
# epidata <- pub_covidcast(
529529
# source,
530530
# signal,
@@ -533,16 +533,16 @@ source_updated[, col] <- cadence_map[source_updated$data_source]
533533
# time_type = "day",
534534
# issues = about_2weeks_issues
535535
# )
536-
#
537-
#
536+
#
537+
#
538538
# # Make sure data is looking reasonable
539539
# # Number of reference dates reported in each issue
540540
# count(epidata, issue)
541-
#
541+
#
542542
# # Number of locations reported for each issue and reference date
543543
# count(epidata, issue, time_value)
544-
#
545-
#
544+
#
545+
#
546546
# ## Revision cadence
547547
# # For each location and reference date, are all reported values the same across
548548
# # all lags we're checking?
@@ -559,17 +559,19 @@ source_updated[, col] <- cadence_map[source_updated$data_source]
559559
# )
560560
# # Are all reference dates without any lag?
561561
# all(revision_comparison$no_backfill == "TRUE")
562+
# #all(revision_comparison[revision_comparison$no_backfill != "TRUE", ])
563+
# revision_comparison[revision_comparison$no_backfill != "TRUE", ]
562564
# View(revision_comparison)
563-
#
564-
#
565-
# ## Reporting lag
566-
# # Find how lagged the newest reported value is for each issue.
567-
# epidata_slice <- epidata %>% group_by(issue) %>% slice_min(lag)
568-
# # Find the most common min lag. We expect a relatively narrow range of lags. At
569-
# # most, a data source should be updated weekly such that it has a range of lags
570-
# # of 7 days (e.g. 5-12 days). For data updated daily, we expect a range of lags
571-
# # of only a few days (e.g. 2-4 days or even 2-3 days).
572-
# table(epidata_slice$lag)
565+
566+
567+
## Reporting lag
568+
# Find how lagged the newest reported value is for each issue.
569+
epidata_slice <- epidata %>% group_by(issue) %>% slice_min(lag)
570+
# Find the most common min lag. We expect a relatively narrow range of lags. At
571+
# most, a data source should be updated weekly such that it has a range of lags
572+
# of 7 days (e.g. 5-12 days). For data updated daily, we expect a range of lags
573+
# of only a few days (e.g. 2-4 days or even 2-3 days).
574+
table(epidata_slice$lag)
573575

574576

575577
col <- "Typical Reporting Lag"
@@ -1003,9 +1005,9 @@ dua_link <- c(
10031005
"covid-act-now" = NA_character_, #public, maybe contract for other specific project #@Carlyn
10041006
"doctor-visits" = "https://drive.google.com/drive/u/1/folders/11kvTzVR5Yd3lVszxmPHxFZcAYjIpoLcf", #"https://cmu.box.com/s/l2tz6kmiws6jyty2azwb43poiepz0565"
10051007
"dsew-cpr" = NA_character_, #public
1006-
"fb-survey" = "https://cmu.box.com/s/qfxplcdrcn9retfzx4zniyugbd9h3bos",
1008+
"fb-survey" = NA_character_, #@AlexR public aggregated by Delphi, but raw data requires DUA "https://cmu.box.com/s/qfxplcdrcn9retfzx4zniyugbd9h3bos",
10071009
"ght" = NA_character_, #public, has an API doesn't require password. No Delphi documentation. See <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4215636/>
1008-
"google-survey" = NA_character_, #@Carlyn has requested DUA from Roni
1010+
"google-survey" = NA_character_, #@Carlyn has requested DUA from Roni, waiting.
10091011
"google-symptoms" = NA_character_, #public
10101012
"hhs" = NA_character_, #public gov't
10111013
"hospital-admissions" = "https://drive.google.com/drive/u/1/folders/11kvTzVR5Yd3lVszxmPHxFZcAYjIpoLcf", #"https://cmu.box.com/s/l2tz6kmiws6jyty2azwb43poiepz0565"

0 commit comments

Comments
 (0)