Skip to content

Commit 58a1478

Browse files
committed
add extra duplication checks and check whether tables exist
1 parent 4d283a6 commit 58a1478

File tree

2 files changed

+14
-14
lines changed

2 files changed

+14
-14
lines changed

src/acquisition/rvdss/pull_historic.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -491,13 +491,15 @@ def fetch_one_season_from_report(url):
491491
# The column names carry the region information (e.g. "Pr tests" means this column holds the tests for the Prairies)
492492

493493
if "reporting laboratory" in str(table.columns):
494+
respiratory_detection_table_exists = True
494495
respiratory_detection_table = create_detections_table(table,modified_date,current_week,current_week_end,season[0])
495496
respiratory_detection_table = respiratory_detection_table.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
496497
elif "number" in caption.text.lower():
497498
number_table_exists = True
498499
number_detections_table = create_number_detections_table(table,modified_date,season[0])
499500
number_detections_table = number_detections_table.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
500501
elif "positive" in caption.text.lower():
502+
positive_table_exists = True
501503
flu = " influenza" in caption.text.lower()
502504

503505
# tables are missing week 53
@@ -531,20 +533,21 @@ def fetch_one_season_from_report(url):
531533

532534
# Check if the indices are already in the season table
533535
# If not, add the week's tables to the season table
534-
536+
535537
# Deduplicate with pandas: only append a week's table when none of its index entries are already in the season table
536-
if not number_detections_table.index.isin(all_number_tables.index).any():
537-
all_number_tables=pd.concat([all_number_tables,number_detections_table])
538-
539-
if not respiratory_detection_table.index.isin(all_respiratory_detection_tables.index).any():
540-
all_respiratory_detection_tables= pd.concat([all_respiratory_detection_tables,respiratory_detection_table])
541-
542-
if not combined_positive_tables.index.isin(all_positive_tables.index).any():
543-
all_positive_tables=pd.concat([all_positive_tables,combined_positive_tables])
544-
538+
if respiratory_detection_table_exists:
539+
if not respiratory_detection_table.index.isin(all_respiratory_detection_tables.index).any():
540+
all_respiratory_detection_tables= pd.concat([all_respiratory_detection_tables,respiratory_detection_table])
541+
del respiratory_detection_table
542+
if positive_table_exists:
543+
if not combined_positive_tables.index.isin(all_positive_tables.index).any():
544+
all_positive_tables=pd.concat([all_positive_tables,combined_positive_tables])
545+
del combined_positive_tables
546+
del pos_table
545547
if number_table_exists:
546548
if not number_detections_table.index.isin(all_number_tables.index).any():
547549
all_number_tables=pd.concat([all_number_tables,number_detections_table])
550+
del number_detections_table
548551

549552
return {
550553
"respiratory_detection": all_respiratory_detection_tables,

src/acquisition/rvdss/run.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from delphi.epidata.acquisition.rvdss.database import respiratory_detections_cols, pct_positive_cols, detections_counts_cols, expected_table_names, expected_columns, get_num_rows, update
1515

1616
def update_current_data():
17-
1817
## Check if data for current update date has already been fetched
1918
headers = {
2019
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
@@ -27,9 +26,8 @@ def update_current_data():
2726
with open(UPDATE_DATES_FILE, 'a') as testfile:
2827
testfile.write(update_date+ "\n")
2928

30-
3129
data_dict = fetch_dashboard_data(DASHBOARD_BASE_URL)
32-
## TODO
30+
# update database
3331
update(data_dict)
3432
else:
3533
print("Data is already up to date")
@@ -65,7 +63,6 @@ def update_historical_data():
6563
update(hist_dict_list)
6664

6765

68-
6966
def main():
7067
# args and usage
7168
parser = argparse.ArgumentParser()

0 commit comments

Comments
 (0)