From e50eff828a90c5f0e91bb48c39189d678ef7e21d Mon Sep 17 00:00:00 2001
From: Agatha Schmidt
Date: Thu, 4 Jul 2024 13:44:36 +0200
Subject: [PATCH 01/33] data generation graphODE without dampings

---
 .../surrogatemodel/GNN/data_generation_nodamp | 314 ++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp
new file mode 100644
index 0000000000..3beecebdff
--- /dev/null
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp
@@ -0,0 +1,314 @@
+import copy
+import os
+import pickle
+import random
+import json
+from datetime import date
+
+import numpy as np
+from progress.bar import Bar
+from sklearn.preprocessing import FunctionTransformer
+
+from memilio.simulation import (AgeGroup, LogLevel, set_log_level)
+
+from memilio.simulation.osecir import (Index_InfectionState,
+                                       interpolate_simulation_result,
+                                       ParameterStudy, InfectionState, Model,
+                                       ModelGraph, set_edges)
+
+from memilio.epidata import geoModificationGermany as geoger
+from memilio.epidata import transformMobilityData as tmd
+from memilio.epidata import getDataIntoPandasDataFrame as gd
+
+
+def run_secir_groups_simulation(days, populations):
+    """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region.
+    Virus-specific parameters are fixed and the initial numbers of persons in the particular infection states are chosen randomly from defined ranges.
+    @param days Describes how many days we simulate within a single run.
+    @param populations List containing the population of each age group for every region.
+    @return List containing the simulation results for every region.
+    """
+    set_log_level(LogLevel.Off)
+
+    start_day = 1
+    start_month = 1
+    start_year = 2019
+    dt = 0.1
+
+    # get county ids
+    countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True)
+
+    # Define age groups
+    groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+']
+    num_groups = len(groups)
+    num_regions = len(populations)
+    models = []
+
+    # Initialize Parameters
+    for region in range(num_regions):
+        model = Model(num_groups)
+
+        # Set parameters
+        for i in range(num_groups):
+            # Compartment transition duration
+            model.parameters.TimeExposed[AgeGroup(i)] = 3.2
+            model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2.
+            model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6.
+            model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12.
+            model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8.
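+            # (The stage durations above are mean stay times in days; they
+            # are the same for every age group and are kept fixed across
+            # all runs.)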
+ + # Initial number of people in each compartment with random numbers + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Exposed)] = random.uniform( + 0.00025, 0.005) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptoms)] = random.uniform( + 0.0001, 0.0035) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptomsConfirmed)] = 0 + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptoms)] = random.uniform( + 0.00007, 0.001) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptomsConfirmed)] = 0 + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSevere)] = random.uniform( + 0.00003, 0.0006) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedCritical)] = random.uniform( + 0.00001, 0.0002) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Recovered)] = random.uniform( + 0.002, 0.08) * populations[region][i] + model.populations[AgeGroup(i), + Index_InfectionState(InfectionState.Dead)] = random.uniform( + 0, 0.0003) * populations[region][i] + model.populations.set_difference_from_group_total_AgeGroup( + (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), populations[region][i]) + + # Compartment transition propabilities + model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5 + model.parameters.TransmissionProbabilityOnContact[AgeGroup( + i)] = 0.1 + model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09 + model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 + model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2 + model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25 + model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3 + # twice the value of RiskOfInfectionFromSymptomatic + model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup( + i)] = 0.5 + + # StartDay is the n-th day of the year + model.parameters.StartDay = ( + date(start_year, start_month, start_day) - date(start_year, 1, 1)).days + + # Load baseline and minimum contact matrix and assign them to the model + baseline = getBaselineMatrix() + #minimum = getMinimumMatrix() + + model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline + model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( + (num_groups, num_groups)) * 0 + + # Apply mathematical constraints to parameters + model.apply_constraints() + models.append(model) + + graph = ModelGraph() + for i in range(num_regions): + graph.add_node(int(countykey_list[i]), models[i]) + + # get mobility data directory + arg_dict = gd.cli("commuter_official") + + directory = arg_dict['out_folder'].split('/pydata')[0] + directory = os.path.join(directory, 'mobility/') + + # Merge Eisenach and Wartbugkreis in Input Data + tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') + tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') + + num_locations = 4 + + set_edges(os.path.abspath(os.path.join(directory, os.pardir)), graph, num_locations) + + study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) + study.run() + + graph_run = study.run()[0] + results = interpolate_simulation_result(graph_run) + + for result_indx in range(len(results)): + results[result_indx] = remove_confirmed_compartments( + 
np.transpose(results[result_indx].as_ndarray()[1:, :]), num_groups) + + # Omit first column, as the time points are not of interest here. + dataset_entry = copy.deepcopy(results) + + return dataset_entry + +def remove_confirmed_compartments(dataset_entries, num_groups): + """! The compartments which contain confirmed cases are not needed and are therefore omitted by summarizing the confirmed + compartment with the original compartment. + @param dataset_entries Array that contains the compartmental data with confirmed compartments. + @param num_groups Number of age groups. + @return Array that contains the compartmental data without confirmed compartments. + """ + new_dataset_entries = [] + for i in dataset_entries : + dataset_entries_reshaped = i.reshape([num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups) ]) + sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1) + sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1) + dataset_entries_reshaped[:, 2] = sum_inf_no_symp + dataset_entries_reshaped[:, 4] = sum_inf_symp + new_dataset_entries.append(np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten()) + return new_dataset_entries + + +def get_population(path="data/pydata/Germany/county_population.json"): + """! loads population data + @param path Path to population file. + @return List with all 400 populations and 6 age groups. + """ + with open(path) as f: + data = json.load(f) + population = [] + for data_entry in data: + population_county = [] + population_county.append( + data_entry['<3 years'] + data_entry['3-5 years'] / 2) + population_county.append(data_entry['6-14 years']) + population_county.append( + data_entry['15-17 years'] + data_entry['18-24 years'] + + data_entry['25-29 years'] + data_entry['30-39 years'] / 2) + population_county.append( + data_entry['30-39 years'] / 2 + data_entry['40-49 years'] + + data_entry['50-64 years'] * 2 / 3) + population_county.append( + data_entry['65-74 years'] + data_entry['>74 years'] * 0.2 + + data_entry['50-64 years'] * 1 / 3) + population_county.append( + data_entry['>74 years'] * 0.8) + + population.append(population_county) + return population + +def getBaselineMatrix(): + """! loads the baselinematrix + """ + + baseline_contact_matrix0 = os.path.join( + "./data/contacts/baseline_home.txt") + baseline_contact_matrix1 = os.path.join( + "./data/contacts/baseline_school_pf_eig.txt") + baseline_contact_matrix2 = os.path.join( + "./data/contacts/baseline_work.txt") + baseline_contact_matrix3 = os.path.join( + "./data/contacts/baseline_other.txt") + + baseline = np.loadtxt(baseline_contact_matrix0) \ + + np.loadtxt(baseline_contact_matrix1) + \ + np.loadtxt(baseline_contact_matrix2) + \ + np.loadtxt(baseline_contact_matrix3) + + return baseline + +def generate_data( + num_runs, path, input_width, days, save_data=True): + """! Generate dataset by calling run_secir_simulation (num_runs)-often + @param num_runs Number of times, the function run_secir_simulation is called. + @param path Path, where the datasets are stored. + @param input_width number of time steps used for model input. + @param label_width number of time steps (days) used as model output/label. + @param save_data Option to deactivate the save of the dataset. Per default true. 
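+    The dataset is written to '<path>/data_secir_age_groups.pickle' if
+    save_data is True.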
+ """ + population = get_population() + days_sum = days+input_width-1 + + + # days = damping_days[-1]+20 + data = {"inputs": [], + "labels": [], + } + + # show progess in terminal for longer runs + # Due to the random structure, theres currently no need to shuffle the data + bar = Bar('Number of Runs done', max=num_runs) + + for _ in range(num_runs): + + data_run = run_secir_groups_simulation( + days_sum, population) + + inputs = np.asarray(data_run).transpose(1,2,0)[:input_width] + data["inputs"].append(inputs) + + data["labels"].append(np.asarray(data_run).transpose(1,2,0)[input_width:]) + + bar.next() + + bar.finish() + + if save_data: + num_groups = int(np.asarray(data['inputs']).shape[2]/8) + transformer = FunctionTransformer(np.log1p, validate=True) + + # Scale inputs + inputs = np.asarray( + data['inputs']).transpose(2,0,1,3).reshape(num_groups*8,-1) + scaled_inputs = transformer.transform(inputs) + original_shape_input = np.asarray(data['inputs']).shape + + # Step 1: Reverse the reshape + reshaped_back = scaled_inputs.reshape(original_shape_input[2], original_shape_input[0], + original_shape_input[1], original_shape_input[3]) + + # Step 2: Reverse the transpose + original_inputs = reshaped_back.transpose(1, 2, 0, 3) + scaled_inputs = original_inputs.transpose(0, 3, 1, 2) + + + # Scale labels + labels = np.asarray( + data['labels']).transpose(2,0,1,3).reshape(num_groups*8, -1) + scaled_labels = transformer.transform(labels) + original_shape_labels = np.asarray(data['labels']).shape + + # Step 1: Reverse the reshape + reshaped_back = scaled_labels.reshape(original_shape_labels[2], original_shape_labels[0], + original_shape_labels[1], original_shape_labels[3]) + + # Step 2: Reverse the transpose + original_labels = reshaped_back.transpose(1, 2, 0, 3) + scaled_labels = original_labels.transpose(0, 3, 1, 2) + + all_data = {"inputs": scaled_inputs, + "labels": scaled_labels, + } + + # check if data directory exists. If necessary create it. 
+ if not os.path.isdir(path): + os.mkdir(path) + + # save dict to json file + with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: + pickle.dump(all_data, f) + + +if __name__ == "__main__": + + path = os.path.dirname(os.path.realpath(__file__)) + path_data = os.path.join( + os.path.dirname( + os.path.realpath(os.path.dirname(os.path.realpath(path)))), + 'data_GNN_nodamp') + + + input_width = 5 + days = 30 + num_runs = 1000 + number_of_populations = 400 + generate_data(num_runs, path_data, input_width, + days, number_of_populations) + From 21e6df5fc7587977e7fd5bdd6c45ff01f154541e Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Tue, 9 Jul 2024 09:41:47 +0200 Subject: [PATCH 02/33] GNN data generator with dampings - graphODE --- .../GNN/data_generation_withdamp.py | 417 ++++++++++++++++++ 1 file changed, 417 insertions(+) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py new file mode 100644 index 0000000000..4056108962 --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -0,0 +1,417 @@ +import copy +import os +import pickle +import random +import json +from datetime import date + +import numpy as np +from progress.bar import Bar +from sklearn.preprocessing import FunctionTransformer + +from memilio.simulation import (AgeGroup, LogLevel, set_log_level, Damping) + +from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, + InfectionState, Model, ModelGraph, + interpolate_simulation_result, set_edges) + +from memilio.epidata import geoModificationGermany as geoger +from memilio.epidata import transformMobilityData as tmd +from memilio.epidata import getDataIntoPandasDataFrame as gd + + +def run_secir_groups_simulation(days, populations, num_dampings): + """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. + Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. + @param Days Describes how many days we simulate within a single run. + @param damping_day The day when damping is applied. + @param populations List containing the population in each age group. + @return List containing the populations in each compartment used to initialize the run. + """ + set_log_level(LogLevel.Off) + + start_day = 1 + start_month = 1 + start_year = 2019 + dt = 0.1 + + # get county ids + countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True) + + # Define age Groups + groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] + num_groups = len(groups) + num_regions = len(populations) + models = [] + + # Initialize Parameters + for region in range(num_regions): + model = Model(num_groups) + + # Set parameters + for i in range(num_groups): + # Compartment transition duration + model.parameters.TimeExposed[AgeGroup(i)] = 3.2 + model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2. + model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6. + model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12. + model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8. 
+ + # Initial number of people in each compartment with random numbers + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Exposed)] = random.uniform( + 0.00025, 0.005) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptoms)] = random.uniform( + 0.0001, 0.0035) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptomsConfirmed)] = 0 + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptoms)] = random.uniform( + 0.00007, 0.001) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptomsConfirmed)] = 0 + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSevere)] = random.uniform( + 0.00003, 0.0006) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedCritical)] = random.uniform( + 0.00001, 0.0002) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Recovered)] = random.uniform( + 0.002, 0.08) * populations[region][i] + model.populations[AgeGroup(i), + Index_InfectionState(InfectionState.Dead)] = random.uniform( + 0, 0.0003) * populations[region][i] + model.populations.set_difference_from_group_total_AgeGroup( + (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), populations[region][i]) + + # Compartment transition propabilities + model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5 + model.parameters.TransmissionProbabilityOnContact[AgeGroup( + i)] = 0.1 + model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09 + model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 + model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2 + model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25 + model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3 + # twice the value of RiskOfInfectionFromSymptomatic + model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup( + i)] = 0.5 + + # StartDay is the n-th day of the year + model.parameters.StartDay = ( + date(start_year, start_month, start_day) - date(start_year, 1, 1)).days + + # Load baseline and minimum contact matrix and assign them to the model + baseline = getBaselineMatrix() + minimum = getMinimumMatrix() + + model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline + model.parameters.ContactPatterns.cont_freq_mat[0].minimum = minimum + + # Generate a damping matrix and assign it to the model + damped_matrices = [] + damping_coeff = [] + for day in num_dampings: + + # generat a random damping factor + damping = np.ones((num_groups, num_groups) + ) * np.float16(random.uniform(0, 0.5)) + + # add damping to model + model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( + coeffs=(damping), t=day, level=0, type=0)) + + damped_matrices.append(model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at( + day+1)) + damping_coeff.append(damping[0][0]) + + # Apply mathematical constraints to parameters + model.apply_constraints() + models.append(model) + + graph = ModelGraph() + for i in range(num_regions): + graph.add_node(int(countykey_list[i]), models[i]) + + # get mobility data directory + arg_dict = gd.cli("commuter_official") + + directory = arg_dict['out_folder'].split('/pydata')[0] + directory = os.path.join(directory, 'mobility/') + + # Merge Eisenach and Wartbugkreis in Input Data + tmd.updateMobility2022(directory, 
mobility_file='twitter_scaled_1252') + tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') + + num_locations = 4 + + set_edges(os.path.abspath(os.path.join(directory, os.pardir)), graph, num_locations) + + study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) + study.run() + + graph_run = study.run()[0] + results = interpolate_simulation_result(graph_run) + + for result_indx in range(len(results)): + results[result_indx] = remove_confirmed_compartments( + np.transpose(results[result_indx].as_ndarray()[1:, :]), num_groups) + + # Omit first column, as the time points are not of interest here. + dataset_entry = copy.deepcopy(results) + + return dataset_entry, damped_matrices, num_dampings, damping_coeff + +def remove_confirmed_compartments(dataset_entries, num_groups): + """! The compartments which contain confirmed cases are not needed and are therefore omitted by summarizing the confirmed + compartment with the original compartment. + @param dataset_entries Array that contains the compartmental data with confirmed compartments. + @param num_groups Number of age groups. + @return Array that contains the compartmental data without confirmed compartments. + """ + new_dataset_entries = [] + for i in dataset_entries : + dataset_entries_reshaped = i.reshape([num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups) ]) + sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1) + sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1) + dataset_entries_reshaped[:, 2] = sum_inf_no_symp + dataset_entries_reshaped[:, 4] = sum_inf_symp + new_dataset_entries.append(np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten()) + return new_dataset_entries + + +def get_population(path="data/pydata/Germany/county_population.json"): + """! loads population data + @param path Path to population file. + @return List with all 400 populations and 6 age groups. + """ + with open(path) as f: + data = json.load(f) + population = [] + for data_entry in data: + population_county = [] + population_county.append( + data_entry['<3 years'] + data_entry['3-5 years'] / 2) + population_county.append(data_entry['6-14 years']) + population_county.append( + data_entry['15-17 years'] + data_entry['18-24 years'] + + data_entry['25-29 years'] + data_entry['30-39 years'] / 2) + population_county.append( + data_entry['30-39 years'] / 2 + data_entry['40-49 years'] + + data_entry['50-64 years'] * 2 / 3) + population_county.append( + data_entry['65-74 years'] + data_entry['>74 years'] * 0.2 + + data_entry['50-64 years'] * 1 / 3) + population_county.append( + data_entry['>74 years'] * 0.8) + + population.append(population_county) + return population + +def getBaselineMatrix(): + """! loads the baselinematrix + """ + + baseline_contact_matrix0 = os.path.join( + "./data/contacts/baseline_home.txt") + baseline_contact_matrix1 = os.path.join( + "./data/contacts/baseline_school_pf_eig.txt") + baseline_contact_matrix2 = os.path.join( + "./data/contacts/baseline_work.txt") + baseline_contact_matrix3 = os.path.join( + "./data/contacts/baseline_other.txt") + + baseline = np.loadtxt(baseline_contact_matrix0) \ + + np.loadtxt(baseline_contact_matrix1) + \ + np.loadtxt(baseline_contact_matrix2) + \ + np.loadtxt(baseline_contact_matrix3) + + return baseline + +def getMinimumMatrix(): + """! 
loads the minimum matrix
+    """
+
+    minimum_contact_matrix0 = os.path.join(
+        "./data/contacts/minimum_home.txt")
+    minimum_contact_matrix1 = os.path.join(
+        "./data/contacts/minimum_school_pf_eig.txt")
+    minimum_contact_matrix2 = os.path.join(
+        "./data/contacts/minimum_work.txt")
+    minimum_contact_matrix3 = os.path.join(
+        "./data/contacts/minimum_other.txt")
+
+    minimum = np.loadtxt(minimum_contact_matrix0) \
+        + np.loadtxt(minimum_contact_matrix1) + \
+        np.loadtxt(minimum_contact_matrix2) + \
+        np.loadtxt(minimum_contact_matrix3)
+
+    return minimum
+
+
+def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min_damping_day, n_runs):
+    """! Draw damping days while keeping a minimum distance between the damping days. This method aims to create a
+    uniform distribution of drawn damping days.
+    @param number_of_dampings Number of dampings that have to be drawn.
+    @param days Number of days which are simulated (label_width).
+    @param min_distance The minimum number of days between two dampings.
+    @param min_damping_day The earliest day of the simulation where a damping can take place.
+    @param n_runs Number of simulation runs.
+    """
+
+    all_dampings = []
+    count_runs = 0
+    count_shadow = 0
+    while len(all_dampings) < n_runs:
+        # Candidate days with a "shadow" margin of min_distance on both
+        # sides, so that days near the boundaries are drawn as often as
+        # days in the middle of the simulated period.
+        days_list = list(
+            range(min_damping_day - min_distance, days + min_distance))
+        dampings = []
+        while len(days_list) > 0 and len(dampings) < number_of_dampings:
+            damping_day = random.choice(days_list)
+            # remove all days closer than min_distance to the drawn day
+            days_list = [day for day in days_list
+                         if abs(day - damping_day) >= min_distance]
+            if min_damping_day <= damping_day < days:
+                dampings.append(damping_day)
+            else:
+                # shadow draw outside the simulated period: discarded, but
+                # it still blocks nearby days
+                count_shadow += 1
+        count_runs += 1
+        # only keep draws that produced the requested number of damping days
+        if len(dampings) >= number_of_dampings:
+            all_dampings.append(sorted(dampings))
+
+    return np.asarray(all_dampings)
+
+def generate_data(
+        num_runs, path, input_width, label_width, save_data=True):
+    """! Generate dataset by calling run_secir_simulation num_runs times.
+    @param num_runs Number of times the function run_secir_simulation is called.
+    @param path Path where the datasets are stored.
+    @param input_width Number of time steps used for model input.
+    @param label_width Number of time steps (days) used as model output/label.
+    @param save_data Option to deactivate saving the dataset. True per default.
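+    Note: the call to generate_dampings_withshadowdamp below relies on the
+    module-level variable number_of_dampings defined under __main__.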
+ """ + population = get_population() + days_sum = label_width+input_width-1 + + #generate dampings + damping_days = generate_dampings_withshadowdamp(number_of_dampings = number_of_dampings, days= label_width, min_distance=7, min_damping_day=input_width, n_runs = num_runs) + + # all data including damping information + all_data = {"inputs": [], + "labels": [], + "damping_coeff": [], + "damping_day": [], + "damped_matrix": []} + + # data that needs to be scaled + data = {"inputs": [], + "labels": []} + + # show progess in terminal for longer runs + # Due to the random structure, theres currently no need to shuffle the data + bar = Bar('Number of Runs done', max=num_runs) + + for i in range(num_runs): + + data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation( + days_sum, population, damping_days[i]) + + inputs = np.asarray(data_run).transpose(1,2,0)[:input_width] + data["inputs"].append(inputs) + data["labels"].append(np.asarray(data_run).transpose(1,2,0)[input_width:]) + all_data["damping_coeff"].append(damping_factor) + all_data["damping_day"].append(damping_days_s) + all_data["damped_matrix"].append(damped_contact_matrix) + + bar.next() + + bar.finish() + + if save_data: + num_groups = int(np.asarray(data['inputs']).shape[2]/8) + transformer = FunctionTransformer(np.log1p, validate=True) + + # Scale inputs + inputs = np.asarray( + data['inputs']).transpose(2,0,1,3).reshape(num_groups*8,-1) + scaled_inputs = transformer.transform(inputs) + original_shape_input = np.asarray(data['inputs']).shape + + # Step 1: Reverse the reshape + reshaped_back = scaled_inputs.reshape(original_shape_input[2], original_shape_input[0], + original_shape_input[1], original_shape_input[3]) + + # Step 2: Reverse the transpose + original_inputs = reshaped_back.transpose(1, 2, 0, 3) + scaled_inputs = original_inputs.transpose(0, 3, 1, 2) + + + # Scale labels + labels = np.asarray( + data['labels']).transpose(2,0,1,3).reshape(num_groups*8, -1) + scaled_labels = transformer.transform(labels) + original_shape_labels = np.asarray(data['labels']).shape + + # Step 1: Reverse the reshape + reshaped_back = scaled_labels.reshape(original_shape_labels[2], original_shape_labels[0], + original_shape_labels[1], original_shape_labels[3]) + + # Step 2: Reverse the transpose + original_labels = reshaped_back.transpose(1, 2, 0, 3) + scaled_labels = original_labels.transpose(0, 3, 1, 2) + + all_data = {"inputs": scaled_inputs, + "labels": scaled_labels, + } + + # check if data directory exists. If necessary create it. 
+ if not os.path.isdir(path): + os.mkdir(path) + + # save dict to json file + with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: + pickle.dump(all_data, f) + + +if __name__ == "__main__": + + input_width = 5 + label_width = 100 + number_of_dampings = 3 + num_runs = 4 + number_of_populations = 400 + + path = os.path.dirname(os.path.realpath(__file__)) + path_data = os.path.join( + os.path.dirname( + os.path.realpath(os.path.dirname(os.path.realpath(path)))), + 'data_GNN_with '+str(number_of_dampings)+' dampings') + + generate_data(num_runs, path_data, input_width, + label_width, number_of_populations) + From 95a9d154604907dbed0c59b0cf05f111f34ce43c Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Tue, 9 Jul 2024 10:25:00 +0200 Subject: [PATCH 03/33] add damping information to output --- .../GNN/data_generation_withdamp.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index 4056108962..0bae453fee 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -330,7 +330,10 @@ def generate_data( # data that needs to be scaled data = {"inputs": [], - "labels": []} + "labels": [], + "damping_coeff": [], + "damping_day": [], + "damped_matrix": []} # show progess in terminal for longer runs # Due to the random structure, theres currently no need to shuffle the data @@ -344,9 +347,9 @@ def generate_data( inputs = np.asarray(data_run).transpose(1,2,0)[:input_width] data["inputs"].append(inputs) data["labels"].append(np.asarray(data_run).transpose(1,2,0)[input_width:]) - all_data["damping_coeff"].append(damping_factor) - all_data["damping_day"].append(damping_days_s) - all_data["damped_matrix"].append(damped_contact_matrix) + data["damping_coeff"].append(damping_factor) + data["damping_day"].append(damping_days_s) + data["damped_matrix"].append(damped_contact_matrix) bar.next() @@ -387,7 +390,9 @@ def generate_data( all_data = {"inputs": scaled_inputs, "labels": scaled_labels, - } + "damping_coeff": data['damping_coeff'], + "damping_day": data['damping_day'], + "damped_matrix": data['damped_matrix']} # check if data directory exists. If necessary create it. 
if not os.path.isdir(path): @@ -401,16 +406,16 @@ def generate_data( if __name__ == "__main__": input_width = 5 - label_width = 100 + label_width = 30 number_of_dampings = 3 - num_runs = 4 + num_runs = 2 number_of_populations = 400 path = os.path.dirname(os.path.realpath(__file__)) path_data = os.path.join( os.path.dirname( os.path.realpath(os.path.dirname(os.path.realpath(path)))), - 'data_GNN_with '+str(number_of_dampings)+' dampings') + 'data_GNN_with_'+str(number_of_dampings)+'_dampings') generate_data(num_runs, path_data, input_width, label_width, number_of_populations) From 300b9562bf97b6da34f1288391bd240ec18f3226 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Wed, 7 Aug 2024 15:17:57 +0200 Subject: [PATCH 04/33] adjust to code guidelines --- .../GNN/data_generation_nodamp.py | 328 ++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py new file mode 100644 index 0000000000..e15fccf453 --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -0,0 +1,328 @@ +import copy +import os +import pickle +import random +import json +from datetime import date +import numpy as np + +from progress.bar import Bar +from sklearn.preprocessing import FunctionTransformer + +from memilio.simulation import (AgeGroup, LogLevel, set_log_level) +from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, + InfectionState, Model, ModelGraph, + interpolate_simulation_result, set_edges) +from memilio.epidata import geoModificationGermany as geoger +from memilio.epidata import transformMobilityData as tmd +from memilio.epidata import getDataIntoPandasDataFrame as gd + + +def run_secir_groups_simulation(days, populations): + """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 + different age groups. The model is not stratified by region. + Virus-specific parameters are fixed and initial number of persons + in the particular infection states are chosen randomly from defined ranges. + @param Days Describes how many days we simulate within a single run. + @param damping_day The day when damping is applied. + @param populations List containing the population in each age group. + @return List containing the populations in each compartment used to initialize + the run. + """ + + set_log_level(LogLevel.Off) + + start_day = 1 + start_month = 1 + start_year = 2019 + dt = 0.1 + + # get county ids + countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True) + + # Define age Groups + groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] + num_groups = len(groups) + num_regions = len(populations) + models = [] + + # Initialize Parameters + for region in range(num_regions): + model = Model(num_groups) + + # Set parameters + for i in range(num_groups): + # Compartment transition duration + model.parameters.TimeExposed[AgeGroup(i)] = 3.2 + model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2. + model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6. + model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12. + model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8. 
+ + # Initial number of people in each compartment with random numbers + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Exposed)] = random.uniform( + 0.00025, 0.005) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptoms)] = random.uniform( + 0.0001, 0.0035) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptomsConfirmed)] = 0 + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptoms)] = random.uniform( + 0.00007, 0.001) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptomsConfirmed)] = 0 + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSevere)] = random.uniform( + 0.00003, 0.0006) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedCritical)] = random.uniform( + 0.00001, 0.0002) * populations[region][i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Recovered)] = random.uniform( + 0.002, 0.08) * populations[region][i] + model.populations[AgeGroup(i), + Index_InfectionState(InfectionState.Dead)] = random.uniform( + 0, 0.0003) * populations[region][i] + model.populations.set_difference_from_group_total_AgeGroup( + (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), + populations[region][i]) + + # Compartment transition propabilities + model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5 + model.parameters.TransmissionProbabilityOnContact[AgeGroup( + i)] = 0.1 + model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09 + model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 + model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2 + model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25 + model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3 + # twice the value of RiskOfInfectionFromSymptomatic + model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup( + i)] = 0.5 + + # StartDay is the n-th day of the year + model.parameters.StartDay = ( + date(start_year, start_month, start_day) - date(start_year, 1, 1)).days + + # Load baseline and minimum contact matrix and assign them to the model + baseline = getBaselineMatrix() + #minimum = getMinimumMatrix() + + model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline + model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( + (num_groups, num_groups)) * 0 + + # Apply mathematical constraints to parameters + model.apply_constraints() + models.append(model) + + graph = ModelGraph() + for i in range(num_regions): + graph.add_node(int(countykey_list[i]), models[i]) + + # get mobility data directory + arg_dict = gd.cli("commuter_official") + + directory = arg_dict['out_folder'].split('/pydata')[0] + directory = os.path.join(directory, 'mobility/') + + # Merge Eisenach and Wartbugkreis in Input Data + tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') + tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') + + num_locations = 4 + + set_edges(os.path.abspath(os.path.join(directory, os.pardir)), + graph, num_locations) + + study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) + study.run() + + graph_run = study.run()[0] + results = interpolate_simulation_result(graph_run) + + for result_indx in range(len(results)): + results[result_indx] = remove_confirmed_compartments( + 
np.transpose(results[result_indx].as_ndarray()[1:, :]), num_groups) + + # Omit first column, as the time points are not of interest here. + dataset_entry = copy.deepcopy(results) + + return dataset_entry + +def remove_confirmed_compartments(dataset_entries, num_groups): + """! The compartments which contain confirmed cases are not needed and are + therefore omitted by summarizing the confirmed compartment with the + original compartment. + @param dataset_entries Array that contains the compartmental data with + confirmed compartments. + @param num_groups Number of age groups. + @return Array that contains the compartmental data without confirmed compartments. + """ + + new_dataset_entries = [] + for i in dataset_entries : + dataset_entries_reshaped = i.reshape( + [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)] + ) + sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1) + sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1) + dataset_entries_reshaped[:, 2] = sum_inf_no_symp + dataset_entries_reshaped[:, 4] = sum_inf_symp + new_dataset_entries.append( + np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten() + ) + return new_dataset_entries + + +def get_population(path="data/pydata/Germany/county_population.json"): + """! loads population data + @param path Path to population file. + @return List with all 400 populations and 6 age groups. + """ + + with open(path) as f: + data = json.load(f) + population = [] + for data_entry in data: + population_county = [] + population_county.append( + data_entry['<3 years'] + data_entry['3-5 years'] / 2) + population_county.append(data_entry['6-14 years']) + population_county.append( + data_entry['15-17 years'] + data_entry['18-24 years'] + + data_entry['25-29 years'] + data_entry['30-39 years'] / 2) + population_county.append( + data_entry['30-39 years'] / 2 + data_entry['40-49 years'] + + data_entry['50-64 years'] * 2 / 3) + population_county.append( + data_entry['65-74 years'] + data_entry['>74 years'] * 0.2 + + data_entry['50-64 years'] * 1 / 3) + population_county.append( + data_entry['>74 years'] * 0.8) + + population.append(population_county) + return population + +def getBaselineMatrix(): + """! loads the baselinematrix + """ + + baseline_contact_matrix0 = os.path.join( + "./data/contacts/baseline_home.txt") + baseline_contact_matrix1 = os.path.join( + "./data/contacts/baseline_school_pf_eig.txt") + baseline_contact_matrix2 = os.path.join( + "./data/contacts/baseline_work.txt") + baseline_contact_matrix3 = os.path.join( + "./data/contacts/baseline_other.txt") + + baseline = np.loadtxt(baseline_contact_matrix0) \ + + np.loadtxt(baseline_contact_matrix1) + \ + np.loadtxt(baseline_contact_matrix2) + \ + np.loadtxt(baseline_contact_matrix3) + + return baseline + +def generate_data( + num_runs, path, input_width, days, save_data=True): + """! Generate dataset by calling run_secir_simulation (num_runs)-often + @param num_runs Number of times, the function run_secir_simulation is called. + @param path Path, where the datasets are stored. + @param input_width number of time steps used for model input. + @param label_width number of time steps (days) used as model output/label. + @param save_data Option to deactivate the save of the dataset. Per default true. 
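+    @return None; the scaled dataset is only stored on disk.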
+ """ + + population = get_population() + days_sum = days + input_width - 1 + + data = {"inputs": [], + "labels": [], + } + + # show progess in terminal for longer runs + # Due to the random structure, theres currently no need to shuffle the data + bar = Bar('Number of Runs done', max=num_runs) + + for _ in range(num_runs): + + data_run = run_secir_groups_simulation( + days_sum, population) + + inputs = np.asarray(data_run).transpose(1, 2, 0)[: input_width] + data["inputs"].append(inputs) + + data["labels"].append(np.asarray(data_run).transpose(1, 2, 0)[input_width:]) + + bar.next() + + bar.finish() + + if save_data: + num_groups = int(np.asarray(data['inputs']).shape[2] / 8) + transformer = FunctionTransformer(np.log1p, validate=True) + + # Scale inputs + inputs = np.asarray( + data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) + scaled_inputs = transformer.transform(inputs) + original_shape_input = np.asarray(data['inputs']).shape + + # Step 1: Reverse the reshape + reshaped_back = scaled_inputs.reshape(original_shape_input[2], + original_shape_input[0], + original_shape_input[1], + original_shape_input[3]) + + # Step 2: Reverse the transpose + original_inputs = reshaped_back.transpose(1, 2, 0, 3) + scaled_inputs = original_inputs.transpose(0, 3, 1, 2) + + + # Scale labels + labels = np.asarray( + data['labels']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) + scaled_labels = transformer.transform(labels) + original_shape_labels = np.asarray(data['labels']).shape + + # Step 1: Reverse the reshape + reshaped_back = scaled_labels.reshape(original_shape_labels[2], + original_shape_labels[0], + original_shape_labels[1], + original_shape_labels[3]) + + # Step 2: Reverse the transpose + original_labels = reshaped_back.transpose(1, 2, 0, 3) + scaled_labels = original_labels.transpose(0, 3, 1, 2) + + all_data = {"inputs": scaled_inputs, + "labels": scaled_labels, + } + + # check if data directory exists. If necessary create it. 
+ if not os.path.isdir(path): + os.mkdir(path) + + # save dict to json file + with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: + pickle.dump(all_data, f) + + +if __name__ == "__main__": + + path = os.path.dirname(os.path.realpath(__file__)) + path_data = os.path.join( + os.path.dirname( + os.path.realpath(os.path.dirname(os.path.realpath(path)))), + 'data_GNN_nodamp') + + input_width = 5 + days = 30 + num_runs = 1000 + number_of_populations = 400 + generate_data(num_runs, path_data, input_width, + days, number_of_populations) + From 75ce80d9d054b4bd997c9bc14ee9c0db9f346f01 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Wed, 7 Aug 2024 15:19:55 +0200 Subject: [PATCH 05/33] delete file --- .../surrogatemodel/GNN/data_generation_nodamp | 314 ------------------ 1 file changed, 314 deletions(-) delete mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp deleted file mode 100644 index 3beecebdff..0000000000 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp +++ /dev/null @@ -1,314 +0,0 @@ -import copy -import os -import pickle -import random -import json -from datetime import date - -import numpy as np -from progress.bar import Bar -from sklearn.preprocessing import FunctionTransformer - -from memilio.simulation import (AgeGroup, LogLevel, set_log_level) - -from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, - InfectionState, Model, ModelGraph, - interpolate_simulation_result, set_edges) - -from memilio.epidata import geoModificationGermany as geoger -from memilio.epidata import transformMobilityData as tmd -from memilio.epidata import getDataIntoPandasDataFrame as gd - - -def run_secir_groups_simulation(days, populations): - """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. - Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. - @param Days Describes how many days we simulate within a single run. - @param damping_day The day when damping is applied. - @param populations List containing the population in each age group. - @return List containing the populations in each compartment used to initialize the run. - """ - set_log_level(LogLevel.Off) - - start_day = 1 - start_month = 1 - start_year = 2019 - dt = 0.1 - - # get county ids - countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True) - - # Define age Groups - groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] - num_groups = len(groups) - num_regions = len(populations) - models = [] - - # Initialize Parameters - for region in range(num_regions): - model = Model(num_groups) - - # Set parameters - for i in range(num_groups): - # Compartment transition duration - model.parameters.TimeExposed[AgeGroup(i)] = 3.2 - model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2. - model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6. - model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12. - model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8. 
- - # Initial number of people in each compartment with random numbers - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Exposed)] = random.uniform( - 0.00025, 0.005) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedNoSymptoms)] = random.uniform( - 0.0001, 0.0035) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedNoSymptomsConfirmed)] = 0 - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSymptoms)] = random.uniform( - 0.00007, 0.001) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSymptomsConfirmed)] = 0 - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSevere)] = random.uniform( - 0.00003, 0.0006) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedCritical)] = random.uniform( - 0.00001, 0.0002) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Recovered)] = random.uniform( - 0.002, 0.08) * populations[region][i] - model.populations[AgeGroup(i), - Index_InfectionState(InfectionState.Dead)] = random.uniform( - 0, 0.0003) * populations[region][i] - model.populations.set_difference_from_group_total_AgeGroup( - (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), populations[region][i]) - - # Compartment transition propabilities - model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5 - model.parameters.TransmissionProbabilityOnContact[AgeGroup( - i)] = 0.1 - model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09 - model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 - model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2 - model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25 - model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3 - # twice the value of RiskOfInfectionFromSymptomatic - model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup( - i)] = 0.5 - - # StartDay is the n-th day of the year - model.parameters.StartDay = ( - date(start_year, start_month, start_day) - date(start_year, 1, 1)).days - - # Load baseline and minimum contact matrix and assign them to the model - baseline = getBaselineMatrix() - #minimum = getMinimumMatrix() - - model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline - model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( - (num_groups, num_groups)) * 0 - - # Apply mathematical constraints to parameters - model.apply_constraints() - models.append(model) - - graph = ModelGraph() - for i in range(num_regions): - graph.add_node(int(countykey_list[i]), models[i]) - - # get mobility data directory - arg_dict = gd.cli("commuter_official") - - directory = arg_dict['out_folder'].split('/pydata')[0] - directory = os.path.join(directory, 'mobility/') - - # Merge Eisenach and Wartbugkreis in Input Data - tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') - tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') - - num_locations = 4 - - set_edges(os.path.abspath(os.path.join(directory, os.pardir)), graph, num_locations) - - study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) - study.run() - - graph_run = study.run()[0] - results = interpolate_simulation_result(graph_run) - - for result_indx in range(len(results)): - results[result_indx] = remove_confirmed_compartments( - 
np.transpose(results[result_indx].as_ndarray()[1:, :]), num_groups) - - # Omit first column, as the time points are not of interest here. - dataset_entry = copy.deepcopy(results) - - return dataset_entry - -def remove_confirmed_compartments(dataset_entries, num_groups): - """! The compartments which contain confirmed cases are not needed and are therefore omitted by summarizing the confirmed - compartment with the original compartment. - @param dataset_entries Array that contains the compartmental data with confirmed compartments. - @param num_groups Number of age groups. - @return Array that contains the compartmental data without confirmed compartments. - """ - new_dataset_entries = [] - for i in dataset_entries : - dataset_entries_reshaped = i.reshape([num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups) ]) - sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1) - sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1) - dataset_entries_reshaped[:, 2] = sum_inf_no_symp - dataset_entries_reshaped[:, 4] = sum_inf_symp - new_dataset_entries.append(np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten()) - return new_dataset_entries - - -def get_population(path="data/pydata/Germany/county_population.json"): - """! loads population data - @param path Path to population file. - @return List with all 400 populations and 6 age groups. - """ - with open(path) as f: - data = json.load(f) - population = [] - for data_entry in data: - population_county = [] - population_county.append( - data_entry['<3 years'] + data_entry['3-5 years'] / 2) - population_county.append(data_entry['6-14 years']) - population_county.append( - data_entry['15-17 years'] + data_entry['18-24 years'] + - data_entry['25-29 years'] + data_entry['30-39 years'] / 2) - population_county.append( - data_entry['30-39 years'] / 2 + data_entry['40-49 years'] + - data_entry['50-64 years'] * 2 / 3) - population_county.append( - data_entry['65-74 years'] + data_entry['>74 years'] * 0.2 + - data_entry['50-64 years'] * 1 / 3) - population_county.append( - data_entry['>74 years'] * 0.8) - - population.append(population_county) - return population - -def getBaselineMatrix(): - """! loads the baselinematrix - """ - - baseline_contact_matrix0 = os.path.join( - "./data/contacts/baseline_home.txt") - baseline_contact_matrix1 = os.path.join( - "./data/contacts/baseline_school_pf_eig.txt") - baseline_contact_matrix2 = os.path.join( - "./data/contacts/baseline_work.txt") - baseline_contact_matrix3 = os.path.join( - "./data/contacts/baseline_other.txt") - - baseline = np.loadtxt(baseline_contact_matrix0) \ - + np.loadtxt(baseline_contact_matrix1) + \ - np.loadtxt(baseline_contact_matrix2) + \ - np.loadtxt(baseline_contact_matrix3) - - return baseline - -def generate_data( - num_runs, path, input_width, days, save_data=True): - """! Generate dataset by calling run_secir_simulation (num_runs)-often - @param num_runs Number of times, the function run_secir_simulation is called. - @param path Path, where the datasets are stored. - @param input_width number of time steps used for model input. - @param label_width number of time steps (days) used as model output/label. - @param save_data Option to deactivate the save of the dataset. Per default true. 
- """ - population = get_population() - days_sum = days+input_width-1 - - - # days = damping_days[-1]+20 - data = {"inputs": [], - "labels": [], - } - - # show progess in terminal for longer runs - # Due to the random structure, theres currently no need to shuffle the data - bar = Bar('Number of Runs done', max=num_runs) - - for _ in range(num_runs): - - data_run = run_secir_groups_simulation( - days_sum, population) - - inputs = np.asarray(data_run).transpose(1,2,0)[:input_width] - data["inputs"].append(inputs) - - data["labels"].append(np.asarray(data_run).transpose(1,2,0)[input_width:]) - - bar.next() - - bar.finish() - - if save_data: - num_groups = int(np.asarray(data['inputs']).shape[2]/8) - transformer = FunctionTransformer(np.log1p, validate=True) - - # Scale inputs - inputs = np.asarray( - data['inputs']).transpose(2,0,1,3).reshape(num_groups*8,-1) - scaled_inputs = transformer.transform(inputs) - original_shape_input = np.asarray(data['inputs']).shape - - # Step 1: Reverse the reshape - reshaped_back = scaled_inputs.reshape(original_shape_input[2], original_shape_input[0], - original_shape_input[1], original_shape_input[3]) - - # Step 2: Reverse the transpose - original_inputs = reshaped_back.transpose(1, 2, 0, 3) - scaled_inputs = original_inputs.transpose(0, 3, 1, 2) - - - # Scale labels - labels = np.asarray( - data['labels']).transpose(2,0,1,3).reshape(num_groups*8, -1) - scaled_labels = transformer.transform(labels) - original_shape_labels = np.asarray(data['labels']).shape - - # Step 1: Reverse the reshape - reshaped_back = scaled_labels.reshape(original_shape_labels[2], original_shape_labels[0], - original_shape_labels[1], original_shape_labels[3]) - - # Step 2: Reverse the transpose - original_labels = reshaped_back.transpose(1, 2, 0, 3) - scaled_labels = original_labels.transpose(0, 3, 1, 2) - - all_data = {"inputs": scaled_inputs, - "labels": scaled_labels, - } - - # check if data directory exists. If necessary create it. 
- if not os.path.isdir(path): - os.mkdir(path) - - # save dict to json file - with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: - pickle.dump(all_data, f) - - -if __name__ == "__main__": - - path = os.path.dirname(os.path.realpath(__file__)) - path_data = os.path.join( - os.path.dirname( - os.path.realpath(os.path.dirname(os.path.realpath(path)))), - 'data_GNN_nodamp') - - - input_width = 5 - days = 30 - num_runs = 1000 - number_of_populations = 400 - generate_data(num_runs, path_data, input_width, - days, number_of_populations) - From 10b6e923a1c71b29a2137f367bbd17aaf845f4a4 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Wed, 7 Aug 2024 15:28:49 +0200 Subject: [PATCH 06/33] adjust to code guidelines --- .../GNN/data_generation_withdamp.py | 82 +++++++++++-------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index 0bae453fee..18c73565bd 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -3,30 +3,30 @@ import pickle import random import json +import numpy as np from datetime import date -import numpy as np from progress.bar import Bar from sklearn.preprocessing import FunctionTransformer from memilio.simulation import (AgeGroup, LogLevel, set_log_level, Damping) - from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, InfectionState, Model, ModelGraph, interpolate_simulation_result, set_edges) - from memilio.epidata import geoModificationGermany as geoger from memilio.epidata import transformMobilityData as tmd from memilio.epidata import getDataIntoPandasDataFrame as gd - def run_secir_groups_simulation(days, populations, num_dampings): - """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. - Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. + """! Uses an ODE SECIR model allowing for asymptomatic infection + with 6 different age groups. The model is not stratified by region. + Virus-specific parameters are fixed and initial number of persons + in the particular infection states are chosen randomly from defined ranges. @param Days Describes how many days we simulate within a single run. @param damping_day The day when damping is applied. @param populations List containing the population in each age group. - @return List containing the populations in each compartment used to initialize the run. + @return List containing the populations in each compartment + used to initialize the run. """ set_log_level(LogLevel.Off) @@ -121,7 +121,7 @@ def run_secir_groups_simulation(days, populations, num_dampings): # add damping to model model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( - coeffs=(damping), t=day, level=0, type=0)) + coeffs = (damping), t=day, level=0, type=0)) damped_matrices.append(model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at( day+1)) @@ -165,20 +165,28 @@ def run_secir_groups_simulation(days, populations, num_dampings): return dataset_entry, damped_matrices, num_dampings, damping_coeff def remove_confirmed_compartments(dataset_entries, num_groups): - """! 
The compartments which contain confirmed cases are not needed and are therefore omitted by summarizing the confirmed + """! The compartments which contain confirmed cases are not + needed and are therefore omitted by summarizing the confirmed compartment with the original compartment. - @param dataset_entries Array that contains the compartmental data with confirmed compartments. + @param dataset_entries Array that contains the compartmental data with + confirmed compartments. @param num_groups Number of age groups. - @return Array that contains the compartmental data without confirmed compartments. + @return Array that contains the compartmental data without + confirmed compartments. """ + new_dataset_entries = [] for i in dataset_entries : - dataset_entries_reshaped = i.reshape([num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups) ]) + dataset_entries_reshaped = i.reshape( + [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)] + ) sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1) sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1) dataset_entries_reshaped[:, 2] = sum_inf_no_symp dataset_entries_reshaped[:, 4] = sum_inf_symp - new_dataset_entries.append(np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten()) + new_dataset_entries.append(np.delete( + dataset_entries_reshaped , [3, 5], axis=1).flatten() + ) return new_dataset_entries @@ -187,6 +195,7 @@ def get_population(path="data/pydata/Germany/county_population.json"): @param path Path to population file. @return List with all 400 populations and 6 age groups. """ + with open(path) as f: data = json.load(f) population = [] @@ -252,12 +261,14 @@ def getMinimumMatrix(): def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min_damping_day, n_runs): - """! Draw damping days while keeping a minimum distance between the damping days. This method aims to create a - uniform ditribution of drawn damping days. + """! Draw damping days while keeping a minimum distance between the + damping days. This method aims to create a uniform ditribution of + drawn damping days. @param num_of_dampings Number of dampings that have to be drawn. @param days Number of days which are simulated (label_width). @param min_distance The minimum number of days between two dampings. - @param min_damping_day The earliest day of the simualtion where a damping can take place. + @param min_damping_day The earliest day of the simualtion where a damping + can take place. @param n_runs Number of simulation runs. 
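+    @return Array of shape (n_runs, number_of_dampings) containing the
+        sorted damping days of each run.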
""" @@ -266,10 +277,11 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min count_shadow = 0 while len(all_dampings) Date: Mon, 12 Aug 2024 13:44:51 +0200 Subject: [PATCH 07/33] create a make_graph function --- .../GNN/data_generation_nodamp.py | 51 ++++++++++++------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py index e15fccf453..c3d1bf4dcc 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -118,24 +118,7 @@ def run_secir_groups_simulation(days, populations): model.apply_constraints() models.append(model) - graph = ModelGraph() - for i in range(num_regions): - graph.add_node(int(countykey_list[i]), models[i]) - - # get mobility data directory - arg_dict = gd.cli("commuter_official") - - directory = arg_dict['out_folder'].split('/pydata')[0] - directory = os.path.join(directory, 'mobility/') - - # Merge Eisenach and Wartbugkreis in Input Data - tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') - tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') - - num_locations = 4 - - set_edges(os.path.abspath(os.path.join(directory, os.pardir)), - graph, num_locations) + graph = make_graph(num_regions, countykey_list, models) study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) study.run() @@ -152,6 +135,7 @@ def run_secir_groups_simulation(days, populations): return dataset_entry + def remove_confirmed_compartments(dataset_entries, num_groups): """! The compartments which contain confirmed cases are not needed and are therefore omitted by summarizing the confirmed compartment with the @@ -226,6 +210,35 @@ def getBaselineMatrix(): return baseline +def make_graph(num_regions, countykey_list, models): + """! + @param num_regions Number (int) of counties that should be added to the + grap-ODE model. Equals 400 for whole Germany. + @param countykey_list List of keys/IDs for each county. + @models models List of osecir Model with one model per population. + @return graph Graph-ODE model. + """ + graph = ModelGraph() + for i in range(num_regions): + graph.add_node(int(countykey_list[i]), models[i]) + + # get mobility data directory + arg_dict = gd.cli("commuter_official") + + directory = arg_dict['out_folder'].split('/pydata')[0] + directory = os.path.join(directory, 'mobility/') + + # Merge Eisenach and Wartbugkreis in Input Data + tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') + tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') + + num_locations = 4 + + set_edges(os.path.abspath(os.path.join(directory, os.pardir)), + graph, num_locations) + return graph + + def generate_data( num_runs, path, input_width, days, save_data=True): """! 
Generate dataset by calling run_secir_simulation (num_runs)-often @@ -317,7 +330,7 @@ def generate_data( path_data = os.path.join( os.path.dirname( os.path.realpath(os.path.dirname(os.path.realpath(path)))), - 'data_GNN_nodamp') + 'data_GNN_nodamp_test') input_width = 5 days = 30 From c9b26cf01a5759eabd64d5bfbeb62592f55813a9 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Mon, 12 Aug 2024 14:22:37 +0200 Subject: [PATCH 08/33] create make graph function --- .../GNN/data_generation_withdamp.py | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index 18c73565bd..852cb0dfbf 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -131,23 +131,7 @@ def run_secir_groups_simulation(days, populations, num_dampings): model.apply_constraints() models.append(model) - graph = ModelGraph() - for i in range(num_regions): - graph.add_node(int(countykey_list[i]), models[i]) - - # get mobility data directory - arg_dict = gd.cli("commuter_official") - - directory = arg_dict['out_folder'].split('/pydata')[0] - directory = os.path.join(directory, 'mobility/') - - # Merge Eisenach and Wartbugkreis in Input Data - tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') - tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') - - num_locations = 4 - - set_edges(os.path.abspath(os.path.join(directory, os.pardir)), graph, num_locations) + graph = make_graph(num_regions, countykey_list, models) study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) study.run() @@ -260,6 +244,35 @@ def getMinimumMatrix(): return minimum +def make_graph(num_regions, countykey_list, models): + """! + @param num_regions Number (int) of counties that should be added to the + grap-ODE model. Equals 400 for whole Germany. + @param countykey_list List of keys/IDs for each county. + @models models List of osecir Model with one model per population. + @return graph Graph-ODE model. + """ + graph = ModelGraph() + for i in range(num_regions): + graph.add_node(int(countykey_list[i]), models[i]) + + # get mobility data directory + arg_dict = gd.cli("commuter_official") + + directory = arg_dict['out_folder'].split('/pydata')[0] + directory = os.path.join(directory, 'mobility/') + + # Merge Eisenach and Wartbugkreis in Input Data + tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') + tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') + + num_locations = 4 + + set_edges(os.path.abspath(os.path.join(directory, os.pardir)), + graph, num_locations) + return graph + + def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min_damping_day, n_runs): """! Draw damping days while keeping a minimum distance between the damping days. 
This method aims to create a uniform ditribution of @@ -425,15 +438,15 @@ def generate_data( input_width = 5 label_width = 30 - number_of_dampings = 3 + number_of_dampings = 1 num_runs = 2 - number_of_populations = 400 + number_of_populations = 20 path = os.path.dirname(os.path.realpath(__file__)) path_data = os.path.join( os.path.dirname( os.path.realpath(os.path.dirname(os.path.realpath(path)))), - 'data_GNN_with_'+str(number_of_dampings)+'_dampings') + 'data_GNN_with_'+str(number_of_dampings)+'_dampings_test') generate_data(num_runs, path_data, input_width, label_width, number_of_populations) From d97a1f7df16636947b8608d09cbec6bec7b88527 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Wed, 14 Aug 2024 12:44:27 +0200 Subject: [PATCH 09/33] fix population number at 400 --- .../memilio/surrogatemodel/GNN/data_generation_nodamp.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py index c3d1bf4dcc..06a4a4c67b 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -323,6 +323,7 @@ def generate_data( with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: pickle.dump(all_data, f) + return data if __name__ == "__main__": @@ -330,12 +331,12 @@ def generate_data( path_data = os.path.join( os.path.dirname( os.path.realpath(os.path.dirname(os.path.realpath(path)))), - 'data_GNN_nodamp_test') + 'data_GNN_nodamp') input_width = 5 days = 30 num_runs = 1000 - number_of_populations = 400 + generate_data(num_runs, path_data, input_width, - days, number_of_populations) + days) From e20fe2d6386995a493e4a8391585f3a613a045c1 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Wed, 14 Aug 2024 12:50:28 +0200 Subject: [PATCH 10/33] fix number of population to 400 and rename damping information --- .../GNN/data_generation_withdamp.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index 852cb0dfbf..5bc060c84b 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -17,7 +17,7 @@ from memilio.epidata import transformMobilityData as tmd from memilio.epidata import getDataIntoPandasDataFrame as gd -def run_secir_groups_simulation(days, populations, num_dampings): +def run_secir_groups_simulation(days, populations, dampings): """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. Virus-specific parameters are fixed and initial number of persons @@ -113,7 +113,7 @@ def run_secir_groups_simulation(days, populations, num_dampings): # Generate a damping matrix and assign it to the model damped_matrices = [] damping_coeff = [] - for day in num_dampings: + for day in dampings: # generat a random damping factor damping = np.ones((num_groups, num_groups) @@ -146,7 +146,7 @@ def run_secir_groups_simulation(days, populations, num_dampings): # Omit first column, as the time points are not of interest here. 
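     # (After interpolation to full days and the merging of the confirmed
     # compartments above, each region's entry holds days+1 daily snapshots
     # with 6 age groups * 8 compartments = 48 values per day.)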
dataset_entry = copy.deepcopy(results) - return dataset_entry, damped_matrices, num_dampings, damping_coeff + return dataset_entry, damped_matrices, dampings, damping_coeff def remove_confirmed_compartments(dataset_entries, num_groups): """! The compartments which contain confirmed cases are not @@ -331,7 +331,7 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min return np.asarray(all_dampings) def generate_data( - num_runs, path, input_width, label_width, save_data=True): + num_runs, path, input_width, label_width, number_of_dampings, save_data=True): """! Generate dataset by calling run_secir_simulation (num_runs)-often @param num_runs Number of times, the function run_secir_simulation is called. @param path Path, where the datasets are stored. @@ -437,11 +437,9 @@ def generate_data( if __name__ == "__main__": input_width = 5 - label_width = 30 - number_of_dampings = 1 - num_runs = 2 - number_of_populations = 20 - + label_width = 100 + number_of_dampings = 2 + num_runs = 1000 path = os.path.dirname(os.path.realpath(__file__)) path_data = os.path.join( os.path.dirname( @@ -449,5 +447,5 @@ def generate_data( 'data_GNN_with_'+str(number_of_dampings)+'_dampings_test') generate_data(num_runs, path_data, input_width, - label_width, number_of_populations) + label_width, number_of_dampings) From d9110e5c6289cbdf7965a910df5bfff2502b1ff3 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 15 Aug 2024 11:07:57 +0200 Subject: [PATCH 11/33] add "return data" for case without saving --- .../memilio/surrogatemodel/GNN/data_generation_withdamp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index 5bc060c84b..ac7c7d4599 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -432,6 +432,7 @@ def generate_data( # save dict to json file with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: pickle.dump(all_data, f) + return data if __name__ == "__main__": From 8823aa1fccfd6e107f46b63759a3242a38e5a50e Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 15 Aug 2024 13:50:37 +0200 Subject: [PATCH 12/33] tests for simulation run and datageneration for models with and without dampings --- .../test_surrogatemodel_GNN.py | 284 ++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py new file mode 100644 index 0000000000..7f86a6d469 --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py @@ -0,0 +1,284 @@ +############################################################################# +# Copyright (C) 2020-2023 German Aerospace Center (DLR-SC) +# +# Authors: +# +# Contact: Martin J. Kuehn +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################# +from pyfakefs import fake_filesystem_unittest + +from memilio.surrogatemodel.GNN import (data_generation_nodamp, data_generation_withdamp) +import memilio.simulation as mio +import memilio.simulation.osecir as osecir + +from unittest.mock import patch +import os +import unittest + +import numpy as np +import logging +import json +# suppress all autograph warnings from tensorflow + +#logging.getLogger("tensorflow").setLevel(logging.ERROR) +import tensorflow as tf +tf.get_logger().setLevel('ERROR') + +class TestSurrogatemodelGNN(fake_filesystem_unittest.TestCase): + + path = '/home/' + + num_groups = 6 + model = osecir.Model(num_groups) + graph = osecir.ModelGraph() + graph.add_node(0, model) + graph.add_node(1, model) + mobility_coefficients = 0.01 * np.ones(model.populations.numel()) + for i in range(num_groups): + flat_index = model.populations.get_flat_index( + osecir.MultiIndex_PopulationsArray(mio.AgeGroup(i), osecir.InfectionState.Dead)) + mobility_coefficients[flat_index] = 0 + graph.add_edge(0, 1, mobility_coefficients) + graph.add_edge(1, 0, mobility_coefficients) + + def setUp(self): + self.setUpPyfakefs() + +#### test simulation no damp #### + + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', + return_value= graph) + + def test_simulation_run_nodamp(self, mock_baseline, mockgraph): + days_1 = 10 + days_2 = 30 + days_3 = 50 + + population = [[5256.0, 10551, 32368.5, + 43637.833333333336, 22874.066666666666, 8473.6], + [4000, 8000, 40000, + 28000, 15000, 6000]] + + simulation_1 = data_generation_nodamp.run_secir_groups_simulation( + days_1, population) + simulation_2 = data_generation_nodamp.run_secir_groups_simulation( + days_2, population) + simulation_3 = data_generation_nodamp.run_secir_groups_simulation( + days_3, population) + + # result length + self.assertEqual(len(simulation_1[0]), days_1+1) + self.assertEqual(len(simulation_2[0]), days_2+1) + self.assertEqual(len(simulation_3[0]), days_3+1) + +#### test data genertion no damp #### + + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', + return_value= graph) + + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.get_population', + return_value= np.random.randint(0, 700001, size=(400, 6))) + + def test_data_generation_runs_nodamp( + self, mock_population, mock_baseline, mock_graph): + + input_width_1 = 1 + input_width_2 = 5 + + label_width_1 = 1 + label_width_2 = 10 + + num_runs_1 = 1 + num_runs_2 = 2 + + # test data generation without damping + data_1 = data_generation_nodamp.generate_data( + num_runs_1, self.path, input_width_1, label_width_1, + save_data=False) + self.assertEqual(len(data_1['inputs']), num_runs_1) + self.assertEqual(len(data_1['inputs'][0]), input_width_1) + 
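# 48 features per day and region = 6 age groups * 8 compartments
+        # (the confirmed compartments are merged away during data generation)
+        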
self.assertEqual(len(data_1['inputs'][0][0]), 48) + self.assertEqual(len(data_1['labels']), num_runs_1) + self.assertEqual(len(data_1['labels'][0]), label_width_1) + self.assertEqual(len(data_1['labels'][0][0]), 48) + + data_2 = data_generation_nodamp.generate_data( + num_runs_2, self.path, input_width_2, label_width_2, + save_data=False) + self.assertEqual(len(data_2['inputs']), num_runs_2) + self.assertEqual(len(data_2['inputs'][0]), input_width_2) + self.assertEqual(len(data_2['inputs'][0][0]), 48) + self.assertEqual(len(data_2['labels']), num_runs_2) + self.assertEqual(len(data_2['labels'][0]), label_width_2) + self.assertEqual(len(data_2['labels'][0][0]), 48) + + + + # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', + # return_value=0.6 * np.ones((6, 6))) + + # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.get_population', + # return_value= np.random.randint(0, 700001, size=(400, 6))) + # # create mock graph + # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', + # return_value= graph) + + # def test_data_generation_save_nodamp( + # self, mock_population, mock_baseline, mock_graph): + + # input_width = 2 + # label_width = 3 + # num_runs = 1 + + # data_generation_nodamp.generate_data(num_runs, self.path, input_width, + # label_width) + # self.assertEqual(len(os.listdir(self.path)), 1) + + # self.assertEqual(os.listdir(self.path), + # ['data_secir_groups.pickle']) + + + + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getMinimumMatrix', + return_value=0 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', + return_value= graph) + + def test_simulation_run_withdamp(self, mock_baseline, mock_minimum, mock_graph): + + days_1 = 10 + days_2 = 30 + days_3 = 50 + + dampings1 = [5] + dampings2 = [6, 15] + dampings3 = [8, 18, 35] + + + population = [[5256.0, 10551, 32368.5, + 43637.833333333336, 22874.066666666666, 8473.6], + [4000, 8000, 40000, + 28000, 15000, 6000]] + + dataset_entry1, damped_matrices1, num_damp1, damping_coeff1 = data_generation_withdamp.run_secir_groups_simulation( + days_1, population, dampings1) + dataset_entry2, damped_matrices2, num_damp2, damping_coeff2 = data_generation_withdamp.run_secir_groups_simulation( + days_2, population, dampings2) + dataset_entry3, damped_matrices3, num_damp3, damping_coeff3 = data_generation_withdamp.run_secir_groups_simulation( + days_3, population, dampings3) + + # result length + self.assertEqual(len(dataset_entry1[0]), days_1+1) + self.assertEqual(len(dataset_entry2[0]), days_2+1) + self.assertEqual(len(dataset_entry3[0]), days_3+1) + + + baseline = data_generation_withdamp.getBaselineMatrix() + #damping factor + self.assertEqual(damped_matrices1[0].all(), + (baseline * damping_coeff1[0]).all()) + self.assertEqual( + damped_matrices2[1].all(), + (baseline * damping_coeff2[1]).all()) + self.assertEqual( + damped_matrices3[2].all(), + (baseline * damping_coeff3[2]).all()) + + # number of dampings length + self.assertEqual(len(damping_coeff1), len(dampings1)) + self.assertEqual(len(damping_coeff2), len(dampings2)) + self.assertEqual(len(damping_coeff3), len(dampings3)) + + + + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getMinimumMatrix', + return_value=0 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getBaselineMatrix', + 
return_value=0.6 * np.ones((6, 6))) + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', + return_value= graph) + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.get_population', + return_value= np.random.randint(0, 700001, size=(400, 6))) + + def test_data_generation_runs_withdamp( + self, mock_population, mock_baseline, mock_minimum, mock_graph): + + input_width_1 = 1 + input_width_2 = 5 + + label_width_1 = 10 + label_width_2 = 30 + + num_runs_1 = 1 + num_runs_2 = 2 + + damping1 = 1 + damping2 = 2 + + data_1 = data_generation_withdamp.generate_data( + num_runs_1, self.path, input_width_1, label_width_1, + damping1, save_data=False) + self.assertEqual(len(data_1['inputs']), num_runs_1) + self.assertEqual(len(data_1['inputs'][0]), input_width_1) + self.assertEqual(len(data_1['inputs'][0][0]), 48) + self.assertEqual(len(data_1['labels']), num_runs_1) + self.assertEqual(len(data_1['labels'][0]), label_width_1) + self.assertEqual(len(data_1['labels'][0][0]), 48) + + data_2 = data_generation_withdamp.generate_data( + num_runs_2, self.path, input_width_2, label_width_2, + damping2, save_data=False) + self.assertEqual(len(data_2['inputs']), num_runs_2) + self.assertEqual(len(data_2['inputs'][0]), input_width_2) + self.assertEqual(len(data_2['inputs'][0][0]), 48) + self.assertEqual(len(data_2['labels']), num_runs_2) + self.assertEqual(len(data_2['labels'][0]), label_width_2) + self.assertEqual(len(data_2['labels'][0][0]), 48) + + + + + + + # def test_data_generation_save_withdamp( + # self, mock_population, mock_baseline, mock_minimum): + + # input_width = 2 + # label_width = 3 + # num_runs = 1 + # dampings = 3 + + # data_generation_withdamp.generate_data(num_runs, self.path, "", input_width, + # label_width) + # self.assertEqual(len(os.listdir(self.path)), 1) + + # self.assertEqual(os.listdir(self.path), + # ['data_secir_groups.pickle']) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 74da6d8dcd132f1bd4ded0037d571c38ed993b6b Mon Sep 17 00:00:00 2001 From: AgathaSchmidt <62878663+AgathaSchmidt@users.noreply.github.com> Date: Mon, 26 Aug 2024 09:50:59 +0200 Subject: [PATCH 13/33] Apply suggestions from code review Co-authored-by: Henrik Zunker <69154294+HenrZu@users.noreply.github.com> --- .../memilio/surrogatemodel/GNN/data_generation_nodamp.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py index 06a4a4c67b..a55fe77a0e 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -24,7 +24,6 @@ def run_secir_groups_simulation(days, populations): Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. @param Days Describes how many days we simulate within a single run. - @param damping_day The day when damping is applied. @param populations List containing the population in each age group. @return List containing the populations in each compartment used to initialize the run. 
@@ -98,7 +97,6 @@ def run_secir_groups_simulation(days, populations): model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2 model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25 model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3 - # twice the value of RiskOfInfectionFromSymptomatic model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup( i)] = 0.5 @@ -108,7 +106,6 @@ def run_secir_groups_simulation(days, populations): # Load baseline and minimum contact matrix and assign them to the model baseline = getBaselineMatrix() - #minimum = getMinimumMatrix() model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( From 47f99eb2e0d0f1a090b25d3f37b20d2cda8b110a Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 29 Aug 2024 13:29:31 +0200 Subject: [PATCH 14/33] apply suggestions from code review --- .../GNN/data_generation_nodamp.py | 266 ++++++------------ 1 file changed, 90 insertions(+), 176 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py index a55fe77a0e..0bbb2a6bc3 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -2,20 +2,20 @@ import os import pickle import random -import json -from datetime import date import numpy as np - +import pandas as pd + from progress.bar import Bar from sklearn.preprocessing import FunctionTransformer +from datetime import date from memilio.simulation import (AgeGroup, LogLevel, set_log_level) from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, - InfectionState, Model, ModelGraph, - interpolate_simulation_result, set_edges) + InfectionState, Model, interpolate_simulation_result) from memilio.epidata import geoModificationGermany as geoger -from memilio.epidata import transformMobilityData as tmd -from memilio.epidata import getDataIntoPandasDataFrame as gd +import memilio.epidata.getPopulationData as gpd +from memilio.epidata import modifyDataframeSeries as mdfs +from .GNN_utils import getBaselineMatrix, transform_mobility_directory, make_graph, remove_confirmed_compartments def run_secir_groups_simulation(days, populations): @@ -28,7 +28,7 @@ def run_secir_groups_simulation(days, populations): @return List containing the populations in each compartment used to initialize the run. """ - + set_log_level(LogLevel.Off) start_day = 1 @@ -59,6 +59,7 @@ def run_secir_groups_simulation(days, populations): model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8. 
# Initial number of people in each compartment with random numbers + # Numbers are chosen heuristically based on experience model.populations[AgeGroup(i), Index_InfectionState( InfectionState.Exposed)] = random.uniform( 0.00025, 0.005) * populations[region][i] @@ -82,11 +83,11 @@ def run_secir_groups_simulation(days, populations): InfectionState.Recovered)] = random.uniform( 0.002, 0.08) * populations[region][i] model.populations[AgeGroup(i), - Index_InfectionState(InfectionState.Dead)] = random.uniform( + Index_InfectionState(InfectionState.Dead)] = random.uniform( 0, 0.0003) * populations[region][i] model.populations.set_difference_from_group_total_AgeGroup( - (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), - populations[region][i]) + (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), + populations[region][i]) # Compartment transition propabilities model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5 @@ -109,13 +110,14 @@ def run_secir_groups_simulation(days, populations): model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( - (num_groups, num_groups)) * 0 + (num_groups, num_groups)) * 0 # Apply mathematical constraints to parameters model.apply_constraints() models.append(model) - graph = make_graph(num_regions, countykey_list, models) + directory = transform_mobility_directory() + graph = make_graph(directory, num_regions, countykey_list, models) study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) study.run() @@ -133,109 +135,6 @@ def run_secir_groups_simulation(days, populations): return dataset_entry -def remove_confirmed_compartments(dataset_entries, num_groups): - """! The compartments which contain confirmed cases are not needed and are - therefore omitted by summarizing the confirmed compartment with the - original compartment. - @param dataset_entries Array that contains the compartmental data with - confirmed compartments. - @param num_groups Number of age groups. - @return Array that contains the compartmental data without confirmed compartments. - """ - - new_dataset_entries = [] - for i in dataset_entries : - dataset_entries_reshaped = i.reshape( - [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)] - ) - sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1) - sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1) - dataset_entries_reshaped[:, 2] = sum_inf_no_symp - dataset_entries_reshaped[:, 4] = sum_inf_symp - new_dataset_entries.append( - np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten() - ) - return new_dataset_entries - - -def get_population(path="data/pydata/Germany/county_population.json"): - """! loads population data - @param path Path to population file. - @return List with all 400 populations and 6 age groups. 
- """ - - with open(path) as f: - data = json.load(f) - population = [] - for data_entry in data: - population_county = [] - population_county.append( - data_entry['<3 years'] + data_entry['3-5 years'] / 2) - population_county.append(data_entry['6-14 years']) - population_county.append( - data_entry['15-17 years'] + data_entry['18-24 years'] + - data_entry['25-29 years'] + data_entry['30-39 years'] / 2) - population_county.append( - data_entry['30-39 years'] / 2 + data_entry['40-49 years'] + - data_entry['50-64 years'] * 2 / 3) - population_county.append( - data_entry['65-74 years'] + data_entry['>74 years'] * 0.2 + - data_entry['50-64 years'] * 1 / 3) - population_county.append( - data_entry['>74 years'] * 0.8) - - population.append(population_county) - return population - -def getBaselineMatrix(): - """! loads the baselinematrix - """ - - baseline_contact_matrix0 = os.path.join( - "./data/contacts/baseline_home.txt") - baseline_contact_matrix1 = os.path.join( - "./data/contacts/baseline_school_pf_eig.txt") - baseline_contact_matrix2 = os.path.join( - "./data/contacts/baseline_work.txt") - baseline_contact_matrix3 = os.path.join( - "./data/contacts/baseline_other.txt") - - baseline = np.loadtxt(baseline_contact_matrix0) \ - + np.loadtxt(baseline_contact_matrix1) + \ - np.loadtxt(baseline_contact_matrix2) + \ - np.loadtxt(baseline_contact_matrix3) - - return baseline - -def make_graph(num_regions, countykey_list, models): - """! - @param num_regions Number (int) of counties that should be added to the - grap-ODE model. Equals 400 for whole Germany. - @param countykey_list List of keys/IDs for each county. - @models models List of osecir Model with one model per population. - @return graph Graph-ODE model. - """ - graph = ModelGraph() - for i in range(num_regions): - graph.add_node(int(countykey_list[i]), models[i]) - - # get mobility data directory - arg_dict = gd.cli("commuter_official") - - directory = arg_dict['out_folder'].split('/pydata')[0] - directory = os.path.join(directory, 'mobility/') - - # Merge Eisenach and Wartbugkreis in Input Data - tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') - tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') - - num_locations = 4 - - set_edges(os.path.abspath(os.path.join(directory, os.pardir)), - graph, num_locations) - return graph - - def generate_data( num_runs, path, input_width, days, save_data=True): """! Generate dataset by calling run_secir_simulation (num_runs)-often @@ -245,13 +144,23 @@ def generate_data( @param label_width number of time steps (days) used as model output/label. @param save_data Option to deactivate the save of the dataset. Per default true. 
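+    @return Dictionary with the (unscaled) inputs and labels of all runs.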
""" - - population = get_population() + + df_population = pd.read_json( + 'data/pydata/Germany/county_population.json') + age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80-130'] + + df_population_agegroups = pd.DataFrame( + columns=[df_population.columns[0]] + age_groups) + for region_id in df_population.iloc[:, 0]: + df_population_agegroups.loc[len(df_population_agegroups.index), :] = [int(region_id)] + list( + mdfs.fit_age_group_intervals(df_population[df_population.iloc[:, 0] == int(region_id)].iloc[:, 2:], age_groups)) + + population = df_population_agegroups.values.tolist days_sum = days + input_width - 1 data = {"inputs": [], - "labels": [], - } + "labels": [], + } # show progess in terminal for longer runs # Due to the random structure, theres currently no need to shuffle the data @@ -259,69 +168,75 @@ def generate_data( for _ in range(num_runs): - data_run = run_secir_groups_simulation( - days_sum, population) + data_run = run_secir_groups_simulation( + days_sum, population) - inputs = np.asarray(data_run).transpose(1, 2, 0)[: input_width] - data["inputs"].append(inputs) + inputs = np.asarray(data_run).transpose(1, 2, 0)[: input_width] + data["inputs"].append(inputs) - data["labels"].append(np.asarray(data_run).transpose(1, 2, 0)[input_width:]) + data["labels"].append(np.asarray( + data_run).transpose(1, 2, 0)[input_width:]) - bar.next() + bar.next() bar.finish() if save_data: - num_groups = int(np.asarray(data['inputs']).shape[2] / 8) - transformer = FunctionTransformer(np.log1p, validate=True) - - # Scale inputs - inputs = np.asarray( - data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) - scaled_inputs = transformer.transform(inputs) - original_shape_input = np.asarray(data['inputs']).shape - - # Step 1: Reverse the reshape - reshaped_back = scaled_inputs.reshape(original_shape_input[2], - original_shape_input[0], - original_shape_input[1], - original_shape_input[3]) - - # Step 2: Reverse the transpose - original_inputs = reshaped_back.transpose(1, 2, 0, 3) - scaled_inputs = original_inputs.transpose(0, 3, 1, 2) - - - # Scale labels - labels = np.asarray( - data['labels']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) - scaled_labels = transformer.transform(labels) - original_shape_labels = np.asarray(data['labels']).shape - - # Step 1: Reverse the reshape - reshaped_back = scaled_labels.reshape(original_shape_labels[2], - original_shape_labels[0], - original_shape_labels[1], - original_shape_labels[3]) - - # Step 2: Reverse the transpose - original_labels = reshaped_back.transpose(1, 2, 0, 3) - scaled_labels = original_labels.transpose(0, 3, 1, 2) - - all_data = {"inputs": scaled_inputs, - "labels": scaled_labels, - } - - # check if data directory exists. If necessary create it. 
- if not os.path.isdir(path): - os.mkdir(path) - - # save dict to json file - with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: - pickle.dump(all_data, f) + + # we use a logistic transformer to reduce the + # influence of ouliers and to correct the skewness of out dataset + # as a result we obtaina dataset which is handled better by the NN + + num_groups = int(np.asarray(data['inputs']).shape[2] / 8) + transformer = FunctionTransformer(np.log1p, validate=True) + + # Scale inputs + inputs = np.asarray( + data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) + scaled_inputs = transformer.transform(inputs) + original_shape_input = np.asarray(data['inputs']).shape + + # Reverse the reshape + reshaped_back = scaled_inputs.reshape(original_shape_input[2], + original_shape_input[0], + original_shape_input[1], + original_shape_input[3]) + + # Reverse the transpose + original_inputs = reshaped_back.transpose(1, 2, 0, 3) + scaled_inputs = original_inputs.transpose(0, 3, 1, 2) + + # Scale labels + labels = np.asarray( + data['labels']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) + scaled_labels = transformer.transform(labels) + original_shape_labels = np.asarray(data['labels']).shape + + # Reverse the reshape + reshaped_back = scaled_labels.reshape(original_shape_labels[2], + original_shape_labels[0], + original_shape_labels[1], + original_shape_labels[3]) + + # Reverse the transpose + original_labels = reshaped_back.transpose(1, 2, 0, 3) + scaled_labels = original_labels.transpose(0, 3, 1, 2) + + all_data = {"inputs": scaled_inputs, + "labels": scaled_labels, + } + + # check if data directory exists. If necessary create it. + if not os.path.isdir(path): + os.mkdir(path) + + # save dict to json file + with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: + pickle.dump(all_data, f) return data + if __name__ == "__main__": path = os.path.dirname(os.path.realpath(__file__)) @@ -329,11 +244,10 @@ def generate_data( os.path.dirname( os.path.realpath(os.path.dirname(os.path.realpath(path)))), 'data_GNN_nodamp') - + input_width = 5 days = 30 num_runs = 1000 - + generate_data(num_runs, path_data, input_width, days) - From 58fc8e87f9a5c2e5fd8427bd68e1f735391b81f3 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 29 Aug 2024 13:30:12 +0200 Subject: [PATCH 15/33] a python file with functions frequently used for our GNNs --- .../memilio/surrogatemodel/GNN/GNN_utils.py | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py new file mode 100644 index 0000000000..d2b333bb30 --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py @@ -0,0 +1,85 @@ +import numpy as np +import os + +from memilio.epidata import transformMobilityData as tmd +from memilio.epidata import getDataIntoPandasDataFrame as gd +from memilio.simulation.osecir import (ModelGraph, set_edges) + + +def remove_confirmed_compartments(dataset_entries, num_groups): + """! The compartments which contain confirmed cases are not needed and are + therefore omitted by summarizing the confirmed compartment with the + original compartment. + @param dataset_entries Array that contains the compartmental data with + confirmed compartments. + @param num_groups Number of age groups. 
+    @return Array that contains the compartmental data without confirmed compartments.
+    """
+
+    new_dataset_entries = []
+    for i in dataset_entries :
+        dataset_entries_reshaped = i.reshape(
+            [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)]
+            )
+        sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1)
+        sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1)
+        dataset_entries_reshaped[:, 2] = sum_inf_no_symp
+        dataset_entries_reshaped[:, 4] = sum_inf_symp
+        new_dataset_entries.append(
+            np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten()
+            )
+    return new_dataset_entries
+
+def getBaselineMatrix():
+    """! Loads the baseline contact matrix.
+    """
+
+    baseline_contact_matrix0 = os.path.join(
+        "./data/contacts/baseline_home.txt")
+    baseline_contact_matrix1 = os.path.join(
+        "./data/contacts/baseline_school_pf_eig.txt")
+    baseline_contact_matrix2 = os.path.join(
+        "./data/contacts/baseline_work.txt")
+    baseline_contact_matrix3 = os.path.join(
+        "./data/contacts/baseline_other.txt")
+
+    baseline = np.loadtxt(baseline_contact_matrix0) \
+        + np.loadtxt(baseline_contact_matrix1) + \
+        np.loadtxt(baseline_contact_matrix2) + \
+        np.loadtxt(baseline_contact_matrix3)
+
+    return baseline
+
+def make_graph(directory, num_regions, countykey_list, models):
+    """!
+    @param directory Directory with mobility data.
+    @param num_regions Number (int) of counties that should be added to the
+        graph-ODE model. Equals 400 for whole Germany.
+    @param countykey_list List of keys/IDs for each county.
+    @param models List of osecir Model objects, one model per population.
+    @return graph Graph-ODE model.
+    """
+    graph = ModelGraph()
+    for i in range(num_regions):
+        graph.add_node(int(countykey_list[i]), models[i])
+
+    num_locations = 4
+
+    set_edges(os.path.abspath(os.path.join(directory, os.pardir)),
+    graph, num_locations)
+    return graph
+
+
+def transform_mobility_directory():
+    """!
Transforms the mobility data by merging Eisenach and Wartburgkreis
+    """
+    # get mobility data directory
+    arg_dict = gd.cli("commuter_official")
+
+    directory = arg_dict['out_folder'].split('/pydata')[0]
+    directory = os.path.join(directory, 'mobility/')
+
+    # Merge Eisenach and Wartburgkreis in Input Data
+    tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252')
+    tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled')
+    return directory
\ No newline at end of file
From ffa076e37012e0d1d3accb1746a183794df4fb49 Mon Sep 17 00:00:00 2001
From: Agatha Schmidt
Date: Mon, 2 Sep 2024 09:48:30 +0200
Subject: [PATCH 16/33] add get population and get minimum matrix to utils

---
 .../memilio/surrogatemodel/GNN/GNN_utils.py   | 82 ++++++++++++++-----
 1 file changed, 61 insertions(+), 21 deletions(-)

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
index d2b333bb30..97e19d71ea 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
@@ -1,9 +1,11 @@
-import numpy as np
-import os
+import numpy as np
+import pandas as pd
+import os
 
 from memilio.epidata import transformMobilityData as tmd
 from memilio.epidata import getDataIntoPandasDataFrame as gd
 from memilio.simulation.osecir import (ModelGraph, set_edges)
+from memilio.epidata import modifyDataframeSeries as mdfs
 
 
 def remove_confirmed_compartments(dataset_entries, num_groups):
@@ -15,21 +17,22 @@ def remove_confirmed_compartments(dataset_entries, num_groups):
     @param num_groups Number of age groups.
     @return Array that contains the compartmental data without confirmed compartments.
     """
-
+
     new_dataset_entries = []
-    for i in dataset_entries :
-        dataset_entries_reshaped = i.reshape(
-            [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)]
-            )
-        sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1)
-        sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1)
-        dataset_entries_reshaped[:, 2] = sum_inf_no_symp
-        dataset_entries_reshaped[:, 4] = sum_inf_symp
-        new_dataset_entries.append(
-            np.delete(dataset_entries_reshaped , [3, 5], axis=1).flatten()
-            )
+    for i in dataset_entries:
+        dataset_entries_reshaped = i.reshape(
+            [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)]
+        )
+        sum_inf_no_symp = np.sum(dataset_entries_reshaped[:, [2, 3]], axis=1)
+        sum_inf_symp = np.sum(dataset_entries_reshaped[:, [4, 5]], axis=1)
+        dataset_entries_reshaped[:, 2] = sum_inf_no_symp
+        dataset_entries_reshaped[:, 4] = sum_inf_symp
+        new_dataset_entries.append(
+            np.delete(dataset_entries_reshaped, [3, 5], axis=1).flatten()
+        )
     return new_dataset_entries
 
+
 def getBaselineMatrix():
     """! Loads the baseline contact matrix.
     """
@@ -50,6 +53,28 @@ def getBaselineMatrix():
 
     return baseline
 
+
+def getMinimumMatrix():
+    """!
Loads the minimum contact matrix.
+    """
+
+    minimum_contact_matrix0 = os.path.join(
+        "./data/contacts/minimum_home.txt")
+    minimum_contact_matrix1 = os.path.join(
+        "./data/contacts/minimum_school_pf_eig.txt")
+    minimum_contact_matrix2 = os.path.join(
+        "./data/contacts/minimum_work.txt")
+    minimum_contact_matrix3 = os.path.join(
+        "./data/contacts/minimum_other.txt")
+
+    minimum = np.loadtxt(minimum_contact_matrix0) \
+        + np.loadtxt(minimum_contact_matrix1) + \
+        np.loadtxt(minimum_contact_matrix2) + \
+        np.loadtxt(minimum_contact_matrix3)
+
+    return minimum
+
+
 def make_graph(directory, num_regions, countykey_list, models):
     """!
     @param directory Directory with mobility data.
@@ -65,10 +90,10 @@ def make_graph(directory, num_regions, countykey_list, models):
 
     num_locations = 4
 
-    set_edges(os.path.abspath(os.path.join(directory, os.pardir)),
-    graph, num_locations)
+    set_edges(os.path.abspath(os.path.join(directory, os.pardir)),
+              graph, num_locations)
     return graph
-
+
 
 def transform_mobility_directory():
     """!
     Transforms the mobility data by merging Eisenach and Wartburgkreis
@@ -77,9 +102,24 @@ def transform_mobility_directory():
     arg_dict = gd.cli("commuter_official")
 
     directory = arg_dict['out_folder'].split('/pydata')[0]
-    directory = os.path.join(directory, 'mobility/')
+    directory = os.path.join(directory, 'mobility/')
 
-    # Merge Eisenach and Wartburgkreis in Input Data
+    # Merge Eisenach and Wartburgkreis in Input Data
     tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252')
-    tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled')
-    return directory
\ No newline at end of file
+    tmd.updateMobility2022(
+        directory, mobility_file='commuter_migration_scaled')
+    return directory
+
+
+def get_population():
+    """! Loads the county population data and aggregates it into the six
+    age groups used by the model.
+    @return List with the population of every county, resolved by age group.
+    """
+    df_population = pd.read_json(
+        'data/pydata/Germany/county_population.json')
+    age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80-130']
+
+    df_population_agegroups = pd.DataFrame(
+        columns=[df_population.columns[0]] + age_groups)
+    for region_id in df_population.iloc[:, 0]:
+        df_population_agegroups.loc[len(df_population_agegroups.index), :] = [int(region_id)] + list(
+            mdfs.fit_age_group_intervals(df_population[df_population.iloc[:, 0] == int(region_id)].iloc[:, 2:], age_groups))
+
+    population = df_population_agegroups.values.tolist()
+    return population
From 0cbda2b0d2f7b492205a497bcd72db520e480e75 Mon Sep 17 00:00:00 2001
From: Agatha Schmidt
Date: Mon, 2 Sep 2024 09:48:59 +0200
Subject: [PATCH 17/33] remove get population function from this file and
 import it from utils

---
 .../surrogatemodel/GNN/data_generation_nodamp.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py
index 0bbb2a6bc3..dc78c1ccc6 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py
@@ -14,8 +14,8 @@
 InfectionState, Model, interpolate_simulation_result)
 from memilio.epidata import geoModificationGermany as geoger
 import memilio.epidata.getPopulationData as gpd
-from memilio.epidata import modifyDataframeSeries as mdfs
-from .GNN_utils import getBaselineMatrix, transform_mobility_directory, make_graph, remove_confirmed_compartments
+from .GNN_utils import (getBaselineMatrix, transform_mobility_directory,
+                        make_graph, remove_confirmed_compartments, get_population)
 
 
 def
run_secir_groups_simulation(days, populations): @@ -145,17 +145,7 @@ def generate_data( @param save_data Option to deactivate the save of the dataset. Per default true. """ - df_population = pd.read_json( - 'data/pydata/Germany/county_population.json') - age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80-130'] - - df_population_agegroups = pd.DataFrame( - columns=[df_population.columns[0]] + age_groups) - for region_id in df_population.iloc[:, 0]: - df_population_agegroups.loc[len(df_population_agegroups.index), :] = [int(region_id)] + list( - mdfs.fit_age_group_intervals(df_population[df_population.iloc[:, 0] == int(region_id)].iloc[:, 2:], age_groups)) - - population = df_population_agegroups.values.tolist + population = get_population() days_sum = days + input_width - 1 data = {"inputs": [], From e8fb58036b1f133fb95a45bb94e7b15920c51ddc Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Mon, 2 Sep 2024 09:49:27 +0200 Subject: [PATCH 18/33] remove functons and import them from utlis --- .../GNN/data_generation_withdamp.py | 335 ++++++------------ 1 file changed, 109 insertions(+), 226 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index ac7c7d4599..d882194713 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -5,17 +5,19 @@ import json import numpy as np from datetime import date - + from progress.bar import Bar from sklearn.preprocessing import FunctionTransformer from memilio.simulation import (AgeGroup, LogLevel, set_log_level, Damping) from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, - InfectionState, Model, ModelGraph, - interpolate_simulation_result, set_edges) + InfectionState, Model, + interpolate_simulation_result) from memilio.epidata import geoModificationGermany as geoger -from memilio.epidata import transformMobilityData as tmd -from memilio.epidata import getDataIntoPandasDataFrame as gd + +from .GNN_utils import (getBaselineMatrix, make_graph, + remove_confirmed_compartments, getMinimumMatrix, get_population) + def run_secir_groups_simulation(days, populations, dampings): """! 
Uses an ODE SECIR model allowing for asymptomatic infection @@ -81,7 +83,7 @@ def run_secir_groups_simulation(days, populations, dampings): InfectionState.Recovered)] = random.uniform( 0.002, 0.08) * populations[region][i] model.populations[AgeGroup(i), - Index_InfectionState(InfectionState.Dead)] = random.uniform( + Index_InfectionState(InfectionState.Dead)] = random.uniform( 0, 0.0003) * populations[region][i] model.populations.set_difference_from_group_total_AgeGroup( (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), populations[region][i]) @@ -117,11 +119,11 @@ def run_secir_groups_simulation(days, populations, dampings): # generat a random damping factor damping = np.ones((num_groups, num_groups) - ) * np.float16(random.uniform(0, 0.5)) - + ) * np.float16(random.uniform(0, 0.5)) + # add damping to model model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( - coeffs = (damping), t=day, level=0, type=0)) + coeffs=(damping), t=day, level=0, type=0)) damped_matrices.append(model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at( day+1)) @@ -148,130 +150,6 @@ def run_secir_groups_simulation(days, populations, dampings): return dataset_entry, damped_matrices, dampings, damping_coeff -def remove_confirmed_compartments(dataset_entries, num_groups): - """! The compartments which contain confirmed cases are not - needed and are therefore omitted by summarizing the confirmed - compartment with the original compartment. - @param dataset_entries Array that contains the compartmental data with - confirmed compartments. - @param num_groups Number of age groups. - @return Array that contains the compartmental data without - confirmed compartments. - """ - - new_dataset_entries = [] - for i in dataset_entries : - dataset_entries_reshaped = i.reshape( - [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)] - ) - sum_inf_no_symp = np.sum(dataset_entries_reshaped [:, [2, 3]], axis=1) - sum_inf_symp = np.sum(dataset_entries_reshaped [:, [4, 5]], axis=1) - dataset_entries_reshaped[:, 2] = sum_inf_no_symp - dataset_entries_reshaped[:, 4] = sum_inf_symp - new_dataset_entries.append(np.delete( - dataset_entries_reshaped , [3, 5], axis=1).flatten() - ) - return new_dataset_entries - - -def get_population(path="data/pydata/Germany/county_population.json"): - """! loads population data - @param path Path to population file. - @return List with all 400 populations and 6 age groups. - """ - - with open(path) as f: - data = json.load(f) - population = [] - for data_entry in data: - population_county = [] - population_county.append( - data_entry['<3 years'] + data_entry['3-5 years'] / 2) - population_county.append(data_entry['6-14 years']) - population_county.append( - data_entry['15-17 years'] + data_entry['18-24 years'] + - data_entry['25-29 years'] + data_entry['30-39 years'] / 2) - population_county.append( - data_entry['30-39 years'] / 2 + data_entry['40-49 years'] + - data_entry['50-64 years'] * 2 / 3) - population_county.append( - data_entry['65-74 years'] + data_entry['>74 years'] * 0.2 + - data_entry['50-64 years'] * 1 / 3) - population_county.append( - data_entry['>74 years'] * 0.8) - - population.append(population_county) - return population - -def getBaselineMatrix(): - """! 
loads the baselinematrix - """ - - baseline_contact_matrix0 = os.path.join( - "./data/contacts/baseline_home.txt") - baseline_contact_matrix1 = os.path.join( - "./data/contacts/baseline_school_pf_eig.txt") - baseline_contact_matrix2 = os.path.join( - "./data/contacts/baseline_work.txt") - baseline_contact_matrix3 = os.path.join( - "./data/contacts/baseline_other.txt") - - baseline = np.loadtxt(baseline_contact_matrix0) \ - + np.loadtxt(baseline_contact_matrix1) + \ - np.loadtxt(baseline_contact_matrix2) + \ - np.loadtxt(baseline_contact_matrix3) - - return baseline - -def getMinimumMatrix(): - """! loads the minimum matrix - """ - - minimum_contact_matrix0 = os.path.join( - "./data/contacts/minimum_home.txt") - minimum_contact_matrix1 = os.path.join( - "./data/contacts/minimum_school_pf_eig.txt") - minimum_contact_matrix2 = os.path.join( - "./data/contacts/minimum_work.txt") - minimum_contact_matrix3 = os.path.join( - "./data/contacts/minimum_other.txt") - - minimum = np.loadtxt(minimum_contact_matrix0) \ - + np.loadtxt(minimum_contact_matrix1) + \ - np.loadtxt(minimum_contact_matrix2) + \ - np.loadtxt(minimum_contact_matrix3) - - return minimum - - -def make_graph(num_regions, countykey_list, models): - """! - @param num_regions Number (int) of counties that should be added to the - grap-ODE model. Equals 400 for whole Germany. - @param countykey_list List of keys/IDs for each county. - @models models List of osecir Model with one model per population. - @return graph Graph-ODE model. - """ - graph = ModelGraph() - for i in range(num_regions): - graph.add_node(int(countykey_list[i]), models[i]) - - # get mobility data directory - arg_dict = gd.cli("commuter_official") - - directory = arg_dict['out_folder'].split('/pydata')[0] - directory = os.path.join(directory, 'mobility/') - - # Merge Eisenach and Wartbugkreis in Input Data - tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252') - tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled') - - num_locations = 4 - - set_edges(os.path.abspath(os.path.join(directory, os.pardir)), - graph, num_locations) - return graph - def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min_damping_day, n_runs): """! Draw damping days while keeping a minimum distance between the @@ -286,9 +164,9 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min """ all_dampings = [] - count_runs = 0 + count_runs = 0 count_shadow = 0 - while len(all_dampings)= number_of_dampings: - #dampings = random.sample(dampings, 5) + # dampings = random.sample(dampings, 5) all_dampings.append(sorted(dampings)) # if count_runs % 2 == 0: - + return np.asarray(all_dampings) + def generate_data( num_runs, path, input_width, label_width, number_of_dampings, save_data=True): """! 
Generate dataset by calling run_secir_simulation (num_runs)-often @@ -342,96 +226,96 @@ def generate_data( population = get_population() days_sum = label_width+input_width-1 - #generate dampings + # generate dampings damping_days = generate_dampings_withshadowdamp( - number_of_dampings = number_of_dampings, days = label_width, - min_distance = 7, min_damping_day = input_width, n_runs = num_runs - ) - + number_of_dampings=number_of_dampings, days=label_width, + min_distance=7, min_damping_day=input_width, n_runs=num_runs + ) + # all data including damping information all_data = {"inputs": [], - "labels": [], - "damping_coeff": [], - "damping_day": [], - "damped_matrix": []} + "labels": [], + "damping_coeff": [], + "damping_day": [], + "damped_matrix": []} - # data that needs to be scaled + # data that needs to be scaled data = {"inputs": [], "labels": [], "damping_coeff": [], - "damping_day": [], + "damping_day": [], "damped_matrix": []} - + # show progess in terminal for longer runs # Due to the random structure, theres currently no need to shuffle the data - bar = Bar('Number of Runs done', max = num_runs) + bar = Bar('Number of Runs done', max=num_runs) for i in range(num_runs): - - data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation( - days_sum, population, damping_days[i]) - inputs = np.asarray(data_run).transpose(1, 2, 0)[:input_width] - data["inputs"].append(inputs) - data["labels"].append(np.asarray(data_run).transpose(1, 2, 0)[input_width:]) - data["damping_coeff"].append(damping_factor) - data["damping_day"].append(damping_days_s) - data["damped_matrix"].append(damped_contact_matrix) + data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation( + days_sum, population, damping_days[i]) - bar.next() + inputs = np.asarray(data_run).transpose(1, 2, 0)[:input_width] + data["inputs"].append(inputs) + data["labels"].append(np.asarray( + data_run).transpose(1, 2, 0)[input_width:]) + data["damping_coeff"].append(damping_factor) + data["damping_day"].append(damping_days_s) + data["damped_matrix"].append(damped_contact_matrix) + + bar.next() bar.finish() if save_data: - num_groups = int(np.asarray(data['inputs']).shape[2]/8) - transformer = FunctionTransformer(np.log1p, validate=True) - - # Scale inputs - inputs = np.asarray( - data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups*8, -1) - scaled_inputs = transformer.transform(inputs) - original_shape_input = np.asarray(data['inputs']).shape - - # Step 1: Reverse the reshape - reshaped_back = scaled_inputs.reshape( - original_shape_input[2], original_shape_input[0], - original_shape_input[1], original_shape_input[3] - ) - - # Step 2: Reverse the transpose - original_inputs = reshaped_back.transpose(1, 2, 0, 3) - scaled_inputs = original_inputs.transpose(0, 3, 1, 2) - - - # Scale labels - labels = np.asarray( - data['labels']).transpose(2, 0, 1, 3).reshape(num_groups*8, -1) - scaled_labels = transformer.transform(labels) - original_shape_labels = np.asarray(data['labels']).shape - - # Step 1: Reverse the reshape - reshaped_back = scaled_labels.reshape( - original_shape_labels[2], original_shape_labels[0], - original_shape_labels[1], original_shape_labels[3] - ) - - # Step 2: Reverse the transpose - original_labels = reshaped_back.transpose(1, 2, 0, 3) - scaled_labels = original_labels.transpose(0, 3, 1, 2) - - all_data = {"inputs": scaled_inputs, - "labels": scaled_labels, - "damping_coeff": data['damping_coeff'], - "damping_day": data['damping_day'], - 
"damped_matrix": data['damped_matrix']} - - # check if data directory exists. If necessary create it. - if not os.path.isdir(path): - os.mkdir(path) - - # save dict to json file - with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: - pickle.dump(all_data, f) + num_groups = int(np.asarray(data['inputs']).shape[2]/8) + transformer = FunctionTransformer(np.log1p, validate=True) + + # Scale inputs + inputs = np.asarray( + data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups*8, -1) + scaled_inputs = transformer.transform(inputs) + original_shape_input = np.asarray(data['inputs']).shape + + # Reverse the reshape of inputs + reshaped_back = scaled_inputs.reshape( + original_shape_input[2], original_shape_input[0], + original_shape_input[1], original_shape_input[3] + ) + + # Reverse the transpose of inputs + original_inputs = reshaped_back.transpose(1, 2, 0, 3) + scaled_inputs = original_inputs.transpose(0, 3, 1, 2) + + # Scale labels + labels = np.asarray( + data['labels']).transpose(2, 0, 1, 3).reshape(num_groups*8, -1) + scaled_labels = transformer.transform(labels) + original_shape_labels = np.asarray(data['labels']).shape + + # Reverse the reshape of labels + reshaped_back = scaled_labels.reshape( + original_shape_labels[2], original_shape_labels[0], + original_shape_labels[1], original_shape_labels[3] + ) + + # Reverse the transpose of labels + original_labels = reshaped_back.transpose(1, 2, 0, 3) + scaled_labels = original_labels.transpose(0, 3, 1, 2) + + all_data = {"inputs": scaled_inputs, + "labels": scaled_labels, + "damping_coeff": data['damping_coeff'], + "damping_day": data['damping_day'], + "damped_matrix": data['damped_matrix']} + + # check if data directory exists. If necessary create it. + if not os.path.isdir(path): + os.mkdir(path) + + # save dict to json file + with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f: + pickle.dump(all_data, f) return data @@ -439,7 +323,7 @@ def generate_data( input_width = 5 label_width = 100 - number_of_dampings = 2 + number_of_dampings = 3 num_runs = 1000 path = os.path.dirname(os.path.realpath(__file__)) path_data = os.path.join( @@ -449,4 +333,3 @@ def generate_data( generate_data(num_runs, path_data, input_width, label_width, number_of_dampings) - From de35ca64efa4ab73b520ddf3d45c7f6995e0c9ab Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Mon, 2 Sep 2024 10:06:16 +0200 Subject: [PATCH 19/33] add comments as proposed by review --- .../GNN/data_generation_withdamp.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index d882194713..9d36331859 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -2,7 +2,6 @@ import os import pickle import random -import json import numpy as np from datetime import date @@ -24,8 +23,8 @@ def run_secir_groups_simulation(days, populations, dampings): with 6 different age groups. The model is not stratified by region. Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. - @param Days Describes how many days we simulate within a single run. - @param damping_day The day when damping is applied. 
+ @param days (int) Describes how many days we simulate within a single run.
+ @param dampings (list) The days on which the contact dampings take effect.
 @param populations List containing the population in each age group.
 @return List containing the populations in each compartment used to
 initialize the run.
@@ -60,6 +59,7 @@ def run_secir_groups_simulation(days, populations, dampings):
 model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8.

 # Initial number of people in each compartment with random numbers
+ # Numbers are chosen heuristically based on experience
 model.populations[AgeGroup(i), Index_InfectionState(
 InfectionState.Exposed)] = random.uniform(
 0.00025, 0.005) * populations[region][i]
@@ -155,12 +155,12 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min
 """! Draw damping days while keeping a minimum distance between the
 damping days. This method aims to create a uniform distribution of drawn
 damping days.
- @param num_of_dampings Number of dampings that have to be drawn.
- @param days Number of days which are simulated (label_width).
- @param min_distance The minimum number of days between two dampings.
- @param min_damping_day The earliest day of the simualtion where a damping
+ @param number_of_dampings (int) Number of dampings that have to be drawn.
+ @param days (int) Number of days which are simulated (label_width).
+ @param min_distance (int) The minimum number of days between two dampings.
+ @param min_damping_day (int) The earliest day of the simulation where a damping
 can take place.
- @param n_runs Number of simulation runs.
+ @param n_runs (int) Number of simulation runs.
 """

 all_dampings = []
@@ -207,9 +207,7 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min
 count_shadow += 1
 # select first or last five dampings
 if len(dampings) >= number_of_dampings:
- # dampings = random.sample(dampings, 5)
 all_dampings.append(sorted(dampings))
- # if count_runs % 2 == 0:

 return np.asarray(all_dampings)

@@ -221,6 +219,7 @@ def generate_data(
 @param path Path, where the datasets are stored.
 @param input_width Number of time steps used for model input.
 @param label_width Number of time steps (days) used as model output/label.
+ @param number_of_dampings (int) The number of contact change points applied to the simulation.
 @param save_data Option to deactivate the save of the dataset. Per default true.
""" population = get_population() @@ -268,6 +267,11 @@ def generate_data( bar.finish() if save_data: + + # we use a logistic transformer to reduce the + # influence of ouliers and to correct the skewness of out dataset + # as a result we obtaina dataset which is handled better by the NN + num_groups = int(np.asarray(data['inputs']).shape[2]/8) transformer = FunctionTransformer(np.log1p, validate=True) From 3d5a644587f27a8b10184e49367abda1848aeef6 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Mon, 16 Sep 2024 14:29:24 +0200 Subject: [PATCH 20/33] pre commit --- .../test_surrogatemodel_GNN.py | 140 ++++++++---------- 1 file changed, 59 insertions(+), 81 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py index 7f86a6d469..0f67fdb022 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py @@ -19,10 +19,11 @@ ############################################################################# from pyfakefs import fake_filesystem_unittest -from memilio.surrogatemodel.GNN import (data_generation_nodamp, data_generation_withdamp) +from memilio.surrogatemodel.GNN import ( + data_generation_nodamp, data_generation_withdamp) import memilio.simulation as mio import memilio.simulation.osecir as osecir - + from unittest.mock import patch import os import unittest @@ -32,14 +33,15 @@ import json # suppress all autograph warnings from tensorflow -#logging.getLogger("tensorflow").setLevel(logging.ERROR) +# logging.getLogger("tensorflow").setLevel(logging.ERROR) import tensorflow as tf tf.get_logger().setLevel('ERROR') + class TestSurrogatemodelGNN(fake_filesystem_unittest.TestCase): path = '/home/' - + num_groups = 6 model = osecir.Model(num_groups) graph = osecir.ModelGraph() @@ -55,16 +57,14 @@ class TestSurrogatemodelGNN(fake_filesystem_unittest.TestCase): def setUp(self): self.setUpPyfakefs() - + #### test simulation no damp #### @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) - - # create mock graph - @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', - return_value= graph) - + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', + return_value=graph) def test_simulation_run_nodamp(self, mock_baseline, mockgraph): days_1 = 10 days_2 = 30 @@ -91,14 +91,11 @@ def test_simulation_run_nodamp(self, mock_baseline, mockgraph): @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) - - # create mock graph - @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', - return_value= graph) - + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', + return_value=graph) @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.get_population', - return_value= np.random.randint(0, 700001, size=(400, 6))) - + return_value=np.random.randint(0, 700001, size=(400, 6))) def test_data_generation_runs_nodamp( self, mock_population, mock_baseline, mock_graph): @@ -132,17 +129,15 @@ def test_data_generation_runs_nodamp( self.assertEqual(len(data_2['labels'][0]), label_width_2) self.assertEqual(len(data_2['labels'][0][0]), 48) - - # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', # 
return_value=0.6 * np.ones((6, 6))) - + # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.get_population', # return_value= np.random.randint(0, 700001, size=(400, 6))) - # # create mock graph - # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', + # # create mock graph + # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', # return_value= graph) - + # def test_data_generation_save_nodamp( # self, mock_population, mock_baseline, mock_graph): @@ -155,29 +150,25 @@ def test_data_generation_runs_nodamp( # self.assertEqual(len(os.listdir(self.path)), 1) # self.assertEqual(os.listdir(self.path), - # ['data_secir_groups.pickle']) - + # ['data_secir_groups.pickle']) - @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getMinimumMatrix', return_value=0 * np.ones((6, 6))) @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) - # create mock graph - @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', - return_value= graph) - + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', + return_value=graph) def test_simulation_run_withdamp(self, mock_baseline, mock_minimum, mock_graph): - + days_1 = 10 days_2 = 30 days_3 = 50 dampings1 = [5] - dampings2 = [6, 15] + dampings2 = [6, 15] dampings3 = [8, 18, 35] - population = [[5256.0, 10551, 32368.5, 43637.833333333336, 22874.066666666666, 8473.6], [4000, 8000, 40000, @@ -195,75 +186,66 @@ def test_simulation_run_withdamp(self, mock_baseline, mock_minimum, mock_graph): self.assertEqual(len(dataset_entry2[0]), days_2+1) self.assertEqual(len(dataset_entry3[0]), days_3+1) - baseline = data_generation_withdamp.getBaselineMatrix() - #damping factor + # damping factor self.assertEqual(damped_matrices1[0].all(), - (baseline * damping_coeff1[0]).all()) + (baseline * damping_coeff1[0]).all()) self.assertEqual( damped_matrices2[1].all(), (baseline * damping_coeff2[1]).all()) self.assertEqual( damped_matrices3[2].all(), (baseline * damping_coeff3[2]).all()) - + # number of dampings length self.assertEqual(len(damping_coeff1), len(dampings1)) self.assertEqual(len(damping_coeff2), len(dampings2)) self.assertEqual(len(damping_coeff3), len(dampings3)) - - @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getMinimumMatrix', return_value=0 * np.ones((6, 6))) @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) - # create mock graph - @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', - return_value= graph) + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', + return_value=graph) @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.get_population', - return_value= np.random.randint(0, 700001, size=(400, 6))) - + return_value=np.random.randint(0, 700001, size=(400, 6))) def test_data_generation_runs_withdamp( - self, mock_population, mock_baseline, mock_minimum, mock_graph): - - input_width_1 = 1 - input_width_2 = 5 + self, mock_population, mock_baseline, mock_minimum, mock_graph): - label_width_1 = 10 - label_width_2 = 30 - - num_runs_1 = 1 - num_runs_2 = 2 - - damping1 = 1 - damping2 = 2 - - data_1 = data_generation_withdamp.generate_data( - num_runs_1, self.path, input_width_1, label_width_1, - damping1, save_data=False) - self.assertEqual(len(data_1['inputs']), num_runs_1) - self.assertEqual(len(data_1['inputs'][0]), input_width_1) - 
self.assertEqual(len(data_1['inputs'][0][0]), 48) - self.assertEqual(len(data_1['labels']), num_runs_1) - self.assertEqual(len(data_1['labels'][0]), label_width_1) - self.assertEqual(len(data_1['labels'][0][0]), 48) + input_width_1 = 1 + input_width_2 = 5 - data_2 = data_generation_withdamp.generate_data( - num_runs_2, self.path, input_width_2, label_width_2, - damping2, save_data=False) - self.assertEqual(len(data_2['inputs']), num_runs_2) - self.assertEqual(len(data_2['inputs'][0]), input_width_2) - self.assertEqual(len(data_2['inputs'][0][0]), 48) - self.assertEqual(len(data_2['labels']), num_runs_2) - self.assertEqual(len(data_2['labels'][0]), label_width_2) - self.assertEqual(len(data_2['labels'][0][0]), 48) + label_width_1 = 10 + label_width_2 = 30 + num_runs_1 = 1 + num_runs_2 = 2 + damping1 = 1 + damping2 = 2 + data_1 = data_generation_withdamp.generate_data( + num_runs_1, self.path, input_width_1, label_width_1, + damping1, save_data=False) + self.assertEqual(len(data_1['inputs']), num_runs_1) + self.assertEqual(len(data_1['inputs'][0]), input_width_1) + self.assertEqual(len(data_1['inputs'][0][0]), 48) + self.assertEqual(len(data_1['labels']), num_runs_1) + self.assertEqual(len(data_1['labels'][0]), label_width_1) + self.assertEqual(len(data_1['labels'][0][0]), 48) + data_2 = data_generation_withdamp.generate_data( + num_runs_2, self.path, input_width_2, label_width_2, + damping2, save_data=False) + self.assertEqual(len(data_2['inputs']), num_runs_2) + self.assertEqual(len(data_2['inputs'][0]), input_width_2) + self.assertEqual(len(data_2['inputs'][0][0]), 48) + self.assertEqual(len(data_2['labels']), num_runs_2) + self.assertEqual(len(data_2['labels'][0]), label_width_2) + self.assertEqual(len(data_2['labels'][0][0]), 48) - # def test_data_generation_save_withdamp( # self, mock_population, mock_baseline, mock_minimum): @@ -271,14 +253,10 @@ def test_data_generation_runs_withdamp( # label_width = 3 # num_runs = 1 # dampings = 3 - # data_generation_withdamp.generate_data(num_runs, self.path, "", input_width, # label_width) # self.assertEqual(len(os.listdir(self.path)), 1) - # self.assertEqual(os.listdir(self.path), # ['data_secir_groups.pickle']) - - if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From 2dfbe4f54a35a5142f39315c1b202a3e767d4ef3 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Wed, 18 Sep 2024 09:42:12 +0200 Subject: [PATCH 21/33] mock transform mobility function --- .../test_surrogatemodel_GNN.py | 65 ++++++++++++++----- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py index 0f67fdb022..f3fc0794a0 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py @@ -62,19 +62,27 @@ def setUp(self): @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) - # create mock graph @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', return_value=graph) - def test_simulation_run_nodamp(self, mock_baseline, mockgraph): + @patch('memilio.epidata.transformMobilityData.updateMobility2022') + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.transform_mobility_directory', + autospec=True) + def test_simulation_run_nodamp(self, 
mock_transform_mobility, + mock_update_mobility, mock_baseline, mock_graph): + # Mock the behavior of the mobility transformation function, + # but it doesn't return anything. + # So, we just need it to be called without side effects. + mock_transform_mobility.side_effect = lambda: None + mock_update_mobility.side_effect = lambda directory, mobility_file: None + days_1 = 10 days_2 = 30 days_3 = 50 - population = [[5256.0, 10551, 32368.5, - 43637.833333333336, 22874.066666666666, 8473.6], - [4000, 8000, 40000, - 28000, 15000, 6000]] + population = [[5256.0, 10551, 32368.5, 43637.833333333336, 22874.066666666666, 8473.6], + [4000, 8000, 40000, 28000, 15000, 6000]] + # Call the actual function being tested simulation_1 = data_generation_nodamp.run_secir_groups_simulation( days_1, population) simulation_2 = data_generation_nodamp.run_secir_groups_simulation( @@ -82,22 +90,32 @@ def test_simulation_run_nodamp(self, mock_baseline, mockgraph): simulation_3 = data_generation_nodamp.run_secir_groups_simulation( days_3, population) - # result length - self.assertEqual(len(simulation_1[0]), days_1+1) - self.assertEqual(len(simulation_2[0]), days_2+1) - self.assertEqual(len(simulation_3[0]), days_3+1) + # Ensure that the results are of the correct length. + self.assertEqual(len(simulation_1[0]), days_1 + 1) + self.assertEqual(len(simulation_2[0]), days_2 + 1) + self.assertEqual(len(simulation_3[0]), days_3 + 1) + #### test data genertion no damp #### @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) - # create mock graph @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', return_value=graph) @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.get_population', return_value=np.random.randint(0, 700001, size=(400, 6))) + @patch('memilio.epidata.transformMobilityData.updateMobility2022') + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.transform_mobility_directory', + autospec=True) def test_data_generation_runs_nodamp( - self, mock_population, mock_baseline, mock_graph): + self, mock_transform_mobility, mock_update_mobility, + mock_baseline, mock_graph, mock_population): + + # Mock the behavior of the mobility transformation function, + # but it doesn't return anything. + # So, we just need it to be called without side effects. + mock_transform_mobility.side_effect = lambda: None + mock_update_mobility.side_effect = lambda directory, mobility_file: None input_width_1 = 1 input_width_2 = 5 @@ -108,7 +126,7 @@ def test_data_generation_runs_nodamp( num_runs_1 = 1 num_runs_2 = 2 - # test data generation without damping + # test data generation without dampings data_1 = data_generation_nodamp.generate_data( num_runs_1, self.path, input_width_1, label_width_1, save_data=False) @@ -159,7 +177,15 @@ def test_data_generation_runs_nodamp( # create mock graph @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', return_value=graph) - def test_simulation_run_withdamp(self, mock_baseline, mock_minimum, mock_graph): + @patch('memilio.epidata.transformMobilityData.updateMobility2022') + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.transform_mobility_directory', + autospec=True) + def test_simulation_run_withdamp(self, mock_transform_mobility, mock_update_mobility, + mock_minimum, mock_baseline, mock_graph): + # Mock the behavior of the mobility transformation function, but it doesn't return anything. + # So, we just need it to be called without side effects. 
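A short aside on the mocking idiom these hunks introduce: stacked @patch decorators hand their mocks to the test function bottom-up (the decorator nearest the def supplies the first argument), and assigning a side_effect replaces the patched callable's behavior entirely. A minimal self-contained sketch, patching two standard-library functions purely for illustration; none of these names appear in the patch itself:

    import os
    from unittest import mock

    @mock.patch('os.listdir')   # outermost decorator -> last mock argument
    @mock.patch('os.getcwd')    # innermost decorator -> first mock argument
    def demo(mock_getcwd, mock_listdir):
        # a side_effect of lambda: None mimics a function that is only
        # called for its effect and returns nothing
        mock_getcwd.side_effect = lambda: None
        mock_listdir.side_effect = lambda path: ['fake_file.txt']
        assert os.getcwd() is None
        assert os.listdir('/anywhere') == ['fake_file.txt']

    demo()

Returning to the patch, the same side_effect = lambda: None assignments below let the data-generation tests run without any mobility files on disk.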
+ mock_transform_mobility.side_effect = lambda: None
+ mock_update_mobility.side_effect = lambda directory, mobility_file: None

 days_1 = 10
 days_2 = 30
@@ -211,8 +237,15 @@ def test_simulation_run_withdamp(self, mock_baseline, mock_minimum, mock_graph):
 return_value=graph)
 @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.get_population',
 return_value=np.random.randint(0, 700001, size=(400, 6)))
+ # mock transform directory function
+ @patch('memilio.epidata.transformMobilityData.updateMobility2022')
+ @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.transform_mobility_directory', autospec=True)
 def test_data_generation_runs_withdamp(
- self, mock_population, mock_baseline, mock_minimum, mock_graph):
+ self, mock_transform_mobility, mock_update_mobility,
+ mock_minimum, mock_baseline, mock_graph, mock_population):
+
+ mock_transform_mobility.side_effect = lambda: None
+ mock_update_mobility.side_effect = lambda directory, mobility_file: None

 input_width_1 = 1
 input_width_2 = 5
@@ -279,9 +298,9 @@ def test_data_generation_runs_withdamp(
 self.assertEqual(len(data_2['labels'][0]), label_width_2)
 self.assertEqual(len(data_2['labels'][0][0]), 48)

+
 # def test_data_generation_save_withdamp(
 # self, mock_population, mock_baseline, mock_minimum):
-
 # input_width = 2
 # label_width = 3
 # num_runs = 1

From 86a2e8923f50b9e7d36cf87c3b7b0ee42a52c65 Mon Sep 17 00:00:00 2001
From: Agatha Schmidt
Date: Thu, 19 Sep 2024 11:20:48 +0200
Subject: [PATCH 22/33] adjust utils: add surrogate utils and add scaling to
 GNN utils

---
 .../memilio/surrogatemodel/GNN/GNN_utils.py | 116 ++++++------------
 .../GNN/data_generation_nodamp.py | 47 +------
 .../GNN/data_generation_withdamp.py | 48 ++------
 .../surrogatemodel/utils_surrogatemodel.py | 86 +++++++++++++
 4 files changed, 136 insertions(+), 161 deletions(-)
 create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
index 97e19d71ea..4e88f33b0c 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
@@ -2,77 +2,11 @@
 import pandas as pd
 import os

+from sklearn.preprocessing import FunctionTransformer
+
 from memilio.epidata import transformMobilityData as tmd
 from memilio.epidata import getDataIntoPandasDataFrame as gd
 from memilio.simulation.osecir import (ModelGraph, set_edges)
-from memilio.epidata import modifyDataframeSeries as mdfs
-
-
-def remove_confirmed_compartments(dataset_entries, num_groups):
- """! The compartments which contain confirmed cases are not needed and are
- therefore omitted by summarizing the confirmed compartment with the
- original compartment.
- @param dataset_entries Array that contains the compartmental data with
- confirmed compartments.
- @param num_groups Number of age groups.
- @return Array that contains the compartmental data without confirmed compartments.
- """ - - new_dataset_entries = [] - for i in dataset_entries: - dataset_entries_reshaped = i.reshape( - [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)] - ) - sum_inf_no_symp = np.sum(dataset_entries_reshaped[:, [2, 3]], axis=1) - sum_inf_symp = np.sum(dataset_entries_reshaped[:, [4, 5]], axis=1) - dataset_entries_reshaped[:, 2] = sum_inf_no_symp - dataset_entries_reshaped[:, 4] = sum_inf_symp - new_dataset_entries.append( - np.delete(dataset_entries_reshaped, [3, 5], axis=1).flatten() - ) - return new_dataset_entries - - -def getBaselineMatrix(): - """! loads the baselinematrix - """ - - baseline_contact_matrix0 = os.path.join( - "./data/contacts/baseline_home.txt") - baseline_contact_matrix1 = os.path.join( - "./data/contacts/baseline_school_pf_eig.txt") - baseline_contact_matrix2 = os.path.join( - "./data/contacts/baseline_work.txt") - baseline_contact_matrix3 = os.path.join( - "./data/contacts/baseline_other.txt") - - baseline = np.loadtxt(baseline_contact_matrix0) \ - + np.loadtxt(baseline_contact_matrix1) + \ - np.loadtxt(baseline_contact_matrix2) + \ - np.loadtxt(baseline_contact_matrix3) - - return baseline - - -def getMinimumMatrix(): - """! loads the minimum matrix - """ - - minimum_contact_matrix0 = os.path.join( - "./data/contacts/minimum_home.txt") - minimum_contact_matrix1 = os.path.join( - "./data/contacts/minimum_school_pf_eig.txt") - minimum_contact_matrix2 = os.path.join( - "./data/contacts/minimum_work.txt") - minimum_contact_matrix3 = os.path.join( - "./data/contacts/minimum_other.txt") - - minimum = np.loadtxt(minimum_contact_matrix0) \ - + np.loadtxt(minimum_contact_matrix1) + \ - np.loadtxt(minimum_contact_matrix2) + \ - np.loadtxt(minimum_contact_matrix3) - - return minimum def make_graph(directory, num_regions, countykey_list, models): @@ -110,16 +44,40 @@ def transform_mobility_directory(): directory, mobility_file='commuter_migration_scaled') -def get_population(): - df_population = pd.read_json( - 'data/pydata/Germany/county_population.json') - age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80-130'] +def scale_data(data): + num_groups = int(np.asarray(data['inputs']).shape[2] / 8) + transformer = FunctionTransformer(np.log1p, validate=True) + + # Scale inputs + inputs = np.asarray( + data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) + scaled_inputs = transformer.transform(inputs) + original_shape_input = np.asarray(data['inputs']).shape + + # Reverse the reshape + reshaped_back = scaled_inputs.reshape(original_shape_input[2], + original_shape_input[0], + original_shape_input[1], + original_shape_input[3]) + + # Reverse the transpose + original_inputs = reshaped_back.transpose(1, 2, 0, 3) + scaled_inputs = original_inputs.transpose(0, 3, 1, 2) + + # Scale labels + labels = np.asarray( + data['labels']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) + scaled_labels = transformer.transform(labels) + original_shape_labels = np.asarray(data['labels']).shape + + # Reverse the reshape + reshaped_back = scaled_labels.reshape(original_shape_labels[2], + original_shape_labels[0], + original_shape_labels[1], + original_shape_labels[3]) - df_population_agegroups = pd.DataFrame( - columns=[df_population.columns[0]] + age_groups) - for region_id in df_population.iloc[:, 0]: - df_population_agegroups.loc[len(df_population_agegroups.index), :] = [int(region_id)] + list( - mdfs.fit_age_group_intervals(df_population[df_population.iloc[:, 0] == int(region_id)].iloc[:, 2:], age_groups)) + # Reverse the transpose + original_labels = 
reshaped_back.transpose(1, 2, 0, 3) + scaled_labels = original_labels.transpose(0, 3, 1, 2) - population = df_population_agegroups.values.tolist - return population + return scaled_inputs, scaled_labels diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py index dc78c1ccc6..6e440af4c1 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -6,7 +6,7 @@ import pandas as pd from progress.bar import Bar -from sklearn.preprocessing import FunctionTransformer + from datetime import date from memilio.simulation import (AgeGroup, LogLevel, set_log_level) @@ -14,8 +14,10 @@ InfectionState, Model, interpolate_simulation_result) from memilio.epidata import geoModificationGermany as geoger import memilio.epidata.getPopulationData as gpd -from .GNN_utils import (getBaselineMatrix, transform_mobility_directory, - make_graph, remove_confirmed_compartments, get_population) +from .GNN_utils import (transform_mobility_directory, + make_graph, scale_data) +from memilio.surrogatemodel.utils_surrogatemodel import ( + getBaselineMatrix, remove_confirmed_compartments, get_population) def run_secir_groups_simulation(days, populations): @@ -173,44 +175,7 @@ def generate_data( if save_data: - # we use a logistic transformer to reduce the - # influence of ouliers and to correct the skewness of out dataset - # as a result we obtaina dataset which is handled better by the NN - - num_groups = int(np.asarray(data['inputs']).shape[2] / 8) - transformer = FunctionTransformer(np.log1p, validate=True) - - # Scale inputs - inputs = np.asarray( - data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) - scaled_inputs = transformer.transform(inputs) - original_shape_input = np.asarray(data['inputs']).shape - - # Reverse the reshape - reshaped_back = scaled_inputs.reshape(original_shape_input[2], - original_shape_input[0], - original_shape_input[1], - original_shape_input[3]) - - # Reverse the transpose - original_inputs = reshaped_back.transpose(1, 2, 0, 3) - scaled_inputs = original_inputs.transpose(0, 3, 1, 2) - - # Scale labels - labels = np.asarray( - data['labels']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1) - scaled_labels = transformer.transform(labels) - original_shape_labels = np.asarray(data['labels']).shape - - # Reverse the reshape - reshaped_back = scaled_labels.reshape(original_shape_labels[2], - original_shape_labels[0], - original_shape_labels[1], - original_shape_labels[3]) - - # Reverse the transpose - original_labels = reshaped_back.transpose(1, 2, 0, 3) - scaled_labels = original_labels.transpose(0, 3, 1, 2) + scaled_inputs, scaled_labels = scale_data(data) all_data = {"inputs": scaled_inputs, "labels": scaled_labels, diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index 9d36331859..b11616f843 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -14,8 +14,10 @@ interpolate_simulation_result) from memilio.epidata import geoModificationGermany as geoger -from .GNN_utils import (getBaselineMatrix, make_graph, - remove_confirmed_compartments, getMinimumMatrix, 
get_population) +from .GNN_utils import (transform_mobility_directory, + make_graph, scale_data) +from memilio.surrogatemodel.utils_surrogatemodel import ( + getBaselineMatrix, remove_confirmed_compartments, get_population, getMinimumMatrix) def run_secir_groups_simulation(days, populations, dampings): @@ -133,7 +135,8 @@ def run_secir_groups_simulation(days, populations, dampings): model.apply_constraints() models.append(model) - graph = make_graph(num_regions, countykey_list, models) + directory = transform_mobility_directory() + graph = make_graph(directory, num_regions, countykey_list, models) study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) study.run() @@ -268,44 +271,7 @@ def generate_data( if save_data: - # we use a logistic transformer to reduce the - # influence of ouliers and to correct the skewness of out dataset - # as a result we obtaina dataset which is handled better by the NN - - num_groups = int(np.asarray(data['inputs']).shape[2]/8) - transformer = FunctionTransformer(np.log1p, validate=True) - - # Scale inputs - inputs = np.asarray( - data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups*8, -1) - scaled_inputs = transformer.transform(inputs) - original_shape_input = np.asarray(data['inputs']).shape - - # Reverse the reshape of inputs - reshaped_back = scaled_inputs.reshape( - original_shape_input[2], original_shape_input[0], - original_shape_input[1], original_shape_input[3] - ) - - # Reverse the transpose of inputs - original_inputs = reshaped_back.transpose(1, 2, 0, 3) - scaled_inputs = original_inputs.transpose(0, 3, 1, 2) - - # Scale labels - labels = np.asarray( - data['labels']).transpose(2, 0, 1, 3).reshape(num_groups*8, -1) - scaled_labels = transformer.transform(labels) - original_shape_labels = np.asarray(data['labels']).shape - - # Reverse the reshape of labels - reshaped_back = scaled_labels.reshape( - original_shape_labels[2], original_shape_labels[0], - original_shape_labels[1], original_shape_labels[3] - ) - - # Reverse the transpose of labels - original_labels = reshaped_back.transpose(1, 2, 0, 3) - scaled_labels = original_labels.transpose(0, 3, 1, 2) + scaled_inputs, scaled_labels = scale_data(data) all_data = {"inputs": scaled_inputs, "labels": scaled_labels, diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py new file mode 100644 index 0000000000..72f3335df6 --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py @@ -0,0 +1,86 @@ +import numpy as np +import pandas as pd +import os +from memilio.epidata import modifyDataframeSeries as mdfs + + +def remove_confirmed_compartments(dataset_entries, num_groups): + """! The compartments which contain confirmed cases are not needed and are + therefore omitted by summarizing the confirmed compartment with the + original compartment. + @param dataset_entries Array that contains the compartmental data with + confirmed compartments. + @param num_groups Number of age groups. + @return Array that contains the compartmental data without confirmed compartments. 
+ """ + + new_dataset_entries = [] + for i in dataset_entries: + dataset_entries_reshaped = i.reshape( + [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)] + ) + sum_inf_no_symp = np.sum(dataset_entries_reshaped[:, [2, 3]], axis=1) + sum_inf_symp = np.sum(dataset_entries_reshaped[:, [4, 5]], axis=1) + dataset_entries_reshaped[:, 2] = sum_inf_no_symp + dataset_entries_reshaped[:, 4] = sum_inf_symp + new_dataset_entries.append( + np.delete(dataset_entries_reshaped, [3, 5], axis=1).flatten() + ) + return new_dataset_entries + + +def getBaselineMatrix(): + """! loads the baselinematrix + """ + + baseline_contact_matrix0 = os.path.join( + "./data/contacts/baseline_home.txt") + baseline_contact_matrix1 = os.path.join( + "./data/contacts/baseline_school_pf_eig.txt") + baseline_contact_matrix2 = os.path.join( + "./data/contacts/baseline_work.txt") + baseline_contact_matrix3 = os.path.join( + "./data/contacts/baseline_other.txt") + + baseline = np.loadtxt(baseline_contact_matrix0) \ + + np.loadtxt(baseline_contact_matrix1) + \ + np.loadtxt(baseline_contact_matrix2) + \ + np.loadtxt(baseline_contact_matrix3) + + return baseline + + +def getMinimumMatrix(): + """! loads the minimum matrix + """ + + minimum_contact_matrix0 = os.path.join( + "./data/contacts/minimum_home.txt") + minimum_contact_matrix1 = os.path.join( + "./data/contacts/minimum_school_pf_eig.txt") + minimum_contact_matrix2 = os.path.join( + "./data/contacts/minimum_work.txt") + minimum_contact_matrix3 = os.path.join( + "./data/contacts/minimum_other.txt") + + minimum = np.loadtxt(minimum_contact_matrix0) \ + + np.loadtxt(minimum_contact_matrix1) + \ + np.loadtxt(minimum_contact_matrix2) + \ + np.loadtxt(minimum_contact_matrix3) + + return minimum + + +def get_population(): + df_population = pd.read_json( + 'data/pydata/Germany/county_population.json') + age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80-130'] + + df_population_agegroups = pd.DataFrame( + columns=[df_population.columns[0]] + age_groups) + for region_id in df_population.iloc[:, 0]: + df_population_agegroups.loc[len(df_population_agegroups.index), :] = [int(region_id)] + list( + mdfs.fit_age_group_intervals(df_population[df_population.iloc[:, 0] == int(region_id)].iloc[:, 2:], age_groups)) + + population = df_population_agegroups.values.tolist + return population From 77af0ab5a2ec51841da045fc6f418e83cc2de36c Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 19 Sep 2024 13:29:43 +0200 Subject: [PATCH 23/33] add test for saving mechanism --- .../test_surrogatemodel_GNN.py | 106 +++++++++++++----- 1 file changed, 76 insertions(+), 30 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py index f3fc0794a0..a28e076c1b 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py @@ -98,6 +98,7 @@ def test_simulation_run_nodamp(self, mock_transform_mobility, #### test data genertion no damp #### + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', @@ -147,28 +148,44 @@ def test_data_generation_runs_nodamp( self.assertEqual(len(data_2['labels'][0]), label_width_2) self.assertEqual(len(data_2['labels'][0][0]), 48) - # 
@patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', - # return_value=0.6 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.get_population', + return_value=np.random.randint(0, 700001, size=(400, 6))) + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', + return_value=graph) + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.scale_data', + autospec=True) + @patch('memilio.epidata.transformMobilityData.updateMobility2022') + @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.transform_mobility_directory', + autospec=True) + def test_data_generation_save_nodamp( + self, mock_transform_mobility, mock_update_mobility, mock_scale_data, + mock_population, mock_baseline, mock_graph): + + mock_transform_mobility.side_effect = lambda: None + mock_update_mobility.side_effect = lambda directory, mobility_file: None - # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.get_population', - # return_value= np.random.randint(0, 700001, size=(400, 6))) - # # create mock graph - # @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.make_graph', - # return_value= graph) + # Mock the return value of scale_data with dummy inputs and labels + # Assuming scaled inputs and labels are 4D arrays based on the original function's output + mock_scale_data.return_value = (np.random.rand( + 10, 8, 10, 6), np.random.rand(10, 8, 10, 6)) - # def test_data_generation_save_nodamp( - # self, mock_population, mock_baseline, mock_graph): + input_width = 2 + label_width = 3 + num_runs = 1 - # input_width = 2 - # label_width = 3 - # num_runs = 1 + # Call the function being tested + data_generation_nodamp.generate_data(num_runs, self.path, input_width, + label_width) - # data_generation_nodamp.generate_data(num_runs, self.path, input_width, - # label_width) - # self.assertEqual(len(os.listdir(self.path)), 1) + # Check the number of generated files + self.assertEqual(len(os.listdir(self.path)), 1) - # self.assertEqual(os.listdir(self.path), - # ['data_secir_groups.pickle']) + # Check the contents of the directory + self.assertEqual(os.listdir(self.path), + ['data_secir_age_groups.pickle']) @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getMinimumMatrix', return_value=0 * np.ones((6, 6))) @@ -178,7 +195,7 @@ def test_data_generation_runs_nodamp( @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', return_value=graph) @patch('memilio.epidata.transformMobilityData.updateMobility2022') - @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.transform_mobility_directory', + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.transform_mobility_directory', autospec=True) def test_simulation_run_withdamp(self, mock_transform_mobility, mock_update_mobility, mock_minimum, mock_baseline, mock_graph): @@ -228,6 +245,8 @@ def test_simulation_run_withdamp(self, mock_transform_mobility, mock_update_mobi self.assertEqual(len(damping_coeff2), len(dampings2)) self.assertEqual(len(damping_coeff3), len(dampings3)) +# test data generation with dampings + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getMinimumMatrix', return_value=0 * np.ones((6, 6))) @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getBaselineMatrix', @@ -239,7 +258,7 @@ def test_simulation_run_withdamp(self, mock_transform_mobility, mock_update_mobi 
return_value=np.random.randint(0, 700001, size=(400, 6))) # mock transform directory function @patch('memilio.epidata.transformMobilityData.updateMobility2022') - @patch('memilio.surrogatemodel.GNN.data_generation_nodamp.transform_mobility_directory', autospec=True) + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.transform_mobility_directory', autospec=True) def test_data_generation_runs_withdamp( self, mock_transform_mobility, mock_update_mobility, mock_minimum, mock_baseline, mock_graph, mock_population): @@ -279,17 +298,44 @@ def test_data_generation_runs_withdamp( self.assertEqual(len(data_2['labels'][0]), label_width_2) self.assertEqual(len(data_2['labels'][0][0]), 48) + # test saving for model with dampings + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getMinimumMatrix', + return_value=0 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + # create mock graph + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.make_graph', + return_value=graph) + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.get_population', + return_value=np.random.randint(0, 700001, size=(400, 6))) + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.scale_data', + autospec=True) + # @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.generate_dampings_withshadowdamp', + # return_value=[4, 6]) + @patch('memilio.epidata.transformMobilityData.updateMobility2022') + @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.transform_mobility_directory', + autospec=True) + def test_data_generation_save_withdamp( + self, mock_transform_mobility, mock_update_mobility, mock_scale_data, + mock_population, mock_baseline, mcok_minimum, mock_graph): + + mock_transform_mobility.side_effect = lambda: None + mock_update_mobility.side_effect = lambda directory, mobility_file: None + + mock_scale_data.return_value = (np.random.rand( + 10, 8, 10, 6), np.random.rand(10, 8, 10, 6)) + + input_width = 5 + label_width = 20 + num_runs = 2 + num_dampings = 2 + data = data_generation_withdamp.generate_data(num_runs, self.path, input_width, + label_width, num_dampings, save_data=True) + + self.assertEqual(len(os.listdir(self.path)), 1) + self.assertEqual(os.listdir(self.path), + ['data_secir_age_groups.pickle']) + - # def test_data_generation_save_withdamp( - # self, mock_population, mock_baseline, mock_minimum): - # input_width = 2 - # label_width = 3 - # num_runs = 1 - # dampings = 3 - # data_generation_withdamp.generate_data(num_runs, self.path, "", input_width, - # label_width) - # self.assertEqual(len(os.listdir(self.path)), 1) - # self.assertEqual(os.listdir(self.path), - # ['data_secir_groups.pickle']) if __name__ == '__main__': unittest.main() From ce4a494f49594484d649647a0fc4297e1cf1d791 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 19 Sep 2024 13:49:55 +0200 Subject: [PATCH 24/33] import functions from new utils file --- .../ode_secir_groups/data_generation.py | 65 ++----------------- .../ode_secir_simple/data_generation.py | 18 ++--- .../surrogatemodel/utils_surrogatemodel.py | 3 +- 3 files changed, 14 insertions(+), 72 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 1cb1dac82d..5e94ebf689 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ 
b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -34,6 +34,9 @@ InfectionState, Model, interpolate_simulation_result, simulate) +from memilio.surrogatemodel.utils_surrogatemodel import ( + getBaselineMatrix, getMinimumMatrix, get_population) + def interpolate_age_groups(data_entry): """! Interpolates the age groups from the population data into the age groups used in the simulation. @@ -214,7 +217,8 @@ def generate_data( days = input_width + label_width - 1 # Load population data - population = get_population(path_population) + # population = get_population(path_population) + population = get_population() # show progess in terminal for longer runs # Due to the random structure, there's currently no need to shuffle the data @@ -256,61 +260,6 @@ def generate_data( return data -def getBaselineMatrix(): - """! loads the baselinematrix - """ - - baseline_contact_matrix0 = os.path.join( - "./data/contacts/baseline_home.txt") - baseline_contact_matrix1 = os.path.join( - "./data/contacts/baseline_school_pf_eig.txt") - baseline_contact_matrix2 = os.path.join( - "./data/contacts/baseline_work.txt") - baseline_contact_matrix3 = os.path.join( - "./data/contacts/baseline_other.txt") - - baseline = np.loadtxt(baseline_contact_matrix0) \ - + np.loadtxt(baseline_contact_matrix1) + \ - np.loadtxt(baseline_contact_matrix2) + \ - np.loadtxt(baseline_contact_matrix3) - - return baseline - - -def getMinimumMatrix(): - """! loads the minimum matrix - """ - - minimum_contact_matrix0 = os.path.join( - "./data/contacts/minimum_home.txt") - minimum_contact_matrix1 = os.path.join( - "./data/contacts/minimum_school_pf_eig.txt") - minimum_contact_matrix2 = os.path.join( - "./data/contacts/minimum_work.txt") - minimum_contact_matrix3 = os.path.join( - "./data/contacts/minimum_other.txt") - - minimum = np.loadtxt(minimum_contact_matrix0) \ - + np.loadtxt(minimum_contact_matrix1) + \ - np.loadtxt(minimum_contact_matrix2) + \ - np.loadtxt(minimum_contact_matrix3) - - return minimum - - -def get_population(path): - """! read population data in list from dataset - @param path Path to the dataset containing the population data - """ - - with open(path) as f: - data = json.load(f) - population = [] - for data_entry in data: - population.append(interpolate_age_groups(data_entry)) - return population - - if __name__ == "__main__": # Store data relative to current file two levels higher. 
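Alongside the helpers imported above, the shared utils_surrogatemodel module also provides remove_confirmed_compartments, which the ode_secir_simple changes below switch to: it folds the two confirmed compartments into their unconfirmed counterparts, reducing ten compartments per age group to eight while conserving the total population. A small self-contained check; the function body mirrors the utils implementation shown earlier, and the toy numbers are made up for illustration:

    import numpy as np

    def remove_confirmed_compartments(dataset_entries, num_groups):
        # merge InfectedNoSymptomsConfirmed into InfectedNoSymptoms (cols 2, 3)
        # and InfectedSymptomsConfirmed into InfectedSymptoms (cols 4, 5),
        # then drop the confirmed columns
        new_dataset_entries = []
        for entry in dataset_entries:
            reshaped = entry.reshape(
                [num_groups, int(np.asarray(dataset_entries).shape[1] / num_groups)])
            reshaped[:, 2] = np.sum(reshaped[:, [2, 3]], axis=1)
            reshaped[:, 4] = np.sum(reshaped[:, [4, 5]], axis=1)
            new_dataset_entries.append(
                np.delete(reshaped, [3, 5], axis=1).flatten())
        return new_dataset_entries

    num_groups, num_compartments = 6, 10
    timeline = np.random.rand(31, num_groups * num_compartments) * 100
    merged = np.asarray(remove_confirmed_compartments(timeline.copy(), num_groups))

    assert merged.shape == (31, num_groups * 8)
    # merging compartments must not change the total population per day
    assert np.allclose(merged.sum(axis=1), timeline.sum(axis=1))

The script's __main__ block continues below.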
path = os.path.dirname(os.path.realpath(__file__)) @@ -322,6 +271,6 @@ def get_population(path): input_width = 5 label_width = 30 - num_runs = 10000 + num_runs = 10 data = generate_data(num_runs, path_output, path_population, input_width, - label_width) + label_width, save_data=True) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py index 96eff3ac94..9dee453c70 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py @@ -35,13 +35,7 @@ InfectionState, Model, Simulation, interpolate_simulation_result, simulate) - -def remove_confirmed_compartments(result_array): - sum_inf_no_symp = np.sum(result_array[:, [2, 3]], axis=1) - sum_inf_symp = np.sum(result_array[:, [2, 3]], axis=1) - result_array[:, 2] = sum_inf_no_symp - result_array[:, 4] = sum_inf_symp - return np.delete(result_array, [3, 5], axis=1) +from memilio.surrogatemodel.utils_surrogatemodel import remove_confirmed_compartments def run_secir_simple_simulation(days): @@ -121,13 +115,11 @@ def run_secir_simple_simulation(days): result_array = result.as_ndarray() result_array = remove_confirmed_compartments( - result_array[1:, :].transpose()) - - dataset = [] + result_array[1:, :].transpose(), 1) dataset_entries = copy.deepcopy(result_array) - return dataset_entries.tolist() + return dataset_entries def generate_data( @@ -205,6 +197,6 @@ def generate_data( input_width = 5 label_width = 30 - num_runs = 1000 + num_runs = 10000 data = generate_data(num_runs, path_data, input_width, - label_width) + label_width, save_data=True) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py index 72f3335df6..0fc619092d 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils_surrogatemodel.py @@ -82,5 +82,6 @@ def get_population(): df_population_agegroups.loc[len(df_population_agegroups.index), :] = [int(region_id)] + list( mdfs.fit_age_group_intervals(df_population[df_population.iloc[:, 0] == int(region_id)].iloc[:, 2:], age_groups)) - population = df_population_agegroups.values.tolist + population = df_population_agegroups.values.tolist() + return population From 63980d97843a786ba5565cbdcbc71dddb5783c90 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 19 Sep 2024 14:00:44 +0200 Subject: [PATCH 25/33] adjust import --- .../memilio/surrogatemodel/GNN/GNN_utils.py | 2 ++ .../surrogatemodel/GNN/data_generation_nodamp.py | 10 ++++------ .../surrogatemodel/GNN/data_generation_withdamp.py | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py index 4e88f33b0c..6108d2d5a0 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py @@ -43,6 +43,8 @@ def transform_mobility_directory(): tmd.updateMobility2022( directory, mobility_file='commuter_migration_scaled') + return directory + def scale_data(data): num_groups = int(np.asarray(data['inputs']).shape[2] / 8) diff --git 
a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py index 6e440af4c1..3901fc9ebd 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -3,7 +3,6 @@ import pickle import random import numpy as np -import pandas as pd from progress.bar import Bar @@ -13,9 +12,8 @@ from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, InfectionState, Model, interpolate_simulation_result) from memilio.epidata import geoModificationGermany as geoger -import memilio.epidata.getPopulationData as gpd -from .GNN_utils import (transform_mobility_directory, - make_graph, scale_data) +from memilio.surrogatemodel.GNN.GNN_utils import (transform_mobility_directory, + make_graph, scale_data) from memilio.surrogatemodel.utils_surrogatemodel import ( getBaselineMatrix, remove_confirmed_compartments, get_population) @@ -202,7 +200,7 @@ def generate_data( input_width = 5 days = 30 - num_runs = 1000 + num_runs = 10000 generate_data(num_runs, path_data, input_width, - days) + days, save_data=True) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index b11616f843..e255f4e3ce 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -14,8 +14,8 @@ interpolate_simulation_result) from memilio.epidata import geoModificationGermany as geoger -from .GNN_utils import (transform_mobility_directory, - make_graph, scale_data) +from memilio.surrogatemodel.GNN.GNN_utils import (transform_mobility_directory, + make_graph, scale_data) from memilio.surrogatemodel.utils_surrogatemodel import ( getBaselineMatrix, remove_confirmed_compartments, get_population, getMinimumMatrix) @@ -292,9 +292,9 @@ def generate_data( if __name__ == "__main__": input_width = 5 - label_width = 100 - number_of_dampings = 3 - num_runs = 1000 + label_width = 30 + number_of_dampings = 1 + num_runs = 2 path = os.path.dirname(os.path.realpath(__file__)) path_data = os.path.join( os.path.dirname( @@ -302,4 +302,4 @@ def generate_data( 'data_GNN_with_'+str(number_of_dampings)+'_dampings_test') generate_data(num_runs, path_data, input_width, - label_width, number_of_dampings) + label_width, number_of_dampings, save_data=False) From cabf5a16a292687613a5249a700f2722672242b2 Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Thu, 19 Sep 2024 14:02:38 +0200 Subject: [PATCH 26/33] adjust imports --- .../test_surrogatemodel_GNN.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py index a28e076c1b..8371aed65a 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_GNN.py @@ -29,11 +29,7 @@ import unittest import numpy as np -import logging -import json -# suppress all autograph warnings from tensorflow -# logging.getLogger("tensorflow").setLevel(logging.ERROR) import tensorflow as tf 
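Worth noting before the remaining test clean-ups: every saving test in this module goes through pyfakefs, so nothing is written to the real disk. A minimal self-contained sketch of that pattern, independent of MEmilio; the file name and payload are invented for illustration:

    import os
    import pickle
    import unittest

    from pyfakefs import fake_filesystem_unittest


    class TestPickleSave(fake_filesystem_unittest.TestCase):
        path = '/home/'

        def setUp(self):
            # redirect all file system calls into an in-memory fake
            self.setUpPyfakefs()

        def test_save_creates_single_pickle(self):
            os.mkdir(self.path)
            payload = {'inputs': [1, 2, 3], 'labels': [4, 5, 6]}
            with open(os.path.join(self.path, 'data.pickle'), 'wb') as f:
                pickle.dump(payload, f)
            # the fake file system sees exactly one new file
            self.assertEqual(os.listdir(self.path), ['data.pickle'])


    if __name__ == '__main__':
        unittest.main()

This is why the save tests can assert on os.listdir(self.path) without any tear-down.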
tf.get_logger().setLevel('ERROR')
@@ -69,9 +65,7 @@ def setUp(self):
 autospec=True)
 def test_simulation_run_nodamp(self, mock_transform_mobility,
 mock_update_mobility, mock_baseline, mock_graph):
-
- # Mock the behavior of the mobility transformation function,
- # but it doesn't return anything.
- # So, we just need it to be called without side effects.
+
 mock_transform_mobility.side_effect = lambda: None
 mock_update_mobility.side_effect = lambda directory, mobility_file: None
@@ -112,9 +106,6 @@ def test_data_generation_runs_nodamp(
 self, mock_transform_mobility, mock_update_mobility,
 mock_baseline, mock_graph, mock_population):

- # Mock the behavior of the mobility transformation function,
- # but it doesn't return anything.
- # So, we just need it to be called without side effects.
 mock_transform_mobility.side_effect = lambda: None
 mock_update_mobility.side_effect = lambda directory, mobility_file: None
@@ -199,8 +190,7 @@ def test_data_generation_save_nodamp(
 autospec=True)
 def test_simulation_run_withdamp(self, mock_transform_mobility, mock_update_mobility,
 mock_minimum, mock_baseline, mock_graph):
-
- # Mock the behavior of the mobility transformation function, but it doesn't return anything.
- # So, we just need it to be called without side effects.
+
 mock_transform_mobility.side_effect = lambda: None
 mock_update_mobility.side_effect = lambda directory, mobility_file: None
@@ -310,8 +300,6 @@ def test_data_generation_save_withdamp(
 return_value=np.random.randint(0, 700001, size=(400, 6)))
 @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.scale_data',
 autospec=True)
- # @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.generate_dampings_withshadowdamp',
- # return_value=[4, 6])
 @patch('memilio.epidata.transformMobilityData.updateMobility2022')
 @patch('memilio.surrogatemodel.GNN.data_generation_withdamp.transform_mobility_directory',
 autospec=True)

From 06fb8dda94b65b0665a2a2b9c18a78be14a3f4a3 Mon Sep 17 00:00:00 2001
From: Agatha Schmidt
Date: Tue, 24 Sep 2024 15:30:09 +0200
Subject: [PATCH 27/33] put functions which read files outside of the
 run_simulation function

---
 .../memilio/surrogatemodel/GNN/GNN_utils.py | 85 ++++++++++++++++++-
 .../GNN/data_generation_nodamp.py | 24 +++---
 .../GNN/data_generation_withdamp.py | 22 ++---
 3 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
index 6108d2d5a0..0f66e938b6 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
@@ -1,12 +1,79 @@
 import numpy as np
 import pandas as pd
 import os
-
 from sklearn.preprocessing import FunctionTransformer

 from memilio.epidata import transformMobilityData as tmd
 from memilio.epidata import getDataIntoPandasDataFrame as gd
 from memilio.simulation.osecir import (ModelGraph, set_edges)
+from memilio.epidata import modifyDataframeSeries as mdfs
+
+
+def remove_confirmed_compartments(dataset_entries, num_groups):
+ """! The compartments which contain confirmed cases are not needed and are
+ therefore omitted by summarizing the confirmed compartment with the
+ original compartment.
+ @param dataset_entries Array that contains the compartmental data with
+ confirmed compartments.
+ @param num_groups Number of age groups.
+ @return Array that contains the compartmental data without confirmed compartments.
---
 .../memilio/surrogatemodel/GNN/GNN_utils.py   | 85 ++++++++++++++++++-
 .../GNN/data_generation_nodamp.py             | 24 +++---
 .../GNN/data_generation_withdamp.py           | 22 ++---
 3 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
index 6108d2d5a0..0f66e938b6 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py
@@ -1,12 +1,79 @@
 import numpy as np
 import pandas as pd
 import os
-
 from sklearn.preprocessing import FunctionTransformer
 from memilio.epidata import transformMobilityData as tmd
 from memilio.epidata import getDataIntoPandasDataFrame as gd
 from memilio.simulation.osecir import (ModelGraph, set_edges)
+from memilio.epidata import modifyDataframeSeries as mdfs
+
+
+def remove_confirmed_compartments(dataset_entries, num_groups):
+    """! The compartments which contain confirmed cases are not needed and are
+    therefore omitted by summarizing the confirmed compartment with the
+    original compartment.
+    @param dataset_entries Array that contains the compartmental data with
+    confirmed compartments.
+    @param num_groups Number of age groups.
+    @return Array that contains the compartmental data without confirmed compartments.
+    """
+
+    new_dataset_entries = []
+    for i in dataset_entries:
+        dataset_entries_reshaped = i.reshape(
+            [num_groups, int(np.asarray(dataset_entries).shape[1]/num_groups)]
+        )
+        sum_inf_no_symp = np.sum(dataset_entries_reshaped[:, [2, 3]], axis=1)
+        sum_inf_symp = np.sum(dataset_entries_reshaped[:, [4, 5]], axis=1)
+        dataset_entries_reshaped[:, 2] = sum_inf_no_symp
+        dataset_entries_reshaped[:, 4] = sum_inf_symp
+        new_dataset_entries.append(
+            np.delete(dataset_entries_reshaped, [3, 5], axis=1).flatten()
+        )
+    return new_dataset_entries
+
+
+def getBaselineMatrix():
+    """! loads the baseline matrix
+    """
+
+    baseline_contact_matrix0 = os.path.join(
+        "./data/contacts/baseline_home.txt")
+    baseline_contact_matrix1 = os.path.join(
+        "./data/contacts/baseline_school_pf_eig.txt")
+    baseline_contact_matrix2 = os.path.join(
+        "./data/contacts/baseline_work.txt")
+    baseline_contact_matrix3 = os.path.join(
+        "./data/contacts/baseline_other.txt")
+
+    baseline = np.loadtxt(baseline_contact_matrix0) \
+        + np.loadtxt(baseline_contact_matrix1) + \
+        np.loadtxt(baseline_contact_matrix2) + \
+        np.loadtxt(baseline_contact_matrix3)
+
+    return baseline
+
+
+def getMinimumMatrix():
+    """! loads the minimum matrix
+    """
+
+    minimum_contact_matrix0 = os.path.join(
+        "./data/contacts/minimum_home.txt")
+    minimum_contact_matrix1 = os.path.join(
+        "./data/contacts/minimum_school_pf_eig.txt")
+    minimum_contact_matrix2 = os.path.join(
+        "./data/contacts/minimum_work.txt")
+    minimum_contact_matrix3 = os.path.join(
+        "./data/contacts/minimum_other.txt")
+
+    minimum = np.loadtxt(minimum_contact_matrix0) \
+        + np.loadtxt(minimum_contact_matrix1) + \
+        np.loadtxt(minimum_contact_matrix2) + \
+        np.loadtxt(minimum_contact_matrix3)
+
+    return minimum


 def make_graph(directory, num_regions, countykey_list, models):
     """!
     @param directory Directory with mobility data.
     @param num_regions Number (int) of counties that should be added to the
@@ -42,10 +109,24 @@ def transform_mobility_directory():
     tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252')
     tmd.updateMobility2022(
         directory, mobility_file='commuter_migration_scaled')
-
     return directory
 
 
+def get_population():
+    df_population = pd.read_json(
+        'data/pydata/Germany/county_population.json')
+    age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80-130']
+
+    df_population_agegroups = pd.DataFrame(
+        columns=[df_population.columns[0]] + age_groups)
+    for region_id in df_population.iloc[:, 0]:
+        df_population_agegroups.loc[len(df_population_agegroups.index), :] = [int(region_id)] + list(
+            mdfs.fit_age_group_intervals(df_population[df_population.iloc[:, 0] == int(region_id)].iloc[:, 2:], age_groups))
+
+    population = df_population_agegroups.values.tolist()
+    return population
+
+
 def scale_data(data):
     num_groups = int(np.asarray(data['inputs']).shape[2] / 8)
     transformer = FunctionTransformer(np.log1p, validate=True)
diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py
index 3901fc9ebd..0e4f7caa30 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py
@@ -18,7 +18,7 @@
     getBaselineMatrix, remove_confirmed_compartments, get_population)
 
 
-def run_secir_groups_simulation(days, populations):
+def run_secir_groups_simulation(days, populations, countykey_list, directory, baseline):
     """! Uses an ODE SECIR model allowing for asymptomatic infection with 6
     different age groups. The model is not stratified by region.
    Virus-specific parameters are fixed and initial number of persons
@@ -36,9 +36,6 @@ def run_secir_groups_simulation(days, populations):
     start_year = 2019
     dt = 0.1
 
-    # get county ids
-    countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True)
-
     # Define age Groups
     groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+']
     num_groups = len(groups)
@@ -105,9 +102,6 @@ def run_secir_groups_simulation(days, populations):
     model.parameters.StartDay = (
         date(start_year, start_month, start_day) - date(start_year, 1, 1)).days
 
-        # Load baseline and minimum contact matrix and assign them to the model
-        baseline = getBaselineMatrix()
-
         model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline
         model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones(
             (num_groups, num_groups)) * 0
@@ -116,7 +110,6 @@ def run_secir_groups_simulation(days, populations):
         model.apply_constraints()
         models.append(model)
 
-    directory = transform_mobility_directory()
     graph = make_graph(directory, num_regions, countykey_list, models)
 
     study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1)
@@ -152,6 +145,13 @@ def generate_data(
         "labels": [],
     }
 
+    # get county ids
+    countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True)
+    # Load baseline and minimum contact matrix and assign them to the model
+    baseline = getBaselineMatrix()
+    # transform directory by merging Eisenach and Wartburgkreis
+    directory = transform_mobility_directory()
+
     # show progess in terminal for longer runs
     # Due to the random structure, theres currently no need to shuffle the data
     bar = Bar('Number of Runs done', max=num_runs)
@@ -159,7 +159,7 @@ def generate_data(
 
     for _ in range(num_runs):
         data_run = run_secir_groups_simulation(
-            days_sum, population)
+            days_sum, population, countykey_list, directory, baseline)
         inputs = np.asarray(data_run).transpose(1, 2, 0)[: input_width]
         data["inputs"].append(inputs)
@@ -196,11 +196,11 @@ def generate_data(
     path_data = os.path.join(
         os.path.dirname(
             os.path.realpath(os.path.dirname(os.path.realpath(path)))),
-        'data_GNN_nodamp')
+        'data_GNN_nodamp_test')
 
     input_width = 5
-    days = 30
-    num_runs = 10000
+    days = 3
+    num_runs = 2
 
     generate_data(num_runs, path_data, input_width,
                   days, save_data=True)
diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
index e255f4e3ce..a8485b34aa 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
@@ -20,7 +20,7 @@
     getBaselineMatrix, remove_confirmed_compartments, get_population, getMinimumMatrix)
 
 
-def run_secir_groups_simulation(days, populations, dampings):
+def run_secir_groups_simulation(days, populations, dampings, countykey_list, directory, baseline):
     """! Uses an ODE SECIR model allowing for asymptomatic infection with 6
     different age groups. The model is not stratified by region.
Virus-specific parameters are fixed and initial number of persons @@ -38,9 +38,6 @@ def run_secir_groups_simulation(days, populations, dampings): start_year = 2019 dt = 0.1 - # get county ids - countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True) - # Define age Groups groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] num_groups = len(groups) @@ -107,12 +104,9 @@ def run_secir_groups_simulation(days, populations, dampings): model.parameters.StartDay = ( date(start_year, start_month, start_day) - date(start_year, 1, 1)).days - # Load baseline and minimum contact matrix and assign them to the model - baseline = getBaselineMatrix() - minimum = getMinimumMatrix() - model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline - model.parameters.ContactPatterns.cont_freq_mat[0].minimum = minimum + model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( + (num_groups, num_groups)) * 0 # Generate a damping matrix and assign it to the model damped_matrices = [] @@ -135,7 +129,6 @@ def run_secir_groups_simulation(days, populations, dampings): model.apply_constraints() models.append(model) - directory = transform_mobility_directory() graph = make_graph(directory, num_regions, countykey_list, models) study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) @@ -248,6 +241,13 @@ def generate_data( "damping_day": [], "damped_matrix": []} + # get county ids + countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True) + directory = transform_mobility_directory() + + # Load baseline matrix + baseline = getBaselineMatrix() + # show progess in terminal for longer runs # Due to the random structure, theres currently no need to shuffle the data bar = Bar('Number of Runs done', max=num_runs) @@ -255,7 +255,7 @@ def generate_data( for i in range(num_runs): data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation( - days_sum, population, damping_days[i]) + days_sum, population, damping_days[i], countykey_list, countykey_list, baseline) inputs = np.asarray(data_run).transpose(1, 2, 0)[:input_width] data["inputs"].append(inputs) From f2345c710857a0785c2eeaaf5aa02b0e0d93095f Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Tue, 24 Sep 2024 15:40:54 +0200 Subject: [PATCH 28/33] add directory as parameter --- .../memilio/surrogatemodel/GNN/data_generation_withdamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index a8485b34aa..d800bc7520 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -255,7 +255,7 @@ def generate_data( for i in range(num_runs): data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation( - days_sum, population, damping_days[i], countykey_list, countykey_list, baseline) + days_sum, population, damping_days[i], countykey_list, directory, baseline) inputs = np.asarray(data_run).transpose(1, 2, 0)[:input_width] data["inputs"].append(inputs) From d26528e97633581c6e3751fe17044da603e695da Mon Sep 17 00:00:00 2001 From: Agatha Schmidt Date: Wed, 25 Sep 2024 16:16:25 +0200 Subject: [PATCH 29/33] set edges only one time --- .../memilio/surrogatemodel/GNN/GNN_utils.py | 6 +++--- .../GNN/data_generation_withdamp.py | 17 +++++++++++++---- 2 files 
changed, 16 insertions(+), 7 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py index 0f66e938b6..274b310809 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py @@ -76,7 +76,7 @@ def getMinimumMatrix(): return minimum -def make_graph(directory, num_regions, countykey_list, models): +def make_graph(directory, num_regions, countykey_list, models, edges): """! @param directory Directory with mobility data. @param num_regions Number (int) of counties that should be added to the @@ -91,8 +91,8 @@ def make_graph(directory, num_regions, countykey_list, models): num_locations = 4 - set_edges(os.path.abspath(os.path.join(directory, os.pardir)), - graph, num_locations) + # set_edges(os.path.abspath(os.path.join(directory, os.pardir)), + # graph, num_locations) return graph diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index d800bc7520..868effff97 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -11,7 +11,7 @@ from memilio.simulation import (AgeGroup, LogLevel, set_log_level, Damping) from memilio.simulation.osecir import (Index_InfectionState, interpolate_simulation_result, ParameterStudy, InfectionState, Model, - interpolate_simulation_result) + interpolate_simulation_result, ModelGraph, set_edges) from memilio.epidata import geoModificationGermany as geoger from memilio.surrogatemodel.GNN.GNN_utils import (transform_mobility_directory, @@ -20,7 +20,7 @@ getBaselineMatrix, remove_confirmed_compartments, get_population, getMinimumMatrix) -def run_secir_groups_simulation(days, populations, dampings, countykey_list, directory, baseline): +def run_secir_groups_simulation(days, populations, dampings, countykey_list, directory, baseline, edges): """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. 
Virus-specific parameters are fixed and initial number of persons @@ -129,7 +129,7 @@ def run_secir_groups_simulation(days, populations, dampings, countykey_list, dir model.apply_constraints() models.append(model) - graph = make_graph(directory, num_regions, countykey_list, models) + graph = make_graph(directory, num_regions, countykey_list, models, edges) study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) study.run() @@ -248,6 +248,15 @@ def generate_data( # Load baseline matrix baseline = getBaselineMatrix() + num_locations = 4 + graph = ModelGraph() + for i in range(len(countykey_list)): + graph.add_node(int(countykey_list[i]), Model(6)) + + set_edges(os.path.abspath(os.path.join(directory, os.pardir)), + graph, num_locations) + edges = graph.edges() + # show progess in terminal for longer runs # Due to the random structure, theres currently no need to shuffle the data bar = Bar('Number of Runs done', max=num_runs) @@ -255,7 +264,7 @@ def generate_data( for i in range(num_runs): data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation( - days_sum, population, damping_days[i], countykey_list, directory, baseline) + days_sum, population, damping_days[i], countykey_list, directory, baseline, edges) inputs = np.asarray(data_run).transpose(1, 2, 0)[:input_width] data["inputs"].append(inputs) From e39fd71a98d8d7857c4603eb7c583cc45206025a Mon Sep 17 00:00:00 2001 From: HenrZu <69154294+HenrZu@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:07:10 +0200 Subject: [PATCH 30/33] new structure for no damp --- .../memilio/surrogatemodel/GNN/GNN_utils.py | 6 +- .../GNN/data_generation_nodamp.py | 228 +++++++++++------- 2 files changed, 139 insertions(+), 95 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py index 274b310809..0f66e938b6 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/GNN_utils.py @@ -76,7 +76,7 @@ def getMinimumMatrix(): return minimum -def make_graph(directory, num_regions, countykey_list, models, edges): +def make_graph(directory, num_regions, countykey_list, models): """! @param directory Directory with mobility data. 
@param num_regions Number (int) of counties that should be added to the @@ -91,8 +91,8 @@ def make_graph(directory, num_regions, countykey_list, models, edges): num_locations = 4 - # set_edges(os.path.abspath(os.path.join(directory, os.pardir)), - # graph, num_locations) + set_edges(os.path.abspath(os.path.join(directory, os.pardir)), + graph, num_locations) return graph diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py index 0e4f7caa30..c0f84261b5 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_nodamp.py @@ -2,6 +2,9 @@ import os import pickle import random +import time +import memilio.simulation as mio +import memilio.simulation.osecir as osecir import numpy as np from progress.bar import Bar @@ -16,104 +19,147 @@ make_graph, scale_data) from memilio.surrogatemodel.utils_surrogatemodel import ( getBaselineMatrix, remove_confirmed_compartments, get_population) - - -def run_secir_groups_simulation(days, populations, countykey_list, directory, baseline): - """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 - different age groups. The model is not stratified by region. - Virus-specific parameters are fixed and initial number of persons +from enum import Enum + + +class Location(Enum): + Home = 0 + School = 1 + Work = 2 + Other = 3 + + +start_date = mio.Date(2019, 1, 1) +end_date = mio.Date(2020, 12, 31) + + +def set_covid_parameters(model, num_groups=6): + for i in range(num_groups): + # Compartment transition duration + model.parameters.TimeExposed[AgeGroup(i)] = 3.2 + model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2. + model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6. + model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12. + model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8. 
+
+        # Compartment transition probabilities
+        model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5
+        model.parameters.TransmissionProbabilityOnContact[AgeGroup(
+            i)] = 0.1
+        model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09
+        model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25
+        model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2
+        model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25
+        model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3
+        model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup(
+            i)] = 0.5
+
+    # StartDay is the n-th day of the year
+    model.parameters.StartDay = start_date.day_in_year
+
+
+def set_contact_matrices(model, data_dir, num_groups=6):
+    contact_matrices = mio.ContactMatrixGroup(
+        len(list(Location)), num_groups)
+    locations = ["home", "school_pf_eig", "work", "other"]
+
+    for i, location in enumerate(locations):
+        baseline_file = os.path.join(
+            data_dir, "contacts", "baseline_" + location + ".txt")
+        minimum_file = os.path.join(
+            data_dir, "contacts", "minimum_" + location + ".txt")
+        contact_matrices[i] = mio.ContactMatrix(
+            mio.read_mobility_plain(baseline_file),
+            mio.read_mobility_plain(minimum_file)
+        )
+    model.parameters.ContactPatterns.cont_freq_mat = contact_matrices
+
+
+def get_graph(num_groups, data_dir):
+    model = Model(num_groups)
+    set_covid_parameters(model)
+    set_contact_matrices(model, data_dir)
+
+    graph = osecir.ModelGraph()
+
+    scaling_factor_infected = [2.5, 2.5, 2.5, 2.5, 2.5, 2.5]
+    scaling_factor_icu = 1.0
+    tnt_capacity_factor = 7.5 / 100000.
+
+    path_population_data = os.path.join(
+        data_dir, "pydata", "Germany",
+        "county_current_population.json")
+
+    mio.osecir.set_nodes(
+        model.parameters,
+        mio.Date(start_date.year,
+                 start_date.month, start_date.day),
+        mio.Date(end_date.year,
+                 end_date.month, end_date.day), data_dir,
+        path_population_data, True, graph, scaling_factor_infected,
+        scaling_factor_icu, tnt_capacity_factor, 0, False)
+
+    mio.osecir.set_edges(
+        data_dir, graph, len(Location))
+
+    return graph
+
+
+def run_secir_groups_simulation(days, graph, num_groups=6):
+    """! Uses an ODE SECIR model allowing for asymptomatic infection with 6
+    different age groups. The model is not stratified by region.
+    Virus-specific parameters are fixed and initial number of persons
     in the particular infection states are chosen randomly from defined ranges.
     @param Days Describes how many days we simulate within a single run.
-    @param populations List containing the population in each age group.
-    @return List containing the populations in each compartment used to initialize
+    @param Graph Graph initialized for the start_date with the population data which
+    is sampled during the run.
+    @return List containing the populations in each compartment used to initialize
     the run.
     """
-
-    set_log_level(LogLevel.Off)
-
-    start_day = 1
-    start_month = 1
-    start_year = 2019
-    dt = 0.1
-
-    # Define age Groups
-    groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+']
-    num_groups = len(groups)
-    num_regions = len(populations)
-    models = []
-
-    # Initialize Parameters
-    for region in range(num_regions):
-        model = Model(num_groups)
+    for node_indx in range(graph.num_nodes):
+        model = graph.get_node(node_indx).property
 
         # Set parameters
+        # TODO: Put This in the draw_sample function in the ParameterStudy
         for i in range(num_groups):
-            # Compartment transition duration
-            model.parameters.TimeExposed[AgeGroup(i)] = 3.2
-            model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2.
- model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6. - model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12. - model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8. + age_group = AgeGroup(i) + pop_age_group = model.populations.get_group_total_AgeGroup( + age_group) # Initial number of people in each compartment with random numbers # Numbers are chosen heuristically based on experience - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Exposed)] = random.uniform( - 0.00025, 0.005) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedNoSymptoms)] = random.uniform( - 0.0001, 0.0035) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( + model.populations[age_group, Index_InfectionState(InfectionState.Exposed)] = random.uniform( + 0.00025, 0.005) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.InfectedNoSymptoms)] = random.uniform( + 0.0001, 0.0035) * pop_age_group + model.populations[age_group, Index_InfectionState( InfectionState.InfectedNoSymptomsConfirmed)] = 0 - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSymptoms)] = random.uniform( - 0.00007, 0.001) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( + model.populations[age_group, Index_InfectionState(InfectionState.InfectedSymptoms)] = random.uniform( + 0.00007, 0.001) * pop_age_group + model.populations[age_group, Index_InfectionState( InfectionState.InfectedSymptomsConfirmed)] = 0 - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSevere)] = random.uniform( - 0.00003, 0.0006) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedCritical)] = random.uniform( - 0.00001, 0.0002) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Recovered)] = random.uniform( - 0.002, 0.08) * populations[region][i] - model.populations[AgeGroup(i), - Index_InfectionState(InfectionState.Dead)] = random.uniform( - 0, 0.0003) * populations[region][i] + model.populations[age_group, Index_InfectionState(InfectionState.InfectedSevere)] = random.uniform( + 0.00003, 0.0006) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.InfectedCritical)] = random.uniform( + 0.00001, 0.0002) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.Recovered)] = random.uniform( + 0.002, 0.08) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.Dead)] = random.uniform( + 0, 0.0003) * pop_age_group model.populations.set_difference_from_group_total_AgeGroup( - (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), - populations[region][i]) - - # Compartment transition propabilities - model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5 - model.parameters.TransmissionProbabilityOnContact[AgeGroup( - i)] = 0.1 - model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09 - model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 - model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2 - model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25 - model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3 - model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup( - i)] = 0.5 - - # StartDay is the n-th day of the year - model.parameters.StartDay = ( - date(start_year, start_month, start_day) - date(start_year, 1, 1)).days - - 
model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline
-        model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones(
-            (num_groups, num_groups)) * 0
+            (age_group, Index_InfectionState(InfectionState.Susceptible)),
+            pop_age_group)
 
         # Apply mathematical constraints to parameters
         model.apply_constraints()
-        models.append(model)
 
-    graph = make_graph(directory, num_regions, countykey_list, models)
+        # set model to graph
+        graph.get_node(node_indx).property.populations = model.populations
 
-    study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1)
+    study = ParameterStudy(graph, 0, days, dt=0.5, num_runs=1)
+    start_time = time.time()
     study.run()
+    print("Simulation took: ", time.time() - start_time)
 
     graph_run = study.run()[0]
     results = interpolate_simulation_result(graph_run)
@@ -129,28 +175,24 @@
 
 
 def generate_data(
-        num_runs, path, input_width, days, save_data=True):
+        num_runs, data_dir, path, input_width, days, save_data=True):
     """! Generate dataset by calling run_secir_simulation (num_runs)-often
     @param num_runs Number of times, the function run_secir_simulation is called.
+    @param data_dir Directory with all data needed to initialize the models.
     @param path Path, where the datasets are stored.
     @param input_width number of time steps used for model input.
     @param label_width number of time steps (days) used as model output/label.
     @param save_data Option to deactivate the save of the dataset. Per default true.
     """
-
-    population = get_population()
+    set_log_level(mio.LogLevel.Error)
     days_sum = days + input_width - 1
 
     data = {"inputs": [],
             "labels": [],
             }
 
-    # get county ids
-    countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True)
-    # Load baseline and minimum contact matrix and assign them to the model
-    baseline = getBaselineMatrix()
-    # transform directory by merging Eisenach and Wartburgkreis
-    directory = transform_mobility_directory()
+    num_groups = 6
+    graph = get_graph(num_groups, data_dir)
 
     # show progess in terminal for longer runs
     # Due to the random structure, theres currently no need to shuffle the data
     bar = Bar('Number of Runs done', max=num_runs)
@@ -159,7 +201,7 @@
 
     for _ in range(num_runs):
         data_run = run_secir_groups_simulation(
-            days_sum, population, countykey_list, directory, baseline)
+            days_sum, graph)
         inputs = np.asarray(data_run).transpose(1, 2, 0)[: input_width]
         data["inputs"].append(inputs)
@@ -198,9 +240,11 @@
             os.path.realpath(os.path.dirname(os.path.realpath(path)))),
         'data_GNN_nodamp_test')
 
+    data_dir = os.path.join(os.getcwd(), 'data')
+
     input_width = 5
-    days = 3
-    num_runs = 2
+    days = 30
+    num_runs = 1
 
-    generate_data(num_runs, path_data, input_width,
+    generate_data(num_runs, data_dir, path_data, input_width,
                   days, save_data=True)

From ff6c57149d4b579d0d59029525b5a1357e9a0073 Mon Sep 17 00:00:00 2001
From: HenrZu <69154294+HenrZu@users.noreply.github.com>
Date: Wed, 2 Oct 2024 10:07:10 +0200
Subject: [PATCH 31/33] timing graph sim (Delete before Merge!)
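Adds ad-hoc wall-clock timing around the graph advance loop so the cost of a
single graph simulation can be measured while benchmarking the data
generation. It mirrors the timing the Python scripts now print around the
parameter study (a sketch of that pattern, as added in the previous patch):

    study = ParameterStudy(graph, 0, days, dt=0.5, num_runs=1)
    start_time = time.time()
    study.run()
    print("Simulation took: ", time.time() - start_time)

As the subject says, this instrumentation is for local measurements only and
must be deleted before merging.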
---
 cpp/memilio/mobility/graph_simulation.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/cpp/memilio/mobility/graph_simulation.h b/cpp/memilio/mobility/graph_simulation.h
index 27f3942689..94000937f3 100644
--- a/cpp/memilio/mobility/graph_simulation.h
+++ b/cpp/memilio/mobility/graph_simulation.h
@@ -22,6 +22,7 @@
 #include "memilio/mobility/graph.h"
 #include "memilio/utils/random_number_generator.h"
+#include <chrono>
 
 namespace mio
 {
@@ -60,7 +61,9 @@ class GraphSimulationBase
 
     void advance(double t_max = 1.0)
     {
-        auto dt = m_dt;
+        auto dt         = m_dt;
+        auto start_time = std::chrono::high_resolution_clock::now(); // record the start time
+
         while (m_t < t_max) {
             if (m_t + dt > t_max) {
                 dt = t_max - m_t;
@@ -77,6 +80,12 @@ class GraphSimulationBase
                                  m_graph.nodes()[e.end_node_idx].property);
             }
         }
+
+        auto end_time = std::chrono::high_resolution_clock::now(); // record the end time
+        std::chrono::duration<double> execution_time = end_time - start_time; // compute the execution time
+
+        std::cout << "t = " << m_t << " execution time (Graph Simulation): " << execution_time.count() << "sec"
+                  << std::endl;
     }
 
     double get_t() const

From ac705a46fc23671b94d2d59ab2a0d82d72a80bb4 Mon Sep 17 00:00:00 2001
From: HenrZu <69154294+HenrZu@users.noreply.github.com>
Date: Wed, 2 Oct 2024 10:20:12 +0200
Subject: [PATCH 32/33] with_dampings
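Applies the restructuring from the no-damping script (patch 30) to
data_generation_withdamp.py: the graph is built once via get_graph(), and
each run only resamples the initial compartment values and applies the
damping days. Since run_secir_groups_simulation() adds dampings to the node
parameters, the contact patterns are reset from a reference before every run
(a sketch of the loop introduced in this patch, taken from the diff below):

    model_params = graph.get_node(0).property.parameters

    for i in range(num_runs):
        # reset contact matrix in each node
        for node_indx in range(graph.num_nodes):
            graph.get_node(node_indx).property.parameters = model_params

        data_run, damped_contact_matrix, damping_days_s, damping_factor = \
            run_secir_groups_simulation(days_sum, graph, damping_days[i])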
---
 .../GNN/data_generation_withdamp.py | 234 +++++++++++-------
 1 file changed, 139 insertions(+), 95 deletions(-)

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
index 868effff97..857c71f7da 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
@@ -3,7 +3,11 @@
 import pickle
 import random
 import numpy as np
+import time
+import memilio.simulation as mio
+import memilio.simulation.osecir as osecir
 from datetime import date
+from enum import Enum
 
 from progress.bar import Bar
 from sklearn.preprocessing import FunctionTransformer
@@ -20,93 +24,137 @@
     getBaselineMatrix, remove_confirmed_compartments, get_population, getMinimumMatrix)
 
 
-def run_secir_groups_simulation(days, populations, dampings, countykey_list, directory, baseline, edges):
+class Location(Enum):
+    Home = 0
+    School = 1
+    Work = 2
+    Other = 3
+
+
+start_date = mio.Date(2019, 1, 1)
+end_date = mio.Date(2020, 12, 31)
+
+
+def set_covid_parameters(model, num_groups=6):
+    for i in range(num_groups):
+        # Compartment transition duration
+        model.parameters.TimeExposed[AgeGroup(i)] = 3.2
+        model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2.
+        model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6.
+        model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12.
+        model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8.
+
+        # Compartment transition probabilities
+        model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5
+        model.parameters.TransmissionProbabilityOnContact[AgeGroup(
+            i)] = 0.1
+        model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09
+        model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25
+        model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2
+        model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25
+        model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3
+        model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup(
+            i)] = 0.5
+
+    # StartDay is the n-th day of the year
+    model.parameters.StartDay = start_date.day_in_year
+
+
+def set_contact_matrices(model, data_dir, num_groups=6):
+    contact_matrices = mio.ContactMatrixGroup(
+        len(list(Location)), num_groups)
+    locations = ["home", "school_pf_eig", "work", "other"]
+
+    for i, location in enumerate(locations):
+        baseline_file = os.path.join(
+            data_dir, "contacts", "baseline_" + location + ".txt")
+        minimum_file = os.path.join(
+            data_dir, "contacts", "minimum_" + location + ".txt")
+        contact_matrices[i] = mio.ContactMatrix(
+            mio.read_mobility_plain(baseline_file),
+            mio.read_mobility_plain(minimum_file)
+        )
+    model.parameters.ContactPatterns.cont_freq_mat = contact_matrices
+
+
+def get_graph(num_groups, data_dir):
+    model = Model(num_groups)
+    set_covid_parameters(model)
+    set_contact_matrices(model, data_dir)
+
+    graph = osecir.ModelGraph()
+
+    scaling_factor_infected = [2.5, 2.5, 2.5, 2.5, 2.5, 2.5]
+    scaling_factor_icu = 1.0
+    tnt_capacity_factor = 7.5 / 100000.
+
+    path_population_data = os.path.join(
+        data_dir, "pydata", "Germany",
+        "county_current_population.json")
+
+    mio.osecir.set_nodes(
+        model.parameters,
+        mio.Date(start_date.year,
+                 start_date.month, start_date.day),
+        mio.Date(end_date.year,
+                 end_date.month, end_date.day), data_dir,
+        path_population_data, True, graph, scaling_factor_infected,
+        scaling_factor_icu, tnt_capacity_factor, 0, False)
+
+    mio.osecir.set_edges(
+        data_dir, graph, len(Location))
+
+    return graph
+
+
+def run_secir_groups_simulation(days, graph, dampings, num_groups=6):
     """! Uses an ODE SECIR model allowing for asymptomatic infection with 6
     different age groups. The model is not stratified by region.
     Virus-specific parameters are fixed and initial number of persons
     in the particular infection states are chosen randomly from defined ranges.
     @param days (int) Describes how many days we simulate within a single run.
+    @param Graph Graph initialized for the start_date with the population data which
+    is sampled during the run.
     @param damping_day (int) The day when damping is applied.
-    @param populations List containing the population in each age group.
     @return List containing the populations in each compartment used to initialize
     the run.
     """
-    set_log_level(LogLevel.Off)
-
-    start_day = 1
-    start_month = 1
-    start_year = 2019
-    dt = 0.1
-
-    # Define age Groups
-    groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+']
-    num_groups = len(groups)
-    num_regions = len(populations)
-    models = []
-
-    # Initialize Parameters
-    for region in range(num_regions):
-        model = Model(num_groups)
+    for node_indx in range(graph.num_nodes):
+        model = graph.get_node(node_indx).property
 
         # Set parameters
+        # TODO: Put This in the draw_sample function in the ParameterStudy
        for i in range(num_groups):
-            # Compartment transition duration
-            model.parameters.TimeExposed[AgeGroup(i)] = 3.2
-            model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2.
- model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6. - model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12. - model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8. + age_group = AgeGroup(i) + pop_age_group = model.populations.get_group_total_AgeGroup( + age_group) # Initial number of people in each compartment with random numbers # Numbers are chosen heuristically based on experience - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Exposed)] = random.uniform( - 0.00025, 0.005) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedNoSymptoms)] = random.uniform( - 0.0001, 0.0035) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( + model.populations[age_group, Index_InfectionState(InfectionState.Exposed)] = random.uniform( + 0.00025, 0.005) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.InfectedNoSymptoms)] = random.uniform( + 0.0001, 0.0035) * pop_age_group + model.populations[age_group, Index_InfectionState( InfectionState.InfectedNoSymptomsConfirmed)] = 0 - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSymptoms)] = random.uniform( - 0.00007, 0.001) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( + model.populations[age_group, Index_InfectionState(InfectionState.InfectedSymptoms)] = random.uniform( + 0.00007, 0.001) * pop_age_group + model.populations[age_group, Index_InfectionState( InfectionState.InfectedSymptomsConfirmed)] = 0 - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSevere)] = random.uniform( - 0.00003, 0.0006) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedCritical)] = random.uniform( - 0.00001, 0.0002) * populations[region][i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Recovered)] = random.uniform( - 0.002, 0.08) * populations[region][i] - model.populations[AgeGroup(i), - Index_InfectionState(InfectionState.Dead)] = random.uniform( - 0, 0.0003) * populations[region][i] + model.populations[age_group, Index_InfectionState(InfectionState.InfectedSevere)] = random.uniform( + 0.00003, 0.0006) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.InfectedCritical)] = random.uniform( + 0.00001, 0.0002) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.Recovered)] = random.uniform( + 0.002, 0.08) * pop_age_group + model.populations[age_group, Index_InfectionState(InfectionState.Dead)] = random.uniform( + 0, 0.0003) * pop_age_group model.populations.set_difference_from_group_total_AgeGroup( - (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), populations[region][i]) - - # Compartment transition propabilities - model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5 - model.parameters.TransmissionProbabilityOnContact[AgeGroup( - i)] = 0.1 - model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09 - model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 - model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2 - model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25 - model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3 - # twice the value of RiskOfInfectionFromSymptomatic - model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup( - i)] = 0.5 - - # StartDay is the n-th day of the year - model.parameters.StartDay = ( - date(start_year, 
start_month, start_day) - date(start_year, 1, 1)).days - - model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline - model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( - (num_groups, num_groups)) * 0 + (age_group, Index_InfectionState(InfectionState.Susceptible)), + pop_age_group) + + # Apply mathematical constraints to parameters + model.apply_constraints() # Generate a damping matrix and assign it to the model damped_matrices = [] @@ -127,12 +175,13 @@ def run_secir_groups_simulation(days, populations, dampings, countykey_list, dir # Apply mathematical constraints to parameters model.apply_constraints() - models.append(model) + # set model to graph + graph.get_node(node_indx).property.populations = model.populations - graph = make_graph(directory, num_regions, countykey_list, models, edges) - - study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1) + study = ParameterStudy(graph, 0, days, dt=0.5, num_runs=1) + start_time = time.time() study.run() + print("Simulation took: ", time.time() - start_time) graph_run = study.run()[0] results = interpolate_simulation_result(graph_run) @@ -209,18 +258,22 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min def generate_data( - num_runs, path, input_width, label_width, number_of_dampings, save_data=True): + num_runs, data_dir, path, input_width, label_width, number_of_dampings, save_data=True): """! Generate dataset by calling run_secir_simulation (num_runs)-often @param num_runs Number of times, the function run_secir_simulation is called. + @param data_dir Directory with all data needed to initialize the models. @param path Path, where the datasets are stored. @param input_width Number of time steps used for model input. @param label_width Number of time steps (days) used as model output/label. @param number_of_dampings (int) The number of contact change points applied to the simulation. @param save_data Option to deactivate the save of the dataset. Per default true. 
""" - population = get_population() + set_log_level(mio.LogLevel.Error) days_sum = label_width+input_width-1 + num_groups = 6 + graph = get_graph(num_groups, data_dir) + # generate dampings damping_days = generate_dampings_withshadowdamp( number_of_dampings=number_of_dampings, days=label_width, @@ -241,30 +294,19 @@ def generate_data( "damping_day": [], "damped_matrix": []} - # get county ids - countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True) - directory = transform_mobility_directory() - - # Load baseline matrix - baseline = getBaselineMatrix() - - num_locations = 4 - graph = ModelGraph() - for i in range(len(countykey_list)): - graph.add_node(int(countykey_list[i]), Model(6)) - - set_edges(os.path.abspath(os.path.join(directory, os.pardir)), - graph, num_locations) - edges = graph.edges() - # show progess in terminal for longer runs # Due to the random structure, theres currently no need to shuffle the data bar = Bar('Number of Runs done', max=num_runs) + model_params = graph.get_node(0).property.parameters + for i in range(num_runs): + # reset contact matrix in each node + for node_indx in range(graph.num_nodes): + graph.get_node(node_indx).property.parameters = model_params data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation( - days_sum, population, damping_days[i], countykey_list, directory, baseline, edges) + days_sum, graph, damping_days[i]) inputs = np.asarray(data_run).transpose(1, 2, 0)[:input_width] data["inputs"].append(inputs) @@ -301,14 +343,16 @@ def generate_data( if __name__ == "__main__": input_width = 5 - label_width = 30 + label_width = 95 number_of_dampings = 1 - num_runs = 2 + num_runs = 1 path = os.path.dirname(os.path.realpath(__file__)) path_data = os.path.join( os.path.dirname( os.path.realpath(os.path.dirname(os.path.realpath(path)))), 'data_GNN_with_'+str(number_of_dampings)+'_dampings_test') - generate_data(num_runs, path_data, input_width, + data_dir = os.path.join(os.getcwd(), 'data') + + generate_data(num_runs, data_dir, path_data, input_width, label_width, number_of_dampings, save_data=False) From 10d5a9822e15bb279af2484c2484ad4f9421a814 Mon Sep 17 00:00:00 2001 From: HenrZu <69154294+HenrZu@users.noreply.github.com> Date: Wed, 2 Oct 2024 11:15:26 +0200 Subject: [PATCH 33/33] [ci skip] damping correctly setted and reseted after each run --- .../surrogatemodel/GNN/data_generation_withdamp.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py index 857c71f7da..1bf7857461 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py @@ -157,6 +157,7 @@ def run_secir_groups_simulation(days, graph, dampings, num_groups=6): model.apply_constraints() # Generate a damping matrix and assign it to the model + # TODO: This can be done outside and is (currently) static for all models damped_matrices = [] damping_coeff = [] for day in dampings: @@ -177,6 +178,7 @@ def run_secir_groups_simulation(days, graph, dampings, num_groups=6): model.apply_constraints() # set model to graph graph.get_node(node_indx).property.populations = model.populations + graph.get_node(node_indx).property.parameters = model.parameters study = ParameterStudy(graph, 0, days, dt=0.5, num_runs=1) start_time = 
---
 .../surrogatemodel/GNN/data_generation_withdamp.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
index 857c71f7da..1bf7857461 100644
--- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
+++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/GNN/data_generation_withdamp.py
@@ -157,6 +157,7 @@ def run_secir_groups_simulation(days, graph, dampings, num_groups=6):
         model.apply_constraints()
 
         # Generate a damping matrix and assign it to the model
+        # TODO: This can be done outside and is (currently) static for all models
         damped_matrices = []
         damping_coeff = []
         for day in dampings:
@@ -177,6 +178,7 @@ def run_secir_groups_simulation(days, graph, dampings, num_groups=6):
         model.apply_constraints()
         # set model to graph
         graph.get_node(node_indx).property.populations = model.populations
+        graph.get_node(node_indx).property.parameters = model.parameters
 
     study = ParameterStudy(graph, 0, days, dt=0.5, num_runs=1)
     start_time = time.time()
@@ -298,13 +300,13 @@
     # Due to the random structure, theres currently no need to shuffle the data
     bar = Bar('Number of Runs done', max=num_runs)
 
-    model_params = graph.get_node(0).property.parameters
+    model_params = copy.deepcopy(graph.get_node(0).property.parameters)
 
     for i in range(num_runs):
-
+        params_run = copy.deepcopy(model_params)
         # reset contact matrix in each node
         for node_indx in range(graph.num_nodes):
-            graph.get_node(node_indx).property.parameters = model_params
+            graph.get_node(node_indx).property.parameters = params_run
 
         data_run, damped_contact_matrix, damping_days_s, damping_factor = run_secir_groups_simulation(
             days_sum, graph, damping_days[i])
@@ -344,8 +346,8 @@
 
     input_width = 5
     label_width = 95
-    number_of_dampings = 1
-    num_runs = 1
+    number_of_dampings = 3
+    num_runs = 5
     path = os.path.dirname(os.path.realpath(__file__))
     path_data = os.path.join(
        os.path.dirname(