From 3c22df283ed71a6e6e49e6682845ff4214357ed4 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 11 Jun 2025 07:39:21 +0200 Subject: [PATCH 01/49] Adding different methods for generating dampings --- .../ode_secir_groups/dampings.py | 277 ++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py new file mode 100644 index 0000000000..06509e802e --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py @@ -0,0 +1,277 @@ +import random +import tensorflow as tf +import numpy as np + + +def generate_dampings(days, max_number_damping, method): + """ + Producing dampings for timeseries of length days according to the used method. + + :param days: Number of days per time series + :param max_number_damping: maximal possible number of active dampings, actual number of active dampings can be smaller. + :param method: Method used to generate the dampings, possible values "classic", "active", "random" + :returns: two lists containing the damping days and the associated damping factors + """ + if method == "classic": + damp_days, damp_factors = dampings_classic(days, max_number_damping) + elif method == "active": + damp_days, damp_factors = dampings_active(days, max_number_damping) + elif method == "random": + damp_days, damp_factors = dampings_random(days, max_number_damping) + else: + raise ValueError( + "The method argument has to be one of the following: 'classic', 'active' or 'random'.") + + return damp_days, damp_factors + + +# Active Damping +def dampings_active(days, max_number_damping): + """" + Generating list of damping days and corresponding damping factors using the active method. + + The damping days are created with equal distance on the interval [1, days-3]. + + :param days: Number of simulated days + :param max_number_damping: Maximal number of dampings per simulation + :returns: list of days and damping factors + """ + if max_number_damping > days-3: + raise ValueError( + "Number of dampings must be smaller than total number of days!" + ) + + # Setting parameters + gamma_pos = 0 + alpha = -1 + p0 = 0.5 + t1_max = -0.3 + t1_min = -1.2 + t2_max = 2 + t2_min = -0.5 + + # Defining possible damping days + distance_between_days = np.floor((days-3)/max_number_damping) + damp_days = [distance_between_days*(i+1) + for i in np.arange(max_number_damping)] + + # Generate damping factors + dampings = calc_factors_active( + max_number_damping, gamma_pos, alpha, p0, t1_max, t1_min, t2_max, t2_min) + + return damp_days, dampings + + +def calc_factors_active(n_ddays, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_min=-1.2, t2_max=2, t2_min=-0.5): + ''' + Producing damping factors using active damping method. + + The idea is the following: Damping factors are produced randomly until a threshold value is achieved. + In this case the factors are reduced stepwise until a moderate level is reached. + + :param n_ddays: Number of damping days in time series + :param gamma_pos: upper bound for h value + :param alpha: upper bound for h value, where active damping should be stopped + :param p0: probability for a change of the damping factor + :param t1_max: upper end point for size of active damping changes + :param t1_min: lower end point for size of active damping changes + :param t2_max: upper end point for size of active damping changes + :param t2_min: lower end point for size of base damping changes + :param t3_max: upper end + ''' + h = 0 + k = 0 + dampings = [] + + while k < n_ddays: + # If the threshold value is reached, active damping is started + if h > gamma_pos: + # active reducing the damping factor + while h > alpha and k < n_ddays: + delta_h = np.random.uniform(t1_min, t1_max) + h = h + delta_h + dampings.append(1-np.exp(h)) + k = k+1 + # otherwise changes of the damping factor are generated randomly + else: + # Whether or not a non-trivial change of the damping factor is applied + if np.random.binomial(1, p0): + delta_h = np.random.uniform(t2_min, t2_max) + + else: + delta_h = 0 + h = h+delta_h + dampings.append(1 - np.exp(h)) + k = k+1 + + return dampings + + +# Classic Damping +def dampings_classic(days, max_number_damping): + """ + Generate the damping days using shadow damping and picking days uniformly with a given minimal distance. + + The corresponding factors are drawn uniformly from the interval (0,0.5) + + :param days: Number of days simulated per run. + :param max_number_damping: Number of damping days generated. + :returns: Two lists of length max_number_dampings containing the days and the factors. + """ + # Generating damping days + # Setting parameters + min_distance = 2 + min_damping_day = 2 + + if min_distance*max_number_damping+min_damping_day > days: + raise ValueError("Invalid input: It's not possible to generate this number of damping" + "in the desired time interval.") + damp_days = generate_dampings_withshadowdamp( + max_number_damping, days, min_distance, min_damping_day, 1)[0] + + # Generating damping factors + damp_factors = np.random.uniform(0, 0.5, max_number_damping).tolist() + + return damp_days, damp_factors + + +def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min_damping_day, n_runs): + """ + Sampling the damping days according to the established method. + + The idea is to draw dampings with a minimum distance while traying to keep + the distribution of damping days uniformly. We create a list of all possible days, + draw one damping day and delete all days before and after the damping that + are within the range of the min_distance. To ensure that the the data is not biased, + we include days outside the usual range. A day x in the middle of the list can + be removed from the list by a drawn day before and after x. A day in the beggining + of the list can be removed only by drawn days y , y>x. This leads to the effect that + the first and last days are chosen more often. By drawing days ouside of the allowed range + (forbidden dampings) which are removed after, we ensure that also the days at the beginning and + end of the list can be removed from the list because of the minimum distance. + + :param number_of_dampings: Number of damping days per run + :param days: Total number of days per run + :param min_distance: Minimal distance between two damping days + :param min_damping_day: First day when a damping can be applied + :n_runs: Number of runs for which damping days should be generated + :returns: list of list of damping days. + """ + + number_of_dampings = number_of_dampings + days = days + min_distance = min_distance + min_damping_day = min_damping_day + number_of_runs = n_runs + + all_dampings = [] + count_runs = 0 + count_shadow = 0 + while len(all_dampings) < number_of_runs: + # Reset the days list and dampings for each run + days_list = list(range(min_damping_day, days)) + dampings = [] + + if count_shadow < 2: + for _ in range(number_of_dampings): + if len(days_list) > 0: + damp = random.choice(days_list) + days_before = list(range(damp - min_distance, damp)) + days_after = list(range(damp, damp + min_distance + 1)) + dampings.append(damp) + days_list = [ele for ele in days_list if ele not in ( + days_before + days_after)] + else: + # Restart the process when days_list is empty + break + else: + # Exit loop only if dampings were successfully drawn + forbidden_damping_values = list( + range(0 - min_distance, 0)) + list(range(days + 1, days + min_distance + 1)) + dampings = [ + ele for ele in dampings if ele not in forbidden_damping_values] + if len(dampings) >= number_of_dampings: + all_dampings.append(sorted(dampings)) + continue + else: + # Generate forbidden damping + damp = random.choice( + list(range(0 - min_distance, 0)) + + list(range(days + 1, days + min_distance + 1)) + ) + dampings.append(damp) + for _ in range(number_of_dampings): + if len(days_list) > 0: + damp = random.choice(days_list) + days_before = list(range(damp - min_distance, damp)) + days_after = list(range(damp, damp + min_distance + 1)) + dampings.append(damp) + days_list = [ele for ele in days_list if ele not in ( + days_before + days_after)] + else: + # Restart the process when days_list is empty + break + else: + # Reset shadow count only if dampings were successfully drawn + count_shadow = 0 + forbidden_damping_values = list( + range(0 - min_distance, 0)) + list(range(days + 1, days + min_distance + 1)) + dampings = [ + ele for ele in dampings if ele not in forbidden_damping_values] + if len(dampings) >= number_of_dampings: + all_dampings.append(sorted(dampings)) + continue + + # Restart process if any issue occurred + count_runs += 1 + count_shadow += 1 + + return all_dampings + + +# Random Damping + +def dampings_random(days, max_number_damping): + """ + Generate dampings according to an easy random rule. + + The days are drawn using geometrical distributed waiting times and fixed minimal distance between two damping days. + The factors are drwan uniformly on the interval (0,0.5) + + + :param days: Number of days simulated per run. + :param max_number_damping: Number of damping days generated. + :returns: Two lists of length max_number_dampings containing the days and the factors. + """ + # Setting parameters + min_damping_day = 2 + min_distance_damping_day = 2 + + # Calculating the expected distance between two dampings + distance_between_days = np.floor((days-min_damping_day)/max_number_damping) + # Reducing due to minimal distance restriction + reduced_distance = distance_between_days - min_distance_damping_day + + if reduced_distance < 0: + raise ValueError("Invalid input: It's not possible to generate this number of damping" + "in the desired time interval.") + + # Try till one admissible configuration of waiting times is produced + running = True + while running: + dist = np.random.geometric(1/reduced_distance, max_number_damping) + if np.sum(dist) + min_damping_day + max_number_damping*min_distance_damping_day < days: + running = False + + # Reconstructing the days using the waiting times + ddays = [] + day = min_damping_day + for k in np.arange(len(dist)): + day = day + dist[k] + ddays.append(day) + day = day + min_distance_damping_day + + # Generating the associated damping factors + damping_factors = np.random.uniform(0, 0.5, max_number_damping) + + return ddays, damping_factors From 9a1b6ccd245e392623f30869f44bd079ed963ab2 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 11 Jun 2025 09:04:10 +0200 Subject: [PATCH 02/49] [ci skip] included multiple dampings --- .../ode_secir_groups/data_generation.py | 82 ++++++++++++------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 418e6276ca..fe65e6f586 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -33,6 +33,7 @@ from memilio.simulation.osecir import (Index_InfectionState, InfectionState, Model, interpolate_simulation_result, simulate) +import memilio.surrogatemodel.ode_secir_groups.dampings as dampings def interpolate_age_groups(data_entry): @@ -77,21 +78,29 @@ def transform_data(data, transformer, num_runs): :returns: Transformed data. """ - data = np.asarray(data).transpose(2, 0, 1).reshape(48, -1) + num_groups = 6 + num_compartments = 8 + + data = np.asarray(data).transpose(2, 0, 1).reshape( + num_groups*num_compartments, -1) scaled_data = transformer.transform(data) - return tf.convert_to_tensor(scaled_data.transpose().reshape(num_runs, -1, 48)) + return tf.convert_to_tensor(scaled_data.transpose().reshape(num_runs, -1, num_groups*num_compartments)) -def run_secir_groups_simulation(days, damping_day, populations): +def run_secir_groups_simulation(days, damping_days, damping_factors, populations): """ Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. :param days: Describes how many days we simulate within a single run. - :param damping_day: The day when damping is applied. + :param damping_days: The days when damping is applied. + :param damping_factors: damping factors associated to the damping days. :param populations: List containing the population in each age group. :returns: List containing the populations in each compartment used to initialize the run. """ + if len(damping_days) != len(damping_factors): + raise ValueError("Length of damping_days and damping_factors differ!") + set_log_level(LogLevel.Off) start_day = 1 @@ -166,14 +175,16 @@ def run_secir_groups_simulation(days, damping_day, populations): model.parameters.ContactPatterns.cont_freq_mat[0].minimum = minimum # Generate a damping matrix and assign it to the model - damping = np.ones((num_groups, num_groups) - ) * np.float16(random.uniform(0, 0.5)) - - model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( - coeffs=(damping), t=damping_day, level=0, type=0)) + damped_matrices = [] - damped_contact_matrix = model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at( - damping_day+1) + for i in np.arange(len(damping_days)): + damping = damping = np.ones((num_groups, num_groups) + ) * damping_factors[i] + day = damping_days[i] + model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( + coeffs=(damping), t=day, level=0, type=0)) + damped_matrices.append(model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at( + day+1)) # Apply mathematical constraints to parameters model.apply_constraints() @@ -184,19 +195,19 @@ def run_secir_groups_simulation(days, damping_day, populations): # Interpolate simulation result on days time scale result = interpolate_simulation_result(result) + # Omit first column, as the time points are not of interest here. result_array = remove_confirmed_compartments( np.transpose(result.as_ndarray()[1:, :])) - # Omit first column, as the time points are not of interest here. dataset_entries = copy.deepcopy(result_array) - return dataset_entries.tolist(), damped_contact_matrix + return dataset_entries.tolist(), damped_matrices def generate_data( num_runs, path_out, path_population, input_width, label_width, - normalize=True, save_data=True): - """ Generate data sets of num_runs many equation-based model simulations and transforms the computed results by a log(1+x) transformation. + normalize=True, save_data=True, damping_method="active", max_number_damping=5): + """ Generate data sets of num_runs many equation-based model simulations and possibly transforms the computed results by a log(1+x) transformation. Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks. In general, we have 8 different compartments and 6 age groups. If we choose, input_width = 5 and label_width = 20, the dataset has @@ -210,14 +221,17 @@ def generate_data( :param label_width: Int value that defines the size of the labels. :param normalize: Default: true Option to transform dataset by logarithmic normalization. :param save_data: Default: true Option to save the dataset. + :param damping_method: String specifying the damping method, that should be used. Possible values "classic", "active", "random". + :param max_number_damping: Maximal number of possible dampings. :returns: Data dictionary of input and label data sets. """ data = { "inputs": [], "labels": [], - "contact_matrix": [], - "damping_day": [] + "contact_matrices": [], + "damping_factors": [], + "damping_days": [] } # The number of days is the same as the sum of input and label width. @@ -232,16 +246,17 @@ def generate_data( bar = Bar('Number of Runs done', max=num_runs) for _ in range(0, num_runs): - # Generate a random damping day - damping_day = random.randrange( - input_width, input_width+label_width) + # Generate random damping days + damping_days, damping_factors = dampings.generate_dampings( + days, max_number_damping, method=damping_method) - data_run, damped_contact_matrix = run_secir_groups_simulation( - days, damping_day, population[random.randint(0, len(population) - 1)]) + data_run, damped_matrices = run_secir_groups_simulation( + days, damping_days, damping_factors, population[random.randint(0, len(population) - 1)]) data['inputs'].append(data_run[:input_width]) data['labels'].append(data_run[input_width:]) - data['contact_matrix'].append(np.array(damped_contact_matrix)) - data['damping_day'].append([damping_day]) + data['contact_matrices'].append(damped_matrices) + data['damping_factors'].append(damping_factors) + data['damping_days'].append(damping_days) bar.next() bar.finish() @@ -261,8 +276,15 @@ def generate_data( if not os.path.isdir(path_out): os.mkdir(path_out) - # save dict to json file - with open(os.path.join(path_out, 'data_secir_groups.pickle'), 'wb') as f: + # save dict to pickle file + if num_runs < 1000: + filename = 'data_secir_groups_%ddays_%d_' % ( + label_width, num_runs) + damping_method+'.pickle' + else: + filename = 'data_secir_groups_%ddays_%dk_' % ( + label_width, num_runs//1000) + damping_method+'.pickle' + + with open(os.path.join(path_out, filename), 'wb') as f: pickle.dump(data, f) return data @@ -291,13 +313,13 @@ def getMinimumMatrix(): """ loads the minimum matrix""" minimum_contact_matrix0 = os.path.join( - "./data/contacts/minimum_home.txt") + "./data/Germany/contacts/minimum_home.txt") minimum_contact_matrix1 = os.path.join( - "./data/contacts/minimum_school_pf_eig.txt") + "./data/Germany/contacts/minimum_school_pf_eig.txt") minimum_contact_matrix2 = os.path.join( - "./data/contacts/minimum_work.txt") + "./data/Germany/contacts/minimum_work.txt") minimum_contact_matrix3 = os.path.join( - "./data/contacts/minimum_other.txt") + "./data/Germany/contacts/minimum_other.txt") minimum = np.loadtxt(minimum_contact_matrix0) \ + np.loadtxt(minimum_contact_matrix1) + \ From 26caf61e32559be61635cfd82c51278611c1c262 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 11 Jun 2025 10:29:52 +0200 Subject: [PATCH 03/49] Adjusting data preparation to handle multiple dampings --- .../surrogatemodel/ode_secir_groups/model.py | 91 +++++++++---------- 1 file changed, 41 insertions(+), 50 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py index 221291f36f..f48ddfa63e 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py @@ -38,10 +38,9 @@ def plot_compartment_prediction_model( :param inputs: test inputs for model prediction. :param labels: test labels. :param modeltype: type of model. Can be 'classic' or 'timeseries' - :param model: trained model. (Default value = None) - :param plot_col: string name of compartment to be plotted. - :param max_subplots: Number of the simulation runs to be plotted and compared against. (Default value = 8) + :param model: trained model. (Default value = None) :param plot_compartment: (Default value = 'InfectedSymptoms') + :param max_subplots: Number of the simulation runs to be plotted and compared against. (Default value = 8) :returns: No return """ num_groups = 6 @@ -53,6 +52,9 @@ def plot_compartment_prediction_model( (inputs.shape[1] - (1 + num_groups * num_groups)) / (num_groups * num_compartments)) elif modeltype == 'timeseries': input_width = int(inputs.shape[1]) + else: + ValueError("Modeltype "+modeltype + " not known.") + label_width = int(labels.shape[1]) plt.figure(figsize=(12, 8)) @@ -178,7 +180,7 @@ def prepare_data_classic(data): """ Transforming data to be processable by "classic" network, simply flattening and concatenating for each data instance. - :param data: dictionary produces by data_generation + :param data: dictionary produced by data_generation :returns: dictionary with entries { "train_inputs", "train_labels", "valid_inputs", "valid_labels", "test_inputs", "test_labels"} @@ -192,35 +194,20 @@ def prepare_data_classic(data): # Flattening all inputs inputs = flat_input(data["inputs"]) labels = data["labels"] - cmatrix = flat_input(data["contact_matrix"]) - dampdays = data["damping_day"] + cmatrices = flat_input(data["contact_matrices"]) + dampdays = flat_input(data["damping_days"]) + + aggregated_inputs = tf.concat( + [tf.cast(inputs, tf.float32), + tf.cast(cmatrices, tf.float32), + tf.cast(dampdays, tf.float32)], + axis=1, name='concat') # Splitting the data - compinputs_train, compinputs_valid, compinputs_test = tf.split( - inputs, split_indices, 0) labels_train, labels_valid, labels_test = tf.split( labels, split_indices, 0) - cmatrix_train, cmatrix_valid, cmatrix_test = tf.split( - cmatrix, split_indices, 0) - dampdays_train, dampdays_valid, dampdays_test = tf.split( - dampdays, split_indices, 0) - - # Combining the ingredients to one input object - inputs_train = tf.concat( - [tf.cast(compinputs_train, tf.float32), - tf.cast(cmatrix_train, tf.float32), - tf.cast(dampdays_train, tf.float32)], - axis=1, name='concat') - inputs_valid = tf.concat( - [tf.cast(compinputs_valid, tf.float32), - tf.cast(cmatrix_valid, tf.float32), - tf.cast(dampdays_valid, tf.float32)], - axis=1, name='concat') - inputs_test = tf.concat( - [tf.cast(compinputs_test, tf.float32), - tf.cast(cmatrix_test, tf.float32), - tf.cast(dampdays_test, tf.float32)], - axis=1, name='concat') + inputs_train, inputs_valid, inputs_test = tf.split( + aggregated_inputs, split_indices, 0) return { "train_inputs": inputs_train, @@ -236,7 +223,7 @@ def prod_time_series(obj, n, length_input): """ Repeating static informations to fit into a time series framework - :param obj: an array of objects, which should be repeated + :param obj: an array of objects of shape (n, shape_rest), which should be repeated :param n: total number of samples :param length_input: number of days observed per input :returns: a tensor of shape [n, length_input, -1], where for each sample the static object is repeated length_input times @@ -265,16 +252,17 @@ def prepare_data_timeseries(data): n = data["inputs"].shape[0] # number of days per input sample - input_width = 5 + input_width = data["inputs"][0].shape[0] # Reshaping the matrix input - cmatrix = flat_input(tf.stack(data["contact_matrix"])) + cmatrices = flat_input(tf.stack(data["contact_matrices"])) + dampdays = flat_input(tf.stack(data["damping_days"])) - # Repeat data (contact matrix and dampinhg day) to produce time series - cmatrix_repeated = prod_time_series(cmatrix, n, input_width) - dampdays_repeated = prod_time_series(data["damping_day"], n, input_width) + # Repeat data (contact matrix and damping day) to produce time series + cmatrices_repeated = prod_time_series(cmatrices, n, input_width) + dampdays_repeated = prod_time_series(dampdays, n, input_width) - # Calculate split inidces + # Calculate split indices split_indices = calc_split_index(n) # Splitting the data @@ -282,26 +270,26 @@ def prepare_data_timeseries(data): data["inputs"], split_indices, 0) labels_train, labels_valid, labels_test = tf.split( data["labels"], split_indices, 0) - cmatrix_train, cmatrix_valid, cmatrix_test = tf.split( - cmatrix_repeated, split_indices, 0) + cmatrices_train, cmatrices_valid, cmatrices_test = tf.split( + cmatrices_repeated, split_indices, 0) dampdays_train, dampdays_valid, dampdays_test = tf.split( dampdays_repeated, split_indices, 0) # Combining the ingredients to one input object inputs_train = tf.concat( - [tf.cast(compinputs_train, tf.float16), - tf.cast(cmatrix_train, tf.float16), - tf.cast(dampdays_train, tf.float16)], + [tf.cast(compinputs_train, tf.float32), + tf.cast(cmatrices_train, tf.float32), + tf.cast(dampdays_train, tf.float32)], axis=2, name='concat') inputs_valid = tf.concat( - [tf.cast(compinputs_valid, tf.float16), - tf.cast(cmatrix_valid, tf.float16), - tf.cast(dampdays_valid, tf.float16)], + [tf.cast(compinputs_valid, tf.float32), + tf.cast(cmatrices_valid, tf.float32), + tf.cast(dampdays_valid, tf.float32)], axis=2, name='concat') inputs_test = tf.concat( - [tf.cast(compinputs_test, tf.float16), - tf.cast(cmatrix_test, tf.float16), - tf.cast(dampdays_test, tf.float16)], + [tf.cast(compinputs_test, tf.float32), + tf.cast(cmatrices_test, tf.float32), + tf.cast(dampdays_test, tf.float32)], axis=2, name='concat') return { @@ -362,7 +350,7 @@ def initialize_model(parameters): def network_fit( - model, modeltype, training_parameter, path, filename='data_secir_groups_30days_Germany_10k_damp.pickle', plot=True): + model, modeltype, training_parameter, path, filename='data_secir_groups_30days_10k_active.pickle', plot_stats=True): """ Training and evaluation of a given model with mean squared error loss and Adam optimizer using the mean absolute error as a metric. :param model: Keras sequential model. @@ -372,7 +360,7 @@ def network_fit( metrics is a list of used training metrics, e.g. [tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.MeanAbsolutePercentageError()] :param path: path of the dataset. :param filename: name of the file containing the data - :param plot: (Default value = True) + :param plot_stats: (Default value = True) :returns: training history as returned by the keras fit() method. """ # Unpacking training parameters @@ -393,6 +381,9 @@ def network_fit( elif modeltype == 'timeseries': data_prep = prepare_data_timeseries(data) + else: + raise ValueError("modeltype must be either classic or timeseries!") + # Setting up the training parameters batch_size = 32 early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', @@ -412,7 +403,7 @@ def network_fit( batch_size=batch_size, callbacks=[early_stopping]) - if (plot): + if (plot_stats): plot_losses(history) plot_compartment_prediction_model( test_inputs, test_labels, modeltype, model=model, From 3c93f21b0abdd074c3104637961c7e5b97ad6e56 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 11 Jun 2025 10:34:59 +0200 Subject: [PATCH 04/49] Updating tests due to changed functionality --- .../test_surrogatemodel_ode_secir_groups.py | 181 ++++++++++++------ 1 file changed, 125 insertions(+), 56 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 937e9a4efd..10e960cd28 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -20,7 +20,7 @@ from pyfakefs import fake_filesystem_unittest from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, - network_architectures) + network_architectures, dampings) from unittest.mock import patch import os import unittest @@ -43,31 +43,89 @@ def setUp(self): """ """ self.setUpPyfakefs() + def test_dampings_active(self): + # length of generated sequence + factors1 = dampings.calc_factors_active(10) + factors2 = dampings.calc_factors_active(18) + + self.assertEqual(len(factors1), 10) + self.assertEqual(len(factors2), 18) + + with self.assertRaises(ValueError) as error: + dampings.dampings_active(10, 200) + error_message = "Number of dampings must be smaller than total number of days!" + self.assertEqual(str(error.exception), error_message) + + # Combine days and factors + days1, factors1 = dampings.dampings_active(30, 5) + days2, factors2 = dampings.dampings_active(40, 10) + + self.assertEqual(len(days1), 5) + self.assertEqual(len(factors1), len(days1)) + self.assertEqual(len(days2), 10) + self.assertEqual(len(days2), len(factors2)) + + def test_dampings_classic(self): + days1 = dampings.generate_dampings_withshadowdamp(4, 30, 3, 4, 1) + days2 = dampings.generate_dampings_withshadowdamp(10, 90, 2, 10, 1) + self.assertEqual(len(days1[0]), 4) + self.assertEqual(len(days2[0]), 10) + + with self.assertRaises(ValueError) as error: + dampings.dampings_classic(10, 200) + error_message = "Invalid input: It's not possible to generate this number of damping" \ + "in the desired time interval." + self.assertEqual(str(error.exception), error_message) + + # Combine days and factors + days1, factors1 = dampings.dampings_classic(30, 5) + days2, factors2 = dampings.dampings_classic(40, 5) + + self.assertEqual(len(days1), 5) + self.assertEqual(len(factors1), len(days1)) + self.assertEqual(len(days2), 5) + self.assertEqual(len(days2), len(factors2)) + + def test_dampings_random(self): + with self.assertRaises(ValueError) as error: + dampings.dampings_random(10, 200) + error_message = "Invalid input: It's not possible to generate this number of damping" \ + "in the desired time interval." + self.assertEqual(str(error.exception), error_message) + + days1, factors1 = dampings.dampings_random(30, 5) + days2, factors2 = dampings.dampings_random(40, 5) + + self.assertEqual(len(days1), 5) + self.assertEqual(len(factors1), len(days1)) + self.assertEqual(len(days2), 5) + self.assertEqual(len(days2), len(factors2)) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', return_value=0.1 * np.ones((6, 6))) @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', return_value=0.6 * np.ones((6, 6))) def test_simulation_run(self, mock_baseline, mock_minimum): """ - :param mock_baseline: :param mock_minimum: - """ + days_1 = 10 days_2 = 30 days_3 = 50 - damping_date = 5 + damping_days = [3, 5] + damping_factors = [0.3, 0.4] population = [5256.0, 10551, 32368.5, 43637.833333333336, 22874.066666666666, 8473.6] simulation_1 = data_generation.run_secir_groups_simulation( - days_1, damping_date, population) + days_1, damping_days, damping_factors, population) simulation_2 = data_generation.run_secir_groups_simulation( - days_2, damping_date, population) + days_2, damping_days, damping_factors, population) simulation_3 = data_generation.run_secir_groups_simulation( - days_3, damping_date, population) + days_3, damping_days, damping_factors, population) # result length self.assertEqual(len(simulation_1[0]), days_1+1) @@ -76,23 +134,23 @@ def test_simulation_run(self, mock_baseline, mock_minimum): # damping self.assertEqual( - simulation_1[1].size, + simulation_1[1][0].size, len(population) * len(population)) self.assertEqual( - simulation_2[1].size, + simulation_2[1][0].size, len(population) * len(population)) self.assertEqual( - simulation_3[1].size, + simulation_3[1][0].size, len(population) * len(population)) - self.assertLessEqual(simulation_1[1].max(), 1) - self.assertGreaterEqual(simulation_1[1].max(), 0) + self.assertLessEqual(simulation_1[1][0].max(), 1) + self.assertGreaterEqual(simulation_1[1][0].max(), 0) - self.assertLessEqual(simulation_2[1].max(), 1) - self.assertGreaterEqual(simulation_2[1].max(), 0) + self.assertLessEqual(simulation_2[1][0].max(), 1) + self.assertGreaterEqual(simulation_2[1][0].max(), 0) - self.assertLessEqual(simulation_3[1].max(), 1) - self.assertGreaterEqual(simulation_3[1].max(), 0) + self.assertLessEqual(simulation_3[1][0].max(), 1) + self.assertGreaterEqual(simulation_3[1][0].max(), 0) @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', return_value=0.1 * np.ones((6, 6))) @@ -104,17 +162,13 @@ def test_simulation_run(self, mock_baseline, mock_minimum): def test_data_generation_runs( self, mock_population, mock_baseline, mock_minimum): """ - :param mock_population: :param mock_baseline: :param mock_minimum: - """ - - input_width_1 = 1 + input_width_1 = 10 input_width_2 = 5 - - label_width_1 = 1 + label_width_1 = 20 label_width_2 = 10 num_runs_1 = 1 @@ -150,15 +204,12 @@ def test_data_generation_runs( def test_data_generation_save( self, mock_population, mock_baseline, mock_minimum): """ - - :param mock_population: - :param mock_baseline: - :param mock_minimum: - + : param mock_population: + : param mock_baseline: + : param mock_minimum: """ - - input_width = 2 - label_width = 3 + input_width = 5 + label_width = 30 num_runs = 1 data_generation.generate_data(num_runs, self.path, "", input_width, @@ -166,9 +217,10 @@ def test_data_generation_save( self.assertEqual(len(os.listdir(self.path)), 1) self.assertEqual(os.listdir(self.path), - ['data_secir_groups.pickle']) + ['data_secir_groups_30days_1_active.pickle']) + +# # Testing network_architectures.py -# Testing network_architectures.py def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -380,39 +432,56 @@ def test_calc_split_index(self): split_index = model.calc_split_index(10, 0.7, 0.1, 0.2) self.assertEqual(split_index, [7, 1, 2]) + def test_flat_input(self): + A = np.zeros((12, 12)) + a1 = [A for _ in np.arange(5)] + a2 = np.zeros((5, 144)) + a1_flatten = model.flat_input(a1) + b = a2 == a1_flatten + self.assertTrue(np.asarray(b).all()) + def test_prepare_data_classic(self): data = { - "inputs": np.zeros((10, 5, 1)), - "labels": np.zeros((10, 2)), - "contact_matrix": [np.zeros((2, 2)) for _ in np.arange(10)], - "damping_day": [[1] for _ in np.arange(10)] + "inputs": np.zeros((10, 5, 48)), + "labels": np.zeros((10, 25, 48)), + "contact_matrices": [[np.zeros((6, 6)) for _ in np.arange(2)] for _ in np.arange(10)], + "damping_days": [[1, 2] for _ in np.arange(10)] } data_new = model.prepare_data_classic(data) - res = tf.cast([0, 0, 0, 0, 0, 0, 0, 0, 0, 1], tf.float32) - self.assertTrue(all(res == data_new["train_inputs"][0])) - self.assertEqual(data_new["train_inputs"].shape, (7, 10)) - self.assertEqual(data_new["valid_labels"].shape, (2, 2)) + a = np.zeros(5*48 + 2*36) + b = np.array([1, 2]) + res = tf.cast(np.hstack((a, b)).reshape( + (1, 5*48 + 2*36+2)), tf.float32) + self.assertTrue(np.asarray(res == data_new["train_inputs"][0]).all()) + self.assertEqual(data_new["train_inputs"].shape, (7, 5*48 + 2*36 + 2)) + self.assertEqual(data_new["valid_labels"].shape, (2, 25, 48)) def test_prod_time_series(self): - obj = [1, 0, 2] + A = np.ones((2, 3)) + obj = [i*A for i in np.arange(3)] ts = model.prod_time_series(obj, 3, 2) + res = [[0*A, 0*A], [A, A], [2*A, 2*A]] bl = (ts == tf.reshape( - tf.stack([[1, 1], [0, 0], [2, 2]]), [3, 2, 1])) + tf.stack(res), [3, 2, -1])) self.assertTrue(tf.math.reduce_all(bl)) - """ def test_prepare_data_timeseries(self): data = { - "inputs": np.zeros((10, 5, 1)), - "labels": np.zeros((10, 2)), - "contact_matrix": [np.zeros((2, 2)) for _ in np.arange(10)], - "damping_day": [[1] for _ in np.arange(10)] + "inputs": np.zeros((10, 5, 48)), + "labels": np.zeros((10, 25, 48)), + "contact_matrices": [[np.zeros((6, 6)) for _ in np.arange(2)] for _ in np.arange(10)], + "damping_days": [[1, 2] for _ in np.arange(10)] } data_new = model.prepare_data_timeseries(data) - res = tf.cast([[0, 0, 0, 0, 0, 1] for _ in np.arange(5)], tf.float16) + a = np.zeros(48 + 2*36) + b = np.array([1, 2]) + res = tf.cast(np.hstack((a, b)).reshape( + (1, 48 + 2*36+2)), tf.float32) + res = tf.cast([res for _ in np.arange(5)], tf.float32) + self.assertTrue(tf.math.reduce_all( + data_new["train_inputs"].shape == (7, 5, 48 + 2*36 + 2))) self.assertTrue(tf.math.reduce_all(res == data_new["train_inputs"][0])) - """ def test_initialize_model(self): # Helper function to normalize the .getconfig() output @@ -514,7 +583,7 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): label_width = 10 num_runs = 5 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width) + label_width, max_number_damping=1) # models with multiple outputs model_mlp_multi_input_multi_output = model.network_fit( @@ -522,8 +591,8 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): label_width), training_parameter=training_parameter, path=self.path, - filename="data_secir_groups.pickle", - modeltype='classic', plot=False) + filename="data_secir_groups_10days_5_active.pickle", + modeltype='classic', plot_stats=False) self.assertEqual( model_mlp_multi_input_multi_output.model.output_shape[1], label_width) @@ -537,8 +606,8 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): modeltype='timeseries', training_parameter=training_parameter, path=self.path, - filename="data_secir_groups.pickle", - plot=False) + filename="data_secir_groups_10days_5_active.pickle", + plot_stats=False) self.assertEqual( model_lstm_multi_output.model.output_shape[1], label_width) self.assertEqual( @@ -550,8 +619,8 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): modeltype='timeseries', training_parameter=training_parameter, path=self.path, - filename="data_secir_groups.pickle", - plot=False) + filename="data_secir_groups_10days_5_active.pickle", + plot_stats=False) self.assertEqual( cnn_output.model.output_shape[1], label_width) self.assertEqual( From e37e14396c5f0544b24b79dea92ee4c960fbccc3 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 16 Jun 2025 09:01:48 +0200 Subject: [PATCH 05/49] Adding comments --- .../ode_secir_groups/dampings.py | 21 +- .../ode_secir_groups/data_generation.py | 1 + .../ode_secir_groups/generate_dampings.py | 456 ++++++++++++++++++ .../surrogatemodel/ode_secir_groups/model.py | 13 +- .../test_surrogatemodel_ode_secir_groups.py | 2 +- 5 files changed, 485 insertions(+), 8 deletions(-) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py index 06509e802e..7cec1614ae 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py @@ -1,5 +1,24 @@ +############################################################################# +# Copyright (C) 2020-2025 MEmilio +# +# Authors: Manuel Heger, Henrik Zunker +# +# Contact: Martin J. Kuehn +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################# + import random -import tensorflow as tf import numpy as np diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index fe65e6f586..a96ef43f70 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -200,6 +200,7 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations np.transpose(result.as_ndarray()[1:, :])) dataset_entries = copy.deepcopy(result_array) + dataset_entries = np.nan_to_num(dataset_entries) return dataset_entries.tolist(), damped_matrices diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py new file mode 100644 index 0000000000..bc0247833b --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py @@ -0,0 +1,456 @@ +import os +import pickle + +import matplotlib.pyplot as plt +import plotly.express as px +import plotly.graph_objects as go +import numpy as np +import pandas as pd +import tensorflow as tf + +import sklearn as sk +from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import train_test_split + +import copy +import os +import pickle +import random +import json +from datetime import date + +import numpy as np +import tensorflow as tf +from progress.bar import Bar +from sklearn.preprocessing import FunctionTransformer + +from memilio.simulation import (AgeGroup, Damping, LogLevel, set_log_level) +from memilio.simulation.osecir import (Index_InfectionState, + InfectionState, Model, + interpolate_simulation_result, simulate) + +from memilio.surrogatemodel.ode_secir_groups.data_generation import ( + remove_confirmed_compartments, get_population, getBaselineMatrix) +from memilio.surrogatemodel.ode_secir_groups.data_generation_test import transform_data + + +def calc_damps_var1(n_days, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_min=-1.2, t2_max=2, t2_min=-0.5): + ''' + :param n_days: Number of days per time series + :param gamma_pos: upper bound for h value + :param alpha: upper bound for h value, where active damping should be stopped + :param p0: probability for a change of the damping factor + :param t1_max: upper end point for size of active damping changes + :param t1_min: lower end point for size of active damping changes + :param t2_max: upper end point for size of active damping changes + :param t2_min: lower end point for size of base damping changes + :param t3_max: upper end + ''' + h = 0 + k = 0 + dampings = [] + while k < n_days: + if h > gamma_pos: + while h > alpha and k < n_days: + delta_h = np.random.uniform(t1_min, t1_max) + h = h + delta_h + dampings.append(1-np.exp(h)) + k = k+1 + + else: + if np.random.binomial(1, p0): + delta_h = np.random.uniform(t2_min, t2_max) + + else: + delta_h = 0 + h = h+delta_h + dampings.append(1 - np.exp(h)) + k = k+1 + + return dampings + + +def generate_dampings(N, ndays, ndampings, parameters): + """ + Generating N samples of dampings for a timeseries of length ndays. + + :param N: Number of samples to produce + :param ndays: Number of days per sample + :param ndampings: maximal number of dampings + :param parameters: Tuple of parameters used to generate the damping, this should be of the form + (gamma_pos, alpha, p0, t1_max, t1_min, t2_max, t2_min) + :returns: dictionary of lists, "damping_days" is a list of length N containing list of days of length ndays, + "damping_coeff" is a list of length N, where entry is a list of length ndays containing the damping factors. + + """ + # Unpacking the parameters + gamma_pos, alpha, p0, t1_max, t1_min, t2_max, t2_min = parameters + + # Initializing the returns + damping_days = [] + damping_coeff = [] + k = ndays//ndampings + + days = [j*k for j in np.arange(ndampings)] + + # Generating the damping coefficients for each sample + for i in np.arange(N): + damping_days.append(days) + damping_coeff.append(calc_damps_var1( + n_days=ndampings, gamma_pos=gamma_pos, alpha=alpha, p0=p0, t1_max=t1_max, t1_min=t1_min, + t2_max=t2_max, t2_min=t2_min + )) + + return { + "damping_days": damping_days, + "damping_coeff": damping_coeff + } + + +def run_secir_simulation_v1_1(days, damping_days, damping_factors, populations, divi_start): + """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. + The model is not stratified by region. + Virus-specific parameters are fixed and initial number of persons in the particular + infection states are chosen randomly from defined ranges. + :param days: Describes how many days we simulate within a single run. + :param damping_days: Days, where a damping is possible + :param damping_factors: Damping factors associated to the damping days. + :param populations: List containing the population in each age group. + :param divi_start: Start value for critical infected. + :return: List containing the populations in each compartment used to initialize the run. + """ + set_log_level(LogLevel.Off) + start_day = 1 + start_month = 1 + start_year = 2019 + dt = 0.1 + + # Define age Groups + groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] + num_groups = len(groups) + groups_id = np.arange(num_groups) + + # age specific parameters + TransmissionProbabilityOnContact = [0.03, 0.06, 0.06, 0.06, 0.09, 0.175] + RecoveredPerInfectedNoSymptoms = [0.25, 0.25, 0.2, 0.2, 0.2, 0.2] + SeverePerInfectedSymptoms = [0.0075, 0.0075, 0.019, 0.0615, 0.165, 0.225] + CriticalPerSevere = [0.075, 0.075, 0.075, 0.15, 0.3, 0.4] + DeathsPerCritical = [0.05, 0.05, 0.14, 0.14, 0.4, 0.6] + + TimeInfectedNoSymptoms = [2.74, 2.74, 2.565, 2.565, 2.565, 2.565] + TimeInfectedSymptoms = [7.02625, 7.02625, + 7.0665, 6.9385, 6.835, 6.775] + TimeInfectedSevere = [5, 5, 5.925, 7.55, 8.5, 11] + TimeInfectedCritical = [6.95, 6.95, 6.86, 17.36, 17.1, 11.6] + + # Initialize Parameters + model = Model(num_groups) + + # Set parameters + for i, rho, muCR, muHI, muUH, muDU, tc, ti, th, tu in zip(groups_id, TransmissionProbabilityOnContact, RecoveredPerInfectedNoSymptoms, SeverePerInfectedSymptoms, CriticalPerSevere, DeathsPerCritical, TimeInfectedNoSymptoms, TimeInfectedSymptoms, TimeInfectedSevere, TimeInfectedCritical): + # Compartment transition duration + model.parameters.TimeExposed[AgeGroup(i)] = 3.335 + model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = tc + model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = ti + model.parameters.TimeInfectedSevere[AgeGroup(i)] = th + model.parameters.TimeInfectedCritical[AgeGroup(i)] = tu + + # Initial number of people in each compartment with random numbers + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Exposed)] = random.uniform( + 0.00025, 0.0005) * populations[i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptoms)] = random.uniform( + 0.0001, 0.00035) * populations[i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedNoSymptomsConfirmed)] = random.uniform( + 0.0001, 0.00035) * populations[i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptoms)] = random.uniform( + 0.00007, 0.0001) * populations[i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSymptomsConfirmed)] = 0 + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedSevere)] = random.uniform( + 0.00003, 0.00006) * populations[i] + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.InfectedCritical)] = 1/6 * divi_start + model.populations[AgeGroup(i), Index_InfectionState( + InfectionState.Recovered)] = random.uniform( + 0.002, 0.008) * populations[i] + model.populations[AgeGroup(i), + Index_InfectionState(InfectionState.Dead)] = random.uniform( + 0, 0.0003) * populations[i] + model.populations.set_difference_from_group_total_AgeGroup( + (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), populations[i]) + + # Compartment transition propabilities + model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 1 + model.parameters.TransmissionProbabilityOnContact[AgeGroup(i)] = rho + model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = muCR + model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 + model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = muHI + model.parameters.CriticalPerSevere[AgeGroup(i)] = muUH + model.parameters.DeathsPerCritical[AgeGroup(i)] = muDU + # twice the value of RiskOfInfectionFromSymptomatic + model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.5 + + # StartDay is the n-th day of the year + model.parameters.StartDay = ( + date(start_year, start_month, start_day) - date(start_year, 1, 1)).days + + # Load baseline and minimum contact matrix and assign them to the model + baseline = getBaselineMatrix() + # set baseline to baseline matrix + model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( + (num_groups, num_groups)) * 0 + + model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline + + # Generate a damping matrix and assign it to the model + damped_matrices = [] + damping_coefficients = [] + + for i in np.arange(len(damping_days)): + factor = damping_factors[i] + # Catching to large values + if factor > 20000: + factor = 20000 + if factor < -20000: + factor = -20000 + + day = damping_days[i] + f = np.float16(factor) + damping = np.ones((num_groups, num_groups) + ) * f + model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( + coeffs=(damping), t=day, level=0, type=0)) + + damped_matrices.append(model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at( + day+1)) + damping_coefficients.append(damping) + + # Apply mathematical constraints to parameters + model.apply_constraints() + + # Run Simulation + result = simulate(0, days, dt, model) + + # Interpolate simulation result on days time scale + result = interpolate_simulation_result(result) + + result_array = remove_confirmed_compartments( + np.transpose(result.as_ndarray()[1:, :])) + + # Omit first column, as the time points are not of interest here. + dataset_entries = copy.deepcopy(result_array) + + return dataset_entries.tolist(), damped_matrices, damping_coefficients + + +def generate_data_v1_1( + num_runs, parameters, path_out, population, divi_start, num_days, filename, + normalize=False, save_data=True): + """ + Generating data for different population and parameter values. Using ODE-SECIR model without spatial resolution. + + :param num_runs: Number of generated samples + :param parameters: Tuple containing the parameters used in the generation of the damping factors + :param path_out: If the generated data should be stored, this is the associated path to the output file + :param population: List of the age group resolved data + :param divi_start: Start value for total critical infected + :param num_days: Length of the simulated time series + :param filename: Name of the file, which should contain the output + :param normalize: Boolean, whether a log-normalization should be applied + :param save_data: Boolean, whether the generated data should be stored or not. + :returns" A dictionary of "inputs" and "labels" + + """ + num_dampings = 9 + + data = { + "inputs": [], + "labels": [], + "contact_matrix": [], + "damping_day": [], + "damping_coeff": [] + } + + # The number of days is the same as the sum of input and label width. + # Since the first day of the input is day 0, we still need to subtract 1. + days = num_days - 1 + + # Load population data + damping_data = generate_dampings( + num_runs, num_days, num_dampings, parameters) + damping_days = damping_data["damping_days"] + damping_factors = damping_data["damping_coeff"] + # show progess in terminal for longer runs + # Due to the random structure, there's currently no need to shuffle the data + + # Setting seed to get the same initializations + np.random.seed(42) + for i in range(0, num_runs): + + data_run, _, _ = run_secir_simulation_v1_1( + days, damping_days=damping_days[i], damping_factors=damping_factors[i], populations=population, divi_start=divi_start) + data_run = np.nan_to_num(data_run) + if len(data_run) != 90: + print("Hääääääääääää??????") + data['inputs'].append(data_run) + data['labels'].append(data_run) + + if normalize: + # logarithmic normalization + transformer = FunctionTransformer(np.log1p, validate=True) + + # transform inputs and labels + data['inputs'] = transform_data(data['inputs'], transformer, num_runs) + data['labels'] = transform_data(data['labels'], transformer, num_runs) + else: + + data_inputs = tf.convert_to_tensor(data['inputs']) + data_labels = tf.convert_to_tensor(data['labels']) + + if save_data: + # check if data directory exists. If necessary, create it. + if not os.path.isdir(path_out): + os.mkdir(path_out) + + # save dict to json file + with open(os.path.join(path_out, filename), 'wb') as f: + pickle.dump(data, f) + return {"inputs": data_inputs, + "labels": data_labels} + + +def reduce_to_infected(data, num_runs): + """ + Reduce the age-resolved results to current number of infected. + + :param data: Data, that should be reduced + :num_runs: Number of samples represented by data + :returns: A numpy array of shape num_runs*num_days + """ + infected = [] + infected_critical = [] + for i in np.arange(num_runs): + d = data[i].numpy() + d = np.reshape(d, (90, 6, 8)) + d = np.sum(d, axis=1) + infected_critical.append(d[:, 5]) + infected.append(d[:, 2]+d[:, 3]+d[:, 4] + d[:, 5]) + return (np.asarray(infected), np.asarray(infected_critical)) + + +def calc_min_error(data_prod, real_data): + """ + Calculating the minimal error of an sample to the given real world data. + + :param data_prod: artificial data samples + :param real_data: real data sample + """ + neigh = KNeighborsRegressor(n_neighbors=1) + neigh.fit(data_prod, data_prod) + real_data = real_data.reshape(1, -1) + nearest = neigh.predict(real_data) + return sk.metrics.mean_absolute_percentage_error(real_data, nearest) + + +def evaluate_parameters(path_test_data, path_population, path_divi, path_output): + """ + Evaluating a set of possible parameter values in a grid search manner to find the optimal one. + + :param path_test_data: Path, containing the case data. + :param path_population: Path, containing the population data. + :param path_divi: Path, containing the divi case data + :param path_output: Path to store the results + """ + + # Setting + num_runs = 100 + num_days = 90 + n_populations = 250 + + # Initializing result array + results = [] + + # Loading case and population data + with open(os.path.join(path_population, "data_county_population.pickle"), 'rb') as f: + population_data = pickle.load(f) + + with open(os.path.join(path_test_data, "data_county_cases_90_days.pickle"), "rb") as file: + case_data = pickle.load(file) + + with open(os.path.join(path_divi, "data_county_divi_cases_90_days.pickle"), "rb") as file: + divi_data = pickle.load(file) + + print("data loaded") + # defining possible parameter values + gamma_pos = [-2, -1, 0, 1, 2] + delta_alpha = [1, 2, 3] + p0 = [0.1, 0.2, 0.5, 0.7, 0.9] + t1 = [-1, -0.5, 0, 0.5] # , 0.5, 0, -0.5] + delta_t1 = [0.5, 1, 1.5] # , 1.5] + t2 = [0, 0.5, 1, 1.5] + delta_t2 = [3, 2, 1, 0.5] + + parameters = [ + (gamma_p, gamma_p - delta_a, p, t1_m, t1_m-d_t1, t2_m, t2_m-d_t2) for + gamma_p in gamma_pos for delta_a in delta_alpha for p in p0 for t1_m in t1 for d_t1 in delta_t1 + for t2_m in t2 for d_t2 in delta_t2 + ] + + bar = Bar("Parameter checked", max=len(parameters)) + for parameter in parameters: + error_inf = [] + error_crit_inf = [] + for i in np.arange(n_populations): + data = generate_data_v1_1( + num_runs=num_runs, + parameters=parameter, + population=population_data[i], + divi_start=divi_data[i][0], + num_days=num_days, + path_out="", + filename="", + normalize=False, + save_data=False) + data_infected, data_critical = reduce_to_infected( + data["inputs"], num_runs=num_runs) + case_data_critical = divi_data[i] + error_inf.append(calc_min_error(data_infected, case_data[i])) + error_crit_inf.append(calc_min_error( + data_critical, case_data_critical)) + mean_error_inf = np.mean(error_inf) + var_error_inf = np.var(error_inf) + mean_error_crit_inf = np.mean(error_crit_inf) + var_error_crit_inf = np.var(error_crit_inf) + res = { + "gamma": parameter[0], + "alpha": parameter[1], + "p0": parameter[2], + "t1_max": parameter[3], + "t1_min": parameter[4], + "t2_max": parameter[5], + "t2_min": parameter[6], + "mean_error_inf": mean_error_inf, + "variance_inf": var_error_inf, + "mean_error_crit_inf": mean_error_crit_inf, + "variance_error_crit_inf": var_error_crit_inf + } + print(res) + bar.next() + results.append(res) + bar.finish() + # Saving results + filename = "data_parameter_optimization.pickle" + with open(os.path.join(path_output, filename), "wb") as f: + pickle.dump(results, f) + + +if __name__ == "__main__": + path = os.path.dirname(os.path.realpath(__file__)) + path = os.path.join(os.path.dirname(os.path.realpath( + os.path.dirname(os.path.realpath(path)))), 'data') + evaluate_parameters(path, path, path, path) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py index f48ddfa63e..29ef471c34 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py @@ -406,9 +406,10 @@ def network_fit( if (plot_stats): plot_losses(history) plot_compartment_prediction_model( - test_inputs, test_labels, modeltype, model=model, + data_prep["test_inputs"], data_prep["test_labels"], modeltype, model=model, plot_compartment='InfectedSymptoms', max_subplots=3) - df = get_test_statistic(test_inputs, test_labels, model) + df = get_test_statistic( + data_prep["test_inputs"], data_prep["test_labels"], model) print(df) return history @@ -465,13 +466,13 @@ def get_test_statistic(test_inputs, test_labels, model): return mean_percentage -def get_input_dim_lstm(path): +def get_input_dim_lstm(path_to_file): """ Extract the dimension of the input data - :param path: path to the data + :param path_to_file: path to the data """ - file = open(os.path.join(path, 'data_secir_groups.pickle'), 'rb') + file = open(path_to_file, 'rb') data = pickle.load(file) input_dim = data['inputs'].shape[2] + np.asarray( @@ -493,7 +494,7 @@ def get_input_dim_lstm(path): neurons_in_hidden_layer = 512 activation_function = 'relu' modelname = "Dense" - modeltype = "classic" # or "classic" + modeltype = "timeseries" # or "classic" model_parameters = (label_width, number_age_groups, number_compartments, hidden_layers, neurons_in_hidden_layer, activation_function, modelname) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 10e960cd28..f03800c987 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -1,7 +1,7 @@ ############################################################################# # Copyright (C) 2020-2025 MEmilio # -# Authors: +# Authors: Manuel Heger, Henrik Zunker # # Contact: Martin J. Kuehn # From ae4e9eec2d27329b581fbb1b299d0837dc41456e Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 16 Jun 2025 10:45:32 +0200 Subject: [PATCH 06/49] Update tests and add saving/loading model architecture --- .../surrogatemodel/ode_secir_groups/model.py | 28 +++++ .../test_surrogatemodel_ode_secir_groups.py | 100 ++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py index 29ef471c34..77723b15f5 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py @@ -414,6 +414,34 @@ def network_fit( return history +def save_model(model, path, modelname): + """ + Saving a trained model. + + :param model: trained tensorflow keras model + :param path: path where the model should be stored + :param modelname: the name of the model + """ + if not os.path.isdir(path): + os.mkdir(path) + path_to_file = os.path.join(path, modelname + ".keras") + model.save(path_to_file) + print("Model successfully saved") + + +def load_model(path): + """ + Loading a trained model. + + :param path: path to the .keras file containing the desired model + :returns: trained tf.keras model + """ + if not os.path.isfile(path): + raise FileExistsError( + "There is no .keras model stored at the given directory.") + return tf.keras.models.load_model(path) + + ##################### # Plots etc. ##################### diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index f03800c987..f1db98eef8 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -219,8 +219,10 @@ def test_data_generation_save( self.assertEqual(os.listdir(self.path), ['data_secir_groups_30days_1_active.pickle']) + # # Testing network_architectures.py + def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -626,6 +628,104 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): self.assertEqual( len(cnn_output.history['val_loss']), max_epochs) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', + return_value=0.1 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', + return_value=[[5256.0, 10551, 32368.5, + 43637.833333333336, 22874.066666666666, 8473.6]]) + def test_save_model( + self, mock_population, mock_baseline, mock_minimum): + """ + : param mock_population: + : param mock_baseline: + : param mock_minimum: + """ + input_width = 5 + label_width = 10 + num_runs = 2 + + data_generation.generate_data(num_runs, self.path, "", input_width, + label_width) + max_epochs = 1 + early_stop = 100 + loss = tf.keras.losses.MeanAbsolutePercentageError() + optimizer = "Adam" + metric = [tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()] + + training_parameter = ( + early_stop, max_epochs, loss, optimizer, metric) + + model_mlp_multi_input_multi_output = model.network_fit( + model=network_architectures.mlp_multi_input_multi_output( + label_width), + training_parameter=training_parameter, + path=self.path, + filename="data_secir_groups_10days_2_active.pickle", + modeltype='classic', plot_stats=False) + + model.save_model(model_mlp_multi_input_multi_output.model, + self.path, "mlp_multi_multi") + + self.assertEqual(len(os.listdir(self.path)), 2) + + self.assertEqual(os.listdir(self.path), + ['data_secir_groups_10days_2_active.pickle', 'mlp_multi_multi.keras']) + + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', + return_value=0.1 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', + return_value=[[5256.0, 10551, 32368.5, + 43637.833333333336, 22874.066666666666, 8473.6]]) + def test_load_model( + self, mock_population, mock_baseline, mock_minimum): + """ + : param mock_population: + : param mock_baseline: + : param mock_minimum: + """ + # Helper function to normalize the .getconfig() output + def normalize_config(config): + config.pop('name', None) + for layer in config["layers"]: + layer["config"].pop("name", None) + return config + + input_width = 5 + label_width = 10 + num_runs = 2 + + data_generation.generate_data(num_runs, self.path, "", input_width, + label_width) + max_epochs = 1 + early_stop = 100 + loss = tf.keras.losses.MeanAbsolutePercentageError() + optimizer = "Adam" + metric = [tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()] + + training_parameter = ( + early_stop, max_epochs, loss, optimizer, metric) + + mlp1 = model.network_fit( + model=network_architectures.mlp_multi_input_multi_output( + label_width), + training_parameter=training_parameter, + path=self.path, + filename="data_secir_groups_10days_2_active.pickle", + modeltype='classic', plot_stats=False) + + model.save_model(mlp1.model, + self.path, "mlp_multi_multi") + + path = os.path.join(self.path, "mlp_multi_multi.keras") + mlp2 = model.load_model(path) + self.assertEqual(mlp1.model.summary(), mlp2.summary()) + if __name__ == '__main__': unittest.main() From 81a91dedbaa1a941fb184ed4f169036d77dce78e Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 16 Jun 2025 11:21:26 +0200 Subject: [PATCH 07/49] Update tests --- .../test_surrogatemodel_ode_secir_groups.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index f1db98eef8..6b4bd0fc72 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -688,13 +688,6 @@ def test_load_model( : param mock_baseline: : param mock_minimum: """ - # Helper function to normalize the .getconfig() output - def normalize_config(config): - config.pop('name', None) - for layer in config["layers"]: - layer["config"].pop("name", None) - return config - input_width = 5 label_width = 10 num_runs = 2 From 909bfeeaaa2bf0801891d750f76bef226f51bf99 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 16 Jun 2025 13:08:24 +0200 Subject: [PATCH 08/49] Changes in test file --- .../test_surrogatemodel_ode_secir_groups.py | 46 +------------------ 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 6b4bd0fc72..e876b93495 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,7 +222,6 @@ def test_data_generation_save( # # Testing network_architectures.py - def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -658,7 +657,7 @@ def test_save_model( training_parameter = ( early_stop, max_epochs, loss, optimizer, metric) - model_mlp_multi_input_multi_output = model.network_fit( + mlp1 = model.network_fit( model=network_architectures.mlp_multi_input_multi_output( label_width), training_parameter=training_parameter, @@ -666,7 +665,7 @@ def test_save_model( filename="data_secir_groups_10days_2_active.pickle", modeltype='classic', plot_stats=False) - model.save_model(model_mlp_multi_input_multi_output.model, + model.save_model(mlp1.model, self.path, "mlp_multi_multi") self.assertEqual(len(os.listdir(self.path)), 2) @@ -674,47 +673,6 @@ def test_save_model( self.assertEqual(os.listdir(self.path), ['data_secir_groups_10days_2_active.pickle', 'mlp_multi_multi.keras']) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', - return_value=0.1 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', - return_value=0.6 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', - return_value=[[5256.0, 10551, 32368.5, - 43637.833333333336, 22874.066666666666, 8473.6]]) - def test_load_model( - self, mock_population, mock_baseline, mock_minimum): - """ - : param mock_population: - : param mock_baseline: - : param mock_minimum: - """ - input_width = 5 - label_width = 10 - num_runs = 2 - - data_generation.generate_data(num_runs, self.path, "", input_width, - label_width) - max_epochs = 1 - early_stop = 100 - loss = tf.keras.losses.MeanAbsolutePercentageError() - optimizer = "Adam" - metric = [tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()] - - training_parameter = ( - early_stop, max_epochs, loss, optimizer, metric) - - mlp1 = model.network_fit( - model=network_architectures.mlp_multi_input_multi_output( - label_width), - training_parameter=training_parameter, - path=self.path, - filename="data_secir_groups_10days_2_active.pickle", - modeltype='classic', plot_stats=False) - - model.save_model(mlp1.model, - self.path, "mlp_multi_multi") - path = os.path.join(self.path, "mlp_multi_multi.keras") mlp2 = model.load_model(path) self.assertEqual(mlp1.model.summary(), mlp2.summary()) From 5a0aeae96a95a8a5f25ea67baf6f34ed69e2e405 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 16 Jun 2025 13:42:38 +0200 Subject: [PATCH 09/49] updating tests --- .../test_surrogatemodel_ode_secir_groups.py | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index e876b93495..748b7692ff 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,6 +222,7 @@ def test_data_generation_save( # # Testing network_architectures.py + def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -657,7 +658,7 @@ def test_save_model( training_parameter = ( early_stop, max_epochs, loss, optimizer, metric) - mlp1 = model.network_fit( + model_mlp_multi_input_multi_output = model.network_fit( model=network_architectures.mlp_multi_input_multi_output( label_width), training_parameter=training_parameter, @@ -665,7 +666,7 @@ def test_save_model( filename="data_secir_groups_10days_2_active.pickle", modeltype='classic', plot_stats=False) - model.save_model(mlp1.model, + model.save_model(model_mlp_multi_input_multi_output.model, self.path, "mlp_multi_multi") self.assertEqual(len(os.listdir(self.path)), 2) @@ -673,7 +674,48 @@ def test_save_model( self.assertEqual(os.listdir(self.path), ['data_secir_groups_10days_2_active.pickle', 'mlp_multi_multi.keras']) - path = os.path.join(self.path, "mlp_multi_multi.keras") + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', + return_value=0.1 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', + return_value=[[5256.0, 10551, 32368.5, + 43637.833333333336, 22874.066666666666, 8473.6]]) + def test_load_model( + self, mock_population, mock_baseline, mock_minimum): + """ + : param mock_population: + : param mock_baseline: + : param mock_minimum: + """ + input_width = 5 + label_width = 10 + num_runs = 2 + + data_generation.generate_data(num_runs, self.path, "", input_width, + label_width) + max_epochs = 1 + early_stop = 100 + loss = tf.keras.losses.MeanAbsolutePercentageError() + optimizer = "Adam" + metric = [tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()] + + training_parameter = ( + early_stop, max_epochs, loss, optimizer, metric) + + mlp1 = model.network_fit( + model=network_architectures.mlp_multi_input_multi_output( + label_width), + training_parameter=training_parameter, + path=self.path, + filename="data_secir_groups_10days_2_active.pickle", + modeltype='classic', plot_stats=False) + + model.save_model(mlp1.model, + self.path, "mlp_multi_multi") + + path = "/home/mlp_multi_multi.keras" mlp2 = model.load_model(path) self.assertEqual(mlp1.model.summary(), mlp2.summary()) From 5e3d290c88c3c86251c05c062bc3d7b74e339723 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 16 Jun 2025 14:02:45 +0200 Subject: [PATCH 10/49] debugging test --- .../test_surrogatemodel_ode_secir_groups.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 748b7692ff..894a06e2c8 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -714,8 +714,9 @@ def test_load_model( model.save_model(mlp1.model, self.path, "mlp_multi_multi") + print(os.listdir(self.path)) - path = "/home/mlp_multi_multi.keras" + path = f"{self.path}/mlp_multi_multi.keras" mlp2 = model.load_model(path) self.assertEqual(mlp1.model.summary(), mlp2.summary()) From 653a6ec3cd15da212535172ee06ba47f61f7e1b4 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 16 Jun 2025 14:22:43 +0200 Subject: [PATCH 11/49] Changing test files --- .../test_surrogatemodel_ode_secir_groups.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 894a06e2c8..051624dcb4 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,7 +222,6 @@ def test_data_generation_save( # # Testing network_architectures.py - def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -714,9 +713,9 @@ def test_load_model( model.save_model(mlp1.model, self.path, "mlp_multi_multi") - print(os.listdir(self.path)) - path = f"{self.path}/mlp_multi_multi.keras" + path = os.path.join(self.path, "mlp_multi_multi.keras") + print(path) mlp2 = model.load_model(path) self.assertEqual(mlp1.model.summary(), mlp2.summary()) From 0f12f57ba23e1375d5239016a6ca804aa378ea96 Mon Sep 17 00:00:00 2001 From: mhheger <122824218+mhheger@users.noreply.github.com> Date: Wed, 18 Jun 2025 06:39:08 +0200 Subject: [PATCH 12/49] Delete pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py replaced by dampings.py --- .../ode_secir_groups/generate_dampings.py | 456 ------------------ 1 file changed, 456 deletions(-) delete mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py deleted file mode 100644 index bc0247833b..0000000000 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/generate_dampings.py +++ /dev/null @@ -1,456 +0,0 @@ -import os -import pickle - -import matplotlib.pyplot as plt -import plotly.express as px -import plotly.graph_objects as go -import numpy as np -import pandas as pd -import tensorflow as tf - -import sklearn as sk -from sklearn.neighbors import KNeighborsRegressor -from sklearn.model_selection import train_test_split - -import copy -import os -import pickle -import random -import json -from datetime import date - -import numpy as np -import tensorflow as tf -from progress.bar import Bar -from sklearn.preprocessing import FunctionTransformer - -from memilio.simulation import (AgeGroup, Damping, LogLevel, set_log_level) -from memilio.simulation.osecir import (Index_InfectionState, - InfectionState, Model, - interpolate_simulation_result, simulate) - -from memilio.surrogatemodel.ode_secir_groups.data_generation import ( - remove_confirmed_compartments, get_population, getBaselineMatrix) -from memilio.surrogatemodel.ode_secir_groups.data_generation_test import transform_data - - -def calc_damps_var1(n_days, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_min=-1.2, t2_max=2, t2_min=-0.5): - ''' - :param n_days: Number of days per time series - :param gamma_pos: upper bound for h value - :param alpha: upper bound for h value, where active damping should be stopped - :param p0: probability for a change of the damping factor - :param t1_max: upper end point for size of active damping changes - :param t1_min: lower end point for size of active damping changes - :param t2_max: upper end point for size of active damping changes - :param t2_min: lower end point for size of base damping changes - :param t3_max: upper end - ''' - h = 0 - k = 0 - dampings = [] - while k < n_days: - if h > gamma_pos: - while h > alpha and k < n_days: - delta_h = np.random.uniform(t1_min, t1_max) - h = h + delta_h - dampings.append(1-np.exp(h)) - k = k+1 - - else: - if np.random.binomial(1, p0): - delta_h = np.random.uniform(t2_min, t2_max) - - else: - delta_h = 0 - h = h+delta_h - dampings.append(1 - np.exp(h)) - k = k+1 - - return dampings - - -def generate_dampings(N, ndays, ndampings, parameters): - """ - Generating N samples of dampings for a timeseries of length ndays. - - :param N: Number of samples to produce - :param ndays: Number of days per sample - :param ndampings: maximal number of dampings - :param parameters: Tuple of parameters used to generate the damping, this should be of the form - (gamma_pos, alpha, p0, t1_max, t1_min, t2_max, t2_min) - :returns: dictionary of lists, "damping_days" is a list of length N containing list of days of length ndays, - "damping_coeff" is a list of length N, where entry is a list of length ndays containing the damping factors. - - """ - # Unpacking the parameters - gamma_pos, alpha, p0, t1_max, t1_min, t2_max, t2_min = parameters - - # Initializing the returns - damping_days = [] - damping_coeff = [] - k = ndays//ndampings - - days = [j*k for j in np.arange(ndampings)] - - # Generating the damping coefficients for each sample - for i in np.arange(N): - damping_days.append(days) - damping_coeff.append(calc_damps_var1( - n_days=ndampings, gamma_pos=gamma_pos, alpha=alpha, p0=p0, t1_max=t1_max, t1_min=t1_min, - t2_max=t2_max, t2_min=t2_min - )) - - return { - "damping_days": damping_days, - "damping_coeff": damping_coeff - } - - -def run_secir_simulation_v1_1(days, damping_days, damping_factors, populations, divi_start): - """! Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. - The model is not stratified by region. - Virus-specific parameters are fixed and initial number of persons in the particular - infection states are chosen randomly from defined ranges. - :param days: Describes how many days we simulate within a single run. - :param damping_days: Days, where a damping is possible - :param damping_factors: Damping factors associated to the damping days. - :param populations: List containing the population in each age group. - :param divi_start: Start value for critical infected. - :return: List containing the populations in each compartment used to initialize the run. - """ - set_log_level(LogLevel.Off) - start_day = 1 - start_month = 1 - start_year = 2019 - dt = 0.1 - - # Define age Groups - groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] - num_groups = len(groups) - groups_id = np.arange(num_groups) - - # age specific parameters - TransmissionProbabilityOnContact = [0.03, 0.06, 0.06, 0.06, 0.09, 0.175] - RecoveredPerInfectedNoSymptoms = [0.25, 0.25, 0.2, 0.2, 0.2, 0.2] - SeverePerInfectedSymptoms = [0.0075, 0.0075, 0.019, 0.0615, 0.165, 0.225] - CriticalPerSevere = [0.075, 0.075, 0.075, 0.15, 0.3, 0.4] - DeathsPerCritical = [0.05, 0.05, 0.14, 0.14, 0.4, 0.6] - - TimeInfectedNoSymptoms = [2.74, 2.74, 2.565, 2.565, 2.565, 2.565] - TimeInfectedSymptoms = [7.02625, 7.02625, - 7.0665, 6.9385, 6.835, 6.775] - TimeInfectedSevere = [5, 5, 5.925, 7.55, 8.5, 11] - TimeInfectedCritical = [6.95, 6.95, 6.86, 17.36, 17.1, 11.6] - - # Initialize Parameters - model = Model(num_groups) - - # Set parameters - for i, rho, muCR, muHI, muUH, muDU, tc, ti, th, tu in zip(groups_id, TransmissionProbabilityOnContact, RecoveredPerInfectedNoSymptoms, SeverePerInfectedSymptoms, CriticalPerSevere, DeathsPerCritical, TimeInfectedNoSymptoms, TimeInfectedSymptoms, TimeInfectedSevere, TimeInfectedCritical): - # Compartment transition duration - model.parameters.TimeExposed[AgeGroup(i)] = 3.335 - model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = tc - model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = ti - model.parameters.TimeInfectedSevere[AgeGroup(i)] = th - model.parameters.TimeInfectedCritical[AgeGroup(i)] = tu - - # Initial number of people in each compartment with random numbers - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Exposed)] = random.uniform( - 0.00025, 0.0005) * populations[i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedNoSymptoms)] = random.uniform( - 0.0001, 0.00035) * populations[i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedNoSymptomsConfirmed)] = random.uniform( - 0.0001, 0.00035) * populations[i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSymptoms)] = random.uniform( - 0.00007, 0.0001) * populations[i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSymptomsConfirmed)] = 0 - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedSevere)] = random.uniform( - 0.00003, 0.00006) * populations[i] - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.InfectedCritical)] = 1/6 * divi_start - model.populations[AgeGroup(i), Index_InfectionState( - InfectionState.Recovered)] = random.uniform( - 0.002, 0.008) * populations[i] - model.populations[AgeGroup(i), - Index_InfectionState(InfectionState.Dead)] = random.uniform( - 0, 0.0003) * populations[i] - model.populations.set_difference_from_group_total_AgeGroup( - (AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)), populations[i]) - - # Compartment transition propabilities - model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 1 - model.parameters.TransmissionProbabilityOnContact[AgeGroup(i)] = rho - model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = muCR - model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25 - model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = muHI - model.parameters.CriticalPerSevere[AgeGroup(i)] = muUH - model.parameters.DeathsPerCritical[AgeGroup(i)] = muDU - # twice the value of RiskOfInfectionFromSymptomatic - model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.5 - - # StartDay is the n-th day of the year - model.parameters.StartDay = ( - date(start_year, start_month, start_day) - date(start_year, 1, 1)).days - - # Load baseline and minimum contact matrix and assign them to the model - baseline = getBaselineMatrix() - # set baseline to baseline matrix - model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( - (num_groups, num_groups)) * 0 - - model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline - - # Generate a damping matrix and assign it to the model - damped_matrices = [] - damping_coefficients = [] - - for i in np.arange(len(damping_days)): - factor = damping_factors[i] - # Catching to large values - if factor > 20000: - factor = 20000 - if factor < -20000: - factor = -20000 - - day = damping_days[i] - f = np.float16(factor) - damping = np.ones((num_groups, num_groups) - ) * f - model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( - coeffs=(damping), t=day, level=0, type=0)) - - damped_matrices.append(model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at( - day+1)) - damping_coefficients.append(damping) - - # Apply mathematical constraints to parameters - model.apply_constraints() - - # Run Simulation - result = simulate(0, days, dt, model) - - # Interpolate simulation result on days time scale - result = interpolate_simulation_result(result) - - result_array = remove_confirmed_compartments( - np.transpose(result.as_ndarray()[1:, :])) - - # Omit first column, as the time points are not of interest here. - dataset_entries = copy.deepcopy(result_array) - - return dataset_entries.tolist(), damped_matrices, damping_coefficients - - -def generate_data_v1_1( - num_runs, parameters, path_out, population, divi_start, num_days, filename, - normalize=False, save_data=True): - """ - Generating data for different population and parameter values. Using ODE-SECIR model without spatial resolution. - - :param num_runs: Number of generated samples - :param parameters: Tuple containing the parameters used in the generation of the damping factors - :param path_out: If the generated data should be stored, this is the associated path to the output file - :param population: List of the age group resolved data - :param divi_start: Start value for total critical infected - :param num_days: Length of the simulated time series - :param filename: Name of the file, which should contain the output - :param normalize: Boolean, whether a log-normalization should be applied - :param save_data: Boolean, whether the generated data should be stored or not. - :returns" A dictionary of "inputs" and "labels" - - """ - num_dampings = 9 - - data = { - "inputs": [], - "labels": [], - "contact_matrix": [], - "damping_day": [], - "damping_coeff": [] - } - - # The number of days is the same as the sum of input and label width. - # Since the first day of the input is day 0, we still need to subtract 1. - days = num_days - 1 - - # Load population data - damping_data = generate_dampings( - num_runs, num_days, num_dampings, parameters) - damping_days = damping_data["damping_days"] - damping_factors = damping_data["damping_coeff"] - # show progess in terminal for longer runs - # Due to the random structure, there's currently no need to shuffle the data - - # Setting seed to get the same initializations - np.random.seed(42) - for i in range(0, num_runs): - - data_run, _, _ = run_secir_simulation_v1_1( - days, damping_days=damping_days[i], damping_factors=damping_factors[i], populations=population, divi_start=divi_start) - data_run = np.nan_to_num(data_run) - if len(data_run) != 90: - print("Hääääääääääää??????") - data['inputs'].append(data_run) - data['labels'].append(data_run) - - if normalize: - # logarithmic normalization - transformer = FunctionTransformer(np.log1p, validate=True) - - # transform inputs and labels - data['inputs'] = transform_data(data['inputs'], transformer, num_runs) - data['labels'] = transform_data(data['labels'], transformer, num_runs) - else: - - data_inputs = tf.convert_to_tensor(data['inputs']) - data_labels = tf.convert_to_tensor(data['labels']) - - if save_data: - # check if data directory exists. If necessary, create it. - if not os.path.isdir(path_out): - os.mkdir(path_out) - - # save dict to json file - with open(os.path.join(path_out, filename), 'wb') as f: - pickle.dump(data, f) - return {"inputs": data_inputs, - "labels": data_labels} - - -def reduce_to_infected(data, num_runs): - """ - Reduce the age-resolved results to current number of infected. - - :param data: Data, that should be reduced - :num_runs: Number of samples represented by data - :returns: A numpy array of shape num_runs*num_days - """ - infected = [] - infected_critical = [] - for i in np.arange(num_runs): - d = data[i].numpy() - d = np.reshape(d, (90, 6, 8)) - d = np.sum(d, axis=1) - infected_critical.append(d[:, 5]) - infected.append(d[:, 2]+d[:, 3]+d[:, 4] + d[:, 5]) - return (np.asarray(infected), np.asarray(infected_critical)) - - -def calc_min_error(data_prod, real_data): - """ - Calculating the minimal error of an sample to the given real world data. - - :param data_prod: artificial data samples - :param real_data: real data sample - """ - neigh = KNeighborsRegressor(n_neighbors=1) - neigh.fit(data_prod, data_prod) - real_data = real_data.reshape(1, -1) - nearest = neigh.predict(real_data) - return sk.metrics.mean_absolute_percentage_error(real_data, nearest) - - -def evaluate_parameters(path_test_data, path_population, path_divi, path_output): - """ - Evaluating a set of possible parameter values in a grid search manner to find the optimal one. - - :param path_test_data: Path, containing the case data. - :param path_population: Path, containing the population data. - :param path_divi: Path, containing the divi case data - :param path_output: Path to store the results - """ - - # Setting - num_runs = 100 - num_days = 90 - n_populations = 250 - - # Initializing result array - results = [] - - # Loading case and population data - with open(os.path.join(path_population, "data_county_population.pickle"), 'rb') as f: - population_data = pickle.load(f) - - with open(os.path.join(path_test_data, "data_county_cases_90_days.pickle"), "rb") as file: - case_data = pickle.load(file) - - with open(os.path.join(path_divi, "data_county_divi_cases_90_days.pickle"), "rb") as file: - divi_data = pickle.load(file) - - print("data loaded") - # defining possible parameter values - gamma_pos = [-2, -1, 0, 1, 2] - delta_alpha = [1, 2, 3] - p0 = [0.1, 0.2, 0.5, 0.7, 0.9] - t1 = [-1, -0.5, 0, 0.5] # , 0.5, 0, -0.5] - delta_t1 = [0.5, 1, 1.5] # , 1.5] - t2 = [0, 0.5, 1, 1.5] - delta_t2 = [3, 2, 1, 0.5] - - parameters = [ - (gamma_p, gamma_p - delta_a, p, t1_m, t1_m-d_t1, t2_m, t2_m-d_t2) for - gamma_p in gamma_pos for delta_a in delta_alpha for p in p0 for t1_m in t1 for d_t1 in delta_t1 - for t2_m in t2 for d_t2 in delta_t2 - ] - - bar = Bar("Parameter checked", max=len(parameters)) - for parameter in parameters: - error_inf = [] - error_crit_inf = [] - for i in np.arange(n_populations): - data = generate_data_v1_1( - num_runs=num_runs, - parameters=parameter, - population=population_data[i], - divi_start=divi_data[i][0], - num_days=num_days, - path_out="", - filename="", - normalize=False, - save_data=False) - data_infected, data_critical = reduce_to_infected( - data["inputs"], num_runs=num_runs) - case_data_critical = divi_data[i] - error_inf.append(calc_min_error(data_infected, case_data[i])) - error_crit_inf.append(calc_min_error( - data_critical, case_data_critical)) - mean_error_inf = np.mean(error_inf) - var_error_inf = np.var(error_inf) - mean_error_crit_inf = np.mean(error_crit_inf) - var_error_crit_inf = np.var(error_crit_inf) - res = { - "gamma": parameter[0], - "alpha": parameter[1], - "p0": parameter[2], - "t1_max": parameter[3], - "t1_min": parameter[4], - "t2_max": parameter[5], - "t2_min": parameter[6], - "mean_error_inf": mean_error_inf, - "variance_inf": var_error_inf, - "mean_error_crit_inf": mean_error_crit_inf, - "variance_error_crit_inf": var_error_crit_inf - } - print(res) - bar.next() - results.append(res) - bar.finish() - # Saving results - filename = "data_parameter_optimization.pickle" - with open(os.path.join(path_output, filename), "wb") as f: - pickle.dump(results, f) - - -if __name__ == "__main__": - path = os.path.dirname(os.path.realpath(__file__)) - path = os.path.join(os.path.dirname(os.path.realpath( - os.path.dirname(os.path.realpath(path)))), 'data') - evaluate_parameters(path, path, path, path) From 2870857865798a87033430df60f041d198f39326 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 18 Jun 2025 07:17:42 +0200 Subject: [PATCH 13/49] small modifications --- .../ode_secir_groups/data_generation.py | 3 ++- .../surrogatemodel/ode_secir_groups/model.py | 4 ++-- .../test_surrogatemodel_ode_secir_groups.py | 21 +++++++++++-------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index a96ef43f70..67d98d7209 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -210,7 +210,7 @@ def generate_data( normalize=True, save_data=True, damping_method="active", max_number_damping=5): """ Generate data sets of num_runs many equation-based model simulations and possibly transforms the computed results by a log(1+x) transformation. Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks. - In general, we have 8 different compartments and 6 age groups. If we choose, + In general, we have 8 different compartments and 6 age groups. If we choose input_width = 5 and label_width = 20, the dataset has - input with dimension 5 x 8 x 6 - labels with dimension 20 x 8 x 6 @@ -334,6 +334,7 @@ def get_population(path): """ read population data in list from dataset :param path: Path to the dataset containing the population data + :returns: List of interpolated age grouped population data """ diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py index 77723b15f5..91e5e9cb28 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py @@ -240,7 +240,7 @@ def prod_time_series(obj, n, length_input): def prepare_data_timeseries(data): """ - Transforming data to be processable by "time_series" network, simply repeating static values, flattening and concatenating for each data instance. + Transforming data to be processable by "timeseries" network, simply repeating static values, flattening and concatenating for each data instance. :param data: dictionary produces by data_generation :returns: dictionary with entries { @@ -522,7 +522,7 @@ def get_input_dim_lstm(path_to_file): neurons_in_hidden_layer = 512 activation_function = 'relu' modelname = "Dense" - modeltype = "timeseries" # or "classic" + modeltype = "classic" # or "classic" model_parameters = (label_width, number_age_groups, number_compartments, hidden_layers, neurons_in_hidden_layer, activation_function, modelname) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 051624dcb4..94ed253f6c 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -213,15 +213,16 @@ def test_data_generation_save( num_runs = 1 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width) + label_width, damping_method="random") self.assertEqual(len(os.listdir(self.path)), 1) self.assertEqual(os.listdir(self.path), - ['data_secir_groups_30days_1_active.pickle']) + ['data_secir_groups_30days_1_random.pickle']) # # Testing network_architectures.py + def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -301,8 +302,11 @@ def test_mlp_multi_multi(self): self.assertEqual(len(model.layers), 5) input_zero = np.zeros((5, 3, 7)) output_zeros = model(input_zero) + # Number of time series self.assertEqual(output_zeros.shape[0], 5) + # Number of days per predicted time series self.assertEqual(output_zeros.shape[1], 12) + # Size of feature space per day self.assertEqual(output_zeros.shape[2], 21) def test_cnn_multi_multi(self): @@ -356,7 +360,7 @@ def test_cnn_multi_multi(self): self.assertEqual(str(error.exception), error_message) model = network_architectures.cnn_multi_input_multi_output( - 21, 4, 3, 3, 256, 2) + 21, 4, 4, 3, 256, 2) self.assertEqual(len(model.layers), 6) input_zero = np.zeros((12, 5, 7)) output_zeros = model(input_zero) @@ -364,8 +368,8 @@ def test_cnn_multi_multi(self): self.assertEqual(output_zeros.shape[0], 12) # length of one time series self.assertEqual(output_zeros.shape[1], 21) - # Dimension of one time step - self.assertEqual(output_zeros.shape[2], 12) + # Dimension of one time step (16 = 4*4) + self.assertEqual(output_zeros.shape[2], 16) def test_lstm_multi_multi(self): with self.assertRaises(ValueError) as error: @@ -411,7 +415,7 @@ def test_lstm_multi_multi(self): self.assertEqual(str(error.exception), error_message) model = network_architectures.lstm_multi_input_multi_output( - 21, 4, 3, 12, 3, 12) + 21, 4, 4, 12, 3, 12) self.assertEqual(len(model.layers), 6) input_zero = np.zeros((12, 5, 7)) output_zeros = model(input_zero) @@ -419,8 +423,8 @@ def test_lstm_multi_multi(self): self.assertEqual(output_zeros.shape[0], 12) # length of one time series self.assertEqual(output_zeros.shape[1], 21) - # Dimension of one time step - self.assertEqual(output_zeros.shape[2], 12) + # Dimension of one time step (16 = 4*4) + self.assertEqual(output_zeros.shape[2], 16) # Testing model.py def test_calc_split_index(self): @@ -715,7 +719,6 @@ def test_load_model( self.path, "mlp_multi_multi") path = os.path.join(self.path, "mlp_multi_multi.keras") - print(path) mlp2 = model.load_model(path) self.assertEqual(mlp1.model.summary(), mlp2.summary()) From 166341157b18ba6268ece38f9094d08b051bce7d Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 18 Jun 2025 08:44:57 +0200 Subject: [PATCH 14/49] Updating Tests --- .../test_surrogatemodel_ode_secir_groups.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 94ed253f6c..79f5319ec5 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -717,10 +717,10 @@ def test_load_model( model.save_model(mlp1.model, self.path, "mlp_multi_multi") + self.assertEqual(len(os.listdir(self.path)), 2) - path = os.path.join(self.path, "mlp_multi_multi.keras") - mlp2 = model.load_model(path) - self.assertEqual(mlp1.model.summary(), mlp2.summary()) + self.assertEqual(os.listdir(self.path), + ['data_secir_groups_10days_2_active.pickle', 'mlp_multi_multi.keras']) if __name__ == '__main__': From 953c216ead961fb0b39a7ed692aa07fbeb8584b9 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 18 Jun 2025 09:26:46 +0200 Subject: [PATCH 15/49] Undo changes --- .../test_surrogatemodel_ode_secir_groups.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 79f5319ec5..94ed253f6c 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -717,10 +717,10 @@ def test_load_model( model.save_model(mlp1.model, self.path, "mlp_multi_multi") - self.assertEqual(len(os.listdir(self.path)), 2) - self.assertEqual(os.listdir(self.path), - ['data_secir_groups_10days_2_active.pickle', 'mlp_multi_multi.keras']) + path = os.path.join(self.path, "mlp_multi_multi.keras") + mlp2 = model.load_model(path) + self.assertEqual(mlp1.model.summary(), mlp2.summary()) if __name__ == '__main__': From ed2caefc6ed1a2b782b8b7592c5174330c814f0a Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 18 Jun 2025 09:43:47 +0200 Subject: [PATCH 16/49] Adding test, whether stored weights are equal --- .../test_surrogatemodel_ode_secir_groups.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 94ed253f6c..a096437dd7 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,7 +222,6 @@ def test_data_generation_save( # # Testing network_architectures.py - def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -720,7 +719,11 @@ def test_load_model( path = os.path.join(self.path, "mlp_multi_multi.keras") mlp2 = model.load_model(path) - self.assertEqual(mlp1.model.summary(), mlp2.summary()) + + weights1 = mlp1.model.get_weights() + weights2 = mlp2.get_weights() + for w1, w2 in zip(weights1, weights2): + np.testing.assert_allclose(w1, w2) if __name__ == '__main__': From 35b54263edc8eaf822da6e470dadbf35a86a06d2 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 23 Jun 2025 08:50:58 +0200 Subject: [PATCH 17/49] Changing default method for dampings --- .../ode_secir_groups/dampings.py | 2 +- .../ode_secir_groups/data_generation.py | 2 +- .../test_surrogatemodel_ode_secir_groups.py | 22 +++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py index 7cec1614ae..ceb7f269f0 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py @@ -271,7 +271,7 @@ def dampings_random(days, max_number_damping): # Reducing due to minimal distance restriction reduced_distance = distance_between_days - min_distance_damping_day - if reduced_distance < 0: + if reduced_distance <= 0: raise ValueError("Invalid input: It's not possible to generate this number of damping" "in the desired time interval.") diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 67d98d7209..205c697416 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -207,7 +207,7 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations def generate_data( num_runs, path_out, path_population, input_width, label_width, - normalize=True, save_data=True, damping_method="active", max_number_damping=5): + normalize=True, save_data=True, damping_method="random", max_number_damping=5): """ Generate data sets of num_runs many equation-based model simulations and possibly transforms the computed results by a log(1+x) transformation. Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks. In general, we have 8 different compartments and 6 age groups. If we choose diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index a096437dd7..716432cc8d 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -176,7 +176,7 @@ def test_data_generation_runs( data_1 = data_generation.generate_data( num_runs_1, self.path, "", input_width_1, label_width_1, - save_data=False) + save_data=False, max_number_damping=2) self.assertEqual(len(data_1['inputs']), num_runs_1) self.assertEqual(len(data_1['inputs'][0]), input_width_1) self.assertEqual(len(data_1['inputs'][0][0]), 48) @@ -186,7 +186,7 @@ def test_data_generation_runs( data_2 = data_generation.generate_data( num_runs_2, self.path, "", input_width_2, label_width_2, - save_data=False) + save_data=False, max_number_damping=2) self.assertEqual(len(data_2['inputs']), num_runs_2) self.assertEqual(len(data_2['inputs'][0]), input_width_2) self.assertEqual(len(data_2['inputs'][0][0]), 48) @@ -213,7 +213,7 @@ def test_data_generation_save( num_runs = 1 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, damping_method="random") + label_width, damping_method="random", max_number_damping=2) self.assertEqual(len(os.listdir(self.path)), 1) self.assertEqual(os.listdir(self.path), @@ -595,7 +595,7 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): label_width), training_parameter=training_parameter, path=self.path, - filename="data_secir_groups_10days_5_active.pickle", + filename="data_secir_groups_10days_5_random.pickle", modeltype='classic', plot_stats=False) self.assertEqual( model_mlp_multi_input_multi_output.model.output_shape[1], @@ -610,7 +610,7 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): modeltype='timeseries', training_parameter=training_parameter, path=self.path, - filename="data_secir_groups_10days_5_active.pickle", + filename="data_secir_groups_10days_5_random.pickle", plot_stats=False) self.assertEqual( model_lstm_multi_output.model.output_shape[1], label_width) @@ -623,7 +623,7 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): modeltype='timeseries', training_parameter=training_parameter, path=self.path, - filename="data_secir_groups_10days_5_active.pickle", + filename="data_secir_groups_10days_5_random.pickle", plot_stats=False) self.assertEqual( cnn_output.model.output_shape[1], label_width) @@ -649,7 +649,7 @@ def test_save_model( num_runs = 2 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width) + label_width, max_number_damping=2) max_epochs = 1 early_stop = 100 loss = tf.keras.losses.MeanAbsolutePercentageError() @@ -665,7 +665,7 @@ def test_save_model( label_width), training_parameter=training_parameter, path=self.path, - filename="data_secir_groups_10days_2_active.pickle", + filename="data_secir_groups_10days_2_random.pickle", modeltype='classic', plot_stats=False) model.save_model(model_mlp_multi_input_multi_output.model, @@ -674,7 +674,7 @@ def test_save_model( self.assertEqual(len(os.listdir(self.path)), 2) self.assertEqual(os.listdir(self.path), - ['data_secir_groups_10days_2_active.pickle', 'mlp_multi_multi.keras']) + ['data_secir_groups_10days_2_random.pickle', 'mlp_multi_multi.keras']) @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', return_value=0.1 * np.ones((6, 6))) @@ -695,7 +695,7 @@ def test_load_model( num_runs = 2 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width) + label_width, max_number_damping=2) max_epochs = 1 early_stop = 100 loss = tf.keras.losses.MeanAbsolutePercentageError() @@ -711,7 +711,7 @@ def test_load_model( label_width), training_parameter=training_parameter, path=self.path, - filename="data_secir_groups_10days_2_active.pickle", + filename="data_secir_groups_10days_2_random.pickle", modeltype='classic', plot_stats=False) model.save_model(mlp1.model, From bb937fc0a02071abbb2a9d7e301793f3cbac1d9b Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 23 Jun 2025 13:18:05 +0200 Subject: [PATCH 18/49] Adding more variable error handling --- .../test_surrogatemodel_ode_secir_groups.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 716432cc8d..d708b85cf4 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -578,9 +578,14 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): model.network_fit( path=self.path, model=md, modeltype='classic', training_parameter=training_parameter, filename="data_secir_groups.pickle") - error_message = "[Errno 2] No such file or directory in the fake filesystem: '" + \ - self.path + "data_secir_groups.pickle'" - self.assertEqual(str(error.exception), error_message) + # check error message + error_message_part1 = "[Errno 2] No such file or directory" + error_message_part2 = self.path + error_message_part3 = "data_secir_groups.pickle" + + self.assertIn(error_message_part1, str(error.exception)) + self.assertIn(error_message_part2, str(error.exception)) + self.assertIn(error_message_part3, str(error.exception)) # generate dataset with multiple output input_width = 5 From fb5584454bffa4f09e4f6dc4fb6d65749b57c744 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 25 Jun 2025 06:33:06 +0200 Subject: [PATCH 19/49] Include minimal distance between dampings and minimal damping day --- .../ode_secir_groups/dampings.py | 27 +++++++++++-------- .../ode_secir_groups/data_generation.py | 5 ++-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py index ceb7f269f0..42f9523383 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py @@ -22,21 +22,26 @@ import numpy as np -def generate_dampings(days, max_number_damping, method): +def generate_dampings(days, max_number_damping, method, min_distance=2, + min_damping_day=2): """ Producing dampings for timeseries of length days according to the used method. :param days: Number of days per time series :param max_number_damping: maximal possible number of active dampings, actual number of active dampings can be smaller. :param method: Method used to generate the dampings, possible values "classic", "active", "random" + :param min_distance: Minimal distance between two dampings + :min_damping_day: First day, where a damping can be applied :returns: two lists containing the damping days and the associated damping factors """ if method == "classic": - damp_days, damp_factors = dampings_classic(days, max_number_damping) + damp_days, damp_factors = dampings_classic( + days, max_number_damping, min_distance, min_damping_day) elif method == "active": damp_days, damp_factors = dampings_active(days, max_number_damping) elif method == "random": - damp_days, damp_factors = dampings_random(days, max_number_damping) + damp_days, damp_factors = dampings_random( + days, max_number_damping, min_distance, min_damping_day) else: raise ValueError( "The method argument has to be one of the following: 'classic', 'active' or 'random'.") @@ -127,7 +132,8 @@ def calc_factors_active(n_ddays, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_ # Classic Damping -def dampings_classic(days, max_number_damping): +def dampings_classic(days, max_number_damping, min_distance=2, + min_damping_day=2): """ Generate the damping days using shadow damping and picking days uniformly with a given minimal distance. @@ -135,13 +141,11 @@ def dampings_classic(days, max_number_damping): :param days: Number of days simulated per run. :param max_number_damping: Number of damping days generated. + :param min_distance: Minimal distance between two dampings + :param min_damping_day: First day, where a damping can be applied :returns: Two lists of length max_number_dampings containing the days and the factors. """ # Generating damping days - # Setting parameters - min_distance = 2 - min_damping_day = 2 - if min_distance*max_number_damping+min_damping_day > days: raise ValueError("Invalid input: It's not possible to generate this number of damping" "in the desired time interval.") @@ -250,7 +254,8 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min # Random Damping -def dampings_random(days, max_number_damping): +def dampings_random(days, max_number_damping, min_damping_day=2, + min_distance_damping_day=2): """ Generate dampings according to an easy random rule. @@ -260,11 +265,11 @@ def dampings_random(days, max_number_damping): :param days: Number of days simulated per run. :param max_number_damping: Number of damping days generated. + :param min_distance: Minimal distance between two dampings + :param min_damping_day: First day, where a damping can be applied :returns: Two lists of length max_number_dampings containing the days and the factors. """ # Setting parameters - min_damping_day = 2 - min_distance_damping_day = 2 # Calculating the expected distance between two dampings distance_between_days = np.floor((days-min_damping_day)/max_number_damping) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 205c697416..a4927b3fcc 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -249,7 +249,8 @@ def generate_data( # Generate random damping days damping_days, damping_factors = dampings.generate_dampings( - days, max_number_damping, method=damping_method) + days, max_number_damping, method=damping_method, min_distance=2, + min_damping_day=2) data_run, damped_matrices = run_secir_groups_simulation( days, damping_days, damping_factors, population[random.randint(0, len(population) - 1)]) @@ -359,4 +360,4 @@ def get_population(path): label_width = 30 num_runs = 10000 data = generate_data(num_runs, path_output, path_population, input_width, - label_width) + label_width, damping_method="classic") From b92d2e4fef18a3d4332d3bfc6459acc0593d4bee Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 30 Jun 2025 09:08:57 +0200 Subject: [PATCH 20/49] Applying the review comments --- .../ode_secir_groups/dampings.py | 76 +++++----- .../ode_secir_groups/data_generation.py | 55 +------ .../surrogatemodel/ode_secir_groups/model.py | 85 +---------- .../ode_secir_simple/data_generation.py | 21 +-- .../surrogatemodel/ode_secir_simple/model.py | 14 +- .../memilio/surrogatemodel/surrogate_utils.py | 139 ++++++++++++++++++ .../test_surrogatemodel_ode_secir_groups.py | 29 ++-- 7 files changed, 217 insertions(+), 202 deletions(-) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py index 42f9523383..d4408c4284 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py @@ -22,13 +22,32 @@ import numpy as np +def calc_dist_days(days, min_day, n_dampings, min_distance=1): + """ + Calculating distance between two dampings if there are n_dampings on the interval + (min_day, days) + + :param days: Total number of days + :min_day: First day on which a damping can be applied + :n_dampings: Number of dampings + :min_distance: Lower bound for the distance between two dampings + """ + res = np.floor((days-min_day)/n_dampings) + + if res < min_distance: + raise ValueError( + "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance.") + + return res + + def generate_dampings(days, max_number_damping, method, min_distance=2, min_damping_day=2): """ Producing dampings for timeseries of length days according to the used method. :param days: Number of days per time series - :param max_number_damping: maximal possible number of active dampings, actual number of active dampings can be smaller. + :param max_number_damping: Number of days on which damping can occur. :param method: Method used to generate the dampings, possible values "classic", "active", "random" :param min_distance: Minimal distance between two dampings :min_damping_day: First day, where a damping can be applied @@ -38,7 +57,8 @@ def generate_dampings(days, max_number_damping, method, min_distance=2, damp_days, damp_factors = dampings_classic( days, max_number_damping, min_distance, min_damping_day) elif method == "active": - damp_days, damp_factors = dampings_active(days, max_number_damping) + damp_days, damp_factors = dampings_active( + days, max_number_damping, min_damping_day) elif method == "random": damp_days, damp_factors = dampings_random( days, max_number_damping, min_distance, min_damping_day) @@ -50,7 +70,7 @@ def generate_dampings(days, max_number_damping, method, min_distance=2, # Active Damping -def dampings_active(days, max_number_damping): +def dampings_active(days, max_number_damping, min_damping_day): """" Generating list of damping days and corresponding damping factors using the active method. @@ -58,12 +78,9 @@ def dampings_active(days, max_number_damping): :param days: Number of simulated days :param max_number_damping: Maximal number of dampings per simulation + :param min_damping_day: First day, where a damping can be applied :returns: list of days and damping factors """ - if max_number_damping > days-3: - raise ValueError( - "Number of dampings must be smaller than total number of days!" - ) # Setting parameters gamma_pos = 0 @@ -75,8 +92,9 @@ def dampings_active(days, max_number_damping): t2_min = -0.5 # Defining possible damping days - distance_between_days = np.floor((days-3)/max_number_damping) - damp_days = [distance_between_days*(i+1) + distance_between_days = calc_dist_days( + days, min_damping_day, max_number_damping) + damp_days = [min_damping_day + distance_between_days*(i+1) for i in np.arange(max_number_damping)] # Generate damping factors @@ -92,6 +110,10 @@ def calc_factors_active(n_ddays, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_ The idea is the following: Damping factors are produced randomly until a threshold value is achieved. In this case the factors are reduced stepwise until a moderate level is reached. + The new contact matrix is always calculated with resepect to the initial matrix according + to the following rule: + + M = exp(h)*M_0 :param n_ddays: Number of damping days in time series :param gamma_pos: upper bound for h value @@ -101,7 +123,6 @@ def calc_factors_active(n_ddays, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_ :param t1_min: lower end point for size of active damping changes :param t2_max: upper end point for size of active damping changes :param t2_min: lower end point for size of base damping changes - :param t3_max: upper end ''' h = 0 k = 0 @@ -145,10 +166,10 @@ def dampings_classic(days, max_number_damping, min_distance=2, :param min_damping_day: First day, where a damping can be applied :returns: Two lists of length max_number_dampings containing the days and the factors. """ + # Checking, if the given parameters are compatible + calc_dist_days(days, min_damping_day, max_number_damping, min_distance) + # Generating damping days - if min_distance*max_number_damping+min_damping_day > days: - raise ValueError("Invalid input: It's not possible to generate this number of damping" - "in the desired time interval.") damp_days = generate_dampings_withshadowdamp( max_number_damping, days, min_distance, min_damping_day, 1)[0] @@ -158,7 +179,7 @@ def dampings_classic(days, max_number_damping, min_distance=2, return damp_days, damp_factors -def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min_damping_day, n_runs): +def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min_damping_day, number_of_runs): """ Sampling the damping days according to the established method. @@ -177,18 +198,11 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min :param days: Total number of days per run :param min_distance: Minimal distance between two damping days :param min_damping_day: First day when a damping can be applied - :n_runs: Number of runs for which damping days should be generated + :number_of_runs: Number of runs for which damping days should be generated :returns: list of list of damping days. """ - number_of_dampings = number_of_dampings - days = days - min_distance = min_distance - min_damping_day = min_damping_day - number_of_runs = n_runs - all_dampings = [] - count_runs = 0 count_shadow = 0 while len(all_dampings) < number_of_runs: # Reset the days list and dampings for each run @@ -241,12 +255,11 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min range(0 - min_distance, 0)) + list(range(days + 1, days + min_distance + 1)) dampings = [ ele for ele in dampings if ele not in forbidden_damping_values] - if len(dampings) >= number_of_dampings: + if len(dampings) == number_of_dampings: all_dampings.append(sorted(dampings)) continue # Restart process if any issue occurred - count_runs += 1 count_shadow += 1 return all_dampings @@ -257,11 +270,11 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min def dampings_random(days, max_number_damping, min_damping_day=2, min_distance_damping_day=2): """ - Generate dampings according to an easy random rule. - - The days are drawn using geometrical distributed waiting times and fixed minimal distance between two damping days. - The factors are drwan uniformly on the interval (0,0.5) + Generate random damping days according to the following rule. + The days are drawn using geometrical distributed waiting times and a fixed minimal distance betweem two + damping days. The first damping can occure at min_damping_day. The associated damping factors are drawn uniformly + between 0 and 0.5. :param days: Number of days simulated per run. :param max_number_damping: Number of damping days generated. @@ -272,14 +285,11 @@ def dampings_random(days, max_number_damping, min_damping_day=2, # Setting parameters # Calculating the expected distance between two dampings - distance_between_days = np.floor((days-min_damping_day)/max_number_damping) + distance_between_days = calc_dist_days( + days, min_damping_day, max_number_damping, min_distance_damping_day) # Reducing due to minimal distance restriction reduced_distance = distance_between_days - min_distance_damping_day - if reduced_distance <= 0: - raise ValueError("Invalid input: It's not possible to generate this number of damping" - "in the desired time interval.") - # Try till one admissible configuration of waiting times is produced running = True while running: diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index a4927b3fcc..70d818ceb8 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -34,57 +34,8 @@ InfectionState, Model, interpolate_simulation_result, simulate) import memilio.surrogatemodel.ode_secir_groups.dampings as dampings - - -def interpolate_age_groups(data_entry): - """ Interpolates the age groups from the population data into the age groups used in the simulation. - We assume that the people in the age groups are uniformly distributed. - - :param data_entry: Data entry containing the population data. - :returns: List containing the population in each age group used in the simulation. - - """ - age_groups = { - "A00-A04": data_entry['<3 years'] + data_entry['3-5 years'] * 2 / 3, - "A05-A14": data_entry['3-5 years'] * 1 / 3 + data_entry['6-14 years'], - "A15-A34": data_entry['15-17 years'] + data_entry['18-24 years'] + data_entry['25-29 years'] + data_entry['30-39 years'] * 1 / 2, - "A35-A59": data_entry['30-39 years'] * 1 / 2 + data_entry['40-49 years'] + data_entry['50-64 years'] * 2 / 3, - "A60-A79": data_entry['50-64 years'] * 1 / 3 + data_entry['65-74 years'] + data_entry['>74 years'] * 1 / 5, - "A80+": data_entry['>74 years'] * 4 / 5 - } - return [age_groups[key] for key in age_groups] - - -def remove_confirmed_compartments(result_array): - """ Removes the confirmed compartments which are not used in the data generation. - - :param result_array: Array containing the simulation results. - :returns: Array containing the simulation results without the confirmed compartments. - - """ - num_groups = int(result_array.shape[1] / 10) - delete_indices = [index for i in range( - num_groups) for index in (3+10*i, 5+10*i)] - return np.delete(result_array, delete_indices, axis=1) - - -def transform_data(data, transformer, num_runs): - """ Transforms the data by a logarithmic normalization. - Reshaping is necessary, because the transformer needs an array with dimension <= 2. - - :param data: Data to be transformed. - :param transformer: Transformer used for the transformation. - :param num_runs: - :returns: Transformed data. - - """ - num_groups = 6 - num_compartments = 8 - - data = np.asarray(data).transpose(2, 0, 1).reshape( - num_groups*num_compartments, -1) - scaled_data = transformer.transform(data) - return tf.convert_to_tensor(scaled_data.transpose().reshape(num_runs, -1, num_groups*num_compartments)) +from memilio.surrogatemodel.surrogate_utils import ( + interpolate_age_groups, remove_confirmed_compartments, transform_data) def run_secir_groups_simulation(days, damping_days, damping_factors, populations): @@ -358,6 +309,6 @@ def get_population(path): input_width = 5 label_width = 30 - num_runs = 10000 + num_runs = 1 data = generate_data(num_runs, path_output, path_population, input_width, label_width, damping_method="classic") diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py index 91e5e9cb28..6561755ef1 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py @@ -19,6 +19,8 @@ ############################################################################# from memilio.surrogatemodel.ode_secir_groups import network_architectures from memilio.simulation.osecir import InfectionState +from memilio.surrogatemodel.surrogate_utils import ( + calc_split_index, flat_input) import os import pickle @@ -119,7 +121,7 @@ def plot_compartment_prediction_model( input_series = tf.expand_dims(inputs[n], axis=0) pred = model(input_series) pred = pred.numpy() - pred = pred.reshape((30, 48)) + pred = pred.reshape((label_width, number_groups*num_compartments)) mean_per_day_pred = [] for i in pred: @@ -142,39 +144,6 @@ def plot_compartment_prediction_model( #################### # Helper functions # #################### -def calc_split_index(n, split_train=0.7, - split_valid=0.2, split_test=0.1): - """ - Calculating the indixes for a split_train:split_valid:split_test decomposition of a set with size n - - It must hold split_train + split_valid + split_test = 1 - - :param n: integer value - :param split_train: value between 0 and 1 - :param split_valid: value between 0 and 1 - :param split_test: value between 0 and 1 - :returns: a list of the form [i_train, i_valid, i_test] - """ - if split_train + split_valid + split_test > 1 + 1e-10: - raise ValueError( - "Summed data set shares are greater than 1. Please adjust the values.") - n_train = int(n * split_train) - n_valid = int(n * split_valid) - n_test = n - n_train - n_valid - - return [n_train, n_valid, n_test] - - -def flat_input(input): - """ Flatten input dimension - - :param input: input array of size (n,k,l) - :returns: reshaped array of size (n, k*l) - - """ - dim = tf.reduce_prod(tf.shape(input)[1:]) - return tf.reshape(input, [-1, dim]) - def prepare_data_classic(data): """ @@ -413,39 +382,11 @@ def network_fit( print(df) return history - -def save_model(model, path, modelname): - """ - Saving a trained model. - - :param model: trained tensorflow keras model - :param path: path where the model should be stored - :param modelname: the name of the model - """ - if not os.path.isdir(path): - os.mkdir(path) - path_to_file = os.path.join(path, modelname + ".keras") - model.save(path_to_file) - print("Model successfully saved") - - -def load_model(path): - """ - Loading a trained model. - - :param path: path to the .keras file containing the desired model - :returns: trained tf.keras model - """ - if not os.path.isfile(path): - raise FileExistsError( - "There is no .keras model stored at the given directory.") - return tf.keras.models.load_model(path) - - ##################### # Plots etc. ##################### + def plot_losses(history): """ Plots the losses of the model training. @@ -494,21 +435,6 @@ def get_test_statistic(test_inputs, test_labels, model): return mean_percentage -def get_input_dim_lstm(path_to_file): - """ Extract the dimension of the input data - - :param path_to_file: path to the data - - """ - file = open(path_to_file, 'rb') - - data = pickle.load(file) - input_dim = data['inputs'].shape[2] + np.asarray( - data['contact_matrix']).shape[1] * np.asarray(data['contact_matrix']).shape[2]+1 - - return input_dim - - if __name__ == "__main__": path = os.path.dirname(os.path.realpath(__file__)) path_data = os.path.join(os.path.dirname(os.path.realpath( @@ -522,7 +448,7 @@ def get_input_dim_lstm(path_to_file): neurons_in_hidden_layer = 512 activation_function = 'relu' modelname = "Dense" - modeltype = "classic" # or "classic" + modeltype = "classic" # or "timeseries" model_parameters = (label_width, number_age_groups, number_compartments, hidden_layers, neurons_in_hidden_layer, activation_function, modelname) @@ -535,7 +461,6 @@ def get_input_dim_lstm(path_to_file): metrics = [tf.keras.metrics.MeanAbsoluteError()] training_parameters = (early_stop, max_epochs, loss, optimizer, metrics) - # input_dim = get_input_dim_lstm(path_data) -> Warum? model = initialize_model(model_parameters) model_output = network_fit( diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py index f7541f5e9f..091b7ddae2 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py @@ -33,6 +33,7 @@ from memilio.simulation.osecir import (Index_InfectionState, InfectionState, Model, interpolate_simulation_result, simulate) +from memilio.surrogatemodel.surrogate_utils import (transform_data) def remove_confirmed_compartments(result_array): @@ -188,19 +189,13 @@ def generate_data( if normalize: # logarithmic normalization transformer = FunctionTransformer(np.log1p, validate=True) - inputs = np.asarray(data['inputs']).transpose(2, 0, 1).reshape(8, -1) - scaled_inputs = transformer.transform(inputs) - scaled_inputs = scaled_inputs.transpose().reshape(num_runs, input_width, 8) - scaled_inputs_list = scaled_inputs.tolist() - - labels = np.asarray(data['labels']).transpose(2, 0, 1).reshape(8, -1) - scaled_labels = transformer.transform(labels) - scaled_labels = scaled_labels.transpose().reshape(num_runs, label_width, 8) - scaled_labels_list = scaled_labels.tolist() - - # cast dfs to tensors - data['inputs'] = tf.stack(scaled_inputs_list) - data['labels'] = tf.stack(scaled_labels_list) + data['inputs'] = transform_data( + data['inputs'], transformer, num_runs, 1) + data['labels'] = transform_data( + data['labels'], transformer, num_runs, 1) + else: + data['inputs'] = tf.convert_to_tensor(data['inputs']) + data['labels'] = tf.convert_to_tensor(data['labels']) if save_data: # check if data directory exists. If necessary, create it. diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py index 4033a64120..6e9c891056 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py @@ -27,6 +27,7 @@ from memilio.simulation.osecir import InfectionState import memilio.surrogatemodel.ode_secir_simple.network_architectures as architectures +from memilio.surrogatemodel.surrogate_utils import (calc_split_index) def initialize_model(parameters): @@ -126,22 +127,17 @@ def split_data(inputs, labels, split_train=0.7, 'valid_labels', 'test_inputs', 'test_labels'} """ - if split_train + split_valid + split_test > 1 + 1e-10: - raise ValueError( - "Summed data set shares are greater than 1. Please adjust the values.") - elif inputs.shape[0] != labels.shape[0] or inputs.shape[2] != labels.shape[2]: + if inputs.shape[0] != labels.shape[0] or inputs.shape[2] != labels.shape[2]: raise ValueError( "Number of batches or features different for input and labels") n = inputs.shape[0] - n_train = int(n * split_train) - n_valid = int(n * split_valid) - n_test = n - n_train - n_valid + splitting = calc_split_index(n, split_train, split_valid, split_test) inputs_train, inputs_valid, inputs_test = tf.split( - inputs, [n_train, n_valid, n_test], 0) + inputs, splitting, 0) labels_train, labels_valid, labels_test = tf.split( - labels, [n_train, n_valid, n_test], 0) + labels, splitting, 0) data = { 'train_inputs': inputs_train, diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py new file mode 100644 index 0000000000..b02b3d02d3 --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py @@ -0,0 +1,139 @@ +############################################################################# +# Copyright (C) 2020-2025 MEmilio +# +# Authors: Manuel Heger, Henrik Zunker +# +# Contact: Martin J. Kuehn +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################# + +import numpy as np +import tensorflow as tf +import os + + +def interpolate_age_groups(data_entry): + """ Interpolates the age groups from the population data into the age groups used in the simulation. + We assume that the people in the age groups are uniformly distributed. + + :param data_entry: Data entry containing the population data. + :returns: List containing the population in each age group used in the simulation. + + """ + age_groups = { + "A00-A04": data_entry['<3 years'] + data_entry['3-5 years'] * 2 / 3, + "A05-A14": data_entry['3-5 years'] * 1 / 3 + data_entry['6-14 years'], + "A15-A34": data_entry['15-17 years'] + data_entry['18-24 years'] + data_entry['25-29 years'] + data_entry['30-39 years'] * 1 / 2, + "A35-A59": data_entry['30-39 years'] * 1 / 2 + data_entry['40-49 years'] + data_entry['50-64 years'] * 2 / 3, + "A60-A79": data_entry['50-64 years'] * 1 / 3 + data_entry['65-74 years'] + data_entry['>74 years'] * 1 / 5, + "A80+": data_entry['>74 years'] * 4 / 5 + } + return [age_groups[key] for key in age_groups] + + +def remove_confirmed_compartments(result_array): + """ Removes the confirmed compartments which are not used in the data generation. + + :param result_array: Array containing the simulation results. + :returns: Array containing the simulation results without the confirmed compartments. + + """ + num_groups = int(result_array.shape[1] / 10) + delete_indices = [index for i in range( + num_groups) for index in (3+10*i, 5+10*i)] + return np.delete(result_array, delete_indices, axis=1) + + +def transform_data(data, transformer, num_runs, num_groups=6, num_compartments=8): + """ Transforms the data by a logarithmic normalization. + Reshaping is necessary, because the transformer needs an array with dimension <= 2. + + :param data: Data to be transformed. + :param transformer: Transformer used for the transformation. + :param num_runs: Number of samples represented by the data. + :param num_groups: Number of age groups represented by data. + :param num_compartments: Number of compartments. + :returns: Transformed data. + + """ + + data = np.asarray(data).transpose(2, 0, 1).reshape( + num_groups*num_compartments, -1) + scaled_data = transformer.transform(data) + return tf.convert_to_tensor(scaled_data.transpose().reshape(num_runs, -1, num_groups*num_compartments)) + + +def save_model(model, path, modelname): + """ + Saving a trained model. + + :param model: trained tensorflow keras model + :param path: path where the model should be stored + :param modelname: the name of the model + """ + if not os.path.isdir(path): + os.mkdir(path) + path_to_file = os.path.join(path, modelname + ".keras") + model.save(path_to_file) + # Wird noch gelöscht + print(path_to_file) + ##### + print("Model successfully saved") + + +def load_model(path): + """ + Loading a trained model. + + :param path: path to the .keras file containing the desired model + :returns: trained tf.keras model + """ + if not os.path.isfile(path): + raise FileExistsError( + "There is no .keras model stored at the given directory.") + return tf.keras.models.load_model(path) + + +def calc_split_index(n, split_train=0.7, + split_valid=0.2, split_test=0.1): + """ + Calculating the indixes for a split_train:split_valid:split_test decomposition of a set with size n + + It must hold split_train + split_valid + split_test = 1 + + :param n: integer value + :param split_train: value between 0 and 1 + :param split_valid: value between 0 and 1 + :param split_test: value between 0 and 1 + :returns: a list of the form [i_train, i_valid, i_test] + """ + if split_train + split_valid + split_test > 1 + 1e-10: + raise ValueError( + "Summed data set shares are greater than 1. Please adjust the values.") + n_train = int(n * split_train) + n_valid = int(n * split_valid) + n_test = n - n_train - n_valid + + return [n_train, n_valid, n_test] + + +def flat_input(input): + """ Flatten input dimension + + :param input: input array of size (n,k,l) + :returns: reshaped array of size (n, k*l) + + """ + dim = tf.reduce_prod(tf.shape(input)[1:]) + return tf.reshape(input, [-1, dim]) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index d708b85cf4..0b9acdf58d 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -21,6 +21,7 @@ from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures, dampings) +import memilio.surrogatemodel.surrogate_utils as utils from unittest.mock import patch import os import unittest @@ -52,13 +53,13 @@ def test_dampings_active(self): self.assertEqual(len(factors2), 18) with self.assertRaises(ValueError) as error: - dampings.dampings_active(10, 200) - error_message = "Number of dampings must be smaller than total number of days!" + dampings.dampings_active(10, 200, 3) + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." self.assertEqual(str(error.exception), error_message) # Combine days and factors - days1, factors1 = dampings.dampings_active(30, 5) - days2, factors2 = dampings.dampings_active(40, 10) + days1, factors1 = dampings.dampings_active(30, 5, 4) + days2, factors2 = dampings.dampings_active(40, 10, 4) self.assertEqual(len(days1), 5) self.assertEqual(len(factors1), len(days1)) @@ -66,6 +67,7 @@ def test_dampings_active(self): self.assertEqual(len(days2), len(factors2)) def test_dampings_classic(self): + days1 = dampings.generate_dampings_withshadowdamp(4, 30, 3, 4, 1) days2 = dampings.generate_dampings_withshadowdamp(10, 90, 2, 10, 1) self.assertEqual(len(days1[0]), 4) @@ -73,8 +75,7 @@ def test_dampings_classic(self): with self.assertRaises(ValueError) as error: dampings.dampings_classic(10, 200) - error_message = "Invalid input: It's not possible to generate this number of damping" \ - "in the desired time interval." + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." self.assertEqual(str(error.exception), error_message) # Combine days and factors @@ -89,8 +90,7 @@ def test_dampings_classic(self): def test_dampings_random(self): with self.assertRaises(ValueError) as error: dampings.dampings_random(10, 200) - error_message = "Invalid input: It's not possible to generate this number of damping" \ - "in the desired time interval." + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." self.assertEqual(str(error.exception), error_message) days1, factors1 = dampings.dampings_random(30, 5) @@ -425,22 +425,21 @@ def test_lstm_multi_multi(self): # Dimension of one time step (16 = 4*4) self.assertEqual(output_zeros.shape[2], 16) -# Testing model.py def test_calc_split_index(self): with self.assertRaises(ValueError) as error: - model.calc_split_index( + utils.calc_split_index( 10, 0.9, 0.1, 0.1 ) error_message = "Summed data set shares are greater than 1. Please adjust the values." self.assertEqual(str(error.exception), error_message) - split_index = model.calc_split_index(10, 0.7, 0.1, 0.2) + split_index = utils.calc_split_index(10, 0.7, 0.1, 0.2) self.assertEqual(split_index, [7, 1, 2]) def test_flat_input(self): A = np.zeros((12, 12)) a1 = [A for _ in np.arange(5)] a2 = np.zeros((5, 144)) - a1_flatten = model.flat_input(a1) + a1_flatten = utils.flat_input(a1) b = a2 == a1_flatten self.assertTrue(np.asarray(b).all()) @@ -673,7 +672,7 @@ def test_save_model( filename="data_secir_groups_10days_2_random.pickle", modeltype='classic', plot_stats=False) - model.save_model(model_mlp_multi_input_multi_output.model, + utils.save_model(model_mlp_multi_input_multi_output.model, self.path, "mlp_multi_multi") self.assertEqual(len(os.listdir(self.path)), 2) @@ -719,11 +718,11 @@ def test_load_model( filename="data_secir_groups_10days_2_random.pickle", modeltype='classic', plot_stats=False) - model.save_model(mlp1.model, + utils.save_model(mlp1.model, self.path, "mlp_multi_multi") path = os.path.join(self.path, "mlp_multi_multi.keras") - mlp2 = model.load_model(path) + mlp2 = utils.load_model(path) weights1 = mlp1.model.get_weights() weights2 = mlp2.get_weights() From 8fea15c2ae20591190eb5b7403518dc2a2eb88cb Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 30 Jun 2025 16:36:34 +0200 Subject: [PATCH 21/49] Including review comments --- .../ode_secir_groups/dampings.py | 50 ++++++++++--------- .../ode_secir_groups/data_generation.py | 23 +++++++-- .../ode_secir_simple/data_generation.py | 38 ++++---------- .../memilio/surrogatemodel/surrogate_utils.py | 11 ++-- .../test_surrogatemodel_ode_secir_groups.py | 12 ++--- 5 files changed, 65 insertions(+), 69 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py index d4408c4284..59e84b529e 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py @@ -41,13 +41,13 @@ def calc_dist_days(days, min_day, n_dampings, min_distance=1): return res -def generate_dampings(days, max_number_damping, method, min_distance=2, +def generate_dampings(days, number_dampings, method, min_distance=2, min_damping_day=2): """ Producing dampings for timeseries of length days according to the used method. :param days: Number of days per time series - :param max_number_damping: Number of days on which damping can occur. + :param number_dampings: Number of days on which damping can occur. :param method: Method used to generate the dampings, possible values "classic", "active", "random" :param min_distance: Minimal distance between two dampings :min_damping_day: First day, where a damping can be applied @@ -55,13 +55,13 @@ def generate_dampings(days, max_number_damping, method, min_distance=2, """ if method == "classic": damp_days, damp_factors = dampings_classic( - days, max_number_damping, min_distance, min_damping_day) + days, number_dampings, min_distance, min_damping_day) elif method == "active": damp_days, damp_factors = dampings_active( - days, max_number_damping, min_damping_day) + days, number_dampings, min_damping_day) elif method == "random": damp_days, damp_factors = dampings_random( - days, max_number_damping, min_distance, min_damping_day) + days, number_dampings, min_distance, min_damping_day) else: raise ValueError( "The method argument has to be one of the following: 'classic', 'active' or 'random'.") @@ -70,14 +70,14 @@ def generate_dampings(days, max_number_damping, method, min_distance=2, # Active Damping -def dampings_active(days, max_number_damping, min_damping_day): +def dampings_active(days, number_dampings, min_damping_day): """" Generating list of damping days and corresponding damping factors using the active method. The damping days are created with equal distance on the interval [1, days-3]. :param days: Number of simulated days - :param max_number_damping: Maximal number of dampings per simulation + :param number_dampings: Maximal number of dampings per simulation :param min_damping_day: First day, where a damping can be applied :returns: list of days and damping factors """ @@ -93,13 +93,13 @@ def dampings_active(days, max_number_damping, min_damping_day): # Defining possible damping days distance_between_days = calc_dist_days( - days, min_damping_day, max_number_damping) + days, min_damping_day, number_dampings) damp_days = [min_damping_day + distance_between_days*(i+1) - for i in np.arange(max_number_damping)] + for i in np.arange(number_dampings)] # Generate damping factors dampings = calc_factors_active( - max_number_damping, gamma_pos, alpha, p0, t1_max, t1_min, t2_max, t2_min) + number_dampings, gamma_pos, alpha, p0, t1_max, t1_min, t2_max, t2_min) return damp_days, dampings @@ -153,28 +153,32 @@ def calc_factors_active(n_ddays, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_ # Classic Damping -def dampings_classic(days, max_number_damping, min_distance=2, +def dampings_classic(days, number_dampings, min_distance=2, min_damping_day=2): """ Generate the damping days using shadow damping and picking days uniformly with a given minimal distance. + The idea behind shadow damping is the following: Days are picked randomly in the interval between min_damping_day and days. + After picking one day a fixed number of days before and after the chosen day is blocked. + The procedure is repeated till number_damping many days are chosen. To overcome the problem of higher probability at the boundary, + the interval is artificially increased, artificial days are not counted. The corresponding factors are drawn uniformly from the interval (0,0.5) :param days: Number of days simulated per run. - :param max_number_damping: Number of damping days generated. + :param number_dampings: Number of damping days generated. :param min_distance: Minimal distance between two dampings :param min_damping_day: First day, where a damping can be applied - :returns: Two lists of length max_number_dampings containing the days and the factors. + :returns: Two lists of length number_dampingss containing the days and the factors. """ # Checking, if the given parameters are compatible - calc_dist_days(days, min_damping_day, max_number_damping, min_distance) + calc_dist_days(days, min_damping_day, number_dampings, min_distance) # Generating damping days damp_days = generate_dampings_withshadowdamp( - max_number_damping, days, min_distance, min_damping_day, 1)[0] + number_dampings, days, min_distance, min_damping_day, 1)[0] # Generating damping factors - damp_factors = np.random.uniform(0, 0.5, max_number_damping).tolist() + damp_factors = np.random.uniform(0, 0.5, number_dampings).tolist() return damp_days, damp_factors @@ -267,7 +271,7 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min # Random Damping -def dampings_random(days, max_number_damping, min_damping_day=2, +def dampings_random(days, number_dampings, min_damping_day=2, min_distance_damping_day=2): """ Generate random damping days according to the following rule. @@ -277,24 +281,24 @@ def dampings_random(days, max_number_damping, min_damping_day=2, between 0 and 0.5. :param days: Number of days simulated per run. - :param max_number_damping: Number of damping days generated. + :param number_dampings: Number of damping days generated. :param min_distance: Minimal distance between two dampings :param min_damping_day: First day, where a damping can be applied - :returns: Two lists of length max_number_dampings containing the days and the factors. + :returns: Two lists of length number_dampingss containing the days and the factors. """ # Setting parameters # Calculating the expected distance between two dampings distance_between_days = calc_dist_days( - days, min_damping_day, max_number_damping, min_distance_damping_day) + days, min_damping_day, number_dampings, min_distance_damping_day) # Reducing due to minimal distance restriction reduced_distance = distance_between_days - min_distance_damping_day # Try till one admissible configuration of waiting times is produced running = True while running: - dist = np.random.geometric(1/reduced_distance, max_number_damping) - if np.sum(dist) + min_damping_day + max_number_damping*min_distance_damping_day < days: + dist = np.random.geometric(1/reduced_distance, number_dampings) + if np.sum(dist) + min_damping_day + number_dampings*min_distance_damping_day < days: running = False # Reconstructing the days using the waiting times @@ -306,6 +310,6 @@ def dampings_random(days, max_number_damping, min_damping_day=2, day = day + min_distance_damping_day # Generating the associated damping factors - damping_factors = np.random.uniform(0, 0.5, max_number_damping) + damping_factors = np.random.uniform(0, 0.5, number_dampings) return ddays, damping_factors diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 70d818ceb8..1071bcb5a4 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -36,6 +36,8 @@ import memilio.surrogatemodel.ode_secir_groups.dampings as dampings from memilio.surrogatemodel.surrogate_utils import ( interpolate_age_groups, remove_confirmed_compartments, transform_data) +import memilio.simulation as mio +import memilio.simulation.osecir as osecir def run_secir_groups_simulation(days, damping_days, damping_factors, populations): @@ -49,6 +51,9 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations :returns: List containing the populations in each compartment used to initialize the run. """ + # Collect indices of confirmed compartments + del_indices = [] + if len(damping_days) != len(damping_factors): raise ValueError("Length of damping_days and damping_factors differ!") @@ -114,6 +119,14 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations # twice the value of RiskOfInfectionFromSymptomatic model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.5 + # Collecting deletable indices + index_no_sym_conf = model.populations.get_flat_index( + osecir.MultiIndex_PopulationsArray(mio.AgeGroup(i), osecir.InfectionState.InfectedNoSymptomsConfirmed)) + index_sym_conf = model.populations.get_flat_index( + osecir.MultiIndex_PopulationsArray(mio.AgeGroup(i), osecir.InfectionState.InfectedSymptomsConfirmed)) + del_indices.append(index_no_sym_conf) + del_indices.append(index_sym_conf) + # StartDay is the n-th day of the year model.parameters.StartDay = ( date(start_year, start_month, start_day) - date(start_year, 1, 1)).days @@ -148,7 +161,7 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations # Omit first column, as the time points are not of interest here. result_array = remove_confirmed_compartments( - np.transpose(result.as_ndarray()[1:, :])) + np.transpose(result.as_ndarray()[1:, :]), del_indices) dataset_entries = copy.deepcopy(result_array) dataset_entries = np.nan_to_num(dataset_entries) @@ -158,7 +171,7 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations def generate_data( num_runs, path_out, path_population, input_width, label_width, - normalize=True, save_data=True, damping_method="random", max_number_damping=5): + normalize=True, save_data=True, damping_method="random", number_dampings=5): """ Generate data sets of num_runs many equation-based model simulations and possibly transforms the computed results by a log(1+x) transformation. Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks. In general, we have 8 different compartments and 6 age groups. If we choose @@ -174,7 +187,7 @@ def generate_data( :param normalize: Default: true Option to transform dataset by logarithmic normalization. :param save_data: Default: true Option to save the dataset. :param damping_method: String specifying the damping method, that should be used. Possible values "classic", "active", "random". - :param max_number_damping: Maximal number of possible dampings. + :param number_dampings: Maximal number of possible dampings. :returns: Data dictionary of input and label data sets. """ @@ -200,7 +213,7 @@ def generate_data( # Generate random damping days damping_days, damping_factors = dampings.generate_dampings( - days, max_number_damping, method=damping_method, min_distance=2, + days, number_dampings, method=damping_method, min_distance=2, min_damping_day=2) data_run, damped_matrices = run_secir_groups_simulation( @@ -309,6 +322,6 @@ def get_population(path): input_width = 5 label_width = 30 - num_runs = 1 + num_runs = 10000 data = generate_data(num_runs, path_output, path_population, input_width, label_width, damping_method="classic") diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py index 091b7ddae2..8dad79ea63 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py @@ -33,32 +33,9 @@ from memilio.simulation.osecir import (Index_InfectionState, InfectionState, Model, interpolate_simulation_result, simulate) -from memilio.surrogatemodel.surrogate_utils import (transform_data) - - -def remove_confirmed_compartments(result_array): - """Aggregates the confirmed and non-confirmed infection compartments. - - :param result_array: Array containing simulation results with compartment populations - :returns: Modified array with aggregated compartments - """ - - # Defining relevant indices - indices_inf_no_symp = [2, 3] - index_inf_no_symp = 2 - indices_inf_symp = [4, 5] - index_inf_symp = 4 - indices_removed = [3, 5] - - # Calculating the values for the merged compartments - sum_inf_no_symp = np.sum(result_array[:, indices_inf_no_symp], axis=1) - sum_inf_symp = np.sum(result_array[:, indices_inf_symp], axis=1) - - # Seting the new values - result_array[:, index_inf_no_symp] = sum_inf_no_symp - result_array[:, index_inf_symp] = sum_inf_symp - - return np.delete(result_array, indices_removed, axis=1) +from memilio.surrogatemodel.surrogate_utils import ( + interpolate_age_groups, remove_confirmed_compartments, transform_data) +import memilio.simulation.osecir as osecir def run_secir_simple_simulation(days): @@ -127,6 +104,13 @@ def run_secir_simple_simulation(days): model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones( (num_groups, num_groups)) * 0 + # Collecting deletable indices + index_no_sym_conf = model.populations.get_flat_index( + osecir.MultiIndex_PopulationsArray(A0, osecir.InfectionState.InfectedNoSymptomsConfirmed)) + index_sym_conf = model.populations.get_flat_index( + osecir.MultiIndex_PopulationsArray(A0, osecir.InfectionState.InfectedSymptomsConfirmed)) + del_indices = (index_no_sym_conf, index_sym_conf) + # Apply mathematical constraints to parameters model.apply_constraints() @@ -139,7 +123,7 @@ def run_secir_simple_simulation(days): result_array = result.as_ndarray() result_array = remove_confirmed_compartments( - result_array[1:, :].transpose()) + result_array[1:, :].transpose(), del_indices) dataset_entries = copy.deepcopy(result_array) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py index b02b3d02d3..5cede8065b 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py @@ -42,16 +42,14 @@ def interpolate_age_groups(data_entry): return [age_groups[key] for key in age_groups] -def remove_confirmed_compartments(result_array): +def remove_confirmed_compartments(result_array, delete_indices): """ Removes the confirmed compartments which are not used in the data generation. :param result_array: Array containing the simulation results. + :param delete_indices: flat indices indicating position containing data from confirmed compartments. :returns: Array containing the simulation results without the confirmed compartments. """ - num_groups = int(result_array.shape[1] / 10) - delete_indices = [index for i in range( - num_groups) for index in (3+10*i, 5+10*i)] return np.delete(result_array, delete_indices, axis=1) @@ -61,7 +59,7 @@ def transform_data(data, transformer, num_runs, num_groups=6, num_compartments=8 :param data: Data to be transformed. :param transformer: Transformer used for the transformation. - :param num_runs: Number of samples represented by the data. + :param num_runs: Number of samples represented by the data. :param num_groups: Number of age groups represented by data. :param num_compartments: Number of compartments. :returns: Transformed data. @@ -86,9 +84,6 @@ def save_model(model, path, modelname): os.mkdir(path) path_to_file = os.path.join(path, modelname + ".keras") model.save(path_to_file) - # Wird noch gelöscht - print(path_to_file) - ##### print("Model successfully saved") diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 0b9acdf58d..cb5faac70c 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -176,7 +176,7 @@ def test_data_generation_runs( data_1 = data_generation.generate_data( num_runs_1, self.path, "", input_width_1, label_width_1, - save_data=False, max_number_damping=2) + save_data=False, number_dampings=2) self.assertEqual(len(data_1['inputs']), num_runs_1) self.assertEqual(len(data_1['inputs'][0]), input_width_1) self.assertEqual(len(data_1['inputs'][0][0]), 48) @@ -186,7 +186,7 @@ def test_data_generation_runs( data_2 = data_generation.generate_data( num_runs_2, self.path, "", input_width_2, label_width_2, - save_data=False, max_number_damping=2) + save_data=False, number_dampings=2) self.assertEqual(len(data_2['inputs']), num_runs_2) self.assertEqual(len(data_2['inputs'][0]), input_width_2) self.assertEqual(len(data_2['inputs'][0][0]), 48) @@ -213,7 +213,7 @@ def test_data_generation_save( num_runs = 1 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, damping_method="random", max_number_damping=2) + label_width, damping_method="random", number_dampings=2) self.assertEqual(len(os.listdir(self.path)), 1) self.assertEqual(os.listdir(self.path), @@ -591,7 +591,7 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): label_width = 10 num_runs = 5 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, max_number_damping=1) + label_width, number_dampings=1) # models with multiple outputs model_mlp_multi_input_multi_output = model.network_fit( @@ -653,7 +653,7 @@ def test_save_model( num_runs = 2 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, max_number_damping=2) + label_width, number_dampings=2) max_epochs = 1 early_stop = 100 loss = tf.keras.losses.MeanAbsolutePercentageError() @@ -699,7 +699,7 @@ def test_load_model( num_runs = 2 data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, max_number_damping=2) + label_width, number_dampings=2) max_epochs = 1 early_stop = 100 loss = tf.keras.losses.MeanAbsolutePercentageError() From aab1849610ec4c75294a92985bb52e92a2c7a7e9 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 2 Jul 2025 06:30:15 +0200 Subject: [PATCH 22/49] checking ci fail --- .../test_surrogatemodel_ode_secir_groups.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index cb5faac70c..6037f68d09 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -721,7 +721,13 @@ def test_load_model( utils.save_model(mlp1.model, self.path, "mlp_multi_multi") + self.assertEqual(len(os.listdir(self.path)), 2) + self.assertEqual(os.listdir(self.path), + ['data_secir_groups_10days_2_random.pickle', 'mlp_multi_multi.keras']) + path = os.path.join(self.path, "mlp_multi_multi.keras") + + self.assertTrue(os.path.isfile(path)) mlp2 = utils.load_model(path) weights1 = mlp1.model.get_weights() From a27a0c93672df292813f2dd9b468042c6929f893 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 2 Jul 2025 06:57:53 +0200 Subject: [PATCH 23/49] CI fails --- .../test_surrogatemodel_ode_secir_groups.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 6037f68d09..81798ad6af 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,6 +222,7 @@ def test_data_generation_save( # # Testing network_architectures.py + def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -725,10 +726,14 @@ def test_load_model( self.assertEqual(os.listdir(self.path), ['data_secir_groups_10days_2_random.pickle', 'mlp_multi_multi.keras']) - path = os.path.join(self.path, "mlp_multi_multi.keras") + path_file = os.path.join(self.path, "mlp_multi_multi.keras") - self.assertTrue(os.path.isfile(path)) - mlp2 = utils.load_model(path) + self.assertTrue(os.path.isfile(path_file)) + if os.path.isfile(path_file): + print("-_-_-_-_-_- Datei existiert -_-_-_-_-") + else: + print("Neeeeeeeeeiiiiiiiiinnnnnnnn.......nnnnnnn......nnnnnnn") + mlp2 = utils.load_model(path_file) weights1 = mlp1.model.get_weights() weights2 = mlp2.get_weights() From 71f26afde409a518cf052ea724e74730e90ae9fb Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 2 Jul 2025 07:21:52 +0200 Subject: [PATCH 24/49] Inserting grid search values --- .../surrogatemodel/ode_secir_groups/dampings.py | 14 +++++++------- .../test_surrogatemodel_ode_secir_groups.py | 3 ++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py index 59e84b529e..731f5291da 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py @@ -83,13 +83,13 @@ def dampings_active(days, number_dampings, min_damping_day): """ # Setting parameters - gamma_pos = 0 - alpha = -1 - p0 = 0.5 - t1_max = -0.3 - t1_min = -1.2 - t2_max = 2 - t2_min = -0.5 + gamma_pos = -2 + alpha = -4 + p0 = 0.4 + t1_max = -1 + t1_min = -2.5 + t2_max = 0.95 + t2_min = -0.25 # Defining possible damping days distance_between_days = calc_dist_days( diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 81798ad6af..328959dc47 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,7 +222,6 @@ def test_data_generation_save( # # Testing network_architectures.py - def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -733,12 +732,14 @@ def test_load_model( print("-_-_-_-_-_- Datei existiert -_-_-_-_-") else: print("Neeeeeeeeeiiiiiiiiinnnnnnnn.......nnnnnnn......nnnnnnn") + """ mlp2 = utils.load_model(path_file) weights1 = mlp1.model.get_weights() weights2 = mlp2.get_weights() for w1, w2 in zip(weights1, weights2): np.testing.assert_allclose(w1, w2) + """ if __name__ == '__main__': From 49db37cd2e6e607a0f6527178b1f104d0d31230f Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 2 Jul 2025 07:43:43 +0200 Subject: [PATCH 25/49] Ci test --- .../test_surrogatemodel_ode_secir_groups.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 328959dc47..eaa9533c68 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,6 +222,7 @@ def test_data_generation_save( # # Testing network_architectures.py + def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -732,8 +733,7 @@ def test_load_model( print("-_-_-_-_-_- Datei existiert -_-_-_-_-") else: print("Neeeeeeeeeiiiiiiiiinnnnnnnn.......nnnnnnn......nnnnnnn") - """ - mlp2 = utils.load_model(path_file) + mlp2 = tf.keras.models.load_model(path) weights1 = mlp1.model.get_weights() weights2 = mlp2.get_weights() From 9923be42728788437b6f6844fc31925555ceae54 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 2 Jul 2025 07:48:20 +0200 Subject: [PATCH 26/49] ci fails --- .../surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index eaa9533c68..2bfe8e82d7 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -222,7 +222,6 @@ def test_data_generation_save( # # Testing network_architectures.py - def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( @@ -739,7 +738,6 @@ def test_load_model( weights2 = mlp2.get_weights() for w1, w2 in zip(weights1, weights2): np.testing.assert_allclose(w1, w2) - """ if __name__ == '__main__': From 86874c510982910c3c6bbbe41a7b055c96a18cd5 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 2 Jul 2025 08:21:45 +0200 Subject: [PATCH 27/49] Typo --- .../surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 2bfe8e82d7..5b8777d5b7 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -732,7 +732,7 @@ def test_load_model( print("-_-_-_-_-_- Datei existiert -_-_-_-_-") else: print("Neeeeeeeeeiiiiiiiiinnnnnnnn.......nnnnnnn......nnnnnnn") - mlp2 = tf.keras.models.load_model(path) + mlp2 = tf.keras.models.load_model(path_file) weights1 = mlp1.model.get_weights() weights2 = mlp2.get_weights() From 9dd10fd2b57691eee32bfe21bf70edd8b3129f25 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 2 Jul 2025 09:00:29 +0200 Subject: [PATCH 28/49] Try to fix CI fails --- .../test_surrogatemodel_ode_secir_groups.py | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 5b8777d5b7..470a331de9 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -25,6 +25,7 @@ from unittest.mock import patch import os import unittest +import tempfile import numpy as np import tensorflow as tf @@ -718,21 +719,10 @@ def test_load_model( filename="data_secir_groups_10days_2_random.pickle", modeltype='classic', plot_stats=False) - utils.save_model(mlp1.model, - self.path, "mlp_multi_multi") - - self.assertEqual(len(os.listdir(self.path)), 2) - self.assertEqual(os.listdir(self.path), - ['data_secir_groups_10days_2_random.pickle', 'mlp_multi_multi.keras']) - - path_file = os.path.join(self.path, "mlp_multi_multi.keras") - - self.assertTrue(os.path.isfile(path_file)) - if os.path.isfile(path_file): - print("-_-_-_-_-_- Datei existiert -_-_-_-_-") - else: - print("Neeeeeeeeeiiiiiiiiinnnnnnnn.......nnnnnnn......nnnnnnn") - mlp2 = tf.keras.models.load_model(path_file) + with tempfile.TemporaryDirectory() as tmpdir: + utils.save_model(mlp1.model, tmpdir, "mlp_multi_multi") + path_file = os.path.join(tmpdir, "mlp_multi_multi.keras") + mlp2 = utils.load_model(path_file) weights1 = mlp1.model.get_weights() weights2 = mlp2.get_weights() From d5d46b94647f0eb1aee60f3cd93f8d544fc15d67 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 7 Jul 2025 07:40:06 +0200 Subject: [PATCH 29/49] Trying to find reason for CI fail --- .../test_surrogatemodel_ode_secir_groups.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 470a331de9..d840362c64 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -25,7 +25,7 @@ from unittest.mock import patch import os import unittest -import tempfile + import numpy as np import tensorflow as tf @@ -719,10 +719,14 @@ def test_load_model( filename="data_secir_groups_10days_2_random.pickle", modeltype='classic', plot_stats=False) - with tempfile.TemporaryDirectory() as tmpdir: - utils.save_model(mlp1.model, tmpdir, "mlp_multi_multi") - path_file = os.path.join(tmpdir, "mlp_multi_multi.keras") - mlp2 = utils.load_model(path_file) + utils.save_model(mlp1.model, self.path, "mlp_multi_multi") + path_file = os.path.join(self.path, "mlp_multi_multi.keras") + if os.path.isfile(path_file): + print("--------------------------------------------------") + print(r"\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/") + print(r"/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ ") + print("--------------------------------------------------") + mlp2 = utils.load_model(path_file) weights1 = mlp1.model.get_weights() weights2 = mlp2.get_weights() From 8e232721ded64010e12a71f397281fea5ba5bb76 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 7 Jul 2025 08:30:23 +0200 Subject: [PATCH 30/49] Collecting grid_search and helper functions in a utils folder --- .../ode_secir_groups/data_generation.py | 2 +- .../surrogatemodel/ode_secir_groups/model.py | 2 +- .../ode_secir_simple/data_generation.py | 2 +- .../ode_secir_simple/hyperparameter_tuning.py | 2 +- .../surrogatemodel/ode_secir_simple/model.py | 2 +- .../grid_search.py | 27 ++++--- .../helper_functions.py} | 0 .../test_surrogatemodel_ode_secir_groups.py | 70 ++++++++++++++++++- .../test_surrogatemodel_ode_secir_simple.py | 8 ++- 9 files changed, 97 insertions(+), 18 deletions(-) rename pycode/memilio-surrogatemodel/memilio/surrogatemodel/{ode_secir_simple => utils}/grid_search.py (86%) rename pycode/memilio-surrogatemodel/memilio/surrogatemodel/{surrogate_utils.py => utils/helper_functions.py} (100%) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 1071bcb5a4..7f304fc219 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -34,7 +34,7 @@ InfectionState, Model, interpolate_simulation_result, simulate) import memilio.surrogatemodel.ode_secir_groups.dampings as dampings -from memilio.surrogatemodel.surrogate_utils import ( +from memilio.surrogatemodel.utils.helper_functions import ( interpolate_age_groups, remove_confirmed_compartments, transform_data) import memilio.simulation as mio import memilio.simulation.osecir as osecir diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py index 6561755ef1..8a917bfdb4 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/model.py @@ -19,7 +19,7 @@ ############################################################################# from memilio.surrogatemodel.ode_secir_groups import network_architectures from memilio.simulation.osecir import InfectionState -from memilio.surrogatemodel.surrogate_utils import ( +from memilio.surrogatemodel.utils.helper_functions import ( calc_split_index, flat_input) import os import pickle diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py index 8dad79ea63..fa6acec989 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py @@ -33,7 +33,7 @@ from memilio.simulation.osecir import (Index_InfectionState, InfectionState, Model, interpolate_simulation_result, simulate) -from memilio.surrogatemodel.surrogate_utils import ( +from memilio.surrogatemodel.utils.helper_functions import ( interpolate_age_groups, remove_confirmed_compartments, transform_data) import memilio.simulation.osecir as osecir diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/hyperparameter_tuning.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/hyperparameter_tuning.py index 0123c354c3..d1a6f69ba5 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/hyperparameter_tuning.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/hyperparameter_tuning.py @@ -25,7 +25,7 @@ import time from sklearn.model_selection import KFold import numpy as np -import memilio.surrogatemodel.ode_secir_simple.grid_search as grid_search +import memilio.surrogatemodel.utils.grid_search as grid_search import memilio.surrogatemodel.ode_secir_simple.model as md # Setting random seed diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py index 6e9c891056..4db94c0cb2 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/model.py @@ -27,7 +27,7 @@ from memilio.simulation.osecir import InfectionState import memilio.surrogatemodel.ode_secir_simple.network_architectures as architectures -from memilio.surrogatemodel.surrogate_utils import (calc_split_index) +from memilio.surrogatemodel.utils.helper_functions import (calc_split_index) def initialize_model(parameters): diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/grid_search.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/grid_search.py similarity index 86% rename from pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/grid_search.py rename to pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/grid_search.py index 869c60404d..b60b345688 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/grid_search.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/grid_search.py @@ -26,13 +26,15 @@ import time from sklearn.model_selection import KFold import numpy as np -import memilio.surrogatemodel.ode_secir_simple.network_architectures as architectures -import memilio.surrogatemodel.ode_secir_simple.model as md +import memilio.surrogatemodel.ode_secir_simple.network_architectures as architectures_simple +import memilio.surrogatemodel.ode_secir_groups.network_architectures as architectures_groups +import memilio.surrogatemodel.ode_secir_simple.model as md_simple +import memilio.surrogatemodel.ode_secir_groups.model as md_groups # Function to train and evaluate the model using cross-validation -def train_and_evaluate_model(param, inputs, labels, training_parameter, show_results=False): +def train_and_evaluate_model(param, inputs, labels, training_parameter, show_results=False, modeltype="simple"): """ Training and evaluating a model with given architecture using 5-fold cross validation, returning a dictionary with the main training statistics. :param param: tuple of parameters describing the model architecture, it should be of the form @@ -43,12 +45,14 @@ def train_and_evaluate_model(param, inputs, labels, training_parameter, show_res (early_stop, max_epochs, loss, optimizer, metrics), where loss is a loss-function implemented in keras, optimizer is the name of the used optimizer, metrics is a list of used training metrics, e.g. [tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.MeanAbsolutePercentageError()] :param show_results: Boolean, whether or not the evaluation results are printed. + :param modeltype: String, specifying, which type of model is going to be tested. The possible values are "simple" - refering to the surrogate for the SECIR-model + without age_resolution, "groups" - for the age resolved SECIR model. :returns: a dictionary of training statistics of the form {"model", "activation","optimizer","mean_train_loss_kfold","mean_val_loss_kfold","training_time", "train_losses", "val_losses"} """ # Unpacking parameters - _, _, _, _, activation, modelname = param + activation, modelname = param[-2:] early_stop, max_epochs, loss, optimizer, metrics = training_parameter early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=early_stop, @@ -74,7 +78,12 @@ def train_and_evaluate_model(param, inputs, labels, training_parameter, show_res valid_labels = tf.gather(labels, indices=val_idx) # Initializing model - model = md.initialize_model(param) + if modeltype == "simple": + model = md_simple.initialize_model(param) + elif modeltype == "groups": + model = md_groups.initialize_model(param) + else: + raise ValueError(modeltype+" is not known.") # Compile the model model.compile(loss=loss, optimizer=optimizer, @@ -115,7 +124,7 @@ def train_and_evaluate_model(param, inputs, labels, training_parameter, show_res } -def perform_grid_search(model_parameters, inputs, labels, training_parameters, filename_df, path=None): +def perform_grid_search(model_parameters, inputs, labels, training_parameters, filename_df, path=None, modeltype="simple"): """ Performing grid search for a given set of model parameters The results are stored in directory 'secir_simple_grid_search', each row has the form @@ -132,6 +141,8 @@ def perform_grid_search(model_parameters, inputs, labels, training_parameters, f metrics is a list of used training metrics, e.g. [tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.MeanAbsolutePercentageError()] :param filename_df: String, giving name of the file, where the data is stored, actual filename is given by filename_df + ".pickle" :param path: String representing the path, where dataframe should be stored + :param modeltype: String, specifying, which type of model is going to be tested. The possible values are "simple" - refering to the surrogate for the SECIR-model + without age_resolution, "groups" - for the age resolved SECIR model. """ # Create a DataFrame to store the results df_results = pd.DataFrame(columns=['model', 'optimizer', 'number_of_hidden_layers', 'number_of_neurons', 'activation', @@ -141,7 +152,7 @@ def perform_grid_search(model_parameters, inputs, labels, training_parameters, f # Iterate the different model architectures and save the training results for param in model_parameters: for training_parameter in training_parameters: - _, _, layer, neuron_number, activation, modelname = param + layer, neuron_number, activation, modelname = param[-4:] results = train_and_evaluate_model( param, inputs, labels, training_parameter) df_results.loc[len(df_results.index)] = [ @@ -156,7 +167,7 @@ def perform_grid_search(model_parameters, inputs, labels, training_parameters, f ] # Save the results in file - folder_name = 'secir_simple_grid_search' + folder_name = 'secir_' + modeltype + '_grid_search' if path is None: path = os.path.dirname(os.path.realpath(__file__)) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py similarity index 100% rename from pycode/memilio-surrogatemodel/memilio/surrogatemodel/surrogate_utils.py rename to pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index d840362c64..c00f3ec639 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -21,11 +21,13 @@ from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures, dampings) -import memilio.surrogatemodel.surrogate_utils as utils +import memilio.surrogatemodel.utils.helper_functions as utils +from memilio.surrogatemodel.utils import grid_search from unittest.mock import patch +import pandas as pd import os import unittest - +import pickle import numpy as np import tensorflow as tf @@ -733,6 +735,70 @@ def test_load_model( for w1, w2 in zip(weights1, weights2): np.testing.assert_allclose(w1, w2) + @patch('memilio.surrogatemodel.utils.grid_search.train_and_evaluate_model') + def test_perform_grid_search(self, mock_train_and_evaluate_model): + """ Testing grid search for simple model along a sequence of possible parameter values""" + mock_train_and_evaluate_model.return_value = { + "model": "._.", + "activation": "relu", + "optimizer": "Eva", + "mean_train_loss_kfold": 42, + "mean_val_loss_kfold": 12, + "training_time": 1, + "train_losses": [3], + "val_losses": [6] + } + filename_df = "dataframe_optimizer" + os.mkdir(self.path) + + # General grid search parameters for the training process: + early_stop = [100] + max_epochs = [1] + losses = [tf.keras.losses.MeanAbsolutePercentageError()] + optimizers = ['Adam', 'Adam'] + metrics = [[tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()]] + + # Define grid search parameters for the architecture + label_width = 3 + num_outputs = 2 + num_age_groups = 1 + hidden_layers = [1, 2] + neurons_in_hidden_layer = [32] + activation_function = ['relu'] + models = ["Dense"] + + # Collecting parameters + training_parameters = [(early, epochs, loss, optimizer, metric) + for early in early_stop for epochs in max_epochs for loss in losses + for optimizer in optimizers for metric in metrics] + + model_parameters = [(label_width, num_age_groups, num_outputs, layer, neuron_number, activation, modelname) + for layer in hidden_layers for neuron_number in neurons_in_hidden_layer + for activation in activation_function for modelname in models] + + # generate dataset with multiple output + inputs = tf.ones([5, 1, 2]) + labels = tf.ones([5, 3, 2]) + + grid_search.perform_grid_search( + model_parameters, inputs, labels, training_parameters, + filename_df, self.path, "groups" + ) + + # testing saving the results + self.assertEqual(len(os.listdir(self.path)), 1) + + self.assertEqual(os.listdir(os.path.join(self.path, 'secir_groups_grid_search')), + [filename_df+".pickle"]) + + # testing size of the output + path_name = os.path.join(self.path, 'secir_groups_grid_search') + with open(os.path.join(path_name, filename_df + ".pickle"), "rb") as f: + d = pickle.load(f) + df = pd.DataFrame(d) + self.assertEqual(len(df['model']), 4) + if __name__ == '__main__': unittest.main() diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py index 8bd3dd5a13..8eef8afa22 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py @@ -17,10 +17,12 @@ # See the License for the specific language governing permissions and # limitations under the License. ############################################################################# + from pyfakefs import fake_filesystem_unittest from memilio.surrogatemodel.ode_secir_simple import (data_generation, - network_architectures, grid_search) + network_architectures) +from memilio.surrogatemodel.utils import grid_search from memilio.surrogatemodel.ode_secir_simple import model as md import os import pickle @@ -441,9 +443,9 @@ def test_train_and_evaluate_model(self): self.assertEqual(len(res["val_losses"][0][0]), max_epochs) self.assertEqual(res["optimizer"], "Adam") - @patch('memilio.surrogatemodel.ode_secir_simple.grid_search.train_and_evaluate_model') + @patch('memilio.surrogatemodel.utils.grid_search.train_and_evaluate_model') def test_perform_grid_search(self, mock_train_and_evaluate_model): - """ Testing gridsearch along a sequence of possible parameter values""" + """ Testing grid search for simple model along a sequence of possible parameter values""" mock_train_and_evaluate_model.return_value = { "model": "._.", "activation": "relu", From 35f61a9839e5e302b5526eb2d544f4f07b816b7a Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 7 Jul 2025 09:17:50 +0200 Subject: [PATCH 31/49] Including dampings in utils folder --- .../surrogatemodel/ode_secir_groups/data_generation.py | 6 +++--- .../surrogatemodel/{ode_secir_groups => utils}/dampings.py | 0 .../test_surrogatemodel_ode_secir_groups.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename pycode/memilio-surrogatemodel/memilio/surrogatemodel/{ode_secir_groups => utils}/dampings.py (100%) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 7f304fc219..de06e370a8 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -33,7 +33,7 @@ from memilio.simulation.osecir import (Index_InfectionState, InfectionState, Model, interpolate_simulation_result, simulate) -import memilio.surrogatemodel.ode_secir_groups.dampings as dampings +import memilio.surrogatemodel.utils.dampings as dampings from memilio.surrogatemodel.utils.helper_functions import ( interpolate_age_groups, remove_confirmed_compartments, transform_data) import memilio.simulation as mio @@ -142,8 +142,8 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations damped_matrices = [] for i in np.arange(len(damping_days)): - damping = damping = np.ones((num_groups, num_groups) - ) * damping_factors[i] + damping = np.ones((num_groups, num_groups) + ) * damping_factors[i] day = damping_days[i] model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping( coeffs=(damping), t=day, level=0, type=0)) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/dampings.py similarity index 100% rename from pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/dampings.py rename to pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/dampings.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index c00f3ec639..5b8a9a6d54 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -20,9 +20,9 @@ from pyfakefs import fake_filesystem_unittest from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, - network_architectures, dampings) + network_architectures) import memilio.surrogatemodel.utils.helper_functions as utils -from memilio.surrogatemodel.utils import grid_search +from memilio.surrogatemodel.utils import dampings, grid_search from unittest.mock import patch import pandas as pd import os From 683278f9cc1b92084bdca3a29dce1a2ce80ad134 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 7 Jul 2025 09:35:59 +0200 Subject: [PATCH 32/49] Fixing typo in docs --- .../memilio/surrogatemodel/ode_secir_groups/data_generation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index de06e370a8..97508aa77b 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -17,6 +17,7 @@ # See the License for the specific language governing permissions and # limitations under the License. ############################################################################# + import copy import os import pickle From 2f8e37fedbabb71f227da756becb261eb182b285 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 7 Jul 2025 12:40:30 +0200 Subject: [PATCH 33/49] Adding __init__ --- .../memilio/surrogatemodel/utils/__init__.py | 23 +++++++++++++++++++ .../surrogatemodel/utils/helper_functions.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/__init__.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/__init__.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/__init__.py new file mode 100644 index 0000000000..b311d22eda --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/__init__.py @@ -0,0 +1,23 @@ +############################################################################# +# Copyright (C) 2020-2025 MEmilio +# +# Authors: Agatha Schmidt, Henrik Zunker +# +# Contact: Martin J. Kuehn +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################# + +""" +Various utils for surrogate models +""" diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index 5cede8065b..8e850e32ef 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -95,7 +95,7 @@ def load_model(path): :returns: trained tf.keras model """ if not os.path.isfile(path): - raise FileExistsError( + raise FileNotFoundError( "There is no .keras model stored at the given directory.") return tf.keras.models.load_model(path) From 6deedffade89fc3ad41dc92d27b5b4cd9128cff1 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 7 Jul 2025 14:43:33 +0200 Subject: [PATCH 34/49] Adding tests for utils and including review --- .../surrogatemodel/utils/grid_search.py | 5 +- .../surrogatemodel/utils/helper_functions.py | 26 +- .../test_surrogatemodel_ode_secir_groups.py | 242 +--------- .../test_surrogatemodel_ode_secir_simple.py | 86 ---- .../test_surrogatemodel_utils.py | 423 ++++++++++++++++++ 5 files changed, 439 insertions(+), 343 deletions(-) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/grid_search.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/grid_search.py index b60b345688..d6028f4a93 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/grid_search.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/grid_search.py @@ -34,7 +34,7 @@ # Function to train and evaluate the model using cross-validation -def train_and_evaluate_model(param, inputs, labels, training_parameter, show_results=False, modeltype="simple"): +def train_and_evaluate_model(param, inputs, labels, training_parameter, show_results=False, modeltype="simple", n_kfold=5): """ Training and evaluating a model with given architecture using 5-fold cross validation, returning a dictionary with the main training statistics. :param param: tuple of parameters describing the model architecture, it should be of the form @@ -47,6 +47,7 @@ def train_and_evaluate_model(param, inputs, labels, training_parameter, show_res :param show_results: Boolean, whether or not the evaluation results are printed. :param modeltype: String, specifying, which type of model is going to be tested. The possible values are "simple" - refering to the surrogate for the SECIR-model without age_resolution, "groups" - for the age resolved SECIR model. + :param n_kfold: number of partizions used to cross-validate :returns: a dictionary of training statistics of the form {"model", "activation","optimizer","mean_train_loss_kfold","mean_val_loss_kfold","training_time", "train_losses", "val_losses"} @@ -59,7 +60,7 @@ def train_and_evaluate_model(param, inputs, labels, training_parameter, show_res mode='min') # Preparing K-Fold Cross-Validation - kf = KFold(n_splits=5) + kf = KFold(n_splits=n_kfold) train_losses = [] val_losses = [] diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index 8e850e32ef..d3b46a185f 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -21,6 +21,8 @@ import numpy as np import tensorflow as tf import os +from memilio.epidata.modifyDataframeSeries import (fit_age_group_intervals) +import pandas as pd def interpolate_age_groups(data_entry): @@ -31,15 +33,13 @@ def interpolate_age_groups(data_entry): :returns: List containing the population in each age group used in the simulation. """ - age_groups = { - "A00-A04": data_entry['<3 years'] + data_entry['3-5 years'] * 2 / 3, - "A05-A14": data_entry['3-5 years'] * 1 / 3 + data_entry['6-14 years'], - "A15-A34": data_entry['15-17 years'] + data_entry['18-24 years'] + data_entry['25-29 years'] + data_entry['30-39 years'] * 1 / 2, - "A35-A59": data_entry['30-39 years'] * 1 / 2 + data_entry['40-49 years'] + data_entry['50-64 years'] * 2 / 3, - "A60-A79": data_entry['50-64 years'] * 1 / 3 + data_entry['65-74 years'] + data_entry['>74 years'] * 1 / 5, - "A80+": data_entry['>74 years'] * 4 / 5 - } - return [age_groups[key] for key in age_groups] + df_age_in = pd.DataFrame({ + '<3': 1, '3-5': 1, '6-14': 1, '15-17': 1, '18-24': 1, '25-29': 1, + '30-39': 1, '40-49': 1, '50-64': 1, '64-74': 1, '>74': 1 + }) + age_out = ["<5", "5-14", "15-34", "35-59", "60-79", ">79"] + + return (fit_age_group_intervals(df_age_in, age_out, data_entry)) def remove_confirmed_compartments(result_array, delete_indices): @@ -53,7 +53,7 @@ def remove_confirmed_compartments(result_array, delete_indices): return np.delete(result_array, delete_indices, axis=1) -def transform_data(data, transformer, num_runs, num_groups=6, num_compartments=8): +def normalize_simulation_data(data, transformer, num_runs, num_groups=6, num_compartments=8): """ Transforms the data by a logarithmic normalization. Reshaping is necessary, because the transformer needs an array with dimension <= 2. @@ -65,11 +65,9 @@ def transform_data(data, transformer, num_runs, num_groups=6, num_compartments=8 :returns: Transformed data. """ - - data = np.asarray(data).transpose(2, 0, 1).reshape( - num_groups*num_compartments, -1) + data = np.asarray(data).reshape(num_runs, -1) scaled_data = transformer.transform(data) - return tf.convert_to_tensor(scaled_data.transpose().reshape(num_runs, -1, num_groups*num_compartments)) + return tf.convert_to_tensor(scaled_data.reshape(num_runs, -1, num_groups*num_compartments)) def save_model(model, path, modelname): diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 5b8a9a6d54..d620984686 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -21,13 +21,10 @@ from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures) -import memilio.surrogatemodel.utils.helper_functions as utils -from memilio.surrogatemodel.utils import dampings, grid_search + from unittest.mock import patch -import pandas as pd import os import unittest -import pickle import numpy as np import tensorflow as tf @@ -47,63 +44,6 @@ def setUp(self): """ """ self.setUpPyfakefs() - def test_dampings_active(self): - # length of generated sequence - factors1 = dampings.calc_factors_active(10) - factors2 = dampings.calc_factors_active(18) - - self.assertEqual(len(factors1), 10) - self.assertEqual(len(factors2), 18) - - with self.assertRaises(ValueError) as error: - dampings.dampings_active(10, 200, 3) - error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." - self.assertEqual(str(error.exception), error_message) - - # Combine days and factors - days1, factors1 = dampings.dampings_active(30, 5, 4) - days2, factors2 = dampings.dampings_active(40, 10, 4) - - self.assertEqual(len(days1), 5) - self.assertEqual(len(factors1), len(days1)) - self.assertEqual(len(days2), 10) - self.assertEqual(len(days2), len(factors2)) - - def test_dampings_classic(self): - - days1 = dampings.generate_dampings_withshadowdamp(4, 30, 3, 4, 1) - days2 = dampings.generate_dampings_withshadowdamp(10, 90, 2, 10, 1) - self.assertEqual(len(days1[0]), 4) - self.assertEqual(len(days2[0]), 10) - - with self.assertRaises(ValueError) as error: - dampings.dampings_classic(10, 200) - error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." - self.assertEqual(str(error.exception), error_message) - - # Combine days and factors - days1, factors1 = dampings.dampings_classic(30, 5) - days2, factors2 = dampings.dampings_classic(40, 5) - - self.assertEqual(len(days1), 5) - self.assertEqual(len(factors1), len(days1)) - self.assertEqual(len(days2), 5) - self.assertEqual(len(days2), len(factors2)) - - def test_dampings_random(self): - with self.assertRaises(ValueError) as error: - dampings.dampings_random(10, 200) - error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." - self.assertEqual(str(error.exception), error_message) - - days1, factors1 = dampings.dampings_random(30, 5) - days2, factors2 = dampings.dampings_random(40, 5) - - self.assertEqual(len(days1), 5) - self.assertEqual(len(factors1), len(days1)) - self.assertEqual(len(days2), 5) - self.assertEqual(len(days2), len(factors2)) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', return_value=0.1 * np.ones((6, 6))) @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', @@ -428,24 +368,6 @@ def test_lstm_multi_multi(self): # Dimension of one time step (16 = 4*4) self.assertEqual(output_zeros.shape[2], 16) - def test_calc_split_index(self): - with self.assertRaises(ValueError) as error: - utils.calc_split_index( - 10, 0.9, 0.1, 0.1 - ) - error_message = "Summed data set shares are greater than 1. Please adjust the values." - self.assertEqual(str(error.exception), error_message) - split_index = utils.calc_split_index(10, 0.7, 0.1, 0.2) - self.assertEqual(split_index, [7, 1, 2]) - - def test_flat_input(self): - A = np.zeros((12, 12)) - a1 = [A for _ in np.arange(5)] - a2 = np.zeros((5, 144)) - a1_flatten = utils.flat_input(a1) - b = a2 == a1_flatten - self.assertTrue(np.asarray(b).all()) - def test_prepare_data_classic(self): data = { "inputs": np.zeros((10, 5, 48)), @@ -637,168 +559,6 @@ def test_network_fit(self, mock_population, mock_baseline, mock_minimum): self.assertEqual( len(cnn_output.history['val_loss']), max_epochs) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', - return_value=0.1 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', - return_value=0.6 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', - return_value=[[5256.0, 10551, 32368.5, - 43637.833333333336, 22874.066666666666, 8473.6]]) - def test_save_model( - self, mock_population, mock_baseline, mock_minimum): - """ - : param mock_population: - : param mock_baseline: - : param mock_minimum: - """ - input_width = 5 - label_width = 10 - num_runs = 2 - - data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, number_dampings=2) - max_epochs = 1 - early_stop = 100 - loss = tf.keras.losses.MeanAbsolutePercentageError() - optimizer = "Adam" - metric = [tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()] - - training_parameter = ( - early_stop, max_epochs, loss, optimizer, metric) - - model_mlp_multi_input_multi_output = model.network_fit( - model=network_architectures.mlp_multi_input_multi_output( - label_width), - training_parameter=training_parameter, - path=self.path, - filename="data_secir_groups_10days_2_random.pickle", - modeltype='classic', plot_stats=False) - - utils.save_model(model_mlp_multi_input_multi_output.model, - self.path, "mlp_multi_multi") - - self.assertEqual(len(os.listdir(self.path)), 2) - - self.assertEqual(os.listdir(self.path), - ['data_secir_groups_10days_2_random.pickle', 'mlp_multi_multi.keras']) - - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', - return_value=0.1 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', - return_value=0.6 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', - return_value=[[5256.0, 10551, 32368.5, - 43637.833333333336, 22874.066666666666, 8473.6]]) - def test_load_model( - self, mock_population, mock_baseline, mock_minimum): - """ - : param mock_population: - : param mock_baseline: - : param mock_minimum: - """ - input_width = 5 - label_width = 10 - num_runs = 2 - - data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, number_dampings=2) - max_epochs = 1 - early_stop = 100 - loss = tf.keras.losses.MeanAbsolutePercentageError() - optimizer = "Adam" - metric = [tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()] - - training_parameter = ( - early_stop, max_epochs, loss, optimizer, metric) - - mlp1 = model.network_fit( - model=network_architectures.mlp_multi_input_multi_output( - label_width), - training_parameter=training_parameter, - path=self.path, - filename="data_secir_groups_10days_2_random.pickle", - modeltype='classic', plot_stats=False) - - utils.save_model(mlp1.model, self.path, "mlp_multi_multi") - path_file = os.path.join(self.path, "mlp_multi_multi.keras") - if os.path.isfile(path_file): - print("--------------------------------------------------") - print(r"\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/") - print(r"/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ ") - print("--------------------------------------------------") - mlp2 = utils.load_model(path_file) - - weights1 = mlp1.model.get_weights() - weights2 = mlp2.get_weights() - for w1, w2 in zip(weights1, weights2): - np.testing.assert_allclose(w1, w2) - - @patch('memilio.surrogatemodel.utils.grid_search.train_and_evaluate_model') - def test_perform_grid_search(self, mock_train_and_evaluate_model): - """ Testing grid search for simple model along a sequence of possible parameter values""" - mock_train_and_evaluate_model.return_value = { - "model": "._.", - "activation": "relu", - "optimizer": "Eva", - "mean_train_loss_kfold": 42, - "mean_val_loss_kfold": 12, - "training_time": 1, - "train_losses": [3], - "val_losses": [6] - } - filename_df = "dataframe_optimizer" - os.mkdir(self.path) - - # General grid search parameters for the training process: - early_stop = [100] - max_epochs = [1] - losses = [tf.keras.losses.MeanAbsolutePercentageError()] - optimizers = ['Adam', 'Adam'] - metrics = [[tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()]] - - # Define grid search parameters for the architecture - label_width = 3 - num_outputs = 2 - num_age_groups = 1 - hidden_layers = [1, 2] - neurons_in_hidden_layer = [32] - activation_function = ['relu'] - models = ["Dense"] - - # Collecting parameters - training_parameters = [(early, epochs, loss, optimizer, metric) - for early in early_stop for epochs in max_epochs for loss in losses - for optimizer in optimizers for metric in metrics] - - model_parameters = [(label_width, num_age_groups, num_outputs, layer, neuron_number, activation, modelname) - for layer in hidden_layers for neuron_number in neurons_in_hidden_layer - for activation in activation_function for modelname in models] - - # generate dataset with multiple output - inputs = tf.ones([5, 1, 2]) - labels = tf.ones([5, 3, 2]) - - grid_search.perform_grid_search( - model_parameters, inputs, labels, training_parameters, - filename_df, self.path, "groups" - ) - - # testing saving the results - self.assertEqual(len(os.listdir(self.path)), 1) - - self.assertEqual(os.listdir(os.path.join(self.path, 'secir_groups_grid_search')), - [filename_df+".pickle"]) - - # testing size of the output - path_name = os.path.join(self.path, 'secir_groups_grid_search') - with open(os.path.join(path_name, filename_df + ".pickle"), "rb") as f: - d = pickle.load(f) - df = pd.DataFrame(d) - self.assertEqual(len(df['model']), 4) - if __name__ == '__main__': unittest.main() diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py index 8eef8afa22..eebe097d22 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py @@ -420,92 +420,6 @@ def test_network_fit(self): self.assertEqual( len(lstm_output.history['val_loss']), max_epochs) - def test_train_and_evaluate_model(self): - """ Testing evaluation procedure """ - inputs = tf.ones([5, 5, 8]) - labels = tf.ones([5, 10, 8]) - max_epochs = 1 - early_stop = 100 - loss = tf.keras.losses.MeanAbsolutePercentageError() - optimizer = 'Adam' - metric = [tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()] - model_parameter = ( - 10, 8, 1, 5, "relu", "Dense" - ) - - training_parameter = ( - early_stop, max_epochs, loss, optimizer, metric) - - res = grid_search.train_and_evaluate_model( - model_parameter, inputs, labels, training_parameter, False) - self.assertEqual(len(res["val_losses"][0]), 5) - self.assertEqual(len(res["val_losses"][0][0]), max_epochs) - self.assertEqual(res["optimizer"], "Adam") - - @patch('memilio.surrogatemodel.utils.grid_search.train_and_evaluate_model') - def test_perform_grid_search(self, mock_train_and_evaluate_model): - """ Testing grid search for simple model along a sequence of possible parameter values""" - mock_train_and_evaluate_model.return_value = { - "model": "._.", - "activation": "relu", - "optimizer": "Eva", - "mean_train_loss_kfold": 42, - "mean_val_loss_kfold": 12, - "training_time": 1, - "train_losses": [3], - "val_losses": [6] - } - filename_df = "dataframe_optimizer" - os.mkdir(self.path) - - # General grid search parameters for the training process: - early_stop = [100] - max_epochs = [1] - losses = [tf.keras.losses.MeanAbsolutePercentageError()] - optimizers = ['Adam', 'Adam'] - metrics = [[tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()]] - - # Define grid search parameters for the architecture - label_width = 3 - num_outputs = 2 - hidden_layers = [1, 2] - neurons_in_hidden_layer = [32] - activation_function = ['relu'] - models = ["Dense"] - - # Collecting parameters - training_parameters = [(early, epochs, loss, optimizer, metric) - for early in early_stop for epochs in max_epochs for loss in losses - for optimizer in optimizers for metric in metrics] - - model_parameters = [(label_width, num_outputs, layer, neuron_number, activation, modelname) - for layer in hidden_layers for neuron_number in neurons_in_hidden_layer - for activation in activation_function for modelname in models] - - # generate dataset with multiple output - inputs = tf.ones([5, 1, 2]) - labels = tf.ones([5, 3, 2]) - - grid_search.perform_grid_search( - model_parameters, inputs, labels, training_parameters, - filename_df, self.path - ) - - # testing saving the results - self.assertEqual(len(os.listdir(self.path)), 1) - - self.assertEqual(os.listdir(os.path.join(self.path, 'secir_simple_grid_search')), - [filename_df+".pickle"]) - - # testing size of the output - path_name = os.path.join(self.path, 'secir_simple_grid_search') - with open(os.path.join(path_name, filename_df + ".pickle"), "rb") as f: - d = pickle.load(f) - df = pd.DataFrame(d) - self.assertEqual(len(df['model']), 4) - if __name__ == '__main__': unittest.main() diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py new file mode 100644 index 0000000000..3635ffb77f --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -0,0 +1,423 @@ +############################################################################# +# Copyright (C) 2020-2025 MEmilio +# +# Authors: Manuel Heger, Henrik Zunker +# +# Contact: Martin J. Kuehn +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################# +from pyfakefs import fake_filesystem_unittest + +from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, + network_architectures) +import memilio.surrogatemodel.utils.helper_functions as utils +from memilio.surrogatemodel.utils import dampings, grid_search +from sklearn.preprocessing import FunctionTransformer + +from unittest.mock import patch +import pandas as pd +import os +import unittest +import pickle + +import numpy as np +import tensorflow as tf +import logging + +# suppress all autograph warnings from tensorflow + +logging.getLogger("tensorflow").setLevel(logging.ERROR) + + +class TestSurrogatemodelUtils(fake_filesystem_unittest.TestCase): + """ """ + + path = '/home/' + + def setUp(self): + """ """ + self.setUpPyfakefs() + + def test_calc_dist_days(self): + with self.assertRaises(ValueError) as error: + dampings.calc_dist_days(10, 2, 10, 2) + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." + self.assertEqual(str(error.exception), error_message) + + with self.assertRaises(ValueError) as error: + dampings.calc_dist_days(102, 2, 50, 3) + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." + self.assertEqual(str(error.exception), error_message) + + self.assertEqual(dampings.calc_dist_days(102, 2, 50, 1), 2) + self.assertEqual(dampings.calc_dist_days(30, 2, 10, 1), 2) + + def test_generate_dampings(self): + with self.assertRaises(ValueError) as error: + dampings.generate_dampings(100, 2, "gelb") + error_message = "The method argument has to be one of the following: 'classic', 'active' or 'random'." + self.assertEqual(str(error.exception), error_message) + + def test_dampings_active(self): + # length of generated sequence + factors1 = dampings.calc_factors_active(10) + factors2 = dampings.calc_factors_active(18) + # testing if damping factors are in the desired interval + self.assertTrue(np.all(np.log(1-np.array(factors1)) <= 2)) + self.assertTrue(np.all(np.log(1-np.array(factors2)) <= 2)) + self.assertEqual(len(factors1), 10) + self.assertEqual(len(factors2), 18) + + with self.assertRaises(ValueError) as error: + dampings.dampings_active(10, 200, 3) + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." + self.assertEqual(str(error.exception), error_message) + + # Combine days and factors + days1, factors1 = dampings.dampings_active(30, 5, 4) + days2, factors2 = dampings.dampings_active(40, 10, 4) + + self.assertEqual(len(days1), 5) + self.assertEqual(len(factors1), len(days1)) + self.assertEqual(len(days2), 10) + self.assertEqual(len(days2), len(factors2)) + + def test_dampings_classic(self): + + days1 = dampings.generate_dampings_withshadowdamp(4, 30, 3, 4, 1) + days2 = dampings.generate_dampings_withshadowdamp(10, 90, 2, 10, 1) + self.assertEqual(len(days1[0]), 4) + self.assertEqual(len(days2[0]), 10) + + with self.assertRaises(ValueError) as error: + dampings.dampings_classic(10, 200) + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." + self.assertEqual(str(error.exception), error_message) + + # Combine days and factors + days1, factors1 = dampings.dampings_classic(30, 5) + days2, factors2 = dampings.dampings_classic(40, 5) + + self.assertEqual(len(days1), 5) + self.assertEqual(len(factors1), len(days1)) + self.assertEqual(len(days2), 5) + self.assertEqual(len(days2), len(factors2)) + # testing if damping factors are in the desired interval + self.assertTrue(np.all(np.array(factors1) <= 2)) + self.assertTrue(np.all(np.array(factors2) <= 2)) + + def test_dampings_random(self): + with self.assertRaises(ValueError) as error: + dampings.dampings_random(10, 200) + error_message = "It's not possible to arrange this number of dampings in the desired interval with the given minimal distance." + self.assertEqual(str(error.exception), error_message) + + days1, factors1 = dampings.dampings_random(30, 5) + days2, factors2 = dampings.dampings_random(40, 5) + + self.assertEqual(len(days1), 5) + self.assertEqual(len(factors1), len(days1)) + self.assertEqual(len(days2), 5) + self.assertEqual(len(days2), len(factors2)) + # testing if damping factors are in the desired interval + self.assertTrue(np.all(np.array(factors1) <= 2)) + self.assertTrue(np.all(np.array(factors2) <= 2)) + + # Testing helper_functions.py + def test_calc_split_index(self): + with self.assertRaises(ValueError) as error: + utils.calc_split_index( + 10, 0.9, 0.1, 0.1 + ) + error_message = "Summed data set shares are greater than 1. Please adjust the values." + self.assertEqual(str(error.exception), error_message) + split_index = utils.calc_split_index(10, 0.7, 0.1, 0.2) + self.assertEqual(split_index, [7, 1, 2]) + + def test_remove_confirmed_compartments(self): + A = np.asarray([[1, 2, 3, 4, 5]]) + + A_new = utils.remove_confirmed_compartments(A, [1, 2]) + self.assertTrue(np.all(np.asarray([[1, 4, 5]]) == A_new)) + + def test_normalize_simulation_data(self): + transformer = FunctionTransformer(np.log1p, validate=True) + A = [[[1, 2], [1, 2]], + [[3, 4], [3, 4]] + ] + res = [[[0.69314718, 1.09861229], [0.69314718, 1.09861229], + [1.38629436, 1.60943791], [1.38629436, 1.60943791]]] + output = utils.normalize_simulation_data(A, transformer, 1, 1, 2) + for w1, w2 in zip(res, output): + np.testing.assert_allclose(w1, w2) + + def test_flat_input(self): + A = np.zeros((12, 12)) + a1 = [A for _ in np.arange(5)] + a2 = np.zeros((5, 144)) + a1_flatten = utils.flat_input(a1) + b = a2 == a1_flatten + self.assertTrue(np.asarray(b).all()) + + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', + return_value=0.1 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', + return_value=[[5256.0, 10551, 32368.5, + 43637.833333333336, 22874.066666666666, 8473.6]]) + def test_save_model( + self, mock_population, mock_baseline, mock_minimum): + """ + : param mock_population: + : param mock_baseline: + : param mock_minimum: + """ + input_width = 5 + label_width = 10 + num_runs = 2 + + data_generation.generate_data(num_runs, self.path, "", input_width, + label_width, number_dampings=2) + max_epochs = 1 + early_stop = 100 + loss = tf.keras.losses.MeanAbsolutePercentageError() + optimizer = "Adam" + metric = [tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()] + + training_parameter = ( + early_stop, max_epochs, loss, optimizer, metric) + + model_mlp_multi_input_multi_output = model.network_fit( + model=network_architectures.mlp_multi_input_multi_output( + label_width), + training_parameter=training_parameter, + path=self.path, + filename="data_secir_groups_10days_2_random.pickle", + modeltype='classic', plot_stats=False) + + utils.save_model(model_mlp_multi_input_multi_output.model, + self.path, "mlp_multi_multi") + + self.assertEqual(len(os.listdir(self.path)), 2) + + self.assertEqual(os.listdir(self.path), + ['data_secir_groups_10days_2_random.pickle', 'mlp_multi_multi.keras']) + + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', + return_value=0.1 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', + return_value=0.6 * np.ones((6, 6))) + @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', + return_value=[[5256.0, 10551, 32368.5, + 43637.833333333336, 22874.066666666666, 8473.6]]) + def test_load_model( + self, mock_population, mock_baseline, mock_minimum): + """ + : param mock_population: + : param mock_baseline: + : param mock_minimum: + """ + input_width = 5 + label_width = 10 + num_runs = 2 + + data_generation.generate_data(num_runs, self.path, "", input_width, + label_width, number_dampings=2) + max_epochs = 1 + early_stop = 100 + loss = tf.keras.losses.MeanAbsolutePercentageError() + optimizer = "Adam" + metric = [tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()] + + training_parameter = ( + early_stop, max_epochs, loss, optimizer, metric) + + mlp1 = model.network_fit( + model=network_architectures.mlp_multi_input_multi_output( + label_width), + training_parameter=training_parameter, + path=self.path, + filename="data_secir_groups_10days_2_random.pickle", + modeltype='classic', plot_stats=False) + + utils.save_model(mlp1.model, self.path, "mlp_multi_multi") + path_file = os.path.join(self.path, "mlp_multi_multi.keras") + if os.path.isfile(path_file): + print("--------------------------------------------------") + print(r"\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/") + print(r"/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ ") + print("--------------------------------------------------") + mlp2 = utils.load_model(path_file) + + weights1 = mlp1.model.get_weights() + weights2 = mlp2.get_weights() + for w1, w2 in zip(weights1, weights2): + np.testing.assert_allclose(w1, w2) + + def test_train_and_evaluate_model(self): + """ Testing evaluation procedure """ + inputs = tf.ones([5, 5, 8]) + labels = tf.ones([5, 10, 8]) + max_epochs = 1 + early_stop = 100 + loss = tf.keras.losses.MeanAbsolutePercentageError() + optimizer = 'Adam' + metric = [tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()] + model_parameter = ( + 10, 8, 1, 5, "relu", "Dense" + ) + + training_parameter = ( + early_stop, max_epochs, loss, optimizer, metric) + + res = grid_search.train_and_evaluate_model( + model_parameter, inputs, labels, training_parameter, False) + self.assertEqual(len(res["val_losses"][0]), 5) + self.assertEqual(len(res["val_losses"][0][0]), max_epochs) + self.assertEqual(res["optimizer"], "Adam") + + @patch('memilio.surrogatemodel.utils.grid_search.train_and_evaluate_model') + def test_perform_grid_search_simple(self, mock_train_and_evaluate_model): + """ Testing grid search for simple model along a sequence of possible parameter values""" + mock_train_and_evaluate_model.return_value = { + "model": "._.", + "activation": "relu", + "optimizer": "Eva", + "mean_train_loss_kfold": 42, + "mean_val_loss_kfold": 12, + "training_time": 1, + "train_losses": [3], + "val_losses": [6] + } + filename_df = "dataframe_optimizer" + os.mkdir(self.path) + + # General grid search parameters for the training process: + early_stop = [100] + max_epochs = [1] + losses = [tf.keras.losses.MeanAbsolutePercentageError()] + optimizers = ['Adam', 'Adam'] + metrics = [[tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()]] + + # Define grid search parameters for the architecture + label_width = 3 + num_outputs = 2 + hidden_layers = [1, 2] + neurons_in_hidden_layer = [32] + activation_function = ['relu'] + models = ["Dense"] + + # Collecting parameters + training_parameters = [(early, epochs, loss, optimizer, metric) + for early in early_stop for epochs in max_epochs for loss in losses + for optimizer in optimizers for metric in metrics] + + model_parameters = [(label_width, num_outputs, layer, neuron_number, activation, modelname) + for layer in hidden_layers for neuron_number in neurons_in_hidden_layer + for activation in activation_function for modelname in models] + + # generate dataset with multiple output + inputs = tf.ones([5, 1, 2]) + labels = tf.ones([5, 3, 2]) + + grid_search.perform_grid_search( + model_parameters, inputs, labels, training_parameters, + filename_df, self.path + ) + + # testing saving the results + self.assertEqual(len(os.listdir(self.path)), 1) + + self.assertEqual(os.listdir(os.path.join(self.path, 'secir_simple_grid_search')), + [filename_df+".pickle"]) + + # testing size of the output + path_name = os.path.join(self.path, 'secir_simple_grid_search') + with open(os.path.join(path_name, filename_df + ".pickle"), "rb") as f: + d = pickle.load(f) + df = pd.DataFrame(d) + self.assertEqual(len(df['model']), 4) + + @patch('memilio.surrogatemodel.utils.grid_search.train_and_evaluate_model') + def test_perform_grid_search_groups(self, mock_train_and_evaluate_model): + """ Testing grid search for groups model along a sequence of possible parameter values""" + mock_train_and_evaluate_model.return_value = { + "model": "._.", + "activation": "relu", + "optimizer": "Eva", + "mean_train_loss_kfold": 42, + "mean_val_loss_kfold": 12, + "training_time": 1, + "train_losses": [3], + "val_losses": [6] + } + filename_df = "dataframe_optimizer" + os.mkdir(self.path) + + # General grid search parameters for the training process: + early_stop = [100] + max_epochs = [1] + losses = [tf.keras.losses.MeanAbsolutePercentageError()] + optimizers = ['Adam', 'Adam'] + metrics = [[tf.keras.metrics.MeanAbsoluteError(), + tf.keras.metrics.MeanAbsolutePercentageError()]] + + # Define grid search parameters for the architecture + label_width = 3 + num_outputs = 2 + num_age_groups = 1 + hidden_layers = [1, 2] + neurons_in_hidden_layer = [32] + activation_function = ['relu'] + models = ["Dense"] + + # Collecting parameters + training_parameters = [(early, epochs, loss, optimizer, metric) + for early in early_stop for epochs in max_epochs for loss in losses + for optimizer in optimizers for metric in metrics] + + model_parameters = [(label_width, num_age_groups, num_outputs, layer, neuron_number, activation, modelname) + for layer in hidden_layers for neuron_number in neurons_in_hidden_layer + for activation in activation_function for modelname in models] + + # generate dataset with multiple output + inputs = tf.ones([5, 1, 2]) + labels = tf.ones([5, 3, 2]) + + grid_search.perform_grid_search( + model_parameters, inputs, labels, training_parameters, + filename_df, self.path, "groups" + ) + + # testing saving the results + self.assertEqual(len(os.listdir(self.path)), 1) + + self.assertEqual(os.listdir(os.path.join(self.path, 'secir_groups_grid_search')), + [filename_df+".pickle"]) + + # testing size of the output + path_name = os.path.join(self.path, 'secir_groups_grid_search') + with open(os.path.join(path_name, filename_df + ".pickle"), "rb") as f: + d = pickle.load(f) + df = pd.DataFrame(d) + self.assertEqual(len(df['model']), 4) + + +if __name__ == '__main__': + unittest.main() From 636ce0663d3985820731975226e1a27dab2eec2c Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 9 Jul 2025 08:08:46 +0200 Subject: [PATCH 35/49] Using the existing fit_age_groups method inside the interpolate age groups function --- .../ode_secir_groups/data_generation.py | 38 +++++++++++-------- .../surrogatemodel/utils/helper_functions.py | 25 +++++++++--- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 97508aa77b..61d9e9a3f3 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -36,12 +36,12 @@ interpolate_simulation_result, simulate) import memilio.surrogatemodel.utils.dampings as dampings from memilio.surrogatemodel.utils.helper_functions import ( - interpolate_age_groups, remove_confirmed_compartments, transform_data) + interpolate_age_groups, remove_confirmed_compartments, normalize_simulation_data) import memilio.simulation as mio import memilio.simulation.osecir as osecir -def run_secir_groups_simulation(days, damping_days, damping_factors, populations): +def run_secir_groups_simulation(days, damping_days, damping_factors, populations, age_groups): """ Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. @@ -49,7 +49,9 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations :param damping_days: The days when damping is applied. :param damping_factors: damping factors associated to the damping days. :param populations: List containing the population in each age group. - :returns: List containing the populations in each compartment used to initialize the run. + :param age_groups: List declaring the used age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] + :returns: Tuple of lists (list_of_simulation_results, list_of_damped_matrices), the first containing the simulation results, the second list containing the + damped contact matrices. """ # Collect indices of confirmed compartments @@ -65,9 +67,8 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations start_year = 2019 dt = 0.1 - # Define age Groups - groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] - num_groups = len(groups) + # Get number of age groups + num_groups = len(age_groups) # Initialize Parameters model = Model(num_groups) @@ -204,8 +205,10 @@ def generate_data( # Since the first day of the input is day 0, we still need to subtract 1. days = input_width + label_width - 1 + # Define used age_groups + age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '>79'] # Load population data - population = get_population(path_population) + population = get_population(path_population, age_groups) # show progess in terminal for longer runs # Due to the random structure, there's currently no need to shuffle the data @@ -218,7 +221,7 @@ def generate_data( min_damping_day=2) data_run, damped_matrices = run_secir_groups_simulation( - days, damping_days, damping_factors, population[random.randint(0, len(population) - 1)]) + days, damping_days, damping_factors, population[random.randint(0, len(population) - 1)], age_groups) data['inputs'].append(data_run[:input_width]) data['labels'].append(data_run[input_width:]) data['contact_matrices'].append(damped_matrices) @@ -232,8 +235,10 @@ def generate_data( transformer = FunctionTransformer(np.log1p, validate=True) # transform inputs and labels - data['inputs'] = transform_data(data['inputs'], transformer, num_runs) - data['labels'] = transform_data(data['labels'], transformer, num_runs) + data['inputs'] = normalize_simulation_data( + data['inputs'], transformer, num_runs) + data['labels'] = normalize_simulation_data( + data['labels'], transformer, num_runs) else: data['inputs'] = tf.convert_to_tensor(data['inputs']) data['labels'] = tf.convert_to_tensor(data['labels']) @@ -296,10 +301,11 @@ def getMinimumMatrix(): return minimum -def get_population(path): +def get_population(path, age_groups): """ read population data in list from dataset - :param path: Path to the dataset containing the population data + :param path: Path to the dataset containing the population + :param age_groups: List declaring the new age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] :returns: List of interpolated age grouped population data """ @@ -308,7 +314,7 @@ def get_population(path): data = json.load(f) population = [] for data_entry in data: - population.append(interpolate_age_groups(data_entry)) + population.append(interpolate_age_groups(data_entry, age_groups)) return population @@ -322,7 +328,7 @@ def get_population(path): r"data//Germany//pydata//county_current_population.json") input_width = 5 - label_width = 30 - num_runs = 10000 + label_width = 90 + num_runs = 100 data = generate_data(num_runs, path_output, path_population, input_width, - label_width, damping_method="classic") + label_width, damping_method="active") diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index d3b46a185f..eac26c17b3 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -25,21 +25,34 @@ import pandas as pd -def interpolate_age_groups(data_entry): +def interpolate_age_groups(data_entry, age_groups): """ Interpolates the age groups from the population data into the age groups used in the simulation. We assume that the people in the age groups are uniformly distributed. :param data_entry: Data entry containing the population data. + :param age_groups: List declaring the new age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] :returns: List containing the population in each age group used in the simulation. """ - df_age_in = pd.DataFrame({ - '<3': 1, '3-5': 1, '6-14': 1, '15-17': 1, '18-24': 1, '25-29': 1, - '30-39': 1, '40-49': 1, '50-64': 1, '64-74': 1, '>74': 1 + + df_age_in = pd.DataFrame.from_dict({ + '<3': [1], '3-5': [1], '6-14': [1], '15-17': [1], '18-24': [1], '25-29': [1], + '30-39': [1], '40-49': [1], '50-64': [1], '64-74': [1], '>74': [1] }) - age_out = ["<5", "5-14", "15-34", "35-59", "60-79", ">79"] - return (fit_age_group_intervals(df_age_in, age_out, data_entry)) + # Prepare to convert in Dataframe + for i in data_entry: + data_entry[i] = [data_entry[i]] + + df_entry = pd.DataFrame.from_dict(data_entry) + # Excluding ID_County and total number of population + df_reduced = df_entry.loc[:, "<3 years":">74 years":1] + + # Interpolate the different age_groups + data_interpolated = fit_age_group_intervals( + df_age_in, age_groups, df_reduced) + res = list(data_interpolated) + return res def remove_confirmed_compartments(result_array, delete_indices): From e5fa3447b3b881650adb25e2928199562986e5b4 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 9 Jul 2025 08:09:19 +0200 Subject: [PATCH 36/49] Update README files --- .../memilio/surrogatemodel/ode_secir_groups/README.md | 6 +++--- .../memilio/surrogatemodel/utils/README.md | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/README.md diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md index 8bae1d01e9..be44b122f7 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md @@ -1,4 +1,4 @@ -# SECIR model with multiple age groups and one damping +# SECIR model with multiple age groups and multiple damping -This model is an application of the SECIR model implemented in https://github.com/DLR-SC/memilio/tree/main/cpp/models/ode_secir/ stratified by age groups using one damping to represent a change in the contact matrice. -The example is based on https://github.com/DLR-SC/memilio/tree/main/pycode/examples/simulation/secir_groups.py which uses python bindings to run the underlying C++ code. +This model is an application of the SECIR model implemented in https://github.com/DLR-SC/memilio/tree/main/cpp/models/ode_secir/ stratified by age groups using dampings to represent changes in the contact matrix. +The example is based on https://github.com/DLR-SC/memilio/tree/main/pycode/examples/simulation/ode_secir_groups.py which uses python bindings to run the underlying C++ code. diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/README.md b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/README.md new file mode 100644 index 0000000000..aa0e12f76d --- /dev/null +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/README.md @@ -0,0 +1,4 @@ +# Functions that are used in the various surrogate models +The file dampings.py contains different methods to generate damping days and damping factors. +grid_search.py provides functionality to evaluate and compare different model choices. +Different functions to prepare data can be found helper_functions.py \ No newline at end of file From 82a50bb3d7852c94874cb8596fcf1ecc7d7845aa Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 9 Jul 2025 08:10:25 +0200 Subject: [PATCH 37/49] small modifications (typos, renaming etc.) --- .../surrogatemodel/ode_secir_simple/data_generation.py | 6 +++--- .../memilio/surrogatemodel/utils/dampings.py | 10 +++++++++- .../test_surrogatemodel_ode_secir_groups.py | 7 ++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py index fa6acec989..318ef2a23a 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_simple/data_generation.py @@ -34,7 +34,7 @@ InfectionState, Model, interpolate_simulation_result, simulate) from memilio.surrogatemodel.utils.helper_functions import ( - interpolate_age_groups, remove_confirmed_compartments, transform_data) + interpolate_age_groups, remove_confirmed_compartments, normalize_simulation_data) import memilio.simulation.osecir as osecir @@ -173,9 +173,9 @@ def generate_data( if normalize: # logarithmic normalization transformer = FunctionTransformer(np.log1p, validate=True) - data['inputs'] = transform_data( + data['inputs'] = normalize_simulation_data( data['inputs'], transformer, num_runs, 1) - data['labels'] = transform_data( + data['labels'] = normalize_simulation_data( data['labels'], transformer, num_runs, 1) else: data['inputs'] = tf.convert_to_tensor(data['inputs']) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/dampings.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/dampings.py index 731f5291da..1b4d713c83 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/dampings.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/dampings.py @@ -115,6 +115,14 @@ def calc_factors_active(n_ddays, gamma_pos=0, alpha=-1, p0=0.5, t1_max=-0.3, t1_ M = exp(h)*M_0 + The h-values are generated by the following rule: + 1) h is initialized as 0 + 2) If h > gamma_pos, we start to reduce the h-value actively by choosing changes delta_h uniformly in + the interval (t1_min, t1_max). After each step h is updated according to h = h + delta_h + This procedure is repeated till h < alpha + 3) If h < gamma_pos the changes of h, called delta_h are chosen randomly, s.t. delta_h = 0 with + probability p0, otherwise the value is uniformly distributed on the interval (t2_min, t2_max). + :param n_ddays: Number of damping days in time series :param gamma_pos: upper bound for h value :param alpha: upper bound for h value, where active damping should be stopped @@ -187,7 +195,7 @@ def generate_dampings_withshadowdamp(number_of_dampings, days, min_distance, min """ Sampling the damping days according to the established method. - The idea is to draw dampings with a minimum distance while traying to keep + The idea is to draw dampings with a minimum distance while trying to keep the distribution of damping days uniformly. We create a list of all possible days, draw one damping day and delete all days before and after the damping that are within the range of the min_distance. To ensure that the the data is not biased, diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index d620984686..40a7847777 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -62,13 +62,14 @@ def test_simulation_run(self, mock_baseline, mock_minimum): damping_factors = [0.3, 0.4] population = [5256.0, 10551, 32368.5, 43637.833333333336, 22874.066666666666, 8473.6] + age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '>79'] simulation_1 = data_generation.run_secir_groups_simulation( - days_1, damping_days, damping_factors, population) + days_1, damping_days, damping_factors, population, age_groups) simulation_2 = data_generation.run_secir_groups_simulation( - days_2, damping_days, damping_factors, population) + days_2, damping_days, damping_factors, population, age_groups) simulation_3 = data_generation.run_secir_groups_simulation( - days_3, damping_days, damping_factors, population) + days_3, damping_days, damping_factors, population, age_groups) # result length self.assertEqual(len(simulation_1[0]), days_1+1) From 2b3a49766668bef2ad9a13b3de39672cb46b039f Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 9 Jul 2025 08:33:53 +0200 Subject: [PATCH 38/49] Update dependencies --- .../test_surrogatemodel_ode_secir_groups.py | 2 +- .../test_surrogatemodel_ode_secir_simple.py | 5 +---- .../surrogatemodel_test/test_surrogatemodel_utils.py | 8 +++----- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 40a7847777..11656758c7 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -18,7 +18,7 @@ # limitations under the License. ############################################################################# from pyfakefs import fake_filesystem_unittest - +import memilio.epidata from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py index eebe097d22..a307b5d9c7 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py @@ -19,14 +19,11 @@ ############################################################################# from pyfakefs import fake_filesystem_unittest - +import memilio.epidata from memilio.surrogatemodel.ode_secir_simple import (data_generation, network_architectures) -from memilio.surrogatemodel.utils import grid_search from memilio.surrogatemodel.ode_secir_simple import model as md import os -import pickle -import pandas as pd import unittest from unittest.mock import patch diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 3635ffb77f..72f46956ae 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -18,7 +18,7 @@ # limitations under the License. ############################################################################# from pyfakefs import fake_filesystem_unittest - +import memilio.epidata from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures) import memilio.surrogatemodel.utils.helper_functions as utils @@ -257,10 +257,8 @@ def test_load_model( utils.save_model(mlp1.model, self.path, "mlp_multi_multi") path_file = os.path.join(self.path, "mlp_multi_multi.keras") if os.path.isfile(path_file): - print("--------------------------------------------------") - print(r"\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/") - print(r"/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ ") - print("--------------------------------------------------") + print("File !") + mlp2 = utils.load_model(path_file) weights1 = mlp1.model.get_weights() From df10a6cbe614845ab28cdeb37d5d75d69dbb2693 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 9 Jul 2025 09:19:01 +0200 Subject: [PATCH 39/49] Updating the refs --- .../ode_secir_groups/data_generation.py | 12 ++++++------ .../test_surrogatemodel_ode_secir_groups.py | 2 +- .../test_surrogatemodel_ode_secir_simple.py | 2 +- .../surrogatemodel_test/test_surrogatemodel_utils.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 61d9e9a3f3..2530de2f20 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -85,26 +85,26 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations # Initial number of people in each compartment with random numbers model.populations[AgeGroup(i), Index_InfectionState( InfectionState.Exposed)] = random.uniform( - 0.00025, 0.0005) * populations[i] + 0.00025, 0.005) * populations[i] model.populations[AgeGroup(i), Index_InfectionState( InfectionState.InfectedNoSymptoms)] = random.uniform( - 0.0001, 0.00035) * populations[i] + 0.0001, 0.0035) * populations[i] model.populations[AgeGroup(i), Index_InfectionState( InfectionState.InfectedNoSymptomsConfirmed)] = 0 model.populations[AgeGroup(i), Index_InfectionState( InfectionState.InfectedSymptoms)] = random.uniform( - 0.00007, 0.0001) * populations[i] + 0.00007, 0.001) * populations[i] model.populations[AgeGroup(i), Index_InfectionState( InfectionState.InfectedSymptomsConfirmed)] = 0 model.populations[AgeGroup(i), Index_InfectionState( InfectionState.InfectedSevere)] = random.uniform( - 0.00003, 0.00006) * populations[i] + 0.00003, 0.0006) * populations[i] model.populations[AgeGroup(i), Index_InfectionState( InfectionState.InfectedCritical)] = random.uniform( - 0.00001, 0.00002) * populations[i] + 0.00001, 0.0002) * populations[i] model.populations[AgeGroup(i), Index_InfectionState( InfectionState.Recovered)] = random.uniform( - 0.002, 0.008) * populations[i] + 0.002, 0.08) * populations[i] model.populations[AgeGroup(i), Index_InfectionState(InfectionState.Dead)] = 0 model.populations.set_difference_from_group_total_AgeGroup( diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 11656758c7..3f3959a8f9 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -18,7 +18,7 @@ # limitations under the License. ############################################################################# from pyfakefs import fake_filesystem_unittest -import memilio.epidata +import memilio.epidata.modifyDataframeSeries from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py index a307b5d9c7..71295cdc14 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py @@ -19,7 +19,7 @@ ############################################################################# from pyfakefs import fake_filesystem_unittest -import memilio.epidata +import memilio.epidata.modifyDataframeSeries from memilio.surrogatemodel.ode_secir_simple import (data_generation, network_architectures) from memilio.surrogatemodel.ode_secir_simple import model as md diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 72f46956ae..687e92dd18 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -18,7 +18,7 @@ # limitations under the License. ############################################################################# from pyfakefs import fake_filesystem_unittest -import memilio.epidata +import memilio.epidata.modifyDataframeSeries from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures) import memilio.surrogatemodel.utils.helper_functions as utils From 57d82b4a41ececc3a61b362d82679f83770218d0 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Wed, 9 Jul 2025 10:01:04 +0200 Subject: [PATCH 40/49] Include test for interpolate_age_groups --- .../memilio/surrogatemodel/utils/helper_functions.py | 11 +++-------- .../surrogatemodel_test/test_surrogatemodel_utils.py | 9 +++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index eac26c17b3..ede18b3ad0 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -30,16 +30,10 @@ def interpolate_age_groups(data_entry, age_groups): We assume that the people in the age groups are uniformly distributed. :param data_entry: Data entry containing the population data. - :param age_groups: List declaring the new age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] + :param age_groups: List declaring the new age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '>79'] :returns: List containing the population in each age group used in the simulation. """ - - df_age_in = pd.DataFrame.from_dict({ - '<3': [1], '3-5': [1], '6-14': [1], '15-17': [1], '18-24': [1], '25-29': [1], - '30-39': [1], '40-49': [1], '50-64': [1], '64-74': [1], '>74': [1] - }) - # Prepare to convert in Dataframe for i in data_entry: data_entry[i] = [data_entry[i]] @@ -50,8 +44,9 @@ def interpolate_age_groups(data_entry, age_groups): # Interpolate the different age_groups data_interpolated = fit_age_group_intervals( - df_age_in, age_groups, df_reduced) + df_reduced, age_groups) res = list(data_interpolated) + return res diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 687e92dd18..02cd491682 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -49,6 +49,15 @@ def setUp(self): """ """ self.setUpPyfakefs() + def test_interpolate_age_groups(self): + entry = {'ID_County': 1000, 'Population': 11000, '<3 years': 1000, '3-5 years': 1000, '6-14 years': 1000, '15-17 years': 1000, + '18-24 years': 1000, '25-29 years': 1000, '30-39 years': 1000, '40-49 years': 1000, '50-64 years': 1000, '65-74 years': 1000, '>74 years': 1000} + res = [1666.6666666666665, 1333.3333333333335, 3500.0, + 2166.6666666666665, 1533.3333333333335, 800.0] + interpol_entry = utils.interpolate_age_groups( + entry, ['0-4', '5-14', '15-34', '35-59', '60-79', '>79']) + self.assertEqual(res, interpol_entry) + def test_calc_dist_days(self): with self.assertRaises(ValueError) as error: dampings.calc_dist_days(10, 2, 10, 2) From 69ad6909ea742bbe9ad25f38ebe0d8fd407fab1d Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 07:57:13 +0200 Subject: [PATCH 41/49] CI fails --- .../surrogatemodel_test/test_surrogatemodel_utils.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 02cd491682..687e92dd18 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -49,15 +49,6 @@ def setUp(self): """ """ self.setUpPyfakefs() - def test_interpolate_age_groups(self): - entry = {'ID_County': 1000, 'Population': 11000, '<3 years': 1000, '3-5 years': 1000, '6-14 years': 1000, '15-17 years': 1000, - '18-24 years': 1000, '25-29 years': 1000, '30-39 years': 1000, '40-49 years': 1000, '50-64 years': 1000, '65-74 years': 1000, '>74 years': 1000} - res = [1666.6666666666665, 1333.3333333333335, 3500.0, - 2166.6666666666665, 1533.3333333333335, 800.0] - interpol_entry = utils.interpolate_age_groups( - entry, ['0-4', '5-14', '15-34', '35-59', '60-79', '>79']) - self.assertEqual(res, interpol_entry) - def test_calc_dist_days(self): with self.assertRaises(ValueError) as error: dampings.calc_dist_days(10, 2, 10, 2) From bdcc4fc58589f64718cc6ad925cc44366bc8e00c Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 08:45:01 +0200 Subject: [PATCH 42/49] Mocking fit_age_groups_interpolate --- .../test_surrogatemodel_utils.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 687e92dd18..17813819d0 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -18,7 +18,7 @@ # limitations under the License. ############################################################################# from pyfakefs import fake_filesystem_unittest -import memilio.epidata.modifyDataframeSeries + from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures) import memilio.surrogatemodel.utils.helper_functions as utils @@ -49,6 +49,18 @@ def setUp(self): """ """ self.setUpPyfakefs() + @patch('memilio.epidata.modifyDataframeSeries.fit_age_group_intervals', + return_value=[1666.6666666666665, 1333.3333333333335, 3500.0, + 2166.6666666666665, 1533.3333333333335, 800.0]) + def test_interpolate_age_groups(self): + res = [1666.6666666666665, 1333.3333333333335, 3500.0, + 2166.6666666666665, 1533.3333333333335, 800.0] + entry = {'ID_County': 1000, 'Population': 11000, '<3 years': 1000, '3-5 years': 1000, '6-14 years': 1000, '15-17 years': 1000, + '18-24 years': 1000, '25-29 years': 1000, '30-39 years': 1000, '40-49 years': 1000, '50-64 years': 1000, '65-74 years': 1000, '>74 years': 1000} + interpol_entry = utils.interpolate_age_groups( + entry, ['0-4', '5-14', '15-34', '35-59', '60-79', '>79']) + self.assertEqual(res, interpol_entry) + def test_calc_dist_days(self): with self.assertRaises(ValueError) as error: dampings.calc_dist_days(10, 2, 10, 2) @@ -256,8 +268,10 @@ def test_load_model( utils.save_model(mlp1.model, self.path, "mlp_multi_multi") path_file = os.path.join(self.path, "mlp_multi_multi.keras") - if os.path.isfile(path_file): - print("File !") + with os.scandir() as dir_entries: + for entry in dir_entries: + info = entry.stat() + print(info) mlp2 = utils.load_model(path_file) From 6af9d33aa087eb969750126654e3800cfd197ad0 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 08:50:57 +0200 Subject: [PATCH 43/49] Mocking interpolation --- .../surrogatemodel_test/test_surrogatemodel_utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 17813819d0..5f43829c3e 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -52,7 +52,7 @@ def setUp(self): @patch('memilio.epidata.modifyDataframeSeries.fit_age_group_intervals', return_value=[1666.6666666666665, 1333.3333333333335, 3500.0, 2166.6666666666665, 1533.3333333333335, 800.0]) - def test_interpolate_age_groups(self): + def test_interpolate_age_groups(self, mockres): res = [1666.6666666666665, 1333.3333333333335, 3500.0, 2166.6666666666665, 1533.3333333333335, 800.0] entry = {'ID_County': 1000, 'Population': 11000, '<3 years': 1000, '3-5 years': 1000, '6-14 years': 1000, '15-17 years': 1000, @@ -268,10 +268,6 @@ def test_load_model( utils.save_model(mlp1.model, self.path, "mlp_multi_multi") path_file = os.path.join(self.path, "mlp_multi_multi.keras") - with os.scandir() as dir_entries: - for entry in dir_entries: - info = entry.stat() - print(info) mlp2 = utils.load_model(path_file) From bcc2b47866c182ed019012e65f707c8cefcde585 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 09:29:56 +0200 Subject: [PATCH 44/49] Deleting import in test files --- .../surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py | 1 - .../surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py | 1 - .../memilio/surrogatemodel_test/test_surrogatemodel_utils.py | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 3f3959a8f9..98be432c4f 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -18,7 +18,6 @@ # limitations under the License. ############################################################################# from pyfakefs import fake_filesystem_unittest -import memilio.epidata.modifyDataframeSeries from memilio.surrogatemodel.ode_secir_groups import (data_generation, model, network_architectures) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py index 71295cdc14..8fbad9f500 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_simple.py @@ -19,7 +19,6 @@ ############################################################################# from pyfakefs import fake_filesystem_unittest -import memilio.epidata.modifyDataframeSeries from memilio.surrogatemodel.ode_secir_simple import (data_generation, network_architectures) from memilio.surrogatemodel.ode_secir_simple import model as md diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 5f43829c3e..dabb5e68c2 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -268,6 +268,8 @@ def test_load_model( utils.save_model(mlp1.model, self.path, "mlp_multi_multi") path_file = os.path.join(self.path, "mlp_multi_multi.keras") + if os.path.isfile(path_file): + print("Datei existiert") mlp2 = utils.load_model(path_file) From 9b9d8e93c399139b53202735ebb456fe4ce9887c Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 09:51:13 +0200 Subject: [PATCH 45/49] updating import --- .../memilio/surrogatemodel/utils/helper_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index ede18b3ad0..5da293c70f 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -21,7 +21,7 @@ import numpy as np import tensorflow as tf import os -from memilio.epidata.modifyDataframeSeries import (fit_age_group_intervals) +from memilio.epidata import modifyDataframeSeries as mds import pandas as pd @@ -43,7 +43,7 @@ def interpolate_age_groups(data_entry, age_groups): df_reduced = df_entry.loc[:, "<3 years":">74 years":1] # Interpolate the different age_groups - data_interpolated = fit_age_group_intervals( + data_interpolated = mds.fit_age_group_intervals( df_reduced, age_groups) res = list(data_interpolated) From 2268ee12eec67c24bdc036ec4aaf398da9365460 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 10:38:49 +0200 Subject: [PATCH 46/49] Undo interpolate_age_groups --- .../ode_secir_groups/data_generation.py | 15 ++++---- .../surrogatemodel/utils/helper_functions.py | 34 +++++++------------ .../test_surrogatemodel_ode_secir_groups.py | 8 ++--- .../test_surrogatemodel_utils.py | 11 +++--- 4 files changed, 27 insertions(+), 41 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py index 2530de2f20..6dc74881c9 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/data_generation.py @@ -41,7 +41,7 @@ import memilio.simulation.osecir as osecir -def run_secir_groups_simulation(days, damping_days, damping_factors, populations, age_groups): +def run_secir_groups_simulation(days, damping_days, damping_factors, populations): """ Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region. Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges. @@ -49,7 +49,6 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations :param damping_days: The days when damping is applied. :param damping_factors: damping factors associated to the damping days. :param populations: List containing the population in each age group. - :param age_groups: List declaring the used age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] :returns: Tuple of lists (list_of_simulation_results, list_of_damped_matrices), the first containing the simulation results, the second list containing the damped contact matrices. @@ -67,6 +66,7 @@ def run_secir_groups_simulation(days, damping_days, damping_factors, populations start_year = 2019 dt = 0.1 + age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] # Get number of age groups num_groups = len(age_groups) @@ -205,10 +205,8 @@ def generate_data( # Since the first day of the input is day 0, we still need to subtract 1. days = input_width + label_width - 1 - # Define used age_groups - age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '>79'] # Load population data - population = get_population(path_population, age_groups) + population = get_population(path_population) # show progess in terminal for longer runs # Due to the random structure, there's currently no need to shuffle the data @@ -221,7 +219,7 @@ def generate_data( min_damping_day=2) data_run, damped_matrices = run_secir_groups_simulation( - days, damping_days, damping_factors, population[random.randint(0, len(population) - 1)], age_groups) + days, damping_days, damping_factors, population[random.randint(0, len(population) - 1)]) data['inputs'].append(data_run[:input_width]) data['labels'].append(data_run[input_width:]) data['contact_matrices'].append(damped_matrices) @@ -301,11 +299,10 @@ def getMinimumMatrix(): return minimum -def get_population(path, age_groups): +def get_population(path): """ read population data in list from dataset :param path: Path to the dataset containing the population - :param age_groups: List declaring the new age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+'] :returns: List of interpolated age grouped population data """ @@ -314,7 +311,7 @@ def get_population(path, age_groups): data = json.load(f) population = [] for data_entry in data: - population.append(interpolate_age_groups(data_entry, age_groups)) + population.append(interpolate_age_groups(data_entry)) return population diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index 5da293c70f..9d2b63460a 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -25,29 +25,21 @@ import pandas as pd -def interpolate_age_groups(data_entry, age_groups): - """ Interpolates the age groups from the population data into the age groups used in the simulation. +def interpolate_age_groups(data_entry): + """! Interpolates the age groups from the population data into the age groups used in the simulation. We assume that the people in the age groups are uniformly distributed. - - :param data_entry: Data entry containing the population data. - :param age_groups: List declaring the new age groups, e.g. groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '>79'] - :returns: List containing the population in each age group used in the simulation. - + @param data_entry Data entry containing the population data. + @return List containing the population in each age group used in the simulation. """ - # Prepare to convert in Dataframe - for i in data_entry: - data_entry[i] = [data_entry[i]] - - df_entry = pd.DataFrame.from_dict(data_entry) - # Excluding ID_County and total number of population - df_reduced = df_entry.loc[:, "<3 years":">74 years":1] - - # Interpolate the different age_groups - data_interpolated = mds.fit_age_group_intervals( - df_reduced, age_groups) - res = list(data_interpolated) - - return res + age_groups = { + "A00-A04": data_entry['<3 years'] + data_entry['3-5 years'] * 2 / 3, + "A05-A14": data_entry['3-5 years'] * 1 / 3 + data_entry['6-14 years'], + "A15-A34": data_entry['15-17 years'] + data_entry['18-24 years'] + data_entry['25-29 years'] + data_entry['30-39 years'] * 1 / 2, + "A35-A59": data_entry['30-39 years'] * 1 / 2 + data_entry['40-49 years'] + data_entry['50-64 years'] * 2 / 3, + "A60-A79": data_entry['50-64 years'] * 1 / 3 + data_entry['65-74 years'] + data_entry['>74 years'] * 1 / 5, + "A80+": data_entry['>74 years'] * 4 / 5 + } + return [age_groups[key] for key in age_groups] def remove_confirmed_compartments(result_array, delete_indices): diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py index 98be432c4f..118fa16375 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_ode_secir_groups.py @@ -61,14 +61,13 @@ def test_simulation_run(self, mock_baseline, mock_minimum): damping_factors = [0.3, 0.4] population = [5256.0, 10551, 32368.5, 43637.833333333336, 22874.066666666666, 8473.6] - age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '>79'] simulation_1 = data_generation.run_secir_groups_simulation( - days_1, damping_days, damping_factors, population, age_groups) + days_1, damping_days, damping_factors, population) simulation_2 = data_generation.run_secir_groups_simulation( - days_2, damping_days, damping_factors, population, age_groups) + days_2, damping_days, damping_factors, population) simulation_3 = data_generation.run_secir_groups_simulation( - days_3, damping_days, damping_factors, population, age_groups) + days_3, damping_days, damping_factors, population) # result length self.assertEqual(len(simulation_1[0]), days_1+1) @@ -165,6 +164,7 @@ def test_data_generation_save( # # Testing network_architectures.py + def test_mlp_multi_single(self): with self.assertRaises(ValueError) as error: network_architectures.mlp_multi_input_single_output( diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index dabb5e68c2..2d5fbd2d69 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -49,16 +49,13 @@ def setUp(self): """ """ self.setUpPyfakefs() - @patch('memilio.epidata.modifyDataframeSeries.fit_age_group_intervals', - return_value=[1666.6666666666665, 1333.3333333333335, 3500.0, - 2166.6666666666665, 1533.3333333333335, 800.0]) - def test_interpolate_age_groups(self, mockres): - res = [1666.6666666666665, 1333.3333333333335, 3500.0, - 2166.6666666666665, 1533.3333333333335, 800.0] + def test_interpolate_age_groups(self): + res = [1666.6666666666665, 1333.3333333333333, 3500.0, + 2166.6666666666665, 1533.3333333333333, 800.0] entry = {'ID_County': 1000, 'Population': 11000, '<3 years': 1000, '3-5 years': 1000, '6-14 years': 1000, '15-17 years': 1000, '18-24 years': 1000, '25-29 years': 1000, '30-39 years': 1000, '40-49 years': 1000, '50-64 years': 1000, '65-74 years': 1000, '>74 years': 1000} interpol_entry = utils.interpolate_age_groups( - entry, ['0-4', '5-14', '15-34', '35-59', '60-79', '>79']) + entry) self.assertEqual(res, interpol_entry) def test_calc_dist_days(self): From bd9604af13acd3f7905511962f0ca9c24c236ccb Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 10:53:07 +0200 Subject: [PATCH 47/49] delete import --- .../memilio/surrogatemodel/utils/helper_functions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index 9d2b63460a..ddc00c052d 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -21,7 +21,6 @@ import numpy as np import tensorflow as tf import os -from memilio.epidata import modifyDataframeSeries as mds import pandas as pd From 5c428667c86bf1abab079cbb51903b0005645794 Mon Sep 17 00:00:00 2001 From: Manuel Heger Date: Mon, 14 Jul 2025 11:11:25 +0200 Subject: [PATCH 48/49] Delete save and load model --- .../surrogatemodel/utils/helper_functions.py | 28 ------ .../test_surrogatemodel_utils.py | 96 ------------------- 2 files changed, 124 deletions(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py index ddc00c052d..7b91cd5de8 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/utils/helper_functions.py @@ -69,34 +69,6 @@ def normalize_simulation_data(data, transformer, num_runs, num_groups=6, num_com return tf.convert_to_tensor(scaled_data.reshape(num_runs, -1, num_groups*num_compartments)) -def save_model(model, path, modelname): - """ - Saving a trained model. - - :param model: trained tensorflow keras model - :param path: path where the model should be stored - :param modelname: the name of the model - """ - if not os.path.isdir(path): - os.mkdir(path) - path_to_file = os.path.join(path, modelname + ".keras") - model.save(path_to_file) - print("Model successfully saved") - - -def load_model(path): - """ - Loading a trained model. - - :param path: path to the .keras file containing the desired model - :returns: trained tf.keras model - """ - if not os.path.isfile(path): - raise FileNotFoundError( - "There is no .keras model stored at the given directory.") - return tf.keras.models.load_model(path) - - def calc_split_index(n, split_train=0.7, split_valid=0.2, split_test=0.1): """ diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py index 2d5fbd2d69..9c10b60bbf 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel_test/test_surrogatemodel_utils.py @@ -179,102 +179,6 @@ def test_flat_input(self): b = a2 == a1_flatten self.assertTrue(np.asarray(b).all()) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', - return_value=0.1 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', - return_value=0.6 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', - return_value=[[5256.0, 10551, 32368.5, - 43637.833333333336, 22874.066666666666, 8473.6]]) - def test_save_model( - self, mock_population, mock_baseline, mock_minimum): - """ - : param mock_population: - : param mock_baseline: - : param mock_minimum: - """ - input_width = 5 - label_width = 10 - num_runs = 2 - - data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, number_dampings=2) - max_epochs = 1 - early_stop = 100 - loss = tf.keras.losses.MeanAbsolutePercentageError() - optimizer = "Adam" - metric = [tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()] - - training_parameter = ( - early_stop, max_epochs, loss, optimizer, metric) - - model_mlp_multi_input_multi_output = model.network_fit( - model=network_architectures.mlp_multi_input_multi_output( - label_width), - training_parameter=training_parameter, - path=self.path, - filename="data_secir_groups_10days_2_random.pickle", - modeltype='classic', plot_stats=False) - - utils.save_model(model_mlp_multi_input_multi_output.model, - self.path, "mlp_multi_multi") - - self.assertEqual(len(os.listdir(self.path)), 2) - - self.assertEqual(os.listdir(self.path), - ['data_secir_groups_10days_2_random.pickle', 'mlp_multi_multi.keras']) - - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getMinimumMatrix', - return_value=0.1 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.getBaselineMatrix', - return_value=0.6 * np.ones((6, 6))) - @patch('memilio.surrogatemodel.ode_secir_groups.data_generation.get_population', - return_value=[[5256.0, 10551, 32368.5, - 43637.833333333336, 22874.066666666666, 8473.6]]) - def test_load_model( - self, mock_population, mock_baseline, mock_minimum): - """ - : param mock_population: - : param mock_baseline: - : param mock_minimum: - """ - input_width = 5 - label_width = 10 - num_runs = 2 - - data_generation.generate_data(num_runs, self.path, "", input_width, - label_width, number_dampings=2) - max_epochs = 1 - early_stop = 100 - loss = tf.keras.losses.MeanAbsolutePercentageError() - optimizer = "Adam" - metric = [tf.keras.metrics.MeanAbsoluteError(), - tf.keras.metrics.MeanAbsolutePercentageError()] - - training_parameter = ( - early_stop, max_epochs, loss, optimizer, metric) - - mlp1 = model.network_fit( - model=network_architectures.mlp_multi_input_multi_output( - label_width), - training_parameter=training_parameter, - path=self.path, - filename="data_secir_groups_10days_2_random.pickle", - modeltype='classic', plot_stats=False) - - utils.save_model(mlp1.model, self.path, "mlp_multi_multi") - path_file = os.path.join(self.path, "mlp_multi_multi.keras") - if os.path.isfile(path_file): - print("Datei existiert") - - mlp2 = utils.load_model(path_file) - - weights1 = mlp1.model.get_weights() - weights2 = mlp2.get_weights() - for w1, w2 in zip(weights1, weights2): - np.testing.assert_allclose(w1, w2) - def test_train_and_evaluate_model(self): """ Testing evaluation procedure """ inputs = tf.ones([5, 5, 8]) From 0d96161471b5f9329b63b4653aee2c83777a7f17 Mon Sep 17 00:00:00 2001 From: mhheger <122824218+mhheger@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:18:55 +0200 Subject: [PATCH 49/49] Update pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md Co-authored-by: Henrik Zunker <69154294+HenrZu@users.noreply.github.com> --- .../memilio/surrogatemodel/ode_secir_groups/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md index be44b122f7..fe63214fbd 100644 --- a/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md +++ b/pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir_groups/README.md @@ -1,4 +1,4 @@ -# SECIR model with multiple age groups and multiple damping +# SECIR model with multiple age groups and multiple dampings This model is an application of the SECIR model implemented in https://github.com/DLR-SC/memilio/tree/main/cpp/models/ode_secir/ stratified by age groups using dampings to represent changes in the contact matrix. The example is based on https://github.com/DLR-SC/memilio/tree/main/pycode/examples/simulation/ode_secir_groups.py which uses python bindings to run the underlying C++ code.