Skip to content

1293 update secir groups surrogate models #1297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 50 commits into from
Jul 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
3c22df2
Adding different methods for generating dampings
mhheger Jun 11, 2025
9a1b6cc
[ci skip] included multiple dampings
mhheger Jun 11, 2025
26caf61
Adjusting data preparation to handle multiple dampings
mhheger Jun 11, 2025
3c93f21
Updating tests due to changed functionality
mhheger Jun 11, 2025
e37e143
Adding comments
mhheger Jun 16, 2025
ae4e9ee
Update tests and add saving/loading model architecture
mhheger Jun 16, 2025
81a91de
Update tests
mhheger Jun 16, 2025
909bfee
Changes in test file
mhheger Jun 16, 2025
5a0aeae
updating tests
mhheger Jun 16, 2025
5e3d290
debugging test
mhheger Jun 16, 2025
653a6ec
Changing test files
mhheger Jun 16, 2025
0f12f57
Delete pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir…
mhheger Jun 18, 2025
2870857
small modifications
mhheger Jun 18, 2025
1663411
Updating Tests
mhheger Jun 18, 2025
953c216
Undo changes
mhheger Jun 18, 2025
ed2caef
Adding test, whether stored weights are equal
mhheger Jun 18, 2025
35b5426
Changing default method for dampings
mhheger Jun 23, 2025
bb937fc
Adding more variable error handling
mhheger Jun 23, 2025
65e4bfb
Merge branch 'main' into 1293-update-secir-groups-surrogate-models
mhheger Jun 25, 2025
fb55844
Include minimal distance between dampings and minimal damping day
mhheger Jun 25, 2025
b92d2e4
Applying the review comments
mhheger Jun 30, 2025
8fea15c
Including review comments
mhheger Jun 30, 2025
aab1849
checking ci fail
mhheger Jul 2, 2025
a27a0c9
CI fails
mhheger Jul 2, 2025
71f26af
Inserting grid search values
mhheger Jul 2, 2025
49db37c
Ci test
mhheger Jul 2, 2025
9923be4
ci fails
mhheger Jul 2, 2025
86874c5
Typo
mhheger Jul 2, 2025
9dd10fd
Try to fix CI fails
mhheger Jul 2, 2025
d5d46b9
Trying to find reason for CI fail
mhheger Jul 7, 2025
8e23272
Collecting grid_search and helper functions in a utils folder
mhheger Jul 7, 2025
35f61a9
Including dampings in utils folder
mhheger Jul 7, 2025
683278f
Fixing typo in docs
mhheger Jul 7, 2025
2f8e37f
Adding __init__
mhheger Jul 7, 2025
6deedff
Adding tests for utils and including review
mhheger Jul 7, 2025
636ce06
Using the existing fit_age_groups method inside the interpolate age g…
mhheger Jul 9, 2025
e5fa344
Update README files
mhheger Jul 9, 2025
82a50bb
small modifications (typos, renaming etc.)
mhheger Jul 9, 2025
2b3a497
Update dependencies
mhheger Jul 9, 2025
df10a6c
Updating the refs
mhheger Jul 9, 2025
57d82b4
Include test for interpolate_age_groups
mhheger Jul 9, 2025
69ad690
CI fails
mhheger Jul 14, 2025
bdcc4fc
Mocking fit_age_groups_interpolate
mhheger Jul 14, 2025
6af9d33
Mocking interpolation
mhheger Jul 14, 2025
bcc2b47
Deleting import in test files
mhheger Jul 14, 2025
9b9d8e9
updating import
mhheger Jul 14, 2025
2268ee1
Undo interpolate_age_groups
mhheger Jul 14, 2025
bd9604a
delete import
mhheger Jul 14, 2025
5c42866
Delete save and load model
mhheger Jul 14, 2025
0d96161
Update pycode/memilio-surrogatemodel/memilio/surrogatemodel/ode_secir…
mhheger Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SECIR model with multiple age groups and one damping
# SECIR model with multiple age groups and multiple dampings

This model is an application of the SECIR model implemented in https://github.com/DLR-SC/memilio/tree/main/cpp/models/ode_secir/ stratified by age groups using one damping to represent a change in the contact matrice.
The example is based on https://github.com/DLR-SC/memilio/tree/main/pycode/examples/simulation/secir_groups.py which uses python bindings to run the underlying C++ code.
This model is an application of the SECIR model implemented in https://github.com/DLR-SC/memilio/tree/main/cpp/models/ode_secir/ stratified by age groups using dampings to represent changes in the contact matrix.
The example is based on https://github.com/DLR-SC/memilio/tree/main/pycode/examples/simulation/ode_secir_groups.py which uses python bindings to run the underlying C++ code.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#############################################################################

import copy
import os
import pickle
Expand All @@ -33,75 +34,41 @@
from memilio.simulation.osecir import (Index_InfectionState,
InfectionState, Model,
interpolate_simulation_result, simulate)
import memilio.surrogatemodel.utils.dampings as dampings
from memilio.surrogatemodel.utils.helper_functions import (
interpolate_age_groups, remove_confirmed_compartments, normalize_simulation_data)
import memilio.simulation as mio
import memilio.simulation.osecir as osecir


def interpolate_age_groups(data_entry):
""" Interpolates the age groups from the population data into the age groups used in the simulation.
We assume that the people in the age groups are uniformly distributed.

:param data_entry: Data entry containing the population data.
:returns: List containing the population in each age group used in the simulation.

"""
age_groups = {
"A00-A04": data_entry['<3 years'] + data_entry['3-5 years'] * 2 / 3,
"A05-A14": data_entry['3-5 years'] * 1 / 3 + data_entry['6-14 years'],
"A15-A34": data_entry['15-17 years'] + data_entry['18-24 years'] + data_entry['25-29 years'] + data_entry['30-39 years'] * 1 / 2,
"A35-A59": data_entry['30-39 years'] * 1 / 2 + data_entry['40-49 years'] + data_entry['50-64 years'] * 2 / 3,
"A60-A79": data_entry['50-64 years'] * 1 / 3 + data_entry['65-74 years'] + data_entry['>74 years'] * 1 / 5,
"A80+": data_entry['>74 years'] * 4 / 5
}
return [age_groups[key] for key in age_groups]


def remove_confirmed_compartments(result_array):
""" Removes the confirmed compartments which are not used in the data generation.

:param result_array: Array containing the simulation results.
:returns: Array containing the simulation results without the confirmed compartments.

"""
num_groups = int(result_array.shape[1] / 10)
delete_indices = [index for i in range(
num_groups) for index in (3+10*i, 5+10*i)]
return np.delete(result_array, delete_indices, axis=1)


def transform_data(data, transformer, num_runs):
""" Transforms the data by a logarithmic normalization.
Reshaping is necessary, because the transformer needs an array with dimension <= 2.

:param data: Data to be transformed.
:param transformer: Transformer used for the transformation.
:param num_runs:
:returns: Transformed data.

"""
data = np.asarray(data).transpose(2, 0, 1).reshape(48, -1)
scaled_data = transformer.transform(data)
return tf.convert_to_tensor(scaled_data.transpose().reshape(num_runs, -1, 48))


def run_secir_groups_simulation(days, damping_day, populations):
def run_secir_groups_simulation(days, damping_days, damping_factors, populations):
""" Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region.
Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges.

:param days: Describes how many days we simulate within a single run.
:param damping_day: The day when damping is applied.
:param damping_days: The days when damping is applied.
:param damping_factors: damping factors associated to the damping days.
:param populations: List containing the population in each age group.
:returns: List containing the populations in each compartment used to initialize the run.
:returns: Tuple of lists (list_of_simulation_results, list_of_damped_matrices), the first containing the simulation results, the second list containing the
damped contact matrices.

"""
# Collect indices of confirmed compartments
del_indices = []

if len(damping_days) != len(damping_factors):
raise ValueError("Length of damping_days and damping_factors differ!")

set_log_level(LogLevel.Off)

start_day = 1
start_month = 1
start_year = 2019
dt = 0.1

# Define age Groups
groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+']
num_groups = len(groups)
age_groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+']
# Get number of age groups
num_groups = len(age_groups)

# Initialize Parameters
model = Model(num_groups)
Expand All @@ -118,26 +85,26 @@ def run_secir_groups_simulation(days, damping_day, populations):
# Initial number of people in each compartment with random numbers
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.Exposed)] = random.uniform(
0.00025, 0.0005) * populations[i]
0.00025, 0.005) * populations[i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedNoSymptoms)] = random.uniform(
0.0001, 0.00035) * populations[i]
0.0001, 0.0035) * populations[i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedNoSymptomsConfirmed)] = 0
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedSymptoms)] = random.uniform(
0.00007, 0.0001) * populations[i]
0.00007, 0.001) * populations[i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedSymptomsConfirmed)] = 0
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedSevere)] = random.uniform(
0.00003, 0.00006) * populations[i]
0.00003, 0.0006) * populations[i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedCritical)] = random.uniform(
0.00001, 0.00002) * populations[i]
0.00001, 0.0002) * populations[i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.Recovered)] = random.uniform(
0.002, 0.008) * populations[i]
0.002, 0.08) * populations[i]
model.populations[AgeGroup(i),
Index_InfectionState(InfectionState.Dead)] = 0
model.populations.set_difference_from_group_total_AgeGroup(
Expand All @@ -154,6 +121,14 @@ def run_secir_groups_simulation(days, damping_day, populations):
# twice the value of RiskOfInfectionFromSymptomatic
model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.5

# Collecting deletable indices
index_no_sym_conf = model.populations.get_flat_index(
osecir.MultiIndex_PopulationsArray(mio.AgeGroup(i), osecir.InfectionState.InfectedNoSymptomsConfirmed))
index_sym_conf = model.populations.get_flat_index(
osecir.MultiIndex_PopulationsArray(mio.AgeGroup(i), osecir.InfectionState.InfectedSymptomsConfirmed))
del_indices.append(index_no_sym_conf)
del_indices.append(index_sym_conf)

# StartDay is the n-th day of the year
model.parameters.StartDay = (
date(start_year, start_month, start_day) - date(start_year, 1, 1)).days
Expand All @@ -166,14 +141,16 @@ def run_secir_groups_simulation(days, damping_day, populations):
model.parameters.ContactPatterns.cont_freq_mat[0].minimum = minimum

# Generate a damping matrix and assign it to the model
damping = np.ones((num_groups, num_groups)
) * np.float16(random.uniform(0, 0.5))

model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping(
coeffs=(damping), t=damping_day, level=0, type=0))
damped_matrices = []

damped_contact_matrix = model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at(
damping_day+1)
for i in np.arange(len(damping_days)):
damping = np.ones((num_groups, num_groups)
) * damping_factors[i]
day = damping_days[i]
model.parameters.ContactPatterns.cont_freq_mat.add_damping(Damping(
coeffs=(damping), t=day, level=0, type=0))
damped_matrices.append(model.parameters.ContactPatterns.cont_freq_mat.get_matrix_at(
day+1))

# Apply mathematical constraints to parameters
model.apply_constraints()
Expand All @@ -184,21 +161,22 @@ def run_secir_groups_simulation(days, damping_day, populations):
# Interpolate simulation result on days time scale
result = interpolate_simulation_result(result)

# Omit first column, as the time points are not of interest here.
result_array = remove_confirmed_compartments(
np.transpose(result.as_ndarray()[1:, :]))
np.transpose(result.as_ndarray()[1:, :]), del_indices)

# Omit first column, as the time points are not of interest here.
dataset_entries = copy.deepcopy(result_array)
dataset_entries = np.nan_to_num(dataset_entries)

return dataset_entries.tolist(), damped_contact_matrix
return dataset_entries.tolist(), damped_matrices


def generate_data(
num_runs, path_out, path_population, input_width, label_width,
normalize=True, save_data=True):
""" Generate data sets of num_runs many equation-based model simulations and transforms the computed results by a log(1+x) transformation.
normalize=True, save_data=True, damping_method="random", number_dampings=5):
""" Generate data sets of num_runs many equation-based model simulations and possibly transforms the computed results by a log(1+x) transformation.
Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks.
In general, we have 8 different compartments and 6 age groups. If we choose,
In general, we have 8 different compartments and 6 age groups. If we choose
input_width = 5 and label_width = 20, the dataset has
- input with dimension 5 x 8 x 6
- labels with dimension 20 x 8 x 6
Expand All @@ -210,14 +188,17 @@ def generate_data(
:param label_width: Int value that defines the size of the labels.
:param normalize: Default: true Option to transform dataset by logarithmic normalization.
:param save_data: Default: true Option to save the dataset.
:param damping_method: String specifying the damping method, that should be used. Possible values "classic", "active", "random".
:param number_dampings: Maximal number of possible dampings.
:returns: Data dictionary of input and label data sets.

"""
data = {
"inputs": [],
"labels": [],
"contact_matrix": [],
"damping_day": []
"contact_matrices": [],
"damping_factors": [],
"damping_days": []
}

# The number of days is the same as the sum of input and label width.
Expand All @@ -232,16 +213,18 @@ def generate_data(
bar = Bar('Number of Runs done', max=num_runs)
for _ in range(0, num_runs):

# Generate a random damping day
damping_day = random.randrange(
input_width, input_width+label_width)
# Generate random damping days
damping_days, damping_factors = dampings.generate_dampings(
days, number_dampings, method=damping_method, min_distance=2,
min_damping_day=2)

data_run, damped_contact_matrix = run_secir_groups_simulation(
days, damping_day, population[random.randint(0, len(population) - 1)])
data_run, damped_matrices = run_secir_groups_simulation(
days, damping_days, damping_factors, population[random.randint(0, len(population) - 1)])
data['inputs'].append(data_run[:input_width])
data['labels'].append(data_run[input_width:])
data['contact_matrix'].append(np.array(damped_contact_matrix))
data['damping_day'].append([damping_day])
data['contact_matrices'].append(damped_matrices)
data['damping_factors'].append(damping_factors)
data['damping_days'].append(damping_days)
bar.next()
bar.finish()

Expand All @@ -250,8 +233,10 @@ def generate_data(
transformer = FunctionTransformer(np.log1p, validate=True)

# transform inputs and labels
data['inputs'] = transform_data(data['inputs'], transformer, num_runs)
data['labels'] = transform_data(data['labels'], transformer, num_runs)
data['inputs'] = normalize_simulation_data(
data['inputs'], transformer, num_runs)
data['labels'] = normalize_simulation_data(
data['labels'], transformer, num_runs)
else:
data['inputs'] = tf.convert_to_tensor(data['inputs'])
data['labels'] = tf.convert_to_tensor(data['labels'])
Expand All @@ -261,8 +246,15 @@ def generate_data(
if not os.path.isdir(path_out):
os.mkdir(path_out)

# save dict to json file
with open(os.path.join(path_out, 'data_secir_groups.pickle'), 'wb') as f:
# save dict to pickle file
if num_runs < 1000:
filename = 'data_secir_groups_%ddays_%d_' % (
label_width, num_runs) + damping_method+'.pickle'
else:
filename = 'data_secir_groups_%ddays_%dk_' % (
label_width, num_runs//1000) + damping_method+'.pickle'

with open(os.path.join(path_out, filename), 'wb') as f:
pickle.dump(data, f)
return data

Expand Down Expand Up @@ -291,13 +283,13 @@ def getMinimumMatrix():
""" loads the minimum matrix"""

minimum_contact_matrix0 = os.path.join(
"./data/contacts/minimum_home.txt")
"./data/Germany/contacts/minimum_home.txt")
minimum_contact_matrix1 = os.path.join(
"./data/contacts/minimum_school_pf_eig.txt")
"./data/Germany/contacts/minimum_school_pf_eig.txt")
minimum_contact_matrix2 = os.path.join(
"./data/contacts/minimum_work.txt")
"./data/Germany/contacts/minimum_work.txt")
minimum_contact_matrix3 = os.path.join(
"./data/contacts/minimum_other.txt")
"./data/Germany/contacts/minimum_other.txt")

minimum = np.loadtxt(minimum_contact_matrix0) \
+ np.loadtxt(minimum_contact_matrix1) + \
Expand All @@ -310,7 +302,8 @@ def getMinimumMatrix():
def get_population(path):
""" read population data in list from dataset

:param path: Path to the dataset containing the population data
:param path: Path to the dataset containing the population
:returns: List of interpolated age grouped population data

"""

Expand All @@ -332,7 +325,7 @@ def get_population(path):
r"data//Germany//pydata//county_current_population.json")

input_width = 5
label_width = 30
num_runs = 10000
label_width = 90
num_runs = 100
data = generate_data(num_runs, path_output, path_population, input_width,
label_width)
label_width, damping_method="active")
Loading
Loading