#
# WarwickLogLik Class
#
# This file is part of EPIMODELS
# (https://github.com/I-Bouros/multi-epi-model-cross-analysis.git) which is
# released under the MIT license. See accompanying LICENSE for copyright
# notice and full license details.
#
"""
This script contains code for parameter inference of the extended SEIR model
created by the University of Warwick. This is one of the official models used
by the UK government for policy making.
It uses an extended version of an SEIR model with contact and region-specific
matrices and can be used to model the effects of within-household dynamics on
the epidemic trajectory in different countries. It also differentiates between
asymptomatic and symptomatic infections.
"""
from iteration_utilities import deepflatten
import numpy as np
import pints
import epimodels as em
[docs]
class WarwickLogLik(pints.LogLikelihood):
"""WarwickLogLik Class:
Controller class to construct the log-likelihood needed for optimisation or
inference in a PINTS framework.
Parameters
----------
model : WarwickSEIRModel
The model for which we solve the optimisation or inference problem.
extended_susceptibles : list
List country-level initial number of
susceptibles organised in an extended age classification.
extended_infectives_prop : list
List country-level initial proportions of
infective individuals in each age group when the population is
organised in an extended age classification.
extended_house_cont_mat : ContactMatrix
Initial contact matrix with more age groups used for the modelling,
underlying household interactions.
extended_school_cont_mat : ContactMatrix
Initial contact matrix with more age groups used for the modelling,
underlying school interactions.
extended_work_cont_mat : ContactMatrix
Initial contact matrix with more age groups used for the modelling,
underlying workplace interactions.
extended_other_cont_mat : ContactMatrix
Initial contact matrix with more age groups used for the modelling,
underlying other non-household interactions.
pDtoH : list
Age-dependent fractions of the number of symptomatic cases that
end up hospitalised.
dDtoH : list
Distribution of the delay between onset of symptoms and
hospitalisation. Must be normalised.
pHtoDeath : list
Age-dependent fractions of the number of hospitalised cases that
die.
dHtoDeath : list
Distribution of the delay between onset of hospitalisation and
death. Must be normalised.
susceptibles_data : list
List of regional age-structured lists of the initial number of
susceptibles.
infectives_data : list
List of regional age-structured lists of the initial number of
infectives in the presymptomatic infectives compartments.
times : list
List of time points at which we have data for the log-likelihood
computation.
deaths_data : numpy.array
List of region-specific age-structured number of deaths as a matrix.
Each column represents an age group.
deaths_times : numpy.array
Matrix of timepoints for which deaths data is available.
tests_data : list of numpy.array
List of region-specific age-structured number of tests conducted as a
matrix. Each column represents an age group.
positives_data : list of numpy.array
List of region-specific age-structured number of positive test results
as a matrix. Each column represents an age group.
serology_times : numpy.array
Matrix of timepoints for which serology data is available.
sens : float or int
Sensitivity of the test (or ratio of true positives).
spec : float or int
Specificity of the test (or ratio of true negatives).
wd : float or int
Proportion of the contribution of the deaths data to the
log-likelihood.
wp : float or int
Proportion of the contribution of the serology data to the
log-likelihood.
"""
def __init__(self, model, extended_susceptibles, extended_infectives_prop,
extended_house_cont_mat, extended_school_cont_mat,
extended_work_cont_mat, extended_other_cont_mat,
pDtoH, dDtoH, pHtoDeath, dHtoDeath,
susceptibles_data, infectives_data, times,
deaths, deaths_times, tests_data, positives_data,
serology_times, sens, spec, wd=1, wp=1):
# Set the prerequisites for the inference wrapper
# Model and ICs data
self._model = model
self._times = times
self._susceptibles = susceptibles_data
self._infectives = infectives_data
# Probablities and delay distributions to hospitalisation and death
self._pDtoH = pDtoH
self._dDtoH = dDtoH
self._pHtoDeath = pHtoDeath
self._dHtoDeath = dHtoDeath
# Death data
self._deaths = deaths
self._deaths_times = deaths_times
# Serology data
self._total_tests = tests_data
self._positive_tests = positives_data
self._serology_times = serology_times
self._sens = sens
self._spec = spec
# Contribution parameters
self._wd = wd
self._wp = wp
# Extended population structure and contact matrices
self._extended_susceptibles = np.array(extended_susceptibles)
self._extended_infectives_prop = np.array(extended_infectives_prop)
self._pop = self._extended_susceptibles
self._N = np.sum(self._pop)
# Identify the appropriate contact matrix for the ODE system
house_cont_mat = extended_house_cont_mat
school_cont_mat = extended_school_cont_mat
work_cont_mat = extended_work_cont_mat
other_cont_mat = extended_other_cont_mat
self._house_cont_mat = house_cont_mat
self._other_cont_mat = other_cont_mat
self._nonhouse_cont_mat = \
school_cont_mat + work_cont_mat + other_cont_mat
self._total_cont_mat = \
self._house_cont_mat + self._nonhouse_cont_mat
# Set fixed parameters of the model
self.set_fixed_parameters()
[docs]
def n_parameters(self):
"""
Returns number of parameters for log-likelihood object.
Returns
-------
int
Number of parameters for log-likelihood object.
"""
return 2
def _log_likelihood(self, var_parameters):
"""
Computes the log-likelihood of the non-fixed parameters
using death and serology data.
Parameters
----------
var_parameters : list
List of varying parameters of the model for which
the log-likelihood is computed for.
Returns
-------
float
Value of the log-likelihood for the given choice of
free parameters.
"""
# tau
self._parameters[-6] = var_parameters[-2]
# gamma
self._parameters[-4] = var_parameters[-1]
# Hs and Ds
Hs = 1
Ds = 1
total_log_lik = 0
# Compute log-likelihood
try:
for r, _ in enumerate(self._model.regions):
self._parameters[0] = r+1
model_output = self._model._simulate(
parameters=list(deepflatten(self._parameters, ignore=str)),
times=self._times
)
model_new_infec = self._model.new_infections(model_output)
model_new_hosp = self._model.new_hospitalisations(
model_new_infec,
(Hs * np.array(self._pDtoH)).tolist(),
self._dDtoH)
model_new_deaths = self._model.new_deaths(
model_new_hosp,
(Ds * np.array(self._pHtoDeath)).tolist(),
self._dHtoDeath)
# Check the input of log-likelihoods fixed data
self._model.check_death_format(model_new_deaths, self._niu)
self._model.check_positives_format(
model_output,
self._total_tests[r],
self._sens,
self._spec)
# Log-likelihood contribution from death data
for t, time in enumerate(self._deaths_times):
total_log_lik += self._wd * self._model.loglik_deaths(
obs_death=self._deaths[r][t, :],
new_deaths=model_new_deaths,
niu=self._niu,
k=time-1
)
# Log-likelihood contribution from serology data
for t, time in enumerate(self._serology_times):
total_log_lik += self._wp * \
self._model.loglik_positive_tests(
obs_pos=self._positive_tests[r][t, :],
output=model_output,
tests=self._total_tests[r][t, :],
sens=self._sens,
spec=self._spec,
k=time-1
)
return np.sum(total_log_lik)
except ValueError: # pragma: no cover
return -np.inf
[docs]
def set_fixed_parameters(self):
"""
Sets the non-changing parameters of the model in the class structure
to save time in the evaluation of the log-likelihood.
"""
# Use prior mean for the over-dispersion parameter
self._niu = 10**(-5)
# Initial Conditions
susceptibles = self._susceptibles
exposed_1_f = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_1_sd = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_1_su = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_1_q = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_2_f = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_2_sd = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_2_su = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_2_q = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_3_f = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_3_sd = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_3_su = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
exposed_3_q = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
detected_f = self._infectives
detected_qf = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
detected_sd = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
detected_su = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
detected_qs = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
undetected_f = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
undetected_s = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
undetected_q = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
recovered = np.zeros((
len(self._model.regions),
self._model._num_ages)).tolist()
# Regional household quarantine proportions
h = self._model._c[5]
# Disease-specific parameters
tau = 0.4
d = self._model._c[4]
# Transmission parameters
epsilon = self._model._c[2]
gamma = 0.083
sigma = self._model._c[0]
self._parameters = [
0, susceptibles, exposed_1_f, exposed_1_sd, exposed_1_su,
exposed_1_q, exposed_2_f, exposed_2_sd, exposed_2_su,
exposed_2_q, exposed_3_f, exposed_3_sd, exposed_3_su,
exposed_3_q, detected_f, detected_qf, detected_sd, detected_su,
detected_qs, undetected_f, undetected_s, undetected_q, recovered,
sigma, tau, epsilon, gamma, d, h, 'RK45'
]
def __call__(self, x):
"""
Evaluates the log-likelihood in a PINTS framework.
Parameters
----------
x : list
List of free parameters used for computing the log-likelihood.
Returns
-------
float
Value of the log-likelihood at the given point in the free
parameter space.
"""
return self._log_likelihood(x)
#
# WarwickLogPrior Class
#
[docs]
class WarwickLogPrior(pints.LogPrior):
"""WarwickLogPrior Class:
Controller class to construct the log-prior needed for optimisation or
inference in a PINTS framework.
Parameters
----------
model : WarwickSEIRModel
The model for which we solve the optimisation or inference problem.
times : list
List of time points at which we have data for the log-likelihood
computation.
"""
def __init__(self, model, times):
super(WarwickLogPrior, self).__init__()
# Set the prerequisites for the inference wrapper
# Model
self._model = model
self._times = times
[docs]
def n_parameters(self):
"""
Returns number of parameters for log-prior object.
Returns
-------
int
Number of parameters for log-prior object.
"""
return 2
def __call__(self, x):
"""
Evaluates the log-prior in a PINTS framework.
Parameters
----------
x : list
List of free parameters used for computing the log-prior.
Returns
-------
float
Value of the log-prior at the given point in the free
parameter space.
"""
# Prior contribution of tau
log_prior = pints.UniformLogPrior([0], [0.5])(x[0])
# Prior contribution of gamma
log_prior += pints.UniformLogPrior([0.05], [0.5])(x[1])
return log_prior
#
# WarwickSEIRInfer Class
#
[docs]
class WarwickSEIRInfer(object):
"""WarwickSEIRInfer Class:
Controller class for the optimisation or inference of parameters of the
Warwick model in a PINTS framework.
Parameters
----------
model : WarwickSEIRModel
The model for which we solve the optimisation or inference problem.
"""
def __init__(self, model):
super(WarwickSEIRInfer, self).__init__()
# Assign model for inference or optimisation
if not isinstance(model, em.WarwickSEIRModel):
raise TypeError('Wrong model type for parameters inference.')
self._model = model
[docs]
def read_model_data(
self, susceptibles_data, infectives_data):
"""
Sets the initial data used for the model's parameters optimisation or
inference.
Parameters
----------
susceptibles_data : list
List of regional age-structured lists of the initial number of
susceptibles.
infectives_data : list
List of regional age-structured lists of the initial number of
infectives in the presymptomatic infectives compartment.
"""
self._susceptibles_data = susceptibles_data
self._infectives_data = infectives_data
[docs]
def read_extended_population_structure(
self, extended_susceptibles, extended_infectives_prop):
"""
Sets the initial data with more age groups used for the model's
parameters optimisation or inference.
Parameters
----------
extended_susceptibles : list
List country-level initial number of
susceptibles organised in an extended age classification.
extended_infectives_prop : list
List country-level initial proportions of
infective individuals in each age group when the population is
organised in an extended age classification.
"""
self._extended_susceptibles = extended_susceptibles
self._extended_infectives_prop = extended_infectives_prop
[docs]
def read_serology_data(
self, tests_data, positives_data, serology_times, sens, spec):
"""
Sets the serology data used for the model's parameters inference.
Parameters
----------
tests_data: list of numpy.array
List of region-specific age-structured number of tests conducted
as a matrix. Each column represents an age group.
positives_data : list of numpy.array
List of region-specific age-structured number of positive test
results as a matrix. Each column represents an age group.
serology_times : numpy.array
Matrix of timepoints for which serology data is available.
sens : float or int
Sensitivity of the test (or ratio of true positives).
spec : float or int
Specificity of the test (or ratio of true negatives).
"""
self._total_tests = tests_data
self._positive_tests = positives_data
self._serology_times = serology_times
self._sens = sens
self._spec = spec
[docs]
def read_delay_data(self, pDtoH, dDtoH, pHtoDeath, dHtoDeath):
"""
Sets the hospitalisation and death delays data used for the model's
parameters inference.
Parameters
----------
pDtoH : list
Age-dependent fractions of the number of symptomatic cases that
end up hospitalised.
dDtoH : list
Distribution of the delay between onset of symptoms and
hospitalisation. Must be normalised.
pHtoDeath : list
Age-dependent fractions of the number of hospitalised cases that
die.
dHtoDeath : list
Distribution of the delay between onset of hospitalisation and
death. Must be normalised.
"""
self._pDtoH = pDtoH
self._dDtoH = dDtoH
self._pHtoDeath = pHtoDeath
self._dHtoDeath = dHtoDeath
[docs]
def read_deaths_data(
self, deaths_data, deaths_times):
"""
Sets the serology data used for the model's parameters inference.
Parameters
----------
deaths_data : numpy.array
List of region-specific age-structured number of deaths as a
matrix. Each column represents an age group.
deaths_times : numpy.array
Matrix of timepoints for which deaths data is available.
"""
self._deaths = deaths_data
self._deaths_times = deaths_times
[docs]
def return_loglikelihood(self, times, x, wd=1, wp=1):
"""
Return the log-likelihood used for the optimisation or inference.
Parameters
----------
times : list
List of time points at which we have data for the log-likelihood
computation.
x : list
List of free parameters used for computing the log-likelihood.
wd : float or int
Proportion of the contribution of the deaths data to the
log-likelihood.
wp : float or int
Proportion of the contribution of the serology data to the
log-likelihood.
Returns
-------
float
Value of the log-likelihood at the given point in the free
parameter space.
"""
loglikelihood = WarwickLogLik(
self._model, self._extended_susceptibles,
self._extended_infectives_prop, self._extended_house_cont_mat,
self._extended_school_cont_mat, self._extended_work_cont_mat,
self._extended_other_cont_mat,
self._pDtoH, self._dDtoH, self._pHtoDeath, self._dHtoDeath,
self._susceptibles_data, self._infectives_data, times,
self._deaths, self._deaths_times,
self._total_tests, self._positive_tests, self._serology_times,
self._sens, self._spec, wd, wp)
return loglikelihood(x)
def _create_posterior(self, times, wd, wp):
"""
Runs the initial conditions optimisation routine for the Warwick model.
Parameters
----------
times : list
List of time points at which we have data for the log-likelihood
computation.
wd : float or int
Proportion of the contribution of the deaths data to the
log-likelihood.
wp : float or int
Proportion of the contribution of the serology data to the
log-likelihood.
"""
# Create a likelihood
loglikelihood = WarwickLogLik(
self._model, self._extended_susceptibles,
self._extended_infectives_prop, self._extended_house_cont_mat,
self._extended_school_cont_mat, self._extended_work_cont_mat,
self._extended_other_cont_mat,
self._pDtoH, self._dDtoH, self._pHtoDeath, self._dHtoDeath,
self._susceptibles_data, self._infectives_data, times,
self._deaths, self._deaths_times,
self._total_tests, self._positive_tests, self._serology_times,
self._sens, self._spec, wd, wp)
# Create a prior
log_prior = WarwickLogPrior(self._model, times)
self.ll = loglikelihood
# Create a posterior log-likelihood (log(likelihood * prior))
self._log_posterior = pints.LogPosterior(loglikelihood, log_prior)
[docs]
def inference_problem_setup(self, times, num_iter, wd=1, wp=1):
"""
Runs the parameter inference routine for the Warwick model.
Parameters
----------
times : list
List of time points at which we have data for the log-likelihood
computation.
num_iter : integer
Number of iterations the MCMC sampler algorithm is run for.
wd : float or int
Proportion of the contribution of the deaths data to the
log-likelihood.
wp : float or int
Proportion of the contribution of the serology data to the
log-likelihood.
Returns
-------
numpy.array
3D-matrix of the proposed parameters for each iteration for
each of the chains of the MCMC sampler.
"""
# Starting points using optimisation object
x0 = [self.optimisation_problem_setup(times, wd, wp)[0].tolist()]*3
# Create MCMC routine
mcmc = pints.MCMCController(
self._log_posterior, 3, x0)
mcmc.set_max_iterations(num_iter)
mcmc.set_log_to_screen(True)
mcmc.set_parallel(True)
print('Running...')
chains = mcmc.run()
print('Done!')
param_names = [
'tau', 'gamma']
# Check convergence and other properties of chains
results = pints.MCMCSummary(
chains=chains, time=mcmc.time(),
parameter_names=param_names)
print(results)
return chains
[docs]
def optimisation_problem_setup(self, times, wd=1, wp=1):
"""
Runs the initial conditions optimisation routine for the Warwick model.
Parameters
----------
times : list
List of time points at which we have data for the log-likelihood
computation.
wd : float or int
Proportion of the contribution of the deaths data to the
log-likelihood.
wp : float or int
Proportion of the contribution of the serology data to the
log-likelihood.
Returns
-------
numpy.array
Matrix of the optimised parameters at the end of the optimisation
procedure.
float
Value of the log-posterior at the optimised point in the free
parameter space.
"""
self._create_posterior(times, wd, wp)
# Starting points
# x0 = [0.9, 0, 0.2, 15, 0.5, 1, 1, 1]
# x0 = [0.9, 0, 0.2, 15, 1, 1, 1]
x0 = [0.5, 0.2]
# Create optimisation routine
optimiser = pints.OptimisationController(
self._log_posterior, x0, method=pints.CMAES)
optimiser.set_max_unchanged_iterations(100, 1)
found_ics, found_posterior_val = optimiser.run()
print(found_ics, found_posterior_val)
print("Optimisation phase is finished.")
return found_ics, found_posterior_val