Source code for epimodels.inference.phe_inference

#
# PHELogLik Class
#
# This file is part of EPIMODELS
# (https://github.com/I-Bouros/multi-epi-model-cross-analysis.git) which is
# released under the MIT license. See accompanying LICENSE for copyright
# notice and full license details.
#
"""
This script contains code for parameter inference of the extended SEIR model
created by Public Health England and University of Cambridge. This is one of
the official models used by the UK government for policy making.

It uses an extended version of an SEIR model with contact and region-specific
matrices.

"""

import numpy as np
import pints
from iteration_utilities import deepflatten
from scipy.stats import norm, gamma

import epimodels as em



[docs]
class PHELogLik(pints.LogLikelihood):
    """PHELogLik Class:
    Controller class to construct the log-likelihood needed for optimisation or
    inference in a PINTS framework.

    Parameters
    ----------
    model : PheSEIRModel
        The model for which we solve the optimisation or inference problem.
    susceptibles_data : list
        List of regional age-structured lists of the initial number of
        susceptibles.
    infectives_data : list
        List of regional age-structured lists of the initial number of
        infectives in the first infectives compartment.
    times : list
        List of time points at which we have data for the log-likelihood
        computation.
    deaths_data : numpy.array
        List of region-specific age-structured number of deaths as a matrix.
        Each column represents an age group.
    time_to_death : list
        List of probabilities of death of individual d days after infection.
    deaths_times : numpy.array
        Matrix of timepoints for which deaths data is available.
    fatality_ratio : list
        List of age-specific fatality ratios.
    tests_data : list of numpy.array
        List of region-specific age-structured number of tests conducted as a
        matrix. Each column represents an age group.
    positives_data : list of numpy.array
        List of region-specific age-structured number of positive test results
        as a matrix. Each column represents an age group.
    serology_times : numpy.array
        Matrix of timepoints for which serology data is available.
    sens : float or int
        Sensitivity of the test (or ratio of true positives).
    spec : float or int
        Specificity of the test (or ratio of true negatives).
    wd : float or int
        Proportion of the contribution of the deaths data to the
        log-likelihood.
    wp : float or int
        Proportion of the contribution of the serology data to the
        log-likelihood.

    """
    def __init__(self, model, susceptibles_data, infectives_data, times,
                 deaths, time_to_death, deaths_times, fatality_ratio,
                 tests_data, positives_data, serology_times, sens, spec,
                 wd=1, wp=1):
        # Set the prerequisites for the inference wrapper
        # Model and ICs data
        self._model = model
        self._times = times

        self._susceptibles = susceptibles_data
        self._infectives = infectives_data

        # Death data
        self._deaths = deaths
        self._deaths_times = deaths_times
        self._time_to_death = time_to_death
        self._fatality_ratio = fatality_ratio

        # Serology data
        self._total_tests = tests_data
        self._positive_tests = positives_data
        self._serology_times = serology_times
        self._sens = sens
        self._spec = spec

        # Contribution parameters
        self._wd = wd
        self._wp = wp

        # Set fixed parameters of the model
        self.set_fixed_parameters()


[docs]
    def n_parameters(self):
        """
        Returns number of parameters for log-likelihood object.

        Returns
        -------
        int
            Number of parameters for log-likelihood object.

        """
        return 2+len(np.arange(44, len(self._times), 7)) * len(
            self._model.regions)


    def _log_likelihood(self, var_parameters):
        """
        Computes the log-likelihood of the non-fixed parameters
        using death and serology data.

        Parameters
        ----------
        var_parameters : list
            List of varying parameters of the model for which
            the log-likelihood is computed for.

        Returns
        -------
        float
            Value of the log-likelihood for the given choice of
            free parameters.

        """
        # Update parameters
        self._parameters[0] = [var_parameters[0]] * len(self._model.regions)

        LEN = len(np.arange(44, len(self._times), 7))

        betas = np.array(self._parameters[8])
        for r in range(len(self._model.regions)):
            for d, day in enumerate(np.arange(44, len(self._times), 7)):
                betas[r, day:(day+7)] = var_parameters[r*LEN+d+1]

        self._parameters[8] = betas.tolist()

        total_log_lik = 0

        # Compute log-likelihood
        try:
            for r, _ in enumerate(self._model.regions):
                self._parameters[1] = r+1

                model_output = self._model._simulate(
                    parameters=list(deepflatten(self._parameters, ignore=str)),
                    times=self._times
                    )
                model_new_infections = self._model.new_infections(model_output)

                # Check the input of log-likelihoods fixed data
                self._model.check_death_format(
                    model_new_infections,
                    self._fatality_ratio,
                    self._time_to_death,
                    self._niu)

                self._model.check_positives_format(
                    model_output,
                    self._total_tests[r],
                    self._sens,
                    self._spec)

                # Log-likelihood contribution from death data
                for t, time in enumerate(self._deaths_times):
                    total_log_lik += self._wd * self._model.loglik_deaths(
                        obs_death=self._deaths[r][t, :],
                        new_infections=model_new_infections,
                        fatality_ratio=self._fatality_ratio,
                        time_to_death=self._time_to_death,
                        niu=self._niu,
                        k=time-1
                    )

                # Log-likelihood contribution from serology data
                for t, time in enumerate(self._serology_times):
                    total_log_lik += self._wp * \
                        self._model.loglik_positive_tests(
                            obs_pos=self._positive_tests[r][t, :],
                            output=model_output,
                            tests=self._total_tests[r][t, :],
                            sens=self._sens,
                            spec=self._spec,
                            k=time-1
                        )

            return np.sum(total_log_lik)

        except ValueError:  # pragma: no cover
            return -np.inf


[docs]
    def set_fixed_parameters(self):
        """
        Sets the non-changing parameters of the model in the class structure
        to save time in the evaluation of the log-likelihood.

        """
        # Use prior mean for the over-dispersion parameter
        self._niu = 10**(-5)

        # Initial Conditions
        susceptibles = self._susceptibles

        exposed1 = np.zeros((
            len(self._model.regions),
            self._model._num_ages)).tolist()

        exposed2 = np.zeros((
            len(self._model.regions),
            self._model._num_ages)).tolist()

        infectives1 = self._infectives

        infectives2 = np.zeros((
            len(self._model.regions),
            self._model._num_ages)).tolist()

        recovered = np.zeros((
            len(self._model.regions),
            self._model._num_ages)).tolist()

        # Beta multipliers
        betas = np.ones((len(self._model.regions), len(self._times))).tolist()

        # Other Parameters
        dI = self._model._c[-1]
        dL = self._model._c[-2]
        delta_t = self._model._delta_t

        self._parameters = [
            np.zeros(len(self._model.regions)).tolist(),
            0,
            susceptibles, exposed1, exposed2, infectives1, infectives2,
            recovered,
            betas,
            dL,
            dI,
            delta_t,
            'RK45'
        ]


    def __call__(self, x):
        """
        Evaluates the log-likelihood in a PINTS framework.

        Parameters
        ----------
        x : list
            List of free parameters used for computing the log-likelihood.

        Returns
        -------
        float
            Value of the log-likelihood at the given point in the free
            parameter space.

        """
        return self._log_likelihood(x[:-1])



#
# PHELogPrior Class
#


[docs]
class PHELogPrior(pints.LogPrior):
    """PHELogPrior Class:
    Controller class to construct the log-prior needed for optimisation or
    inference in a PINTS framework.

    Parameters
    ----------
    model : PheSEIRModel
        The model for which we solve the optimisation or inference problem.
    times : list
        List of time points at which we have data for the log-likelihood
        computation.

    """
    def __init__(self, model, times):
        super(PHELogPrior, self).__init__()
        # Set the prerequisites for the inference wrapper
        # Model
        self._model = model
        self._times = times


[docs]
    def n_parameters(self):
        """
        Returns number of parameters for log-prior object.

        Returns
        -------
        int
            Number of parameters for log-prior object.

        """
        return 2+len(np.arange(44, len(self._times), 7)) * len(
            self._model.regions)


    def __call__(self, x):
        """
        Evaluates the log-prior in a PINTS framework.

        Parameters
        ----------
        x : list
            List of free parameters used for computing the log-prior.

        Returns
        -------
        float
            Value of the log-prior at the given point in the free
            parameter space.

        """
        # Prior contribution of initial R
        log_prior = pints.UniformLogPrior([0], [5])(x[0])

        # Variance for betas
        sigma_b = x[-1]

        log_prior += gamma.logpdf(sigma_b, 1, scale=1/100)

        # Prior contribution of betas
        LEN = len(np.arange(44, len(self._times), 7))
        for r in range(len(self._model.regions)):
            log_prior += norm.logpdf(
                    np.log(x[r*LEN+1]),
                    loc=0,
                    scale=sigma_b)
            for d in range(1, LEN):
                log_prior += norm.logpdf(
                    np.log(x[r*LEN+d+1]),
                    loc=np.log(x[r*LEN+d]),
                    scale=sigma_b)

        return log_prior



#
# PheSEIRInfer Class
#


[docs]
class PheSEIRInfer(object):
    """PheSEIRInfer Class:
    Controller class for the optimisation or inference of parameters of the
    PHE model in a PINTS framework.

    Parameters
    ----------
    model : PheSEIRModel
        The model for which we solve the optimisation or inference problem.

    """
    def __init__(self, model):
        super(PheSEIRInfer, self).__init__()

        # Assign model for inference or optimisation
        if not isinstance(model, em.PheSEIRModel):
            raise TypeError('Wrong model type for parameters inference.')

        self._model = model


[docs]
    def read_model_data(
            self, susceptibles_data, infectives_data):
        """
        Sets the initial data used for the model's parameters optimisation or
        inference.

        Parameters
        ----------
        susceptibles_data : list
            List of regional age-structured lists of the initial number of
            susceptibles.
        infectives_data : list
            List of regional age-structured lists of the initial number of
            infectives in the first infectives compartment.

        """
        self._susceptibles_data = susceptibles_data
        self._infectives_data = infectives_data



[docs]
    def read_serology_data(
            self, tests_data, positives_data, serology_times, sens, spec):
        """
        Sets the serology data used for the model's parameters inference.

        Parameters
        ----------
        tests_data: list of numpy.array
            List of region-specific age-structured number of tests conducted
            as a matrix. Each column represents an age group.
        positives_data : list of numpy.array
            List of region-specific age-structured number of positive test
            results as a matrix. Each column represents an age group.
        serology_times : numpy.array
            Matrix of timepoints for which serology data is available.
        sens : float or int
            Sensitivity of the test (or ratio of true positives).
        spec : float or int
            Specificity of the test (or ratio of true negatives).

        """
        self._total_tests = tests_data
        self._positive_tests = positives_data
        self._serology_times = serology_times
        self._sens = sens
        self._spec = spec



[docs]
    def read_deaths_data(
            self, deaths_data, deaths_times, time_to_death, fatality_ratio):
        """
        Sets the serology data used for the model's parameters inference.

        Parameters
        ----------
        deaths_data : numpy.array
            List of region-specific age-structured number of deaths as a
            matrix. Each column represents an age group.
        deaths_times : numpy.array
            Matrix of timepoints for which deaths data is available.
        time_to_death : list
            List of probabilities of death of individual d days after
            infection.
        fatality_ratio : list
            List of age-specific fatality ratios.

        """
        self._deaths = deaths_data
        self._deaths_times = deaths_times
        self._time_to_death = time_to_death
        self._fatality_ratio = fatality_ratio



[docs]
    def return_loglikelihood(self, times, x, wd=1, wp=1):
        """
        Return the log-likelihood used for the optimisation or inference.

        Parameters
        ----------
        times : list
            List of time points at which we have data for the log-likelihood
            computation.
        x : list
            List of free parameters used for computing the log-likelihood.
        wd : float or int
            Proportion of the contribution of the deaths data to the
            log-likelihood.
        wp : float or int
            Proportion of the contribution of the serology data to the
            log-likelihood.

        Returns
        -------
        float
            Value of the log-likelihood at the given point in the free
            parameter space.

        """
        loglikelihood = PHELogLik(
            self._model, self._susceptibles_data, self._infectives_data, times,
            self._deaths, self._time_to_death, self._deaths_times,
            self._fatality_ratio,
            self._total_tests, self._positive_tests, self._serology_times,
            self._sens, self._spec, wd, wp)
        return loglikelihood(x)


    def _create_posterior(self, times, wd, wp):
        """
        Runs the initial conditions optimisation routine for the PHE model.

        Parameters
        ----------
        times : list
            List of time points at which we have data for the log-likelihood
            computation.
        wd : float or int
            Proportion of the contribution of the deaths data to the
            log-likelihood.
        wp : float or int
            Proportion of the contribution of the serology data to the
            log-likelihood.

        """
        # Create a likelihood
        loglikelihood = PHELogLik(
            self._model, self._susceptibles_data, self._infectives_data, times,
            self._deaths, self._time_to_death, self._deaths_times,
            self._fatality_ratio,
            self._total_tests, self._positive_tests, self._serology_times,
            self._sens, self._spec, wd, wp)

        # Create a prior
        log_prior = PHELogPrior(self._model, times)

        # Create a posterior log-likelihood (log(likelihood * prior))
        self._log_posterior = pints.LogPosterior(loglikelihood, log_prior)


[docs]
    def inference_problem_setup(self, times, num_iter, wd=1, wp=1):
        """
        Runs the parameter inference routine for the PHE model.

        Parameters
        ----------
        times : list
            List of time points at which we have data for the log-likelihood
            computation.
        num_iter : integer
            Number of iterations the MCMC sampler algorithm is run for.
        wd : float or int
            Proportion of the contribution of the deaths data to the
            log-likelihood.
        wp : float or int
            Proportion of the contribution of the serology data to the
            log-likelihood.

        Returns
        -------
        numpy.array
            3D-matrix of the proposed parameters for each iteration for
            each of the chains of the MCMC sampler.

        """
        # Starting points using optimisation object
        x0 = [self.optimisation_problem_setup(times, wd, wp)[0].tolist()]*3

        # Create MCMC routine
        mcmc = pints.MCMCController(
            self._log_posterior, 3, x0)
        mcmc.set_max_iterations(num_iter)
        mcmc.set_log_to_screen(True)
        mcmc.set_parallel(True)

        print('Running...')
        chains = mcmc.run()
        print('Done!')

        param_names = ['initial_r']
        for region in self._model.regions:
            param_names.extend(['beta_W{}_{}'.format(
                i+1, region) for i in range(
                    len(np.arange(44, len(times), 7)))])
        param_names.extend(['sigma_b'])

        # Check convergence and other properties of chains
        results = pints.MCMCSummary(
            chains=chains, time=mcmc.time(),
            parameter_names=param_names)
        print(results)

        return chains



[docs]
    def optimisation_problem_setup(self, times, wd=1, wp=1):
        """
        Runs the initial conditions optimisation routine for the PHE model.

        Parameters
        ----------
        times : list
            List of time points at which we have data for the log-likelihood
            computation.
        wd : float or int
            Proportion of the contribution of the deaths data to the
            log-likelihood.
        wp : float or int
            Proportion of the contribution of the serology data to the
            log-likelihood.

        Returns
        -------
        numpy.array
            Matrix of the optimised parameters at the end of the optimisation
            procedure.
        float
            Value of the log-posterior at the optimised point in the free
            parameter space.

        """
        self._create_posterior(times, wd, wp)

        # Starting points
        x0 = [3]
        x0.extend([1]*(len(np.arange(44, len(times), 7)) * len(
            self._model.regions)))
        x0.extend([0.1])

        # Create optimisation routine
        optimiser = pints.OptimisationController(
            self._log_posterior, x0, method=pints.CMAES)

        optimiser.set_max_unchanged_iterations(100, 1)

        found_ics, found_posterior_val = optimiser.run()
        print(found_ics, found_posterior_val)

        print("Optimisation phase is finished.")

        return found_ics, found_posterior_val
Source code for epimodels.inference.phe_inference

epimodels

Navigation

Related Topics