The source code for this document is here.
This material is provided under the Creative Commons Attribution-ShareAlike 4.0 International Public License.
Produced with R version 4.1.0.
library(grid)
library(plyr)
library(reshape2)
library(magrittr)
library(foreach)
library(iterators)
library(bbmle)
library(nloptr)
library(pomp)
stopifnot(packageVersion("pomp")>="3.5")
library(ggplot2)
library(scales)
library(sp)
library(ncf)
## Global options (incl. aakmisc database credentials), ggplot theme, RNG seed.
## The extraction had fused `)` with `theme_set(...)`; separated here.
options(
  keep.source=TRUE,
  stringsAsFactors=FALSE,
  knitr.kable.NA="",
  readr.show_types=FALSE,
  dplyr.summarise.inform=FALSE,
  encoding="UTF-8",
  aakmisc.dbname="ewmeasles",
  aakmisc.remotehost="kinglab.eeb.lsa.umich.edu",
  aakmisc.user="gravity"
)
theme_set(theme_bw())
set.seed(594709947L)
The data are measles case reports, population sizes, and weekly numbers of live births, from 954 population centers (cities, towns, villages) in England and Wales. The data are the same as those analyzed in [1]. The github repo for that paper is the definitive source.
## Pull demography and weekly measles reports from the remote database and
## aggregate the cases into biweeks; cached via pomp::stew in clean_data.rda.
stew(file="clean_data.rda",{
library(aakmisc)
startTunnel()
## Annual births and population by town, from 1944 on.
getQuery("select town,year,births,pop from demog where year>=1944 order by town,year") -> demog
getQuery("select town,date,cases from measles where year>=1944 order by town,date") %>%
## date+3 shifts so the report year is that of mid-week; NOTE(review): confirm.
mutate(year=as.integer(format(date+3,"%Y"))) %>%
## Number weeks within each town-year; pair weeks into 26 biweeks.
ddply(~town+year,mutate,week=seq_along(year),biweek=(week+1)%/%2) %>%
## Drop any 53rd week so every year has exactly 26 biweeks.
subset(week<=52,select=-c(date,week)) %>%
## Sum cases within town x year x biweek, then back to long format.
acast(town~year~biweek,value.var="cases",fun.aggregate=sum) %>%
melt(varnames=c("town","year","biweek"),value.name="cases") %>%
mutate(town=as.character(town)) %>%
arrange(town,year,biweek) %>%
join(demog,by=c("town","year")) %>%
## Convert annual births to a per-biweek rate.
mutate(births=births/26) -> dat
stopTunnel()
})
In the above, weekly case reports are aggregated into 26 biweeks per year (any 53rd week is dropped) and joined with the annual demographic data; annual births are converted to a per-biweek rate.
## Per-town fadeout summary: efrac = shrunken proportion of biweeks with zero
## reported cases; N = mean population size.
dat %>%
  ddply(~town,summarize,
    efrac=(sum(cases==0)+0.5)/(length(cases)+0.5),
    N=mean(pop)) -> fadeouts
## Regress logit(fadeout fraction) on log population size.
## (The +0.5 shrinkage guarantees efrac>0, so the subset is a safety net.)
lm(qlogis(p=efrac)~log(N),data=fadeouts,subset=efrac>0) -> fit
predict(fit,se.fit=TRUE) -> pfit
## Back-transform the fit and +/-2 SE bands to the probability scale.
## FIX: original had `fit$fit` in `lo`; `pfit$fit` is intended (symmetric with `hi`).
fadeouts %>%
  mutate(
    pred=plogis(q=pfit$fit),
    hi=plogis(q=pfit$fit+2*pfit$se.fit),
    lo=plogis(q=pfit$fit-2*pfit$se.fit),
    residual=residuals(fit)
  ) -> fadeouts
## Diagnostic plots for the fadeout regression.
## Fadeout fraction vs population size, with fit and +/-2 SE ribbon.
fadeouts %>%
  ggplot(aes(x=N,y=efrac))+
  geom_point()+
  geom_line(aes(y=pred))+
  geom_ribbon(aes(ymin=lo,ymax=hi),color="grey50",alpha=0.3)+
  scale_x_log10()+
  labs(y="proportion of zeros")
## Same, on the logit scale.
fadeouts %>%
  ggplot(aes(x=N,y=efrac))+
  geom_point()+
  geom_line(aes(y=pred))+
  geom_ribbon(aes(ymin=lo,ymax=hi),color="grey50",alpha=0.3)+
  scale_y_continuous(trans=logit_trans())+
  scale_x_log10()+
  labs(y="proportion of zeros")
## Residuals vs population size.
fadeouts %>%
  ggplot(aes(x=N,y=residual))+
  geom_point()+
  scale_x_log10()
## Residuals vs fitted values.
fadeouts %>%
  ggplot(aes(x=pred,y=residual))+
  geom_point()+
  labs(x="fitted")
## |Residuals| vs fitted, to check for heteroskedasticity.
fadeouts %>%
  ggplot(aes(x=pred,y=abs(residual)))+
  geom_point()+
  labs(x="fitted",y=expression(group("|",residual,"|")))
## Standard lm diagnostics.
plot(fit,which=1:5,ask=FALSE)
Now let’s compute the distances between the cities.
## Fetch town coordinates from the database (cached in coords.rds).
bake(file="coords.rds",{
  library(aakmisc)
  startTunnel()
  getQuery("select * from coords") %>% arrange(town) -> coords
  stopTunnel()
  coords
}) -> coords
## Pairwise great-circle distances between all towns (cached in distances.rds),
## as a square matrix with towns on both margins.
bake(file="distances.rds",{
  library(aakmisc)
  library(geosphere)
  distm(
    coords %>% subset(select=c(long,lat)),
    coords %>% subset(select=c(long,lat))) %>%
    matrix(nrow=nrow(coords),ncol=nrow(coords),
      dimnames=list(coords$town,coords$town))
}) -> distances
Smoothing spline regression of cumulative cases on cumulative births is used to estimate under-reporting and its residuals. That is, we regress cumulative cases on cumulative births to estimate the reporting rate (ur) and the susceptible depletion (z); the corrected incidence I is then estimated as cases/ur.
## Estimate town-specific, time-varying reporting rates: the local slope of a
## smoothing spline of cumulative cases vs cumulative births is the reporting
## rate ur; corrected incidence is I = cases/ur. Cached in under-reporting.rds.
bake(file="under-reporting.rds",{
  foreach (d=dlply(dat,~town),
    .combine=rbind,.inorder=FALSE,
    .packages=c("plyr")
  ) %dopar% {
    cumbirths <- cumsum(d$births)
    cumcases <- cumsum(d$cases)
    fit <- smooth.spline(cumbirths,cumcases,df=2.5)
    mutate(d,
      ur=predict(fit,x=cumbirths,deriv=1)$y,
      I=cases/ur)
  } -> dat
  dat
}) -> dat
Now reconstruct the susceptibles (via the residuals z
).
## Susceptible reconstruction: z is minus the residual of a smoothing spline of
## cumulative (corrected) incidence on cumulative births.
bake(file="susc-reconst.rds",{
  foreach (d=dlply(dat,~town),
    .combine=rbind,.inorder=FALSE,
    .packages=c("plyr")) %dopar% {
    cumbirths <- cumsum(d$births)
    cuminc <- cumsum(d$I)
    fit <- smooth.spline(cumbirths,cuminc,df=2.5)
    mutate(d,z=-residuals(fit))
  } -> dat
  dat
}) -> dat
Does the preceding scale the z variable appropriately? I.e., does it take under-reporting into account?
First, set up the data matrix for the regression. This involves lagging the I and z variables. Population size is assumed constant at its median value.
## Set up the regression data: lag I and z by one biweek, hold population at
## its town median, and code the biweek as a 26-level seasonal factor.
bake(file="data-matrix.rds",{
  dat %>%
    ddply(~town,mutate,
      Ilag=c(NA,head(I,-1)),
      zlag=c(NA,head(z,-1)),
      Ps=median(pop),
      seas=factor(biweek,levels=1:26)) %>%
    ## Drop each town's first row, whose lags are NA.
    ddply(~town,tail,-1) -> dat
  dat
}) -> dat
We’ll start by estimating the mean susceptible fraction, assuming a single global value for this parameter. Some towns have very high changes in susceptible fraction; exclude these.
## Towns whose reconstructed susceptibles swing by more than 3% of population
## (max -z/Ps > 0.03) are excluded from the global sigma estimation.
bake(file="exclusions.rds",{
  dat %>%
    ddply(~town,summarize,
      maxfrac=max(-z/Ps)) %>%
    subset(maxfrac>0.03) %>%
    extract2("town") -> excludes
  excludes
}) -> excludes
print(length(excludes))
## [1] 233
Now profile on the fraction, \(\sigma\), of susceptibles in the population. This assumes a single, global value of \(\sigma\). NOTE: the \(\beta\) are here scaled by population size.
## Deviance of the TSIR regression for one town at a trial susceptible
## fraction `sigma`. The lagged susceptibles are slag = sigma*Ps + zlag;
## glm finds `slag` in the function environment since it is not in `dat`.
sigma1dev <- function (dat, sigma) {
  slag <- with(dat,sigma*Ps+zlag)
  fit <- glm(log(I)~-1+seas+log(Ilag)+I(1/Ilag)+offset(log(slag)),
    data=dat,subset=Ilag>0&I>0)
  fit$deviance
}
## Profile the summed deviance over a grid of global susceptible fractions.
bake(file="sigma-profile.rds",{
  dat %>% subset(!(town%in%excludes)) -> dat1
  foreach (
    sigma=seq(0.03,0.25,length=100),
    .combine=rbind,.inorder=FALSE,
    .packages=c("plyr","magrittr")) %dopar% {
    dat1 %>%
      daply(~town,sigma1dev,sigma=sigma,.parallel=TRUE) %>%
      sum() -> dev
    data.frame(sigma=sigma,dev=dev)
  } -> sigmaProf
  sigmaProf
}) -> sigmaProf
## Point-estimate the global susceptible fraction by Brent minimization of the
## summed per-town deviance; then attach lagged susceptibles Slag to the data.
bake(file="sigma-bar.rds",{
  dat %>% subset(!(town%in%excludes)) -> dat1
  fit <- optim(par=0.037,lower=0.03,upper=0.10,
    method="Brent",hessian=TRUE,
    fn=function (sigma) {
      dat1 %>%
        daply(~town,sigma1dev,sigma=sigma,.parallel=TRUE) %>%
        sum()
    })
  fit$par -> sigmaBar
  sigmaBar
}) -> sigmaBar
## Lagged susceptibles at the fitted global fraction: S = sigma*Ps + z (lagged).
dat %>% mutate(Slag=sigmaBar*Ps+zlag) -> dat
sigmaProf %>% ggplot(aes(x=sigma,y=dev))+geom_line()
Our global sigma estimate is \(\sigma=0.0512\). Now, we assume this value of \(\sigma\) and fit TSIR to each of the towns individually using linear regression.
## Fit the TSIR regression town by town (at the global sigma), then shrink the
## coefficients toward a log-population trend and attach them to the data.
bake(file="tsir-fits.rds",{
  coefnames <- c(sprintf("seas%d",1:26),"log(Ilag)","I(1/Ilag)")
  newcoefnames <- c(sprintf("log.beta%02d",1:26),"alpha","m.alpha")

  ## One-town TSIR fit: returns the renamed coefficients and standard errors.
  tsirfit <- function (dat) {
    glm(log(I)~-1+seas+log(Ilag)+I(1/Ilag)+offset(log(Slag)),
      data=dat,subset=Ilag>0&I>0) %>% summary() %>%
      extract2("coefficients") -> fit
    fit[,"Estimate"] %>% extract(coefnames) %>% as.list() %>% as.data.frame() %>%
      set_names(newcoefnames) -> coefs
    fit[,"Std. Error"] %>% extract(coefnames) %>% as.list() %>% as.data.frame() %>%
      set_names(paste(newcoefnames,"se",sep=".")) -> se
    cbind(coefs,se,sigma=sigmaBar,
      town=unique(dat$town),Ps=unique(dat$Ps))
  }

  ## Long table of per-town estimates and standard errors.
  dat %>% ddply(~town,tsirfit,.parallel=TRUE) %>%
    melt(id=c("town","Ps")) %>%
    mutate(se=ifelse(grepl("\\.se$",variable),"se","est"),
      variable=sub("\\.se$","",variable)) %>%
    dcast(town+Ps+variable~se) -> tsirs

  dat %>% ddply(~town,summarize,Ps=unique(Ps)) -> tsircoef

  ## Precision-weighted regression of each coefficient on log(Ps): predicted
  ## values replace the raw per-town estimates.
  tsirs %>%
    na.omit() %>%
    ddply(~variable, function (d) {
      fit <- lm(est~log(Ps),data=d,weights=1/se^2)
      data.frame(town=tsircoef$town,value=predict(fit,newdata=tsircoef))
    },.parallel=TRUE) %>%
    dcast(town~variable) %>%
    melt(id=c("town","alpha","m.alpha"),value.name="log.beta") %>%
    mutate(biweek=as.integer(sub("log.beta","",as.character(variable)))) %>%
    arrange(town,biweek) %>%
    subset(select=-variable) -> tsircoef

  ## Attach coefficients and the lagged prevalence fraction ylag = Ilag/Ps.
  dat %>%
    join(tsircoef,by=c("town","biweek")) %>%
    mutate(ylag=Ilag/Ps) -> dat
  dat
}) -> dat
Just for interest, let’s plot \(R_0\) as a function of city size.
## R0 = beta*N (geometric-mean beta) as a function of city size.
dat %>%
  ddply(~town,summarize,N=mean(Ps),
    beta=exp(mean(log.beta)),R0=beta*N) %>%
  ggplot(aes(x=N,y=R0))+geom_point()+
  scale_x_log10(breaks=10^seq(2,7),
    labels=trans_format('log10',math_format(10^.x)))+
  scale_y_log10(breaks=seq(1,30))+
  labs(x="Population size",y=expression(R[0]))+
  theme_classic()
Shat is fitted to each town individually. Sbar is fitted globally (with some towns excluded).

Let \(X_{i}(t)\) be the observation at time \(t\) in city \(i\). We assume that \[X_{i}(t+1) \sim \mathrm{Poisson}\left(\lambda_i(t)\right)\] or, alternatively, \[X_{i}(t+1) \sim \mathrm{NegBinom}\left(\lambda_i(t),\frac{1}{\psi}\right),\] where \(\psi\) is an overdispersion parameter. In the latter, we have parameterized the negative binomial distribution so that \(\mathbb{E}\left[{X_i(t+1)}\right]=\lambda_i(t)\) and \(\mathrm{Var}\left[{X_i(t+1)}\right]=\lambda_i(t)+\psi\,\lambda_i(t)^2\).
When \(I_i(t)=0\), we have that \[\lambda_i(t) = \beta_i(t)\,S_i(t)\,\iota_i(t)^\alpha.\] In the above, \(\beta\) is constructed by fitting the TSIR model to each city independently and the susceptible pool, \(S\), is reconstructed using TSIR methods. The quantity \(\iota\) is the import rate, which we estimate using a variety of different models.
The following computes \(y\), \(S\), \(\beta\), and the matrix of reciprocal distances. It also picks out the relevant observations. These are the ones for which the preceding week saw zero cases.
## Reload the TSIR fits and lay the variables out as town x time matrices.
readRDS("tsir-fits.rds") -> dat
dat %>% acast(town~year+biweek,value.var="ylag") -> ylag
dat %>% acast(town~year+biweek,value.var="Slag") -> slag
dat %>% acast(town~year+biweek,value.var="log.beta") %>% exp() -> beta
dat %>% acast(town~year+biweek,value.var="cases") -> obs
dat %>% daply(~town,function(x)unique(x$Ps)) -> N

## Reciprocal-distance matrix aligned to the town order of the data matrices.
readRDS("distances.rds") -> distances
dd <- 1/distances
diag(dd) <- 0
dd <- dd[rownames(ylag),rownames(ylag)]

## All objects must share the same town ordering.
stopifnot(identical(rownames(ylag),rownames(slag)))
stopifnot(identical(rownames(ylag),rownames(beta)))
stopifnot(identical(rownames(ylag),rownames(obs)))
stopifnot(identical(rownames(ylag),names(N)))
stopifnot(identical(rownames(ylag),rownames(dd)))
stopifnot(identical(rownames(ylag),colnames(dd)))

## Normalize by the geometric-mean off-diagonal reciprocal distance.
ddscal <- exp(mean(log(dd[lower.tri(dd)])))
dd <- dd/ddscal

alpha <- mean(dat$alpha)

## Relevant observations: biweeks whose preceding biweek had zero cases
## (and a non-negative susceptible pool).
relevant <- which(ylag==0&slag>=0)
obs <- obs[relevant]
betaS <- beta[relevant]*slag[relevant]
The gravity model (with intercept) is \[\iota_i=N_i^{\tau_1} \left(\theta\,\sum_{j\ne i}\! N_j^{\tau_2} d_{ij}^{-\rho}\,\frac{I_j}{N_j}+\phi\right).\] Let \[Q_{ij}=\begin{cases}N_i^{\tau_2}\,d_{ij}^{-\rho}, &i\ne j\\0, &i=j\end{cases}\] and \[y_{i}=\frac{I_i}{N_i}.\]
Expressed compactly, the gravity model is \[\iota = \theta\,\mathrm{diag}(N^{\tau_1})\,Q^T\,y+\phi\,N^{\tau_1}.\]
We use R’s element-recycling feature, which allows us to write \[\mathrm{diag}(A)\,B=A\;*\;B.\]
The negative log likelihood function for the gravity model is:
## Negative log likelihood for the gravity model. theta, phi, rho, psi are
## passed on the log scale; tau1, tau2 on the natural scale. Uses the globals
## N, dd, ylag, betaS, relevant, obs, alpha set up above.
likfnGravity <- function (theta, phi, rho, psi, tau1, tau2) {
  theta <- exp(theta)
  phi <- exp(phi)
  rho <- exp(rho)
  ## dd holds (scaled) reciprocal distances, so dd^rho implements d^-rho.
  Q <- (N^tau2)*(dd^rho)
  iota <- (N^tau1)*(theta*crossprod(Q,ylag)+phi)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
Now we’ll do a likelihood profile over \(\tau_1\) and \(\tau_2\).
## Coarse-then-fine likelihood profile over (tau1,tau2) for the gravity model.
bake(file="gravity.rds",{
  ## Sobol design of starting guesses along the (tau1,tau2) profile grid.
  grd <- profile_design(
    tau1=seq(0.1, 1.4, length=25),
    tau2=seq(-1, 1, length=25),
    lower=c(psi=log(5),theta=log(1e-8),phi=log(1e-6),rho=log(0.9)),
    upper=c(psi=log(7),theta=log(1),phi=log(1e-2),rho=log(1.7)),
    nprof=10, type="sobol"
  )
  ## Coarse pass: large ftol_abs weeds out poor starts cheaply.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau1","tau2")
    formals(likfnGravity) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnGravity,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1000,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  ## Report any error messages from the coarse pass.
  results %>%
    extract(sapply(results,inherits,"try-error")) %>%
    sapply(as.character) %>%
    unique() %>%
    print()
  ## Keep the best coarse result at each (tau1,tau2) as the fine-pass start.
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    ddply(~tau1+tau2,subset,loglik==max(loglik,na.rm=TRUE)) -> grd
  ## Fine pass with tight tolerances.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau1","tau2")
    formals(likfnGravity) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnGravity,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1e-5,
        xtol_rel=1e-7,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  attr(results,"nproc") <- getDoParWorkers()
  results
}) -> results
The above was completed in 69.5 min on a 250-core cluster. Now we refine to obtain the global MLE.
## Refine the best profile point to the global gravity-model MLE.
bake("gravity-mle.rds",{
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik),select=-c(loglik,conv)) -> start
  mle2(likfnGravity,
    method="Nelder-Mead",
    start=as.list(start),
    control=list(trace=0,maxit=10000),
    skip.hessian=TRUE) -> fit
  c(coef(fit),loglik=as.numeric(logLik(fit)),
    conv=fit@details$convergence) %>%
    as.list() %>%
    as.data.frame() -> mle
  mle
}) -> mle.grav
## Unique error messages among the failed gravity fits (should be empty).
results %>%
  extract(sapply(results,inherits,"try-error")) %>%
  sapply(as.character) %>%
  unique()
## list()
## Flatten successes; back-transform log-scale parameters to natural scale.
results %<>%
  extract(!sapply(results,inherits,"try-error")) %>%
  ldply() %>%
  mutate(rho=exp(rho),theta=exp(theta),psi=exp(psi),phi=exp(phi))
results %>% count(~conv)
## Profile surface over (tau1,tau2), with the MLE marked in red.
results %>%
  ggplot(aes(x=tau1,y=tau2,z=loglik,
    fill=ifelse(loglik>max(loglik)-2000,loglik,NA)))+
  geom_tile(color=NA)+geom_contour(bins=100,color='white')+
  geom_point(color='red',shape="x",size=3,data=mle.grav)+
  geom_hline(color='red',size=0.2,linetype=2,data=mle.grav,aes(yintercept=tau2))+
  geom_vline(color='red',size=0.2,linetype=2,data=mle.grav,aes(xintercept=tau1))+
  labs(x=expression(tau[1]),y=expression(tau[2]),fill=expression(log(L)))
Regarding NLOPT return values:
Successful termination (positive return values)
Error codes (negative return values)
## Pairwise scatter of parameters vs log likelihood near the maximum.
pairs(~loglik+theta+phi+rho+psi+tau1+tau2,
  data=results,
  subset=loglik>max(loglik)-10000,cex=0.5)
We look for evidence of overdispersion by computing the scale parameter for the negative binomial model.
The model of [2] is \[\iota_i=N_i^{\tau_1}\,\left(\theta\,\sum_{j\ne i}\! I_j^{\tau_2} d_{ij}^{-\rho} + \phi\right).\] Let \[Q_{ij}=\begin{cases}N_i^{\tau_2}\,d_{ij}^{-\rho}, &i\ne j\\0, &i=j\end{cases}\] and \[y_{i}=\frac{I_i}{N_i}.\]
Expressed compactly, the [2] model is \[\iota = \theta\,\mathrm{diag}(N^{\tau_1})\,Q^T\,y^{\tau_2}+\phi\,N^{\tau_1}.\]
The negative log likelihood function for the [2] model is:
## Negative log likelihood for the Xia et al. model: identical to the gravity
## model except the donor prevalence enters as ylag^tau2.
likfnXia <- function (theta, phi, rho, psi, tau1, tau2) {
  theta <- exp(theta)
  phi <- exp(phi)
  rho <- exp(rho)
  Q <- (N^tau2)*(dd^rho)
  iota <- (N^tau1)*(theta*crossprod(Q,ylag^tau2)+phi)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
Again, a profile computation.
## Likelihood profile over (tau1,tau2) for the Xia model, from fixed starts.
bake(file="xia.rds",{
  grd <- expand.grid(
    tau1=seq(0.1, 1.5, length=25),
    tau2=seq(0.1, 1.0, length=25),
    psi=log(5),
    rho=log(1.0),
    theta=log(1e-6),
    phi=log(1e-6)
  )
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages="bbmle"
  ) %dopar% try({
    mle2(likfnXia,
      method="Nelder-Mead",
      start=as.list(start[c("rho","theta","psi","phi")]),
      fixed=as.list(start[c("tau1","tau2")]),
      control=list(trace=0,maxit=10000),
      skip.hessian=TRUE) -> fit
    c(coef(fit),loglik=as.numeric(logLik(fit)),
      conv=fit@details$convergence)
  }) -> results
  attr(results,"nproc") <- getDoParWorkers()
  results
}) -> results
The above was completed in 11.8 min on a 250-core cluster.
## Refine the best profile point to the Xia-model MLE.
bake("xia-mle.rds",{
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik),select=-c(loglik,conv)) -> start
  mle2(likfnXia,
    method="Nelder-Mead",
    start=as.list(start),
    control=list(trace=0,maxit=10000),
    skip.hessian=TRUE) -> fit
  c(coef(fit),loglik=as.numeric(logLik(fit)),
    conv=fit@details$convergence) %>%
    as.list() %>%
    as.data.frame() -> mle
  mle
}) -> mle.xia
## Unique error messages among the failed Xia fits (should be empty).
results %>%
  extract(sapply(results,inherits,"try-error")) %>%
  sapply(as.character) %>%
  unique()
## list()
## Flatten successes; back-transform log-scale parameters.
results %<>%
  extract(!sapply(results,inherits,"try-error")) %>%
  ldply() %>%
  mutate(rho=exp(rho),theta=exp(theta),psi=exp(psi),phi=exp(phi))
results %>% count(~conv)
## Profile surface over (tau1,tau2), with the MLE marked in red.
results %>%
  ggplot(aes(x=tau1,y=tau2,z=loglik,
    fill=ifelse(loglik>max(loglik)-2000,loglik,NA)))+
  geom_tile(color=NA)+geom_contour(bins=100,color='white')+
  geom_point(color='red',shape="x",size=3,data=mle.xia)+
  geom_hline(color='red',size=0.2,linetype=2,data=mle.xia,aes(yintercept=tau2))+
  geom_vline(color='red',size=0.2,linetype=2,data=mle.xia,aes(xintercept=tau1))+
  labs(x=expression(tau[1]),y=expression(tau[2]),fill=expression(log(L)))
pairs(~loglik+theta+phi+rho+psi+tau1+tau2,
  data=results,
  subset=loglik>max(loglik)-10000,cex=0.5)
By setting \(\rho = 0\) and \(\tau_1=\tau_2=1\) in the gravity model, we obtain the mean-field model.
The negative log likelihood function for the mean-field model is:
## Negative log likelihood for the mean-field model (gravity special case).
## NOTE(review): Q retains the dd factor here although the text says rho=0 —
## confirm against the definitive source.
likfnMeanField <- function (theta, phi, psi) {
  theta <- exp(theta)
  phi <- exp(phi)
  Q <- N*dd
  iota <- N*(theta*crossprod(Q,ylag)+phi)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
Now we find the MLE.
## Coarse-then-fine maximization of the mean-field likelihood from a Sobol
## design of starting points (no profiled parameters).
bake(file="meanfield.rds",{
  grd <- sobol_design(
    lower=c(psi=log(5),theta=log(1e-8),phi=log(1e-6)),
    upper=c(psi=log(7),theta=log(1),phi=log(1e-2)),
    nseq=250
  )
  ## Coarse pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c()
    formals(likfnMeanField) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnMeanField,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1000,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  ## Report errors from the coarse pass.
  results %>%
    extract(sapply(results,inherits,"try-error")) %>%
    sapply(as.character) %>%
    unique() %>%
    print()
  ## Best coarse result becomes the fine-pass start.
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik,na.rm=TRUE)) -> grd
  ## Fine pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c()
    formals(likfnMeanField) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnMeanField,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1e-5,
        xtol_rel=1e-7,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  attr(results,"nproc") <- getDoParWorkers()
  results
}) -> results
## Pairwise scatter of the surviving mean-field fits near the maximum.
pairs(~loglik+theta+phi+psi,
  data=results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply(),
  subset=loglik>max(loglik)-10000,cex=0.5)
The above was completed in 4 min on a 250-core cluster. Now we refine to obtain the global MLE.
## Refine the best start to the mean-field MLE.
bake("meanfield-mle.rds",{
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik),select=-c(loglik,conv)) -> start
  mle2(likfnMeanField,
    method="Nelder-Mead",
    start=as.list(start),
    control=list(trace=0,maxit=10000),
    skip.hessian=TRUE) -> fit
  c(coef(fit),loglik=as.numeric(logLik(fit)),
    conv=fit@details$convergence) %>%
    as.list() %>%
    as.data.frame() -> mle
  mle
}) -> mle.meanfield
## Unique error messages among failed mean-field fits (should be empty).
results %>%
  extract(sapply(results,inherits,"try-error")) %>%
  sapply(as.character) %>%
  unique()
## list()
## Flatten successes; back-transform log-scale parameters.
results %<>%
  extract(!sapply(results,inherits,"try-error")) %>%
  ldply() %>%
  mutate(theta=exp(theta),psi=exp(psi),phi=exp(phi))
results %>% count(~conv)
By setting \(\tau_1 = \tau_2 = 0\) in the gravity model, we obtain a diffusion model, which couples cities in a way that depends only on the distance between them.
The negative log likelihood function for the diffusion model is:
## Negative log likelihood for the diffusion model: coupling depends only on
## distance (tau1 = tau2 = 0 in the gravity model).
likfnDiffusion <- function (theta, phi, rho, psi) {
  theta <- exp(theta)
  phi <- exp(phi)
  rho <- exp(rho)
  Q <- dd^rho
  iota <- (theta*crossprod(Q,ylag)+phi)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
Now we’ll do a likelihood profile over \(\rho\).
## Coarse-then-fine likelihood profile over rho for the diffusion model.
bake(file="diffusion.rds",{
  grd <- profile_design(
    rho = seq(log(1),log(3),length=50),
    lower=c(psi=log(5),theta=log(1e-8),phi=log(1e-6)),
    upper=c(psi=log(7),theta=log(1),phi=log(1e-2)),
    nprof=10, type="sobol"
  )
  ## Coarse pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("rho")
    formals(likfnDiffusion) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnDiffusion,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1000,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  ## Report errors from the coarse pass.
  results %>%
    extract(sapply(results,inherits,"try-error")) %>%
    sapply(as.character) %>%
    unique() %>%
    print()
  ## Best coarse fit at each rho seeds the fine pass.
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    ddply(~rho,subset,loglik==max(loglik,na.rm=TRUE)) -> grd
  ## Fine pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("rho")
    formals(likfnDiffusion) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnDiffusion,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1e-5,
        xtol_rel=1e-7,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  attr(results,"nproc") <- getDoParWorkers()
  results
}) -> results
The above was completed in 6.4 min on a 250-core cluster. Now we refine to obtain the global MLE.
## Refine the best profile point to the diffusion-model MLE.
bake("diffusion-mle.rds",{
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik),select=-c(loglik,conv)) -> start
  mle2(likfnDiffusion,
    method="Nelder-Mead",
    start=as.list(start),
    control=list(trace=0,maxit=10000),
    skip.hessian=TRUE) -> fit
  c(coef(fit),loglik=as.numeric(logLik(fit)),
    conv=fit@details$convergence) %>%
    as.list() %>%
    as.data.frame() -> mle
  mle
}) -> mle.diffusion
## Unique error messages among failed diffusion fits (should be empty).
results %>%
  extract(sapply(results,inherits,"try-error")) %>%
  sapply(as.character) %>%
  unique()
## list()
## Flatten successes; back-transform log-scale parameters.
results %<>%
  extract(!sapply(results,inherits,"try-error")) %>%
  ldply() %>%
  mutate(theta=exp(theta),psi=exp(psi),phi=exp(phi),rho=exp(rho))
results %>% count(~conv)
## Profile of log likelihood over rho.
results %>%
  ggplot(aes(x=rho,y=loglik))+
  geom_point()+
  labs(x=expression(rho),y=expression(log(L)))
pairs(~loglik+theta+phi+psi+rho,
  data=results,
  subset=loglik>max(loglik)-10000,cex=0.5)
The competing destinations model is \[\iota_i=N_i^{\tau_1}\,\left(\theta\,\sum_j\frac{N_j^{\tau_2}}{d_{ij}^\rho}\,\left(\sum_{k \ne i, j}\frac{N_k^{\tau_2}}{d_{jk}^\rho}\right)^\delta\,\frac{I_j}{N_j}+\phi\right).\]
Let \[Q_{ij}=\begin{cases}{N_i^{\tau_2}}{d_{ij}^{-\rho}}, &i\ne j\\0, &i=j\end{cases}\] and \[R_{ji}=\sum_{k \ne i, j}{N_k^{\tau_2}}{d_{jk}^{-\rho}}=\sum_{k\ne i,j}Q_{kj}=\sum_{k\ne i}Q_{kj}=\sum_{k}Q_{kj}-Q_{ij}.\] This implies \[R^T=(\mathbb{1}\,\mathbb{1}^T-I)\,Q \qquad \Longleftrightarrow \qquad R=Q^T\,(\mathbb{1}\,\mathbb{1}^T-I)\] and \[\iota_i=N_i^{\tau_1}\,\left(\theta\,\sum_j Q_{ji}\,R_{ji}^\delta\,y_{j}+\phi\right).\]
The negative log likelihood function for the competing destinations model is:
## Ones matrix with zero diagonal: implements (11^T - I) in the R matrix.
iii <- 1-diag(length(N))

## Negative log likelihood for the competing destinations model.
likfnCompDest <- function (theta, phi, rho, psi, tau1, tau2, delta) {
  theta <- exp(theta)
  phi <- exp(phi)
  rho <- exp(rho)
  Q <- (N^tau2)*(dd^rho)
  ## R[j,i] = (sum over k != i of Q[k,j])^delta; Q's zero diagonal removes k=j.
  R <- crossprod(Q,iii)^delta
  iota <- (N^tau1)*(theta*crossprod(Q*R,ylag)+phi)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
We compute the profile likelihood as before.
## Coarse-then-fine profile over (tau1,tau2) for the competing-destinations model.
bake(file="compdest.rds",{
  grd <- profile_design(
    tau1=seq(0.1, 1.4, length=25),
    tau2=seq(-1, 1, length=25),
    lower=c(psi=log(5),theta=log(0.1),phi=log(1e-6),rho=log(1.5),delta=-3),
    upper=c(psi=log(7),theta=log(45),phi=log(1e-2),rho=log(30),delta=0),
    nprof=20, type="sobol"
  )
  ## Coarse pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau1","tau2")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1000,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  ## Report errors from the coarse pass.
  results %>%
    extract(sapply(results,inherits,"try-error")) %>%
    sapply(as.character) %>%
    unique() %>%
    print()
  ## Best coarse fit at each (tau1,tau2) seeds the fine pass.
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    ddply(~tau1+tau2,subset,loglik==max(loglik,na.rm=TRUE)) -> grd
  ## Fine pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau1","tau2")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1e-5,
        xtol_rel=1e-7,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> results
  attr(results,"nproc") <- getDoParWorkers()
  results
}) -> results
The above was completed in 245.1 min on a 250-core cluster.
## Refine the best profile point to the competing-destinations MLE.
bake("compdest-mle.rds",{
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik),select=-c(loglik,conv)) -> start
  mle2(likfnCompDest,
    method="Nelder-Mead",
    start=as.list(start),
    control=list(trace=0,maxit=10000),
    skip.hessian=TRUE) -> fit
  c(coef(fit),loglik=as.numeric(logLik(fit)),
    conv=fit@details$convergence) %>%
    as.list() %>%
    as.data.frame() -> mle
  mle
}) -> mle.compdest
## Unique error messages among failed competing-destinations fits.
results %>%
  extract(sapply(results,inherits,"try-error")) %>%
  sapply(as.character) %>%
  unique()
## list()
## Flatten successes; back-transform log-scale parameters.
results %>%
  extract(!sapply(results,inherits,"try-error")) %>%
  ldply() %>%
  mutate(rho=exp(rho),theta=exp(theta),psi=exp(psi),phi=exp(phi)) -> results
results %>% count(~conv)
## Profile surface over (tau1,tau2), with the MLE marked in red.
results %>%
  ggplot(aes(x=tau1,y=tau2,z=loglik,
    fill=ifelse(loglik>max(loglik)-2000,loglik,NA)))+
  geom_tile(color=NA)+geom_contour(bins=100,color='white')+
  geom_point(color='red',shape="x",size=3,data=mle.compdest)+
  geom_hline(color='red',size=0.2,linetype=2,data=mle.compdest,aes(yintercept=tau2))+
  geom_vline(color='red',size=0.2,linetype=2,data=mle.compdest,aes(xintercept=tau1))+
  labs(x=expression(tau[1]),y=expression(tau[2]),fill=expression(log(L)))
## Pairs plot near the maximum, dropping runaway theta values.
results %>%
  dplyr::filter(
    loglik>max(loglik)-10000,
    theta<1e6
  ) %>%
  {
    pairs(~loglik+theta+phi+rho+delta+psi+tau1+tau2,
      data=.,cex=0.5)
  }
## Coarse-then-fine profile over delta for the competing-destinations model.
bake(file="compdest_delta.rds",{
  grd <- profile_design(
    delta=seq(-1.6,0,length=250),
    lower=c(tau1=0.1,tau2=-1,psi=log(5),theta=log(0.1),phi=log(1e-6),rho=log(1.5)),
    upper=c(tau1=1.4,tau2=1,psi=log(7),theta=log(45),phi=log(1e-2),rho=log(30)),
    nprof=50, type="sobol"
  )
  ## Coarse pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("delta")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1000,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> res
  ## Report errors from the coarse pass.
  res %>%
    extract(sapply(res,inherits,"try-error")) %>%
    sapply(as.character) %>%
    unique() %>%
    print()
  ## Best coarse fit at each delta seeds the fine pass.
  res %>%
    extract(!sapply(res,inherits,"try-error")) %>%
    ldply() %>%
    ddply(~delta,subset,loglik==max(loglik,na.rm=TRUE)) -> grd
  ## Fine pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("delta")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1e-5,
        xtol_rel=1e-7,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> res1
  attr(res1,"nproc") <- getDoParWorkers()
  res1
}) -> res1
## Profile of log likelihood over delta.
res1 %>%
  ldply() %>%
  ggplot(aes(x=delta,y=loglik))+
  geom_point()
The above was completed in 366.3 min on a 250-core cluster.
## Coarse-then-fine profile over tau1 for the competing-destinations model.
bake(file="compdest_tau1.rds",{
  grd <- profile_design(
    tau1=seq(0.1,1.4,length=250),
    lower=c(tau2=-1,psi=log(5),theta=log(0.1),phi=log(1e-6),rho=log(1.5),delta=-3),
    upper=c(tau2=1,psi=log(7),theta=log(45),phi=log(1e-2),rho=log(30),delta=0),
    nprof=50, type="sobol"
  )
  ## Coarse pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau1")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1000,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> res
  ## Report errors from the coarse pass.
  res %>%
    extract(sapply(res,inherits,"try-error")) %>%
    sapply(as.character) %>%
    unique() %>%
    print()
  ## Best coarse fit at each tau1 seeds the fine pass.
  res %>%
    extract(!sapply(res,inherits,"try-error")) %>%
    ldply() %>%
    ddply(~tau1,subset,loglik==max(loglik,na.rm=TRUE)) -> grd
  ## Fine pass.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau1")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1e-5,
        xtol_rel=1e-7,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
  }) -> res2
  attr(res2,"nproc") <- getDoParWorkers()
  res2
}) -> res2
## Profile of log likelihood over tau1.
res2 %>%
  ldply() %>%
  ggplot(aes(x=tau1,y=loglik))+
  geom_point()
The above was completed in 252.4 min on a 250-core cluster.
## Profile likelihood over tau2 for the competing-destinations (CD) model.
## Two stages: (1) a coarse Sobol' multi-start at each tau2 value, then
## (2) a polishing refit from the best stage-1 point at each tau2.
bake(file="compdest_tau2.rds",{
  grd <- profile_design(
    tau2=seq(-1,1,length=250),
    lower=c(tau1=0.1,psi=log(5),theta=log(0.1),phi=log(1e-6),rho=log(1.5),delta=-3),
    upper=c(tau1=1.4,psi=log(7),theta=log(45),phi=log(1e-2),rho=log(30),delta=0),
    nprof=50, type="sobol"
  )
  ## Stage 1: hold tau2 fixed; minimize the negative log likelihood over the
  ## remaining parameters from each Sobol' start.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau2")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1000,       # deliberately loose: stage 1 is a coarse pre-fit
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
    res
  }) -> res
  ## Report any unique error messages from stage 1.
  res %>%
    extract(sapply(res,inherits,"try-error")) %>%
    sapply(as.character) %>%
    unique() %>%
    print()
  ## For each tau2, keep the best stage-1 result as the stage-2 start.
  res %>%
    extract(!sapply(res,inherits,"try-error")) %>%
    ldply() %>%
    ddply(~tau2,subset,loglik==max(loglik,na.rm=TRUE)) -> grd
  ## Stage 2: polish each profile point with tight tolerances.
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages=c("nloptr","magrittr")
  ) %dopar% try({
    fixed <- c("tau2")
    formals(likfnCompDest) %>% names() %>% setdiff(fixed) -> est
    nloptr(unlist(start[est]),
      function(x){
        do.call(likfnCompDest,
          c(start[fixed],setNames(as.list(x),est)))
      },opts=list(
        algorithm="NLOPT_LN_SBPLX",
        ftol_abs=1e-5,
        xtol_rel=1e-7,
        maxeval=10000)
    ) -> fit
    fit$solution %>% setNames(est) %>%
      c(unlist(start[fixed]),loglik=-fit$objective) %>%
      as.list() %>% as.data.frame() %>%
      cbind(conv=fit$status) -> res
    res
  }) -> res3
  attr(res3,"nproc") <- getDoParWorkers()
  res3
}) -> res3
## Plot the tau2 profile trace for the CD model.
res3 %>%
  ldply() %>%
  ggplot(aes(x=tau2,y=loglik))+
  geom_point()
The above was completed in 313.8 min on a 250-core cluster.
Let \(i\) be the recipient town; \(j\), the donor. Let \(S(i,j)\) be the collection of towns closer to town \(i\) than \(j\) is. That is, \(S(i,j) = \{k: k\ne i \ \&\ d(i,k) \le d(i,j)\}\).
\[\iota_i = N_i^{\tau_1}\,\left(\theta\,\sum_j\!\left(\frac{N_j}{\sum_{k\in S(i,j)}{N_k}}\right)^{\tau_2}\,\frac{I_j}{N_j}+\phi\right).\]
## Stouffer rank matrix: rr[i,j] = N_j / sum of populations in S(i,j),
## where S(i,j) excludes town i itself (hence the -N[i] correction: the
## distance test distances[i,]<=distances[i,j] includes k=i at distance 0).
bake(file="rankmat.rds",{
  rr <- array(dim=dim(distances),dimnames=dimnames(distances))
  for (i in seq_along(N)) {
    for (j in seq_along(N)) {
      rr[i,j] <- N[j]/(sum(N[distances[i,]<=distances[i,j]])-N[i])
    }
  }
  diag(rr) <- 0   # no self-coupling
  rr
}) -> rr
The negative log likelihood function for the [2] model is:
## Negative log likelihood for the Stouffer rank model.
## theta, phi, psi enter on the log scale; reads N, rr, ylag, betaS,
## relevant, alpha, obs from the enclosing environment.
likfnStouffer <- function (theta, phi, tau1, tau2, psi) {
  theta <- exp(theta)
  phi <- exp(phi)
  ## expected importation: iota_i = N_i^tau1 (theta sum_j rr_ij^tau2 I_j/N_j + phi)
  iota <- (N^tau1)*(theta*((rr^tau2)%*%ylag)+phi)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
We compute the profile likelihood as before.
## Grid of (tau1,tau2) profile fits for the Stouffer model: at each grid
## point, (theta,phi,psi) are re-estimated with tau1,tau2 held fixed.
bake(file="stouffer.rds",{
  grd <- expand.grid(
    tau1=seq(0.5, 1.2, length=25),
    tau2=seq(0.5, 2.0, length=25),
    theta=log(0.2),
    phi=log(0.0001),
    psi=log(5)
  )
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages="bbmle"
  ) %dopar% try({
    mle2(likfnStouffer,
      method="Nelder-Mead",
      start=as.list(start[c("theta","phi","psi")]),
      fixed=as.list(start[c("tau1","tau2")]),
      control=list(trace=0,maxit=10000),
      skip.hessian=TRUE) -> fit
    c(coef(fit),loglik=as.numeric(logLik(fit)),conv=fit@details$convergence)
  }) -> results
  attr(results,"nproc") <- getDoParWorkers()
  results
}) -> results
The above was completed in 7 min on a 250-core cluster.
## Refit the Stouffer model from the best grid point, now estimating all
## parameters, to obtain the MLE.
bake("stouffer-mle.rds",{
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik),select=-c(loglik,conv)) -> start
  mle2(likfnStouffer,
    method="Nelder-Mead",
    start=as.list(start),
    control=list(trace=0,maxit=10000),
    skip.hessian=TRUE) -> fit
  c(coef(fit),loglik=as.numeric(logLik(fit)),
    conv=fit@details$convergence) %>%
    as.list() %>%
    as.data.frame() -> mle
  mle
}) -> mle.stouffer
## Inspect unique error messages among the grid fits (none here).
results %>%
  extract(sapply(results,inherits,"try-error")) %>%
  sapply(as.character) %>%
  unique()
## list()
## Drop failed fits, tabulate, and back-transform parameters to the natural scale.
results %<>%
  extract(!sapply(results,inherits,"try-error")) %>%
  ldply() %>%
  subset(is.finite(loglik)) %>%
  mutate(theta=exp(theta),psi=exp(psi),phi=exp(phi))
results %>% count(~conv)
## Likelihood surface over (tau1,tau2), with the MLE marked.
results %>%
  ggplot(aes(x=tau1,y=tau2,z=loglik,
    fill=ifelse(loglik>max(loglik)-2000,loglik,NA)))+
  geom_tile(color=NA)+geom_contour(bins=100,color='white')+
  geom_point(color='red',shape="x",size=3,data=mle.stouffer)+
  geom_hline(color='red',size=0.2,linetype=2,data=mle.stouffer,aes(yintercept=tau2))+
  geom_vline(color='red',size=0.2,linetype=2,data=mle.stouffer,aes(xintercept=tau1))+
  labs(x=expression(tau[1]),y=expression(tau[2]),fill=expression(log(L)))
## Pairwise scatter of parameters among the better-fitting grid points.
pairs(~loglik+theta+phi+psi+tau1+tau2,
  data=results,
  subset=loglik>max(loglik,na.rm=TRUE)-10000,cex=0.5)
Let \(i\) be the recipient town; \(j\), the donor. Let \(S'(i,j)\) be the collection of towns closer to town \(i\) than \(j\) is. That is, \(S'(i,j) = \{k: d(i,k) \le d(i,j)\}\). Note that \(S'(i,j)\) includes \(i\), whilst \(S(i,j)\) does not.
\[\iota_i = N_i^{\tau_1}\,\left(\theta\,\sum_j\!\left(\frac{N_j}{\sum_{k\in S'(i,j)}{N_k}}\right)^{\tau_2}\,\frac{I_j}{N_j}+\phi\right)\]
## Variant Stouffer rank matrix: as rankmat, but S'(i,j) includes town i,
## so no -N[i] correction in the denominator.
bake(file="rankmat1.rds",{
  rr <- array(dim=dim(distances),dimnames=dimnames(distances))
  for (i in seq_along(N)) {
    for (j in seq_along(N)) {
      rr[i,j] <- N[j]/sum(N[distances[i,]<=distances[i,j]])
    }
  }
  diag(rr) <- 0   # no self-coupling
  rr
}) -> rr
## Negative log likelihood for the Stouffer variant model (uses the
## S'(i,j)-based rank matrix rr).  Same free variables as likfnStouffer.
likfnStouffer1 <- function (theta, phi, tau1, tau2, psi) {
  theta <- exp(theta)
  phi <- exp(phi)
  ## %*% binds tighter than *, so this is theta*((rr^tau2)%*%ylag)+phi
  iota <- (N^tau1)*(theta*(rr^tau2)%*%ylag+phi)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
The profile likelihood computation.
## (tau1,tau2) profile grid for the Stouffer variant model.
bake(file="stouffer1.rds",{
  grd <- expand.grid(
    tau1=seq(0.5, 1.2, length=25),
    tau2=seq(0.5, 2.0, length=25),
    theta=log(0.2),
    phi=log(0.0001),
    psi=log(5)
  )
  foreach(start=iter(grd,"row"),
    .errorhandling="pass",
    .inorder=FALSE,
    .packages="bbmle"
  ) %dopar% try({
    mle2(likfnStouffer1,
      method="Nelder-Mead",
      start=as.list(start[c("theta","phi","psi")]),
      fixed=as.list(start[c("tau1","tau2")]),
      control=list(trace=0,maxit=10000),
      skip.hessian=TRUE) -> fit
    c(coef(fit),loglik=as.numeric(logLik(fit)),conv=fit@details$convergence)
  }) -> results
  attr(results,"nproc") <- getDoParWorkers()
  results
}) -> results
The above was completed in 6.1 min on a 250-core cluster.
## Refit the Stouffer variant from the best grid point to get its MLE.
bake("stouffer1-mle.rds",{
  results %>%
    extract(!sapply(results,inherits,"try-error")) %>%
    ldply() %>%
    subset(loglik==max(loglik),select=-c(loglik,conv)) -> start
  mle2(likfnStouffer1,
    method="Nelder-Mead",
    start=as.list(start),
    control=list(trace=0,maxit=10000),
    skip.hessian=TRUE) -> fit
  c(coef(fit),loglik=as.numeric(logLik(fit)),
    conv=fit@details$convergence) %>%
    as.list() %>%
    as.data.frame() -> mle
  mle
}) -> mle.stouffer1
## Inspect unique error messages among the grid fits (none here).
results %>%
  extract(sapply(results,inherits,"try-error")) %>%
  sapply(as.character) %>%
  unique()
## list()
## Drop failed fits and tabulate convergence codes.  NB: unlike the Stouffer
## chunk above, parameters are left on the log scale here.
results %<>%
  extract(!sapply(results,inherits,"try-error")) %>%
  ldply() %>%
  subset(is.finite(loglik))
results %>% count(~conv)
## Likelihood surface over (tau1,tau2) for the variant model, MLE marked.
results %>%
  ggplot(aes(x=tau1,y=tau2,z=loglik,
    fill=ifelse(loglik>max(loglik)-2000,loglik,NA)))+
  geom_tile(color=NA)+geom_contour(bins=100,color='white')+
  geom_point(color='red',shape="x",size=3,data=mle.stouffer1)+
  geom_hline(color='red',size=0.2,linetype=2,data=mle.stouffer1,aes(yintercept=tau2))+
  geom_vline(color='red',size=0.2,linetype=2,data=mle.stouffer1,aes(xintercept=tau1))+
  labs(x=expression(tau[1]),y=expression(tau[2]),fill=expression(log(L)))
## Pairwise scatter of parameters among better-fitting grid points.
pairs(~loglik+theta+phi+psi+tau1+tau2,
  data=results,
  subset=loglik>max(loglik,na.rm=TRUE)-10000,cex=0.5)
\[\iota_i=\theta \sum_j N_j \frac{N_j N_i}{(N_j + \sum_{k \in S(i,j)} N_k)(N_j + N_i + \sum_{k \in S(i,j)} N_k)} \frac{I_j}{N_j}\]
## Radiation-model coupling matrix.  Here s = total population within
## distance d(i,j) of i (including both i and j), so the two denominator
## factors are s-N[i] = N_j + sum_{S(i,j)} and s = N_i + N_j + sum_{S(i,j)},
## matching the displayed formula.
bake(file="radmat.rds",{
  rr <- array(dim=dim(distances),dimnames=dimnames(distances))
  for (i in seq_along(N)) {
    for (j in seq_along(N)) {
      s <- sum(N[distances[i,]<=distances[i,j]])
      rr[i,j] <- N[i]*N[j]*N[j]/s/(s-N[i])
    }
  }
  diag(rr) <- 0   # no self-coupling
  rr
}) -> rr
## Negative log likelihood for the (parameter-free-coupling) radiation model:
## only the overall rate theta and overdispersion psi are estimated.
likfnRadiation <- function (theta, psi) {
  theta <- exp(theta)
  iota <- theta*(rr%*%ylag)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
## Fit the radiation model by direct Nelder-Mead maximization (with Hessian,
## since there are only two parameters).
bake(file="radiation-mle.rds",{
  mle2(likfnRadiation,
    method="Nelder-Mead",
    start=list(theta=log(1),psi=log(5)),
    control=list(trace=0,maxit=10000),
    skip.hessian=FALSE)
}) -> fit
summary(fit)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = likfnRadiation, start = list(theta = log(1),
## psi = log(5)), method = "Nelder-Mead", skip.hessian = FALSE,
## control = list(trace = 0, maxit = 10000))
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## theta -2.0846191 0.0069030 -301.99 < 2.2e-16 ***
## psi 1.7421955 0.0071838 242.52 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: 400456.9
logLik(fit)
## 'log Lik.' -200228.5 (df=2)
## Collect the radiation-model MLE and its log likelihood into a one-row data frame.
c(coef(fit),loglik=logLik(fit)) %>%
as.list() %>% as.data.frame() -> mle.radiation
\[\iota_i=\theta \sum_j N_j \frac{N_j N_i}{(N_j + \sum_{k \in S'(i,j)} N_k)(N_j + N_i + \sum_{k \in S'(i,j)} N_k)} \frac{I_j}{N_j}\]
## Variant radiation coupling matrix, based on S'(i,j) (which includes i).
## NOTE(review): with s already including N[i], the factors used here are
## (s+N[i]) and s — confirm against the displayed formula before relying on it.
bake(file="radmat1.rds",{
  rr <- array(dim=dim(distances),dimnames=dimnames(distances))
  for (i in seq_along(N)) {
    for (j in seq_along(N)) {
      s <- sum(N[distances[i,]<=distances[i,j]])
      rr[i,j] <- N[i]*N[j]*N[j]/(s+N[i])/(s)
    }
  }
  diag(rr) <- 0   # no self-coupling
  rr
}) -> rr
## Negative log likelihood for the radiation variant model (same form as
## likfnRadiation; rr is the variant coupling matrix).
likfnRadiation1 <- function (theta, psi) {
  theta <- exp(theta)
  iota <- theta*(rr%*%ylag)
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
## Fit the radiation variant model.
bake(file="radiation1-mle.rds",{
  mle2(likfnRadiation1,
    method="Nelder-Mead",
    start=list(theta=log(1),psi=log(5)),
    control=list(trace=0,maxit=10000),
    skip.hessian=FALSE)
}) -> fit
summary(fit)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = likfnRadiation1, start = list(theta = log(1),
## psi = log(5)), method = "Nelder-Mead", skip.hessian = FALSE,
## control = list(trace = 0, maxit = 10000))
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## theta -1.8844494 0.0067635 -278.62 < 2.2e-16 ***
## psi 1.7252532 0.0072161 239.08 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: 399364.3
logLik(fit)
## 'log Lik.' -199682.2 (df=2)
## Collect the radiation-variant MLE into a one-row data frame.
c(coef(fit),loglik=logLik(fit)) %>%
as.list() %>% as.data.frame() -> mle.radiation1
We extend the radiation variant model by allowing a background importation rate proportional to city size.
\[\iota_i=\theta \sum_j N_j \frac{N_j N_i}{(N_j + \sum_{k \in S'(i,j)} N_k)(N_j + N_i + \sum_{k \in S'(i,j)} N_k)} \frac{I_j}{N_j}+\phi\,N_i\]
## Reload the variant radiation coupling matrix (computed above) into rr.
readRDS("radmat1.rds") -> rr
## Negative log likelihood for the extended radiation model: radiation-variant
## coupling plus a background importation rate phi proportional to city size.
likfnXRad1 <- function (theta, psi, phi) {
  theta <- exp(theta)
  phi <- exp(phi)
  iota <- theta*(rr%*%ylag)+phi*N
  lambda <- betaS*iota[relevant]^alpha
  -sum(dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE))
}
## Fit the extended radiation model.
bake(file="xrad1-mle.rds",{
  mle2(likfnXRad1,
    method="Nelder-Mead",
    start=list(theta=log(1),psi=log(5),phi=log(0.00001)),
    control=list(trace=0,maxit=10000),
    skip.hessian=FALSE)
}) -> fit
summary(fit)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = likfnXRad1, start = list(theta = log(1), psi = log(5),
## phi = log(1e-05)), method = "Nelder-Mead", skip.hessian = FALSE,
## control = list(trace = 0, maxit = 10000))
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## theta -2.4559288 0.0111829 -219.62 < 2.2e-16 ***
## psi 1.6285242 0.0073544 221.43 < 2.2e-16 ***
## phi -11.6821427 0.0183598 -636.29 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: 393261.7
logLik(fit)
## 'log Lik.' -196630.9 (df=3)
## Collect the extended-radiation MLE into a one-row data frame.
c(coef(fit),loglik=logLik(fit)) %>%
as.list() %>% as.data.frame() -> mle.xrad1
model | \(\ell\) | \(\mathrm{QAIC}\) | \(\hat{c}\) | \(\Delta\!\mathrm{QAIC}\) | \(\log_{10}{\theta}\) | \(\log_{10}{\phi}\) | \(\rho\) | \(\tau_1\) | \(\tau_2\) | \(\delta\) | \(\psi\) |
---|---|---|---|---|---|---|---|---|---|---|---|
SV | -196466.8 | 137797.9 | 2.852 | 0.000 | -0.796 | -4.563 | 0.883 | 1.730 | 5.068 | ||
XR | -196630.9 | 137908.9 | 3.184 | 111.031 | -1.067 | -5.073 | 5.096 | ||||
CD | -196654.4 | 137933.4 | 2.883 | 135.543 | 0.455 | -3.695 | 1.861 | 0.660 | 0.501 | -1.008 | 5.076 |
S | -196778.9 | 138016.7 | 2.764 | 218.855 | -0.820 | -4.291 | 0.819 | 1.440 | 5.140 | ||
G | -197733.6 | 138688.3 | 2.886 | 890.392 | -3.486 | -3.446 | 1.517 | 0.615 | 0.141 | 5.255 | |
X | -198028.0 | 138894.8 | 2.851 | 1096.925 | -6.023 | -6.303 | 0.820 | 0.607 | 0.433 | 5.327 | |
RV | -199682.2 | 140046.9 | 3.951 | 2249.003 | -0.818 | 5.614 | |||||
R | -200228.5 | 140430.0 | 4.369 | 2632.141 | -0.905 | 5.710 | |||||
MF | -200369.7 | 140535.1 | 4.098 | 2737.227 | -8.769 | -5.083 | 5.745 | ||||
D | -202735.5 | 142192.3 | 2.023 | 4394.380 | -0.719 | -0.788 | 1.942 | 6.264 |
We can predict importation rates from fitted models
#Gravity
## Predicted town-to-town coupling matrix at the gravity-model MLE.
readRDS("gravity-mle.rds") %$% {
  theta <- exp(theta)
  phi <- exp(phi)
  rho <- exp(rho)
  Q <- (N^tau2)*(dd^rho)
  (N^tau1)*(theta*t(Q))
} -> GRmat
#Xia
## Predicted coupling matrix at the Xia-model MLE (same functional form as
## the gravity matrix, different fitted parameters).
readRDS("xia-mle.rds") %$% {
  theta <- exp(theta)
  phi <- exp(phi)
  rho <- exp(rho)
  Q <- (N^tau2)*(dd^rho)
  (N^tau1)*(theta*t(Q))
} -> XImat
#CD
## Predicted coupling matrix at the competing-destinations MLE; R is the
## competition factor (crossprod(Q,iii)^delta, with iii defined elsewhere).
readRDS("compdest-mle.rds") %$% {
  theta <- exp(theta)
  phi <- exp(phi)
  rho <- exp(rho)
  Q <- (N^tau2)*(dd^rho)
  R <- crossprod(Q,iii)^delta
  (N^tau1)*(theta*t(Q*R))
} -> CDmat
#Stouffer
## Predicted coupling matrix at the Stouffer MLE.  The background rate phi
## is transformed but not used in the town-to-town matrix.
readRDS("stouffer-mle.rds") %$% {
  readRDS("rankmat.rds") -> rr
  theta <- exp(theta)
  phi <- exp(phi)
  (N^tau1)*(theta*((rr^tau2)))
} -> STmat
#Stouffer variant
## Predicted coupling matrix at the Stouffer-variant MLE.
readRDS("stouffer1-mle.rds") %$% {
  readRDS("rankmat1.rds") -> rr
  theta <- exp(theta)
  phi <- exp(phi)
  (N^tau1)*(theta*((rr^tau2)))
} -> SVmat
#Radiation
## Predicted coupling matrix at the radiation-model MLE.
mle.radiation %$% {
  readRDS("radmat.rds") -> rr
  theta <- exp(theta)
  theta*rr
} -> RDmat
#Radiation Variant
## Predicted coupling matrix at the radiation-variant MLE.
mle.radiation1 %$% {
  readRDS("radmat1.rds") -> rr
  theta <- exp(theta)
  theta*rr
} -> RVmat
#Extended radiation
## Predicted coupling matrix at the extended-radiation MLE.  Only the
## town-to-town term is mapped; the phi*N background term is omitted here.
mle.xrad1 %$% {
  readRDS("radmat1.rds") -> rr
  theta <- exp(theta)
  theta*rr
} -> XRmat
## Assemble per-town predicted import (row means) and export (column means)
## rates for each model into long format, with per-capita rates and coordinates.
bake("impexp.rds",info=FALSE,{
  data.frame(
    SV_import=SVmat %>% rowMeans(),
    S_import=STmat %>% rowMeans(),
    G_import=GRmat %>% rowMeans(),
    CD_import=CDmat %>% rowMeans(),
    RV_import=RVmat %>% rowMeans(),
    R_import=RDmat %>% rowMeans(),
    XR_import=XRmat %>% rowMeans(),
    SV_export=SVmat %>% colMeans(),
    S_export=STmat %>% colMeans(),
    G_export=GRmat %>% colMeans(),
    CD_export=CDmat %>% colMeans(),
    RV_export=RVmat %>% colMeans(),
    R_export=RDmat %>% colMeans(),
    XR_export=XRmat %>% colMeans(),
    N=N
  ) %>%
    name_rows() %>%
    dplyr::rename(town=.rownames) %>%
    tidyr::gather(variable,value,-N,-town) %>%
    tidyr::separate(variable,into=c("model","variable")) %>%
    tidyr::spread(variable,value) %>%
    dplyr::arrange(-N,town,model) %>%
    dplyr::mutate(erate=export/N,irate=import/N) %>%
    join(coords,by="town")
}) -> impexp
## Per-capita import/export rates versus community size, faceted by model.
impexp %>%
  dplyr::select(town,N,model,export=erate,import=irate) %>%
  dplyr::filter(model %in% c("SV","CD","G","XR")) %>%
  dplyr::mutate(
    model=factor(model,levels=c("SV","XR","CD","G"))
  ) %>%
  tidyr::gather(rate,value,import,export) %>%
  ggplot(aes(x=N,y=value))+
  geom_point(alpha=0.3,size=0.1)+
  scale_x_log10(breaks=c(1e3,1e4,1e5,1e6),
    labels=c(
      expression(10^{3}),
      expression(10^{4}),
      expression(10^{5}),
      expression(10^{6})
    ))+
  scale_y_log10(breaks=c(1e-6,1e-5,1e-4,1e-3),
    labels=c(
      expression(10^{-6}),
      expression(10^{-5}),
      expression(10^{-4}),
      expression(10^{-3})
    ))+
  labs(x="community size (inhabitants)",y="rate (per inhabitant per biweek)")+
  facet_grid(model~rate)
## Load the map polygons and compute the England+Wales bounding box.
readRDS("BritishIsles.rds") -> gb
gb %>%
  subset(NAME_1 %in% c("England","Wales")) %>%
  bbox() -> bbox
## Pairwise between-model differences in per-capita import/export rates,
## per town, for the four best models.
bake(file="ied.rds",
  dependson=impexp,{
  impexp %>%
    dplyr::select(town,model,export=erate,import=irate) %>%
    dplyr::filter(model %in% c("XR","G","CD","SV")) %>%
    tidyr::gather(variable,value,export,import) %>%
    acast(model~town~variable,value.var="value") %>%
    aaply(c(2,3),function(x)outer(x,x,FUN="-")) %>%   # all model-vs-model differences
    melt() %>%
    dplyr::rename(town=X1,variable=X2,m1=Var3,m2=Var4) %>%
    mutate(
      m1=ordered(m1,levels=c("XR","G","CD","SV")),
      m2=ordered(m2,levels=c("XR","G","CD","SV"))
    ) %>%
    dplyr::filter(m1 > m2) %>%                        # keep each unordered pair once
    dplyr::left_join(coords,by="town") -> ied
  ied
}) -> ied
## Enumerate the six unordered model pairs to be compared.
expand.grid(m2=1:4,m1=1:4) %>%
  dplyr::filter(m1<m2) %>%
  dplyr::mutate(
    model1=c("SV","CD","G","XR")[m1],
    model2=c("SV","CD","G","XR")[m2]
  ) -> comps
## One map per model pair: spatial pattern of differences in predicted
## per-capita import and export rates.  NB: the loop variable is named `c`,
## shadowing base::c as a variable (function lookup still finds base::c).
foreach (c=iter(comps,"row")) %do% {
  ied %>%
    dplyr::filter(m1==c$model1 & m2==c$model2) %>%
    arrange(variable,abs(value)) -> d      # plot large |differences| last (on top)
  gb %>%
    ggplot(aes(x=long,y=lat))+
    geom_map(aes(map_id=id),map=fortify(gb),fill=NA,color="black",size=0.3)+
    coord_map(projection="gilbert")+
    lims(x=bbox["x",],y=bbox["y",])+
    geom_point(
      data=d,
      aes(x=long,y=lat,color=value),
      size=1
    )+
    guides(alpha="none")+
    scale_color_gradient2(mid="gray90")+
    labs(title="differences in predicted per capita import and export rates",
      subtitle=sprintf("%s - %s",c$model1,c$model2),
      color="",x="",y="")+
    facet_wrap(~variable,nrow=1)+
    theme(axis.text=element_blank()) -> pl
  pl
} -> ratemaps
print(ratemaps)
## NOTE(review): sv_g_exp and sv_g_imp are not defined anywhere in this
## chunk — presumably created elsewhere in the document; confirm before running.
print(sv_g_exp)
print(sv_g_imp)
Let’s make a spatial plot of the residuals of the logit-log relationship between fadeouts and population size.
## Map the residuals of the logit(fadeout fraction) ~ log(N) regression.
fadeouts %>%
  dplyr::left_join(coords,by="town") %>%
  arrange(abs(residual)) -> fo           # plot large |residuals| last (on top)
## NOTE(review): the original piped only theme() into print() (%>% binds
## tighter than +); parenthesizing the whole plot makes the intent explicit.
(gb %>%
  ggplot(aes(x=long,y=lat))+
  geom_map(aes(map_id=id),map=fortify(gb),fill=NA,color="black",size=0.3)+
  geom_point(
    data=fo,
    aes(x=long,y=lat,color=residual),
    size=1
  )+
  ## guides(alpha="none")+
  scale_color_gradient2(mid="gray90")+
  labs(color="",x="",y="")+
  coord_map(projection="gilbert")+
  lims(x=bbox["x",],y=bbox["y",])+
  theme(axis.text=element_blank())) %>%
  print()
## Case-report matrix: one row per town, one column per (year,biweek).
dat %>% acast(town~year+biweek,value.var="cases") -> obs1
## Per-town mean log likelihood under the gravity model at a given MLE.
## Returns, for each town, the mean of the per-datum log likelihoods
## (zeros mark non-"relevant" cells and are excluded from the mean).
## (An unused `npar <- 6` in the original has been removed.)
likGravity <- function (mle) {
  mle %$% {
    theta <- exp(theta)
    phi <- exp(phi)
    rho <- exp(rho)
    Q <- (N^tau2)*(dd^rho)
    iota <- (N^tau1)*(theta*crossprod(Q,ylag)+phi)
    lambda <- betaS*iota[relevant]^alpha
    ll <- dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE)
    rv <- array(0,dim=dim(iota),dimnames=dimnames(iota))
    rv[relevant] <- ll
    rowSums(rv)/rowSums(rv!=0)
  }
}
## Per-town mean log likelihood under the competing-destinations model.
likCompDest <- function (mle) {
  mle %$% {
    theta <- exp(theta)
    phi <- exp(phi)
    rho <- exp(rho)
    Q <- (N^tau2)*(dd^rho)
    R <- crossprod(Q,iii)^delta        # competition factor
    iota <- (N^tau1)*(theta*crossprod(Q*R,ylag)+phi)
    lambda <- betaS*iota[relevant]^alpha
    ll <- dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE)
    rv <- array(0,dim=dim(iota),dimnames=dimnames(iota))
    rv[relevant] <- ll
    rowSums(rv)/rowSums(rv!=0)
  }
}
## Per-town mean log likelihood under the Stouffer variant model.
likStouffer1 <- function (mle) {
  readRDS("rankmat1.rds") -> rr
  mle %$% {
    theta <- exp(theta)
    phi <- exp(phi)
    iota <- (N^tau1)*(theta*(rr^tau2)%*%ylag+phi)
    lambda <- betaS*iota[relevant]^alpha
    ll <- dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE)
    rv <- array(0,dim=dim(iota),dimnames=dimnames(iota))
    rv[relevant] <- ll
    rowSums(rv)/rowSums(rv!=0)
  }
}
## Per-town mean log likelihood under the extended radiation model.
likXRad1 <- function (mle) {
  readRDS("radmat1.rds") -> rr
  mle %$% {
    theta <- exp(theta)
    phi <- exp(phi)
    iota <- theta*(rr%*%ylag)+phi*N
    lambda <- betaS*iota[relevant]^alpha
    ll <- dnbinom(x=obs,mu=lambda,size=exp(-psi),log=TRUE)
    rv <- array(0,dim=dim(iota),dimnames=dimnames(iota))
    rv[relevant] <- ll
    rowSums(rv)/rowSums(rv!=0)
  }
}
## Restrict the spatial likelihood comparison to towns below poplim inhabitants
## (larger cities have few or no fadeouts).  NOTE(review): mle.compdest and
## mle.grav are not defined in this chunk — presumably earlier in the document.
poplim <- 1e5
stew(
  file="comps.rda",
  dependson=list(
    mle.stouffer1,mle.compdest,mle.grav,mle.xrad1,
    N,poplim),
  {
    data.frame(
      SV=likStouffer1(mle.stouffer1),
      CD=likCompDest(mle.compdest),
      G=likGravity(mle.grav),
      XR=likXRad1(mle.xrad1),
      N=N
    ) %>%
      name_rows() %>%
      dplyr::rename(town=.rownames) %>%
      dplyr::left_join(coords,by="town") %>%
      dplyr::filter(N<poplim) %>%
      tidyr::gather(model,value,-N,-town,-long,-lat) -> spatialLiks
    expand.grid(m2=1:4,m1=1:4) %>%
      dplyr::filter(m1<m2) %>%
      dplyr::mutate(
        model1=c("SV","CD","G","XR")[m1],
        model2=c("SV","CD","G","XR")[m2]
      ) -> comps
  })
## For each model pair: per-town mean log-likelihood differences vs town size.
## NB: loop variable `c` shadows base::c as a variable (function calls still
## resolve to base::c).
foreach (c=iter(comps,"row")) %do% {
  spatialLiks %>%
    dplyr::filter(model %in% c("SV","CD","G","XR")) %>%
    dplyr::mutate(
      model=factor(model,levels=c("SV","XR","CD","G"))
    ) %>%
    dplyr::filter(model==c$model1) %>%
    dplyr::select(model,town,N,value) -> d1
  spatialLiks %>%
    dplyr::filter(model %in% c("SV","CD","G","XR")) %>%
    dplyr::mutate(
      model=factor(model,levels=c("SV","XR","CD","G"))
    ) %>%
    dplyr::filter(model==c$model2) %>%
    dplyr::select(model,town,N,value) -> d2
  dplyr::full_join(d1,d2,by=c("town","N"),
    suffix=c("_1","_2")) %>%
    mutate(diff=value_1-value_2) -> dd
  dd %>%
    ggplot(aes(x=N,y=diff,color=diff>0,group=1))+
    geom_point()+
    scale_x_log10()+
    guides(color="none")+
    geom_smooth(method="loess")+
    scale_color_manual(values=c(`TRUE`=muted("blue"),`FALSE`=muted("red")))+
    labs(title="mean per datum differences in log likelihood",
      subtitle=sprintf("%s - %s",c$model1,c$model2),
      x="",y="",color="") -> pl
  pl
} -> likpl
print(likpl)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
## For each model pair: map the per-town log-likelihood differences, with
## opacity scaled by |difference|.
foreach (c=iter(comps,"row")) %do% {
  spatialLiks %>% dplyr::filter(model==c$model1) -> d1
  spatialLiks %>% dplyr::filter(model==c$model2) -> d2
  dplyr::full_join(d1,d2,by=c("town","N","long","lat"),
    suffix=c("_1","_2")) %>%
    mutate(diff=value_1-value_2) -> dd
  gb %>%
    ggplot(aes(x=long,y=lat))+
    geom_map(aes(map_id=id),map=fortify(gb),fill=NA,color="black",size=0.3)+
    coord_map(projection="gilbert")+
    lims(x=bbox["x",],y=bbox["y",])+
    geom_point(
      data=dd,
      aes(x=long,y=lat,color=diff,alpha=abs(diff)/max(abs(diff))),
      size=1
    )+
    guides(alpha="none")+
    scale_color_gradient2(mid="gray90")+
    labs(title="per datum differences in log likelihood",
      subtitle=sprintf("%s - %s",c$model1,c$model2),
      x="",y="",color="")+
    theme(axis.text=element_blank()) -> pl
  pl
} -> likmaps
print(likmaps)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
## Local indicators of spatial association (LISA) for two selected model
## pairs (rows 2 and 4 of comps); only towns with locally significant
## (p < 0.05) log-likelihood differences are mapped.
foreach (c=iter(comps[c(2,4),],"row")) %do% {
  spatialLiks %>% dplyr::filter(model==c$model1) -> d1
  spatialLiks %>% dplyr::filter(model==c$model2) -> d2
  dplyr::full_join(d1,d2,by=c("town","N","long","lat"),
    suffix=c("_1","_2")) %>%
    mutate(diff=value_1-value_2) -> dd
  with(dd,
    ncf::lisa(x=long,y=lat,z=diff,neigh=30,latlon=TRUE,quiet=TRUE)
  ) -> ddlisa
  dd %>%
    dplyr::mutate(
      corr=ddlisa$correlation,
      mean=ddlisa$mean,
      p=ddlisa$p
    ) %>%
    dplyr::filter(p<0.05) -> dd
  gb %>%
    ggplot(aes(x=long,y=lat))+
    geom_map(aes(map_id=id),map=fortify(gb),fill=NA,color="black",size=0.3)+
    coord_map(projection="gilbert")+
    lims(x=bbox["x",],y=bbox["y",])+
    geom_point(
      data=dd,
      aes(x=long,y=lat,color=diff,alpha=0.5),
      size=1
    )+
    guides(alpha="none")+
    scale_color_gradient2(mid="gray90")+
    labs(title="significant per datum differences in log likelihood",
      subtitle=sprintf("%s - %s",c$model1,c$model2),
      x="",y="",color="")+
    theme(axis.text=element_blank()) -> pl
  pl
} -> lisamaps
print(lisamaps)
## [[1]]
##
## [[2]]
The above plots are limited to cities of less than \(10^{5}\) inhabitants, since larger cities have few or no fadeouts.
## Record the computational environment for reproducibility.
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] cowplot_1.1.1 tibble_3.1.3 xtable_1.8-4 doParallel_1.0.16
## [5] ncf_1.2-9 sp_1.4-5 scales_1.1.1 ggplot2_3.3.5
## [9] pomp_3.5 nloptr_1.2.2.2 bbmle_1.0.23.1 iterators_1.0.13
## [13] foreach_1.5.1 magrittr_2.0.1 reshape2_1.4.4 plyr_1.8.6
## [17] knitr_1.33
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.7 bdsmatrix_1.3-4 mvtnorm_1.1-2
## [4] lattice_0.20-44 tidyr_1.1.3 assertthat_0.2.1
## [7] digest_0.6.27 utf8_1.2.2 R6_2.5.0
## [10] evaluate_0.14 coda_0.19-4 highr_0.9
## [13] pillar_1.6.2 rlang_0.4.11 jquerylib_0.1.4
## [16] Matrix_1.3-4 rmarkdown_2.9 splines_4.1.0
## [19] labeling_0.4.2 stringr_1.4.0 munsell_0.5.0
## [22] compiler_4.1.0 numDeriv_2016.8-1.1 xfun_0.24
## [25] pkgconfig_2.0.3 mgcv_1.8-36 htmltools_0.5.1.1
## [28] tidyselect_1.1.1 codetools_0.2-18 fansi_0.5.0
## [31] crayon_1.4.1 dplyr_1.0.7 withr_2.4.2
## [34] MASS_7.3-54 nlme_3.1-152 jsonlite_1.7.2
## [37] gtable_0.3.0 lifecycle_1.0.0 DBI_1.1.1
## [40] stringi_1.7.3 farver_2.1.0 mapproj_1.2.7
## [43] bslib_0.2.5.1 ellipsis_0.3.2 generics_0.1.0
## [46] vctrs_0.3.8 deSolve_1.28 tools_4.1.0
## [49] glue_1.4.2 purrr_0.3.4 maps_3.3.0
## [52] yaml_2.2.1 colorspace_2.0-2 sass_0.4.0