restructured into new Section 7.2.5 with mode/curvature matching

gregorkastner · gregorkastner · commit b08c4f1f4599 · 2025-11-28T01:56:03.000+01:00
diff --git a/vignettes/Chapter07.Rmd b/vignettes/Chapter07.Rmd
@@ -395,7 +395,7 @@ names(ymiss) <- names(logret)[missing]
 For simplicity, we employ our improper prior and sample iteratively.
 
 ```{r}
-ndraws <- 5000
+ndraws <- 500000
 nburn <- 1000
 ind <- missing - 2
 
@@ -720,31 +720,15 @@ for (m in seq_len(ndraws + nburn)) {
     phis[m - nburn] <- phi
   }
 }
-res1 <- data.frame(y0 = y0s, sigma2 = sigma2s, zeta = zetas, phi = phis)
+stationary1 <- data.frame(zeta = zetas, phi = phis, sigma2 = sigma2s, y0 = y0s)
 naccepts1 <- naccepts
 ```
 
-Let us investigate traceplots and empirical autocorrelation functions of the
-draws. In addition, we check the percentage of accepted draws in MH-step (d).
+We repeat the exercise with a more informative beta prior.
 
 ```{r}
-plot.ts(res1, xlab = "Draws after burn-in",
-        main = paste0("MH acceptance rate: ", 100 * naccepts1 / ndraws, "%"))
-acf(res1, ylab = "")
-```
-
-We now repeat this exercise, but use the conditional posterior resulting
-from an auxiliary moment-matched prior in Step (d).
-
-```{r}
-# Compute mean and variance of the actual prior using properties of the beta
-priormean <- 2 * aphi / (aphi + bphi) - 1
-priorvar <- 4 * (aphi * bphi) / ((aphi + bphi)^2 * (aphi + bphi + 1))
-
-# Define the design matrix and y
-X <- matrix(NA_real_, nrow = length(y), 2)
-X[, 1] <- 1
-X[, 2] <- c(NA_real_, y[-length(y)])
+aphi <- 20
+bphi <- 1.5
 
 # Allocate some space for the posterior draws and initialize the parameters:
 y0s <- sigma2s <- zetas <- phis <- rep(NA_real_, ndraws)
@@ -772,18 +756,15 @@ for (m in seq_len(ndraws + nburn)) {
   
   # Step (d): Draw the persistence
   tmp <- y0^2 + sum(y[-length(y)]^2)
-  propvar <- 1 / (1 / priorvar + tmp / sigma2)
-  propmean <- propvar * (priormean / priorvar + (y0 * (y[1] - zeta) +
-    sum(y[-length(y)] * (y[-1] - zeta))) / sigma2)
+  propmean <- (y0 * (y[1] - zeta) + sum(y[-length(y)] * (y[-1] - zeta))) / tmp
+  propvar <- sigma2 / tmp
   phiprop <- rnorm(1, propmean, sqrt(propvar))
   if (-1 < phiprop & phiprop < 1) {
     logR <- dbetarescaled(phiprop, aphi, bphi, log = TRUE) -
       dbetarescaled(phi, aphi, bphi, log = TRUE) +
       dnorm(y0, zeta / (1 - phiprop), sqrt(sigma2 / (1 - phiprop^2)),
             log = TRUE) -
-      dnorm(y0, zeta / (1 - phi), sqrt(sigma2 / (1 - phi^2)), log = TRUE) +
-      dnorm(phi, priormean, sqrt(priorvar), log = TRUE) -
-      dnorm(phiprop, priormean, sqrt(priorvar), log = TRUE)
+      dnorm(y0, zeta / (1 - phi), sqrt(sigma2 / (1 - phi^2)), log = TRUE)
     if (log(runif(1)) < logR) {
       phi <- phiprop
       if (m > nburn) naccepts <- naccepts + 1L
@@ -798,58 +779,111 @@ for (m in seq_len(ndraws + nburn)) {
     phis[m - nburn] <- phi
   }
 }
-res2 <- data.frame(y0 = y0s, sigma2 = sigma2s, zeta = zetas, phi = phis)
+stationary2 <- data.frame(zeta = zetas, phi = phis, sigma2 = sigma2s, y0 = y0s)
 naccepts2 <- naccepts
 ```
 
-Again, we investigate traceplots and empirical autocorrelation functions of the
-draws. In addition, we check the percentage of accepted draws in MH-step (d).
+To conclude, we compare the posteriors of $\phi$ under the improper prior,
+the posterior obtained after post-processing draws to obtain stationarity,
+and the posterior under the stationarity-enforcing shifted beta priors.
 
-```{r}
-plot.ts(res2, xlab = "Draws after burn-in",
-        main = paste0("MH acceptance rate: ", 100 * naccepts2 / ndraws, "%"))
-acf(res2, ylab = "")
+```{r, echo = -c(1:2)}
+if (pdfplots) {
+  pdf("7-2_12.pdf", width = 8, height = 5)
+}
+par(mfrow = c(1, 1), mar = c(2.5, 2.5, 1.5, .1), mgp = c(1.5, .5, 0), lwd = 1.5)
+mybreaks <- seq(floor(100 * .99 * min(stationary1$phi, stationary2$phi)) / 100,
+                ceiling(100 * 1.01 * max(ar1draws)) / 100,
+                by = .0025)  # lower end spans both beta-prior chains
+hist(ar1draws[!nonstationary[, 1]], breaks = mybreaks, col = rgb(0, 0, 1, .2),
+     main = "Histogram of posterior draws", xlab = expression(phi),
+     freq = FALSE)
+hist(ar1draws, breaks = mybreaks, col = rgb(0, 1, 0, .2),
+     freq = FALSE, add = TRUE)
+hist(stationary1$phi, breaks = mybreaks, col = rgb(1, 0, 0, .2),
+     freq = FALSE, add = TRUE)
+hist(stationary2$phi, breaks = mybreaks, col = rgb(1, 1, 0, .2),
+     freq = FALSE, add = TRUE)
+lines(mybreaks[mybreaks <= 1], lty = 3, lwd = 3,
+      dbetarescaled(mybreaks[mybreaks <= 1], 1, 1))
+lines(mybreaks[mybreaks <= 1], lty = 2, lwd = 3,
+      dbetarescaled(mybreaks[mybreaks <= 1], aphi, bphi))
+legend("topright",
+       c("Unrestricted posterior", "Post-processed posterior",
+         "Beta prior posterior (flat)", "Beta prior posterior (informative)"),
+       fill = rgb(c(0, 0, 1, 1), c(1, 0, 0, 1), c(0, 1, 0, 0), .2))
+legend("topleft", c("Beta prior (flat)", "Beta prior (informative)"),
+       col = 1, lty = 3:2, lwd = 3)
 ```
 
-We now compare the draws from the two samplers; they should yield
-draws from the same distribution, irrespective of the acceptance rate and thus
-the mixing of the Markov chain. We graphically
-check this by comparing histograms and quantiles of draws from the marginal
-posterior of $\phi$.
+## Section 7.2.5: Evaluating the Efficiency of an MCMC Sampler
+### Example 7.11: Improving the independence MH step
 
-```{r, echo = -c(1:2), fig.width = 8, fig.height = 4}
+Let us investigate traceplots and empirical autocorrelation functions of the
+posterior draws under the informative stationarity-inducing prior.
+
+```{r, echo = -c(1:2)}
 if (pdfplots) {
-  pdf("7-2_11.pdf", width = 8, height = 4)
+  pdf("7-2_13.pdf", width = 8, height = 5)
+}
+labels <- expression(zeta, phi, sigma^2, y[0])
+par(mfrow = c(4, 2), mar = c(2.5, 2.8, 1.5, .1), mgp = c(1.5, .5, 0), lwd = 1.5)
+for (i in seq_along(stationary2)) {
+  plot.ts(stationary2[i], xlab = "Draws after burn-in", ylab = labels[i])
+  if (i == 1) title("Traceplot")
+  acf(stationary2[i], ylab = "", main = "ACF")
+  if (i == 1) title("Empirical ACF")
 }
-par(mfrow = c(1, 2), mar = c(2.5, 2.5, 1.5, .1), mgp = c(1.5, .5, 0), lwd = 2)
-mybreaks <- seq(min(res1$phi, res2$phi), max(res1$phi, res2$phi),
-                length.out = 50)
-hist(res1$phi, breaks = mybreaks, col = rgb(0, 0, 1, .3),
-     main = "Histogram", xlab = expression(phi), freq = FALSE)
-hist(res2$phi, breaks = mybreaks, col = rgb(1, 0, 0, .3), freq = FALSE,
-     add = TRUE)
-qqplot(res1$phi, res2$phi, xlab = "Sampler 1", ylab = "Sampler 2",
-       main = "QQ plot")
-abline(0, 1, col = 2)
 ```
-We can see that the draws appear to come from the same distribution. Note,
-however, that the first sampler gets "stuck" slightly below 0.93 for a few
-draws, which isn't the case for the second sampler.
 
-We now re-run the second sampler with a more informative beta prior.
+We now compute an estimate of the effective sample size (ESS) and the
+inefficiency factor (IF) using the coda package.
 
 ```{r}
-aphi <- 20
-bphi <- 1.5
+library(coda)
+ess1 <- effectiveSize(data.frame(zeta = res2[[1]]$betas[, 1],
+                               phi = res2[[1]]$betas[, 2],
+                               sigma2 = res2[[1]]$sigma2s))
+ess2 <- effectiveSize(data.frame(zeta = res2[[1]]$betas[!nonstationary[, 1], 1],
+                               phi = res2[[1]]$betas[!nonstationary[, 1], 2],
+                               sigma2 = res2[[1]]$sigma2s[!nonstationary[, 1]]))
+ess3 <- effectiveSize(stationary1)
+ess4 <- effectiveSize(stationary2)
+ess <- rbind(unrestricted = c(ess1, y0 = NA),
+             postprocessed = c(ess2, y0 = NA),
+             betapriorflat = ess3,
+             betapriorinformative = ess4)
+knitr::kable(round(ess))
+knitr::kable(round(ndraws / ess, 2))
+```
 
-# Compute mean and variance of the actual prior using properties of the beta
-priormean <- 2 * aphi / (aphi + bphi) - 1
-priorvar <- 4 * (aphi * bphi) / ((aphi + bphi)^2 * (aphi + bphi + 1))
+We now repeat the above exercise, but use the conditional posterior resulting
+from an auxiliary moment-matched prior in Step (d).
 
-# Define the design matrix and y
-X <- matrix(NA_real_, nrow = length(y), 2)
-X[, 1] <- 1
-X[, 2] <- c(NA_real_, y[-length(y)])
+```{r}
+method <- "fancy"
+if (method == "simple") {
+  # Add .5 to aphi and bphi (from the determinant of the initial state prior)
+  aphiprop <- aphi + .5
+  bphiprop <- bphi + .5
+
+  # Compute mean and variance of the proposal using properties of the beta
+  priormean <- 2 * aphiprop / (aphiprop + bphiprop) - 1
+  priorvar <- 4 * (aphiprop * bphiprop) /
+    ((aphiprop + bphiprop)^2 * (aphiprop + bphiprop + 1))
+
+} else if (method == "fancy") {
+  
+  # Alternatively, mode-and-curvature-match:
+  dprior <- function(phi, zeta, sigma2, y0, aphi, bphi, log = FALSE) {
+    inside <- phi <= 1 & phi >= -1  # TRUE where phi lies in [-1, 1]
+    logdens <- rep(-Inf, length(phi))  # stays -Inf wherever phi is outside
+    logdens[inside] <- dbetarescaled(phi[inside], aphi, bphi, log = TRUE) +
+      dnorm(y0, zeta / (1 - phi[inside]), sqrt(sigma2 / (1 - phi[inside]^2)),
+            log = TRUE)  # log prior of phi plus log density of y0 given phi
+    if (log) logdens else exp(logdens)
+  }
+}
 
 # Allocate some space for the posterior draws and initialize the parameters:
 y0s <- sigma2s <- zetas <- phis <- rep(NA_real_, ndraws)
@@ -876,10 +910,21 @@ for (m in seq_len(ndraws + nburn)) {
   zeta <- rnorm(1, bT, sqrt(BT))
   
   # Step (d): Draw the persistence
+  
+  if (method == "fancy") {
+    mode <- optimize(dprior, c(-1, 1), zeta = zeta, sigma2 = sigma2, y0 = y0,
+                     aphi = aphi, bphi = bphi, log = TRUE, maximum = TRUE)$maximum
+    dd <- numDeriv::hessian(dprior, mode, zeta = zeta, sigma2 = sigma2, y0 = y0,
+                            aphi = aphi, bphi = bphi)
+    priormean <- mode
+    priorvar <- -1 / as.numeric(dd)
+  }
+  
   tmp <- y0^2 + sum(y[-length(y)]^2)
   propvar <- 1 / (1 / priorvar + tmp / sigma2)
-  propmean <- propvar * (priormean / priorvar +
-    (y0 * (y[1] - zeta) + sum(y[-length(y)] * (y[-1] - zeta))) / sigma2)
+  propmean <- propvar * (priormean / priorvar + (y0 * (y[1] - zeta) +
+    sum(y[-length(y)] * (y[-1] - zeta))) / sigma2)
+
   phiprop <- rnorm(1, propmean, sqrt(propvar))
   if (-1 < phiprop & phiprop < 1) {
     logR <- dbetarescaled(phiprop, aphi, bphi, log = TRUE) -
@@ -903,43 +948,56 @@ for (m in seq_len(ndraws + nburn)) {
     phis[m - nburn] <- phi
   }
 }
-res3 <- data.frame(y0 = y0s, sigma2 = sigma2s, zeta = zetas, phi = phis)
+stationary3 <- data.frame(zeta = zetas, phi = phis, sigma2 = sigma2s, y0 = y0s)
 naccepts3 <- naccepts
 ```
 
-To conclude, we compare the posteriors of $\phi$ under the improper prior,
-the posterior obtained after post-processing draws to obtain stationarity,
-and the posterior under the stationary-enforcing shifted beta priors.
+Again, we investigate traceplots and empirical autocorrelation functions of the
+draws. In addition, we check the percentage of accepted draws in MH-step (d).
 
-```{r, echo = -c(1:2)}
+```{r}
+par(mfrow = c(4, 2), mar = c(2.5, 2.8, 1.5, .1), mgp = c(1.5, .5, 0), lwd = 1.5)
+for (i in seq_along(stationary3)) {
+  plot.ts(stationary3[i], xlab = "Draws after burn-in", ylab = labels[i])
+  if (i == 1) title("Traceplot")
+  acf(stationary3[i], ylab = "", main = "ACF")
+  if (i == 1) title("Empirical ACF")
+}
+```
+
+We now compare the draws from the two samplers; they should yield
+draws from the same distribution, irrespective of the acceptance rate and thus
+the mixing of the Markov chain. We graphically
+check this by comparing histograms and quantiles of draws from the marginal
+posterior of $\phi$.
+
+```{r, echo = -c(1:2), fig.width = 8, fig.height = 4}
 if (pdfplots) {
-  pdf("7-2_12.pdf", width = 8, height = 5)
+  pdf("7-2_14.pdf", width = 8, height = 4)
 }
-par(mfrow = c(1, 1), mar = c(2.5, 2.5, 1.5, .1), mgp = c(1.5, .5, 0), lwd = 1.5)
-mybreaks <- seq(floor(100 * .99 * min(res2$phi)) / 100,
-                ceiling(100 * 1.01 * max(ar1draws)) / 100,
-                by = .0025)
-hist(ar1draws[!nonstationary[, 1]], breaks = mybreaks, col = rgb(0, 0, 1, .2),
-     main = "Histogram of posterior draws", xlab = expression(phi),
-     freq = FALSE)
-hist(ar1draws, breaks = mybreaks, col = rgb(0, 1, 0, .2),
-     freq = FALSE, add = TRUE)
-hist(res2$phi, breaks = mybreaks, col = rgb(1, 0, 0, .2),
-     freq = FALSE, add = TRUE)
-hist(res3$phi, breaks = mybreaks, col = rgb(1, 1, 0, .2),
-     freq = FALSE, add = TRUE)
-lines(mybreaks[mybreaks <= 1], lty = 3, lwd = 3,
-      dbetarescaled(mybreaks[mybreaks <= 1], 1, 1))
-lines(mybreaks[mybreaks <= 1], lty = 2, lwd = 3,
-      dbetarescaled(mybreaks[mybreaks <= 1], aphi, bphi))
-legend("topright",
-       c("Unrestricted posterior", "Post-processed posterior",
-         "Beta prior posterior (flat)", "Beta prior posterior (informative)"),
-       fill = rgb(c(0, 0, 1, 1), c(1, 0, 0, 1), c(0, 1, 0, 0), .2))
-legend("topleft", c("Beta prior (flat)", "Beta prior (informative)"),
-       col = 1, lty = 3:2, lwd = 3)
+par(mfrow = c(1, 2), mar = c(2.5, 2.5, 1.5, .1), mgp = c(1.5, .5, 0), lwd = 2)
+mybreaks <- seq(min(stationary2$phi, stationary3$phi),
+                max(stationary2$phi, stationary3$phi),
+                length.out = 50)
+hist(stationary2$phi, breaks = mybreaks, col = rgb(0, 0, 1, .3),
+     main = "Histogram", xlab = expression(phi), freq = FALSE)
+hist(stationary3$phi, breaks = mybreaks, col = rgb(1, 0, 0, .3), freq = FALSE,
+     add = TRUE)
+qqplot(stationary2$phi, stationary3$phi, xlab = "Sampler 1", ylab = "Sampler 2",
+       main = "QQ plot")  # same two chains as in the histogram on the left
+abline(0, 1, col = 2)
+```
+We can see that the draws appear to come from the same distribution. Note,
+however, that the first sampler gets "stuck" slightly below 0.93 for a few
+draws, which isn't the case for the second sampler.
+
+```{r}
+library(coda)
+ess <- effectiveSize(stationary3)
+knitr::kable(rbind(ESS = round(ess), IF = round(ndraws / ess, 2)))
 ```
 
+
 # Section 7.3: Some Extensions
 ## Section 7.3.1: AR Models with a Unit Root