Chapter 4 Plotting meta-analytical results

4.0.1 Overview

This chapter focuses on the visualization of meta-analytical results, which is crucial for effectively communicating findings and insights drawn from aggregated data. Visualization techniques help researchers and stakeholders quickly grasp complex relationships and patterns within the data. In this chapter, we will cover various plotting methods, including forest plots, funnel plots, and advanced visualizations using R packages such as ggplot2, metafor, and dmetar.

4.0.2 Example 1: Forest plot with metafor

dat <- data.frame(author = c("Dyson", "Jönsson", "Morris", "Saslow", "Saslow", "Sato", "Tay", "Yamada"),
                  year   = c(2010, 2009, 2019, 2014, 2017, 2017, 2014, 2014),
                  ai     = c(3, 6, 11, 8, 6, 4, 36, 2), ##Nb event experimental group
                  n1i    = c(6, 6, 21, 9, 11, 22, 46, 12), ## Total experimental group
                  ci     = c(1, 3, 0, 5, 0, 0, 30, 2),  ## Nb events control group
                  n2i    = c(6, 6, 12, 13, 8, 27, 47, 12))  ## Total control group

### calculate risk differences and corresponding sampling variances (and use
### the 'slab' argument to store study labels as part of the data frame)
dat <- metafor::escalc(measure = "RD", ai = ai, n1i = n1i, ci = ci, n2i = n2i, data = dat,
              slab = paste(" ", author, year), addyi = FALSE)

### fit random-effects model (using the DL estimator)
res <- metafor::rma(yi, vi, data = dat, method = "DL")

### colors to be used in the plot
colp <- "#6b58a6"
coll <- "#a7a9ac"

### total number of studies
k <- nrow(dat)

### generate point sizes
psize <- weights(res)
psize <- 1.2 + (psize - min(psize)) / (max(psize) - min(psize))

### get the weights and format them as will be used in the forest plot
weights <- round(weights(res), 1)

### adjust the margins
par(mar = c(2.7, 3.2, 2.3, 1.3), mgp = c(3, 0, 0), tcl = 0.15)

### forest plot with extra annotations
sav <- metafor::forest(dat$yi, dat$vi, xlim = c(-3.4, 2.1), ylim = c(-0.5, k + 3), alim = c(-1, 1), cex = 0.88,
              pch = 18, psize = psize, efac = 0, refline = NA, lty = c(1, 0), xlab = "",
              ilab = cbind(paste(dat$ai, "/", dat$n1i), paste(dat$ci, "/", dat$n2i), weights),
              ilab.xpos = c(-1.9, -1.3, 1.2), annosym = c(" (", " to ", ")"),
              rowadj = -0.07)

### add the vertical reference line at 0
segments(0, -1, 0, k + 1.6, col = coll)

### add the vertical reference line at the pooled estimate
segments(coef(res), 0, coef(res), k, col = colp, lty = "33", lwd = 0.8)

### redraw the CI lines and points in the chosen color
segments(summary(dat)$ci.lb, k:1, summary(dat)$ci.ub, k:1, col = colp, lwd = 1.5)
points(dat$yi, k:1, pch = 18, cex = psize * 1.15, col = "white")
points(dat$yi, k:1, pch = 18, cex = psize, col = colp)

### add the summary polygon
addpoly(res, row = 0, mlab = "Total (95% CI)", efac = 2, col = colp, border = colp)

### add the horizontal line at the top
abline(h = k + 1.6, col = coll)

### redraw the x-axis in the chosen color
axis(side = 1, at = seq(-1, 1, by = 0.5), col = coll, labels = FALSE)

### now we add a bunch of text; since some of the text falls outside of the
### plot region, we set xpd=NA so nothing gets clipped
par(xpd = NA)

### adjust cex as used in the forest plot and use a bold font
par(cex = sav$cex, font = 2)

### add headings
text(sav$xlim[1], k + 2.5, pos = 4, "Study or\nsubgroup")
text(mean(sav$ilab.xpos[1:2]), k + 3.4, "No of events / total")
text(0, k + 2.7, "Risk difference, IV,\nrandom (95% CI)")
text(c(sav$ilab.xpos[3], sav$xlim[2] - 0.35), k + 2.7, c("Weight\n(%)", "Risk difference, IV,\nrandom (95% CI)"))

### add 'Favours control'/'Favours experimental' text below the x-axis
text(c(-1, 1), -2.5, c("Favors control", "Favors experimental"), pos = c(4, 2), offset = -0.3)

### use a non-bold font for the rest of the text
par(cex = sav$cex, font = 1)

### add text with heterogeneity statistics
text(sav$xlim[1], -1, pos = 4, bquote(paste("Test for heterogeneity: ",
                                          tau^2, "=", .(round(res$tau2, digits = 2)), "; ",
                                          I^2, "=", .(round(res$I2, digits = 2)), "%")))

### add text with overall estimate
text(sav$xlim[1], -2.5, pos = 4, bquote(paste("Overall effect: ",
                                              riskdiff == .(round(coef(res), digits = 2)))))

### add titles
text(sav$xlim[1], k + 4.5, pos = 4, "Risk difference between low and very low carbohydrate diets")

4.0.3 Example 2: Forest plot with meta

library(meta)
# Perform meta-analysis using the metagen function from `meta`
# Perform meta-analysis using the metagen function from `meta`
meta_analysis <- metagen(
  TE = dat$yi,  # Use the calculated effect sizes
  seTE = sqrt(dat$vi),  # Standard errors
  studlab = paste(dat$author, dat$year),
  data = dat,
  sm = "RD",  # Risk Difference as the summary measure
  fixed = FALSE,  # Using a random-effects model
  random = TRUE,
  method.tau = "REML",  # Restricted maximum likelihood estimator
  hakn = TRUE,  # Knapp-Hartung adjustment
  title = "Risk Difference between Diet Groups"
)

# Create a forest plot with custom options and store the output
forest_plot <- meta::forest(
  meta_analysis,
  prediction = TRUE,  # Add a prediction interval
  print.tau2 = TRUE,  # Print the tau-squared statistic in the plot
  leftlabs = c("Study", "Risk Diff", "Std. Error"),  # Custom labels
  lab.e = "Favours Low Carb",  # Label for experimental group
  lab.c = "Favours Control",  # Label for control group
  col.study = "#6b58a6",  # Study point color
  col.diamond = "#5e84c0",  # Color of the summary diamond
  col.square = "white"  # Color for individual study estimates
)

library(grid)
grid.text("Meta-analysis of Risk Differences in Diet Studies", 
          x = 0.5, y = unit(0.95, "npc"), 
          gp = gpar(fontsize = 14, fontface = "bold"))
grid.text("Random-effects model with Knapp-Hartung adjustments", 
          x = 0.5, y = unit(0.93, "npc"), 
          gp = gpar(fontsize = 12))

4.0.4 Example 3: Forest plot with ggplot

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(ggpubr)
library(ggstar)

# Prepare the data: Calculate estimates, confidence intervals, and other metrics
dat <- dat %>%
  mutate(
    estimate = yi,  # Estimated effect size
    conf.low = yi - 1.96 * sqrt(vi),  # Lower bound of the confidence interval
    conf.high = yi + 1.96 * sqrt(vi),  # Upper bound of the confidence interval
    Sub_Cat_intervention = paste(author, year)  # Combine author and year for labels
  )

# Create a ggplot forest plot
p <- ggplot(dat, aes(estimate, reorder(Sub_Cat_intervention, estimate))) +
  # Add stars for effect sizes
  geom_star(starshape = 12, size = 4, starstroke = 1.1, color = "#6b58a6") +
  # Add points for individual study estimates
  geom_point(aes(size = n1i), shape = 21, alpha = 0.8, fill = "#ff9d02",
             position = position_dodge(width = 0.6)) +
  # Add error bars using confidence intervals
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), color = "black",
                size = 0.2, width = 0.1) +
  # Add a vertical line at x = 0
  geom_vline(xintercept = 0, linetype = 2, color = "gray") +
  # Apply ggpubr theme
  theme_pubr() +
  # Set x-axis limits
  xlim(c(-3.5, 3.5)) +  # Adjust limits based on your data
  # Remove legend
  theme(legend.position = "none") +
  # Set axis labels
  labs(y = "", x = "Risk Difference (RD)")

# Display the plot
print(p)

4.0.5 Example 4: Orchard plots

# Load required libraries
library(ggplot2)
library(dplyr)
library(metafor)
library(ggbeeswarm)

# Create a larger randomized dataset with valid constraints
set.seed(42)  # For reproducibility
n_studies <- 30  # Total number of studies

dat <- data.frame(
  author = paste("Study", 1:n_studies),
  year = sample(2000:2020, n_studies, replace = TRUE),
  n1i = sample(10:100, n_studies, replace = TRUE),  # Total in experimental group
  n2i = sample(10:100, n_studies, replace = TRUE)   # Total in control group
)

# Randomly generate events ensuring that events do not exceed group sizes
dat$ai <- sapply(1:n_studies, function(i) sample(0:dat$n1i[i], 1))  # Events in experimental group
dat$ci <- sapply(1:n_studies, function(i) sample(0:dat$n2i[i], 1))  # Events in control group

# Calculate risk differences
dat <- escalc(measure = "RD", ai = ai, n1i = n1i, ci = ci, n2i = n2i, data = dat)

# Fit a random-effects model and create a subgroup variable
dat$subgroup <- sample(c("Group A", "Group B", "Group C"), n_studies, replace = TRUE)

# Create an empty plot list to store individual subgroup plots
plot_list <- list()

# Loop over each subgroup to create the overall mean for each
for (group in unique(dat$subgroup)) {
  subgroup_data <- dat[dat$subgroup == group, ]
  res <- rma(yi, vi, data = subgroup_data, method = "DL")  # Fit model for subgroup

  # Create the orchard plot for each subgroup
  p <- ggplot(subgroup_data, aes(x = yi, y = 1)) +
    geom_violin(fill = "lightgray", alpha = 0.5) +
    geom_quasirandom(aes(size = 1/sqrt(vi)), alpha = 0.7) +  # Use inverse of SE for size
    geom_point(aes(x = res$b, y = 1, color = "Overall Mean"), size = 5, shape = 18) +
    geom_errorbarh(aes(xmin = res$ci.lb, xmax = res$ci.ub, y = 1), height = 0.1, color = "red") +
    labs(x = "Risk Difference", y = "", title = paste("Orchard Plot for", group)) +
    scale_color_manual(name = "", values = c("Overall Mean" = "red")) +
    theme_minimal() +
    theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
    xlim(c(-1, 1)) +
    scale_size(range = c(2, 5), name = "Precision") 

  # Store the plot in the list
  plot_list[[group]] <- p
}

# Display all orchard plots for each subgroup
library(gridExtra)
do.call(grid.arrange, c(plot_list, ncol = 1))  # Arrange plots in a single column