Bandwidth Plot with purrr + ggplot2

Regression discontinuity design (RDD) estimates are sensitive to the choice of bandwidth, which is why one typically plots them across a range of bandwidths. Here is some code to quickly obtain RDD estimates for different bandwidths and plot the results. The code relies on the amazing purrr package.

library(purrr)
library(ggplot2)
library(dplyr)
library(broom)
library(stringr)

# Helper: fit `formula` with lm() on the observations inside a bandwidth
# window and return the tidied coefficient row of the treatment term.
#
# Arguments:
#   df       data frame with a `runningvar` column (the window is symmetric
#            around 0) plus every variable referenced in `formula`
#   bw       bandwidth; only rows with abs(runningvar) <= bw are used
#   formula  model formula handed to lm(); it must produce a coefficient
#            named exactly "treat", otherwise zero rows are returned
#
# Returns the broom::tidy() row for "treat" (estimate, std.error, ...)
# with two extra columns: `bw` (the bandwidth) and `spec` (the formula as
# a single string).
get_lm_bw_est <- function(df, bw, formula) {
  in_window <- df %>% filter(abs(runningvar) <= bw)
  fit <- lm(formula, data = in_window)

  # format() may split a long formula over several strings; collapse them
  # so that `spec` is always exactly one value per result row.
  spec_label <- paste(trimws(format(formula)), collapse = " ")

  tidy(fit) %>%
    filter(term == "treat") %>%
    mutate(bw = bw, spec = spec_label)
}

set.seed(42)

# Simulated data: running variable uniform on [-1, 1], linear trend with
# slope 3, a jump of 0.5 at the cutoff (0) and standard normal noise.
# The uniform draw must come before the normal draw to keep the seed's
# sequence of random numbers unchanged.
n_obs <- 1000
x <- runif(n_obs, min = -1, max = 1)
above_cutoff <- x >= 0
y <- 5 + 3 * x + 0.5 * above_cutoff + rnorm(n_obs)

fakedata <- data.frame(
  y = y,
  runningvar = x,
  treat = as.numeric(above_cutoff)
)

# Grid of choices: two specifications (linear and quadratic in the running
# variable, each interacted with the treatment dummy) crossed with 50
# equally spaced bandwidths between 0.1 and 0.9
spec1 <- y ~ treat * runningvar
spec2 <- y ~ treat * (runningvar + I(runningvar^2))
spec <- list(spec1, spec2)

bandwidths <- seq(0.1, 0.9, length.out = 50)
simframe <- expand.grid(bw = bandwidths, spec = spec)

# Estimate: call the helper once per row of simframe (one bandwidth paired
# with one specification) and stack the returned rows into one data frame
m <- map2_df(
  simframe$bw,
  simframe$spec,
  function(bw, spec) get_lm_bw_est(fakedata, bw, spec)
)

# Calculate the 95% CI (normal approximation) that the plot further down
# draws as a ribbon: lo is the lower bound, hi is the upper bound
z_crit <- qnorm(0.975)
m <- m %>% mutate(lo = estimate - z_crit * std.error,
                  hi = estimate + z_crit * std.error)

# Turn the formula string into a legend label: the number of times
# "running" occurs in the formula is used as the polynomial order
m <- m %>%
  mutate(spec = paste0("OLS with poly(", str_count(spec, "running"), ")"))

# Bandwidth plot: one estimate line and one CI ribbon per specification,
# with a horizontal reference line at zero
ci_ribbon <- geom_ribbon(aes(ymin = lo, ymax = hi, fill = spec), alpha = 0.25)
estimate_line <- geom_line(aes(colour = spec))
zero_line <- geom_hline(aes(yintercept = 0))

ggplot(m, aes(x = bw, y = estimate, group = spec)) +
  ci_ribbon +
  estimate_line +
  zero_line +
  labs(x = "Bandwidth", y = "Estimates / 95%-CI") +
  theme(legend.position = "bottom", legend.title = element_blank())

Bandwidth plot with purrr