Bias

Drew Dimmery

February 26, 2015

Measurement Error Simulation

# Simulate a latent treatment, its realized binary version, and the outcome
# (true treatment effect = 5)
Dstar <- rnorm(10000)
D <- ifelse(Dstar > 0, 1, 0)
Y <- 10 + 5*D + rnorm(10000, 0, 2)
# Re-measure the treatment with noise on the latent index, then estimate the
# effect by a difference in means under the mismeasured treatment
simME <- function(stdev) {
  Dnew <- ifelse(Dstar + rnorm(10000, 0, stdev) > 0, 1, 0)
  mean(Y[Dnew==1]) - mean(Y[Dnew==0])
}
sdevs <- seq(0.01, 20, .1)
eff <- sapply(sdevs, simME)
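  • As a quick baseline check, the difference in means under the correctly measured treatment recovers the true effect of 5; as the measurement error grows, Dnew becomes nearly independent of the true treatment and the estimate attenuates toward zero:
mean(Y[D==1]) - mean(Y[D==0])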

Plot it

plot(sdevs,eff,xlab='Amount of Measurement Error',ylab='Estimated Effect',pch=19)

Sensitivity Analysis

  • I’m going to walk you through how to do a generalized version of the Imbens (2003) method.
  • It may be easier to use one of the canned routines for your homework, though.
  • We’re going to keep working with Pat’s data, since we already have it handy.
  • Imbens process:
    • Simulate (or imagine simulating) an unobserved confounder like the following:
      \(Y_d | X, U \sim \mathcal{N}(\tau d + \boldsymbol\beta'X + \delta U, \sigma^2)\)
      \(D | X, U \sim f(\boldsymbol\gamma'X + \alpha U)\) (with \(f\) known)
    • That is, \(Y_1, Y_0 \perp D | X, U\)
    • So we want to generate a confounder that enters both the \(D\) and \(Y\) models additively and linearly.

Example

require(foreign)
d <- read.dta("gwdataset.dta")
## Warning in read.dta("gwdataset.dta"): value labels ('q2') for 'jan07_q2'
## are missing
zips <- read.dta("zipcodetostate.dta")
zips<-unique(zips[,c("statenum","statefromzipfile")])
pops <- read.csv("population_ests_2013.csv")
pops$state <- tolower(pops$NAME)
d$getwarmord <- as.double(d$getwarmord)
# And estimate primary model of interest:
out<-lm(getwarmord~ddt_week+educ_hsless+educ_coll+educ_postgrad+educ_dk+party_rep+party_leanrep+party_leandem+party_dem+male+raceeth_black+raceeth_hisp+raceeth_notwbh+raceeth_dkref+age_1824+age_2534+age_3544+age_5564+age_65plus+age_dk+ideo_vcons+ideo_conservative+ideo_liberal+ideo_vlib+ideo_dk+attend_1+attend_2+attend_3+attend_5+attend_6+attend_9+as.factor(doi)+as.factor(statenum)+as.factor(wbnid_num),d)
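  • The slides below build their model formulas from a string X holding this covariate set. Its definition is not shown on this slide, but it must match the right-hand side above (everything except the treatment), so we can reconstruct it:
X <- "educ_hsless+educ_coll+educ_postgrad+educ_dk+party_rep+party_leanrep+party_leandem+party_dem+male+raceeth_black+raceeth_hisp+raceeth_notwbh+raceeth_dkref+age_1824+age_2534+age_3544+age_5564+age_65plus+age_dk+ideo_vcons+ideo_conservative+ideo_liberal+ideo_vlib+ideo_dk+attend_1+attend_2+attend_3+attend_5+attend_6+attend_9+as.factor(doi)+as.factor(statenum)+as.factor(wbnid_num)"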

Generate a confounder

  • For our analysis, \(Y\) is belief in global warming and \(D\) is local variation in temperature.
  • We want to standardize these variables first.
d$getwarmord <- scale(d$getwarmord)
d$ddt_week <- scale(d$ddt_week)
genConfound<-function(alpha,delta) {
  e <- rnorm(nrow(d),0,1)
  U <- alpha * d$ddt_week + delta * d$getwarmord + e
  return(U)
}
  • So we can vary partial correlations with \(D\) and \(Y\) by varying alpha and delta.
U1<-genConfound(0,2)
U2<-genConfound(10,10)
c(D=cor(U1,d$ddt_week),Y=cor(U1,d$getwarmord))
##          D          Y 
## 0.03851302 0.89405287
c(D=cor(U2,d$ddt_week),Y=cor(U2,d$getwarmord))
##         D         Y 
## 0.7200823 0.7198210
c(D=coef(lm(paste0("ddt_week~U1+",X),d))["U1"],Y=coef(lm(paste0("getwarmord~U1+",X),d))["U1"])
##        D.U1        Y.U1 
## 0.006572182 0.387065938
c(D=coef(lm(paste0("ddt_week~U2+",X),d))["U2"],Y=coef(lm(paste0("getwarmord~U2+",X),d))["U2"])
##       D.U2       Y.U2 
## 0.03236123 0.06691239

Continued

  • More importantly, we can see how this changes our estimate of the treatment effect:
out <- lm(paste0("getwarmord~ddt_week+",X),d)
coef(out)["ddt_week"]
##   ddt_week 
## 0.03618393
coef(lm(paste0("getwarmord~ddt_week+U1+",X),d))["ddt_week"]
##    ddt_week 
## 0.008236237
coef(lm(paste0("getwarmord~ddt_week+U2+",X),d))["ddt_week"]
##   ddt_week 
## -0.9904723
  • Now we want to do this over a larger number of values of alpha and delta:
alphas<-rnorm(100,0,.5)
deltas<-rnorm(100,0,.5)
results<-NULL
for(i in seq_along(alphas)) {
  U<-genConfound(alphas[i],deltas[i])
  corD<-cor(U,d$ddt_week)
  corY<-cor(U,d$getwarmord)
  estTE<-coef(lm(paste0("getwarmord~ddt_week+U+",X),d))["ddt_week"]
  names(estTE)<-NULL
  res<-c(estTE=estTE,corD=corD,corY=corY)
  results<-rbind(results,res)
}
results<-cbind(results,TEchange=(results[,"estTE"]-coef(out)["ddt_week"]))
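  • The next slide repeats the loop with a trimmed covariate string, Xsens, which is not defined in this excerpt; given the later note about removing the fixed effects, a plausible reconstruction drops the as.factor() terms from X:
# Assumed reconstruction: the trimmed specification is X without fixed effects
Xsens <- paste(grep("factor", strsplit(X, "+", fixed=TRUE)[[1]], invert=TRUE, value=TRUE), collapse="+")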

More

resultsSens<-NULL
for(i in seq_along(alphas)) {
  U<-genConfound(alphas[i],deltas[i])
  corD<-cor(U,d$ddt_week)
  corY<-cor(U,d$getwarmord)
  estTE<-coef(lm(paste0("getwarmord~ddt_week+U+",Xsens),d))["ddt_week"]
  names(estTE)<-NULL
  res<-c(estTE=estTE,corD=corD,corY=corY)
  resultsSens<-rbind(resultsSens,res)
}
resultsSens<-cbind(resultsSens,TEchange=(resultsSens[,"estTE"]-coef(out)["ddt_week"]))
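  • A quick way to compare the two specifications is to summarize how far the estimated effect moves in each run:
summary(results[,"TEchange"])
summary(resultsSens[,"TEchange"])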

Plot Simulation Code

color<-ifelse(results[,"estTE"]<=.5*coef(out)["ddt_week"],"red",NA)
color<-ifelse(is.na(color) & results[,"estTE"]>=1.5*coef(out)["ddt_week"],"blue",color)
color<-ifelse(is.na(color),"green",color)
plot(results[,"corD"],results[,"corY"],col=color,xlab="correlation with D",ylab="correlation with Y",xlim=c(-1,1),ylim=c(-1,1))
vars<-strsplit(X,"[+]",perl=TRUE)[[1]]
vars<-vars[grep("factor",vars,invert=TRUE)]
for(v in vars) {
  corD <- cor(d[[v]], d$ddt_week)
  corY <- cor(d[[v]], d$getwarmord)
  points(corD, corY, pch="+", col="black")
}
abline(v=0,col="grey",lty=3)
abline(h=0,col="grey",lty=3)

Plot Sensitive Model

colorS<-ifelse(resultsSens[,"estTE"]<=.5*coef(out)["ddt_week"],"red",NA)
colorS<-ifelse(is.na(colorS) & resultsSens[,"estTE"]>=1.5*coef(out)["ddt_week"],"blue",colorS)
colorS<-ifelse(is.na(colorS),"green",colorS)
plot(resultsSens[,"corD"],resultsSens[,"corY"],col=colorS,xlab="correlation with D",ylab="correlation with Y",xlim=c(-1,1),ylim=c(-1,1))
vars<-strsplit(Xsens,"[+]",perl=TRUE)[[1]]
for(v in vars) {
  corD <- cor(d[[v]], d$ddt_week)
  corY <- cor(d[[v]], d$getwarmord)
  points(corD, corY, pch="+", col="black")
}
abline(v=0,col="grey",lty=3)
abline(h=0,col="grey",lty=3)

Plot of the Results

(Figure: the sensitivity scatterplots produced by the plotting code above.)

Blackwell (2013)

  • Instead, imagine a function which defines the confounding.
  • \(q(d,x) = E[Y_i(d)|D_i = d, X_i = x] - E[Y_i(d)|D_i = 1 - d, X_i = x]\)
  • Treated counterfactuals always higher (lower): \(q(d,x;\alpha) = \alpha\)
  • Treated group potential outcomes always higher (lower): \(q(d,x;\alpha) = \alpha(2 d - 1)\)
  • Package on CRAN: causalsens
  • You should probably use this for the homework.
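  • To make the two confounding functions concrete, here is a minimal sketch writing them out in R (the helper names are hypothetical; causalsens supplies its own confounding functions, such as one.sided, used in the example below):
# q(d,x;alpha) = alpha: treated counterfactuals always higher (or lower);
# neither function depends on x
q.one.sided <- function(d, alpha) rep(alpha, length(d))
# q(d,x;alpha) = alpha*(2d - 1): treated-group potential outcomes always
# higher (or lower)
q.aligned <- function(d, alpha) alpha*(2*d - 1)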

Example

  • Remove the fixed effects, which gives the more sensitive specification:
require(causalsens)
## Loading required package: causalsens
# causalsens requires a binary treatment, so dichotomize ddt_week
d$ddt_week<-ifelse(d$ddt_week>0,1,0)
out<-lm(paste0("getwarmord~ddt_week+",paste(vars,collapse="+")),data=d)
coef(out)["ddt_week"]
##   ddt_week 
## 0.04557408
outD<-glm(paste0("ddt_week~",paste(vars,collapse="+")),data=d,family=binomial())
alpha<-seq(-.1, .1, by = .001)
SensAnalysis<-causalsens(out,outD,as.formula(paste0("~",paste(vars,collapse="+"))),data=d,alpha=alpha,confound=one.sided)

Sensitivity Plots

par(mfrow=c(1,2))
plot(SensAnalysis,type="raw",bty="n")
plot(SensAnalysis,type="r.squared",bty="n")

Sensitivity Analysis

  • We’re adding to the discussion on post-treatment bias with a sensitivity analysis.
  • This is also in Rosenbaum (1984).
  • The variable one might suspect of inducing post-treatment bias in our example is “public acceptance”.

Rosenbaum Bounding

  • In general, Rosenbaum is a proponent of trying to “bound” biases.
  • He does this in his “normal” sensitivity analysis method, and we do the same here.
  • We will assume a “surrogate” for \(U\) (necessary for CIA), which is observed post-treatment.
  • The surrogate has two potential outcomes: \(S_1\) and \(S_0\)
  • It is presumed to have a linear effect on the outcome.
  • (As do the other observed covariates.)
  • This gives us the following two regression models: \(E[Y_1|S_1 = s , X = x] = \mu_1 + \beta' x + \gamma's\) and
    \(E[Y_0|S_0 = s , X = x] = \mu_0 + \beta' x + \gamma's\)
  • This gives us:
    \(\tau = E[ (\mu_1 + \beta' X + \gamma'S_1) - (\mu_0 + \beta' X + \gamma'S_0)]\)
  • Which is equal to the following useful expression:
    \(\tau = \mu_1 - \mu_0 + \gamma'( E[S_1 - S_0])\)
  • For us, this means that \(\tau = \beta_1 + \beta_2 E[S_1 - S_0]\), where \(\beta_1\) is the coefficient on the court ruling (the treatment) and \(\beta_2\) is the coefficient on public acceptance (the surrogate).

(Re)introduce Example

require(foreign,quietly=TRUE)
d <- read.dta("replicationdataIOLGBT.dta")
# Base specification
d$ecthrpos <- as.double(d$ecthrpos)-1
d.lm <- lm(policy~ecthrpos+pubsupport+ecthrcountry+lgbtlaws+cond+eumember0+euemploy+coemembe+lngdp+year+issue+ccode,d)
# Drop the rows the first fit discarded for missingness, then refit with factors
d <- d[-d.lm$na.action,]
d$issue <- as.factor(d$issue)
d$ccode <- as.factor(d$ccode)
d.lm <- lm(policy~ecthrpos+pubsupport+ecthrcountry+lgbtlaws+cond+eumember0+euemploy+coemembe+lngdp+year+issue+ccode,d)

Back to Bounding

  • Our surrogate is public acceptance.
  • But it can be swayed by court opinions, right? This is at least plausible.
  • Let’s try to get some reasonable bounds on \(\tau\).
# Grid of assumed values for E[S1 - S0], in SDs of the surrogate
sdS <- sd(d$pubsupport)
Ediff <- c(-1.5*sdS,-sdS,-sdS/2,0,sdS/2,sdS,1.5*sdS)
# tau = beta1 + beta2*E[S1 - S0]: treatment coefficient plus the surrogate
# coefficient times the assumed shift
tau <- coef(d.lm)[2] + coef(d.lm)[3]*Ediff
names(tau)<-c("-1.5","-1","-.5","0",".5","1","1.5")
tau
##       -1.5         -1        -.5          0         .5          1 
## 0.06620715 0.06817761 0.07014808 0.07211854 0.07408901 0.07605947 
##        1.5 
## 0.07802994
  • But with this method, you don’t necessarily have to assume that the regression functions are this rigid.
  • Can you think about how one might relax some assumptions? One possibility is sketched below.
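  • For instance, we could let the surrogate’s coefficient differ by treatment arm (a minimal sketch, assuming an interacted specification): with \(E[Y_1|S_1 = s, X = x] = \mu_1 + \beta'x + \gamma_1's\) and \(E[Y_0|S_0 = s, X = x] = \mu_0 + \beta'x + \gamma_0's\), we get \(\tau = \mu_1 - \mu_0 + \gamma_1'E[S_1] - \gamma_0'E[S_0]\), which now requires assumptions about \(E[S_1]\) and \(E[S_0]\) separately rather than just their difference:
# A hypothetical relaxation: let the surrogate's coefficient vary by treatment
d.lm2 <- lm(policy~ecthrpos*pubsupport+ecthrcountry+lgbtlaws+cond+eumember0+euemploy+coemembe+lngdp+year+issue+ccode,d)
g0 <- coef(d.lm2)["pubsupport"]                # gamma_0
g1 <- g0 + coef(d.lm2)["ecthrpos:pubsupport"]  # gamma_1
# Anchor E[S0] at the observed mean and vary E[S1] over the same grid as before
ES0 <- mean(d$pubsupport)
tau2 <- coef(d.lm2)["ecthrpos"] + g1*(ES0 + Ediff) - g0*ES0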