require(MatchIt)
data(lalonde, package = "MatchIt")
p.model <- glm(treat ~ age + educ + black + hispan + married + nodegree + re74 +
re75, lalonde, family = "binomial")
pscore <- predict(p.model, type = "response")
ipw <- lalonde$treat + (1 - lalonde$treat)/(1 - pscore)
ipw.mod <- lm(re78 ~ treat + age + educ + black + hispan + married + nodegree +
re74 + re75, lalonde, weights = ipw)
N <- nrow(lalonde)
bootstrapLalonde <- function(out = "coef") {
b.samp <- sample(N, replace = TRUE)
b.p.model <- glm(treat ~ age + educ + black + hispan + married + nodegree +
re74 + re75, lalonde[b.samp, ], family = "binomial")
b.pscore <- predict(b.p.model, type = "response")
b.ipw <- lalonde$treat[b.samp] + (1 - lalonde$treat[b.samp])/(1 - b.pscore)
b.ipw.mod <- lm(re78 ~ treat + age + educ + black + hispan + married + nodegree +
re74 + re75, lalonde[b.samp, ], weights = b.ipw)
if (out == "coef") {
out <- coef(b.ipw.mod)[2]
} else if (out == "t") {
out <- summary(b.ipw.mod)$coefficients[2, 3]
} else {
stop("Unknown output statistic.")
}
out
}
b.samps.coef <- replicate(500, bootstrapLalonde())
b.samps.t <- replicate(500, bootstrapLalonde(out = "t"))
c(coef(ipw.mod)[2], mean(b.samps.coef))
## treat
## 1332 1389
c(summary(ipw.mod)$coefficients[2, 2], sd(b.samps.coef))
## [1] 697.9 767.1
c(summary(ipw.mod)$coefficients[2, 3], mean(b.samps.t))
## [1] 1.909 1.904
dat <- read.csv(file = "GreenVavreck_PolAnalysis_2008_PA_Replication.csv", head = TRUE,
sep = ",")
f1 <- paste("tout1 ~ treat + ", paste(names(dat)[11:49], collapse = " + "))
ols1 <- lm(f1, dat)
summary(ols1)$coefficients[2, ]
## Estimate Std. Error t value Pr(>|t|)
## 0.0241120 0.0070322 3.4288223 0.0006072
robust.se <- function(model, cluster) {
require(sandwich)
require(lmtest)
M <- length(unique(cluster))
N <- length(cluster)
K <- model$rank
dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
uj <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum))
rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
rcse.se <- coeftest(model, rcse.cov)
return(list(rcse.cov, rcse.se))
}
robust.se(ols1, dat$syscode)[[2]][2, ]
## Note: no visible binding for global variable 'lmtest'
## Estimate Std. Error t value Pr(>|t|)
## 0.02411 0.01405 1.71577 0.08622
ses <- c(model = summary(ols1)$coefficients[2, 2], cl.robust = robust.se(ols1,
dat$syscode)[[2]][2, 2])
H <- length(unique(dat$syscode))
clus <- unique(dat$syscode)
obs.in.clus <- sapply(clus, function(h) which(dat$syscode == h))
clus <- as.character(clus)
names(obs.in.clus) <- clus
pairsBoot.GV <- function(out = "coef") {
b.samp <- sample(clus, replace = TRUE)
b.obs <- unlist(sapply(b.samp, function(h) obs.in.clus[[h]]))
b.ols <- lm(f1, dat[b.obs, ])
if (out == "coef") {
out <- coef(b.ols)[2]
} else if (out == "t") {
out <- summary(b.ols)$coefficients[2, 3]
} else {
stop("Unknown output type.")
}
out
}
b.samps.coef <- replicate(500, pairsBoot.GV())
# b.samps.t <- replicate(500, pairsBoot.GV(coef='t'))
ses <- c(ses, pairs.boot = sd(b.samps.coef))
round(ses, 3)
## model cl.robust pairs.boot
## 0.007 0.014 0.024
ols.fit <- predict(ols1)
ols.res <- residuals(ols1)
dat$syscode <- as.character(dat$syscode)
f.b <- paste("newY ~ treat + ", paste(names(dat)[11:49], collapse = " + "))
wildBoot.GV <- function(out = "coef") {
toflip <- rbinom(H, 1, 0.5)
names(toflip) <- clus
toflip <- names(toflip)[toflip == 1]
b.resid <- ifelse(dat$syscode %in% toflip, -ols.res, ols.res)
newY <- ols.fit + b.resid
b.ols <- lm(f.b, cbind(newY, dat))
if (out == "coef") {
out <- coef(b.ols)[2]
} else if (out == "t") {
out <- summary(b.ols)$coefficients[2, 3]
} else {
stop("Unknown output type.")
}
out
}
b.samps.coef <- replicate(500, wildBoot.GV())
ses <- c(ses, wild.boot = sd(b.samps.coef))
round(ses, 3)
## model cl.robust pairs.boot wild.boot
## 0.007 0.014 0.024 0.015
require(foreign)
dat <- read.dta("maketable5.dta")
dat <- subset(dat, baseco == 1)
require(AER)
first <- lm(avexpr ~ logem4 + f_brit + f_french, dat)
iv2sls <- ivreg(logpgp95 ~ avexpr + f_brit + f_french, ~logem4 + f_brit + f_french,
dat)
require(car)
linearHypothesis(first, "logem4", test = "F")
## Linear hypothesis test
##
## Hypothesis:
## logem4 = 0
##
## Model 1: restricted model
## Model 2: avexpr ~ logem4 + f_brit + f_french
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 61 117
## 2 60 94 1 23 14.7 0.00031 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(iv2sls)
##
## Call:
## ivreg(formula = logpgp95 ~ avexpr + f_brit + f_french | logem4 +
## f_brit + f_french, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2716 -0.7488 0.0728 0.7544 2.4004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.372 1.388 0.99 0.327
## avexpr 1.078 0.218 4.95 6.3e-06 ***
## f_brit -0.778 0.354 -2.20 0.032 *
## f_french -0.117 0.355 -0.33 0.743
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.04 on 60 degrees of freedom
## Multiple R-Squared: 0.0483, Adjusted R-squared: 0.000748
## Wald test: 10.1 on 3 and 60 DF, p-value: 1.82e-05
gamma <- seq(-1, 1, 0.25)
ExclSens <- function(g) {
newY <- dat$logpgp95 - g * dat$logem4
coef(ivreg(newY ~ avexpr + f_brit + f_french, ~logem4 + f_brit + f_french,
cbind(dat, newY)))[2]
}
sens.coefs <- sapply(gamma, ExclSens)
names(sens.coefs) <- gamma
round(sens.coefs, 3)
## -1 -0.75 -0.5 -0.25 0 0.25 0.5 0.75 1
## -0.793 -0.326 0.142 0.610 1.078 1.546 2.013 2.481 2.949