The raw .Rmd file is available in the repository. You can create a handout version of it with the following commands (assuming knitr and pandoc are installed), first in R:
require(knitr)
knit("presentation.Rmd")
And then on the command line:
pandoc presentation.md -o handout.pdf
.Rmd file.
?lm
install.packages and library
Find the appropriate packages and commands with Google and via searching in R:
?covariance
??covariance
install.packages("sandwich")
library("sandwich")
?vcovHC
str <- "This is a string"
paste("This", "is", "a", "string", sep = " ")
## [1] "This is a string"
as.character(99)
## [1] "99"
class(str)
## [1] "character"
num <- 99.867
class(num)
## [1] "numeric"
round(num, digits = 2)
## [1] 99.87
round(str, digits = 2)
## Error: non-numeric argument to mathematical function
pi
## [1] 3.142
exp(1)
## [1] 2.718
sin, exp, log, factorial, choose, besselJ, etc.
2 == 4
## [1] FALSE
class(2 == 4)
## [1] "logical"
str != num
## [1] TRUE
"34" == 34
## [1] TRUE
==, !=, >, <, >=, <=, !, &, |, any, all, etc.
X <- 1:100
Y <- rnorm(100, X)
out.lm <- lm(Y ~ X)
class(out.lm)
## [1] "lm"
out.lm$rank
## [1] 2
class(out.lm$rank)
## [1] "integer"
There are other ways to hold data, though:
as.vector(4)
## [1] 4
4
## [1] 4
c:
vec <- c("a", "b", "c")
vec
## [1] "a" "b" "c"
c(2, 3, vec)
## [1] "2" "3" "a" "b" "c"
c(1, 2, 3, 4) + c(1, 2)
## [1] 2 4 4 6
c(1, 2, 3, 4) + c(1, 2, 1, 2)
## [1] 2 4 4 6
c(1, 2, 3, 4) + c(1, 2, 3)
## Warning: longer object length is not a multiple of shorter object length
## [1] 2 4 6 5
vec[1]
## [1] "a"
names(vec) <- c("first", "second", "third")
vec
## first second third
## "a" "b" "c"
vec["first"]
## first
## "a"
vec[1] <- NA
vec
## first second third
## NA "b" "c"
is.na(vec)
## first second third
## TRUE FALSE FALSE
vec[!is.na(vec)] # vec[complete.cases(vec)]
## second third
## "b" "c"
listie <- list(first = vec, second = num)
listie
## $first
## first second third
## NA "b" "c"
##
## $second
## [1] 99.87
listie[[1]]
## first second third
## NA "b" "c"
listie$first
## first second third
## NA "b" "c"
A <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
A
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
A[1, 2]
## [1] 3
A[1, ]
## [1] 1 3
solve(A) #A^{-1}
## [,1] [,2]
## [1,] -2 1.5
## [2,] 1 -0.5
10 * A
## [,1] [,2]
## [1,] 10 30
## [2,] 20 40
B <- diag(c(1, 2))
B
## [,1] [,2]
## [1,] 1 0
## [2,] 0 2
A %*% B
## [,1] [,2]
## [1,] 1 6
## [2,] 2 8
A %*% diag(3)
## Error: non-conformable arguments
t(A) # A'
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
rbind(A, B)
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
## [3,] 1 0
## [4,] 0 2
cbind(A, B)
## [,1] [,2] [,3] [,4]
## [1,] 1 3 1 0
## [2,] 2 4 0 2
c(1, 2, 3) %x% c(1, 1) # Kronecker Product
## [1] 1 1 2 2 3 3
rownames(A)
## NULL
rownames(A) <- c("a", "b")
colnames(A) <- c("c", "d")
A
## c d
## a 1 3
## b 2 4
A[, "d"]
## a b
## 3 4
A[3]
## [1] 3
The workhorse: the data frame.
A data frame is basically just a matrix that allows mixing of types.
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
for (i in 1:5) {
cat(i * 10, " ")
}
## 10 20 30 40 50
# Print each element of vec by position. seq_along() gives an empty
# sequence for a zero-length vector, whereas 1:length(vec) would count
# down through 1, 0 and index past the end.
for (i in seq_along(vec)) {
  cat(vec[i], " ")
}
## NA b c
for (i in vec) {
cat(i, " ")
}
## NA b c
if (vec[2] == "b") print("Hello World!")
## [1] "Hello World!"
if (vec[3] == "a") {
print("Hello World!")
} else {
print("!dlroW olleH")
}
## [1] "!dlroW olleH"
vec <- letters[1:3]
# Preallocate the result as numeric (vector(length = n) would create a
# logical vector that is silently coerced on the first assignment).
new <- numeric(length(vec))
# Explicit loop version: 13 where the element is "b", 0 elsewhere.
# seq_along() stays correct even if vec is empty, unlike 1:length(vec).
for (i in seq_along(vec)) {
  if (vec[i] == "b") {
    new[i] <- 13
  } else {
    new[i] <- 0
  }
}
new
## [1] 0 13 0
new <- ifelse(vec == "b", 13, 0)
new
## [1] 0 13 0
# add3: shift the input by three.
#   X - value(s) to shift; handed directly to `+`, so vectors work too.
# Returns X + 3.
add3 <- function(X) {
  shifted <- X + 3
  shifted
}
add3(2)
## [1] 5
# makeGroups: build group labels 1..groups, each label repeated
# `members` times in a row (e.g. makeGroups(5, 2) gives 1 1 2 2 ... 5 5).
#   groups  - number of distinct groups.
#   members - how many consecutive entries each group gets (default 1).
# The result is the Kronecker product of the label sequence with a
# vector of ones; kronecker() is the function behind the %x% operator.
makeGroups <- function(groups, members = 1) {
  group_ids <- 1:groups
  ones <- rep(1, members)
  kronecker(group_ids, ones)
}
makeGroups(5)
## [1] 1 2 3 4 5
makeGroups(5, 2)
## [1] 1 1 2 2 3 3 4 4 5 5
Note: Most functions don’t drop missing values (complete case analysis) by default; you usually have to ask for it with the option na.rm=TRUE.
print, cat, paste, with, length, sort, order, unique, rep, nrow, ncol, complete.cases, subset, merge, mean, sum, sd, var, lag, lm, model.matrix, coef, vcov, residuals, vcovHC (from sandwich), ivreg (from AER), countrycode (from countrycode), summary, pdf, plot, tools from plm, and many more.
?Distributions
rnorm, dnorm, qnorm, pnorm
rdist - generate random variable from dist
ddist - density function of dist
qdist - quantile function of dist
pdist - distribution function of dist
rnorm(16)
## [1] 0.03219 -0.89729 -0.28782 0.86993 -1.21937 1.47985 0.38488
## [8] 0.28917 -1.66721 0.23155 1.63280 0.84529 -0.87946 -0.22374
## [15] 1.35861 -0.61532
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | |
summary(vec)
## Length Class Mode
## 3 character character
summary(c(1, 2, 3, 4))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 1.75 2.50 2.50 3.25 4.00
summary(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.30 Min. :2.00 Min. :1.00 Min. :0.1
## 1st Qu.:5.10 1st Qu.:2.80 1st Qu.:1.60 1st Qu.:0.3
## Median :5.80 Median :3.00 Median :4.35 Median :1.3
## Mean :5.84 Mean :3.06 Mean :3.76 Mean :1.2
## 3rd Qu.:6.40 3rd Qu.:3.30 3rd Qu.:5.10 3rd Qu.:1.8
## Max. :7.90 Max. :4.40 Max. :6.90 Max. :2.5
?summary ?summary.lm
apply - performs actions on the rows or columns of a matrix/array (1 for rows, 2 for columns, 3 for ??)
sapply - performs actions on every element of a vector
tapply - performs actions on a vector by group
replicate - performs the same action a given number of times
A
## c d
## a 1 3
## b 2 4
apply(A, 1, sum)
## a b
## 4 6
apply(A, 2, mean)
## c d
## 1.5 3.5
vec
## [1] "a" "b" "c"
sapply(vec, function(x) paste0(x, ".vec"))
## a b c
## "a.vec" "b.vec" "c.vec"
paste0(vec, ".vec")
## [1] "a.vec" "b.vec" "c.vec"
Why?
replicate is basically just sapply(1:N,funct) where funct never uses the index.
tapply(1:10, makeGroups(5, 2), mean)
## 1 2 3 4 5
## 1.5 3.5 5.5 7.5 9.5
setwd("~/github/Quant II Recitation/2014-01-31/")
dir()
## [1] "apsrtable.png" "figure" "handout.pdf"
## [4] "iris.csv" "presentation.html" "presentation.md"
## [7] "presentation.Rmd" "stargazer.png"
iris <- read.csv("iris.csv")
If data is, for instance, a Stata .dta file, use read.dta from the foreign package.
Useful options for reading data: sep, na.strings, stringsAsFactors
For different formats, Google it.
set.seed(1023) # Important for replication
X <- rnorm(1000, 0, 5)
Y <- sin(5 * X) * exp(abs(X)) + rnorm(1000)
dat <- data.frame(X, Y)
plot(X, Y, xlim = c(0, 5), ylim = c(-50, 50))
dat.lm <- lm(Y ~ X, data = dat)
dat.lm
##
## Call:
## lm(formula = Y ~ X, data = dat)
##
## Coefficients:
## (Intercept) X
## -216634 183687
summary(dat.lm)
##
## Call:
## lm(formula = Y ~ X, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.10e+08 -4.19e+05 2.01e+05 8.17e+05 9.08e+06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -216634 212126 -1.02 0.31
## X 183687 43470 4.23 2.6e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6710000 on 998 degrees of freedom
## Multiple R-squared: 0.0176, Adjusted R-squared: 0.0166
## F-statistic: 17.9 on 1 and 998 DF, p-value: 2.6e-05
xtable package:
require(xtable)
## Loading required package: xtable
xtable(dat.lm)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \hline
## (Intercept) & -216633.6722 & 212125.4622 & -1.02 & 0.3074 \\
## X & 183687.1735 & 43469.5839 & 4.23 & 0.0000 \\
## \hline
## \end{tabular}
## \end{table}
xtable works on any sort of matrix
xtable(A)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrr}
## \hline
## & c & d \\
## \hline
## a & 1.00 & 3.00 \\
## b & 2.00 & 4.00 \\
## \hline
## \end{tabular}
## \end{table}
xtable does with the lm object:
class(summary(dat.lm)$coefficients)
## [1] "matrix"
xtable(summary(dat.lm)$coefficients)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \hline
## (Intercept) & -216633.67 & 212125.46 & -1.02 & 0.31 \\
## X & 183687.17 & 43469.58 & 4.23 & 0.00 \\
## \hline
## \end{tabular}
## \end{table}
xtable(dat.lm)
xtable spits out:
print(xtable(dat.lm, digits = 1), booktabs = TRUE)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \toprule
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \midrule
## (Intercept) & -216633.7 & 212125.5 & -1.0 & 0.3 \\
## X & 183687.2 & 43469.6 & 4.2 & 0.0 \\
## \bottomrule
## \end{tabular}
## \end{table}
?xtable and ?print.xtable
require(apsrtable)
## Loading required package: apsrtable
dat.lm2 <- lm(Y ~ X + 0, data = dat)
apsrtable(dat.lm, dat.lm2)
## Note: no visible binding for global variable 'se'
## Note: no visible binding for global variable 'se'
## Note: no visible binding for global variable 'nmodels'
## Note: no visible binding for global variable 'lev'
## \begin{table}[!ht]
## \caption{}
## \label{}
## \begin{tabular}{ l D{.}{.}{2}D{.}{.}{2} }
## \hline
## & \multicolumn{ 1 }{ c }{ Model 1 } & \multicolumn{ 1 }{ c }{ Model 2 } \\ \hline
## % & Model 1 & Model 2 \\
## (Intercept) & -216633.67 & \\
## & (212125.46) & \\
## X & 183687.17 ^* & 182921.71 ^*\\
## & (43469.58) & (43464.06) \\
## $N$ & 1000 & 1000 \\
## $R^2$ & 0.02 & 0.02 \\
## adj. $R^2$ & 0.02 & 0.02 \\
## Resid. sd & 6706998.86 & 6707143.05 \\ \hline
## \multicolumn{3}{l}{\footnotesize{Standard errors in parentheses}}\\
## \multicolumn{3}{l}{\footnotesize{$^*$ indicates significance at $p< 0.05 $}}
## \end{tabular}
## \end{table}
library(png)
library(grid)
img <- readPNG("apsrtable.png")
grid.raster(img)
require(stargazer)
## Loading required package: stargazer
##
## Please cite as:
##
## Hlavac, Marek (2013). stargazer: LaTeX code and ASCII text for well-formatted regression and summary statistics tables.
## R package version 4.5.3. http://CRAN.R-project.org/package=stargazer
stargazer(dat.lm, dat.lm2)
##
## % Table created by stargazer v.4.5.3 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu
## % Date and time: Tue, Jan 28, 2014 - 23:38:48
## \begin{table}[!htbp] \centering
## \caption{}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lcc}
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## & \multicolumn{2}{c}{\textit{Dependent variable:}} \\
## \cline{2-3}
## \\[-1.8ex] & \multicolumn{2}{c}{Y} \\
## \\[-1.8ex] & (1) & (2)\\
## \hline \\[-1.8ex]
## X & 183,687.000$^{***}$ & 182,922.000$^{***}$ \\
## & (43,470.000) & (43,464.000) \\
## & & \\
## Constant & $-$216,634.000 & \\
## & (212,125.000) & \\
## & & \\
## \hline \\[-1.8ex]
## Observations & 1,000 & 1,000 \\
## R$^{2}$ & 0.018 & 0.017 \\
## Adjusted R$^{2}$ & 0.017 & 0.016 \\
## Residual Std. Error & 6,706,999.000 (df = 998) & 6,707,143.000 (df = 999) \\
## F Statistic & 17.860$^{***}$ (df = 1; 998) & 17.710$^{***}$ (df = 1; 999) \\
## \hline
## \hline \\[-1.8ex]
## \textit{Note:} & \multicolumn{2}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
## \normalsize
## \end{tabular}
## \end{table}
img <- readPNG("stargazer.png")
grid.raster(img)
Both packages are good (and can be supplemented with xtable when it is easier)
Get pretty close to what you want with these packages, and then tweak the LaTeX directly.
plot(x,y) plots the pairs of points in x and y
type - determines whether you plot points, lines or whatnot
pch - determines plotting character
xlim - x limits of the plot (likewise for y)
xlab - label on the x-axis
main - main plot label
col - color
plot. Try plot(dat.lm)
x <- seq(-1, 1, 0.01)
y <- 3/4 * (1 - x^2)
plot(x, y, type = "l", xlab = "h", ylab = "weight")
# loc.lin: kernel-weighted (local) linear regression at one evaluation point.
#
# Arguments:
#   Y  - outcome values, one per observation.
#   X  - regressor values, same length as Y.
#   c  - point at which the fit is evaluated (default 0).
#   bw - bandwidth; observations with |X - c| >= bw receive zero weight
#        (default: half the standard deviation of X).
#
# Returns a named numeric vector with
#   est - the fitted intercept, i.e. the regression estimate at X = c;
#   se  - the standard error of that intercept.
loc.lin <- function(Y, X, c = 0, bw = sd(X)/2) {
  # Distance from the evaluation point, measured in bandwidths.
  d <- (X - c) / bw
  # Epanechnikov kernel weights, zero outside |d| < 1. They stay a plain
  # vector: building the n x n matrix diag(w) costs O(n^2) memory and time
  # and misbehaves when there is only a single weight.
  w <- 3/4 * (1 - d^2) * (abs(d) < 1)
  # Design matrix: intercept plus the scaled distance.
  design <- cbind(1, d)
  # Row i of `design` scaled by w[i]; equivalent to diag(w) %*% design.
  weighted <- design * w
  # (X'WX)^{-1}, computed once and reused for both coefficients and variance.
  xtwx_inv <- solve(crossprod(weighted, design))
  b <- xtwx_inv %*% crossprod(weighted, Y)
  # Weighted residual variance; degrees of freedom are the number of
  # observations with positive weight minus the two fitted coefficients.
  resid <- Y - design %*% b
  sigma2 <- sum(w * resid^2) / (sum(w > 0) - 2)
  vcov_b <- xtwx_inv * sigma2
  # The argument `c` is numeric, so the call below still resolves to the
  # base function c(): R skips non-function bindings when looking up a call.
  c(est = b[1], se = sqrt(diag(vcov_b))[1])
}
X.est <- seq(0, 5, 0.1)
dat.llm <- sapply(X.est, function(x) loc.lin(Y, X, c = x, bw = 0.25))
plot(X, Y, xlim = c(0, 5), ylim = c(-50, 50), pch = 20)
lines(X.est, dat.llm[1, ], col = "red")
lines(X.est, dat.llm[1, ] + 1.96 * dat.llm[2, ], col = "pink")
lines(X.est, dat.llm[1, ] - 1.96 * dat.llm[2, ], col = "pink")