The raw .Rmd file is available in the repository. You can create a handout version of it with the following commands (assuming knitr and pandoc are installed), first in R:
# Load knitr (library() fails loudly if the package is missing) and knit the
# presentation; the file name must be passed as a quoted string.
library(knitr)
knit("presentation.Rmd")
And then on the command line:
pandoc presentation.md -o handout.pdf
.Rmd
file.
?lm
install.packages
and library
Find the appropriate packages and commands with Google and via searching in R:
?covariance
??covariance
install.packages("sandwich")
library("sandwich")
?vcovHC
str <- "This is a string"
paste("This", "is", "a", "string", sep = " ")
## [1] "This is a string"
as.character(99)
## [1] "99"
class(str)
## [1] "character"
num <- 99.867
class(num)
## [1] "numeric"
round(num, digits = 2)
## [1] 99.87
round(str, digits = 2)
## Error: non-numeric argument to mathematical function
pi
## [1] 3.142
exp(1)
## [1] 2.718
sin
, exp
, log
, factorial
, choose
, BesselJ
, etc
2 == 4
## [1] FALSE
class(2 == 4)
## [1] "logical"
str != num
## [1] TRUE
"34" == 34
## [1] TRUE
==
, !=
, >
, <
, >=
, <=
, !
, &
, |
, any
, all
, etc
X <- 1:100
Y <- rnorm(100, X)
out.lm <- lm(Y ~ X)
class(out.lm)
## [1] "lm"
out.lm$rank
## [1] 2
class(out.lm$rank)
## [1] "integer"
There are other ways to hold data, though:
as.vector(4)
## [1] 4
4
## [1] 4
c
:
vec <- c("a", "b", "c")
vec
## [1] "a" "b" "c"
c(2, 3, vec)
## [1] "2" "3" "a" "b" "c"
c(1, 2, 3, 4) + c(1, 2)
## [1] 2 4 4 6
c(1, 2, 3, 4) + c(1, 2, 1, 2)
## [1] 2 4 4 6
c(1, 2, 3, 4) + c(1, 2, 3)
## Warning: longer object length is not a multiple of shorter object length
## [1] 2 4 6 5
vec[1]
## [1] "a"
names(vec) <- c("first", "second", "third")
vec
## first second third
## "a" "b" "c"
vec["first"]
## first
## "a"
vec[1] <- NA
vec
## first second third
## NA "b" "c"
is.na(vec)
## first second third
## TRUE FALSE FALSE
vec[!is.na(vec)] # vec[complete.cases(vec)]
## second third
## "b" "c"
listie <- list(first = vec, second = num)
listie
## $first
## first second third
## NA "b" "c"
##
## $second
## [1] 99.87
listie[[1]]
## first second third
## NA "b" "c"
listie$first
## first second third
## NA "b" "c"
A <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
A
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
A[1, 2]
## [1] 3
A[1, ]
## [1] 1 3
solve(A) #A^{-1}
## [,1] [,2]
## [1,] -2 1.5
## [2,] 1 -0.5
10 * A
## [,1] [,2]
## [1,] 10 30
## [2,] 20 40
B <- diag(c(1, 2))
B
## [,1] [,2]
## [1,] 1 0
## [2,] 0 2
A %*% B
## [,1] [,2]
## [1,] 1 6
## [2,] 2 8
A %*% diag(3)
## Error: non-conformable arguments
t(A) # A'
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
rbind(A, B)
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
## [3,] 1 0
## [4,] 0 2
cbind(A, B)
## [,1] [,2] [,3] [,4]
## [1,] 1 3 1 0
## [2,] 2 4 0 2
c(1, 2, 3) %x% c(1, 1) # Kronecker Product
## [1] 1 1 2 2 3 3
rownames(A)
## NULL
rownames(A) <- c("a", "b")
colnames(A) <- c("c", "d")
A
## c d
## a 1 3
## b 2 4
A[, "d"]
## a b
## 3 4
A[3]
## [1] 3
The workhorse
Basically just a matrix that allows mixing of types.
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
for (i in 1:5) {
cat(i * 10, " ")
}
## 10 20 30 40 50
for (i in 1:length(vec)) {
cat(vec[i], " ")
}
## NA b c
for (i in vec) {
cat(i, " ")
}
## NA b c
if (vec[2] == "b") print("Hello World!")
## [1] "Hello World!"
if (vec[3] == "a") {
print("Hello World!")
} else {
print("!dlroW olleH")
}
## [1] "!dlroW olleH"
vec <- letters[1:3]
# Preallocate the result as numeric (the values stored are numbers), and
# iterate with seq_along() so an empty `vec` yields zero iterations instead of
# the 1, 0 sequence that 1:length(vec) would produce.
new <- numeric(length(vec))
for (i in seq_along(vec)) {
  if (vec[i] == "b") {
    new[i] <- 13
  } else {
    new[i] <- 0
  }
}
new
## [1] 0 13 0
new <- ifelse(vec == "b", 13, 0)
new
## [1] 0 13 0
# Add three to the input; works elementwise on numeric vectors.
add3 <- function(X) {
  X + 3
}
add3(2)
## [1] 5
# Build a vector of group ids: the ids 1..groups, each repeated `members`
# times, obtained as the Kronecker product of the id sequence with a vector
# of ones.
makeGroups <- function(groups, members = 1) {
  ids <- 1:groups
  ones <- rep(1, members)
  kronecker(ids, ones)
}
makeGroups(5)
## [1] 1 2 3 4 5
makeGroups(5, 2)
## [1] 1 1 2 2 3 3 4 4 5 5
Note: Most functions don’t do complete case analysis by default (usually option na.rm=TRUE)
print
, cat
, paste
, with
, length
, sort
, order
, unique
, rep
, nrow
, ncol
, complete.cases
, subset
, merge
, mean
, sum
, sd
, var
, lag
, lm
, model.matrix
, coef
, vcov
, residuals
, vcovHC
(from sandwich
), ivreg
(from AER
), countrycode
(from countrycode
), summary
, pdf
, plot
, Tools from plm
, and many more.
?Distributions
rnorm
, dnorm
, qnorm
, pnorm
rdist
- generate random variable from dist
ddist
- density function of dist
qdist
- quantile function of dist
pdist
- distribution function of dist
rnorm(16)
## [1] 0.03219 -0.89729 -0.28782 0.86993 -1.21937 1.47985 0.38488
## [8] 0.28917 -1.66721 0.23155 1.63280 0.84529 -0.87946 -0.22374
## [15] 1.35861 -0.61532
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
|
summary(vec)
## Length Class Mode
## 3 character character
summary(c(1, 2, 3, 4))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 1.75 2.50 2.50 3.25 4.00
summary(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.30 Min. :2.00 Min. :1.00 Min. :0.1
## 1st Qu.:5.10 1st Qu.:2.80 1st Qu.:1.60 1st Qu.:0.3
## Median :5.80 Median :3.00 Median :4.35 Median :1.3
## Mean :5.84 Mean :3.06 Mean :3.76 Mean :1.2
## 3rd Qu.:6.40 3rd Qu.:3.30 3rd Qu.:5.10 3rd Qu.:1.8
## Max. :7.90 Max. :4.40 Max. :6.90 Max. :2.5
?summary
?summary.lm
apply
- performs actions on the rows or columns of a matrix/array (1 for rows, 2 for columns, 3 for the third dimension of an array)
sapply
- performs actions on every element of a vector
tapply
- performs actions on a vector by group
replicate
- performs the same action a given number of times
A
## c d
## a 1 3
## b 2 4
apply(A, 1, sum)
## a b
## 4 6
apply(A, 2, mean)
## c d
## 1.5 3.5
vec
## [1] "a" "b" "c"
sapply(vec, function(x) paste0(x, ".vec"))
## a b c
## "a.vec" "b.vec" "c.vec"
paste0(vec, ".vec")
## [1] "a.vec" "b.vec" "c.vec"
Why?
replicate
is basically just sapply(1:N,funct)
where funct
never uses the index.
tapply(1:10, makeGroups(5, 2), mean)
## 1 2 3 4 5
## 1.5 3.5 5.5 7.5 9.5
setwd("~/github/Quant II Recitation/2014-01-31/")
dir()
## [1] "apsrtable.png" "figure" "handout.pdf"
## [4] "iris.csv" "presentation.html" "presentation.md"
## [7] "presentation.Rmd" "stargazer.png"
iris <- read.csv("iris.csv")
If data is, for instance, a Stata .dta file, use read.dta
from the foreign
package.
Useful options for reading data: sep
, na.strings
, stringsAsFactors
For different formats, Google it.
set.seed(1023) # Important for replication
X <- rnorm(1000, 0, 5)
Y <- sin(5 * X) * exp(abs(X)) + rnorm(1000)
dat <- data.frame(X, Y)
plot(X, Y, xlim = c(0, 5), ylim = c(-50, 50))
dat.lm <- lm(Y ~ X, data = dat)
dat.lm
##
## Call:
## lm(formula = Y ~ X, data = dat)
##
## Coefficients:
## (Intercept) X
## -216634 183687
summary(dat.lm)
##
## Call:
## lm(formula = Y ~ X, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.10e+08 -4.19e+05 2.01e+05 8.17e+05 9.08e+06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -216634 212126 -1.02 0.31
## X 183687 43470 4.23 2.6e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6710000 on 998 degrees of freedom
## Multiple R-squared: 0.0176, Adjusted R-squared: 0.0166
## F-statistic: 17.9 on 1 and 998 DF, p-value: 2.6e-05
xtable
package:
require(xtable)
## Loading required package: xtable
xtable(dat.lm)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \hline
## (Intercept) & -216633.6722 & 212125.4622 & -1.02 & 0.3074 \\
## X & 183687.1735 & 43469.5839 & 4.23 & 0.0000 \\
## \hline
## \end{tabular}
## \end{table}
xtable
works on any sort of matrix
xtable(A)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrr}
## \hline
## & c & d \\
## \hline
## a & 1.00 & 3.00 \\
## b & 2.00 & 4.00 \\
## \hline
## \end{tabular}
## \end{table}
xtable
does with the lm
object:
class(summary(dat.lm)$coefficients)
## [1] "matrix"
xtable(summary(dat.lm)$coefficients)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \hline
## (Intercept) & -216633.67 & 212125.46 & -1.02 & 0.31 \\
## X & 183687.17 & 43469.58 & 4.23 & 0.00 \\
## \hline
## \end{tabular}
## \end{table}
xtable(dat.lm)
xtable
spits out:
print(xtable(dat.lm, digits = 1), booktabs = TRUE)
## % latex table generated in R 3.0.2 by xtable 1.7-1 package
## % Tue Jan 28 23:38:40 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \toprule
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \midrule
## (Intercept) & -216633.7 & 212125.5 & -1.0 & 0.3 \\
## X & 183687.2 & 43469.6 & 4.2 & 0.0 \\
## \bottomrule
## \end{tabular}
## \end{table}
?xtable
and ?print.xtable
require(apsrtable)
## Loading required package: apsrtable
dat.lm2 <- lm(Y ~ X + 0, data = dat)
apsrtable(dat.lm, dat.lm2)
## Note: no visible binding for global variable 'se'
## Note: no visible binding for global variable 'se'
## Note: no visible binding for global variable 'nmodels'
## Note: no visible binding for global variable 'lev'
## \begin{table}[!ht]
## \caption{}
## \label{}
## \begin{tabular}{ l D{.}{.}{2}D{.}{.}{2} }
## \hline
## & \multicolumn{ 1 }{ c }{ Model 1 } & \multicolumn{ 1 }{ c }{ Model 2 } \\ \hline
## % & Model 1 & Model 2 \\
## (Intercept) & -216633.67 & \\
## & (212125.46) & \\
## X & 183687.17 ^* & 182921.71 ^*\\
## & (43469.58) & (43464.06) \\
## $N$ & 1000 & 1000 \\
## $R^2$ & 0.02 & 0.02 \\
## adj. $R^2$ & 0.02 & 0.02 \\
## Resid. sd & 6706998.86 & 6707143.05 \\ \hline
## \multicolumn{3}{l}{\footnotesize{Standard errors in parentheses}}\\
## \multicolumn{3}{l}{\footnotesize{$^*$ indicates significance at $p< 0.05 $}}
## \end{tabular}
## \end{table}
library(png)
library(grid)
img <- readPNG("apsrtable.png")
grid.raster(img)
require(stargazer)
## Loading required package: stargazer
##
## Please cite as:
##
## Hlavac, Marek (2013). stargazer: LaTeX code and ASCII text for well-formatted regression and summary statistics tables.
## R package version 4.5.3. http://CRAN.R-project.org/package=stargazer
stargazer(dat.lm, dat.lm2)
##
## % Table created by stargazer v.4.5.3 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu
## % Date and time: Tue, Jan 28, 2014 - 23:38:48
## \begin{table}[!htbp] \centering
## \caption{}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lcc}
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## & \multicolumn{2}{c}{\textit{Dependent variable:}} \\
## \cline{2-3}
## \\[-1.8ex] & \multicolumn{2}{c}{Y} \\
## \\[-1.8ex] & (1) & (2)\\
## \hline \\[-1.8ex]
## X & 183,687.000$^{***}$ & 182,922.000$^{***}$ \\
## & (43,470.000) & (43,464.000) \\
## & & \\
## Constant & $-$216,634.000 & \\
## & (212,125.000) & \\
## & & \\
## \hline \\[-1.8ex]
## Observations & 1,000 & 1,000 \\
## R$^{2}$ & 0.018 & 0.017 \\
## Adjusted R$^{2}$ & 0.017 & 0.016 \\
## Residual Std. Error & 6,706,999.000 (df = 998) & 6,707,143.000 (df = 999) \\
## F Statistic & 17.860$^{***}$ (df = 1; 998) & 17.710$^{***}$ (df = 1; 999) \\
## \hline
## \hline \\[-1.8ex]
## \textit{Note:} & \multicolumn{2}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
## \normalsize
## \end{tabular}
## \end{table}
img <- readPNG("stargazer.png")
grid.raster(img)
Both packages are good (and can be supplemented with xtable
when it is easier)
Get pretty close to what you want with these packages, and then tweak the LaTeX directly.
plot(x,y)
plots the pairs of points in x
and y
type
- determines whether you plot points, lines or whatnot
pch
- determines plotting character
xlim
- x limits of the plot (likewise for y
)
xlab
- label on the x-axis
main
- main plot label
col
- color
plot
. Try plot(dat.lm)
x <- seq(-1, 1, 0.01)
y <- 3/4 * (1 - x^2)
plot(x, y, type = "l", xlab = "h", ylab = "weight")
# Local linear regression estimate at a single evaluation point.
#
# Fits a kernel-weighted least-squares line of Y on the rescaled distance
# (X - c) / bw, using the Epanechnikov kernel 3/4 * (1 - u^2) on |u| < 1,
# and returns the fitted intercept (the estimate at X = c) with its
# standard error.
#
# Arguments:
#   Y  - numeric response vector.
#   X  - numeric regressor vector, same length as Y.
#   c  - evaluation point (default 0).
#   bw - bandwidth (default sd(X) / 2).
#
# Returns a named numeric vector c(est = ..., se = ...).
#
# The weights are kept as a vector and applied row-wise; this avoids
# materialising an n x n diagonal matrix on every call while computing the
# same quantities (X'WX, X'WY and the weighted residual sum of squares).
loc.lin <- function(Y, X, c = 0, bw = sd(X)/2) {
  d <- (X - c) / bw
  w <- 3/4 * (1 - d^2) * (abs(d) < 1)
  Xd <- cbind(1, d)
  # t(Xd * w) equals X'W because w is recycled down each column of Xd.
  XtW <- t(Xd * w)
  XtWX <- XtW %*% Xd
  b <- solve(XtWX, XtW %*% Y)
  res <- drop(Y - Xd %*% b)
  # Weighted residual variance; degrees of freedom count only the
  # observations with positive weight, minus the two fitted coefficients.
  s2 <- sum(w * res^2) / (sum(w > 0) - 2)
  vc <- solve(XtWX) * s2
  c(est = b[1], se = sqrt(vc[1, 1]))
}
X.est <- seq(0, 5, 0.1)
dat.llm <- sapply(X.est, function(x) loc.lin(Y, X, c = x, bw = 0.25))
plot(X, Y, xlim = c(0, 5), ylim = c(-50, 50), pch = 20)
lines(X.est, dat.llm[1, ], col = "red")
lines(X.est, dat.llm[1, ] + 1.96 * dat.llm[2, ], col = "pink")
lines(X.est, dat.llm[1, ] - 1.96 * dat.llm[2, ], col = "pink")