########################
# Lecture 5
# Hypothesis Testing
# POLS 509: The Linear Model
# Dr. Justin Esarey
########################

# Start from an empty workspace and seed the random number generator.
rm(list = ls())
set.seed(123456)

# show a distribution
###############################

# Normal density with mean 1.5 and sd 1, evaluated on a grid from -2 to 5.
# The variable names are kept because the default axis labels are taken
# from them.
beta.hat <- seq(-2, 5, by = 0.01)
pdf.of.beta.hat <- dnorm(beta.hat, mean = 1.5, sd = 1)
plot(
  x = beta.hat,
  y = pdf.of.beta.hat,
  type = "l",
  main = "Distribution of Estimated Beta.hat"
)
# Dashed vertical reference line at zero.
abline(v = 0, lty = 2)
# Copy the figure on the current device to an EPS file.
dev.copy2eps(file = "beta_hat_dist.eps")



# visual examples for hypothesis testing
###############################

# Standard normal density drawn as the distribution of beta-hat under the
# null, with the area to the right of the estimate (1.5) shaded.
null <- seq(-3, 3, by = 0.01)
pdf.of.null <- dnorm(null)
plot(
  x = null,
  y = pdf.of.null,
  type = "l",
  main = "Distribution of beta-hat under null",
  xlab = "beta hat",
  ylab = "f(beta-hat)"
)
# Dashed line at the estimate, dotted line at the null value.
abline(v = c(1.5, 0), lty = c(2, 3))

# Build the shaded upper-tail polygon: density values over [1.51, 3],
# lowered by a small offset, then closed down to the x-axis at both ends.
shade.x <- seq(1.51, 3, by = 0.01)
shade.y <- dnorm(shade.x) - 0.00075
shade.x <- c(1.51, shade.x, 3)
shade.y <- c(0, shade.y, 0)
polygon(x = shade.x, y = shade.y, col = "gray", border = NA)

legend("topleft", legend = c("null beta", "est. beta-hat"), lty = c(3, 2))
dev.copy2eps(file = "null.eps")



# Same one-tailed picture, now labelled on the z scale: standard normal
# density with the area to the right of z = 1.5 shaded.
null <- seq(from = -3, to = 3, by = 0.01)
pdf.of.null <- dnorm(null, mean = 0, sd = 1)
# The x-axis is labelled "z" so it agrees with the title, the y-axis label
# and the legend (it previously read "beta hat").
plot(pdf.of.null ~ null, type = "l", main = "Distribution of z under null",
     xlab = "z", ylab = "f(z)")
abline(v = 1.5, lty = 2)  # dashed: z of the estimate
abline(v = 0, lty = 3)    # dotted: z of the null

# Upper-tail polygon: density over [1.51, 3] minus a small offset, closed
# down to the x-axis at both ends.
shade.x <- c(1.51, seq(from = 1.51, to = 3, by = 0.01), 3)
shade.y <- c(0, dnorm(seq(from = 1.51, to = 3, by = 0.01), mean = 0, sd = 1) - 0.00075, 0)
polygon(shade.x, shade.y, border = NA, col = "gray")

legend("topleft", lty = c(3, 2), legend = c("z(null)", "z(est. beta-hat)"))
dev.copy2eps(file = "null_z.eps")



# Two-tailed picture on the z scale: standard normal density with both the
# area above z = 1.5 and the area below z = -1.5 shaded.
null <- seq(from = -3, to = 3, by = 0.01)
pdf.of.null <- dnorm(null, mean = 0, sd = 1)
# The x-axis is labelled "z" so it agrees with the title, the y-axis label
# and the legend (it previously read "beta hat").
plot(pdf.of.null ~ null, type = "l", main = "Distribution of z under null",
     xlab = "z", ylab = "f(z)")
abline(v = 1.5, lty = 2)  # dashed: z of the estimate
abline(v = 0, lty = 3)    # dotted: z of the null

# Upper tail: density over [1.51, 3] minus a small offset, closed down to
# the x-axis at both ends.
shade.x <- c(1.51, seq(from = 1.51, to = 3, by = 0.01), 3)
shade.y <- c(0, dnorm(seq(from = 1.51, to = 3, by = 0.01), mean = 0, sd = 1) - 0.00075, 0)
polygon(shade.x, shade.y, border = NA, col = "gray")

# Lower tail: mirror image over [-3, -1.51].
# NOTE(review): the offset here (0.0025) is larger than the upper tail's
# (0.00075), so the two shaded regions are not exactly symmetric; confirm
# this is intentional.
shade.x <- c(-3, seq(from = -3, to = -1.51, by = 0.01), -1.51)
shade.y <- c(0, dnorm(seq(from = -3, to = -1.51, by = 0.01), mean = 0, sd = 1) - 0.0025, 0)
polygon(shade.x, shade.y, border = NA, col = "gray")

legend("topleft", lty = c(3, 2), legend = c("z(null)", "z(est. beta-hat)"))
dev.copy2eps(file = "z_twotail.eps")



# show relevant distributions
###############################

# Standard normal: density (left panel) and cumulative distribution
# (right panel), side by side on a grid from -3 to 3.
z <- seq(-3, 3, by = 0.01)
par(mfrow = c(1, 2))
plot(
  x = z,
  y = dnorm(z, mean = 0, sd = 1),
  type = "l",
  main = "Normal Distribution PDF, mu=0, sd=1",
  ylab = "density"
)
plot(
  x = z,
  y = pnorm(z, mean = 0, sd = 1),
  type = "l",
  main = "Normal Distribution CDF, mu=0, sd=1",
  ylab = "probability"
)

# Chi-squared densities for 2, 3 and 7 degrees of freedom on one panel,
# evaluated on a grid from 0 to 10.
par(mfrow = c(1, 1))
z <- seq(0, 10, by = 0.01)
plot(
  x = z,
  y = dchisq(z, df = 2),
  type = "l",
  ylab = "density",
  main = "Chi Squared PDF"
)
lines(x = z, y = dchisq(z, df = 3), lty = 2)
lines(x = z, y = dchisq(z, df = 7), lty = 3)
legend("topright", legend = paste0("m=", c(2, 3, 7)), lty = 1:3)

# Student t densities for 1, 5, 10 and 30 degrees of freedom on one panel,
# evaluated on a grid from -3 to 3, then saved as EPS.
z <- seq(-3, 3, by = 0.01)
par(mfrow = c(1, 1))
plot(
  x = z,
  y = dt(z, df = 1),
  type = "l",
  ylim = c(0, 0.4),
  main = "T Distribution PDF",
  ylab = "density"
)
# Overlay the remaining curves, each with its own line type (2, 3, 4).
invisible(Map(
  function(deg, style) lines(x = z, y = dt(z, df = deg), lty = style),
  c(5, 10, 30),
  2:4
))
legend("topright", legend = paste0("df=", c(1, 5, 10, 30)), lty = 1:4)
dev.copy2eps(file = "t-dist.eps")


# F testing
###############################

# F distribution
# Densities of the F distribution with 30 denominator degrees of freedom
# and 1, 3, 5 and 10 numerator degrees of freedom, saved as EPS.

# The grid starts at 0.01 rather than 0.
z <- seq(from = 0.01, to = 5, by = 0.01)
par(mfrow = c(1, 1))
# The stray empty argument after the formula ("~z,, ylim") has been removed.
plot(df(z, df1 = 1, df2 = 30) ~ z, ylim = c(0, 2), type = "l",
     main = "F distribution PDF, n-k=30", ylab = "density")
lines(df(z, df1 = 3, df2 = 30) ~ z, lty = 2)
lines(df(z, df1 = 5, df2 = 30) ~ z, lty = 3)
lines(df(z, df1 = 10, df2 = 30) ~ z, lty = 4)
legend("topright", legend = c("df1=1", "df1=3", "df1=5", "df1=10"), lty = 1:4)
dev.copy2eps(file = "f-dist.eps")



# tests of simple hypotheses
###############################

# Simulate a data set, write it to a Stata file, then reload it from disk.

# Clear the workspace left over from the plotting sections.
rm(list = ls())
# library() stops immediately if 'foreign' is not installed; require() would
# only warn and let the script fail later at write.dta().
library(foreign)
set.seed(123456)

# y depends on x only (slope 2, intercept 1, normal noise with sd 1.5);
# z and w are drawn independently and do not enter y.
x <- runif(100, min = 0, max = 10)
y <- 2 * x + 1 + rnorm(100, mean = 0, sd = 1.5)
z <- runif(100, min = 0, max = 10)
w <- runif(100, min = 0, max = 10)
# Build the data frame directly instead of going through a cbind() matrix.
example.data <- data.frame(y, w, x, z)
write.dta(example.data, file = "lecture5data.dta")

# Clear the workspace again so the data used below comes from the file.
rm(list = ls())
example.data <- read.dta(file = "lecture5data.dta")
# attach() is retained so that code referring to the columns by bare name
# keeps working.
attach(example.data)

# Bivariate regression of y on x. The data frame is passed explicitly so
# the fit does not depend on which objects are on the search path.
model <- lm(y ~ x, data = example.data)

# conduct t-tests of each coefficient
summary(model)

# show that the standard errors come from the vcv matrix:
# the square roots of its diagonal are the coefficient standard errors
vcov(model)
sqrt(diag(vcov(model)))


# conduct an F-test for multiple restrictions:
# model is nested in model2, which adds w and z, so anova() compares the
# restricted and unrestricted fits
model2 <- lm(y ~ w + x + z, data = example.data)

anova(model, model2)





####################################################################