########################
# Lecture 6
# Complex Hypothesis Testing and Interaction
# POLS 509: The Linear Model
# Dr. Justin Esarey
########################

rm(list=ls())
set.seed(20985)

# Power Analysis
########################

# distribution of beta-hat under the null hypothesis (beta = 0)
null<-seq(from=-3, to=3, by=0.01)
pdf.of.null<-dnorm(null, mean=0, sd=1)
plot(pdf.of.null~null, type="l", main="Distribution of beta hat under null",
     xlab="beta hat", ylab="f(b.hat)")
abline(v=1.5, lty=2)
abline(v=0, lty=3)
shade.x<-c(1.51, seq(from=1.51, to=3, by=0.01), 3)
shade.y<-c(0, dnorm(seq(from=1.51, to=3, by=0.01), mean=0, sd=1)-0.00075, 0)
polygon(shade.x, shade.y, border=NA, col="gray")
legend("topleft", lty=c(3,2), legend=c("null beta", "est. beta-hat"))
dev.copy2eps(file="null.eps")

# distribution of beta-hat under the alternative (beta = 1.5)
null<-seq(from=-3, to=3, by=0.01)
pdf.of.null<-dnorm(null, mean=1.5, sd=1)
plot(pdf.of.null~null, type="l", main="Distribution of beta hat under alternative",
     xlab="beta hat", ylab="f(b.hat)")
shade.x<-c(-3, seq(from=-3, to=1.65, by=0.01), 1.65)
shade.y<-c(0, dnorm(seq(from=-3, to=1.65, by=0.01), mean=1.5, sd=1)-0.00075, 0)
polygon(shade.x, shade.y, border=NA, col="gray")
abline(v=1.65, lty=2)
abline(v=1.5, lty=3)
legend("topleft", lty=c(3,2), legend=c("est. beta-hat", "critical b(t) for significance"))
dev.copy2eps(file="alternative.eps")

# power/size tradeoff at the 5% critical value (1.96)
null<-seq(from=-3, to=3, by=0.01)
pdf.of.null<-dnorm(null, mean=0, sd=1)
pdf.of.alt<-dnorm(null, mean=1.5, sd=1)
plot(1, type="n", axes=T, main="Power/Size Tradeoff", xlim=c(-3,3), ylim=c(0, 0.4),
     xlab="beta hat", ylab="f(b.hat)")
shade.x<-c(1.96, seq(from=1.96, to=3, by=0.01), 3)
shade.y<-c(0, dnorm(seq(from=1.96, to=3, by=0.01), mean=0, sd=1), 0)
polygon(shade.x, shade.y, border=NA, col="coral")
shade.x<-c(-3, seq(from=-3, to=1.96, by=0.01), 1.96)
shade.y<-c(0, dnorm(seq(from=-3, to=1.96, by=0.01), mean=1.5, sd=1), 0)
polygon(shade.x, shade.y, border=NA, col="cadetblue")
lines(pdf.of.null~null, col="red")
lines(pdf.of.alt~null, col="blue")
legend("topleft", col=c("red", "blue", "black"), lty=c(1,1,2),
       legend=c("f(b.hat under null)", "f(b.hat|b.hat=1.5)", "critical b(t)=1.96"))
abline(v=1.96, lty=2)
dev.copy2eps(file="power-size.eps")

# power/size tradeoff at the 10% critical value (1.645)
null<-seq(from=-3, to=3, by=0.01)
pdf.of.null<-dnorm(null, mean=0, sd=1)
pdf.of.alt<-dnorm(null, mean=1.5, sd=1)
plot(1, type="n", axes=T, main="Power/Size Tradeoff", xlim=c(-3,3), ylim=c(0, 0.4),
     xlab="beta hat", ylab="f(b.hat)")
shade.x<-c(1.645, seq(from=1.645, to=3, by=0.01), 3)
shade.y<-c(0, dnorm(seq(from=1.645, to=3, by=0.01), mean=0, sd=1), 0)
polygon(shade.x, shade.y, border=NA, col="coral")
shade.x<-c(-3, seq(from=-3, to=1.645, by=0.01), 1.645)
shade.y<-c(0, dnorm(seq(from=-3, to=1.645, by=0.01), mean=1.5, sd=1), 0)
polygon(shade.x, shade.y, border=NA, col="cadetblue")
lines(pdf.of.null~null, col="red")
lines(pdf.of.alt~null, col="blue")
legend("topleft", col=c("red", "blue", "black"), lty=c(1,1,2),
       legend=c("f(b.hat under null)", "f(b.hat|b.hat=1.5)", "critical b(t)=1.645"))
abline(v=1.645, lty=2)
dev.copy2eps(file="power-size2.eps")

# power/size tradeoff at a loose critical value (1)
null<-seq(from=-3, to=3, by=0.01)
pdf.of.null<-dnorm(null, mean=0, sd=1)
pdf.of.alt<-dnorm(null, mean=1.5, sd=1)
plot(1, type="n", axes=T, main="Power/Size Tradeoff", xlim=c(-3,3), ylim=c(0, 0.4),
     xlab="beta hat", ylab="f(b.hat)")
shade.x<-c(1, seq(from=1, to=3, by=0.01), 3)
shade.y<-c(0, dnorm(seq(from=1, to=3, by=0.01), mean=0, sd=1), 0)
polygon(shade.x, shade.y, border=NA, col="coral")
shade.x<-c(-3, seq(from=-3, to=1, by=0.01), 1)
shade.y<-c(0, dnorm(seq(from=-3, to=1, by=0.01), mean=1.5, sd=1), 0)
polygon(shade.x, shade.y, border=NA, col="cadetblue")
lines(pdf.of.null~null, col="red")
lines(pdf.of.alt~null, col="blue")
legend("topleft", col=c("red", "blue", "black"), lty=c(1,1,2),
       legend=c("f(b.hat under null)", "f(b.hat|b.hat=1.5)", "critical b(t)=1"))
abline(v=1, lty=2)
dev.copy2eps(file="power-size3.eps")
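# A numerical companion to the three tradeoff plots above (my addition, not
# part of the original lecture code): with a true effect beta and standard
# error 1, the power of a two-tailed test at critical value crit is
# P(b.hat > crit) + P(b.hat < -crit) under the alternative.
power.fn<-function(beta, crit, se=1){
  (1-pnorm(crit, mean=beta, sd=se))+pnorm(-crit, mean=beta, sd=se)
}
power.fn(1.5, 1.96)   # power at the 5% critical value
power.fn(1.5, 1.645)  # power at the 10% critical value
power.fn(1.5, 1)      # loosening the critical value buys power...
power.fn(0, 1)        # ...at the price of a much larger size (false-positive rate)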
# example power analysis
n<-50
x<-runif(n, min=0, max=10)
z<-runif(n, min=0, max=10)
y<-2*x+1.5*z+1+rnorm(n, mean=0, sd=1)
model<-lm(y~x+z)
summary(model)
summary(model)$coefficients
# false-negative probability for the intercept (row 1; column 3 is the
# t-statistic): the chance a replication's t-statistic falls below the 1.96
# critical value even though the true coefficient is nonzero
pt(1.96-summary(model)$coefficients[1,3], df=n-3)
# 34.6% probability of false negatives

# Asymptotic Testing
########################

set.seed(23123)

# start with a tiny sample, bimodal error
p.store.x<-c()
p.store.z<-c()
n<-10
sims<-5000
pb<-txtProgressBar(min=0, max=sims, initial=0, style=3)
for(i in 1:sims){
  setTxtProgressBar(pb, value=i)
  x<-runif(n, min=0, max=10)
  z<-runif(n, min=0, max=10)
  u<-rnorm(n, mean=1.5, sd=0.5)*sign(runif(n, min=-0.5, max=0.5))  # bimodal error, mean zero
  y<-2*x+1+u
  model<-lm(y~x+z)
  p.store.x[i]<-summary(model)$coefficients[2,4]
  p.store.z[i]<-summary(model)$coefficients[3,4]
}
close(pb)
sum(p.store.x<0.05)/sims
sum(p.store.z<0.05)/sims

# tiny sample, uniform error
p.store.x<-c()
p.store.z<-c()
n<-10
sims<-5000
pb<-txtProgressBar(min=0, max=sims, initial=0, style=3)
for(i in 1:sims){
  setTxtProgressBar(pb, value=i)
  x<-runif(n, min=0, max=10)
  z<-runif(n, min=0, max=10)
  u<-runif(n, min=-2, max=2)
  y<-2*x+1+u
  model<-lm(y~x+z)
  p.store.x[i]<-summary(model)$coefficients[2,4]
  p.store.z[i]<-summary(model)$coefficients[3,4]
}
close(pb)
sum(p.store.x<0.05)/sims
sum(p.store.z<0.05)/sims

# tiny sample, F-distributed error
p.store.x<-c()
p.store.z<-c()
n<-10
sims<-5000
pb<-txtProgressBar(min=0, max=sims, initial=0, style=3)
for(i in 1:sims){
  setTxtProgressBar(pb, value=i)
  x<-runif(n, min=0, max=10)
  z<-runif(n, min=0, max=10)
  u<-rf(n, df1=1, df2=100)-(100/98)  # F-distributed error, mean zero
  y<-2*x+1+u
  model<-lm(y~x+z)
  p.store.x[i]<-summary(model)$coefficients[2,4]
  p.store.z[i]<-summary(model)$coefficients[3,4]
}
close(pb)
sum(p.store.x<0.05)/sims
sum(p.store.z<0.05)/sims

# now a slightly larger sample
p.store.x<-c()
p.store.z<-c()
sims<-5000
n<-100
pb<-txtProgressBar(min=0, max=sims, initial=0, style=3)
for(i in 1:sims){
  setTxtProgressBar(pb, value=i)
  x<-runif(n, min=0, max=10)
  z<-runif(n, min=0, max=10)
  u<-rf(n, df1=1, df2=100)-(100/98)  # F-distributed error, mean zero
  y<-2*x+1+u
  model<-lm(y~x+z)
  p.store.x[i]<-summary(model)$coefficients[2,4]
  p.store.z[i]<-summary(model)$coefficients[3,4]
}
close(pb)
sum(p.store.x<0.05)/sims
sum(p.store.z<0.05)/sims

# try an "infinite" sample
p.store.x<-c()
p.store.z<-c()
sims<-5000
n<-1000
pb<-txtProgressBar(min=0, max=sims, initial=0, style=3)
for(i in 1:sims){
  setTxtProgressBar(pb, value=i)
  x<-runif(n, min=0, max=10)
  z<-runif(n, min=0, max=10)
  u<-rf(n, df1=1, df2=100)-(100/98)  # F-distributed error, mean zero
  y<-2*x+1+u
  model<-lm(y~x+z)
  p.store.x[i]<-summary(model)$coefficients[2,4]
  p.store.z[i]<-summary(model)$coefficients[3,4]
}
close(pb)
sum(p.store.x<0.05)/sims
sum(p.store.z<0.05)/sims

# tests of complex hypotheses
###############################

rm(list=ls())
set.seed(4589)
n<-100
x<-c(rep(0, 33), rep(1, 33), rep(0, 34))
z<-c(rep(0, 33), rep(0, 33), rep(1, 34))
y<-2*x+1.5*z+1+rnorm(n, mean=0, sd=1)
data<-as.data.frame(cbind(y, x, z))
model<-lm(y~x+z)

# Approach 1: t-test
###############

# get the VCV of the model
vcv.model<-vcov(model)
# test B1-B2=0 using a t-test
t.stat<-(model$coefficients[2]-model$coefficients[3])/
  sqrt(vcv.model[2,2]+vcv.model[3,3]-2*vcv.model[2,3])
p.val<-1-pt(t.stat, df=97)  # df = n - (number of coefficients) = 100 - 3
cat("t-statistic: ", t.stat, "\n")
cat("two-tailed p-value: ", 2*p.val, "\n")
# print the standard error of B1-B2
sqrt(vcv.model[2,2]+vcv.model[3,3]-2*vcv.model[2,3])
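# Cross-check on the hand-computed t-test (my addition, not part of the
# original lecture code; assumes the 'car' package is installed): the same
# linear restriction B1 - B2 = 0 can be tested with car::linearHypothesis(),
# whose F-statistic should equal the square of the t-statistic above.
if(requireNamespace("car", quietly=TRUE)){
  print(car::linearHypothesis(model, "x - z = 0"))
}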
# Approach 2: F-test
###############

# model 1: unrestricted
model<-lm(y~x+z)
# model 2: restricted, imposing B1=B2; x and z are mutually exclusive dummies,
# so x|z (coerced to 0/1) equals x+z and serves as the common-coefficient regressor
X<-x|z
model2<-lm(y~X)
anova(model, model2)

# Approach 3: simulation out of the asymptotic distribution
###############

require(mvtnorm)
draws<-rmvnorm(1000, mean=model$coefficients, sigma=vcv.model)
# show what this looks like
plot(draws[,2]~draws[,3])
difs<-draws[,2]-draws[,3]
plot(density(difs), xlab="beta_x - beta_z", main="Density of b_x - b_z")
quantile(difs, probs=c(0.025, 0.975))

# Approach 4: nonparametric bootstrap
###############

set.seed(21399)
beta.boot<-matrix(data=NA, nrow=1000, ncol=3)
v<-1:length(y)
for(i in 1:1000){
  # resample observations with replacement and re-estimate the model
  j<-sample(v, size=length(v), replace=T)
  data.samp<-data[j,]
  beta.boot[i,]<-lm(data.samp[,1]~as.matrix(data.samp[,2:3]))$coefficients
}
apply(beta.boot, MARGIN=2, FUN=mean)  # bootstrap means of the coefficients
difs<-beta.boot[,2]-beta.boot[,3]
quantile(difs, probs=c(0.025, 0.975))

#############################
# Interaction Model Analysis
#############################

rm(list=ls())
set.seed(2347823)

# make some data
X<-runif(50, min=-10, max=10)
Z<-runif(50, min=-10, max=10)
XZ<-X*Z
const<-rep(1, 50)
b<-c(0.123, 2.542, -2.843, -0.485)
y<-cbind(const, X, Z, XZ)%*%b+rnorm(50, mean=0, sd=15)

# save this data
require(foreign)
data<-as.data.frame(cbind(y, X, Z))
names(data)<-c("y", "X", "Z")
write.dta(data, file="lecture6interaction.dta")

# analyze data from scratch
#############################

# clear memory
rm(list=ls())
# read in data
data<-read.dta(file="lecture6interaction.dta")
attach(data)

# run a linear regression
model<-lm(y~X*Z)
summary(model)
# the coefficient on X*Z is sufficient to test whether X changes the relationship
# between y and Z (and vice versa), but how do we test for the statistical
# significance of the relationship between y and X (or y and Z)?

# recover the coefficients from the model
coef<-coefficients(model)
# recover the vcv matrix
vcv<-vcov(model)

# create a two-panel plot
par(mfrow=c(1,2))

# plot derivative values
Z.fits<-seq(from=-10, to=10, by=0.01)
# the marginal effect of X on y as a function of Z
dYdX<-coef[2]+Z.fits*coef[4]
plot(dYdX~Z.fits, type="l", ylim=c(-6, 12), xlab="Z", ylab="dY/dX",
     main="Analytic Calculation")

# what is the standard error of this quantity?
# exact calculation: var(b2 + Z*b4) = var(b2) + Z^2*var(b4) + 2*Z*cov(b2,b4)
dYdX.se<-sqrt(vcv[2,2]+Z.fits^2*vcv[4,4]+2*Z.fits*vcv[2,4])
# 95% CI
lines(dYdX+1.96*dYdX.se~Z.fits, lty=2)
lines(dYdX-1.96*dYdX.se~Z.fits, lty=2)
# show the zero line
abline(h=0, lty=3)

# draw out of the asymptotic distribution of betas, fixed sigma (hat)
plot(dYdX~Z.fits, type="l", ylim=c(-6, 12), xlab="Z", ylab="dY/dX", main="Simulation")
require(mvtnorm)
beta.draws<-rmvnorm(1000, mean=coef, sigma=vcv)  # mean and vcv come out of the regression results
# marginal effect for each draw: rows index Z values, columns index draws;
# each column must pair a draw of b2 with its own draw of b4
dYdX.draws<-outer(Z.fits, beta.draws[,4])+
  matrix(beta.draws[,2], nrow=length(Z.fits), ncol=nrow(beta.draws), byrow=TRUE)
dYdX.lo<-apply(dYdX.draws, MARGIN=1, FUN=quantile, probs=0.025)
dYdX.hi<-apply(dYdX.draws, MARGIN=1, FUN=quantile, probs=0.975)
# plot the 95% CI of dYdX
lines(dYdX.lo~Z.fits, lty=2)
lines(dYdX.hi~Z.fits, lty=2)
# show the zero line
abline(h=0, lty=3)
# some variation - could smooth this out...
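# Quick numeric check (my addition, not part of the original lecture code):
# at a few values of Z, the simulated 95% CI should track the analytic one.
# Z.fits runs from -10 to 10 by 0.01, so indices 1, 1001, 2001 are Z = -10, 0, 10.
check.idx<-c(1, 1001, 2001)
cbind(Z=Z.fits[check.idx],
      analytic.lo=(dYdX-1.96*dYdX.se)[check.idx], sim.lo=dYdX.lo[check.idx],
      analytic.hi=(dYdX+1.96*dYdX.se)[check.idx], sim.hi=dYdX.hi[check.idx])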
# redo all the plots
plot(dYdX~Z.fits, type="l", ylim=c(-6, 12), xlab="Z", ylab="dY/dX",
     main="Analytic Calculation")
lines(dYdX+1.96*dYdX.se~Z.fits, lty=2)
lines(dYdX-1.96*dYdX.se~Z.fits, lty=2)
abline(h=0, lty=3)
plot(dYdX~Z.fits, type="l", ylim=c(-6, 12), xlab="Z", ylab="dY/dX", main="Simulation")
# now use loess smoothing on the simulated CI bounds
dYdX.lo.loess<-loess(dYdX.lo~Z.fits)
dYdX.lo.smooth<-predict(dYdX.lo.loess, newdata=Z.fits)
dYdX.hi.loess<-loess(dYdX.hi~Z.fits)
dYdX.hi.smooth<-predict(dYdX.hi.loess, newdata=Z.fits)
lines(dYdX.lo.smooth~Z.fits, lty=2)
lines(dYdX.hi.smooth~Z.fits, lty=2)
# add a legend
legend("topright", legend=c("dY/dX", "95% CI"), lty=c(1,2))
# show the zero line
abline(h=0, lty=3)

# squared terms
############################

rm(list=ls())
set.seed(3249023)
n<-100
x<-runif(n, min=-10, max=10)
xsq<-x^2
y<-1+x+0.15*xsq+rnorm(n, mean=0, sd=1)
plot(y~x, xlim=c(-10,10))
# overlay the true conditional mean function
xx<-seq(from=-10, to=10, by=0.1)
xxsq<-xx^2
yy<-1+xx+0.15*xxsq
lines(yy~xx)
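# A natural next step (my addition, not part of the original lecture code):
# fit the quadratic by OLS and overlay the fitted curve on the scatterplot.
# Because x^2 is just another regressor, the marginal effect dY/dx = b1 + 2*b2*x
# can be analyzed with the same delta-method/simulation tools used above.
sq.model<-lm(y~x+I(x^2))
summary(sq.model)
lines(predict(sq.model, newdata=data.frame(x=xx))~xx, lty=2)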