shuttle = read.table("http://sites.williams.edu/bklingen/files/2012/02/challenger.txt",header=TRUE) shuttle attach(shuttle) plot(damage~temperature,xlim=c(52,83)) ## Fitting a Logistic Regression model to the space shuttle data fit.logit <- glm(damage ~ temperature, family=binomial(link=logit), data=shuttle) summary(fit.logit) vcov(fit.logit) anova(fit.logit, test = "Chisq") #Compare model to a more complicated one, i.e., inclusing a quadratic temperature effect: fit1.logit <- glm(damage ~ temperature +I(temperature^2), family=binomial(link=logit), data=shuttle) summary(fit1.logit) anova(fit1.logit, test = "Chisq") ## Plotting the fitted mean and confidence bounds: plot(damage~temperature,xlim=c(52,83)) temp.new <- seq(min(temperature),max(temperature),1) pred <- predict(fit.logit,type="resp",newdata=list(temperature=temp.new),se.fit=TRUE) #estimated probabilities points(pred$fit~temp.new,type="l", col="red") #puts fitted line on graph ## Note: se.fit=TRUE in conjunction with type="resp" returns the standard error of pi_i computed via the Delta method!!! ## I.e., (alpha_hat,beta_hat) are asympt. multivariate normal, what is the asymptotic distribution of exp(alpha-hat+beta_hat*x)/(1+exp(alpha-hat+beta_hat*x) ## Using it may result in lower bounds below 0 or upper bounds above 1. ## We are NOT using this to plot confidence limits for pi_i. Instead, see next ## To get confidence intervals for the fitted mean, get the confidence interval on the logit scale first ## and then apply the inverse link function to the lower and upper bound: pred.logit <- predict(fit.logit,type="link",newdata=list(temperature=temp.new),se.fit=TRUE) lower.logit <- pred.logit$fit - 1.96*pred.logit$se.fit upper.logit <- pred.logit$fit + 1.96*pred.logit$se.fit lower2 <- exp(lower.logit)/(1+exp(lower.logit)) upper2 <- exp(upper.logit)/(1+exp(upper.logit)) lines(lower2~temp.new,lty=2, col="brown") lines(upper2~temp.new,lty=2, col="brown") ## Fitting a naive simple linear regression model. NOT RECOMMENDED fit.SLR <- lm(damage~temperature, data=shuttle) summary(fit.SLR) pred <- predict(fit.SLR,newdata=list(temperature=temp.new),se.fit=TRUE) points(pred$fit~temp.new,type="l", col="red") #Confidence intervals on mean response: #To get the lower and upper 95% confidence intervals, simply find lower <- pred$fit - 1.96*pred$se.fit upper <- pred$fit + 1.96*pred$se.fit lines(lower~temp.new,lty=2, col="brown") lines(upper~temp.new,lty=2, col="brown") ## You can get the exact same fit by using the glm() function in R: fit1.SLR <- glm(damage~temperature, family=gaussian(link=identity), data=shuttle) summary(fit1.SLR)