crab <- read.table("http://sites.williams.edu/bklingen/files/2012/02/crab.txt",header=TRUE) crab crab$sat <- crab$satellite>0 attach(crab) fit1 <- glm(sat~weight, family=binomial(link=logit)) summary(fit1) anova(fit1,test="Chisq") #Goodness of fit: Cannot use Deviance directly, need to group data summary(weight) weight.grouped=(weight<=2000)+2*(weight>2000 & weight<=2350)+3*(weight>2350 & weight<=2850)+4*(weight>2850) #better to use more categories luis=table(weight.grouped,sat) #shows number of crabs with and without satellites in each weight group n=rowSums(luis) # number of crabs in each weight group weight1=c(1825,2175,2500,3000) #midpoints of each weight group pi.hat <- predict(fit1,type="resp",data.frame(weight=weight1)) #predicted success probability in each weight group, treat as constant within each group (not entirely true) obs=matrix(luis,1,8) exp=c(n*(1-pi.hat),n*pi.hat) X2=sum((obs-exp)^2/exp) 1-pchisq(X2,2) #P-value for GoF with X2 G2=sum(obs*log(obs/exp)) 1-pchisq(G2,2) #P-value for GoF with G2 # add color fit2 <- glm(sat~weight+factor(color), family=binomial(link=logit)) summary(fit2) anova(fit2,test="Chisq") weight.new=seq(1000,5300,50) fit.col1 <- predict(fit2,type="response",newdata=data.frame(weight=weight.new,color=1)) fit.col2 <- predict(fit2,type="response",newdata=data.frame(weight=weight.new,color=2)) fit.col3 <- predict(fit2,type="response",newdata=data.frame(weight=weight.new,color=3)) fit.col4 <- predict(fit2,type="response",newdata=data.frame(weight=weight.new,color=4)) plot(fit.col1~weight.new,col=gray(0.9),type="l",xlab="weight",ylab="Est. Prob of a satellite", ylim=c(0,1)) lines(fit.col2~weight.new,col=gray(0.7)) lines(fit.col3~weight.new,col=gray(0.4)) lines(fit.col4~weight.new,col=gray(0.2)) legend("bottomright", legend=c("light", "medium-light", "medium-dark", "dark"), lty=1, col=gray(c(0.9,0.7,0.4,0.2))) #treat color as ordinal with scores 1,2,3,4: fit3 <- glm(sat~weight+color, family=binomial(link=logit)) summary(fit3) anova(fit3,test="Chisq") fit.col1 <- predict(fit3,type="response",newdata=data.frame(weight=weight.new,color=1)) fit.col2 <- predict(fit3,type="response",newdata=data.frame(weight=weight.new,color=2)) fit.col3 <- predict(fit3,type="response",newdata=data.frame(weight=weight.new,color=3)) fit.col4 <- predict(fit3,type="response",newdata=data.frame(weight=weight.new,color=4)) plot(fit.col1~weight.new,col=gray(0.9),type="l",xlab="weight",ylab="Est. Prob of a satellite", ylim=c(0,1)) lines(fit.col2~weight.new,col=gray(0.7)) lines(fit.col3~weight.new,col=gray(0.4)) lines(fit.col4~weight.new,col=gray(0.2)) legend("bottomright", legend=c("light", "medium-light", "medium-dark", "dark"), lty=1, col=gray(c(0.9,0.7,0.4,0.2))) #necessary to include interaction between color and weight? fit4 <- glm(sat~weight+factor(color)+weight*factor(color), family=binomial(link=logit)) summary(fit4) anova(fit4,test="Chisq") fit.col1 <- predict(fit4,type="link",newdata=data.frame(weight=weight.new,color=1)) fit.col2 <- predict(fit4,type="link",newdata=data.frame(weight=weight.new,color=2)) fit.col3 <- predict(fit4,type="link",newdata=data.frame(weight=weight.new,color=3)) fit.col4 <- predict(fit4,type="link",newdata=data.frame(weight=weight.new,color=4)) plot(fit.col1~weight.new,col=gray(0.9),type="l",xlab="weight",ylab="Est. log-odds of a satellite", ylim=c(-1,5)) lines(fit.col2~weight.new,col=gray(0.7)) lines(fit.col3~weight.new,col=gray(0.4)) lines(fit.col4~weight.new,col=gray(0.2)) legend("topleft", legend=c("light", "medium-light", "medium-dark", "dark"), lty=1, col=gray(c(0.9,0.7,0.4,0.2))) #, bty="0", bg="white") fit.col1 <- predict(fit4,type="response",newdata=data.frame(weight=weight.new,color=1)) fit.col2 <- predict(fit4,type="response",newdata=data.frame(weight=weight.new,color=2)) fit.col3 <- predict(fit4,type="response",newdata=data.frame(weight=weight.new,color=3)) fit.col4 <- predict(fit4,type="response",newdata=data.frame(weight=weight.new,color=4)) plot(fit.col1~weight.new,col=gray(0.9),type="l",xlab="weight",ylab="Est. Prob. of a satellite",ylim=c(0,1)) lines(fit.col2~weight.new,col=gray(0.7)) lines(fit.col3~weight.new,col=gray(0.4)) lines(fit.col4~weight.new,col=gray(0.2)) legend("topleft", legend=c("light", "medium-light", "medium-dark", "dark"), lty=1, col=gray(c(0.9,0.7,0.4,0.2))) #, bty="0", bg="white")