> alli <- read.table("http://lanfiles.williams.edu/~bklingen/STAT440/alligator.txt",header=TRUE) > alli[1:6,] lake gender size food count 1 1 1 1 1 7 2 1 1 1 2 1 3 1 1 1 3 0 4 1 1 1 4 0 5 1 1 1 5 5 6 1 1 2 1 4 > food<-factor(alli$food, labels=c("fish", "invert", "rep", "bird","other")) > lake<-factor(alli$lake,labels=c("hancock", "oklawaha","trafford","george")) > gender<-factor(alli$gender,labels=c("m","f")) > size<-factor(alli$size,labels=c("<2.3",">2.3")) > count<-alli$count > alli1=data.frame(food,size,gender,lake,count) > alli2=alli1[alli1$count>0,] > require("VGAM") > fit.SGL<-vglm(food~size+gender+lake,multinomial(refLevel=1), weights=count, data=alli2) #the reflev=1 ensures that the first category, Fish, is selected as the baseline #Remark: you could also have food an n times 5 matrix holding the counts across the 5 food categories # If you have the data available in contingency table format, this is more convenient; # here, however, we have them as separate entries. # If you compute the deviance > deviance(fit.SGL) # Careful: Deviance depends on data entry (=saturated model), this is not the correct deviance, but ok for model comparison [1] 537.8655 # To get correct deviance, need to enter data in contingency table format, with food categories across rows. # Lake Gender Size Fish Inv Rept Bird Other # Han M <2.3 7 1 0 0 5 #... > fit.SL<-vglm(food~size+lake,multinomial(refLevel=1),data=alli2, weights=count) > deviance(fit.SL) [1] 540.0803 > deviance(fit.SL)-deviance(fit.SGL) [1] 2.214799 > 1-pchisq(2.215,df=4) #why 4? Because the effect of gender required 4 parameters, one for each baseline logit [1] 0.696284 > #Gender does not have an effect, collapse table over gender > alli3=aggregate(alli2$count,list(food=alli2$food,size=alli2$size,lake=alli2$lake),sum) > names(alli3)[4]="count" > alli3[1:7,] food size lake count 1 fish <2.3 hancock 23 2 invert <2.3 hancock 4 3 rep <2.3 hancock 2 4 bird <2.3 hancock 2 5 other <2.3 hancock 8 6 fish >2.3 hancock 7 7 rep >2.3 hancock 1 > fit.SL<-vglm(food~size+lake,multinomial(refLevel=1),data=alli3, weights=count) #same model fit as above, but nicer output! > coef(fit.SL, matrix=T) #fitted model equations log(mu[,2]/mu[,1]) log(mu[,3]/mu[,1]) log(mu[,4]/mu[,1]) log(mu[,5]/mu[,1]) (Intercept) -1.749173 -2.4230189 -2.0286189 -0.7465252 size>2.3 -1.458205 0.3512628 0.6306597 -0.3315503 lakeoklawaha 2.595578 1.2160953 -1.3483238 -0.8205431 laketrafford 2.780343 1.6924767 0.3926492 0.6901725 lakegeorge 1.658359 -1.2427742 -0.6951176 -0.8261962 > summary(fit.SL) #gives standard errors Call: vglm(formula = food ~ size + lake, family = multinomial(refLevel = 1), data = alli3, weights = count) Pearson Residuals: Min 1Q Median 3Q Max log(mu[,2]/mu[,1]) -3.6412 -1.15074 -0.60888 -0.100399 6.1891 log(mu[,3]/mu[,1]) -2.3180 -0.70678 -0.30564 -0.068855 9.1878 log(mu[,4]/mu[,1]) -1.8431 -0.46532 -0.22789 -0.090654 8.0249 log(mu[,5]/mu[,1]) -3.1574 -0.73258 -0.49624 -0.191509 7.0788 Coefficients: Value Std. Error t value (Intercept):1 -1.74917 0.53918 -3.24411 (Intercept):2 -2.42302 0.64361 -3.76476 (Intercept):3 -2.02862 0.55805 -3.63522 (Intercept):4 -0.74653 0.35198 -2.12092 size>2.3:1 -1.45820 0.39595 -3.68282 size>2.3:2 0.35126 0.57999 0.60564 size>2.3:3 0.63066 0.64245 0.98164 size>2.3:4 -0.33155 0.44825 -0.73965 lakeoklawaha:1 2.59558 0.65971 3.93442 lakeoklawaha:2 1.21610 0.78601 1.54717 lakeoklawaha:3 -1.34832 1.16275 -1.15960 lakeoklawaha:4 -0.82054 0.72964 -1.12459 laketrafford:1 2.78034 0.67122 4.14220 laketrafford:2 1.69248 0.78044 2.16861 laketrafford:3 0.39265 0.78177 0.50226 laketrafford:4 0.69017 0.55967 1.23317 lakegeorge:1 1.65836 0.61288 2.70585 lakegeorge:2 -1.24277 1.18450 -1.04920 lakegeorge:3 -0.69512 0.78127 -0.88973 lakegeorge:4 -0.82620 0.55755 -1.48184 Number of linear predictors: 4 Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1]), log(mu[,4]/mu[,1]), log(mu[,5]/mu[,1]) Dispersion Parameter for multinomial family: 1 Residual Deviance: 540.0803 on 124 degrees of freedom Log-likelihood: -270.0401 on 124 degrees of freedom Number of Iterations: 5 > fit.probs=predict(fit.SL,type="response") #fitted multinomial probabilities at each setting included in the data set > cbind(alli3,fit.probs) food size lake count fish invert rep bird other 1 fish <2.3 hancock 23 0.5353035 0.09309880 0.04745657 0.070401523 0.25373963 2 invert <2.3 hancock 4 0.5353035 0.09309880 0.04745657 0.070401523 0.25373963 3 rep <2.3 hancock 2 0.5353035 0.09309880 0.04745657 0.070401523 0.25373963 4 bird <2.3 hancock 2 0.5353035 0.09309880 0.04745657 0.070401523 0.25373963 5 other <2.3 hancock 8 0.5353035 0.09309880 0.04745657 0.070401523 0.25373963 6 fish >2.3 hancock 7 0.5701978 0.02307168 0.07182461 0.140896287 0.19400964 7 rep >2.3 hancock 1 0.5701978 0.02307168 0.07182461 0.140896287 0.19400964 8 bird >2.3 hancock 3 0.5701978 0.02307168 0.07182461 0.140896287 0.19400964 9 other >2.3 hancock 5 0.5701978 0.02307168 0.07182461 0.140896287 0.19400964 10 fish <2.3 oklawaha 5 0.2581861 0.60189674 0.07722761 0.008817496 0.05387208 11 invert <2.3 oklawaha 11 0.2581861 0.60189674 0.07722761 0.008817496 0.05387208 12 rep <2.3 oklawaha 1 0.2581861 0.60189674 0.07722761 0.008817496 0.05387208 13 other <2.3 oklawaha 3 0.2581861 0.60189674 0.07722761 0.008817496 0.05387208 14 fish >2.3 oklawaha 13 0.4584385 0.24864517 0.19483741 0.029416130 0.06866280 15 invert >2.3 oklawaha 8 0.4584385 0.24864517 0.19483741 0.029416130 0.06866280 ... 35 bird >2.3 george 1 0.6574424 0.13967783 0.02389877 0.081067361 0.09791361 36 other >2.3 george 3 0.6574424 0.13967783 0.02389877 0.081067361 0.09791361 >