############################################
### Simultaneous lower, or two-sided     ###
###          confidence bounds           ###
###    for differences of proportions    ###
###          (Risk Differences)          ###
###  in multiple comparisons to control  ###
############################################

# Bernhard Klingenberg
# Williams College
# For more information, visit:
# www.williams.edu/~bklingen (to be discontinued in 2012)
# or sites.williams.edu/bklingen

# Requires package mvtnorm !!!!
require("mvtnorm")

#gives MLEs for p1,p2 under restriction p_i-p0=delta_i and returns score statistic z (Agresti 2002, p77) 
score.z <- function(y,n,delta.null) {
        N=sum(n)
        C=sum(y)
        L3=N
        L2=(n[2]+2*n[1])*delta.null-N-C
        L1=(n[1]*delta.null-N-2*y[1])*delta.null+C
        L0=y[1]*delta.null*(1-delta.null)
        c=L2^3/(3*L3)^3 - L1*L2/(6*L3^2) + L0/(2*L3)
        b=ifelse(c>=0,1,-1)*sqrt(L2^2/(3*L3)^2 - L1/(3*L3))
        a=(3.141593+acos(c/b^3))/3
        p0=2*b*cos(a)-L2/(3*L3)
        p.i=p0+delta.null
        z=(y[2]/n[2]-y[1]/n[1]-delta.null)/sqrt(p.i*(1-p.i)/n[2] + p0*(1-p0)/n[1]) #Mee
        #z=(y[2]/n[2]-y[1]/n[1]-delta.null)/sqrt(p.i*(1-p.i)/n[2] + p0*(1-p0)/n[1])*N/(N-1)) #Mietinnen and Nurminen
        return(z)
} 

#computes lower or upper score bounds for diff of prop p_i - p_0
score.int <- function(y,n,c, type) {
    delta1=y[2]/n[2]-y[1]/n[1]
    if (any(type=="lower",type=="two-sided")) {
        delta2=-1
        while( abs(delta1-delta2)>10^(-6) ) {#Bisection for LB
            delta=(delta1+delta2)/2
            z=score.z(y,n,delta)
            if (abs(z)>abs(c)) delta2=delta else delta1=delta
        }
    }
    delta.l=delta1
    if (any(type=="upper",type=="two-sided")) {
        delta2=1;
            while( abs(delta1-delta2)>10^(-6) ) {#Bisection for UB
                delta=(delta1+delta2)/2
                z=score.z(y,n,delta)
                if (abs(z)>abs(c)) delta2=delta else delta1=delta
            }
    }
    delta.u=delta1
    return(switch(type,
                "lower"=delta.l,
                "upper"=delta.u,
                "two-sided"=cbind(delta.l,delta.u)
                ))
}

#computes simultaneous lower or upper score bounds for diff of prop p_i-p_0
RD.Score <- function(y, n, c, type) { 
    B <- apply(cbind(y[-1],n[-1]),1, function(r) score.int(y=c(y[1],r[1]),n=c(n[1],r[2]), c=c, type=type))
    return(B)
}

#returns simultaneous lower or upper Wald bounds for diff of prop p_i-p_0
RD.Wald <-function(y, n, c, type) { 
    p = y/n
    diff = p[-1]-p[1]
    var = p[-1]*(1-p[-1])/n[-1] + p[1]*(1-p[1])/n[1]
    if (type=="two-sided") return(rbind(diff - c*sqrt(var),diff + c*sqrt(var))) else
        return(diff - c*sqrt(var))
}

#returns simultaneous lower Newcombe bounds,  TYPE=upper or two-sided not implemented yet!!!
RD.NHS <-function(y, n, c, type) { 
    if (type!="lower") stop("NHS method not yet implemented for upper or two-sided simultaneous intervals. Use other (=better) method instead.")
    p = y/n
    bound = c*sqrt((p*(1-p)+c^2 /(4*n)) / n) / (1+c^2/n)
    midpnt = (p+c^2/(2*n)) / (1+c^2/n)
    limits = cbind(midpnt - bound, midpnt + bound)
    var = (p[-1]-limits[-1,1])^2 + (limits[1,2]-p[1])^2 #still implement upper bounds
    L = (p[-1]-p[1]) - sqrt(var)
    return (L)
}

#computes critical value according to Dunnett procedure (see paper)
c.Dun <- function(y,n,conflev) {
    require("mvtnorm")
    p = y/n
    if (y[1]==0) p[1]=0.5/n[1]
    if (y[1]==n[1]) p[1]=(n[1]-0.5)/n[1]
    gamma = 1/sqrt(1 + n[1]/n[-1]*p[-1]*(1-p[-1])/ (p[1]*(1-p[1])))
    Gamma= gamma %o% gamma
    diag(Gamma)=1  
    c=qmvnorm(conflev, tail="lower.tail", corr=Gamma, interval=c(qnorm(conflev),7), algorithm=GenzBretz(abseps = 0.00005))$quantile
    return(round(c,4))
}

#computes critical value according to Dunnett0 procedure (see paper)
c.Dun0 <- function(y,n,conflev) {
    require("mvtnorm")
    p0=y[1]/n[1]
    if (y[1]==0) p0=0.5/n[1]
    if (y[1]==n[1]) p0=(n[1]-0.5)/n[1]
    gamma = 1/sqrt(1 + (n[1]/n[-1])*(1+(p0-0.5)^2/(p0*(1-p0))))
    Gamma= gamma %o% gamma
    diag(Gamma)=1  
    c=qmvnorm(conflev, tail="lower.tail", corr=Gamma, interval=c(qnorm(conflev),7), algorithm=GenzBretz(abseps = 0.00005))$quantile
    return(round(c,4))
}

#Finds critical value for No, Bonferroni, Sidak, Dunnett or Dunnett0 multiplicity adjustment
c.adj <-function(y, n, conflev, type, adjust) { 
    if (type=="two-sided") conflev=1-(1-conflev)/2
    r = length(n)-1
    if (r==1) c=qnorm(conflev) else #no adj in the r=1 case
        c = switch(adjust,
            "No" = qnorm(conflev),
            "Bon" = qnorm(1-(1-conflev)/r),
            "Dun0" = c.Dun0(y,n,conflev),
            "Dun" = c.Dun(y,n,conflev),
            qnorm(conflev^(1/r)) #Sidak is default adjustment
            )
    if (type=="upper") c=-c
    return(c)
}

### Main Function ####
### Finds simultaneous lower or upper CI's for risk differences
simci.RD <- function(y, n, conflev=0.975, type="lower", method="Score", adjust="Sid", c.given=NULL) {
    if (is.null(c.given)) c = c.adj(y,n,conflev,type,adjust)  else c=c.given 
    ci = switch(method,
        "Wald" = RD.Wald(y,n,c,type),
        "NHS" = RD.NHS(y,n,c,type),
        "Add4" = RD.Wald(y+1,n+2,c,type),
        "Add2" = RD.Wald(y+0.5,n+1,c,type),
        RD.Score(y,n,c,type) #Score is default method
        )
    lab=paste(1:length(n[-1]),"-0", sep = "")
    diff=y[-1]/n[-1]-y[1]/n[1]
    if (type=="lower") return(data.frame(Contr=lab,Diff=diff,LB=ci))
    if (type=="upper") return(data.frame(Contr=lab,Diff=diff,UB=ci))
    if (type=="two-sided") return(data.frame(Contr=lab,Diff=diff,LB=ci[1,],UB=ci[2,]))
}

#########################################
# Example: 
# simci.RD(y=c(9,27,23,29),n=c(40,40,40,40),conflev=0.975, type="lower", method="Score", adjust="Dun0")
############################################


################################################
###### Coverage and Power Simulations ##########
################################################
# probs ... vector of probabilities, with first entry control group probability
# n ... sample size
# delta.null ... do I have enough power to detect a risk difference significantly above these values?
# which.stat ... which test statistics should be inverted: Wald, NHS, Add4, Add2, Score
# which.adj ... which multiplicity adjustment methods should be used: No, Sidak, Dun, Dun0
# c.given ... if true, calculate true crit. value c from true probabilities and not separately for each sample
sim.cov.pow <- function(probs, n, conflev=0.975, type="lower", delta.null=NULL, sims=5000, which.stat="Score", which.adj="Dun0", seed=NULL, c.given=FALSE) {
    if (!is.null(seed)) set.seed(seed)
    r <- length(n)-1
    delta <- probs[-1] - probs[1]
    method <- expand.grid(which.stat,which.adj)
    if (c.given==TRUE) {
        c=sapply(unique(method[,2]),function(ro) c.adj(probs*n,n,conflev,type,as.character(ro)))
        method=data.frame(method,c=kronecker(c,matrix(1,nrow=length(which.stat))))
        }
    coverage <- matrix(NA,sims,dim(method)[1])
    if (!is.null(delta.null)) pow.first=pow.least=pow.all <- matrix(NA,sims,dim(method)[1])
    for (sim in 1:sims) {
        y <- rbinom(r+1,size=n,prob=probs)
        if (type=="lower") {
            if (c.given==FALSE) int <- apply(method,1, function(ro) simci.RD(y,n,conflev,type,method=ro[1],adjust=ro[2])$LB) else int <- apply(method,1, function(ro) simci.RD(y,n,conflev,type,method=ro[1],adjust=ro[2],c.given=as.numeric(ro[3]))$LB)
            coverage[sim,] <- apply(int,2,function(co) prod(delta>=co))
            if(!is.null(delta.null)) {
                pow.first[sim,] <- int[1,]>delta.null[1]
                pow.least[sim,] <- apply(int,2,function(co) max(co>delta.null))
                pow.all[sim,] <- apply(int,2,function(co) prod(co>delta.null))
            }
        }
        if (type=="upper") {
            if (c.given==FALSE) int <- apply(method,1, function(ro) simci.RD(y,n,conflev,type,method=ro[1],adjust=ro[2])$UB) else int <- apply(method,1, function(ro) simci.RD(y,n,conflev,type,method=ro[1],adjust=ro[2], c.given=as.numeric(ro[3]))$UB)
            coverage[sim,] <- apply(int,2,function(co) prod(delta<=co))
            if(!is.null(delta.null)) {
                pow.first[sim,] <- int[1,]<delta.null[1]
                pow.least[sim,] <- apply(int,2,function(co) max(co<delta.null))
                pow.all[sim,] <- apply(int,2,function(co) prod(co<delta.null))
            }
        }
        if (type=="two-sided") {
            if (c.given==FALSE) int <- apply(method,1, function(ro) simci.RD(y,n,conflev,type,method=ro[1],adjust=ro[2])[,c(3,4)]) else int <- apply(method,1, function(ro) simci.RD(y,n,conflev,type,method=ro[1],adjust=ro[2],c.given=as.numeric(ro[3]))[,c(3,4)])
            coverage[sim,] <- sapply(int,function(x) prod((delta>=x[,1])*(delta<=x[,2])))
            if(!is.null(delta.null)) {
                pow.first[sim,] <- sapply(int,function(x) (delta.null[1]<x[1,1])+(x[1,2]<delta.null[1]))
                pow.least[sim,] <- sapply(int,function(x) max((delta.null<x[,1])+(x[,2]<delta.null)))
                pow.all[sim,] <- sapply(int,function(x) prod((delta.null<x[,1])+(x[,2]<delta.null)))
            }
        }
    }
    name <- paste(method[,1],method[,2],sep=".")
    result=list(mean.coverage=colMeans(coverage), conflev=conflev)
    names(result$mean.coverage)=name
    if (!is.null(delta.null)) {
        pow.first1 <- colMeans(pow.first)
        pow.least1 <- colMeans(pow.least)
        pow.all1 <- colMeans(pow.all)
        names(pow.first1)=names(pow.least1)=names(pow.all1)=name
        result$pow.first=pow.first1
        result$pow.least=pow.least1
        result$pow.all=pow.all1
    }    
    result$true.probs=probs
    result$delta=delta
    result$delta.null=delta.null
    result$n=n
    return(result)
}

##########################################
# Example 
# result <- sim.cov.pow(probs=c(0.15,0.55,0.75,0.45),n=rep(40,4),conflev=0.975, type="lower", sims=10,delta.null=NULL,which.stat=c("Add4","Add2","Score"), which.adj=c("Dun0"))
# result
##########################################