############################################
### Simultaneous upper and two-sided     ###
###         confidence bounds            ###
###        for relative risks            ###
###  in multiple comparisons to control  ###
############################################

# Bernhard Klingenberg
# Williams College
# For information, visit:
# www.sites.williams.edu/bklingen/research/simRR

# Requires package mvtnorm !!!!

# Computes the Clopper/Pearon exact ci
# for a binomial success probability
# for x successes out of n trials with
# confidence coefficient conflev
exactci <- function(x,n,conflev){
   alpha <- (1 - conflev)
   if (x == 0) {
      ll <- 0
      ul <- 1 - (alpha/2)^(1/n)
   }
   else if (x == n) {
      ll <- (alpha/2)^(1/n)
      ul <- 1
   }
   else {
      ll <- 1/(1 + (n - x + 1) / (x * qf(alpha/2, 2 * x, 2 * (n-x+1))))
      ul <- 1/(1 + (n - x) / ((x + 1) * qf(1-alpha/2, 2 * (x+1), 2 *(n-x))))
   }
   c(ll,ul)
}


#####   Inverting the minimum Score Statistic   ######
######################################################

RR.S2 <- function(y,n,RR.null) { #test statistic for H_0i with variance estimated under H_0i
    A <-sum(n)*RR.null
    B <- -((y[1]+n[2])*RR.null+y[2]+n[1])
    C <- y[2]+y[1]
    p0.tilde <- (-B-sqrt(B^2-4*A*C))/(2*A)  #MLE under H_0i
    p=c(p0.tilde,RR.null*p0.tilde)
    variance <- p[2]*(1-p[2])/n[2] + (RR.null^2)*p[1]*(1-p[1])/n[1]
    S2 <- (y[2]/n[2] - RR.null*y[1]/n[1]) / sqrt(variance)
}

bisect <- function(y,n,RR.bounds,c) {#need for inverting Score statistic, bisection method
    RRl <- RR.bounds[1]; RRu<-RR.bounds[2]
    while( abs(RRl-RRu)>10^(-5) ) {
        RR <- (RRl+RRu)/2
        S2 <- RR.S2(y,n,RR)
        if (S2>c) RRl=RR else RRu=RR
    }
    return(RR)
}

bisect2u <- function(y,n,RR.bounds,c) {#need for inverting Score statistic, bisection method
    RRl <- RR.bounds[1]; RRu<-RR.bounds[2]
    while( abs(RRl-RRu)>10^(-5) ) {
        RR <- (RRl+RRu)/2
        S2 <- abs(RR.S2(y,n,RR))
        if (S2<c) RRl=RR else RRu=RR
    }
    return(RR)
}

bisect2l <- function(y,n,RR.bounds,c) {#need for inverting Score statistic, bisection method
    RRl <- RR.bounds[1]; RRu<-RR.bounds[2]
    while( abs(RRl-RRu)>10^(-5) ) {
        RR <- (RRl+RRu)/2
        S2 <- abs(RR.S2(y,n,RR))
        if (S2<c) RRu=RR else RRl=RR
    }
    return(RR)
}

RRci.S2.Sid <- function(y,n,conflev=0.975,type="upper",adj="Sidak") { #returns Sidak or Bonferroni adjusted upper bounds
    r <- length(y[-1])
    if(type=="two-sided") conflev=1-(1-conflev)/2
    c <- qnorm(1-(conflev)^(1/r)) #Sidak
    if (adj == "Bonf") c <- qnorm((1-conflev)/r)
    if (adj == "No") c <- qnorm(1-conflev)
    LB <- (y[-1]+0.5)*(n[1]+0.5)/((y[1]+0.5)*(n[-1]+0.5)) #sample relative risks
    if (type=="upper") {
        UB <- 100*LB #conservative guess of upper bound, adjust if necessary
        B <- apply(cbind(y[-1],n[-1],LB,UB),1, function(r) bisect(y=c(y[1],r[1]),n=c(n[1],r[2]),RR.bounds=r[3:4],c))
        }
    if(type=="two-sided") {
        UB <- 50*LB #conservative guess of upper bound, adjust if necessary
        Bupper <- apply(cbind(y[-1],n[-1],LB,UB),1, function(r) bisect2u(y=c(y[1],r[1]),n=c(n[1],r[2]),RR.bounds=r[3:4],-c))
        UB <- 0.05*LB #conservative guess of upper bound, adjust if necessary
        Blower <- apply(cbind(y[-1],n[-1],UB,LB),1, function(r) bisect2l(y=c(y[1],r[1]),n=c(n[1],r[2]),RR.bounds=r[3:4],-c))
        B <- cbind(Blower,Bupper)
        }
    B=as.matrix(B)
    rownames(B) <- paste("rho",1:r,":",sep="")
    if(type=="upper") colnames(B) <- "UB"
    if(type=="two-sided") colnames(B) <- c("LB","UB")
    return(B)
}


RRci.S2.Dun <- function(y, n, conflev=0.975, type="upper") { #returns Dunnett adjusted upper bounds
    require("mvtnorm")
    r <- length(y[-1])
    pi.hat = y/n
    RR.hat = pi.hat[-1] / pi.hat[1]
    lambda = 1/sqrt(1+(n[1]/n[-1])*(1-pi.hat[-1])/(RR.hat-pi.hat[-1]))
    Gamma= lambda %o% lambda
    diag(Gamma)=1
    c <- switch(type,
               "upper"=qmvnorm(conflev,tail="upper.tail",corr=Gamma, algorithm=GenzBretz(abseps = 0.0005))$quantile,
               "two-sided"=qmvnorm(conflev,tail="both.tails",corr=Gamma, algorithm=GenzBretz(abseps = 0.0005))$quantile
                )
    LB <- (y[-1]+0.5)*(n[1]+0.5)/((y[1]+0.5)*(n[-1]+0.5)) #sample relative risks
    if (type=="upper") {
        UB <- 10*LB #conservative guess of upper bound, adjust if necessary
        B <- apply(cbind(y[-1],n[-1],LB,UB),1, function(r) bisect(y=c(y[1],r[1]),n=c(n[1],r[2]),RR.bounds=r[3:4],c))
        }
    if(type=="two-sided") {
        UB <- 10*LB #conservative guess of upper bound, adjust if necessary
        Bupper <- apply(cbind(y[-1],n[-1],LB,UB),1, function(r) bisect2u(y=c(y[1],r[1]),n=c(n[1],r[2]),RR.bounds=r[3:4],c))
        UB <- 0.1*LB #conservative guess of upper bound, adjust if necessary
        Blower <- apply(cbind(y[-1],n[-1],UB,LB),1, function(r) bisect2l(y=c(y[1],r[1]),n=c(n[1],r[2]),RR.bounds=r[3:4],c))
        B <- cbind(Blower,Bupper)
        }
    B=as.matrix(B)
    rownames(B) <- paste("rho",1:r,":",sep="")
    if(type=="upper") colnames(B) <- "UB"
    if(type=="two-sided") colnames(B) <- c("LB","UB")
    return(B)
}


RR.S2.BB <- function(y,n,type,RR.null) { #returns Berger-Boos P-value for given RR.null=rho_i^0
   S=apply(cbind(y[-1],n[-1],RR.null),1, function(r) RR.S2(c(y[1],r[1]),c(n[1],r[2]),r[3]))
   r <- length(n[-1])
   if (type=="upper") {
                p0up=exactci(y[1],n[1],0.9998)[2] #upper bound of 99.99% confidence for pi_0 Berger-Boos
                helpi=RR.null*ifelse(RR.null<1,p0up,0)
                lambda = 1/sqrt(1+(n[1]/n[-1])*(1-helpi)/(RR.null-helpi))
                Gamma= lambda %o% lambda
                diag(Gamma)=1
                P.val=1 - pmvnorm(lower=rep(min(S),r), upper=rep(Inf,r), corr=Gamma)[1]
                }
   if (type=="two-sided") {
                p0ci=exactci(y[1],n[1],0.9999)#99.99% confidence interval for pi_0 Berger-Boos
                helpi=RR.null*ifelse(RR.null>=1,p0ci[1],p0ci[2])
                lambda = 1/sqrt(1+(n[1]/n[-1])*(1-helpi)/(RR.null-helpi))
                Gamma= lambda %o% lambda
                diag(Gamma)=1
                P.val=1 - pmvnorm(lower=rep(-max(abs(S)),r),upper=rep(max(abs(S)),r), corr=Gamma)[1]
                }
   return(P.val)
}


RRci.S2.BB <- function(y,n,conflev=0.975,type="upper",gridpts=5000) {
   require("mvtnorm")
   r <- length(n[-1])
   if(type=="upper") {
            L=RRci.S2.Sid(y,n,conflev,type,adj="No") #lower bound on upper bound
            U=RRci.S2.Sid(y,n,conflev,type)          #upper bound on upper bound
            RR.grid=matrix(runif(r*gridpts),ncol=r)* matrix(U-L,gridpts,r,byrow=TRUE)+matrix(L,gridpts,r,byrow=TRUE)
            Pval=apply(RR.grid,1,function(r) RR.S2.BB(y,n,type,RR=r))
            index=(Pval>=(1-conflev))
            if (sum(index)==0) { #need finer grid!
                cat("Warning: Largest P-value in grid with S2:",max(Pval)," Enlarge Grid to get better bounds")
                U=RRci.S2.Dun(y,n,conflev,type)
                } else U=apply(RR.grid[index,],2,max)
            U=matrix(U,ncol=1)
            rownames(U)=paste("rho",1:r,":",sep="")
            colnames(U)="UB"
            }
   if(type=="two-sided") {
            B=RRci.S2.Sid(y,n,conflev,type) #Two-sided Sidak points
            RR.grid=matrix(runif(r*gridpts),ncol=r)* matrix(B[,2]-B[,1],gridpts,r,byrow=TRUE)+matrix(B[,1],gridpts,r,byrow=TRUE)
            Pval=apply(RR.grid,1,function(r) RR.S2.BB(y,n,type,RR=r))
            index=Pval>=(1-conflev)
            if (sum(index)==0) {
                cat("Warning: Largest P-value in grid with S2:",max(Pval)," Enlarge Grid to get better bounds")
                U=RRci.S2.Dun(y,n,conflev,type)
                } else {
                    B1=apply(RR.grid[index,],2,min)
                    B2=apply(RR.grid[index,],2,max)
                    U=matrix(c(B1,B2),ncol=2)
                    }
            rownames(U)=paste("rho",1:r,":",sep="")
            colnames(U)=c("LB","UB")
            }
   return(U)
}



#### Inverting the minimum Wald statistic  #########
####################################################

RRci.T1.Sid <- function(y,n,conflev=0.975,type="upper", adj="Sidak") {
#simultaneous upper bounds for relative risks via inverting T1 and Sidak adjustment
#first element of y and n is control group
   r <- length(y[-1])
   if (y[1]==0) {
            U=matrix(.Inf,nrow=r,ncol=ifelse("upper",1,2))
            rownames(U)=paste("rho",1:r,":",sep="")
            if(type=="upper") colnames(U) <- "UB"
            if(type=="two-sided") colnames(U) <- c("LB","UB")
            return(U) #upper bounds in case of 0 successes in control group
            }
   if (any(y==0)) {
            y[y==0] <- y[y==0]+0.5
            n[y==0] <- n[y==0]+0.5
            }
   C <- cbind(matrix(-1,nrow=r),diag(r))
   logRR <- C%*%log(y/n)
   var.logRR <-(n[-1]-y[-1])/(n[-1]*y[-1]) + (n[1]-y[1])/(n[1]*y[1])
   if(type=="two-sided") conflev=1-(1-conflev)/2
   c <- qnorm(1-conflev^(1/r)) #Sidak
   if (adj == "Bonf") c <- qnorm((1-conflev)/r)
   if (adj == "No") c <- qnorm(1-conflev)
   U <- switch(type,
               "upper"=exp(logRR - c*sqrt(var.logRR)),  #upper Sidak
               "two-sided"=cbind(exp(logRR + c*sqrt(var.logRR)),exp(logRR - c*sqrt(var.logRR)))  #two-sided Sidak
               )
   rownames(U) <- paste("rho",1:r,":",sep="")
   if(type=="upper") colnames(U) <- "UB"
   if(type=="two-sided") colnames(U) <- c("LB","UB")
   return(U)
}


RRci.T1.Dun <- function(y,n,conflev=0.975,type="upper") {
#simultaneous upper bounds for relative risks via inverting T1
#first element of y and n is control group
   require("mvtnorm")
   r <- length(y[-1])
   if (y[1]==0) {
            U=matrix(.Inf,nrow=r,ncol=ifelse("upper",1,2))
            rownames(U) <- paste("rho",1:r,":",sep="")
            if(type=="upper") colnames(U) <- "UB"
            if(type=="two-sided") colnames(U) <- c("LB","UB")
            return(U) #upper bounds in case of 0 successes in control group
            }
   if (any(y==0)) {
            y[y==0] <- y[y==0]+0.5
            n[y==0] <- n[y==0]+0.5
            }
    pi.hat <- y/n
    RR.hat <- pi.hat[-1] / pi.hat[1]
    lambda <- 1/sqrt(1+(n[1]/n[-1])*(1-pi.hat[-1])/(RR.hat-pi.hat[-1]))
    Gamma <- lambda %o% lambda
    diag(Gamma) <- 1
    c <- round(qmvnorm(conflev,tail="upper.tail",corr=Gamma)$quantile,4)
    C <- cbind(matrix(-1,nrow=r),diag(r))
    logRR <- C%*%log(y/n)
    var.logRR <-(n[-1]-y[-1])/(n[-1]*y[-1]) + (n[1]-y[1])/(n[1]*y[1])
    U <- switch(type,
               "upper"={c=round(qmvnorm(conflev,tail="upper.tail",corr=Gamma)$quantile,4)
                        exp(logRR - c*sqrt(var.logRR))},  #upper Dunnett
               "two-sided"={c=round(qmvnorm(conflev,tail="both.tails",corr=Gamma)$quantile,4)
                            cbind(exp(logRR - c*sqrt(var.logRR)),exp(logRR + c*sqrt(var.logRR)))}  #two-sided Sidak
               )
    rownames(U) <- paste("rho",1:r,":",sep="")
    if(type=="upper") colnames(U) <- "UB"
    if(type=="two-sided") colnames(U) <- c("LB","UB")
    return(U)
}

RR.T1.BB <- function(y,n,type,RR.null) {
   r <- length(y[-1])
   C <- cbind(matrix(-1,nrow=r),diag(r))
   logRR <- C%*%log(y/n)
   var.logRR=(n[-1]-y[-1])/(n[-1]*y[-1]) + (n[1]-y[1])/(n[1]*y[1])
   T=(logRR-log(RR.null))/sqrt(var.logRR)
   if (type=="upper") {
                p0up=exactci(y[1],n[1],0.9998)[2] #upper bound of 99.99% confidence for pi_0 Berger-Boos
                helpi=ifelse(RR.null<1,RR.null*p0up,0)
                lambda = 1/sqrt(1+(n[1]/n[-1])*(1-helpi)/(RR.null-helpi))
                Gamma= lambda %o% lambda
                diag(Gamma)=1
                P.val=1 - pmvnorm(lower=rep(min(T),r), upper=rep(Inf,r), corr=Gamma)[1]
                }
   if (type=="two-sided") {
                p0ci=exactci(y[1],n[1],0.9999)#99.99% confidence interval for pi_0 Berger-Boos
                helpi=RR.null*ifelse(RR.null>=1,p0ci[1],p0ci[2])
                lambda = 1/sqrt(1+(n[1]/n[-1])*(1-helpi)/(RR.null-helpi))
                Gamma= lambda %o% lambda
                diag(Gamma)=1
                P.val=1 - pmvnorm(lower=rep(-max(abs(T)),r),upper=rep(max(abs(T)),r), corr=Gamma)[1]
                }
   return(P.val)
}

RRci.T1.BB <- function(y,n,conflev=0.975,type="upper",gridpts=5000) {
   require("mvtnorm")
   r <- length(y[-1])
   if (y[1]==0) {
            U=matrix(.Inf,nrow=r,ncol=ifelse("upper",1,2))
            rownames(U) <- paste("rho",1:r,":",sep="")
            if(type=="upper") colnames(U) <- "UB"
            if(type=="two-sided") colnames(U) <- c("LB","UB")
            return(U) #bounds in case of 0 successes in control group
            }
   if (any(y==0)) {
            y[y==0] <- y[y==0]+0.5
            n[y==0] <- n[y==0]+0.5
            }
   if(type=="upper") {
            L=RRci.T1.Sid(y,n,conflev,type,adj="No") #lower bound on upper bound
            U=RRci.T1.Sid(y,n,conflev,type)          #upper bound on upper bound
            RR.grid=matrix(runif(r*gridpts),ncol=r)* matrix(U-L,gridpts,r,byrow=TRUE)+matrix(L,gridpts,r,byrow=TRUE)
            Pval=apply(RR.grid,1,function(r) RR.T1.BB(y,n,type,RR=r))
            index=Pval>=(1-conflev)
            if (sum(index)==0) {
                cat("Warning: Largest P-value in grid with T1:",max(Pval)," Enlarge Grid to get better bounds")
                U=RRci.T1.Dun(y,n,conflev,type)
                }
            else U=apply(RR.grid[index,],2,max)
            U=matrix(U,ncol=1)
            rownames(U)=paste("rho",1:r,":",sep="")
            colnames(U)="UB"
            }
   if(type=="two-sided") {
            B=RRci.T1.Sid(y,n,conflev,type) #Two-sided Sidak points
            RR.grid=matrix(runif(r*gridpts),ncol=r)* matrix(B[,2]-B[,1],gridpts,r,byrow=TRUE)+matrix(B[,1],gridpts,r,byrow=TRUE)
            Pval=apply(RR.grid,1,function(r) RR.T1.BB(y,n,type,RR=r))
            index=Pval>=(1-conflev)
            if (sum(index)==0) {
                cat("Warning: Largest P-value in grid with T1:",max(Pval)," Enlarge Grid to get better bounds")
                U=RRci.T1.Dun(y,n,conflev,type)
                }
            else {
                B1=apply(RR.grid[index,],2,min)
                B2=apply(RR.grid[index,],2,max)
                U=matrix(c(B1,B2),ncol=2)
                }
            rownames(U)=paste("rho",1:r,":",sep="")
            colnames(U)=c("LB","UB")
            }
   return(U)
}


################################################
###### Coverage and Power Simulations ##########
################################################
# p0 ... true control probability
# RR ... true relative risks
# n ... sample size
# RR.null ... do I have enough power to detect relative risks significantly below these values?
# which.stat ... which test statistic should be inverted and what multiplicity method used: one of
#                S2.Sid, S2.Dun, S2.BB, T1.Sid, T1.Dun, T1.BB
# Note: The Berger-Boos adjustment BB can be very time consuming, especially in higher dimension!!
#        Adjust the number of gridpoint, gridpts, a parameter in the ciRR.S2.BB and ciRR.T1.BB functions
#        to shorten simulation time. gridpts=200 instead of the default 5000) should be fine for simulation purposes.
# Note: Two-sided version not implemented yet!

sim.cov.pow <- function(p0, RR, n, conflev=0.975, RR.null=NULL, sims=100, which.stat=c("S2.Unadj","S2.Dun","S2.BB"), seed=NULL) {
    if (!is.null(seed)) set.seed(seed)
    probs <- c(p0,p0*RR)
    r=length(which.stat)
    coverage <- matrix(NA,sims,r)
    if (!is.null(RR.null)) pow.least=pow.all <- matrix(NA,sims,r)
    int <- matrix(NA,r,length(RR))
    stat=paste("RRci",which.stat,sep=".")
    for (sim in 1:sims) {
        y <- rbinom(length(probs),size=n,prob=probs)
        for (i in 1:r) int[i,] <- do.call(stat[i],list(y=y,n=n,conflev=conflev))
        coverage[sim,] <- apply(int,1,function(r) prod(RR<r))
        if (!is.null(RR.null)) {
            pow.least[sim,] <- apply(int,1,function(r) max(r<RR.null))
            pow.all[sim,] <- apply(int,1,function(r) prod(r<RR.null))
            }
    }
    coverage1=colMeans(coverage);names(coverage1)=which.stat
    if (!is.null(RR.null)) {
        pow.least1=colMeans(pow.least);names(pow.least1)=which.stat
        pow.all1=colMeans(pow.all);names(pow.all1)=which.stat
        }
    if (!is.null(RR.null)) {
        return(list(mean.coverage=coverage1, mean.pow.least=pow.least1, mean.pow.all=pow.all1))
        } else return(list(mean.coverage=coverage1))

}


###############
# Example  ###
###############
# y <- c(70,26,24) # successes
# n <- c(3900,3900,3900) #sample sizes
# RRci.S2.Sid(y, n, conflev=0.975, type="upper", adj="No") # Unadjusted Score bounds
# RRci.S2.Sid(y, n, conflev=0.975, type="upper") # Sidak adj. simult. score bounds
# RRci.S2.Dun(y, n, conflev=0.975, type="upper") # Dunnett adj. simult. score bounds
# RRci.S2.BB(y, n, conflev=0.975, type="upper") # Berger-Boos adj. simult. score bounds
# RRci.T1.Sid(y, n, conflev=0.975, type="upper") # Sidak adj. simult. Wald bounds
# RRci.T1.Dun(y, n, conflev=0.975, type="upper") # Dunnett adj. simult. Wald bounds
# RRci.T1.BB(y, n, conflev=0.975, type="upper") # Berger-Boos adj. simult. Wald bounds
############
# For two-sided simultaneous intervals select type="two-sided"
# RRci.S2.BB(y, n, conflev=0.95, type="two-sided") # Berger-Boos adj. simult. score intervals
############
# To simulate coverage and power for any setting of parameters (only one-sided case implemented so far):
# To facilitate large sacel simulation for the BB approach, set gridpts=200 instead
# sim.result <- sim.cov.pow(p0=0.05, RR=c(0.8,0.7,0.6), n=c(1000,1000,1000,1000), conflev=0.975, RR.null=c(1,1,1), which.stat=c("S2.Sid","S2.Dun","S2.BB","T1.Sid","T1.Dun","T1.BB"), sims=6000)
# sim.result$mean.coverage
# sim.result$mean.pow.least
# sim.result$mean.pow.all