# When using this code, please cite the paper:
# Klingenberg, B. (2014), "A new and improved confidence
# interval for the Mantel–Haenszel risk difference",
# Statistics in Medicine, 33, 2968-2983.

#################################################################
# Computes the CI for a common risk difference
# from several stratified 2x2 tables,
# using the variance estimator of Sato under the null (default).
# Alternative: method="Sato" returns the Sato interval.
# Alternative: method="GR" returns the Greenland-Robins interval.
# Input dataset: needs to have centers as rows and
# number of successes in group 1, number of trials in group 1,
# number of successes in group 2, number of trials in group 2
# as columns, in precisely this order.
#################################################################

strat.MHRD <- function(dataset, conflev=0.95, method="Sato0") {
  num = sum(apply(dataset, 1, function(ro) (ro[1]*ro[4] - ro[3]*ro[2])/(ro[2]+ro[4])))
  W   = sum(apply(dataset, 1, function(ro) ro[2]*ro[4]/(ro[2]+ro[4])))  # Cochran weights
  delta.MH = num/W
  P = sum(apply(dataset, 1, function(ro)
    (ro[2]^2*ro[3] - ro[4]^2*ro[1] + 0.5*ro[2]*ro[4]*(ro[4]-ro[2]))/(ro[2]+ro[4])^2))
  Q = sum(apply(dataset, 1, function(ro)
    (ro[1]*(ro[4]-ro[3]) + ro[3]*(ro[2]-ro[1]))/(2*(ro[2]+ro[4]))))
  if (method == "Sato0") {
    delta.Mid = delta.MH + 0.5*qchisq(conflev, df=1)*(P/W^2)
    ME = sqrt(delta.Mid^2 - delta.MH^2 + qchisq(conflev, df=1)*Q/W^2)
    CI = delta.Mid + cbind(-1, 1)*ME
    return(list(delta.MH=delta.MH, delta.Mid=delta.Mid,
                pseudo.se=ME/qnorm(1-(1-conflev)/2),
                CI=CI, conflev=conflev, var.estimator="Sato0"))
  }
  if (method == "Sato") {
    var.delta.MH = (delta.MH*P + Q)/W^2
  } else {
    p1 = dataset[,1]/dataset[,2]
    p2 = dataset[,3]/dataset[,4]
    denom = apply(dataset, 1, function(ro) ro[2]*ro[4]/(ro[2]+ro[4]))  # Cochran weights
    var.delta.MH = sum(denom^2 * (p1*(1-p1)/dataset[,2] + p2*(1-p2)/dataset[,4])) / W^2
  }
  return(list(delta.MH=delta.MH, se.delta.MH=sqrt(var.delta.MH),
              CI=delta.MH + c(-1,1)*qnorm(1-(1-conflev)/2)*sqrt(var.delta.MH),
              conflev=conflev,
              var.estimator=ifelse(method=="Sato", "Sato", "Greenland-Robins")))
}

#############################################################################
## Example ##
dataset = matrix(c(8,106,5,120, 22,98,16,85), nrow=2, byrow=TRUE)  # from Rothman
strat.MHRD(dataset)                 # new interval (preferred), default
strat.MHRD(dataset, method="GR")    # Greenland-Robins interval
strat.MHRD(dataset, method="Sato")  # Sato interval
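# Optional sanity check, not part of the original script: with a single stratum
# the Mantel-Haenszel estimate reduces to the plain difference of sample
# proportions p1 - p2. 'one.tab' is a hypothetical one-row input used only here.
one.tab <- matrix(c(8, 106, 5, 120), nrow=1)
strat.MHRD(one.tab)$delta.MH   # MH estimate from strat.MHRD()
8/106 - 5/120                  # direct difference of sample proportions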
#################################################################
# Computes the CI for a common risk difference
# from several stratified 2x2 tables,
# based on inverting the Miettinen-Nurminen score statistic.
# Allows for different weights.
# Main function that returns the interval: strat.MHRD.MN()
# Input dataset: needs to have centers as rows and
# number of successes in group 1, number of trials in group 1,
# number of successes in group 2, number of trials in group 2
# as columns, in precisely this order.
#################################################################

########### Miettinen and Nurminen type interval ###############

resMLE.MN <- function(delta, y, n) {
  # Computes restricted MLEs for pi1, pi2 under the restriction pi1 - pi2 = delta
  # for a given 2x2 table and returns the components of the MN chi-square function.
  N  = sum(n)
  C  = sum(y)
  L3 = N
  L2 = (n[1]+2*n[2])*delta - N - C
  L1 = (n[2]*delta - N - 2*y[2])*delta + C
  L0 = y[2]*delta*(1-delta)
  c  = L2^3/(3*L3)^3 - L1*L2/(6*L3^2) + L0/(2*L3)
  b  = ifelse(c >= 0, 1, -1)*sqrt(L2^2/(3*L3)^2 - L1/(3*L3))
  d  = max(c/b^3, -1)
  d  = min(d, 1)
  a  = (3.14159265359 + acos(d))/3
  p2 = 2*b*cos(a) - L2/(3*L3)
  p2 = min(p2, 1)
  p2 = max(0, p2)
  p1 = p2 + delta
  p1 = min(p1, 1)
  p1 = max(0, p1)
  numer = y[1]/n[1] - y[2]/n[2] - delta
  denom = (p1*(1-p1)/n[1] + p2*(1-p2)/n[2])*sum(n)/(sum(n)-1)
  return(c(numer, denom))
}

MN.weights <- function(delta, dataset, bias.correct=TRUE) {
  # Computes the MN weights needed for the CI with stratified data.
  # dataset needs to have centers as rows and
  # number of successes in group 1, number of trials in group 1,
  # number of successes in group 2, number of trials in group 2
  # as columns.
  w = apply(dataset[,c(2,4)], 1, prod) / rowSums(dataset[,c(2,4)])  # w = n1*n2/(n1+n2)
  repeat {
    w.old = w
    w = w/sum(w)
    p = t(apply(dataset, 1, function(ro) resMLE.MN(delta, y=ro[c(1,3)], n=ro[c(2,4)])))
    # contains the restricted MLEs for p1 and p2 for each stratum for the given delta
    R  = colSums(w*p)
    RR = R*(1-R)
    if (bias.correct)
      w = apply(dataset[,c(2,4)], 1, function(ro)
        (1 / (RR[1]/RR[2]/ro[1] + 1/ro[2])) * (sum(ro)-1)/sum(ro))  # updated weights with bias correction
    else
      w = apply(dataset[,c(2,4)], 1, function(ro)
        1 / (RR[1]/RR[2]/ro[1] + 1/ro[2]))                          # updated weights without bias correction
    if (sum(abs(w - w.old)) < 10^(-4)) break
  }
  return(w)
}

#################

scorestat.MN <- function(delta0, dataset, w=NULL, conflev=0.95, bias.correct=TRUE) {
  # Computes the chi-square statistic for a given null value delta0
  # over the stratified 2x2 tables
  # (Miettinen and Nurminen 1985, Statistics in Medicine, page 218).
  # dataset needs to have centers as rows and
  # number of successes in group 1, number of trials in group 1,
  # number of successes in group 2, number of trials in group 2
  # as columns.
  # Default weights used are Cochran weights!
  if (is.null(w)) w = apply(dataset[,c(2,4)], 1, prod) / rowSums(dataset[,c(2,4)])  # Cochran weights
  helpi  = apply(dataset, 1, function(ro) resMLE.MN(delta0, y=ro[c(1,3)], n=ro[c(2,4)]))
  chisqf = sum(w*helpi[1,])^2 / sum(w^2*helpi[2,])
  if (!is.finite(chisqf)) return(-Inf) else return(chisqf)
}
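# Optional illustration, not part of the original script: how the building blocks
# above fit together for the Rothman data defined earlier. No results are
# hard-coded; everything is computed by the functions above.
resMLE.MN(0, y=dataset[1, c(1,3)], n=dataset[1, c(2,4)])  # MN components (score numerator, variance term) for stratum 1 at delta = 0
scorestat.MN(0, dataset)   # stratified score statistic at delta0 = 0 (default Cochran weights)
qchisq(0.95, df=1)         # critical value; the 95% CI collects all delta0 with statistic below this
MN.weights(0, dataset)     # iterative Miettinen-Nurminen weights at delta = 0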
strat.MHRD.MN <- function(dataset, weights="Cochran", conflev=0.95, bias.correct=TRUE) {
  # bias.correct=TRUE gives the MN statistic, FALSE the regular score statistic

  #### weights:
  w.orig = switch(weights,
    "sample"  = rowSums(dataset[,c(2,4)]),                                        # w = n1+n2
    "Cochran" = apply(dataset[,c(2,4)], 1, prod) / rowSums(dataset[,c(2,4)]),     # w = n1*n2/(n1+n2)
    "BS"      = apply(dataset[,c(2,4)], 1, prod),                                 # w = n1*n2
    "MH"      = apply(dataset[,c(2,4)], 1, prod) / (rowSums(dataset[,c(2,4)])-1), # w = n1*n2/(n1+n2-1)
    "inv.Var" = {
      p1 = dataset[,1]/dataset[,2]
      p2 = dataset[,3]/dataset[,4]
      (p1*(1-p1)/dataset[,2] + p2*(1-p2)/dataset[,4])^(-1)
    },
    "Add4" = {  # inverse variance with 1 success and 1 failure added to each group
      p1 = (dataset[,1]+1)/(dataset[,2]+2)
      p2 = (dataset[,3]+1)/(dataset[,4]+2)
      (p1*(1-p1)/dataset[,2] + p2*(1-p2)/dataset[,4])^(-1)
    },
    "Add2" = {  # inverse variance with 0.5 successes and 0.5 failures added to each group
      p1 = (dataset[,1]+0.5)/(dataset[,2]+1)
      p2 = (dataset[,3]+0.5)/(dataset[,4]+1)
      (p1*(1-p1)/dataset[,2] + p2*(1-p2)/dataset[,4])^(-1)
    },
    "MN" = MN.weights(0, dataset, bias.correct)  # iterative weights proposed by Miettinen and Nurminen,
                                                 # initially computed for delta0=0
  )

  # getting an initial point estimate to start the numerical search:
  w = w.orig/sum(w.orig)                                 # standardized weights
  d = dataset[,1]/dataset[,2] - dataset[,3]/dataset[,4]  # sample risk differences
  delta.MH = sum(w*d)

  ## searching endpoints for the root search
  eps = 10^(-3)
  p1 = (dataset[,1]+1)/(dataset[,2]+2)
  p2 = (dataset[,3]+1)/(dataset[,4]+2)
  delta.min = max(min(p1)-1, -min(p2))
  check.low = scorestat.MN(delta.min, dataset=dataset, w=w, conflev=conflev, bias.correct=bias.correct)
  while (check.low < 0 & delta.min > -1) {
    delta.min = delta.min - eps
    check.low = scorestat.MN(delta.min, dataset=dataset, w=w, conflev=conflev, bias.correct=bias.correct)
  }
  delta.max = min(max(p1), 1-min(p2))
  check.high = scorestat.MN(delta.max, dataset=dataset, w=w, conflev=conflev, bias.correct=bias.correct)
  while (check.high < 0 & delta.max < 1) {
    delta.max = delta.max + eps
    check.high = scorestat.MN(delta.max, dataset=dataset, w=w, conflev=conflev, bias.correct=bias.correct)
  }

  crit <- qchisq(conflev, df=1)  # critical value

  if (delta.MH == -1) delta.lb = -1 else {
    # interval halving to find the lower bound
    delta1 <- delta.MH
    delta2 <- -1
    while (abs(delta1-delta2) > 10^(-6)) {  # bisection for LB
      delta <- (delta1+delta2)/2
      if (weights == "MN") {
        w <- MN.weights(delta, dataset=dataset, bias.correct=bias.correct)
        w <- w/sum(w)
      }
      s <- scorestat.MN(delta, dataset=dataset, w=w, conflev=conflev, bias.correct=bias.correct)
      if (s > crit) delta2 <- delta else delta1 <- delta
    }
    delta.lb <- delta
  }
  # delta.lb = uniroot(scorestat.MN, interval=c(delta.min, delta.MH), dataset=dataset, w=w,
  #                    conflev=conflev, bias.correct=bias.correct, tol=10^(-5))$root

  if (delta.MH == 1) delta.ub = 1 else {
    # interval halving to find the upper bound
    delta1 <- delta.MH
    delta2 <- 1
    while (abs(delta1-delta2) > 10^(-6)) {  # bisection for UB
      delta <- (delta1+delta2)/2
      if (weights == "MN") {
        w <- MN.weights(delta, dataset=dataset, bias.correct=bias.correct)
        w <- w/sum(w)
      }
      s <- scorestat.MN(delta, dataset=dataset, w=w, conflev=conflev, bias.correct=bias.correct)
      if (s > crit) delta2 <- delta else delta1 <- delta
    }
    delta.ub <- delta
  }
  # delta.ub = uniroot(scorestat.MN, interval=c(delta.MH, delta.max), dataset=dataset, w=w,
  #                    conflev=conflev, bias.correct=bias.correct, tol=10^(-5))$root

  return(list(delta.MH=delta.MH, CI=c(delta.lb, delta.ub), conflev=conflev,
              weights=list(Name=weights, raw.weights=w.orig, std.weight=w)))
}

#####################################################
## Example ##
strat.MHRD.MN(dataset)                     # Miettinen-Nurminen score interval with Cochran weights
strat.MHRD.MN(dataset, weights="MN")       # Miettinen-Nurminen score interval with Miettinen-Nurminen weights
strat.MHRD.MN(dataset, weights="inv.Var")  # Miettinen-Nurminen score interval with inverse-variance weights
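# Optional comparison, not part of the original script: the interval types side
# by side for the Rothman data. All values are computed by the functions defined
# above; nothing is hard-coded.
list(Sato0 = strat.MHRD(dataset)$CI,
     GR    = strat.MHRD(dataset, method="GR")$CI,
     MN    = strat.MHRD.MN(dataset)$CI)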