# *********************************************************************
#  R Script "Homogeneity Permutation Tests" (January 3, 2019)
#  see Chapter 5, §5.3.4 (test), 5.4.3 (region) for partial comparison,
#  in                                      
#     "Combinatorial Inference in Geometric Data Analysis"   
#            B. Le Roux, S. Bienaise, J.-L. Durand          
#                   Chapman & Hall/CRC, 2019                 
# ********************************************************************
# Data consist in a table with coordinate variables, the last column contains
# integer values that indicates the groups.
# In order to  choose the type of comparison a logical value is given 
# (TRUE indicates partial comparison, and FALSE a specific comparison).
#
# The following R code deals with partial or specific comparison
# of two independent groups in multivariate case (2 or more dimensions).
# The two groups to be compared are numbered 1 and 2.

# === Step 1. The data  ===
setwd("D:/path")        # replace "D:/path" by the path of the data directory
base <- read.table(file = "Target_C3.txt", header = TRUE,
                   sep = ";", dec = ".", row.names = 1)
base[(which(base[, dim(base)[2]] > 3)), dim(base)[2]] <- 3
# partial <- FALSE # if specific comparison
partial <- TRUE # if partial comparison
select <- 1:dim(base)[1]
if(!partial) {select <- which(base[, dim(base)[2]] < 3)}

X_OM.IK <- as.matrix(base[select,-dim(base)[2]])  # coordinates
C <- as.factor(base[select,dim(base)[2]])         # factor
n <- dim(X_OM.IK)[1]                    # number of individuals
K <- dim(X_OM.IK)[2]                    # dimensionality of space
n.C <- as.vector(table(C))              # sizes of groups
cardC <- length(n.C)                    # number of groups
lc <- 1:2                               # groups to be compared
nprim <- n.C[1] + n.C[2]
if (K < 2) stop("Dimension < 2")

# Step 2. Parameters of the method
notable_PV <- 0.04        # notable limit for proportion of variance
alpha <- 0.05             # alpha level
max_number <- 100000      # maximum number of nestings
n_dir <- 500              # number of lines for compatibility region
seed <- NULL              # seed for random number generator


# Step 3. Descriptive analysis
X_GM.IK <- sweep(X_OM.IK, 2, colMeans(X_OM.IK), "-")
Mcov.KK <- t(X_GM.IK) %*% X_GM.IK /n
Vcloud <- sum(diag(Mcov.KK))
X_GGc.CK <- as.matrix(aggregate(X_GM.IK, by=list(C), FUN=mean)[,-1])
Var_Cprim <- sum(diag(cov.wt(X_GGc.CK[lc, ],
                             method = "ML", wt = n.C[lc])$cov))
PV <- (nprim/n) * Var_Cprim/Vcloud
cat(" partial eta squared ", PV, "\n")
if (PV < notable_PV) {
  cat(" PV is small: there is no point in doing the test.\n")
} else {cat(" descriptively, mean points differ.\n")}

# Step 4. The testing procedure
cardJ <- choose(n, n.C[1])
if(cardC >2){
  for (i in 2:(cardC - 1))
  {cardJ <- cardJ * choose(n - sum(n.C[1:(i-1)]), n.C[i])}
}
if (cardJ < max_number) { # exhaustive method
  s.C <- cumsum(n.C)
  J0 <- choose(n, n.C[1])
  Ep1 <- rbind(combn(n, n.C[1]), combn(n, n - s.C[1])[ , J0:1])
  if(cardC > 2){
    for (c in 2:(cardC - 1)) {
      s0 <- s.C[c - 1]
      s1 <- s0 + 1
      nr <- n - s.C[c]
      Ep0 <- Ep1
      J1 <- choose(n - s0, n.C[c])
      Ep1 <- rbind(matrix(apply(matrix(Ep0[1:s0, ], nrow = s0), 2, rep, times = J1), nrow = s0), 
                   matrix(apply(Ep0[(s1):n , ], 2, combn, m = n.C[c]), nrow = n.C[c]), 
                   matrix(apply(Ep0[(s1):n , ], 2, combn, m = nr)[(nr*J1):1,], nrow = nr)[nr:1, ])
      J0 <- J0 * J1
    }
  }
  Ep1 <- t(Ep1)
  Epsilon.JI <- matrix(1, nrow = J0, ncol = n)
  for (c in 2:cardC) {
    for (j in 1:J0) {
      Epsilon.JI[j, Ep1[j, (s.C[c - 1] + 1):(s.C[c])]] <- c
    }
  }
  rm(Ep0, Ep1)
} else { # MC method
  cardJ <- max_number; set.seed(seed)
  Epsilon.JI <- matrix(0L, nrow = cardJ, ncol = n)
  for (j in 1:cardJ) {Epsilon.JI[j, ] <- sample(C)}
}

# coordinates of cloud M^I in orthocalibrated principal basis
eig <- eigen(Mcov.KK, symmetric = TRUE)
L <- sum(eig$values > 1.5e-8); lambda.L <- eig$values[1:L]
if (L < 2) {warning("One-dimensional cloud")
} else {
  BasisChange.KL <- eig$vectors[ , 1:L] %*%
    diag(1/sqrt(lambda.L), nrow= L)
  Z_GM.IL <- X_GM.IK %*% BasisChange.KL 
  Z_GGc.CL <- X_GGc.CK %*% BasisChange.KL
  D2M_obs <- sum( (Z_GGc.CL[1, ] - Z_GGc.CL[2, ])^2 )

  Zd2_G.JC <- matrix(0, nrow= cardJ, ncol= cardC)
  for (c in (1:cardC)) {
    one.C <- rep(0L, cardC); one.C[c] <- 1L
    One_c.JI <- matrix(one.C[t(Epsilon.JI)],
                       nrow= cardJ, ncol= n, byrow= TRUE)
    Zd2_G.JC[, c] <- as.matrix( rowSums((One_c.JI %*% Z_GM.IL)^2),
                                nrow= cardJ, ncol= cardC) / n.C[c]
  }
  D2M.J <- { (nprim*rowSums(Zd2_G.JC[,lc]) -
                (n-nprim)*Zd2_G.JC[,cardC]) / (n.C[1]*n.C[2]) }
  D2M_obs <- sum((Z_GGc.CL[2, ] - Z_GGc.CL[1, ])^2)
  n_sup <- sum(D2M.J >= D2M_obs * (1 - 1e-12))
  p_value <- n_sup / cardJ
  cat(" p-value = ", n_sup, "/", cardJ, " = ",
      format(round(p_value, digits= 3), nsmal= 3),"\n", sep= "")
}

# Step 5. The compatibility region
Xd_obs.K <- t(t( X_GGc.CK[2, ] - X_GGc.CK[1, ] ))
d.C <- c(-n.C[2], n.C[1], 0)
X_GR.IK <- X_GM.IK - t(t(d.C[C])) %*% t(Xd_obs.K)/nprim
Rcov.KK <- t(X_GR.IK) %*% X_GR.IK /n # covariance matrix of reference
eig <- eigen(Rcov.KK, symmetric= TRUE)
L <- sum(eig$values > 1.5e-8) ; lambda.L <- eig$values[1:L]
if (L < 2){warning("One-dimensional cloud")
} else {
  BasisChange.KL <- eig$vectors[,1:L] %*% diag(1/sqrt(lambda.L),nrow=L)
  U_GR.IL <- X_GR.IK %*% BasisChange.KL
  U_OD.JL <- matrix(d.C[Epsilon.JI], nrow= cardJ, ncol= n) %*%
    U_GR.IL/(n.C[1]*n.C[2])
  C.J <- t(t(rowSums(U_OD.JL^2))) # squared R-norms of deviations
  Cnul <- which(C.J < 1e-12)

  n11.J <- rowSums(Epsilon.JI[ , which(C == 1)] == 1)
  n22.J <- rowSums(Epsilon.JI[ , which(C == 2)] == 2)
  nn.J <- rowSums(Epsilon.JI[ , which(C !=3)] != 3)
  epsilon.J <- n11.J/n.C[1] + n22.J/n.C[2]- nn.J/nprim
  rm(Epsilon.JI)
  rank_inf <- trunc(alpha * cardJ) + 1
  kappa.D <- rep(0, 2 * n_dir); eff.D <- rep(0L, 2 * n_dir)
  set.seed(seed)
  for (d in 1:n_dir){
    xy.L <- runif(L, -1, 1); Beta.L <- t(t(xy.L / sqrt(sum(xy.L^2))))
    U.J <- U_OD.JL %*% Beta.L
    A.J <- epsilon.J^2 -1 + abs(C.J - U.J^2) * n.C[1]*n.C[2] /n/nprim
    B.J <- epsilon.J * U.J
    Delta.J <- abs( B.J^2 - A.J * C.J)
    x1.J <- (-B.J + sqrt(Delta.J))/A.J # negative root
    x2.J <- (-B.J - sqrt(Delta.J))/A.J # positive root
    AC <- setdiff( which(abs (A.J) < 1e-12), Cnul) #A = 0 & C>0
    if(length(AC) > 0){ #case A.J=0 and C.J>0
      for (j in AC){
        x1.J[j] = -Inf; x2.J[j] = Inf
        if (B.J[j] > 1e-12) {x1.J[j] <- -C.J[j] / abs(2*B.J[j])}
        if (B.J[j] < -1e-12) {x2.J[j] <- +C.J[j] / abs(2*B.J[j])}
      }
    }
    for (j in Cnul){ # particular case C.J=0
      if(epsilon.J[j]^2 == 1) { x1.J[j] <- -Inf; x2.J[j] <- Inf
      } else {x1.J[j] <- x2.J[j] <- 0 }
    }
    x1_sorted <- sort(x1.J); x2_sorted <- sort(x2.J)
    kappa.D[2*d - 1] <- abs(x1_sorted[rank_inf])
    kappa.D[2*d] <- x2_sorted[cardJ + 1 - rank_inf]
  }
  if (max(kappa.D) == Inf) {
    warning("Finite compatibility region is not accessible")
  } else {
    cat(" mean of ", 2*max(L,n_dir)," kappa values = ",
        round(mean(kappa.D), digits= 2), " in the range ",
        floor(min(kappa.D)*1000)/1000, " to ",
        ceiling(max(kappa.D)*1000)/1000, " \n", sep= "")
  }
}
