# *************************************************************
#  R Script "Combinatorial Typicality Test" (January 3,, 2019)
#  see  Chapter 3, §3.6.1, in
#     "Combinatorial Inference in Geometric Data Analysis"   
#            B. Le Roux, S. Bienaise, J.-L. Durand          
#                   Chapman & Hall/CRC, 2019                 
# *************************************************************
# Data consist in two tables: 
# one for the reference cloud, and one for the group cloud.
# Each row contains the coordinates of points.

# === Step 1. The data  ===
setwd("D:/path")          # REPLACE "D:/path" by the path of the data directory

base <-  read.table(file = "Target_reference.txt", header= TRUE,
                    sep= ";", dec= ".", row.names= 1)
n    <- dim(base)[1]                     # cardinality of reference set
K    <- dim(base)[2]                     # number of coordinates variables
X.IK <- as.matrix(base, nrow= n, ncol= K)# matrix of reference coordinates

base <-  read.table(file = "Target_group.txt", header= TRUE,
                    sep= ";", dec= ".", row.names= 1)
n_c  <- dim(base)[1]                     # cardinality of group
X.CK <- as.matrix(base, nrow= n_c, ncol= K)# matrix of group coordinates
if(dim(base)[2] < K)
  stop("the number of columns for group is less than ", K)

# === Step 2. === Parameters of the method
notable_D  <- 0.4          # notable limit for D
alpha    <- 0.05           # alpha level for compatibility region
max_number <- 100000       # maximum number of samples
seed       <- NULL         # seed for random number generator

# === Step 3. Descriptive analysis ===
Mcov.KK <- cov.wt(X.IK, method= "ML")$cov     # covariance matrix
eig <- eigen(Mcov.KK, symmetric = TRUE)       # diagonalization 
L <- sum(eig$values > 1.5e-8)                 # dimensionality of cloud
lambda.L <- eig$values[1:L]                   # non--null eigenvalues
if (L < K) {
  warning("The dimension of the reference cloud is ", L, " < ", K,
          " (number of variables). \n It is advisable to study again",
          " the geometric construction of the reference cloud.")
}
BasisChange.KL <- eig$vectors[ ,1:L] %*% diag(1/sqrt(lambda.L), nrow=L)
Z.IL   <- sweep(X.IK, 2, colMeans(X.IK), "-") %*% BasisChange.KL
ZC.L   <- t(colMeans(X.CK) - colMeans(X.IK)) %*% BasisChange.KL
d2_obs <- sum(ZC.L^2)               # observed value of test statistic
cat(" M-distance D = ", sqrt(d2_obs), "\n",
    " Descriptively, the deviation is ",
    ifelse(d2_obs >= notable_D^2, "notable.",
           "small: there is no point in doing the test."), sep= "")
 
# === Step 4. The testing procedure ===
cardJ <- min(choose(n, n_c), max_number)     # number of used samples
D2.J <- matrix(0, nrow= cardJ, ncol= 1)      # test statistic
if (choose(n, n_c) <= max_number) {
  Samples.JC <- t(combn(n, n_c))
  for (j in 1:cardJ)
    D2.J[j]  <- sum(colMeans(as.matrix(Z.IL[Samples.JC[j, ], ],
                                       ncol = L))^2)
} else { set.seed(seed)
  for (j in 1:cardJ)
    D2.J[j]  <- sum(colMeans(as.matrix(Z.IL[sample.int(n, n_c),
                                            ncol = L]))^2)
}
if(d2_obs >= notable_D^2){
  n_sup   <- sum(D2.J >= d2_obs * (1 - 1e-12))
  p_value <- n_sup / cardJ
  cat(" p-value = ", n_sup, "/", cardJ, " = ",
      format(round(p_value, digits= 3), nsmall= 3),"\n", sep= "")
}

# === Step 5. The compatibility region ===
rank      <-  cardJ - trunc(cardJ *alpha) 
d_alpha   <- sqrt(sort(D2.J)[rank])

cat("\n ", 100*(1 - alpha), "% compatibility region:\n", 
    "  principal ellipsoid of the reference cloud ",
    "centred on the group mean point,\n  with scale parameter", 
    " d_alpha = ", round(d_alpha, 3), ". \n", sep = "")

#==================== end of the script ====================

