# CIGDA_Homog-v1 (January 21, 2019)
# B. Le Roux, S. Bienaise, J.-L. Durand

#!SPADR Homogeneity test for independent groups

#### SPAD Interface: BEGIN ####

#SPAD.VARN STATUT = SEL; LABEL = Selected; ROLE = C
#SPAD.VAR1 STATUT = FACT; LABEL = Factor defining groups; ROLE = N


#SPAD.TITRE[1]      LABEL = Analyses

#SPAD.SEP[1]        LABEL = Type of comparison
#SPAD.PAR.LISTE[1]  NOM   = COMPARISON_TYPE; LABEL = ; ITEMS = global#partial#specific; STYLE = LARGE2; DEFAUT = 1

#SPAD.SEP[1]        LABEL = If partial or specific, put comma between labels of groups to be compared and & between those to be grouped
#SPAD.PAR.TEXTE[1]  NOM   = LEVELS; LABEL = comparison; DEFAUT = 

#SPAD.SEP[1]        LABEL = Choice of analyses
#SPAD.PAR.LISTE[1]  NOM   = SPACE_TYPE; LABEL = ; ITEMS = one-dimensional#multidimensional#one & multidimensional; STYLE = LARGE2; DEFAUT = 2

#SPAD.SEP[1]        LABEL = Combinatorial procedures
#SPAD.PAR.LISTE[1]  NOM   = PROC_TYPE; LABEL = ; ITEMS = test#compatibility region#test & region; STYLE = LARGE2; DEFAUT = 1


#SPAD.TITRE[2]      LABEL = Parameters

#SPAD.SEP[2]        LABEL = Notable limit 
#SPAD.PAR.REEL[2]   NOM   = NOTABLE_PV; LABEL = eta-squared; DEFAUT = 0.04; MIN = 0

#SPAD.SEP[2]        LABEL = Alpha-levels
#SPAD.PAR.REEL[2]   NOM   = ALPHA_T; LABEL = test; MIN = 0.001; MAX = 0.5; DEFAUT = 0.05
#SPAD.PAR.REEL[2]   NOM   = ALPHA_C; LABEL = compatibility region; MIN = 0.001; MAX = 0.5; DEFAUT = 0.05

#SPAD.SEP[2]        LABEL = Nestings
#SPAD.PAR.ENTIER[2] NOM   = MAX_NUMBER; LABEL = maximum number; MIN =5000; DEFAUT = 100000

#SPAD.SEP[2]        LABEL = Monte Carlo
#SPAD.PAR.LISTE[2]  NOM   = RANDOM_SEED; LABEL = seed; STYLE = LARGE2; ITEMS = random#fixed; DEFAUT = 1
#SPAD.PAR.ENTIER[2] NOM   = SEED; LABEL = seed (if fixed); DEFAUT = 1234

#SPAD.SEP[2]        LABEL = Adjusted compatibility region
#SPAD.PAR.ENTIER[2] NOM   = NDIR; LABEL = number of lines; MIN = 50; DEFAUT = 100; STEP = 10


#SPAD.TITRE[3]      LABEL = Output

#SPAD.SEP[3]        LABEL = Number of decimals
#SPAD.PAR.ENTIER[3] NOM   = M_DIGITS; LABEL = mean; MIN = 0; MAX = 8; DEFAUT = 3
#SPAD.PAR.ENTIER[3] NOM   = E2_DIGITS; LABEL = eta-squared; MIN = 2; MAX = 8; DEFAUT = 3
#SPAD.PAR.ENTIER[3] NOM   = P_DIGITS; LABEL = p-value; MIN = 2; MAX = 8; DEFAUT = 3
#SPAD.PAR.ENTIER[3] NOM   = K_DIGITS; LABEL = scale parameter (kappa); MIN = 1; MAX = 8; DEFAUT = 2

#SPAD.SEP[3]        LABEL = Approximate results
#SPAD.PAR.LISTE[3]  NOM   = APPROXIMATE; LABEL = ; ITEMS = yes#no; STYLE = LARGE2; DEFAUT = 2

#SPAD.SEP[3]        LABEL = Export of results
#SPAD.PAR.LISTE[3]  NOM   = EXPORT; LABEL = test statistic distribution; STYLE = LARGE2; ITEMS = yes#no; DEFAUT = 2

#### SPAD Interface: END ####

#### DATA AND PARAMETERS ####
base            = as.data.frame(SPAD.getCategoryAsDataFrame("SEL"))
fact            = factor(SPAD.getCategoryAsDataFrame("FACT")[, 1])

global          = (1 %in% SPAD.getInputParameter("COMPARISON_TYPE"))  # TRUE if global
partial         = (2 %in% SPAD.getInputParameter("COMPARISON_TYPE"))  # TRUE if partial
specific        = (3 %in% SPAD.getInputParameter("COMPARISON_TYPE"))  # TRUE if specific
comparison      = SPAD.getInputParameter("LEVELS")
test_dim        = SPAD.getInputParameter("SPACE_TYPE")
unidim          = (1 %in% test_dim | 3 %in% test_dim)
multidim        = (2 %in% test_dim | 3 %in% test_dim)
proc_type       = SPAD.getInputParameter("PROC_TYPE")
test            = (1 %in% proc_type | 3 %in% proc_type)
region          = (2 %in% proc_type | 3 %in% proc_type)

notable_pv      = SPAD.getInputParameter("NOTABLE_PV")  # Proportion of variance
alpha_t         = SPAD.getInputParameter("ALPHA_T")  # alpha level for test
alpha_c         = SPAD.getInputParameter("ALPHA_C")  # alpha level for compatibility region
max_number      = SPAD.getInputParameter("MAX_NUMBER")      # number of samples
max_number      = 2 * ceiling(max_number / 2)
n_dir           = SPAD.getInputParameter("NDIR") # number of pseudo-random directions
random_seed     = (1 %in% SPAD.getInputParameter("RANDOM_SEED"))  # TRUE if random seed is used for each test
seed            = SPAD.getInputParameter("SEED")
if (random_seed) {seed <- NULL}

m_digits        = SPAD.getInputParameter("M_DIGITS")  # Nb of decimal places for raw mean or sd
e2_digits       = SPAD.getInputParameter("E2_DIGITS")  # Nb of decimal places for eta-2 or d2M
p_digits        = SPAD.getInputParameter("P_DIGITS")  # Nb of decimal places for p-values
k_digits        = SPAD.getInputParameter("K_DIGITS")  # Nb of decimal places for scale parameter kappa

approximate     = SPAD.getInputParameter("APPROXIMATE") == 1
export          = NULL  # dataframe for results export
export_TF       = SPAD.getInputParameter("EXPORT") == 1
comparison_type = if (global) {" global"} else { if (partial) "partial" else "specific"}

# *** The 3 following lines are temporary functions ***
# setwd("D:/CIGDA/data")
# save.image()
# load("D:/CIGDA/data/.RData")

iota <- 1e-12  # technical parameter
time1 <- Sys.time()
continue <- TRUE

# === TITLE PRINTING ===
cat(" ******************************************************** \n",
    " *             HOMOGENEITY PERMUTATION TEST             * \n", 
    " *        R script interfaced with Coheris SPAD         * \n", 
    " *                        * * *                         * \n",
    " *              See Chapter 5 of the book:              * \n",
    " * \"Combinatorial Inference in Geometric Data Analysis\" * \n", 
    " *         B. Le Roux, S. Bienaise, J.-L. Durand        * \n",
    " *               Chapman & Hall/CRC, 2019               * \n", 
    " ******************************************************** \n", 
    sep = "")

if (dim(base)[2] == 0) {
  cat("\n INPUT ERROR: no variable is selected. \n")
  continue <- FALSE
}
if (continue & any(is.na(base))) {
  cat("\n INPUT ERROR: there are missing data. \n")
  continue <- FALSE
}

# Control of factor 
if (length(levels(fact)) == 0) {
  cat("\n INPUT ERROR: no factor is selected. \n", sep = "")
  continue <- FALSE
}
if (length(levels(fact)) == 1) {
  cat("\n INPUT ERROR: factor must have at least two levels. \n", sep = "")
  continue <- FALSE
}
if (any(is.na(fact))) {
  cat("\n INPUT ERROR: there are empty cells in factor. \n", sep = "")
  continue <- FALSE
}


# === Input control for comparison ===
if (trimws(comparison) == "") {  # omnibus test (global comparison)
  global <- TRUE
  partial <- specific <- FALSE
}

if (global) {
  selected_levels <- levels(fact)
  comparison_type <- "global"
  comparison <- levels(fact)[1]
  for (i in 2:length(levels(fact))) {
    comparison <- paste(comparison, levels(fact)[i], sep = ", ")
  }
} else {
  # split chain into character vector (split = ",")  and remove spaces around ","
  txt1 <- trimws(unlist(strsplit(x = comparison, split = ",", fixed = TRUE)))
  # split chain into character vector (split = ",")  and remove spaces around "&"
  txt2 <- trimws(unlist(strsplit(x = txt1, split = "&", fixed = TRUE)))
  txt_errors <- setdiff(txt2, fact)
  
  if (length(txt_errors) > 0) {
    cat("\n", "Selected levels for ",
        if (partial) "partial " else "specific ",
        "comparison: \n ", comparison, "\n\n",
        "INPUT ERROR \n ", txt_errors, sep ="")
    if (length(txt_errors) == 1) {
      cat(" is not a category (level) of the selected factor. \n", sep ="")
    } else {
      cat(" are not categories (levels) of the selected factor. \n", sep ="")
    }
    continue <- FALSE
  }
  
  if (continue & (anyDuplicated(txt2) > 0)) {
    cat("\n", "Selected levels for ",
        if (partial) "partial " else "specific ",
        "comparison: \n ", sep = "")
    cat(comparison)
    cat("\n", "INPUT ERROR \n", " You can't duplicate categories (levels) of factor. \n", sep = "")
    continue <- FALSE
  }
  if (continue & (length(txt1) < 2)) {
    cat("\n", "Selected levels for ",
        if (partial) "partial " else "specific ",
        "comparison: \n ", sep = "")
    cat(comparison)
    cat("\n\n", "INPUT ERROR \n", 
        " You must choose at least 2 categories (levels) of factor. \n", sep = "")
    continue <- FALSE
  }
  if (continue & (any(txt1 == ""))) {
    cat("\n", "Selected levels for ",
        if (partial) "partial " else "specific ",
        "comparison: \n ", sep = "")
    cat(comparison)
    cat("\n\n", "INPUT ERROR \n", 
        " A coma must separate categories (levels) of factor. \n", sep = "")
    continue <- FALSE
  }
  for (i in seq_along(txt1)) {
    if (continue & (any(trimws(unlist(strsplit(x = txt1[i], split = "&", fixed = TRUE))) == ""))) {
      cat("\n", "Selected levels for ",
          if (partial) "partial " else "specific ",
          "comparison: \n ", sep = "")
      cat(comparison)
      cat("\n\n", "INPUT ERROR \n", 
          " The & symbol must group categories (levels) of factor. \n", sep = "")
      continue <- FALSE
    }
  }  
  selected_levels <- txt1
}
# === End of Input control for comparison ===

if (continue) {

  cardF <- length(levels(fact))
  cardCp <- length(selected_levels)
  
  if (cardCp > 2) {test <- TRUE; region <- FALSE}
  if (cardCp == cardF) {  # global comparison
    comparison_type <- "global"
    global = TRUE
    partial = FALSE
    specific = FALSE
  } else {
    if (length(which(fact %in% txt2)) == length(fact)) {
      comparison_type <- "specific"
      specific <- TRUE
      partial <- global <- FALSE
    } 
    if (specific) {  # unused rows and unused levels are discarted
      base <- base[which(fact %in% txt2), ]            # discard unused rows
      fact <- droplevels(fact[which(fact %in% txt2)])  # discard unused levels
      cardF <- length(levels(fact))
    }
  }
  
  # Set C factor: with cardF integer levels if global or specific comparison (1 to cardF)
  #               with cardF + 1 integer levels if partial comparison (1 to cardF + 1)
  C <- rep(cardCp + 1, length(fact))
  for (i in 1:cardCp) {
    C[fact %in% trimws(unlist(strsplit(x = selected_levels[i], split = "&", fixed = TRUE)))] <- i
  }
  
  C <- factor(C)
  n.C <- as.vector(table(C))
  n <- sum(n.C)
  cardC <- length(n.C)
  
  
  if (partial) {
    nprim <- sum(n.C[-cardC])
    lc <- 1:(cardC - 1)  # indexes of groups to be compared
  } else {
    nprim <- n; lc <- 1:cardC  # indexes of groups to be compared
  }
  # cat("\n # indexes of groups to be compared", lc, "\n")
  K <- dim(base)[2]
  if (K == 1) {
    multidim <- FALSE; unidim <- TRUE
  }
  
  # === Table Epsilon.JI for inductive analysis ===
  J <- choose(n, n.C[1])   # number of nestings for exhaustive method
  if(cardC > 2){
    for (i in 2:(cardC - 1))
    {J <- J * choose(n - sum(n.C[1:(i-1)]), n.C[i])}
  }  
  cardJ <- choose(n, n.C[1])
  if(cardC >2){
    for (i in 2:(cardC - 1))
    {cardJ <- cardJ * choose(n - sum(n.C[1:(i-1)]), n.C[i])}
  }
  if (cardJ < max_number) { # exhaustive method
    MC <- FALSE
    s.C <- cumsum(n.C)
    J0 <- choose(n, n.C[1])
    Ep1 <- rbind(combn(n, n.C[1]), combn(n, n - s.C[1])[ , J0:1])
    if(cardC > 2){
      for (c in 2:(cardC - 1)) {
        s0 <- s.C[c - 1]
        s1 <- s0 + 1
        nr <- n - s.C[c]
        Ep0 <- Ep1
        J1 <- choose(n - s0, n.C[c])
        Ep1 <- rbind(matrix(apply(matrix(Ep0[1:s0, ], nrow = s0), 2, rep, times = J1), nrow = s0), 
                     matrix(apply(Ep0[(s1):n , ], 2, combn, m = n.C[c]), nrow = n.C[c]), 
                     matrix(apply(Ep0[(s1):n , ], 2, combn, m = nr)[(nr*J1):1,], nrow = nr)[nr:1, ])
        J0 <- J0 * J1
      }
      rm(Ep0)
    }
    Ep1 <- t(Ep1)
    Epsilon.JI <- matrix(1, nrow = J0, ncol = n)
    for (c in 2:cardC) {
      for (j in 1:J0) {
        Epsilon.JI[j, Ep1[j, (s.C[c - 1] + 1):(s.C[c])]] <- c
      }
    }
    rm(Ep1)
  } else { # MC method
    MC <- TRUE
    cardJ <- max_number; set.seed(seed)
    Epsilon.JI <- matrix(0L, nrow = cardJ, ncol = n)
    for (j in 1:cardJ) {Epsilon.JI[j, ] <- sample(C)}
  }
  
  
  
  
  
  # epsilon.J
  if (cardCp == 2) {
    nc1c2 <- n.C[1] * n.C[2]
    coef <- nc1c2 / (n * nprim)
    contrast.JI <- matrix(0L, nrow = cardJ, ncol = n)
    contrast.JI[Epsilon.JI == 1] <- -n.C[2]
    contrast.JI[Epsilon.JI == 2] <- n.C[1]

    n11.J <- rowSums(Epsilon.JI[ , which(C == 1)] == 1)
    n22.J <- rowSums(Epsilon.JI[ , which(C == 2)] == 2)
    nn.J <-  rowSums(Epsilon.JI[ , which(C != 3)] != 3)
    epsilon.J <-  n11.J/n.C[1]  + n22.J/n.C[2]- nn.J/nprim
  }
  
  first_export <- TRUE
  
  # === 2 GROUPS - UNIDIMENSIONAL ANALYSES (begin) ===
  if (unidim & cardCp == 2) {
    first_variable <- TRUE
    for (icol in 1:K) {
      # === DESCRIPTIVE ANALYSIS ===
      x.I <- base[ , icol]       # initial coordinates
      var_x <- sum(x.I^2)/n - (sum(x.I)^2/n^2)
      
      if (isTRUE(all.equal(var_x, 0))) {
        cat("\n", "ERROR \n",
            "  Variable ",
            colnames(base)[icol],
            " is constant. \n",
            "  There is no point in doing comparison of groups. \n", sep = "")
      } else {
        x.C <- tapply(X = x.I, C, mean)  # moyennes des groupes
        X_sd.C <- sqrt(tapply(X = x.I^2, C, sum) / n.C - x.C^2) # group standard deviations
        
        # d_obs (vecteur Gc1->Gc2)
        d_obs <- x.C[2] - x.C[1]
        
        # scaled deviation
        
        # PV (Percentage of Variance) as function of of d_obs
        x_cta_inter <- d_obs^2 * n.C[1] * n.C[2] / (n * nprim)  #  OK
        PV <- x_cta_inter / var_x  #  OK
        notable <- PV >= notable_pv
  
        # === INDUCTIVE DATA ANALYSIS ===
        D.J <- contrast.JI %*% x.I / nc1c2
        n_sup <- sum(D.J >= d_obs * (1 - iota * sign(d_obs)))
        n_inf <- sum(D.J <= d_obs * (1 + iota * sign(d_obs)))
        p_value <- min(n_sup, n_inf) / cardJ
        # approximate test
        z_obs <- d_obs * sqrt(coef * (n - 1) / var_x)
        app_p_value <- 1 - pnorm(abs(z_obs))
        
        # === COMPATIBILITY INTERVAL ===
        # Exact method
        if (region) {
          x.J <- contrast.JI %*% x.I / nc1c2
          u.J <- (d_obs - x.J) / (1 - epsilon.J)
          u.J[which(epsilon.J == 1)] <- d_obs
          # d_inf <- sort(u.J)[trunc(cardJ * alpha_c/2) + 1]
          # d_sup <- sort(u.J, decreasing = TRUE)[trunc(cardJ * alpha_c/2) + 1]
          u_sorted.J <- sort(u.J[-1])
          rank_inf <- trunc(cardJ * alpha_c / 2) + 1
          if (rank_inf < 2) {
            warning_CI <- TRUE
          } else {
            warning_CI <- FALSE
            d_inf <- u_sorted.J[rank_inf - 1]
            d_sup <- u_sorted.J[cardJ + 1 - rank_inf]
          }
          # Approximate method
          h_alpha <- qnorm(1 - alpha_c/2) * sqrt(var_x / ((n - 1) * coef))
          app_d_inf <- d_obs - h_alpha
          app_d_sup <- d_obs + h_alpha
        }
        
        # === RESULTS PRINTING ===
        if (first_variable) {
          cat("\n")
          cat("======================================== \n")
          cat("  HOMOGENEITY OF INDEPENDENT GROUPS  \n")
          cat("   ",comparison_type,"comparison between means\n")
          cat("======================================== \n")
          if (K >= 2) cat("  ", K," variables are analyzed separately." ,"\n", sep = "")
        }
        cat("\n ********** VARIABLE: ", colnames(base)[icol], " **********\n\n", sep= "")
        cat("Descriptive analysis \n")
        cat("-------------------- \n")
        # cat("  comparison: ", comparison, "\n",
        #     "  ", comparison_type,
        #     " comparison (sum of sizes = ", sum(n.C[1:cardCp]), ") \n",
        #     "  size of overall cloud: ", n, "\n", sep = "")
         cat(" size of overall cloud: ", n, "\n",
           " ", comparison_type, "  comparison: ", comparison,
        " (sum of sizes = ", sum(n.C[1:cardCp]), ") \n", sep = "")        
        Size <- n.C[1:cardCp]
        names(Size) <- paste("  ", selected_levels)
        Mean <- round(x.C[1:cardCp], digits = m_digits)
        SD <- round(X_sd.C[1:cardCp], digits = m_digits)
        tmp <- cbind(Size, Mean, SD)
        colnames(tmp) = c(" size", "    mean", "      SD")
        print(tmp)
        cat("\n", "  difference between means: ", 
            format(round(d_obs, m_digits), nsmall = m_digits), "\n", sep = "")
        cat("  ", if (nprim < n) "partial ", "eta-squared = ",  
            format(round(x_cta_inter, m_digits), nsmall = m_digits), "/",
            format(round(var_x, m_digits), nsmall = m_digits), " = ",
            format(round(PV, e2_digits), nsmall = e2_digits),
            if (PV >= notable_pv) {" > "} else {" < "},
            notable_pv, ":\n",
#            "  ", if (nprim < n) "partial ", "eta-squared is ",
            if (PV >= notable_pv) {
              "  descriptively, the means of groups differ. \n"
            } else {
              "  descriptively, the means of groups weakly differ. \n"
            }, sep = "")
        cat("\n")
        cat("Combinatorial Inference \n")
        cat("----------------------- \n")
        
        cat("  number of possible nestings", sep = "")
        if (J < Inf) {
          if (log10(J) < 10) {
            cat(": ", J, "; \n", sep = "")
          } else {
            cat(": ", format(J, digits = 1), "; \n", sep = "")
          }
        } else {
          cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "")
        }
        if (MC) {
          cat("  MC method is performed with ", cardJ, " nestings. \n", sep = "")
        } else {
          cat("  exhaustive method is performed. \n", sep = "")
        }
        
        if (test) {  
          cat("\n", "~ homogeneity permutation test \n", sep = "")
          if (PV < notable_pv) {
            cat("  ", if (nprim < n) "partial ")
            cat("eta-squared is small, there is no point in performing the test. \n")
          } else {
            cat("  test statistic: difference between means \n", sep = "")
            p_value_txt <- format(round(p_value, p_digits), nsmall = p_digits)
            cat("  p-value = ", 
                min(n_sup, n_inf), "/", cardJ, " = ",
                p_value_txt, sep = "")
            
            if (p_value <= alpha_t/2) {
              cat(" < ", alpha_t/2, " (one-sided) \n",
                  "  The two groups are heterogeneous at level ", 
                  alpha_t/2, 
                  "\n  with mean of ", selected_levels[2],
                  if (d_obs > 0) " > " else " < ",
                  "mean of ", selected_levels[1], ". \n", sep = "")
            } else {    
              cat(" > ",  alpha_t/2, " (one-sided) \n",
                  "  The two groups are not heterogeneous at level ", 
                  alpha_t, ". \n", sep = "")    
            }  
          } 
        }
        
        if (region) {
          # COMPATIBILITY INTERVAL
          if (warning_CI) {
            cat("\n", "~ ", "No ", 100 * (1 - alpha_c), "% compatibility interval available \n", sep = "") # à améliorer
          } else {
            cat("\n", "~ ", 100 * (1 - alpha_c), "%-compatibility interval for deviation:", sep = "")
            cat(" [",
                format(round(d_inf, m_digits), nsmall = m_digits), ", ",
                format(round(d_sup, m_digits), nsmall = m_digits),  "] ", "\n", sep = "" )
          }
        }
        
        # APPROXIMATE METHOD
        if (approximate) {
          if (test & notable | region) {
            cat("------------- \n", sep = "")
          }
          if (test & notable) {
            app_p_value_txt <- format(round(app_p_value, p_digits), nsmall = p_digits)
            cat("  approximate test: z = ", format(round(z_obs, 3), nsmall = 3), 
                "; p-value = ", app_p_value_txt, " (one-sided) \n", sep = "")
          }
          if (region) {
            cat("  approximate ", 100 * (1 - alpha_c), "%-compatibility interval for deviation:", sep = "")
            cat(" [",
                format(round(app_d_inf, m_digits), nsmall = m_digits), ", ",
                format(round(app_d_sup, m_digits), nsmall = m_digits),  "] \n", sep = "" )
          }
        }

        # === EXPORT TO SPAD ===
        if (export_TF) {
          if (first_export == TRUE) {
            export <- as.data.frame(D.J)
          } else {
            export <- cbind(export, D.J)
          }
          colnames(export)[dim(export)[2]] <- paste("D_", colnames(base)[icol], sep = "")
          first_export <- FALSE
        }
      }
      first_variable <- FALSE
    }
  }
  # === 2 GROUPS - UNIDIMENSIONAL ANALYSES (end) ===
  
  
  # === 3 or MORE GROUPS - UNIDIMENSIONAL ANALYSES (begin) ===
  if (unidim & cardCp > 2) {
    first_variable <- TRUE
    for (icol in 1:K) {
      # === DESCRIPTIVE ANALYSIS ===
      X_OM.I <- base[ , icol]       # coordinates 
      X_OGc.C <- tapply(X = X_OM.I, C, mean)  # means
      X_sd.C <- sqrt(tapply(X = X_OM.I^2, C, sum) / n.C - X_OGc.C^2) # group standard deviations
      X_GM.I <- X_OM.I - mean(X_OM.I)
      V_cloud <- sum(X_GM.I^2) / n  # variance of cloud M^I
      
      # between cloud
      X_GGc.C <- tapply(X = X_GM.I, C, mean)
      dimnames(X_GGc.C) <- NULL
      
      # Cprim mean point
      X_GGprim <- sum(n.C[lc] * X_GGc.C[lc] ) / nprim  

      # mean points variance in Cprim 
      v_obs <- sum(n.C[lc] * X_GGc.C[lc]^2 ) / nprim - X_GGprim^2
      x_cta_inter <- v_obs * nprim / n
      
      # effect size (proportion of variance)
      PV <- x_cta_inter / V_cloud
      notable <- PV >= notable_pv

      # === INDUCTIVE DATA ANALYSIS ===
      # test stat V 
      V.J <- numeric(length = cardJ)  
      for (j in 1:cardJ) {
        # mean points
        X_GGcj.C <- tapply(X_GM.I, Epsilon.JI[j, ], mean)
        dimnames(X_GGcj.C) <- NULL
        
        # Cprim_j mean point
        X_GGprimj <- sum(n.C[lc] * X_GGcj.C[lc] ) / nprim  
        
        # test statistic: mean points variance in Cprim
        V.J[j] <- sum(n.C[lc] * X_GGcj.C[lc]^2 ) / nprim - X_GGprimj^2
      }
      
      # test
      n_sup <- sum(V.J >= v_obs * (1 - iota))
      p_value <- n_sup / cardJ
      
      # === APPROXIMATE TEST === 
      chi2_obs <- (v_obs / V_cloud)  * (n - 1) * nprim / n
      app_p_value <- 1 - pchisq(chi2_obs, df = cardCp - 1)
      
      # === RESULTS PRINTING ===
      if (first_variable) {
        cat("\n")
        cat("======================================== \n")
        cat("    HOMOGENEITY OF INDEPENDENT GROUPS  \n")
        cat("   ", comparison_type," comparison between means\n")
        cat("======================================== \n")
        if (K >= 2) cat("  ", K," variables are analyzed separately." ,"\n", sep = "")
      }
      
      cat("\n ********** VARIABLE: ", colnames(base)[icol], " **********\n\n", sep= "")
      
      cat("Descriptive analysis \n")
      cat("-------------------- \n")
      # cat("  comparison: ", comparison, "\n",
      #     "  ", comparison_type,
      #     " comparison (sum of sizes = ", sum(n.C[1:cardCp]), ") \n",
      #     "  size of overall cloud: ", n, "\n", sep = "")
      cat(" size of overall cloud: ", n, "\n",
          " ",  comparison_type, " comparison: ", comparison,
          " (sum of sizes = ", sum(n.C[1:cardCp]), ") \n", sep = "")
      Size <- n.C[1:cardCp]
      names(Size) <- paste("   ", selected_levels)
      # if (partial) {
      #   names(Size)[cardCp + 1] <- "   rest"
      # }
      Mean <- round(X_OGc.C[1:cardCp], m_digits)
      SD <- round(X_sd.C[1:cardCp], m_digits)
      tmp <- cbind(Size, Mean, SD)
      colnames(tmp) = c(" size", "    mean", "      SD")
      #  colnames(tmp) = c(" size", "    mean")
      print(tmp)
      cat("\n")
      cat("  ", if (nprim < n) "partial ", "eta-squared = ",  
          format(round(x_cta_inter, m_digits), nsmall = m_digits), "/",
          format(round(V_cloud, m_digits), nsmall = m_digits), " = ",
          format(round(PV, e2_digits), nsmall = e2_digits),
          if (PV >= notable_pv) {" > "} else {" < "},
          notable_pv, "\n",
#          "  ", if (nprim < n) "Partial ", "eta-squared is ",
          if (PV >= notable_pv) {
            "  descriptively, the means of groups differ. \n\n"
          } else {
            "  descriptively, the means of groups weakly differ. \n\n"
          }, sep = "")
      
      if (PV < notable_pv) {
        cat("  ", if (nprim < n) "partial ")
        cat("eta-squared is small, there is no point in performing the test. \n")
      } else {
        cat("Combinatorial Inference \n")
        cat("----------------------- \n")
        cat("  number of possible nestings", sep = "")
        if (J < Inf) {
          if (log10(J) < 10) {
            cat(": ", J, "; \n", sep = "")
          } else {
            cat(": ", format(J, digits = 1), "; \n", sep = "")
          }
        } else {
          cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "")
        }
        if (MC) {
          cat("  MC method is performed with ", cardJ, " nestings. \n", sep = "")
        } else {
          cat("  exhaustive method is performed. \n", sep = "")
        }
        
        if (test) {
          cat("\n", "~ homogeneity permutation test \n", sep = "")
          cat("  test statistic: variance of means", sep = "")
          cat(" (observed value: ", format(round(v_obs, m_digits), nsmall = m_digits),")\n", sep = "")
          p_value_txt <- format(round(p_value, p_digits), nsmall = p_digits)
          cat("  p-value = ", 
              n_sup, "/", cardJ, " = ",
              p_value_txt, sep = "")
          
          if (p_value <= alpha_t) {
            cat(" < ", alpha_t, "\n", 
                "  The ", cardCp, " groups are heterogeneous at level ", 
                alpha_t, ". \n", sep = "")
          } else {    
            cat(" > ",  alpha_t, "\n",
                "  The ", cardCp, " groups are not heterogeneous at level ", 
                alpha_t, ". \n", sep = "")    
          }
        }
      
        # APPROXIMATE RESULTS
        if (approximate) {
          if (test & notable) {
            cat("------------- \n", sep = "")
            app_p_value_txt <- format(round(app_p_value, p_digits), nsmall = p_digits)
            cat("  approximate test: Chi-Square = ", format(round(chi2_obs, 3), nsmall = 3),
                ", df = ", cardCp - 1, "; p = ", app_p_value_txt, "\n", sep = "")
          }
        }
      }
      
      # === EXPORT TO SPAD ===
      if (export_TF) {
        if (first_export == TRUE) {
          export <- as.data.frame(V.J)
        } else {
          export <- cbind(export, V.J)
        }
        colnames(export)[dim(export)[2]] <- paste("D_", colnames(base)[icol], sep = "")
        first_export <- FALSE
      } 
      first_variable <- FALSE
    }
    
  }
  # === 3 or MORE GROUPS - UNIDIMENSIONAL ANALYSES (end) ===
  
  
  # === 2 or more GROUPS - MULTIDIMENSIONAL ANALYSES (begin)  ===
  if (multidim) {
    
    # === DESCRIPTIVE ANALYSIS ===
    X_OM.IK <- as.matrix(base)       # initial coordinates
    Mcov.KK <- cov.wt(X_OM.IK, method = "ML")$cov
    X_GM.IK <- scale(X_OM.IK, center = TRUE, scale = FALSE)
    Vcloud <- sum(diag(Mcov.KK))
    
    
    # between cloud 
    X_GGc.CK <- as.matrix(aggregate(X_GM.IK, by=list(C), FUN=mean)[,-1])
    X_GGprim.K <- colSums(diag(n.C[lc]) %*% X_GGc.CK[lc, ])/nprim
    betw_Cp_act <- (sum(n.C[lc] * rowSums(X_GGc.CK[lc,]^2)) - nprim* sum(X_GGprim.K^2))/n
    PV <- betw_Cp_act / Vcloud
    notable <- PV >= notable_pv
    
    # === INDUCTIVE ANALYSIS ===
    # coordinates of cloud M^I in orthocalibrated principal basis
    eig <- eigen(Mcov.KK, symmetric = TRUE)
    L <- sum(eig$values > 1.5e-8)
    lambda.L <- eig$values[1:L]
    if (L < 2) {cat("\n one-dimensional cloud")
    } else {
      BasisChange.KL <- eig$vectors[ , 1:L] %*% diag(1/sqrt(lambda.L))
      Z_GM.IL <- X_GM.IK %*% BasisChange.KL 
      Z_GGc.CL <- X_GGc.CK %*% BasisChange.KL
      Z_GGprim.L <- X_GGprim.K %*% BasisChange.KL
      Zd2c.JC <- matrix(0, nrow = cardJ, ncol = cardC)
      for (c in (1:cardC)) {
        one.C <- rep(0L, cardC); one.C[c] <- 1L
        One_c.JI <- matrix(one.C[t(Epsilon.JI)], nrow=cardJ, ncol=n, byrow=TRUE)
        Zd2c.JC[, c] <- as.matrix(rowSums((One_c.JI %*% Z_GM.IL)^2), 
                                  nrow=cardJ, ncol=cardC)/n.C[c]
      }
      
      if (length(lc) == 2) {
        # test statistic is D2M (Mahalanobis distance between mean points)
        D2M.J <- (nprim * rowSums(Zd2c.JC[,lc])- (n-nprim)*Zd2c.JC[,cardC]) / (n.C[1] * n.C[2])
        D2M_obs <- sum((Z_GGc.CL[2, ] - Z_GGc.CL[1, ])^2)
        n_sup <- sum(D2M.J > D2M_obs * (1 - iota))
      } else {Zd2c.JC
        # test statistic is M-variance 
        VM.J <- (rowSums(Zd2c.JC[,lc])- (n-nprim)*Zd2c.JC[,cardC]/nprim) / nprim
        VM_obs <- (sum(n.C[lc] * rowSums(Z_GGc.CL[lc, ]^2)) / nprim
                   - sum(Z_GGprim.L^2))
        n_sup <- sum(VM.J >= VM_obs * (1 - iota))
      } 
      p_value <- n_sup / cardJ
    }
    
    # APPROXIMATE TEST
    if (length(lc) == 2) {
      chi2_obs <- D2M_obs * (n - 1) * n.C[1] * n.C[2]  / (n * nprim)
    } else {
      chi2_obs <- VM_obs * (n - 1) * nprim / n
    }
    app_p_value <- 1 - pchisq(chi2_obs, df = L * (cardCp - 1))

    # Compatibility region: C'= 2
    if (region & length(lc) == 2) {
      Xd_obs.K <- t(t( X_GGc.CK[2, ] - X_GGc.CK[1, ] ))
      d.C <- c(-n.C[2], n.C[1], 0)
      X_GR.IK <- X_GM.IK - t(t(d.C[C])) %*% t(Xd_obs.K)/nprim
      Rcov.KK <- t(X_GR.IK) %*% X_GR.IK / n
      
      # coordinates of cloud R^I in the R-orthocalibrated principal basis
      eig <- eigen(Rcov.KK, symmetric = TRUE)
      L <- sum(eig$values > 1.5e-8)
      lambda.L <- eig$values[1:L]
      if (L < 2) {cat("\n one-dimensional cloud")
      } else {
        BasisChange.KL <- eig$vectors[ , 1:L] %*% diag(1/sqrt(lambda.L))
        U_GR.IL <- X_GR.IK %*% BasisChange.KL 
        U_OD.JL <- matrix(d.C[Epsilon.JI], nrow= cardJ, ncol= n)  %*% U_GR.IL/(n.C[1]*n.C[2])
 
        # L principal axes and n_dir - L random lines 
        set.seed(seed)
        rank_inf <- trunc(cardJ * alpha_c) + 1
        kappa.D <- rep(0, 2 * n_dir); eff.D <- rep(0L, 2 * n_dir)
        C.J <- t(t(rowSums(U_OD.JL^2)))  # squared R-norms of cardJ nestings
        Cnul <- which(C.J < iota)
        set.seed(seed)
        for (d in 1:n_dir){
          xy.L <- runif(L, -1, 1); Beta.L <- t(t(xy.L / sqrt(sum(xy.L^2)))) 
          U.J <-  U_OD.JL %*% Beta.L
          A.J <- epsilon.J^2 -1 + abs(C.J - U.J^2) * n.C[1]*n.C[2] /n/nprim
          B.J <- epsilon.J * U.J 
          Delta.J <- abs( B.J^2 - A.J * C.J)
          x1.J <- (-B.J + sqrt(Delta.J))/A.J   # negative root
          x2.J <- (-B.J - sqrt(Delta.J))/A.J   # positive root
          AC <- setdiff(  which(abs (A.J) < iota), Cnul)   #A = 0 & C>0
          if(length(AC) > 0){                    #case A.J=0 and C.J>0
            for (j in AC){
              x1.J[j] = -Inf; x2.J[j] = Inf
              if (B.J[j] >  iota) {x1.J[j] <- -C.J[j] / abs(2*B.J[j])} 
              if (B.J[j] < -iota) {x2.J[j] <- +C.J[j] / abs(2*B.J[j])}
            }
          }
          for (j in Cnul){                      # particular case C.J=0
            if(epsilon.J[j]^2 == 1) { x1.J[j] <- -Inf; x2.J[j] <- Inf
            } else {x1.J[j] <- x2.J[j] <- 0 }
          }
          x1_sorted  <- sort(x1.J); x2_sorted  <- sort(x2.J)
          kappa.D[2*d - 1] <- abs(x1_sorted[rank_inf])
          kappa.D[2*d] <- x2_sorted[cardJ + 1 - rank_inf]
        }
        
        kappa <- mean(kappa.D)
        if (L == 2 & kappa < Inf) {
          
          # *** Figure 1 - Compatibility region for D_obs ***
          
          SPAD.report.newPage(title = "CompatibilityRegion")
          SPAD.report.addText("    (the numerical results and conclusions of tests are in the file 'Edition') ")
          
          nbp <- 720
          M.22 <- diag(sqrt(lambda.L), nrow = L) %*% t(eig$vectors[ , 1:2])
          Circle <- matrix(0, nrow = nbp, ncol = 2)
          for (a in 1:nbp) {
            Circle[a, 1] <- cos(a*2*pi/nbp)
            Circle[a, 2] <- sin(a*2*pi/nbp)
          }

          # ellipse for c1 mean point
          Ellipse <- sweep(kappa * Circle %*% M.22, Xd_obs.K, FUN = "+", MARGIN = 2)

          # range of figure
          rx <- range(c(0, Ellipse[ , 1]))
          ry <- range(c(0, Ellipse[ , 2]))
          # for pretty(): same range for x and y
          dx <- (rx[2] - rx[1])
          dy <- (ry[2] - ry[1])
          if(dx > dy) {
            dmax <- dx
            ry <- ry + c(-(dx - dy)/2, (dx - dy)/2)
          } else {
            dmax <- dy
            rx <- rx + c(-(dy - dx)/2, (dy - dx)/2)
          }
            
          # small expansion of range (20%)
          rx <- rx + c(-dmax/10, dmax/10)
          ry <- ry + c(-dmax/10, dmax/10)
          
          plot(Xd_obs.K, type = "n",
               xlim = rx, ylim = ry,
               col = "grey", cex = 1, lwd = 2,
               asp = 1.18,
               xlab = rownames(Xd_obs.K)[1],
               ylab = rownames(Xd_obs.K)[2],
               main = substitute(paste(a1, "% compatibility region adjusted by the ",
                                       kappa, "-ellipse of reference cloud (", kappa, " = ", k1, ")"), 
                                 list(a1 = 100 * (1 - alpha_c), k1 = 
                                        format(round(kappa, k_digits), nsmall = k_digits))))

          abline(h = pretty(ry, 10), v = pretty(rx, 10), col = "lightgray", lty = 2)
        
          # D_obs coordinates
          x_D <- Xd_obs.K[1, 1]
          y_D <- Xd_obs.K[2, 1]
          text("D_obs", x = x_D + (rx[2] - rx[1]) / 20, y = y_D, cex = 1.5, col = "blue")
          points(Ellipse, cex = 0.1, col = "blue", type = "o")  # ellipse
          A0 <- kappa * diag(c(-1, -1))
          B0 <- kappa * diag(c(1, 1))
          A <- t(t(A0 %*% M.22) + as.vector(Xd_obs.K))
          B <- t(t(B0 %*% M.22) + as.vector(Xd_obs.K))
          segments(x0= A[,1], y0= A[,2], x1= B[,1], y1= B[,2], col= "blue", lty= 2)  # axes
          arrows(x0= 0, y0= 0, x1= x_D, y1= y_D, col= "blue", lty= 1, lwd = 2)  # effect vector
          points(0,0, cex = 1.5, col = "blue", pch = 16)
          # text("O", x = (rx[2] - rx[1]) / 20, y = 0, cex = 1.5, col = "blue")
        }
      }

      # Approximate compatibility ellipsoid
      y <-  qchisq(alpha_c, df = L, lower.tail = FALSE) / (n - 1)
      if (y < 1) {
        app_kappa <- sqrt( (1/coef)* y / (1 - y))
      } else {
        app_kappa <- Inf  # all space is compatible
      }
    }
    
    # === RESULTS PRINTING ===
    {
      cat("\n")
      cat("============================================== \n")
      cat("     HOMOGENEITY OF INDEPENDENT GROUPS  \n")
      cat("   ", comparison_type," comparison between mean points\n")
      cat("============================================== \n")
      cat("  ", K," variables (", paste(colnames(base), collapse = ", "),
          ") are analyzed jointly; \n  clouds are in a ", K,"-dimensional geometric space. \n", sep= "")
      if (isTRUE(L < K)) {
        cat("   The dimension of the cloud is ", L, 
            " < ", K, " (number of variables). \n", 
            "   It is advisable to study again",
            " the geometric construction of the cloud.\n", sep = "")
      }
      if (L == 1) {
        cat("ERROR \n", " The dimension of cloud is not 2 or more. \n")
      } else {
        cat("\n")
        cat("Descriptive analysis \n")
        cat("-------------------- \n")
        # cat("  comparison: ", comparison, "\n",
        #     "  ", comparison_type,
        #     " comparison (sum of sizes = ", sum(n.C[1:cardCp]), ") \n",
        #     "  size of overall cloud: ", n, "\n", sep = "")
        cat(" size of overall cloud: ", n, "\n",
            " ", comparison_type, " comparison: ", comparison,
              " (sum of sizes = ", sum(n.C[1:cardCp]), ") \n", sep = "")
        Size <- n.C[1:cardCp]
        names(Size) <- paste("  ", selected_levels)
        tmp <- cbind(Size)
        colnames(tmp) = c(" size")
        print(tmp) 
        # conclusion about size of PV
        cat("  ", if (nprim < n) "partial ", "eta-squared = ",
            format(round(betw_Cp_act, m_digits), nsmall = m_digits), "/",
                   format(round(Vcloud, m_digits), nsmall = m_digits), " = ",
                   format(round(PV, e2_digits), nsmall = e2_digits),
            if (PV >= notable_pv) {" > "} else {" < "},
            notable_pv, ":\n",
#            "  ", if (nprim < n) "Partial ", "eta-squared is ",
            if (PV >= notable_pv) {
              "  descriptively, mean points differ. \n"
            } else {
              "  descriptively, mean points weakly differ. \n"
            }, sep = "")
        cat("\n")
        cat("Combinatorial inference \n")
        cat("----------------------- \n")
        cat("  number of possible nestings", sep = "")
        if (J < Inf) {
          if (log10(J) < 10) {
            cat(": ", J, "; \n", sep = "")
          } else {
            cat(": ", format(J, digits = 1), "; \n", sep = "")
          }
        } else {
          cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "")
        }
        if (MC) {
          cat("  MC method is performed with ", cardJ, " nestings. \n", sep = "")
        } else {
          cat("  exhaustive method is performed. \n", sep = "")
        }
        
        if (test) {
          if (!notable) {
            cat("  ", if (nprim < n) "partial ")
            cat("eta-squared is small: there is no point in doing the test. \n")
          } else {
            p_value_txt <- format(round(p_value, p_digits), nsmall = p_digits)
            cat("\n", "~ homogeneity permutation test \n", sep = "")
            if (length(lc) == 2) {
              cat("  test statistic: squared M-distance between mean points", sep = "")
              cat(" (observed value: ", 
                  format(round(D2M_obs, e2_digits), nsmall = e2_digits),")\n", sep = "")
            } else {
              cat("  test statistic: between-group M-variance", sep = "")
              cat(" (observed value: ", 
                  format(round(VM_obs, e2_digits), nsmall = e2_digits),")\n", sep = "")
            }
            cat("  p-value = ", n_sup, "/", cardJ, " = ", p_value_txt, sep = "")
            if (p_value <= alpha_t) {
              cat(" < ", alpha_t, "\n",
                  "  The ", cardCp, " groups are heterogeneous at level ", 
                  alpha_t, ". \n", sep = "")
            } else {    
              cat(" > ",  alpha_t,"\n",
                  "  The ", cardCp, " groups are not heterogeneous at level ", 
                  alpha_t, ". \n", sep = "")    
            } 
          }
        }

        # Compatibility region
        if (region & length(lc) == 2) {
          ellips = ifelse(L == 2, "ellipse", "ellipsoid")
          cat("\n", "~ adjusted ", 100 * (1 - alpha_c), 
              "% compatibility region: \n", 
              "  principal kappa-", ellips,
              " of reference cloud, with kappa = ",
              format(round(mean(kappa.D), k_digits), nsmall= k_digits), " \n", sep= "")
          cat("  (mean of ", 2*max(L,n_dir)," values in the range ", 
              format(round(min(kappa.D), k_digits), nsmall= k_digits), 
              " to ", format(round(max(kappa.D), k_digits), nsmall= k_digits), 
              ") \n", sep= "")
        }
        
        # Approximate results 
        if (approximate) {
          if (test & notable | region & length(lc) == 2) {
            cat("------------- \n", sep = "")
          }
          if (test & notable) {
            app_p_value_txt <- format(round(app_p_value, p_digits), nsmall = p_digits)
            cat("  approximate test: Chi-Square = ", format(round(chi2_obs, 3), nsmall = 3), 
                ", df = ", (cardCp-1)*L, "; p = ", app_p_value_txt, "\n", sep = "")
          }
          if (region & length(lc) == 2) {
            cat("  approximate kappa: ",
                format(round(app_kappa, k_digits), nsmall= k_digits), "\n", sep= "")
          }
        }
      }
    }
    
    # === EXPORT TO SPAD ===
    if (export_TF & L > 1) {
      if (length(lc) == 2) {
        if (first_export) {
          export <- as.data.frame(D2M.J)
        } else {
          export <- cbind(export, D2M.J)
        }
        colnames(export)[dim(export)[2]] <- "D2_M"
      } else {
        if (first_export) {
          export <- as.data.frame(VM.J)
        } else {
          export <- cbind(export, VM.J)
        }
        colnames(export)[dim(export)[2]] <- "V_M"
      }
      first_export <- FALSE
    }
  }
  # === MULTIDIMENSIONAL ANALYSES (end) ===
  
  # === EXPORT TO SPAD ===
  if (export_TF) {
    SPAD.setDataFrame(export, generateColId = FALSE)
    if (dim(as.data.frame(export))[2] > 1) {
      cat("\n", "Test statistic distributions are exported to SPAD. \n", sep = "")
    } else {
      cat("\n", "The test statistic distribution is exported to SPAD. \n", sep = "")
    }
  } else {cat("\n")}
  
  time2 <- Sys.time()
  elapsed_time <- round(as.numeric(difftime(time2, time1, units = "secs")), digits = 2)
  sec_s <- if (elapsed_time < 2) {" sec \n"} else {" secs \n"}
  cat("Elapsed time: ", elapsed_time, sec_s, "\n", sep = "")
}
