# CIGDA_Comb-v1.1 (March 1, 2020)
# B. Le Roux, S. Bienaise, J.-L. Durand

#### BEGINNING OF DATA INPUT USING SPAD GUI ####

#!SPADR Combinatorial Typicality Tests

#SPAD.VARN STATUT = SEL; LABEL = Selected;  ROLE = C
#SPAD.VAR1 STATUT = GRP; LABEL = Group;     ROLE = N
#SPAD.VAR1 STATUT = REF; LABEL = Reference; ROLE = N


#SPAD.TITRE[1]      LABEL = Analyses

#SPAD.SEP[1]        LABEL = Group definition (put & between categories to be grouped)
#SPAD.PAR.TEXTE[1]  NOM   = GROUP_LEVEL; LABEL = name of the group; DEFAUT = <Name of the group>

#SPAD.SEP[1]        LABEL = Choice of analyses
#SPAD.PAR.LISTE[1]  NOM   = SPACE_TYPE; LABEL = ; ITEMS = one-dimensional#multidimensional#one & multidimensional; STYLE = LARGE2; DEFAUT = 2

#SPAD.SEP[1]        LABEL = Choice of the typicality test
#SPAD.PAR.LISTE[1]  NOM   = TEST_STAT; LABEL = ; STYLE = LARGE2; ITEMS =  for Mean point#for Variance; DEFAUT = 1

#SPAD.SEP[1]        LABEL = Combinatorial procedures
#SPAD.PAR.LISTE[1]  NOM   = PROC_TYPE; LABEL = ; ITEMS = test#compatibility region#test & region; STYLE = LARGE2; DEFAUT = 1


#SPAD.TITRE[2]      LABEL = Parameters

#SPAD.SEP[2]        LABEL = Notable limits of descriptive statistics
#SPAD.PAR.REEL[2]   NOM   = NOTABLE_D; LABEL = M-distance/scaled deviation; DEFAUT = 0.4; MIN = 0
#SPAD.PAR.REEL[2]   NOM   = NOTABLE_VR; LABEL = absolute value of variation rate; DEFAUT = 0.1; MIN = 0 ; MAX = 1

#SPAD.SEP[2]        LABEL = Alpha-levels
#SPAD.PAR.REEL[2]   NOM   = ALPHA_T; LABEL = test; MIN = 0.001; MAX = 0.5; DEFAUT = 0.05
#SPAD.PAR.REEL[2]   NOM   = ALPHA_C; LABEL = compatibility region; MIN = 0.001; MAX = 0.5; DEFAUT = 0.05

#SPAD.SEP[2]        LABEL = Samples
#SPAD.PAR.ENTIER[2] NOM   = MAX_NUMBER; LABEL = maximum number; MIN = 5000; DEFAUT = 100000

#SPAD.SEP[2]        LABEL = Monte Carlo
#SPAD.PAR.LISTE[2]  NOM   = RANDOM_SEED; LABEL = seed; STYLE = LARGE2; ITEMS = random#fixed; DEFAUT = 1
#SPAD.PAR.ENTIER[2] NOM   = SEED; LABEL = seed (if fixed); DEFAUT = 1234


#SPAD.TITRE[3]      LABEL = Outputs

#SPAD.SEP[3]        LABEL =  Number of decimals
#SPAD.SEP[3]        LABEL = 1- test for Mean Point 
#SPAD.PAR.ENTIER[3] NOM   = M_DIGITS; LABEL =  mean; MIN = 0; MAX = 8; DEFAUT = 3
#SPAD.PAR.ENTIER[3] NOM   = K_DIGITS; LABEL =  M-distance & scale parameter; MIN = 1; MAX = 8; DEFAUT = 2

#SPAD.SEP[3]        LABEL = 2- test for Variance
#SPAD.PAR.ENTIER[3] NOM   = V_DIGITS; LABEL =  variance; MIN = 1; MAX = 8; DEFAUT = 2
#SPAD.PAR.ENTIER[3] NOM   = VR_DIGITS; LABEL = variation rate; MIN = 1; MAX = 8; DEFAUT = 2

#SPAD.SEP[3]        LABEL = 3-
#SPAD.PAR.ENTIER[3] NOM   = P_DIGITS; LABEL= p-value; MIN = 2; MAX = 8; DEFAUT = 3

#SPAD.SEP[3]        LABEL = 
#SPAD.SEP[3]        LABEL = Approximate results
#SPAD.PAR.LISTE[3]  NOM   = APPROXIMATE; LABEL = ; ITEMS = yes#no; STYLE = LARGE2; DEFAUT = 2

#SPAD.SEP[3]        LABEL = Export of test statistic distribution
#SPAD.PAR.LISTE[3]  NOM   = EXPORT; LABEL = ; STYLE = LARGE2; ITEMS = yes#no; DEFAUT = 2


### DATA AND PARAMETERS ###
base           = as.data.frame(SPAD.getCategoryAsDataFrame("SEL"))  # numerical
reference      = SPAD.getCategoryAsDataFrame("REF")                 # categorical
group          = SPAD.getCategoryAsDataFrame("GRP")                  # categorical

group_lbl      = SPAD.getInputParameter("GROUP_LEVEL")
test_dim       = SPAD.getInputParameter("SPACE_TYPE")
unidim         = (1 %in% test_dim | 3 %in% test_dim)
multidim       = (2 %in% test_dim | 3 %in% test_dim)
test_stat      = SPAD.getInputParameter("TEST_STAT")
test_stat_mean = (1 %in% test_stat)
test_stat_var  = (2 %in% test_stat)
proc_type      = SPAD.getInputParameter("PROC_TYPE")
test           = (1 %in% proc_type | 3 %in% proc_type)
region         = (2 %in% proc_type | 3 %in% proc_type)

notable_D      = SPAD.getInputParameter("NOTABLE_D")
notable_VR     = SPAD.getInputParameter("NOTABLE_VR")
alpha_t        = SPAD.getInputParameter("ALPHA_T")  # alpha level for test
alpha_c        = SPAD.getInputParameter("ALPHA_C")  # alpha level for compatibility region
max_number     = SPAD.getInputParameter("MAX_NUMBER")      # number of samples
random_seed    = (1 %in% SPAD.getInputParameter("RANDOM_SEED"))  # TRUE if random seed is used for each test
seed           = SPAD.getInputParameter("SEED")
if (random_seed) {seed <- NULL}

m_digits       = SPAD.getInputParameter("M_DIGITS")  # Nb of decimal places for raw mean or sd
v_digits       = SPAD.getInputParameter("V_DIGITS")  # Nb of decimal places for variance
vr_digits      = SPAD.getInputParameter("VR_DIGITS") # Nb of decimal places for variance ratio
p_digits       = SPAD.getInputParameter("P_DIGITS")  # Nb of decimal places for p-values
k_digits       = SPAD.getInputParameter("K_DIGITS")  # Nb of decimal places for M-variance & kappa
approximate    = SPAD.getInputParameter("APPROXIMATE") == 1
export         = NULL  # dataframe for results export
export_TF      = SPAD.getInputParameter("EXPORT") == 1
spad           = TRUE

if(any(is.na(base))) {
  cat("\n=== INPUT ERROR ===\n ",
       "there are empty cells in numeric variables. \n\n", sep = "")
  stop("\n\n=== INPUT ERROR ===\n ",
      "there are empty cells in numeric variables. \n", sep = "")
}

if(any(is.na(group))) {
  cat("\n=== INPUT ERROR ===\n ",
       "You must choose a variable for defining the Group ('Variables' dialog box). \n\n", sep = "")
  stop("\n\n=== INPUT ERROR ===\n ",
      "You must choose a variable for defining the Group ('Variables' dialog box). \n", sep = "")
}

# === group definition ===
txt_tmp <- group_lbl
# split chain into character vector (split = ",")
# and remove leading and trailing whitespace from character strings
selected_levels <- trimws(strsplit(x = txt_tmp, split = "&", fixed = TRUE)[[1]])
groupName <- paste0(selected_levels, collapse = " & ")
rm(txt_tmp)
tmp_levels <- setdiff(selected_levels, unlist(group))

#  ERROR MESSAGE PRINTING
if (length(tmp_levels) > 0) {
  cat("Selected level(s) for group \n", sep = "")

  if (length(tmp_levels) == 1) {
    cat("\n=== INPUT ERROR ===\n ", tmp_levels, ": category not observed. \n\n", sep ="")
    stop("\n\nINPUT ERROR \n ", tmp_levels, ": category not observed. \n", sep ="")
  } else {
    cat("\n=== INPUT ERROR ===\n ", tmp_levels, ": categories not observed. \n\n", sep = "")
    stop("\n\nINPUT ERROR \n ", tmp_levels, ": categories not observed. \n", sep = "")
  }
}

if (length(selected_levels) == 0) {
  cat("\n=== INPUT ERROR ===\n", 
      " You must choose 1 or more Group categorie(s). \n\n",
       sep = "")
  stop("\n\nINPUT ERROR \n", 
      " You must choose 1 or more Group categorie(s). \n",
      sep = "")
}

group_TF <- unlist(group) %in% selected_levels

# reference definition
if (length(reference) == 1 & any(is.na(reference))) {
  ref_TF <- rep(TRUE, dim(base)[1])
  cat("\n=== WARNING ===\n No variable is chosen for reference, therefore all cases belong to the reference set.\n\n")
} else {
  ref_TF <- unlist(reference) %in% "ref"
}

X.IK <- as.matrix(base[ref_TF, ]) # reference cloud coordinates 
rownames(X.IK) <- rownames(base)[ref_TF]
colnames(X.IK) <- colnames(base)
X.CK <- as.matrix(base[group_TF, ]) # group cloud coordinates
rownames(X.CK) <- rownames(base)[group_TF]

### END OF DATA INPUT ###

### PART OF THE R SCRIPT REGARDING COMBINATORIAL PROCEDURES ###

cat(" ******************************************************** \n",
    " *            COMBINATORIAL TYPICALITY TEST             * \n", 
    " *        R script interfaced with Coheris SPAD         * \n", 
    " *                        * * *                         * \n",
    " *              See Chapter 3 of the book:              * \n",
    " * \"Combinatorial Inference in Geometric Data Analysis\" * \n", 
    " *         B. Le Roux, S. Bienaise, J.-L. Durand        * \n",
    " *               Chapman & Hall/CRC, 2019               * \n", 
    " ******************************************************** \n", 
    sep = "")

# technical parameter
time1 <- Sys.time()
iota <- 1e-12 
plot_ratio <- ifelse(spad, 1.18, 1)

K <- dim(X.IK)[2]
n <- dim(X.IK)[1]
n_c <- dim(X.CK)[1]

if (isTRUE(n_c <= 1) & test_stat_var) {
  cat("\n=== INPUT ERROR ===\n group size must be at least 2.\n\n", sep = "")
  stop("\n\nINPUT ERROR \n group size must be at least 2.", sep = "")
}
if (isTRUE(n_c >= n - 1)) {
  cat("\n=== INPUT ERROR ===\n group size must be less than reference size minus 1. \n\n", sep = "")
  stop("\n\nINPUT ERROR \n group size must be less than reference size minus 1. \n", sep = "")
}

if (unidim & multidim & K == 1) { multidim <- FALSE }
if (!unidim & multidim & K == 1) {
  cat("\n=== INPUT ERROR ===\n Only one variable is selected: multidimensional analysis is irrelevant. \n\n")
  stop("\n\nINPUT ERROR \n Only one variable is selected: multidimensional analysis is irrelevant. \n")
}

# Sample space and method
choose.n.n_c <- choose(n, n_c)
cardJ <- min(choose.n.n_c, max_number)    # card_J: number of samples
if (cardJ < max_number) {
  MC <- FALSE
  Samples.JC <- t(combn(1:n, n_c))          # enumeration of samples
} else { MC <- TRUE }


# === UNIDIMENSIONAL ANALYSES FOR MEAN ===
# ========================================
if (unidim & test_stat_mean) {first_variable <- TRUE
	
	for (icol in 1:K) {
		x.I <- as.vector(X.IK[, icol])
		names(x.I) <- rownames(X.IK)
		x.C <- as.vector(X.CK[, icol])
		names(x.C) <- row.names(X.CK)

		# === DESCRIPTIVE ANALYSIS ===
		xbar  <- mean(x.I)                # mean of reference set I
		v     <- mean(x.I^2) - xbar^2     # variance of reference set I
		m_obs <- mean(x.C)                # mean of group C
		v_C   <- mean(x.C^2) - m_obs^2    # variance of group C
		
		d_obs <- (m_obs - xbar) / sqrt(v) # observed scaled deviation
		notable <- ifelse (abs(d_obs) >= notable_D, TRUE, FALSE)
		z.I   <- (x.I - xbar) / sqrt(v)   # scaled deviations
    rm(x.I, x.C)

		# === INDUCTIVE ANALYSIS ===
    d.J <- matrix(0, nrow = cardJ, ncol = 1)
    if (MC) {
      set.seed(seed)
      for (j in 1:cardJ) {
        d.J[j] <- mean(z.I[sample(1:n, n_c)])
      }
    } else {
      for (j in 1:cardJ){
        d.J[j] <- mean(z.I[Samples.JC[j, ]])
      }
    }

		if (test & notable) {
			n_sup <- sum( d.J >= (d_obs * (1 - iota * sign(d_obs)) - iota))
			n_inf <- sum( d.J <= (d_obs * (1 + iota * sign(d_obs)) + iota)) 
			# combinatorial p-value (one-sided)
			p_value <- min(n_inf, n_sup) / cardJ
		}
    if (region) {
			# exact compatibility interval
			d.J_ord  <- sort(d.J)
			rank_inf <- trunc(cardJ * alpha_c/2) + 1
      # limits for test statistic D
			d_sup <- d_obs - d.J_ord[rank_inf]
			d_inf <- d_obs - d.J_ord[cardJ + 1 - rank_inf]
			# limits for test statistic M
			m_sup <- d_sup * sqrt(v) + xbar
			m_inf <- d_inf * sqrt(v) + xbar
    }
    
    if (approximate) {
			# approximate p_value
			z_obs <- sqrt(n_c * (n - 1) / (n - n_c)) * d_obs
			app_p_value <- pnorm(-abs(z_obs))

			# approximate compatibility interval 
			h_alpha <- -qnorm(alpha_c / 2) * sqrt((n - n_c)/((n - 1)* n_c)) # half-length for D
			# limits for test statistic D
			app_d_sup <- d_obs + h_alpha
			app_d_inf <- d_obs - h_alpha
			# limits for test statistic M
			app_m_sup <- m_obs + h_alpha * sqrt(v)
			app_m_inf <- m_obs - h_alpha * sqrt(v)
    }

		# === RESULTS PRINTING ===
		if (first_variable) {
			cat("\n")
		  cat("================================================= \n")
			cat(" COMPARISON OF A GROUP CLOUD TO A REFERENCE CLOUD \n")
			cat("                ACCORDING TO MEAN \n")
			cat("================================================= \n")
  			if (K >= 2) cat("  ", K," variables are analysed separately." ,"\n", sep = "")
		}
		
		cat("\n ********** VARIABLE: ", 
		    colnames(X.IK)[icol], " **********\n", sep = "")

		cat("Descriptive analysis \n")
		cat("-------------------- \n")
		Size <- c(n,n_c)
		Mean <- round(c(xbar, m_obs), m_digits)
		SD <- round(c(sqrt(v), sqrt(v_C)), m_digits)
		names(Mean) <- c("  Reference", paste("  Group (", groupName, ")", sep = ""))
		tmp = cbind(Size, Mean, SD)
		colnames(tmp) = c(" size", "    mean", "      SD")
		print(tmp)
		
    # conclusion about the magnitude of deviation
		cat("  Deviation between means (group - reference): ", 
		    format(round(m_obs - xbar, m_digits), nsmall = m_digits), 
		    "\n", sep = "")
		cat("  Scaled deviation: D = ", 
		    format(round(d_obs, k_digits), nsmall = k_digits), "\n", sep = "")
		cat("  |D| ",
		    ifelse(notable, ">= ", "< "),
		    notable_D, ": descriptively, deviation is ",
		    ifelse(notable, "notable", "small"),
		    ". \n\n", sep = "")

		cat("Combinatorial inference \n")
		cat("----------------------- \n")
		cat("  number of possible samples", sep = "")
		if (choose.n.n_c < Inf) {
		  if (log10(choose.n.n_c) < 10) {
		    cat(": ", choose.n.n_c, "; \n", sep = "")
		  } else {
		    cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "")
		  }
		} else {
		  cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "")
		}
		if (MC) {
		  cat("  MC method is performed with ", cardJ, " samples. \n", sep = "")
		} else {
		  cat("  exhaustive method is performed. \n", sep = "")
		}

  	if (test) {
  	  cat("\n~ combinatorial typicality test \n", sep = "")
  		if (!notable) {
  		  cat("  Deviation is small, there is no point in performing the test. \n")
  		} else {
  		  cat("  test statistic: Mean \n", sep = "")
  		  side <- ifelse(d_obs > 0, "high", "low")
  		  cat("  p-value: p = ", min(n_inf, n_sup), "/", cardJ, " = ", 
  		      format(round(p_value, p_digits), nsmall = p_digits), sep = "")
  		  if (p_value <= alpha_t / 2) {
  		    cat(" < ", alpha_t / 2, " (one-sided) \n", 
  		        "  For Mean, the group is atypical ", 
  		        "of the reference population on the side of ", side, 
  		        " values, at level ", alpha_t/2,". \n", sep = "")
  		  } else {
  		    cat(" > ", alpha_t / 2, " (one-sided) \n", 
  		        "  For Mean, the group is not atypical ", 
  		        "of the reference population, at level ", alpha_t, ". \n", sep = "")
  		  }
  		}
		}
		
		# Compatibility interval
		if (region) {
  		cat("\n~ ", 100 * (1 - alpha_c), "% compatibility interval \n",  
  		    
  		    "    Mean: [", 
  		    format(round(m_inf, m_digits), nsmall = m_digits), ", ",
  		    format(round(m_sup, m_digits), nsmall = m_digits),  "] \n", 
  		    "    Scaled deviation: [",
  		    format(round(d_inf, k_digits), nsmall = k_digits), ", ", 
  		    format(round(d_sup, k_digits), nsmall = k_digits), "] \n ", sep = "")
		}
		
		# APPROXIMATE RESULTS
		if (approximate) {
  		if ((test & notable) | region) {
  		  cat("-------------- \n", sep = "")
  	  }
  		if (test & notable) {
  		  cat("  approximate test: z = ", 
  		      format(round(z_obs, 3), nsmall = 3), 
  		      "; p = ", 
  		      format(round(app_p_value, p_digits), nsmall = p_digits),
  		      " (one-sided) \n", sep = "")
  		}
  		# Compatibility interval
  		if (region) {
    		cat("  approximate ", 100 * (1 - alpha_c), "% compatibility interval \n",
    		    "    Mean: [", 
    		    format(round(app_m_inf, m_digits), nsmall = m_digits), ", ",
    		    format(round(app_m_sup, m_digits), nsmall = m_digits),  "] \n", 
    		    "    Scaled deviation: [",
    		    format(round(app_d_inf, k_digits), nsmall = k_digits), ", ", 
    		    format(round(app_d_sup, k_digits), nsmall = k_digits), "] \n", sep = "")
  		}
		}
		
		if (export_TF) {    # List of the values of statistics D and M (for export to SPAD)
			if (first_variable == TRUE) {
				export <- cbind(d.J, (d.J * sqrt(v) + xbar))
				colnames(export) <- as.list(
							c(paste("D_", colnames(X.IK)[icol], sep = ""),
							  paste("M_", colnames(X.IK)[icol], sep = ""))
					)
			} else {
			  nb_col <- dim(export)[2]
				export <- cbind(export, d.J, (d.J * sqrt(v) + xbar))
				
				colnames(export)[c(nb_col + 1, nb_col + 2) ] <- as.list( c(paste("D_", colnames(X.IK)[icol], sep = ""),
						     	paste("M_", colnames(X.IK)[icol], sep = "")) )
			}
		}
		first_variable <- FALSE
	}
}

# === MULTIDIMENSIONAL ANALYSIS FOR MEAN POINT ===
# ================================================
if (multidim & test_stat_mean) {
  xC.K <- colMeans(X.CK)
  Mcov.KK <- cov.wt(X.IK, method = "ML")$cov  # reference covariance matrix
	eig <- eigen(Mcov.KK, symmetric = TRUE)
	L <- sum(eig$values > 1.5e-8)
	lambda.L <- eig$values[1:L]
  BasisChange.KL <- eig$vectors[ , 1:L] %*% diag(1/sqrt(lambda.L), nrow = L)
	# standardized principal coordinates of reference cloud
	Z.IL <- as.matrix(sweep(X.IK, 2, colMeans(X.IK), "-")) %*% BasisChange.KL 

	# scaled coordinates of observed mean point (matrice colonne)
	zC.L    <- matrix(xC.K - colMeans(X.IK), nrow = 1) %*% BasisChange.KL
	d2_obs  <- sum(zC.L^2)
	notable <- ifelse(d2_obs >= notable_D^2, TRUE, FALSE)

	# === INDUCTIVE DATA ANALYSIS ===
	
  # test statistic D2
	D2.J <- matrix(0, nrow= cardJ, ncol= 1)
	if (MC) {
	  set.seed(seed)
	  for (j in 1:cardJ)
	    D2.J[j] <- sum(colMeans(matrix(Z.IL[sample.int(n, n_c),],nrow= n_c))^2)
	} else {
	  Samples.JC <- t(combn(n, n_c))
	  for (j in 1:cardJ)
	    D2.J[j]  <- sum(colMeans(matrix(Z.IL[Samples.JC[j,],], nrow= n_c))^2)
	}
	
	colnames(D2.J) <- "D2" 

	# combinatorial p-value
	if (test) {
		n_sup   <- sum( D2.J >= (d2_obs * (1 - iota) - iota))
		p_value <- n_sup / cardJ
	}
	
	# EXACT (1 - alpha)-compatibilty limit: d_alpha
	if (region) {
		rank      <-  cardJ - trunc(cardJ *alpha_c)
		d_alpha   <- sqrt(sort(D2.J)[rank])
		
		# Figure: compatibility ellipse
		if (L == 2) {
		  nbp <- 720
		  kappa <- d_alpha
		  # lambda.L <- E$values[1:2]
		  M.22 <- diag(sqrt(lambda.L), nrow = L) %*% t(eig$vectors[ , 1:2])
		  Circle <- matrix(0, nrow = nbp, ncol = 2)
		  for (a in 1:nbp) {
		    Circle[a, 1] <- cos(a*2*pi/nbp)
		    Circle[a, 2] <- sin(a*2*pi/nbp)
		  }
		  A0 <- kappa * diag(c(-1, -1))
		  B0 <- kappa * diag(c(1, 1))
		  A <- t(t(A0 %*% M.22) + xC.K)
		  B <- t(t(B0 %*% M.22) + xC.K)
		  Ellipse <- sweep(kappa * Circle %*% M.22, xC.K, FUN = "+", MARGIN = 2)

		  rx <- range(c(X.IK[, 1], Ellipse[ , 1]))
		  ry <- range(c(X.IK[, 1], Ellipse[ , 2]))
		  # for pretty(): same range for x and y
		  dx <- (rx[2] - rx[1])
		  dy <- (ry[2] - ry[1])
		  if(dx > dy) {
		    dmax <- dx
		    ry <- ry + c(-(dx - dy)/2, (dx - dy)/2)
		  } else {
		    dmax <- dy
		    rx <- rx + c(-(dy - dx)/2, (dy - dx)/2)
		  }
		  # small expansion of range (20%)
		  rx <- rx + c(-dmax/10, dmax/10)
		  ry <- ry + c(-dmax/10, dmax/10)
		  
		  xG <- colMeans(X.IK)[1]
		  yG <- colMeans(X.IK)[2]
		  
		  if (spad) SPAD.report.newPage(title = "Compatibility region")
		  if (spad) SPAD.report.addText("    (the numerical results and conclusions of tests are in the file 'Edition') ")
		  plot(X.IK,
		       xlim = rx, ylim = ry,
		       col = "grey", cex = 1, lwd = 2,
# 			       main = substitute(paste(a1, "% compatibility region:\n", "principal ellipse of reference cloud centered at mean point of group cloud with scale parameter ", k1, ")"),
#             list(a1 = 100 * (1 - alpha_c),
# 			       k1 = format(round(kappa, k_digits), nsmall = k_digits))),
         main = substitute(paste(a1, "% compatibility region"),
                                  list(a1 = 100 * (1 - alpha_c))),
         sub = substitute(paste( "principal ellipse of reference cloud centred at mean point of group cloud with scale parameter ", k1),
                            list(k1 = format(round(kappa, k_digits), nsmall = k_digits))),
	       asp = plot_ratio)
		  abline(h = pretty(ry, 10), v = pretty(rx, 10), col = "lightgray", lty = 2)
		  points(X.IK, col = "black", cex = 1, lwd = 2)
		  points(xG, yG, cex = 1.5, lwd = 3, col = "black", pch = 16)  # reference mean point
		  points(xC.K[1], xC.K[2], cex = 1.5, lwd = 3, col = "blue", pch = 16)  # group mean point
		  text("G", x = xG + (rx[2] - rx[1]) / 30, y = yG, cex = 1.5)
		  points(Ellipse, asp = 1, cex = 0.1, col = "blue", type = "o")  # ellipse
		  segments(x0= A[,1], y0= A[,2], x1= B[,1], y1= B[,2], col= "blue", lty= 2)
		  text(paste("group size =", n_c), x = xC.K[1], y = ry[1], cex = 1.2, col = "blue")
		}
	}

	# APPROXIMATE TEST
	if (approximate) {
		chi2_obs  <- d2_obs * n_c * (n - 1) / (n - n_c)
		app_p_value <- 1 - pchisq(chi2_obs, df = L)

 		# APPROXIMATE (1 - alpha)-compatibilty limit d_alpha
		app_d_alpha <- sqrt(qchisq(1 - alpha_c, df = L) * (n - n_c) / (n_c * (n - 1)))
	}

	# === RESULTS PRINTING ===
	cat("\n")
	cat("================================================= \n")
	cat(" COMPARISON OF A GROUP CLOUD TO A REFERENCE CLOUD \n")
	cat("             ACCORDING TO MEAN POINT \n")
	cat("================================================= \n")
	cat("  ", K," variables (", paste(colnames(X.IK), collapse = ", "),
	    ") are analysed jointly; \n", sep= "")
	cat("  clouds are in a ", L,"-dimensional geometric space. \n", sep= "")
	cat("       size of the reference set:  ", n, "\n", sep = "")
	cat("       size of the group (", groupName, "): ", n_c, "\n\n", sep = "")
	
	cat("Descriptive analysis \n")
	cat("-------------------- \n")
	cat("  M-distance between mean points: ",
	    format(round(sqrt(d2_obs), k_digits), nsmall = k_digits), 
	    ifelse(d2_obs >= notable_D^2, " >= ", " < "),
	    notable_D, "\n", sep = "")
	cat("  Descriptively, the deviation is ",
	    ifelse(d2_obs >= notable_D^2, "notable.", "small."), 
	    "\n\n", sep = "")
	
	
  cat("Combinatorial inference \n", sep = "")
  cat("----------------------- \n", sep= "")
  cat("  number of possible samples", sep = "")
  if (choose.n.n_c < Inf) {
    if (log10(choose.n.n_c) < 10) {
      cat(": ", choose.n.n_c, "; \n", sep = "")
    } else {
      cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "")
    }
  } else {
    cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "")
  }
  if (MC) {
    cat("  MC method is performed with ", cardJ, " samples. \n", sep = "")
  } else {
    cat("  exhaustive method is performed. \n", sep = "")
  }
	
	if (test) {
	  cat("\n~ Combinatorial typicality test \n", sep = "")
	  if (!notable) {
	    cat("  Deviation is small, there is no point in performing the test. \n")
	  } else {
	    cat("  test statistic: squared M-distance ", sep = "")
	    cat("(observed value: ", format(round(d2_obs, vr_digits), nsmall = vr_digits), ")\n", sep = "")
	    cat("  p-value: p = ", n_sup, "/", cardJ, " = ", 
	        format(round(p_value, p_digits), nsmall = p_digits), sep = "")
	    if (p_value <= alpha_t) {
	      cat(" <= ", alpha_t,  "\n", 
	          "  For mean point, the group is atypical of the ",
	          "reference population, at level ", alpha_t, ".", "\n", sep = "")
	    } else {
	      cat(
	        " > ",  alpha_t, "\n", 
	        "  For mean point, the group is not atypical of the ",
	        "reference population, at level ", alpha_t, ".", "\n", sep = "")
	    }
	  }
	}
	if (region) {
	  ellips = ifelse(L == 2, "ellipse", "ellipsoid")
	  cat("\n~ ", 100 * (1 - alpha_c), 
	      "% compatibility region: principal ", ellips, " of reference cloud\n",
	      "     centered at the mean point of group cloud and with scale parameter ",
	      format(round(d_alpha, k_digits), nsmall = k_digits), ".\n", sep = "")
	}

	# === Approximate Method ===
  if (approximate) {
    if (test & notable | region) {
	    cat("------------- \n", sep = "")
    }
	  if (test & notable) {
	    cat("  approximate test: Chi-Square = ", 
	        format(round(chi2_obs, 3), nsmall = 3), 
	        ", df = ", L, "; p = ", 
	        format(round(app_p_value, p_digits), nsmall = p_digits), "\n", sep = "")
	  }
	  if (region) {
  	  cat("  approximate ", 100 * (1- alpha_c), 
  	      "% compatibility region: scale parameter = ", 
  	      format(round(app_d_alpha, k_digits), nsmall = k_digits), "\n", sep = "")
	  }
  }
  
  if (export_TF) {  	# Export to SPAD
    if (!unidim) {
      export <- as.data.frame(D2.J)
    } else {
      export <- cbind(export, D2.J)
    }
	}  # end of multidimentional analysis
}

# === ANALYSES FOR VARIANCE OF CLOUD ===
if (test_stat_var) {
  v_grp.K <- v_ref.K <- v_ratio.K <- numeric(length = K)
  for (k in 1:K) {
    v_ref.K[k] <- sum(X.IK[ ,k]^2)/n - (sum(X.IK[ ,k])/n)^2 # reference variance 
    v_grp.K[k] <- sum(X.CK[ ,k]^2)/n_c - (sum(X.CK[ ,k])/n_c)^2  # group variance
    v_ratio.K[k] <- v_grp.K[k]/v_ref.K[k]
  }
  notable.K <- abs(v_ratio.K - 1) >= notable_VR
  v_ref <- sum(v_ref.K)
  v_grp <- sum(v_grp.K)
  v_ratio <- v_grp/v_ref
  notable <- abs(v_ratio - 1) >= notable_VR
  
  # === INDUCTIVE ANALYSIS ===
  V.JK <- matrix(0, nrow = cardJ, ncol = K)
  v.J <- numeric(length = cardJ)
  if (MC) {
    set.seed(seed)
    for (j in 1:cardJ) {
      tmp_sample <- sample(1:n, n_c)
      for (k in 1:K) {
        V.JK[j, k] <- sum(X.IK[tmp_sample, k]^2)/n_c - (sum(X.IK[tmp_sample, k])/n_c)^2
      }
      v.J[j] <- sum(V.JK[j, ])
    }
  } else {
    for (j in 1:cardJ){
      for (k in 1:K) {
        V.JK[j, k] <- sum(X.IK[Samples.JC[j, ], k]^2)/n_c - (sum(X.IK[Samples.JC[j, ], k])/n_c)^2
      }
      v.J[j] <- sum(V.JK[j, ])
    }
  }

  p.K <- n_sup.K <- n_inf.K <- numeric(length = K)
  if (test) {
    for (k in 1:K) {
      n_sup.K[k] <- sum( V.JK[,k] >= (v_grp.K[k] * (1 - iota) - iota))
      n_inf.K[k] <- sum( V.JK[,k] <= (v_grp.K[k] * (1 + iota) + iota))
      p.K[k] <- min(n_sup.K[k], n_inf.K[k])/cardJ
    }
    n_sup <- sum( v.J >= (v_grp * (1 - iota) - iota))
    n_inf <- sum( v.J <= (v_grp * (1 + iota) + iota))
    p <- min(n_sup, n_inf)/cardJ 
  }
  # compatibility region for unidimensional analyses
  if (region) {
    v_inf.K <-  v_sup.K <- numeric(length = K)
    for (k in 1:K) {
      v_inf.K[k] <- sort(V.JK[ ,k])[trunc(cardJ * alpha_c/2) + 1]
      v_sup.K[k] <- sort(V.JK[ ,k], decreasing = TRUE)[trunc(cardJ * alpha_c/2) + 1]
    }
    # compatibility region for multidimensional analysis
    v_inf <- sort(v.J)[trunc(cardJ * alpha_c/2) + 1]
    v_sup <- sort(v.J, decreasing = TRUE)[trunc(cardJ * alpha_c/2) + 1]  
  }

  if (unidim) {  
    first_variable <- TRUE
    for (k in 1:K) {
      # === RESULTS PRINTING ===
      if (first_variable) {
        cat("\n")
        cat("================================================= \n")
        cat(" COMPARISON OF A GROUP TO A REFERENCE POPULATION \n")
        cat("              ACCORDING TO VARIANCE \n")
        cat("================================================= \n")
        if (K >= 2) cat("  ", K," variables are analysed separately." ,"\n", sep = "")
      }
      
      cat("\n ********** VARIABLE: ", 
          colnames(X.IK)[k], " **********\n", sep = "")
      
      cat("Descriptive analysis \n")
      cat("-------------------- \n")
      Size <- c(n,n_c)
      VAR <- round(c(v_ref.K[k], v_grp.K[k]), v_digits)
      names(VAR) <- c("  Reference", paste("  Group (", groupName, ")", sep = ""))
      tmp = cbind(Size, VAR)
      colnames(tmp) = c(" size", " variance")
      print(tmp)
      
      # conclusion about the magnitude of the variation rate of variance
      cat("  variance ratio (group/reference): VR = ", 
          format(round(v_ratio.K[k], vr_digits), nsmall = vr_digits), "\n", sep = "")
      cat("  |VR - 1| ", 
          ifelse(notable.K[k], ">= ", "< "),
          notable_VR,
          ": descriptively, the variance ratio is ", 
          ifelse(notable.K[k], "notable", "not notable"),
          ". \n\n", sep = "")
      
      cat("Combinatorial inference \n")
      cat("----------------------- \n")
      
      cat("  number of possible samples", sep = "")
      if (choose.n.n_c < Inf) {
        if (log10(choose.n.n_c) < 10) {
          cat(": ", choose.n.n_c, "; \n", sep = "")
        } else {
          cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "")
        }
      } else {
        cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "")
      }
      if (MC) {
        cat("  MC method is performed with ", cardJ, " samples. \n", sep = "")
      } else {
        cat("  exhaustive method is performed. \n", sep = "")
      }
      
      if (test) {
        cat("\n~ Combinatorial typicality test \n") 
        if (!notable.K[k]) {
          cat("\n  Variance ratio is not notable, there is no point in performing the test. \n")
        } else {
          cat("  test statistic: Variance \n")
          side <- ifelse(v_ratio.K[k] > 1, "high", "low")
          cat("  p-value: p = ", min(n_inf.K[k], n_sup.K[k]), "/", cardJ, " = ", 
              format(round(p.K[k], p_digits), nsmall = p_digits), sep = "")
          if (p.K[k] <= alpha_t / 2) {
            cat(" < ", alpha_t / 2, " (one-sided) \n", 
                "  For variance, the group is atypical ", 
                "of the reference set on the side of ", side, 
                " values, at level ", alpha_t/2, ". \n", sep = "")
          } else {
            cat(" > ", alpha_t / 2, " (one-sided) \n", 
                "  For variance, the group is not atypical ", 
                "of the reference set, at level ", alpha_t, ". \n", sep = "")
          }
        }
      }
      
      # Compatibility interval
      if (region) {
        cat("\n~ ", 100 * (1 - alpha_c), "% compatibility interval: [",
            format(round(v_grp.K[k] * v_ref.K[k]/v_sup.K[k], v_digits), nsmall = v_digits), ", ", 
            format(round(v_grp.K[k] * v_ref.K[k]/v_inf.K[k], v_digits), nsmall = v_digits), "] \n",
            sep = "")
      }
      
      if (approximate) {
        cat("\n  Approximate methods are not available.\n", sep = "")
      }
    
      if (export_TF) {    # List of the values of statistics V (for export to SPAD)
        if (first_variable == TRUE) {
          export <- as.data.frame(V.JK[ , k])
        } else {
          export <- cbind(export, V.JK[ , k])
        }
        colnames(export)[dim(export)[2]] <- paste("V_", colnames(X.IK)[k], sep = "")
      }
      first_variable <- FALSE
    }
  }
  
  if (multidim) {
    # === RESULTS PRINTING ===

    cat("\n")
    cat("============================================ \n")
    cat(" COMPARISON OF A CLOUD TO A REFERENCE CLOUD \n")
    cat("              ACCORDING TO VARIANCE \n")
    cat("============================================== \n")
    cat("  ", K," variables (", paste(colnames(X.IK), collapse = ", "),
        ") are analysed jointly (", K,"-dimensional geometric space). \n\n", sep= "")
    cat("Descriptive analysis \n")
    cat("-------------------- \n")
    Size <- c(n, n_c)
    VAR <- round(c(v_ref, v_grp), v_digits)
    names(VAR) <- c("  Reference", paste("  Group (", groupName, ")", sep = ""))
    tmp = cbind(Size, VAR)
    colnames(tmp) = c(" size", " variance")
    print(tmp)

    # conclusion about the magnitude of the variation rate of variance
    cat("  variance ratio (group/reference): VR = ", 
        format(round(v_ratio, vr_digits), nsmall = vr_digits), "\n", sep = "")
    cat("  |VR - 1| ", 
        ifelse(notable, ">= ", "< "),
        notable_VR,
        ": descriptively, the variance ratio is ", 
        ifelse(notable, "notable", "not notable"),
        ". \n\n", sep = "")
    
    cat("Combinatorial inference \n")
    cat("----------------------- \n")
    cat("  number of possible samples", sep = "")
    if (choose.n.n_c < Inf) {
      if (log10(choose.n.n_c) < 10) {
        cat(": ", choose.n.n_c, "; \n", sep = "")
      } else {
        cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "")
      }
    } else {
      cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "")
    }
    if (MC) {
      cat("  MC method is performed with ", cardJ, " samples. \n", sep = "")
    } else {
      cat("  exhaustive method is performed. \n", sep = "")
    }
    
    if (test) {
      if (!notable) {
        cat("\n  Variance ratio is not notable, ", 
            "there is no point in performing the test. \n")
      } else {
        cat("\n~ Combinatorial typicality test \n")
        cat("  test statistic: Variance \n", sep = "")
        side <- ifelse(v_ratio > 1, "high", "low")
        cat("  p-value: p = ", min(n_inf, n_sup), "/", cardJ, " = ", 
            format(round(p, p_digits), nsmall = p_digits), sep = "")
        if (p <= alpha_t / 2) {
          cat(" < ", alpha_t / 2, " (one-sided) \n", 
              "  For variance, the group is atypical ", 
              "of the reference set on the side of ", side, 
              " values, at level ", alpha_t/2, ". \n", sep = "")
        } else {
          cat(" > ", alpha_t / 2, " (one-sided) \n", 
              "  For variance, the group is not atypical ", 
              "of the reference set, at level ", alpha_t, ". \n", sep = "")
        }
      }
    }
    
    # Compatibility interval
    if (region) {
      cat("\n~ ", 100 * (1 - alpha_c), 
          "% compatibility interval for variance: [",
          format(round(v_grp * v_ref/v_sup, v_digits), nsmall = v_digits), ", ", 
          format(round(v_grp * v_ref/v_inf, v_digits), nsmall = v_digits), 
          "] \n", sep = "")
    }
    
    if (approximate) {
      cat("  Approximate methods are not available.\n", sep = "")
    }
      
    if (export_TF) {  	# Export to SPAD
      if (!unidim) {
        export <- as.data.frame(v.J)
      } else {
        export <- cbind(export, v.J)
      }
      colnames(export)[dim(export)[2]] <- "V"
    }
  }
}

if (export_TF) {
  SPAD.setDataFrame(export, generateColId = FALSE)
  if (unidim) {
    cat("\n", "Test statistic distributions are exported to SPAD.\n", sep = "")
  } else {
    cat("\n", "Test statistic distribution is exported to SPAD.\n", sep = "")
  }
} else {cat("\n")}

time2 <- Sys.time()
elapsed_time <- round(as.numeric(difftime(time2, time1, units = "secs")), 2)
sec_s <- if (elapsed_time < 2) {" sec"} else {" secs"}
cat("Elapsed time: ", elapsed_time, sec_s, "\n", sep = "")

### END ###