# CIGDA_Comb-v1.1 (March 1, 2020) # B. Le Roux, S. Bienaise, J.-L. Durand #### BEGINNING OF DATA INPUT USING SPAD GUI #### #!SPADR Combinatorial Typicality Tests #SPAD.VARN STATUT = SEL; LABEL = Selected; ROLE = C #SPAD.VAR1 STATUT = GRP; LABEL = Group; ROLE = N #SPAD.VAR1 STATUT = REF; LABEL = Reference; ROLE = N #SPAD.TITRE[1] LABEL = Analyses #SPAD.SEP[1] LABEL = Group definition (put & between categories to be grouped) #SPAD.PAR.TEXTE[1] NOM = GROUP_LEVEL; LABEL = name of the group; DEFAUT = #SPAD.SEP[1] LABEL = Choice of analyses #SPAD.PAR.LISTE[1] NOM = SPACE_TYPE; LABEL = ; ITEMS = one-dimensional#multidimensional#one & multidimensional; STYLE = LARGE2; DEFAUT = 2 #SPAD.SEP[1] LABEL = Choice of the typicality test #SPAD.PAR.LISTE[1] NOM = TEST_STAT; LABEL = ; STYLE = LARGE2; ITEMS = for Mean point#for Variance; DEFAUT = 1 #SPAD.SEP[1] LABEL = Combinatorial procedures #SPAD.PAR.LISTE[1] NOM = PROC_TYPE; LABEL = ; ITEMS = test#compatibility region#test & region; STYLE = LARGE2; DEFAUT = 1 #SPAD.TITRE[2] LABEL = Parameters #SPAD.SEP[2] LABEL = Notable limits of descriptive statistics #SPAD.PAR.REEL[2] NOM = NOTABLE_D; LABEL = M-distance/scaled deviation; DEFAUT = 0.4; MIN = 0 #SPAD.PAR.REEL[2] NOM = NOTABLE_VR; LABEL = absolute value of variation rate; DEFAUT = 0.1; MIN = 0 ; MAX = 1 #SPAD.SEP[2] LABEL = Alpha-levels #SPAD.PAR.REEL[2] NOM = ALPHA_T; LABEL = test; MIN = 0.001; MAX = 0.5; DEFAUT = 0.05 #SPAD.PAR.REEL[2] NOM = ALPHA_C; LABEL = compatibility region; MIN = 0.001; MAX = 0.5; DEFAUT = 0.05 #SPAD.SEP[2] LABEL = Samples #SPAD.PAR.ENTIER[2] NOM = MAX_NUMBER; LABEL = maximum number; MIN = 5000; DEFAUT = 100000 #SPAD.SEP[2] LABEL = Monte Carlo #SPAD.PAR.LISTE[2] NOM = RANDOM_SEED; LABEL = seed; STYLE = LARGE2; ITEMS = random#fixed; DEFAUT = 1 #SPAD.PAR.ENTIER[2] NOM = SEED; LABEL = seed (if fixed); DEFAUT = 1234 #SPAD.TITRE[3] LABEL = Outputs #SPAD.SEP[3] LABEL = Number of decimals #SPAD.SEP[3] LABEL = 1- test for Mean Point #SPAD.PAR.ENTIER[3] NOM = M_DIGITS; LABEL = mean; MIN = 0; MAX = 8; DEFAUT = 3 #SPAD.PAR.ENTIER[3] NOM = K_DIGITS; LABEL = M-distance & scale parameter; MIN = 1; MAX = 8; DEFAUT = 2 #SPAD.SEP[3] LABEL = 2- test for Variance #SPAD.PAR.ENTIER[3] NOM = V_DIGITS; LABEL = variance; MIN = 1; MAX = 8; DEFAUT = 2 #SPAD.PAR.ENTIER[3] NOM = VR_DIGITS; LABEL = variation rate; MIN = 1; MAX = 8; DEFAUT = 2 #SPAD.SEP[3] LABEL = 3- #SPAD.PAR.ENTIER[3] NOM = P_DIGITS; LABEL= p-value; MIN = 2; MAX = 8; DEFAUT = 3 #SPAD.SEP[3] LABEL = #SPAD.SEP[3] LABEL = Approximate results #SPAD.PAR.LISTE[3] NOM = APPROXIMATE; LABEL = ; ITEMS = yes#no; STYLE = LARGE2; DEFAUT = 2 #SPAD.SEP[3] LABEL = Export of test statistic distribution #SPAD.PAR.LISTE[3] NOM = EXPORT; LABEL = ; STYLE = LARGE2; ITEMS = yes#no; DEFAUT = 2 ### DATA AND PARAMETERS ### base = as.data.frame(SPAD.getCategoryAsDataFrame("SEL")) # numerical reference = SPAD.getCategoryAsDataFrame("REF") # categorical group = SPAD.getCategoryAsDataFrame("GRP") # categorical group_lbl = SPAD.getInputParameter("GROUP_LEVEL") test_dim = SPAD.getInputParameter("SPACE_TYPE") unidim = (1 %in% test_dim | 3 %in% test_dim) multidim = (2 %in% test_dim | 3 %in% test_dim) test_stat = SPAD.getInputParameter("TEST_STAT") test_stat_mean = (1 %in% test_stat) test_stat_var = (2 %in% test_stat) proc_type = SPAD.getInputParameter("PROC_TYPE") test = (1 %in% proc_type | 3 %in% proc_type) region = (2 %in% proc_type | 3 %in% proc_type) notable_D = SPAD.getInputParameter("NOTABLE_D") notable_VR = SPAD.getInputParameter("NOTABLE_VR") alpha_t = SPAD.getInputParameter("ALPHA_T") # alpha level for test alpha_c = SPAD.getInputParameter("ALPHA_C") # alpha level for compatibility region max_number = SPAD.getInputParameter("MAX_NUMBER") # number of samples random_seed = (1 %in% SPAD.getInputParameter("RANDOM_SEED")) # TRUE if random seed is used for each test seed = SPAD.getInputParameter("SEED") if (random_seed) {seed <- NULL} m_digits = SPAD.getInputParameter("M_DIGITS") # Nb of decimal places for raw mean or sd v_digits = SPAD.getInputParameter("V_DIGITS") # Nb of decimal places for variance vr_digits = SPAD.getInputParameter("VR_DIGITS") # Nb of decimal places for variance ratio p_digits = SPAD.getInputParameter("P_DIGITS") # Nb of decimal places for p-values k_digits = SPAD.getInputParameter("K_DIGITS") # Nb of decimal places for M-variance & kappa approximate = SPAD.getInputParameter("APPROXIMATE") == 1 export = NULL # dataframe for results export export_TF = SPAD.getInputParameter("EXPORT") == 1 spad = TRUE if(any(is.na(base))) { cat("\n=== INPUT ERROR ===\n ", "there are empty cells in numeric variables. \n\n", sep = "") stop("\n\n=== INPUT ERROR ===\n ", "there are empty cells in numeric variables. \n", sep = "") } if(any(is.na(group))) { cat("\n=== INPUT ERROR ===\n ", "You must choose a variable for defining the Group ('Variables' dialog box). \n\n", sep = "") stop("\n\n=== INPUT ERROR ===\n ", "You must choose a variable for defining the Group ('Variables' dialog box). \n", sep = "") } # === group definition === txt_tmp <- group_lbl # split chain into character vector (split = ",") # and remove leading and trailing whitespace from character strings selected_levels <- trimws(strsplit(x = txt_tmp, split = "&", fixed = TRUE)[[1]]) groupName <- paste0(selected_levels, collapse = " & ") rm(txt_tmp) tmp_levels <- setdiff(selected_levels, unlist(group)) # ERROR MESSAGE PRINTING if (length(tmp_levels) > 0) { cat("Selected level(s) for group \n", sep = "") if (length(tmp_levels) == 1) { cat("\n=== INPUT ERROR ===\n ", tmp_levels, ": category not observed. \n\n", sep ="") stop("\n\nINPUT ERROR \n ", tmp_levels, ": category not observed. \n", sep ="") } else { cat("\n=== INPUT ERROR ===\n ", tmp_levels, ": categories not observed. \n\n", sep = "") stop("\n\nINPUT ERROR \n ", tmp_levels, ": categories not observed. \n", sep = "") } } if (length(selected_levels) == 0) { cat("\n=== INPUT ERROR ===\n", " You must choose 1 or more Group categorie(s). \n\n", sep = "") stop("\n\nINPUT ERROR \n", " You must choose 1 or more Group categorie(s). \n", sep = "") } group_TF <- unlist(group) %in% selected_levels # reference definition if (length(reference) == 1 & any(is.na(reference))) { ref_TF <- rep(TRUE, dim(base)[1]) cat("\n=== WARNING ===\n No variable is chosen for reference, therefore all cases belong to the reference set.\n\n") } else { ref_TF <- unlist(reference) %in% "ref" } X.IK <- as.matrix(base[ref_TF, ]) # reference cloud coordinates rownames(X.IK) <- rownames(base)[ref_TF] colnames(X.IK) <- colnames(base) X.CK <- as.matrix(base[group_TF, ]) # group cloud coordinates rownames(X.CK) <- rownames(base)[group_TF] ### END OF DATA INPUT ### ### PART OF THE R SCRIPT REGARDING COMBINATORIAL PROCEDURES ### cat(" ******************************************************** \n", " * COMBINATORIAL TYPICALITY TEST * \n", " * R script interfaced with Coheris SPAD * \n", " * * * * * \n", " * See Chapter 3 of the book: * \n", " * \"Combinatorial Inference in Geometric Data Analysis\" * \n", " * B. Le Roux, S. Bienaise, J.-L. Durand * \n", " * Chapman & Hall/CRC, 2019 * \n", " ******************************************************** \n", sep = "") # technical parameter time1 <- Sys.time() iota <- 1e-12 plot_ratio <- ifelse(spad, 1.18, 1) K <- dim(X.IK)[2] n <- dim(X.IK)[1] n_c <- dim(X.CK)[1] if (isTRUE(n_c <= 1) & test_stat_var) { cat("\n=== INPUT ERROR ===\n group size must be at least 2.\n\n", sep = "") stop("\n\nINPUT ERROR \n group size must be at least 2.", sep = "") } if (isTRUE(n_c >= n - 1)) { cat("\n=== INPUT ERROR ===\n group size must be less than reference size minus 1. \n\n", sep = "") stop("\n\nINPUT ERROR \n group size must be less than reference size minus 1. \n", sep = "") } if (unidim & multidim & K == 1) { multidim <- FALSE } if (!unidim & multidim & K == 1) { cat("\n=== INPUT ERROR ===\n Only one variable is selected: multidimensional analysis is irrelevant. \n\n") stop("\n\nINPUT ERROR \n Only one variable is selected: multidimensional analysis is irrelevant. \n") } # Sample space and method choose.n.n_c <- choose(n, n_c) cardJ <- min(choose.n.n_c, max_number) # card_J: number of samples if (cardJ < max_number) { MC <- FALSE Samples.JC <- t(combn(1:n, n_c)) # enumeration of samples } else { MC <- TRUE } # === UNIDIMENSIONAL ANALYSES FOR MEAN === # ======================================== if (unidim & test_stat_mean) {first_variable <- TRUE for (icol in 1:K) { x.I <- as.vector(X.IK[, icol]) names(x.I) <- rownames(X.IK) x.C <- as.vector(X.CK[, icol]) names(x.C) <- row.names(X.CK) # === DESCRIPTIVE ANALYSIS === xbar <- mean(x.I) # mean of reference set I v <- mean(x.I^2) - xbar^2 # variance of reference set I m_obs <- mean(x.C) # mean of group C v_C <- mean(x.C^2) - m_obs^2 # variance of group C d_obs <- (m_obs - xbar) / sqrt(v) # observed scaled deviation notable <- ifelse (abs(d_obs) >= notable_D, TRUE, FALSE) z.I <- (x.I - xbar) / sqrt(v) # scaled deviations rm(x.I, x.C) # === INDUCTIVE ANALYSIS === d.J <- matrix(0, nrow = cardJ, ncol = 1) if (MC) { set.seed(seed) for (j in 1:cardJ) { d.J[j] <- mean(z.I[sample(1:n, n_c)]) } } else { for (j in 1:cardJ){ d.J[j] <- mean(z.I[Samples.JC[j, ]]) } } if (test & notable) { n_sup <- sum( d.J >= (d_obs * (1 - iota * sign(d_obs)) - iota)) n_inf <- sum( d.J <= (d_obs * (1 + iota * sign(d_obs)) + iota)) # combinatorial p-value (one-sided) p_value <- min(n_inf, n_sup) / cardJ } if (region) { # exact compatibility interval d.J_ord <- sort(d.J) rank_inf <- trunc(cardJ * alpha_c/2) + 1 # limits for test statistic D d_sup <- d_obs - d.J_ord[rank_inf] d_inf <- d_obs - d.J_ord[cardJ + 1 - rank_inf] # limits for test statistic M m_sup <- d_sup * sqrt(v) + xbar m_inf <- d_inf * sqrt(v) + xbar } if (approximate) { # approximate p_value z_obs <- sqrt(n_c * (n - 1) / (n - n_c)) * d_obs app_p_value <- pnorm(-abs(z_obs)) # approximate compatibility interval h_alpha <- -qnorm(alpha_c / 2) * sqrt((n - n_c)/((n - 1)* n_c)) # half-length for D # limits for test statistic D app_d_sup <- d_obs + h_alpha app_d_inf <- d_obs - h_alpha # limits for test statistic M app_m_sup <- m_obs + h_alpha * sqrt(v) app_m_inf <- m_obs - h_alpha * sqrt(v) } # === RESULTS PRINTING === if (first_variable) { cat("\n") cat("================================================= \n") cat(" COMPARISON OF A GROUP CLOUD TO A REFERENCE CLOUD \n") cat(" ACCORDING TO MEAN \n") cat("================================================= \n") if (K >= 2) cat(" ", K," variables are analysed separately." ,"\n", sep = "") } cat("\n ********** VARIABLE: ", colnames(X.IK)[icol], " **********\n", sep = "") cat("Descriptive analysis \n") cat("-------------------- \n") Size <- c(n,n_c) Mean <- round(c(xbar, m_obs), m_digits) SD <- round(c(sqrt(v), sqrt(v_C)), m_digits) names(Mean) <- c(" Reference", paste(" Group (", groupName, ")", sep = "")) tmp = cbind(Size, Mean, SD) colnames(tmp) = c(" size", " mean", " SD") print(tmp) # conclusion about the magnitude of deviation cat(" Deviation between means (group - reference): ", format(round(m_obs - xbar, m_digits), nsmall = m_digits), "\n", sep = "") cat(" Scaled deviation: D = ", format(round(d_obs, k_digits), nsmall = k_digits), "\n", sep = "") cat(" |D| ", ifelse(notable, ">= ", "< "), notable_D, ": descriptively, deviation is ", ifelse(notable, "notable", "small"), ". \n\n", sep = "") cat("Combinatorial inference \n") cat("----------------------- \n") cat(" number of possible samples", sep = "") if (choose.n.n_c < Inf) { if (log10(choose.n.n_c) < 10) { cat(": ", choose.n.n_c, "; \n", sep = "") } else { cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "") } } else { cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "") } if (MC) { cat(" MC method is performed with ", cardJ, " samples. \n", sep = "") } else { cat(" exhaustive method is performed. \n", sep = "") } if (test) { cat("\n~ combinatorial typicality test \n", sep = "") if (!notable) { cat(" Deviation is small, there is no point in performing the test. \n") } else { cat(" test statistic: Mean \n", sep = "") side <- ifelse(d_obs > 0, "high", "low") cat(" p-value: p = ", min(n_inf, n_sup), "/", cardJ, " = ", format(round(p_value, p_digits), nsmall = p_digits), sep = "") if (p_value <= alpha_t / 2) { cat(" < ", alpha_t / 2, " (one-sided) \n", " For Mean, the group is atypical ", "of the reference population on the side of ", side, " values, at level ", alpha_t/2,". \n", sep = "") } else { cat(" > ", alpha_t / 2, " (one-sided) \n", " For Mean, the group is not atypical ", "of the reference population, at level ", alpha_t, ". \n", sep = "") } } } # Compatibility interval if (region) { cat("\n~ ", 100 * (1 - alpha_c), "% compatibility interval \n", " Mean: [", format(round(m_inf, m_digits), nsmall = m_digits), ", ", format(round(m_sup, m_digits), nsmall = m_digits), "] \n", " Scaled deviation: [", format(round(d_inf, k_digits), nsmall = k_digits), ", ", format(round(d_sup, k_digits), nsmall = k_digits), "] \n ", sep = "") } # APPROXIMATE RESULTS if (approximate) { if ((test & notable) | region) { cat("-------------- \n", sep = "") } if (test & notable) { cat(" approximate test: z = ", format(round(z_obs, 3), nsmall = 3), "; p = ", format(round(app_p_value, p_digits), nsmall = p_digits), " (one-sided) \n", sep = "") } # Compatibility interval if (region) { cat(" approximate ", 100 * (1 - alpha_c), "% compatibility interval \n", " Mean: [", format(round(app_m_inf, m_digits), nsmall = m_digits), ", ", format(round(app_m_sup, m_digits), nsmall = m_digits), "] \n", " Scaled deviation: [", format(round(app_d_inf, k_digits), nsmall = k_digits), ", ", format(round(app_d_sup, k_digits), nsmall = k_digits), "] \n", sep = "") } } if (export_TF) { # List of the values of statistics D and M (for export to SPAD) if (first_variable == TRUE) { export <- cbind(d.J, (d.J * sqrt(v) + xbar)) colnames(export) <- as.list( c(paste("D_", colnames(X.IK)[icol], sep = ""), paste("M_", colnames(X.IK)[icol], sep = "")) ) } else { nb_col <- dim(export)[2] export <- cbind(export, d.J, (d.J * sqrt(v) + xbar)) colnames(export)[c(nb_col + 1, nb_col + 2) ] <- as.list( c(paste("D_", colnames(X.IK)[icol], sep = ""), paste("M_", colnames(X.IK)[icol], sep = "")) ) } } first_variable <- FALSE } } # === MULTIDIMENSIONAL ANALYSIS FOR MEAN POINT === # ================================================ if (multidim & test_stat_mean) { xC.K <- colMeans(X.CK) Mcov.KK <- cov.wt(X.IK, method = "ML")$cov # reference covariance matrix eig <- eigen(Mcov.KK, symmetric = TRUE) L <- sum(eig$values > 1.5e-8) lambda.L <- eig$values[1:L] BasisChange.KL <- eig$vectors[ , 1:L] %*% diag(1/sqrt(lambda.L), nrow = L) # standardized principal coordinates of reference cloud Z.IL <- as.matrix(sweep(X.IK, 2, colMeans(X.IK), "-")) %*% BasisChange.KL # scaled coordinates of observed mean point (matrice colonne) zC.L <- matrix(xC.K - colMeans(X.IK), nrow = 1) %*% BasisChange.KL d2_obs <- sum(zC.L^2) notable <- ifelse(d2_obs >= notable_D^2, TRUE, FALSE) # === INDUCTIVE DATA ANALYSIS === # test statistic D2 D2.J <- matrix(0, nrow= cardJ, ncol= 1) if (MC) { set.seed(seed) for (j in 1:cardJ) D2.J[j] <- sum(colMeans(matrix(Z.IL[sample.int(n, n_c),],nrow= n_c))^2) } else { Samples.JC <- t(combn(n, n_c)) for (j in 1:cardJ) D2.J[j] <- sum(colMeans(matrix(Z.IL[Samples.JC[j,],], nrow= n_c))^2) } colnames(D2.J) <- "D2" # combinatorial p-value if (test) { n_sup <- sum( D2.J >= (d2_obs * (1 - iota) - iota)) p_value <- n_sup / cardJ } # EXACT (1 - alpha)-compatibilty limit: d_alpha if (region) { rank <- cardJ - trunc(cardJ *alpha_c) d_alpha <- sqrt(sort(D2.J)[rank]) # Figure: compatibility ellipse if (L == 2) { nbp <- 720 kappa <- d_alpha # lambda.L <- E$values[1:2] M.22 <- diag(sqrt(lambda.L), nrow = L) %*% t(eig$vectors[ , 1:2]) Circle <- matrix(0, nrow = nbp, ncol = 2) for (a in 1:nbp) { Circle[a, 1] <- cos(a*2*pi/nbp) Circle[a, 2] <- sin(a*2*pi/nbp) } A0 <- kappa * diag(c(-1, -1)) B0 <- kappa * diag(c(1, 1)) A <- t(t(A0 %*% M.22) + xC.K) B <- t(t(B0 %*% M.22) + xC.K) Ellipse <- sweep(kappa * Circle %*% M.22, xC.K, FUN = "+", MARGIN = 2) rx <- range(c(X.IK[, 1], Ellipse[ , 1])) ry <- range(c(X.IK[, 1], Ellipse[ , 2])) # for pretty(): same range for x and y dx <- (rx[2] - rx[1]) dy <- (ry[2] - ry[1]) if(dx > dy) { dmax <- dx ry <- ry + c(-(dx - dy)/2, (dx - dy)/2) } else { dmax <- dy rx <- rx + c(-(dy - dx)/2, (dy - dx)/2) } # small expansion of range (20%) rx <- rx + c(-dmax/10, dmax/10) ry <- ry + c(-dmax/10, dmax/10) xG <- colMeans(X.IK)[1] yG <- colMeans(X.IK)[2] if (spad) SPAD.report.newPage(title = "Compatibility region") if (spad) SPAD.report.addText(" (the numerical results and conclusions of tests are in the file 'Edition') ") plot(X.IK, xlim = rx, ylim = ry, col = "grey", cex = 1, lwd = 2, # main = substitute(paste(a1, "% compatibility region:\n", "principal ellipse of reference cloud centered at mean point of group cloud with scale parameter ", k1, ")"), # list(a1 = 100 * (1 - alpha_c), # k1 = format(round(kappa, k_digits), nsmall = k_digits))), main = substitute(paste(a1, "% compatibility region"), list(a1 = 100 * (1 - alpha_c))), sub = substitute(paste( "principal ellipse of reference cloud centred at mean point of group cloud with scale parameter ", k1), list(k1 = format(round(kappa, k_digits), nsmall = k_digits))), asp = plot_ratio) abline(h = pretty(ry, 10), v = pretty(rx, 10), col = "lightgray", lty = 2) points(X.IK, col = "black", cex = 1, lwd = 2) points(xG, yG, cex = 1.5, lwd = 3, col = "black", pch = 16) # reference mean point points(xC.K[1], xC.K[2], cex = 1.5, lwd = 3, col = "blue", pch = 16) # group mean point text("G", x = xG + (rx[2] - rx[1]) / 30, y = yG, cex = 1.5) points(Ellipse, asp = 1, cex = 0.1, col = "blue", type = "o") # ellipse segments(x0= A[,1], y0= A[,2], x1= B[,1], y1= B[,2], col= "blue", lty= 2) text(paste("group size =", n_c), x = xC.K[1], y = ry[1], cex = 1.2, col = "blue") } } # APPROXIMATE TEST if (approximate) { chi2_obs <- d2_obs * n_c * (n - 1) / (n - n_c) app_p_value <- 1 - pchisq(chi2_obs, df = L) # APPROXIMATE (1 - alpha)-compatibilty limit d_alpha app_d_alpha <- sqrt(qchisq(1 - alpha_c, df = L) * (n - n_c) / (n_c * (n - 1))) } # === RESULTS PRINTING === cat("\n") cat("================================================= \n") cat(" COMPARISON OF A GROUP CLOUD TO A REFERENCE CLOUD \n") cat(" ACCORDING TO MEAN POINT \n") cat("================================================= \n") cat(" ", K," variables (", paste(colnames(X.IK), collapse = ", "), ") are analysed jointly; \n", sep= "") cat(" clouds are in a ", L,"-dimensional geometric space. \n", sep= "") cat(" size of the reference set: ", n, "\n", sep = "") cat(" size of the group (", groupName, "): ", n_c, "\n\n", sep = "") cat("Descriptive analysis \n") cat("-------------------- \n") cat(" M-distance between mean points: ", format(round(sqrt(d2_obs), k_digits), nsmall = k_digits), ifelse(d2_obs >= notable_D^2, " >= ", " < "), notable_D, "\n", sep = "") cat(" Descriptively, the deviation is ", ifelse(d2_obs >= notable_D^2, "notable.", "small."), "\n\n", sep = "") cat("Combinatorial inference \n", sep = "") cat("----------------------- \n", sep= "") cat(" number of possible samples", sep = "") if (choose.n.n_c < Inf) { if (log10(choose.n.n_c) < 10) { cat(": ", choose.n.n_c, "; \n", sep = "") } else { cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "") } } else { cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "") } if (MC) { cat(" MC method is performed with ", cardJ, " samples. \n", sep = "") } else { cat(" exhaustive method is performed. \n", sep = "") } if (test) { cat("\n~ Combinatorial typicality test \n", sep = "") if (!notable) { cat(" Deviation is small, there is no point in performing the test. \n") } else { cat(" test statistic: squared M-distance ", sep = "") cat("(observed value: ", format(round(d2_obs, vr_digits), nsmall = vr_digits), ")\n", sep = "") cat(" p-value: p = ", n_sup, "/", cardJ, " = ", format(round(p_value, p_digits), nsmall = p_digits), sep = "") if (p_value <= alpha_t) { cat(" <= ", alpha_t, "\n", " For mean point, the group is atypical of the ", "reference population, at level ", alpha_t, ".", "\n", sep = "") } else { cat( " > ", alpha_t, "\n", " For mean point, the group is not atypical of the ", "reference population, at level ", alpha_t, ".", "\n", sep = "") } } } if (region) { ellips = ifelse(L == 2, "ellipse", "ellipsoid") cat("\n~ ", 100 * (1 - alpha_c), "% compatibility region: principal ", ellips, " of reference cloud\n", " centered at the mean point of group cloud and with scale parameter ", format(round(d_alpha, k_digits), nsmall = k_digits), ".\n", sep = "") } # === Approximate Method === if (approximate) { if (test & notable | region) { cat("------------- \n", sep = "") } if (test & notable) { cat(" approximate test: Chi-Square = ", format(round(chi2_obs, 3), nsmall = 3), ", df = ", L, "; p = ", format(round(app_p_value, p_digits), nsmall = p_digits), "\n", sep = "") } if (region) { cat(" approximate ", 100 * (1- alpha_c), "% compatibility region: scale parameter = ", format(round(app_d_alpha, k_digits), nsmall = k_digits), "\n", sep = "") } } if (export_TF) { # Export to SPAD if (!unidim) { export <- as.data.frame(D2.J) } else { export <- cbind(export, D2.J) } } # end of multidimentional analysis } # === ANALYSES FOR VARIANCE OF CLOUD === if (test_stat_var) { v_grp.K <- v_ref.K <- v_ratio.K <- numeric(length = K) for (k in 1:K) { v_ref.K[k] <- sum(X.IK[ ,k]^2)/n - (sum(X.IK[ ,k])/n)^2 # reference variance v_grp.K[k] <- sum(X.CK[ ,k]^2)/n_c - (sum(X.CK[ ,k])/n_c)^2 # group variance v_ratio.K[k] <- v_grp.K[k]/v_ref.K[k] } notable.K <- abs(v_ratio.K - 1) >= notable_VR v_ref <- sum(v_ref.K) v_grp <- sum(v_grp.K) v_ratio <- v_grp/v_ref notable <- abs(v_ratio - 1) >= notable_VR # === INDUCTIVE ANALYSIS === V.JK <- matrix(0, nrow = cardJ, ncol = K) v.J <- numeric(length = cardJ) if (MC) { set.seed(seed) for (j in 1:cardJ) { tmp_sample <- sample(1:n, n_c) for (k in 1:K) { V.JK[j, k] <- sum(X.IK[tmp_sample, k]^2)/n_c - (sum(X.IK[tmp_sample, k])/n_c)^2 } v.J[j] <- sum(V.JK[j, ]) } } else { for (j in 1:cardJ){ for (k in 1:K) { V.JK[j, k] <- sum(X.IK[Samples.JC[j, ], k]^2)/n_c - (sum(X.IK[Samples.JC[j, ], k])/n_c)^2 } v.J[j] <- sum(V.JK[j, ]) } } p.K <- n_sup.K <- n_inf.K <- numeric(length = K) if (test) { for (k in 1:K) { n_sup.K[k] <- sum( V.JK[,k] >= (v_grp.K[k] * (1 - iota) - iota)) n_inf.K[k] <- sum( V.JK[,k] <= (v_grp.K[k] * (1 + iota) + iota)) p.K[k] <- min(n_sup.K[k], n_inf.K[k])/cardJ } n_sup <- sum( v.J >= (v_grp * (1 - iota) - iota)) n_inf <- sum( v.J <= (v_grp * (1 + iota) + iota)) p <- min(n_sup, n_inf)/cardJ } # compatibility region for unidimensional analyses if (region) { v_inf.K <- v_sup.K <- numeric(length = K) for (k in 1:K) { v_inf.K[k] <- sort(V.JK[ ,k])[trunc(cardJ * alpha_c/2) + 1] v_sup.K[k] <- sort(V.JK[ ,k], decreasing = TRUE)[trunc(cardJ * alpha_c/2) + 1] } # compatibility region for multidimensional analysis v_inf <- sort(v.J)[trunc(cardJ * alpha_c/2) + 1] v_sup <- sort(v.J, decreasing = TRUE)[trunc(cardJ * alpha_c/2) + 1] } if (unidim) { first_variable <- TRUE for (k in 1:K) { # === RESULTS PRINTING === if (first_variable) { cat("\n") cat("================================================= \n") cat(" COMPARISON OF A GROUP TO A REFERENCE POPULATION \n") cat(" ACCORDING TO VARIANCE \n") cat("================================================= \n") if (K >= 2) cat(" ", K," variables are analysed separately." ,"\n", sep = "") } cat("\n ********** VARIABLE: ", colnames(X.IK)[k], " **********\n", sep = "") cat("Descriptive analysis \n") cat("-------------------- \n") Size <- c(n,n_c) VAR <- round(c(v_ref.K[k], v_grp.K[k]), v_digits) names(VAR) <- c(" Reference", paste(" Group (", groupName, ")", sep = "")) tmp = cbind(Size, VAR) colnames(tmp) = c(" size", " variance") print(tmp) # conclusion about the magnitude of the variation rate of variance cat(" variance ratio (group/reference): VR = ", format(round(v_ratio.K[k], vr_digits), nsmall = vr_digits), "\n", sep = "") cat(" |VR - 1| ", ifelse(notable.K[k], ">= ", "< "), notable_VR, ": descriptively, the variance ratio is ", ifelse(notable.K[k], "notable", "not notable"), ". \n\n", sep = "") cat("Combinatorial inference \n") cat("----------------------- \n") cat(" number of possible samples", sep = "") if (choose.n.n_c < Inf) { if (log10(choose.n.n_c) < 10) { cat(": ", choose.n.n_c, "; \n", sep = "") } else { cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "") } } else { cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "") } if (MC) { cat(" MC method is performed with ", cardJ, " samples. \n", sep = "") } else { cat(" exhaustive method is performed. \n", sep = "") } if (test) { cat("\n~ Combinatorial typicality test \n") if (!notable.K[k]) { cat("\n Variance ratio is not notable, there is no point in performing the test. \n") } else { cat(" test statistic: Variance \n") side <- ifelse(v_ratio.K[k] > 1, "high", "low") cat(" p-value: p = ", min(n_inf.K[k], n_sup.K[k]), "/", cardJ, " = ", format(round(p.K[k], p_digits), nsmall = p_digits), sep = "") if (p.K[k] <= alpha_t / 2) { cat(" < ", alpha_t / 2, " (one-sided) \n", " For variance, the group is atypical ", "of the reference set on the side of ", side, " values, at level ", alpha_t/2, ". \n", sep = "") } else { cat(" > ", alpha_t / 2, " (one-sided) \n", " For variance, the group is not atypical ", "of the reference set, at level ", alpha_t, ". \n", sep = "") } } } # Compatibility interval if (region) { cat("\n~ ", 100 * (1 - alpha_c), "% compatibility interval: [", format(round(v_grp.K[k] * v_ref.K[k]/v_sup.K[k], v_digits), nsmall = v_digits), ", ", format(round(v_grp.K[k] * v_ref.K[k]/v_inf.K[k], v_digits), nsmall = v_digits), "] \n", sep = "") } if (approximate) { cat("\n Approximate methods are not available.\n", sep = "") } if (export_TF) { # List of the values of statistics V (for export to SPAD) if (first_variable == TRUE) { export <- as.data.frame(V.JK[ , k]) } else { export <- cbind(export, V.JK[ , k]) } colnames(export)[dim(export)[2]] <- paste("V_", colnames(X.IK)[k], sep = "") } first_variable <- FALSE } } if (multidim) { # === RESULTS PRINTING === cat("\n") cat("============================================ \n") cat(" COMPARISON OF A CLOUD TO A REFERENCE CLOUD \n") cat(" ACCORDING TO VARIANCE \n") cat("============================================== \n") cat(" ", K," variables (", paste(colnames(X.IK), collapse = ", "), ") are analysed jointly (", K,"-dimensional geometric space). \n\n", sep= "") cat("Descriptive analysis \n") cat("-------------------- \n") Size <- c(n, n_c) VAR <- round(c(v_ref, v_grp), v_digits) names(VAR) <- c(" Reference", paste(" Group (", groupName, ")", sep = "")) tmp = cbind(Size, VAR) colnames(tmp) = c(" size", " variance") print(tmp) # conclusion about the magnitude of the variation rate of variance cat(" variance ratio (group/reference): VR = ", format(round(v_ratio, vr_digits), nsmall = vr_digits), "\n", sep = "") cat(" |VR - 1| ", ifelse(notable, ">= ", "< "), notable_VR, ": descriptively, the variance ratio is ", ifelse(notable, "notable", "not notable"), ". \n\n", sep = "") cat("Combinatorial inference \n") cat("----------------------- \n") cat(" number of possible samples", sep = "") if (choose.n.n_c < Inf) { if (log10(choose.n.n_c) < 10) { cat(": ", choose.n.n_c, "; \n", sep = "") } else { cat(": ", format(choose.n.n_c, digits = 1), "; \n", sep = "") } } else { cat(" > ", format(.Machine$double.xmax, digits = 1), "; \n", sep = "") } if (MC) { cat(" MC method is performed with ", cardJ, " samples. \n", sep = "") } else { cat(" exhaustive method is performed. \n", sep = "") } if (test) { if (!notable) { cat("\n Variance ratio is not notable, ", "there is no point in performing the test. \n") } else { cat("\n~ Combinatorial typicality test \n") cat(" test statistic: Variance \n", sep = "") side <- ifelse(v_ratio > 1, "high", "low") cat(" p-value: p = ", min(n_inf, n_sup), "/", cardJ, " = ", format(round(p, p_digits), nsmall = p_digits), sep = "") if (p <= alpha_t / 2) { cat(" < ", alpha_t / 2, " (one-sided) \n", " For variance, the group is atypical ", "of the reference set on the side of ", side, " values, at level ", alpha_t/2, ". \n", sep = "") } else { cat(" > ", alpha_t / 2, " (one-sided) \n", " For variance, the group is not atypical ", "of the reference set, at level ", alpha_t, ". \n", sep = "") } } } # Compatibility interval if (region) { cat("\n~ ", 100 * (1 - alpha_c), "% compatibility interval for variance: [", format(round(v_grp * v_ref/v_sup, v_digits), nsmall = v_digits), ", ", format(round(v_grp * v_ref/v_inf, v_digits), nsmall = v_digits), "] \n", sep = "") } if (approximate) { cat(" Approximate methods are not available.\n", sep = "") } if (export_TF) { # Export to SPAD if (!unidim) { export <- as.data.frame(v.J) } else { export <- cbind(export, v.J) } colnames(export)[dim(export)[2]] <- "V" } } } if (export_TF) { SPAD.setDataFrame(export, generateColId = FALSE) if (unidim) { cat("\n", "Test statistic distributions are exported to SPAD.\n", sep = "") } else { cat("\n", "Test statistic distribution is exported to SPAD.\n", sep = "") } } else {cat("\n")} time2 <- Sys.time() elapsed_time <- round(as.numeric(difftime(time2, time1, units = "secs")), 2) sec_s <- if (elapsed_time < 2) {" sec"} else {" secs"} cat("Elapsed time: ", elapsed_time, sec_s, "\n", sep = "") ### END ###