### R code from vignette source 'rebmix.Rnw' ################################################### ### code chunk number 1: rebmix-code-0 ################################################### ############################################## ## R sources for reproducing the results in ## ## rebmix package ## ############################################## options(prompt = "R> ", continue = "+ ", width = 80, useFancyQuotes = FALSE, digits = 3) ################################################### ### code chunk number 2: rebmix-code-1 ################################################### ################### ## Preliminaries ## ################### ## load package and set prompt before starting new page to TRUE. library(rebmix) devAskNewPage(ask = TRUE) ################################################### ### code chunk number 3: rebmix-code-2 ################################################### ###################### ## Gamma datasets ## ###################### ## Generate gamma datasets. n <- c(100, 100, 100, 100) Theta <- new("RNGMIX.Theta", c = 4, pdf = "gamma") a.theta1(Theta) <- rep(1/100, 4) a.theta2(Theta) <- c(200, 400, 600, 800) gamma1 <- RNGMIX(Dataset.name = "gamma1", n = n, Theta = a.Theta(Theta)) n <- c(40, 360) Theta <- new("RNGMIX.Theta", c = 2, pdf = "gamma") a.theta1(Theta) <- c(1/27, 1 / 270) a.theta2(Theta) <- c(9, 90) gamma2 <- RNGMIX(Dataset.name = "gamma2", n = n, Theta = a.Theta(Theta)) n <- c(80, 240, 80) Theta <- new("RNGMIX.Theta", c = 3, pdf = "gamma") a.theta1(Theta) <- c(1/20, 1, 1/20) a.theta2(Theta) <- c(40, 6, 200) gamma3 <- RNGMIX(Dataset.name = "gamma3", rseed = -4, n = n, Theta = a.Theta(Theta)) ################################################### ### code chunk number 4: rebmix-code-3 ################################################### ## Estimate number of components, component weights and component parameters. gamma1est <- REBMIX(Dataset = a.Dataset(gamma1), Preprocessing = "kernel density estimation", cmax = 8, Criterion = "BIC", pdf = "gamma") gamma2est <- REBMIX(Dataset = a.Dataset(gamma2), Preprocessing = "histogram", cmax = 8, Criterion = "BIC", pdf = "gamma") gamma3est <- REBMIX(Dataset = a.Dataset(gamma3), Preprocessing = "histogram", cmax = 8, Criterion = "BIC", pdf = "gamma", K = 23:27) ################################################### ### code chunk number 5: gamma3-fig ################################################### plot(gamma3est, pos = 1, what = c("pdf", "marginal cdf"), ncol = 2, npts = 1000, family = "sans") ################################################### ### code chunk number 6: rebmix-code-4 ################################################### summary(gamma2est) a.theta1.all(gamma1est, pos = 1) a.theta2.all(gamma1est, pos = 1) ################################################### ### code chunk number 7: rebmix-code-5 ################################################### ## Bootstrap finite mixture. gamma3boot <- boot(x = gamma3est, pos = 1, Bootstrap = "p", B = 10) gamma3boot summary(gamma3boot) ################################################### ### code chunk number 8: rebmix-code-6 ################################################### ## EM.control object creation. EM <- new("EM.Control", strategy = "best", variant = "EM", acceleration = "fixed", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 0) gamma1est.em <- REBMIX(Dataset = a.Dataset(gamma1), Preprocessing = "kernel density estimation", cmax = 8, Criterion = "BIC", pdf = "gamma", EMcontrol = EM) gamma2est.em <- REBMIX(Dataset = a.Dataset(gamma2), Preprocessing = "histogram", cmax = 8, Criterion = "BIC", pdf = "gamma", EMcontrol = EM) gamma3est.em <- REBMIX(Dataset = a.Dataset(gamma3), Preprocessing = "histogram", cmax = 8, Criterion = "BIC", pdf = "gamma", K = 23:27, EMcontrol = EM) summary(gamma1est.em) summary(gamma2est.em) summary(gamma3est.em) ################################################### ### code chunk number 9: rebmix-code-7 ################################################### ######################### ## Poisson dataset ## ######################### ## Generate the Poisson dataset. n <- c(200, 200, 200) Theta <- new("RNGMIX.Theta", c = 3, pdf = rep("Poisson", 2)) a.theta1(Theta, 1) <- c(3, 2) a.theta1(Theta, 2) <- c(9, 10) a.theta1(Theta, 3) <- c(15, 16) poisson <- RNGMIX(Dataset.name = paste("Poisson_", 1:10, sep = ""), n = n, Theta = a.Theta(Theta)) ################################################### ### code chunk number 10: rebmix-code-8 ################################################### ## Estimate number of components, component weights and component parameters. poissonest <- REBMIX(Dataset = a.Dataset(poisson), Preprocessing = "histogram", cmax = 10, Criterion = "MDL5", pdf = rep("Poisson", 2), K = 1) ################################################### ### code chunk number 11: poisson-fig ################################################### plot(poissonest, pos = 1, what = c("pdf", "marginal pdf", "IC", "D", "logL"), nrow = 2, ncol = 3, npts = 1000, family = "sans") ################################################### ### code chunk number 12: poisson-clu-fig ################################################### poissonclu <- RCLRMIX(x = poissonest, pos = 1, Zt = a.Zt(poisson)) plot(poissonclu, family = "sans") ################################################### ### code chunk number 13: rebmix-code-9 ################################################### ## Visualize results. summary(poissonest) a.theta1.all(poissonest, pos = 1) a.theta2.all(poissonest, pos = 1) ################################################### ### code chunk number 14: rebmix-code-10 ################################################### ## EM.control object creation. EM <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "fixed", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 0) poissonest.em <- REBMIX(Dataset = a.Dataset(poisson), Preprocessing = "histogram", cmax = 10, Criterion = "MDL5", pdf = rep("Poisson", 2), K = 1, EMcontrol = EM) summary(poissonest.em) ################################################### ### code chunk number 15: rebmix-code-11 ################################################### ################################### ## Multivariate normal dataset ## ################################### ## Generate normal dataset. n <- c(50, 50, 50, 50, 50) Theta <- new("RNGMVNORM.Theta", c = 5, d = 2) a.theta1(Theta, 1) <- c(2.7, 3.7) a.theta1(Theta, 2) <- c(5.7, 9.1) a.theta1(Theta, 3) <- c(2.0, 9.0) a.theta1(Theta, 4) <- c(9.5, 6.6) a.theta1(Theta, 5) <- c(6.3, 0.6) a.theta2(Theta, 1) <- c(0.9, -0.1, -0.1, 0.4) a.theta2(Theta, 2) <- c(2.8, -1.3, -1.3, 1.5) a.theta2(Theta, 3) <- c(0.1, 0.0, 0.0, 0.3) a.theta2(Theta, 4) <- c(1.3, -0.4, -0.4, 0.4) a.theta2(Theta, 5) <- c(0.5, 0.3, 0.3, 2.5) mvnorm.simulated <- RNGMIX(model = "RNGMVNORM", Dataset.name = "mvnormdataset", rseed = -1, n = n, Theta = a.Theta(Theta)) ################################################### ### code chunk number 16: rebmix-code-12 ################################################### ## Estimate number of components, component weights and component parameters. mvnormest <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 20, Criterion = "BIC") ################################################### ### code chunk number 17: mvnorm-fig ################################################### plot(mvnormest, family = "sans") ################################################### ### code chunk number 18: mvnorm-clu-fig ################################################### mvnormclu <- RCLRMIX(model = "RCLRMVNORM", x = mvnormest) plot(mvnormclu, family = "sans") ################################################### ### code chunk number 19: rebmix-code-13 ################################################### summary(mvnormest) ################################################### ### code chunk number 20: rebmix-code-14 ################################################### summary(mvnormclu) ################################################### ### code chunk number 21: rebmix-code-15 ################################################### ## EM.control object creation. EM <- new("EM.Control", strategy = "single", variant = "EM", acceleration = "fixed", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Optimal K estimation. K <- optbins(Dataset = a.Dataset(mvnorm.simulated), Rule = "Knuth equal", kmin = 2, kmax = 100) ## Finite mixture estimation mvnormest.em <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 20, K = K, Criterion = "BIC", EMcontrol = EM) summary(mvnormest.em) ################################################### ### code chunk number 22: rebmix-code-16 ################################################### ## EM.control object creation. CEM <- new("EM.Control", strategy = "exhaustive", variant = "ECM", acceleration = "fixed", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.cem <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 10, Criterion = "ICL", EMcontrol = CEM) mvnorm.clu <- RCLRMIX(model = "RCLRMVNORM", x = mvnormest.cem) ################################################### ### code chunk number 23: mvnorm-clu-fig-cem ################################################### plot(mvnorm.clu, family = "sans") ################################################### ### code chunk number 24: rebmix-code-17 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.normal <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "fixed", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormestest.em.normal <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.normal) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormestest.em.normal, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormestest.em.normal, pos = 1, col.name = "IC")) ################################################### ### code chunk number 25: rebmix-code-18 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.fixed1.5 <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "fixed", acceleration.multiplier = 1.5, tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.fixed1.5 <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.fixed1.5) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.fixed1.5, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.fixed1.5, pos = 1, col.name = "IC")) ################################################### ### code chunk number 26: rebmix-code-19 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.line <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "line", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.line <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.line) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.line, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.line, pos = 1, col.name = "IC")) ################################################### ### code chunk number 27: rebmix-code-20 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.golden <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "golden", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.golden <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.golden) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.golden, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.golden, pos = 1, col.name = "IC")) ################################################### ### code chunk number 28: rebmix-code-21 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.stem1 <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "stem1", tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.stem1 <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.stem1) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.stem1, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.stem1, pos = 1, col.name = "IC")) ################################################### ### code chunk number 29: rebmix-code-22 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.stem2 <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "stem2", tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.stem2 <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.stem2) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.stem2, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.stem2, pos = 1, col.name = "IC")) ################################################### ### code chunk number 30: rebmix-code-23 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.stem3 <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "stem3", tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.stem3 <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.stem3) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.stem3, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.stem3, pos = 1, col.name = "IC")) ################################################### ### code chunk number 31: rebmix-code-24 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.square1 <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "square1", tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.square1 <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.square1) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.square1, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.square1, pos = 1, col.name = "IC")) ################################################### ### code chunk number 32: rebmix-code-25 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.square2 <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "square2", tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.square2 <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.square2) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.square2, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.square2, pos = 1, col.name = "IC")) ################################################### ### code chunk number 33: rebmix-code-26 ################################################### ## Create the EM.Control object to utilize one of the REBMIX&EM strategies. EM.square3 <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "square3", tolerance = 1e-4, maximum.iterations = 1000, K = 0) ## Estimate number of components, component weights and component parameters. mvnormest.em.square3 <- REBMIX(model = "REBMVNORM", Dataset = a.Dataset(mvnorm.simulated), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM.square3) cat("Total number of EM algorithm iterations: ", a.summary.EM(mvnormest.em.square3, pos = 1, col.name = "total.iterations.nbr"), ". Value of BIC: ", a.summary(mvnormest.em.square3, pos = 1, col.name = "IC")) ################################################### ### code chunk number 34: rebmix-code-27 ################################################### data(sensorlessdrive) ## Split dataset into train (75%) and test (25%) subsets. set.seed(5) Drive <- split(p = 0.75, Dataset = sensorlessdrive, class = 4) ################################################### ### code chunk number 35: rebmix-code-28 ################################################### ## Estimate number of components, component weights and component ## parameters for train subsets. driveest <- REBMIX(model = "REBMVNORM", Dataset = a.train(Drive), Preprocessing = "histogram", cmax = 15, Criterion = "BIC") ################################################### ### code chunk number 36: rebmix-code-29 ################################################### ## Selected features. drivecla <- RCLSMIX(model = "RCLSMVNORM", x = list(driveest), Dataset = a.test(Drive), Zt = a.Zt(Drive)) ################################################### ### code chunk number 37: rebmix-code-30 ################################################### drivecla summary(drivecla) ################################################### ### code chunk number 38: drive-cla-fig ################################################### # Plot selected features. plot(drivecla, nrow = 3, ncol = 2, family = "sans") ################################################### ### code chunk number 39: rebmix-code-31 ################################################### ## EM.control object creation. EM <- new("EM.Control", strategy = "exhaustive", variant = "EM", acceleration = "fixed", acceleration.multiplier = 1.0, tolerance = 1e-4, maximum.iterations = 1000, K = 300) ## Estimate number of components, component weights and component ## parameters for train subsets. driveest <- REBMIX(model = "REBMVNORM", Dataset = a.train(Drive), Preprocessing = "histogram", cmax = 15, Criterion = "BIC", EMcontrol = EM) drivecla <- RCLSMIX(model = "RCLSMVNORM", x = list(driveest), Dataset = a.test(Drive), Zt = a.Zt(Drive)) summary(drivecla) ################################################### ### code chunk number 40: rebmix-code-32 ################################################### data(adult) ## Find complete cases. adult <- adult[complete.cases(adult),] ## Replace levels with numbers. adult <- as.data.frame(data.matrix(adult)) ################################################### ### code chunk number 41: rebmix-code-33 ################################################### ## Find numbers of levels. cmax <- unlist(lapply(apply(adult[, c(-1, -16)], 2, unique), length)) cmax ################################################### ### code chunk number 42: rebmix-code ################################################### ## Split adult dataset into train and test subsets for two Incomes ## and remove Type and Income columns. Adult <- split(p = list(type = 1, train = 2, test = 1), Dataset = adult, class = 16) ################################################### ### code chunk number 43: rebmix-code-34 ################################################### ## Estimate number of components, component weights and component parameters ## for the set of chunks 1:14. adultest <- list() for (i in 1:14) { adultest[[i]] <- REBMIX(Dataset = a.train(chunk(Adult, i)), Preprocessing = "histogram", cmax = min(120, cmax[i]), Criterion = "BIC", pdf = "Dirac", K = 1) } ################################################### ### code chunk number 44: rebmix-code ################################################### ## Class membership prediction based upon the best first search algorithm. adultcla <- BFSMIX(x = adultest, Dataset = a.test(Adult), Zt = a.Zt(Adult)) ################################################### ### code chunk number 45: rebmix-code-35 ################################################### adultcla summary(adultcla) ################################################### ### code chunk number 46: adult-cla-fig ################################################### ## Plot selected chunks. plot(adultcla, nrow = 5, ncol = 2, family = "sans") ################################################### ### code chunk number 47: rebmix-code-36 ################################################### rm(list = ls())