---
title: "Introduction to ggInterval"
subtitle: "A source-portable companion for interval-valued visualization"
author: "Bo-Syue Jiang and Han-Ming Wu"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
    number_sections: true
vignette: >
  %\VignetteIndexEntry{Introduction to ggInterval}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options:
  chunk_output_type: console
---

```{r setup, include = FALSE}
# Chunk defaults applied to every later chunk in this vignette: collapsed
# source/output blocks, "#>" output prefix, no warnings or messages, and
# centered 7 x 5 figures scaled to the full text width.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center",
  fig.width = 7,
  fig.height = 5,
  out.width = "100%"
)

# Attach the package being documented plus ggplot2, whose helpers
# (aes(), theme(), labs(), scale_*()) are called unqualified below.
library(ggInterval)
library(ggplot2)
```

# Overview

`ggInterval` extends `ggplot2` for interval-valued data. The package supports
visualizations such as scatter plots, histograms, index plots, radar plots,
line plots, PCA displays, and correlation heatmaps, while keeping the layered
grammar familiar to `ggplot2` users. This vignette focuses on source-portable
examples that can be rebuilt directly from the package. It uses built-in data,
the bundled `data.csv` file for the aggregation example, and optional sections
for packages such as `HistDAWass` and `MAINT.Data`.

# Built-in Data

```{r built_in_data}
# Load and print the two built-in example data sets.
data(facedata)
data(Environment)
facedata
Environment

# The first three characters of each row name serve as the grouping key that
# later chunks map to the `fill` aesthetic.
Subjects <- substr(rownames(facedata), 1, 3)
Subjects
```

# Basic Usage

```{r ggplot2_workflow}
# A ggInterval plot is extended with ordinary ggplot2 layers via `+`.
ggInterval_scatterplot(
  facedata,
  aes(x = AD, y = BC, fill = Subjects),
  showLabels = FALSE,
  col = "black"
) +
  scale_fill_brewer(palette = "Set1") +
  labs(fill = "Subjects")
```

# Aggregating Classical Data into Interval-Valued Data

```{r aggregation_examples}
# Read the bundled classical data and drop the `X` and `id` columns before
# aggregating.
breastData <- read.csv("data.csv")
breastData$X <- NULL
breastData <- dplyr::select(breastData, -id)

# Fix the RNG seed so the k-means grouping (presumably randomly initialised,
# as in stats::kmeans) prints the same intervals on every build. The same seed
# is used for the other k-means example in this vignette.
set.seed(1234567890)
breastData_kmeans <- classic2sym(breastData, groupby = "kmeans", k = 5)
head(breastData_kmeans$intervalData[, 1:4])

# Aggregate by the existing `diagnosis` column instead of by clustering.
breastData_diag <- classic2sym(breastData, groupby = "diagnosis")
head(breastData_diag$intervalData[, 1:4])
```

# Descriptive Statistics

```{r descriptive_statistics}
mean(facedata)
sd(facedata)
cov(facedata$AD, facedata$BC, method = "BD")
cor(facedata$AD, facedata$BC, method = "BD")
summary(facedata$AD)
summary(Environment[1:3, ])
```

# Univariate Plots

## Index Plots and Index Images

```{r index_plots}
ggInterval_indexplot(facedata, aes(x = AD))

# Same plot coloured by subject, with a reference line at the mean of AD.
ggInterval_indexplot(facedata, aes(x = AD, fill = Subjects)) +
  scale_fill_brewer(palette = "Set1") +
  geom_vline(xintercept = mean(facedata$AD), color = "darkgray") +
  labs(fill = "Subjects")

ggInterval_indexImage(facedata, aes(x = AD)) +
  coord_flip()

ggInterval_indexImage(facedata, aes(x = AD), full_strip = TRUE) +
  coord_flip()
```

```{r index_plot_ordering}
# Default row order.
face.ip <- ggInterval_indexplot(facedata, aes(fill = Subjects), plotAll = TRUE) +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "", y = "", fill = "Subjects")
face.ip

# row_order = "c"
b <- ggInterval_indexplot(facedata, aes(fill = Subjects), plotAll = TRUE, row_order = "c") +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "", y = "", fill = "Subjects")
b

# row_order = "r"
cc <- ggInterval_indexplot(facedata, aes(fill = Subjects), plotAll = TRUE, row_order = "r") +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "", y = "", fill = "Subjects")
cc
```

In
the ordered `plotAll` displays, the outer y-axis labels are suppressed by default. Here `row_order = "c"` reorders observations by interval centers within each variable panel, while `row_order = "r"` reorders them by interval ranges within each variable panel. Set `labels = TRUE` if you want the row labels repeated inside each variable panel.

## Boxplots and Histograms

```{r boxplots_histograms}
# One boxplot display per `width_type`.
ggInterval_boxplot(facedata, plotAll = TRUE, width_type = "violin-like") +
  theme(legend.position = "bottom", axis.text.x = element_blank())

ggInterval_boxplot(facedata, plotAll = TRUE, width_type = "side-by-side") +
  theme(legend.position = "bottom", axis.text.x = element_blank())

ggInterval_boxplot(facedata, aes(x = AD), width_type = "quantile-depth")

# Histograms for all variables; the returned object is printed and its
# `Table AD` element is extracted.
hist.obj.equal <- ggInterval_hist(facedata, plotAll = TRUE, bins = 10)
print(hist.obj.equal)
hist.obj.equal$`Table AD`

hist.obj.unequal <- ggInterval_hist(
  facedata,
  plotAll = TRUE,
  method = "unequal-bin"
)
print(hist.obj.unequal)

# Single-variable histograms with each binning method.
ggInterval_hist(facedata, aes(x = AD), method = "equal-bin", bins = 10)
ggInterval_hist(facedata, aes(x = AD), method = "unequal-bin")
```

## Line Plot

This example is evaluated only when the optional package `TTR` is installed.

```{r lineplot_stocklike, eval = requireNamespace("TTR", quietly = TRUE)}
# The chunk is skipped (rather than aborting the whole vignette build) when
# TTR is unavailable, matching the other optional-package examples.
data("ttrc", package = "TTR")

# Keep the first three months of 1985 and the price columns that are used.
stock.data <- subset(
  ttrc[, c("Date", "Close", "Low", "High")],
  format(Date, "%Y-%m") %in% c("1985-01", "1985-02", "1985-03")
)

# Month becomes the facet variable, Day of month the x position.
stock.data$Month <- factor(
  month.abb[as.integer(format(stock.data$Date, "%m"))],
  levels = month.abb[1:3]
)
stock.data$Day <- as.integer(format(stock.data$Date, "%d"))

# Build one interval per row from the daily Low and High prices.
stock.data.LH <- classic2sym(
  stock.data,
  groupby = "customize",
  minData = stock.data$Low,
  maxData = stock.data$High
)

# NOTE(review): `V1` is presumably the name classic2sym() gives to the single
# customized interval variable -- confirm if classic2sym() naming changes.
# The points overlay the closing price on each interval.
ggInterval_lineplot(stock.data.LH, aes(y = V1, x = Day), barWidth = 0.6) +
  geom_point(aes(y = Close), shape = 21, fill = "#D95F02", color = "black", size = 1.6, stroke = 0.2) +
  coord_cartesian(xlim = c(1, 31), expand = FALSE) +
  facet_wrap(~Month, ncol = 1, scales = "free_y") +
  scale_x_continuous(breaks = c(1, 8, 15, 22, 29)) +
  labs(title = "Interval-valued Line Plot", x = "Day of month", y = "Price") +
  ggthemes::theme_economist() +
  theme(strip.text = element_text(face = "bold"))
```

## Min-Max Plot

```{r minmax_plot}
# Build the min-max plot for the x-th variable of `facedata`.
mm.plot <- function(x) {
  # NOTE(review): `plot.var` is assigned globally with `<<-`, presumably so the
  # aes() expression can still be resolved when ggInterval_MMplot() evaluates
  # it outside this function -- confirm before replacing it with a local.
  plot.var <<- names(facedata)[x]
  ggInterval_MMplot(facedata, aes(facedata[[plot.var]], size = 2)) +
    coord_fixed(ratio = 1) +
    theme(legend.position = "none")
}

# One plot per variable, arranged on a 2 x 3 grid.
mm.plot.list <- lapply(seq_along(facedata), mm.plot)
gridExtra::marrangeGrob(mm.plot.list, nrow = 2, ncol = 3, top = "")
```

## Center-Range Plot

```{r centerrange_plot}
# Shared aspect ratio for both center-range displays.
cr_aspect_ratio <- 5

ggInterval_CRplot(facedata, aes(size = 1.5), plotAll = TRUE) +
  coord_fixed(ratio = cr_aspect_ratio)

# Same display on the `intervalData` component returned by scale().
facedata.scale <- scale(facedata)$intervalData
ggInterval_CRplot(facedata.scale, aes(size = 1.5), plotAll = TRUE) +
  coord_fixed(ratio = cr_aspect_ratio)
```

# Bivariate Plots

## Scatter Plot with Label Controls

```{r scatterplot_labels}
# The axis expansions add extra room around the data for the labels.
ggInterval_scatterplot(
  facedata,
  aes(x = BC, y = AD, fill = Subjects),
  showLabels = TRUE,
  labelSize = 2.6,
  labelPosition = "topright",
  labelNudgeX = -0.2,
  labelNudgeY = 0.15,
  checkOverlap = FALSE,
  color = "black"
) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(expand = expansion(mult = c(0.08, 0.12))) +
  scale_y_continuous(expand = expansion(mult = c(0.06, 0.10))) +
  labs(fill = "Subjects")
```

## 2D Histograms

```{r hist2d_equal}
# Only the `plot` element of the returned object is kept.
face.2dh.equal <- ggInterval_2Dhist(
  facedata,
  aes(x = BC, y = AD, col = "white"),
  method = "equal-bin",
  xBins = 10,
  yBins = 10,
  display = "p",
  palette = "Blues",
  direction = 1,
  cell_labels = TRUE
)$plot
face.2dh.equal + coord_fixed(ratio = 1)
```

```{r hist2d_unequal}
face.2dh.unequal <- ggInterval_2Dhist(
  facedata,
  aes(x = BC, y = AD, col = "white"),
  method = "unequal-bin",
  display = "p",
  palette = "Blues",
  direction = 1,
  tau = 0.5
)$plot
face.2dh.unequal + coord_fixed(ratio = 1)
```

# Matrix Displays

## Scatter Matrix

```{r scatter_matrix}
ggInterval_scatterMatrix(facedata)
ggInterval_scatterMatrix(facedata, aes(fill = "steelblue", alpha = 0.3))
```

## 2D Histogram Matrices

```{r hist2d_matrix_equal}
ggInterval_2DhistMatrix(
  facedata,
  aes(col = "white"),
  method = "equal-bin",
  xBins = 10,
  yBins = 10,
  display = "p",
  removeZero = TRUE,
  palette = "Blues",
  direction = 1,
  cell_labels = FALSE
)
```

```{r hist2d_matrix_unequal}
ggInterval_2DhistMatrix(
  facedata,
  aes(col = "white"),
  method = "unequal-bin",
  display = "p",
  palette = "Blues",
  direction = 1,
  tau = 0.5,
  removeZero = TRUE,
  cell_labels = FALSE
)
```

## Image Plots

```{r image_plots}
# Four variants: full_strip TRUE/FALSE, each with and without
# column_condition = FALSE.
ggInterval_indexImage(
  facedata,
  plotAll = TRUE,
  full_strip = TRUE,
  column_condition = FALSE
) +
  scale_colour_distiller(palette = "Blues", direction = 1) +
  labs(x = "Subjects") +
  theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())

ggInterval_indexImage(
  facedata,
  plotAll = TRUE,
  full_strip = TRUE
) +
  scale_colour_distiller(palette = "Spectral") +
  labs(x = "Subjects")

ggInterval_indexImage(
  facedata,
  plotAll = TRUE,
  full_strip = FALSE,
  column_condition = FALSE
) +
  scale_colour_distiller(palette = "Blues", direction = 1) +
  labs(x = "Subjects") +
  theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())

ggInterval_indexImage(
  facedata,
  plotAll = TRUE,
  full_strip = FALSE
) +
  scale_colour_distiller(palette = "Spectral") +
  labs(x = "Subjects")
```

## Radar Plots

```{r radar_plots}
# ggthemes supplies theme_hc() here and theme_economist_white() in the
# customization chunk below.
library(ggthemes)

ggInterval_radarplot(
  Environment,
  plotPartial = c(4, 6),
  showLegend = FALSE,
  base_circle = FALSE,
  base_lty = 1,
  addText = FALSE,
  addText_modal = FALSE
) +
  scale_fill_manual(values = c("darkred", "darkblue")) +
  scale_color_manual(values = c("darkred", "darkblue")) +
  labs(title = "") +
  theme_hc()

# Same selection drawn with type = "rect".
ggInterval_radarplot(
  Environment,
  plotPartial = c(4, 6),
  showLegend = FALSE,
  base_circle = FALSE,
  base_lty = 1,
  addText = FALSE,
  addText_modal = FALSE,
  type = "rect"
) +
  scale_fill_manual(values = c("darkred", "darkblue")) +
  scale_color_manual(values = c("darkred", "darkblue")) +
  labs(title = "") +
  theme_hc()

# Quantile radar plots for each data set.
ggInterval_radarplot(
  facedata,
  base_circle = FALSE,
  base_lty = 1,
  type = "quantile",
  quantileNum = 5,
  showLegend = TRUE,
  Drift = 0
) +
  scale_fill_brewer(palette = "Greys") +
  labs(title = "", fill = "Quantiles") +
  theme_hc()

ggInterval_radarplot(
  Environment,
  base_circle = FALSE,
  base_lty = 1,
  type = "quantile",
  quantileNum = 5,
  showLegend = TRUE,
  Drift = 0
) +
  scale_fill_brewer(palette = "Greys") +
  labs(title = "", fill = "Quantiles") +
  theme_hc()
```

# Customization

```{r customization_examples}
# Fixed seed so the k-means cluster assignment is the same on every build.
set.seed(1234567890)

# Work on a copy so the cluster column does not leak into `facedata`.
facedata.tmp <- facedata
facedata.tmp$cluster <- RSDA::sym.kmeans(facedata.tmp, k = 3)$cluster

# Index plot labelled with row names and faceted by cluster.
ggInterval_indexplot(facedata.tmp, aes(y = AD, fill = Subjects)) +
  geom_text(
    aes(x = seq_len(nrow(facedata.tmp)), y = .data$AD$min, label = rownames(facedata.tmp)),
    vjust = 1.5,
    size = 2
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "Customized Index Plot Using K-Means Clusters", fill = "Subjects") +
  facet_grid(cols = vars(cluster), scales = "free_x", space = "free_x") +
  theme_economist_white() +
  theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
```

# Working with Other SDA Packages

## Histogram-Valued Data via HistDAWass

This example is evaluated only when the optional package `HistDAWass` is installed.

```{r histdawass_example, eval = requireNamespace("HistDAWass", quietly = TRUE)}
BLOOD <- HistDAWass::BLOOD

# Per-variable "min" and "max" statistics of the histogram-valued data,
# combined side by side into one data frame.
blood.min <- HistDAWass::get.MatH.stats(BLOOD, stat = "min")
blood.max <- HistDAWass::get.MatH.stats(BLOOD, stat = "max")
blood <- data.frame(blood.min, blood.max)

# NOTE(review): columns 2:4 and 6:8 are assumed to hold the minima and maxima
# respectively -- confirm against the layout returned by get.MatH.stats().
myBLOOD <- classic2sym(
  blood,
  groupby = "customize",
  minData = blood[, 2:4],
  maxData = blood[, 6:8]
)

# Restore the original variable names, then keep only the PCA plot.
colnames(myBLOOD$intervalData) <- HistDAWass::get.MatH.main.info(BLOOD)$varnames
ggInterval_PCA(myBLOOD$intervalData, plot = FALSE)$ggplotPCA
```

## Midpoint-Range Data via MAINT.Data

This example is evaluated only when the optional package `MAINT.Data` is installed.

```{r maint_data_example, eval = requireNamespace("MAINT.Data", quietly = TRUE)}
AbaloneIdt <- MAINT.Data::AbaloneIdt

# Convert the midpoint / log-range slots into interval bounds:
# range = exp(LogR), lower = MidP - range / 2, upper = MidP + range / 2.
AbaloneRange <- exp(AbaloneIdt@LogR)
AbaloneMid <- AbaloneIdt@MidP
AbaloneBounds <- data.frame(
  AbaloneMid - AbaloneRange / 2,
  AbaloneMid + AbaloneRange / 2
)

# NOTE(review): assumes seven variables, so columns 1:7 are the lower bounds
# and 8:14 the upper bounds -- confirm against AbaloneIdt.
myAbalone <- classic2sym(
  AbaloneBounds,
  groupby = "customize",
  minData = AbaloneBounds[, 1:7],
  maxData = AbaloneBounds[, 8:14]
)
colnames(myAbalone$intervalData) <- AbaloneIdt@VarNames
ggInterval_PCA(myAbalone$intervalData, plot = FALSE)$ggplotPCA
```

# PCA and Correlation Heatmaps

```{r pca_example}
# Symbolic PCA via RSDA; the component table gets the row names of `facedata`.
pca.results <- RSDA::sym.pca(facedata, method = "tops")
rownames(pca.results$Sym.Components) <- rownames(facedata)

# Scatter plot of the first two components, filled by `Subjects` (defined in
# the built_in_data chunk).
ggInterval_scatterplot(
  pca.results$Sym.Components,
  aes(Dim.1, Dim.2, fill = as.factor(Subjects)),
  labelSize = 2.6,
  labelPosition = "topright",
  labelNudgeX = -0.2,
  labelNudgeY = 0.15,
  checkOverlap = TRUE,
  col = "black"
) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(expand = expansion(mult = c(0.08, 0.12))) +
  scale_y_continuous(expand = expansion(mult = c(0.06, 0.10))) +
  coord_fixed(ratio = 1) +
  labs(x = "PCA-1", y = "PCA-2", fill = "Subjects")

# The package's own PCA display, grouped by subject.
ggInterval_PCA(facedata, poly = TRUE, concepts_group = as.factor(Subjects)) +
  coord_fixed(ratio = 1) +
  labs(x = "PCA-1", y = "PCA-2")
```

```{r corrplot_example}
ggInterval_corrplot(facedata, method = "BG", triangle = "lower")
```

# Citation

Use `citation("ggInterval")` after installation to retrieve the current package citation metadata.

# Session Information

```{r session_info}
sessionInfo()
```