#' Feature Selection for Spectral Data
#'
#' Reads a cleaned spectral dataset, keeps the healthy (`treatment == 0`) and
#' diseased (`treatment == 1`) samples, fits one linear model per spectral
#' variable (`variable ~ treatment`), applies an FDR (Benjamini-Hochberg)
#' correction to the treatment-effect p-values, and exports the variables
#' whose adjusted p-value does not exceed `fdr_threshold`.
#'
#' @details
#' * Columns named `treatment`, `Sample`, `Scan.date`, `Scan.time`, `unit`,
#'   `sensor`, `genotype` and `g_alias` are treated as metadata. Every other
#'   *numeric* column is treated as a spectral variable; non-numeric columns
#'   are skipped.
#' * The FDR correction is computed over the treatment-effect tests only
#'   (one per spectral variable). Intercept rows are excluded from the
#'   correction because they are not hypotheses of interest.
#' * Variables are grouped by the first three characters of their name and
#'   only the most significant variable of each group is kept.
#'
#' @param file_path Path to the cleaned dataset (output of qaqcs function).
#'   Must be an Excel file containing a `treatment` column.
#' @param output_path Path to save the selected features table (`.xlsx`).
#' @param fdr_threshold Threshold for filtering significant features
#'   (default: 0.01). Must be a single number between 0 and 1.
#' @return A data.table containing selected spectral variables, ordered by
#'   increasing FDR-adjusted p-value. The same table is written to
#'   `output_path`.
#' @export
#' @examples
#' # Create mock spectral data
#' library(openxlsx)
#' mock_data <- data.frame(
#'   treatment = sample(0:1, 100, replace = TRUE),
#'   var1 = rnorm(100),
#'   var2 = rnorm(100),
#'   var3 = rnorm(100)
#' )
#' temp_file <- tempfile(fileext = ".xlsx")
#' write.xlsx(mock_data, temp_file)
#'
#' # Perform feature selection
#' output_path <- tempfile(fileext = ".xlsx")
#' selected_features <- feature_selection(temp_file, output_path, fdr_threshold = 0.01)
#' head(selected_features)
feature_selection <- function(file_path, output_path = "selected_features.xlsx", fdr_threshold = 0.01) {

  # Fail early on a threshold that cannot be compared against a p-value
  if (!is.numeric(fdr_threshold) || length(fdr_threshold) != 1L ||
        is.na(fdr_threshold) || fdr_threshold < 0 || fdr_threshold > 1) {
    stop("`fdr_threshold` must be a single number between 0 and 1.",
         call. = FALSE)
  }

  # Read the dataset while preserving original column names
  df <- data.table::as.data.table(readxl::read_xlsx(file_path))

  if (!"treatment" %in% names(df)) {
    stop("Column `treatment` not found in '", file_path, "'.", call. = FALSE)
  }

  # Filter for treatment 0 (Healthy) and 1 (Diseased)
  df <- df[treatment %in% c(0, 1)]
  if (nrow(df) == 0L) {
    stop("No rows with `treatment` equal to 0 or 1 in '", file_path, "'.",
         call. = FALSE)
  }

  # Detect spectral variables: numeric columns that are not known metadata.
  # Non-numeric leftovers (free text, dates, ...) cannot be a regression
  # response, so they are skipped instead of being passed to lm().
  metadata_cols <- c("treatment", "Sample", "Scan.date", "Scan.time",
                     "unit", "sensor", "genotype", "g_alias")
  candidate_vars <- setdiff(names(df), metadata_cols)
  is_numeric_col <- vapply(
    candidate_vars,
    function(var) is.numeric(df[[var]]),
    logical(1)
  )
  spectral_vars <- candidate_vars[is_numeric_col]
  if (length(spectral_vars) == 0L) {
    stop("No numeric spectral variables found in '", file_path, "'.",
         call. = FALSE)
  }

  # Fit one linear model per spectral variable. Backticks protect
  # non-syntactic column names (e.g. wavelengths such as `350`).
  results_list <- lapply(spectral_vars, function(var) {
    model <- lm(as.formula(paste0("`", var, "` ~ treatment")), data = df)
    tidy_model <- broom::tidy(model)
    tidy_model$Variable <- var  # Preserve the original column name
    tidy_model
  })

  results <- data.table::rbindlist(results_list, fill = TRUE)

  # Keep only the treatment-effect rows BEFORE adjusting: the intercept tests
  # are not hypotheses of interest and including them would change the size
  # and ranking of the test family used by the FDR correction.
  treatment_effects <- results[grepl("treatment", term, fixed = TRUE)]
  treatment_effects[, fdr_p_value := p.adjust(p.value, method = "fdr")]

  # Keep the significant treatment effects
  significant_features <- treatment_effects[fdr_p_value <= fdr_threshold]

  # Replace 'treatment1' with 'Treatment' for clarity
  significant_features[, term := gsub("treatment1", "Treatment", term)]

  # Group variables by the first three characters of their name
  significant_features[, Unique_Variable := substr(Variable, 1, 3)]

  # Sort by significance first so that de-duplication keeps the most
  # significant variable of each group (raw p-value breaks FDR ties).
  significant_features <- significant_features[order(fdr_p_value, p.value)]
  selected_features <- significant_features[!duplicated(Unique_Variable)]

  # Export the results as an Excel file
  openxlsx::write.xlsx(as.data.frame(selected_features), output_path)

  # Return the selected features
  selected_features
}
