## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.dim = c(6, 4)
)

## ----setup--------------------------------------------------------------------
library(EZbakR)
library(dplyr)
library(ggplot2)
library(MASS) # For plotting

# For coloring points by density:
# Source: https://slowkow.com/notes/ggplot2-color-by-density/
get_density <- function(x, y, ...) {
  dens <- kde2d(x, y, ...)
  ix <- findInterval(x, dens$x)
  iy <- findInterval(y, dens$y)
  ii <- cbind(ix, iy)
  return(dens$z[ii])
}

## ----class.source = 'fold-show', eval=TRUE------------------------------------

### SIMULATE DATA

simdata <- EZSimulate(nfeatures = 200,
                      ntreatments = 1,
                      mode = "dynamics",
                      label_time = c(1, 3),
                      dynamics_preset = "nuc2cyto")
              
### EZBAKR ANALYSES

ezbdo <- EZbakRData(simdata$cB, simdata$metadf)

ezbdo <- EstimateFractions(ezbdo)


# Lengths of features:
# In simulation, all features are same length, so this is here for illustrative
# purposes only
feature_lengths <- tibble(
  GF = unique(simdata$cB$GF),
  length = 1000
)


# Averaging fractions rather than kinetics
# Note the model, which specifies that we want to average data for a given
# compartment AND label time (tl).
ezbdo <- AverageAndRegularize(ezbdo,
                              formula_mean = ~tl:compartment - 1,
                              type = "fractions",
                              feature_lengths = feature_lengths,
                              parameter = "logit_fraction_highTC")


## ----class.source = 'fold-show', eval=TRUE------------------------------------
### Input #2: the graph
graph <- matrix(c(0, 1, 0,
                  0, 0, 2,
                  3, 0, 0),
                nrow = 3,
                ncol = 3,
                byrow = TRUE)
colnames(graph) <- c("0", "N", "C")
rownames(graph) <- colnames(graph)

# Each row and column represents an RNA species being modeled
# Here we are modeling the dynamics of nuclear (N) and cytoplasmic (C)
# RNA. "0" represents no RNA, and must appear in the graph. N is synthesized
# from 0 (i.e., its synthesis kinetics are 0-th order), and C is degraded to 0
#
# Numbers in graph should range from 0 to the number of parameters in your model.
# Order is arbitrary

## ----class.source = 'fold-show', eval=TRUE------------------------------------
modeled_to_measured <- list(
  nuclear = list(GF ~ N),
  cytoplasm = list(GF ~ C),
  total = list(GF ~ C + N) # total RNA is a combination of C and N
)

## ----class.source = 'fold-show', eval=TRUE------------------------------------
# See documentation (?EZDynamics()) for descriptions of all parameters
# specified. Bit of trickery here with an object loaded with EZbakR
# called ode_models. More on this later
ezbdo <- EZDynamics(ezbdo,
                    graph = graph,
                    sub_features = "GF",
                    grouping_features = "GF",
                    sample_feature = "compartment",
                    modeled_to_measured = ode_models$nuc2cyto$formulas)


## ----pressure1, fig.width=6, out.width="40%", fig.align="default", fig.show='hold', eval=TRUE----
gt <- simdata$ground_truth$parameter_truth

dynfit <- ezbdo$dynamics$dynamics1 

compare <- dplyr::inner_join(dynfit, gt %>% dplyr::rename(GF = feature),
                             by = "GF")


true_scale_factor <- mean(exp(compare$logk1[compare$logk1 < 9.9]) / compare$true_k1[compare$logk1 < 9.9])


gPk1 <- compare %>%
  dplyr::mutate(density = get_density(
    x = log(true_k1),
    y = log(exp(logk1)/true_scale_factor),
    n = 200
  )) %>%
  ggplot(aes(x = log(true_k1),
             y = log(exp(logk1)/true_scale_factor),
             color = density)) +
  geom_point(size=0.9) +
  theme_classic() +
  scale_color_viridis_c() +
  xlab("log(true ksyn)") +
  ylab("log(estimated ksyn)") +
  geom_abline(slope =1,
              intercept = 0,
              color = 'darkred',
              linewidth = 0.75,
              linetype = 'dotted') +
  theme(axis.text=element_text(size=10),
    axis.title=element_text(size=12),
    legend.text=element_text(size=10),
    legend.title=element_text(size=12))




gPk2 <- compare %>%
  dplyr::mutate(density = get_density(
    x = log(true_k2),
    y = logk2,
    n = 200
  )) %>%
  ggplot(aes(x = log(true_k2),
             y = logk2,
             color = density)) +
  geom_point(size=0.9) +
  theme_classic() +
  scale_color_viridis_c() +
  xlab("log(true kexp)") +
  ylab("log(estimated kexp)") +
  geom_abline(slope =1,
              intercept = 0,
              color = 'darkred',
              linewidth = 0.75,
              linetype = 'dotted') +
  theme(axis.text=element_text(size=10),
    axis.title=element_text(size=12),
    legend.text=element_text(size=10),
    legend.title=element_text(size=12))

gPk3 <- compare %>%
  dplyr::mutate(density = get_density(
    x = log(true_k3),
    y = logk3,
    n = 200
  )) %>%
  ggplot(aes(x = log(true_k3),
             y = logk3,
             color = density)) +
  geom_point(size=0.9) +
  theme_classic() +
  scale_color_viridis_c() +
  xlab("log(true kdeg)") +
  ylab("log(estimated kdeg)") +
  geom_abline(slope =1,
              intercept = 0,
              color = 'darkred',
              linewidth = 0.75,
              linetype = 'dotted') +
  theme(axis.text=element_text(size=10),
    axis.title=element_text(size=12),
    legend.text=element_text(size=10),
    legend.title=element_text(size=12))

gPk1
gPk2
gPk3

## ----class.source = 'fold-show', eval=TRUE------------------------------------
### Input #2: the graph
graph <- matrix(c(0, 1, 0,
                  3, 0, 2,
                  4, 0, 0),
                nrow = 3,
                ncol = 3,
                byrow = TRUE)
colnames(graph) <- c("0", "N", "C")
rownames(graph) <- colnames(graph)

# Each row and column represents an RNA species being modeled
# Here we are modeling the dynamics of nuclear (N) and cytoplasmic (C)
# RNA. "0" represents no RNA, and must appear in the graph. N is synthesized
# from 0 (i.e., its synthesis kinetics are 0-th order), and C is degraded to 0
#
# Numbers in graph should range from 0 to the number of parameters in your model.
# Order is arbitrary

## ----eval=TRUE----------------------------------------------------------------

##### SIMULATE DATA

simdata <- EZSimulate(nfeatures = 50,
                      ntreatments = 1,
                      mode = "dynamics",
                      label_time = c(1, 3),
                      dynamics_preset = "nuc2cytowithNdeg")

##### EZBAKR ANALYSES

ezbdo <- EZbakRData(simdata$cB, simdata$metadf)

ezbdo <- EstimateFractions(ezbdo)


# Lengths of features:
# In simulation, all features are same length, so this is here for illustrative
# purposes only
feature_lengths <- tibble(
  GF = unique(simdata$cB$GF),
  length = 1000
)


# Averaging fractions rather than kinetics
# Note the model, which specifies that we want to average data for a given
# compartment AND label time (tl).
ezbdo <- AverageAndRegularize(ezbdo,
                              formula_mean = ~tl:compartment - 1,
                              type = "fractions",
                              feature_lengths = feature_lengths,
                              parameter = "logit_fraction_highTC")

# Fit ODE model
# Getting some help from ode_models object again
ezbdo <- EZDynamics(ezbdo,
                    graph = graph,
                    sub_features = "GF",
                    grouping_features = "GF",
                    sample_feature = "compartment",
                    modeled_to_measured = ode_models$nuc2cytowithNdeg$formulas)

## ----eval=TRUE----------------------------------------------------------------
graph <- matrix(c(0, 1, 0,
                  0, 0, 2,
                  3, 0, 0),
                nrow = 3,
                ncol = 3,
                byrow = TRUE)
colnames(graph) <- c("0", "P", "M")
rownames(graph) <- colnames(graph)


## ----eval=TRUE----------------------------------------------------------------


##### SIMULATE DATA

simdata <- EZSimulate(nfeatures = 50,
                      ntreatments = 1,
                      mode = "dynamics",
                      label_time = c(1, 3),
                      dynamics_preset = "preRNA")

##### EZBAKR ANALYSES

ezbdo <- EZbakRData(simdata$cB, simdata$metadf)

ezbdo <- EstimateFractions(ezbdo)


# Lengths of features:
# In simulation, all features are same length, so this is here for illustrative
# purposes only
features <- unique(simdata$cB$feature)
feature_lengths <- tibble(
  GF = c(features,
         rep("__no_feature", times = length(features))),
  XF = c(rep("__no_feature", times = length(features)),
         features),
  length = 1000
)


# Averaging fractions rather than kinetics
# Note the model, which specifies that we want to average data for a given
# compartment AND label time (tl).
ezbdo <- AverageAndRegularize(ezbdo,
                              formula_mean = ~tl - 1,
                              type = "fractions",
                              feature_lengths = feature_lengths,
                              parameter = "logit_fraction_highTC")

# Fit ODE model
# Getting some help from ode_models object again
ezbdo <- EZDynamics(ezbdo,
                    graph = graph,
                    sub_features = c("GF", "XF"),
                    grouping_features = "feature",
                    modeled_to_measured = ode_models$preRNA$formulas$total)

## ----eval=TRUE----------------------------------------------------------------
graph <- matrix(c(0, 1, 0, 0, 0,
                  0, 0, 2, 3, 0,
                  0, 0, 0, 0, 4,
                  0, 0, 0, 0, 5,
                  6, 0, 0, 0, 0),
                nrow = 5,
                ncol = 5,
                byrow = TRUE)

colnames(graph) <- c("0", "NP", "NM", "CP","CM")
rownames(graph) <- colnames(graph)

## ----eval=TRUE----------------------------------------------------------------
##### SIMULATE DATA

simdata <- EZSimulate(nfeatures = 50,
                      ntreatments = 1,
                      mode = "dynamics",
                      label_time = c(1, 3),
                      dynamics_preset = "nuc2cytowithpreRNA")

##### EZBAKR ANALYSES

ezbdo <- EZbakRData(simdata$cB, simdata$metadf)

ezbdo <- EstimateFractions(ezbdo)


# Lengths of features:
# In simulation, all features are same length, so this is here for illustrative
# purposes only
features <- unique(simdata$cB$feature)
feature_lengths <- tibble(
  GF = c(features,
         rep("__no_feature", times = length(features))),
  XF = c(rep("__no_feature", times = length(features)),
         features),
  length = 1000
)


# Averaging fractions rather than kinetics
# Note the model, which specifies that we want to average data for a given
# compartment AND label time (tl).
ezbdo <- AverageAndRegularize(ezbdo,
                              formula_mean = ~tl:compartment - 1,
                              type = "fractions",
                              feature_lengths = feature_lengths,
                              parameter = "logit_fraction_highTC")

# Fit ODE model
# Getting some help from ode_models object again
ezbdo <- EZDynamics(ezbdo,
                    graph = graph,
                    sub_features = c("GF", "XF"),
                    grouping_features = "feature",
                    sample_feature = "compartment",
                    modeled_to_measured = ode_models$nuc2cytowithpreRNA$formulas)

## -----------------------------------------------------------------------------

### SIMULATE DATA

# Now we are simulating two distinct biological conditions.
# Default is for about 50% of all features to have differences in
# all parameters. 
simdata <- EZSimulate(nfeatures = 200,
                      ntreatments = 2,
                      mode = "dynamics",
                      label_time = c(1, 3),
                      dynamics_preset = "nuc2cyto")
              
### EZBAKR ANALYSES

ezbdo <- EZbakRData(simdata$cB, simdata$metadf)

ezbdo <- EstimateFractions(ezbdo)


# Lengths of features:
# In simulation, all features are same length, so this is here for illustrative
# purposes only
feature_lengths <- tibble(
  GF = unique(simdata$cB$GF),
  length = 1000
)


# DIFFERENT FROM SINGLE CONDITION ANALYSIS: "treatment" is included in the formula.
ezbdo <- AverageAndRegularize(ezbdo,
                              formula_mean = ~tl:compartment:treatment - 1,
                              type = "fractions",
                              feature_lengths = feature_lengths,
                              parameter = "logit_fraction_highTC")


# Nothing changes here!
ezbdo <- EZDynamics(ezbdo,
                    graph = ode_models$nuc2cyto$graph,
                    sub_features = "GF",
                    grouping_features = "GF",
                    sample_feature = "compartment",
                    modeled_to_measured = ode_models$nuc2cyto$formulas)


# Now we can run CompareParameters. Need to:
# 1) Let it know you want to use EZDynamics output (type = "dynamics")
# 2) Specify the "design_factor" (treatment in this case) 
# 3) Specify the "parameter" you want to compare 
# (let's compare log(k2), a.k.a logk2).
ezbdo <- CompareParameters(ezbdo,
                           type = "dynamics",
                           parameter = "logk2",
                           design_factor = "treatment",
                           reference = "treatment1",
                           experimental = "treatment2")


# Can make a volcano plot
# Here I have specified everything to be exhaustive, but
# in this case we could just specify `parameter = "logk2"`,
# since we only have one logk2 comparative analysis in our
# EZbakRData object
EZVolcanoPlot(ezbdo,
              parameter = "logk2",
              design_factor = "treatment",
              reference = "treatment1",
              experimental = "treatment2")


## -----------------------------------------------------------------------------
graph <- matrix(
  c(0, 1, 0,
    0, 0, 2,
    3, 0, 0),
  ncol = 3,
  nrow = 3,
  byrow = TRUE
)
colnames(graph) <- c("0", "N", "C")
rownames(graph) <- colnames(graph)

## -----------------------------------------------------------------------------
metadf <- tibble(
  sample = c("cyto_1", "cyto_2",
             "nuc_1", "nuc_2",
             "tot_1", "tot_3"),
  tl = 1,
  compartment = c("cytoplasmic", "cytoplasmic",
                  "nuclear", "nuclear",
                  "total", "total")
)

## -----------------------------------------------------------------------------
mtom <- list(
  total = list(XF ~ C + N),
  nuclear = list(XF ~ N),
  cytoplasmic = list(XF ~ C)
)

## -----------------------------------------------------------------------------
mtom <- list(
  XF ~ M,
  GF ~ P
)

## -----------------------------------------------------------------------------
mtom <- list(
  XF_sj ~ M,
  XF_nosj ~ P + M,
  GF ~ P
)

## -----------------------------------------------------------------------------
graph <- matrix(c(0, 1, 0, 0, 0,
                  0, 0, 2, 3, 0,
                  0, 0, 0, 0, 4,
                  0, 0, 0, 0, 5,
                  6, 0, 0, 0, 0),
                nrow = 5,
                ncol = 5,
                byrow = TRUE)

colnames(graph) <- c("0", "NP", "NM", "CP","CM")
rownames(graph) <- colnames(graph)

## -----------------------------------------------------------------------------
mtom <- list(
  total = list(
    GF ~ NP + CP,
    XF ~ NM + CM
  ),
  nuclear = list(
    GF ~ NP,
    XF ~ NM
  ),
  cytoplasmic = list(
    GF ~ CP,
    XF ~ CM
  ) 
)