## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(SmoothPLS) library(ggplot2) library(dplyr) ## ----------------------------------------------------------------------------- nind = 50 # number of individuals (train set) start = 0 # First time end = 100 # end time lambda_0 = 0.2 # Exponential law parameter for state 0 lambda_1 = 0.1 # Exponential law parameter for state 1 prob_start = 0.55 # Probability of starting with state 1 curve_type = 'cat' TTRatio = 0.2 # Train Test Ratio NotS_ratio = 0.2 # noise variance over total variance for Y beta_0_real = 5.4321 # Intercept value for the link between X(t) and Y nbasis = 15 # number of basis functions norder = 4 # 4 for cubic splines basis ## ----------------------------------------------------------------------------- id_df = data.frame(id=rep(1,5), time=seq(0, 40, 10), state=c(0, 1, 1, 0, 1)) id_df ## ----fig.alt="Decay plot"----------------------------------------------------- func = function(t){0.01*t^2 - t} plot(0:40, func(0:40)) ## ----------------------------------------------------------------------------- evaluate_id_func_integral(id_df, func) ## ----------------------------------------------------------------------------- id_df_regul = regularize_time_series(id_df, time_seq = seq(0, 40, 2), curve_type = 'cat') id_df_regul ## ----------------------------------------------------------------------------- print(id_df$time) print(id_df_regul$time) ## ----------------------------------------------------------------------------- id_df_long = convert_to_wide_format(id_df_regul) id_df_long ## ----------------------------------------------------------------------------- basis = create_bspline_basis(0, 100, 10, 4) coef = c(10, 8, 6, 4, 2, 1, 3, 5, 7, 9) fd_obj = fda::fd(coef = coef, basisobj = basis) func_from_fd = from_fd_to_func(coef = coef, basis = basis) ## ----fig.alt="fd object"------------------------------------------------------ plot(fd_obj) ## ----------------------------------------------------------------------------- fda::eval.fd(fd_obj, 13) ## ----------------------------------------------------------------------------- df = generate_X_df(nind = nind, start = start,end = end, curve_type = 'cat', lambda_0 = lambda_0, lambda_1 = lambda_1, prob_start = prob_start) head(df) ## ----------------------------------------------------------------------------- df_2 = generate_X_df(nind=20, start = 13, end = 60, curve_type = 'cat', lambda_0 = 0.21, lambda_1 = 0.13, prob_start = 0.55) length(unique(df_2$id)) ## ----fig.alt="Binary CFD individuals"----------------------------------------- plot_CFD_individuals(df) ## ----fig.alt="3 binary CFD individuals"--------------------------------------- plot_CFD_individuals(df_2, n_ind_to_plot = 3) ## ----------------------------------------------------------------------------- nind_test = number_of_test_id(TTRatio = TTRatio, nind = nind) df_test = generate_X_df(nind_test, start, end, curve_type = 'cat', lambda_0, lambda_1, prob_start) length(unique(df_test$id)) ## ----------------------------------------------------------------------------- df_test_2 = generate_X_df(nind=number_of_test_id(TTRatio = TTRatio, nind = 80), start, end, curve_type = 'cat', lambda_0, lambda_1, prob_start) # Here the number of individuals will be 20 because : # 20 = 0.2 (80 + 20) or # 20 = floor(80*TTRatio/(1-TTRatio)) length(unique(df_test_2$id)) ## ----fig.alt="beta_1_real_func"----------------------------------------------- plot(x=0:100, y=beta_1_real_func(0:100, 100), type='l', main="Beta_1") ## ----fig.alt="beta_2_real_func"----------------------------------------------- plot(x=0:100, y=beta_2_real_func(0:100, 100), type='l', main="Beta_2") ## ----fig.alt="beta_3_real_func"----------------------------------------------- plot(x=0:100, y=beta_3_real_func(0:100, 100), type='l', main="Beta_3") ## ----fig.alt="Other beta functions"------------------------------------------- plot(x=0:100, y=beta_4_real_func(0:100, 100), type='l', main="Beta_4") plot(x=0:100, y=beta_5_real_func(0:100, 100), type='l', main="Beta_5") plot(x=0:100, y=beta_6_real_func(0:100, 100), type='l', main="Beta_6") plot(x=0:100, y=beta_7_real_func(0:100, 100), type='l', main="Beta_7") ## ----------------------------------------------------------------------------- Y_df = generate_Y_df(df, curve_type = 'cat', beta_1_real_func, beta_0_real, NotS_ratio) names(Y_df) ## ----------------------------------------------------------------------------- head(Y_df) ## ----------------------------------------------------------------------------- var(Y_df$Y_real) var(Y_df$Y_noised) var(Y_df$Y_real)/var(Y_df$Y_noised) (var(Y_df$Y_noised) - var(Y_df$Y_real))/var(Y_df$Y_noised) ## ----fig.alt="Y_df real and noised value histograms"-------------------------- oldpar <- par(mfrow=c(1,2)) hist(Y_df$Y_real) hist(Y_df$Y_noised) par(oldpar) ## ----------------------------------------------------------------------------- Y_df_test = generate_Y_df(df_test, curve_type = 'cat', beta_1_real_func, beta_0_real, NotS_ratio) head(Y_df_test) ## ----------------------------------------------------------------------------- df = generate_X_df(nind = nind, start = start,end = end, curve_type = 'num', noise_sd = 0.15, seed = 123) head(df) ## ----fig.alt="Noised cosinus curves"------------------------------------------ # Visualisation ggplot(df, aes(x = time, y = value, group = id, color = factor(id))) + geom_line(alpha = 0.8) + labs(title = "Noised cosinus curves", x = "Time", y = "Value", color = "Individual") + theme_minimal() ## ----------------------------------------------------------------------------- Y_df = generate_Y_df(df = df, curve_type = 'num', beta_real_func_or_list = beta_1_real_func, beta_0_real = beta_0_real, NotS_ratio = NotS_ratio, seed = 123) head(Y_df) ## ----------------------------------------------------------------------------- id_df_regul = regularize_time_series(df, time_seq = seq(0, 40, 2), curve_type = 'num') id_df_regul ## ----------------------------------------------------------------------------- id_df_long = convert_to_wide_format(id_df_regul) id_df_long ## ----------------------------------------------------------------------------- N_states = 4 ## ----------------------------------------------------------------------------- # Initialized the lambdas values lambdas = lambda_determination(N_states) lambdas ## ----------------------------------------------------------------------------- # Initialized the transition matrix transition_df = transfer_probabilities(N_states) transition_df ## ----------------------------------------------------------------------------- df = generate_X_df_multistates(nind = 100, N_states, start=0, end=100, lambdas, transition_df) head(df) ## ----fig.alt="Multistates individuals"---------------------------------------- plot_CFD_individuals(df) ## ----fig.alt="Multistates individuals plot by cfda"--------------------------- cfda::plotData(df) ## ----------------------------------------------------------------------------- proba = cfda::estimate_pt(df) ## ----fig.alt="Marginal probabilities"----------------------------------------- plot(proba, ribbon = FALSE) plot(proba, ribbon = TRUE) ## ----------------------------------------------------------------------------- head(df) ## ----------------------------------------------------------------------------- str(df$state) unique(df$state) order(unique(df$state)) # Warning, give the indices of the order! state_ordered = unique(df$state)[order(unique(df$state))] state_ordered ## ----------------------------------------------------------------------------- si_df = state_indicator(df, id_col='id', time_col='time') names(si_df) ## ----------------------------------------------------------------------------- head(si_df) ## ----------------------------------------------------------------------------- split_df = split_in_state_df(si_df, id_col='id', time_col='time') names(split_df) mode(split_df) ## ----------------------------------------------------------------------------- names(split_df)[4] head(split_df[[4]]) ## ----------------------------------------------------------------------------- states_df = build_df_per_state(split_df, id_col='id', time_col='time') names(states_df) mode(states_df) ## ----fig.alt="Indicator function per state"----------------------------------- plot_CFD_individuals(states_df[[1]]) ## ----------------------------------------------------------------------------- df_list = cat_data_to_indicator(df) names(df_list) head(df_list$state_1)