/* graph_sampler.h

   Written by Frederic Bois
   22 June 2014

   Copyright (c) 2014 Frederic Bois.

   This code is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   See the GNU General Public License at <http://www.gnu.org/licenses/> 

   -- Revisions -----
     Logfile:  %F%
    Revision:  %I%
        Date:  %G%
     Modtime:  %U%
      Author:  @a
   -- SCCS  ---------

*/

#ifndef GS_H_DEFINED


/* ----------------------------------------------------------------------------
   Inclusions
*/

#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "concordance.h"
#include "degree.h"
#include "edge_number.h"
#include "lexerr.h"
#include "likelihoods.h"
#include "lists.h"
#include "matrices.h"
#include "motifs.h"
#include "outputs.h"
#include "random.h"
#include "topo_sort.h"
#include "y.tab.h"


/* ----------------------------------------------------------------------------
   Macros
*/

/* Skip the rest of the current line on stream _pf, end-of-line included.
   Evaluates to -1 when fscanf returns a negative value (end of file or read
   error before anything could be scanned); otherwise evaluates to whatever
   fgetc returns, i.e. the consumed '\n', or EOF if the input stops first.
   _pf is expanded twice, so pass an expression without side effects. */
#define Readln(_pf) ((fscanf (_pf, "%*[^\n]") >= 0) ? fgetc (_pf) : -1)


/* ----------------------------------------------------------------------------
   Prototypes
*/
/* NOTE(review): only signatures are visible in this header.  Anything said
   below beyond the parameter and return types is inferred from identifier
   names and from the variables declared later in this file; confirm against
   the implementations before relying on it.

   Several parameter names used here (pData, parent, child, diff) are also
   names of file-scope variables declared further down in this header; inside
   a prototype they have prototype scope only. */

/* No arguments, no return value.  Presumably prints the program banner. */
void   AnnounceProgram (void);

/* No arguments, no return value.  Presumably releases the global arrays. */
void   CleanupMemory (void);

/* D* family: each takes a node index and a double** data table and returns a
   double.  The D prefix presumably pairs with the bDirichlet score flag; the
   _NA suffix presumably with missing data (bNAData) and _DBN with dynamic
   Bayesian networks (bDBN) -- TODO confirm. */
double DLoglikelihood_node (int node, double **pData);

double DLoglikelihood_node_NA (int node, double **pData);

double DLoglikelihood_node_DBN (int node, double **pData);

double DLoglikelihood_node_DBN_NA (int node, double **pData);

/* cArg / rgszArg presumably mirror main()'s argc / argv.  rgszArg points to
   const pointers, so the argument vector entries cannot be reseated through
   it.  pszFileIn and pszPrefixOut are char**, which lets the callee hand back
   string pointers (presumably input file name and output prefix). */
void   GetCmdLineArgs (int cArg, char *const *rgszArg, char **pszFileIn, 
                       char **pszPrefixOut);

/* G* family: same signatures as the D* family.  The G prefix presumably
   pairs with the bNormalGamma score flag -- TODO confirm. */
double GLoglikelihood_node (int node, double **pData);

double GLoglikelihood_node_NA (int node, double **pData);

double GLoglikelihood_node_DBN (int node, double **pData);

double GLoglikelihood_node_DBN_NA (int node, double **pData);

/* No arguments, no return value.  Referenced by the pdiff_binom_P comment in
   this header ("see InitArrays"). */
void   InitArrays (void);

/* The two *_diff functions below return void and take a double* as last
   parameter (logLdiff / logPdiff), presumably an output location for the
   computed difference.  The *_full functions and Logprior_diff_bernoulli
   return their result as a double. */
void   Loglikelihood_diff (int parent, int child, int diff, 
                           double **pData, double *logLdiff);
double Loglikelihood_full (int N, double **pData);
void   Logprior_diff (int **adjacency_current,
                      int parent_node, int child_node, int diff,
                      double *logPdiff);
double Logprior_diff_bernoulli (int parent_node, int child_node, int diff);
double Logprior_full (int N, int **adjacency);

/* The top-level const on `filename` qualifies only the parameter itself and
   has no effect for callers; the characters pointed to are not const. */
void   ReadScript_Bison (char *const filename);

/* Presumably related to the tempering variables (bTempered, indexT, ...). */
void   SampleTemperature (void);

void   SetPriorHyperParam (void);

/* Presumably maintain best_adj and the edge summaries -- TODO confirm. */
void   UpdateBestGraph (void);
void   UpdateEdgeP (void);

/* Z* family: same signatures as the D* family.  The Z prefix presumably
   pairs with the bZellner score flag -- TODO confirm. */
double ZLoglikelihood_node (int node, double **pData);

double ZLoglikelihood_node_NA (int node, double **pData);

double ZLoglikelihood_node_DBN (int node, double **pData);

double ZLoglikelihood_node_DBN_NA (int node, double **pData);


/* ----------------------------------------------------------------------------
   Global declarations, shared
*/

// NOTE(review): the file-scope variables in this header are written without
// `extern`, so each translation unit that includes it gets its own tentative
// definition.  Linking several such units only works when the toolchain
// merges common symbols (e.g. -fcommon); with -fno-common, the default since
// GCC 10, it fails with duplicate-symbol errors.  The portable fix is
// `extern` here plus a single definition in one .c file.
// BOOL and RDM_GEN_NAMES are not declared in this file; presumably they come
// from one of the project headers included above.

// basic flags and variables
BOOL   autocycle;           // self-loops (A -> A edges) allowed
BOOL   bBN;                 // sample Bayesian networks
BOOL   bDBN;                // sample dynamic Bayesian networks
BOOL   bCrazy;              // experimental mode...
int    nNodes;              // the number of nodes in graph
double seed;                // random generator seed (stored as a double)
RDM_GEN_NAMES rdm_gen_name; // if gsl available, random generator type

// core computational variables
int    **current_adj;       // current adjacency matrix
int    diff;                // type of edge change (add, delete)
int    parent, child;       // current pair of nodes
int    *nParents;           // number of parents for each node
int    **index_parents;     // the list of current parents for each node

// variables for basic Bernoulli prior
double **hyper_pB;          // hyper-parameters of the pairwise edge prior
                            // (presumably indexed by node pair -- confirm)
BOOL   *bAllowed_parents;   // per node: are parents allowed for that node

// variables for edge concordance prior
BOOL   bPriorConcordance;   // flag for including a concordance prior
int    **edge_requirements; // prior edge pattern matrix
double lambda_concord;      // exponent for concordance weighting

// variables for prior on degree distribution
BOOL   bPriorDegreeNode;    // flag for including a power prior on degrees
double *cumdegree_count;    // NOTE(review): undocumented; presumably degree
                            // counts cumulated over sampled graphs -- confirm
int    *current_degrees;    // point to a table of # of edges for each node
double *degree_count;       // NOTE(review): undocumented; presumably a count
                            // of nodes per degree value -- confirm
double gamma_degree;        // exponent of the degrees power prior

// variables for prior on number of edges
BOOL   bPriorEdgeCount;     // flag for a binomial prior on total edge count
long   current_edge_count;  // number of edges in the graph
long   expected_n_edges;    // NOTE(review): undocumented; presumably the
                            // expected edge count of the binomial prior
double *pdiff_binom_P;      // table of P(n=x+1) - P(n=x), see InitArrays

// variables for prior on motifs
// ("E loops" / "F loops" below abbreviate Endless / Frustrated loops.)
BOOL   bPriorMotif;         // flag for including a prior on defined motifs
int    alpha_motif;         // alpha beta-binomial parameter for Eloops P
int    beta_motif;          // second beta-binomial parameter
double cum_nEloops;         // cumulated number of Endless loops
double cum_nFloops;         // cumulated number of Frustrated loops
long   current_nEloops;     // current number of Endless loops
long   current_nFloops;     // current number of Frustrated loops
long   diff_nEloops;        // proposed change in E loops count
long   diff_nFloops;        // proposed change in F loops count
double current_motif_prior; // density of the motif prior in current graph
double proposd_motif_prior; // density of the motif prior in proposed graph
                            // (identifier spelling "proposd" is as declared)

// variables for priors handling
double current_logprior;     // current total prior density
double dBestPrior;           // prior of the max posterior graph

// variables for likelihood handling
double current_loglikelihood; // NOTE(review): undocumented; presumably the
                              // current total likelihood, by analogy with
                              // current_logprior -- confirm
double dBestLikelihood;      // likelihood of the max posterior graph
double *current_ll_node;     // likelihoods for each node

// variables for posterior handling
double current_logposterior; // current total posterior density
double dBestPosterior;       // maximum graph probability found in a run
int    **best_adj;           // maximum probability adjacency matrix sampled

// variables for likelihoods
// (three score flags; whether they are mutually exclusive is not visible
// from this header)
BOOL   bNormalGamma;         // Normal-Gamma score on or off
BOOL   bDirichlet;           // Dirichlet score on or off
BOOL   bZellner;             // Zellner score on or off
double alpha_normal_gamma;   // prior Gamma shape for data precision
double beta_normal_gamma;    // prior Gamma rate for data precision
double gamma_zellner;        // tuning parameter for Zellner score

// variables for data handling
// PLISTIJ is not declared in this file; presumably it comes from "lists.h".
BOOL    bData;               // flag the availability of data
BOOL    bNAData;             // flag for missing data in data matrix
int     nData;               // max number of data points per node in input
double  **pData;             // data structure (a vector of data per node);
                             // the name is also used as a parameter name in
                             // the likelihood prototypes of this header
int     *pDataLevels;        // number of codes per node for discrete data
PLISTIJ plistMissing;        // list of missing data locations
BOOL    *bHasMissing;        // missing data indicator for each node

// variables for tempering
BOOL   bTempered;            // flag for using tempering
int    nTemperatures;        // number of temperatures considered
int    indexT;               // current temperature (an int; presumably an
                             // index into pInvTemperatures -- confirm)
double *pInvTemperatures;    // array of inverse temperatures
long   n_at_targetT;         // number of graphs sampled at target temperature
double dCZero;               // pseudo-prior updating parameter
double dNZero;               // pseudo-prior updating parameter

double *plnPi;               // array of temperatures pseudo-priors (the "ln"
                             // in the name suggests log scale -- confirm)

// computational helps
double **mat_sum;            // cumulated edges counts over graphs
double **pdWorkMatrixSizeN;  // NOTE(review): undocumented; presumably a
                             // scratch matrix whose size depends on N --
                             // confirm dimensions at the allocation site

// MCMC parameters
// NOTE(review): undocumented; presumably total number of iterations and
// number of burn-in iterations -- confirm
long   nRuns, nBurnin;
long   iter;                 // presumably the current iteration counter


#define GS_H_DEFINED
#endif

/* end */
