/*=======================================================================
 * Version: $Id: sax.c,v 1.3 2015/07/02 12:14:32 nroche Exp $
 * Project: MediaTeX
 * Module : Sax scanner
 *
 * copy/paste from 
 *  http://www.jamesh.id.au/articles/libxml-sax/libxml-sax.html 

 MediaTex is an Electronic Records Management System
 Copyright (C) 2016  Nicolas Roche
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 =======================================================================*/

#include "misc/log.h"
#include "misc.h"

#include <stdio.h>
#include <memory.h>
#include <wchar.h>
#include <libxml/parser.h>
#include <ctype.h> // iblank

#include "seda02.h"

/* Context given to libxml2 as user_data and received back by every SAX
 * callback of this file. */
typedef struct SaxData {
  void *ps;              // parser handle returned by createParser(), first argument of pushParser()
  SEDA_STYPE tokenValue; // semantic value pushed with TEXT and ATTR_ tokens (its string member holds the text)
  BisonData* bisonData;  // caller's context, forwarded unchanged to pushParser()
} SaxData;

/* Maps an XML element name to the pair of tokens pushed to the parser. */
typedef struct Entity {
  char name[32]; // element name compared with strcmp(); "" marks the end of the table
  int openVal;   // token pushed by startElement()
  int closeVal;  // token pushed by endElement()
} Entity;

/* Maps an XML attribute name to the token pushed to the parser. */
typedef struct Attribut {
  char name[32]; // attribute name compared with strcmp(); "" marks the end of the table
  int val;       // token pushed by startElement() together with the attribute value
} Attribut;

/* Known XML elements with the tokens pushed when they open and close.
 * The table is scanned linearly and the first matching name wins, so each
 * name is listed only once ("Description" used to appear twice; the second
 * row could never be reached).
 * The entry with an empty name terminates the table. */
Entity entities[] = {
  {"ArchiveTransfer", _ARCHIVETRANSFER, ARCHIVETRANSFER_},
  {"Comment", _COMMENT, COMMENT_},
  {"Date", _DATE, DATE_},
  {"TransferIdentifier", _TRANSFERTIDENTIFIER, TRANSFERTIDENTIFIER_},
  {"Identification", _IDENTIFICATION, IDENTIFICATION_},
  {"TransferringAgency", _TRANSFERRINGAGENCY, TRANSFERRINGAGENCY_},
  {"ArchivalAgency", _ARCHIVALAGENCY, ARCHIVALAGENCY_},
  {"Integrity", _INTEGRITY, INTEGRITY_},
  {"Contains", _CONTAINS, CONTAINS_},
  {"UnitIdentifier", _UNITIDENTIFIER, UNITIDENTIFIER_},
  {"ArchivalAgreement", _ARCHIVALAGREEMENT, ARCHIVALAGREEMENT_},
  {"ArchivalProfile", _ARCHIVALPROFILE, ARCHIVALPROFILE_},
  {"DescriptionLanguage", _DESCRIPTIONLANGUAGE, DESCRIPTIONLANGUAGE_},
  {"DescriptionLevel", _DESCRIPTIONLEVEL, DESCRIPTIONLEVEL_},
  {"Name", _NAME, NAME_},
  {"ContentDescription", _CONTENTDESCRIPTION, CONTENTDESCRIPTION_},
  {"Description", _DESCRIPTION, DESCRIPTION_},
  {"Format", _FORMAT, FORMAT_},
  {"Language", _LANGUAGE, LANGUAGE_},
  {"OriginatingAgency", _ORIGINATINGAGENCY, ORIGINATINGAGENCY_},
  {"ContentDescriptive", _CONTENTDESCRIPTIVE, CONTENTDESCRIPTIVE_},
  {"KeywordContent", _KEYWORDCONTENT, KEYWORDCONTENT_},
  {"KeywordType", _KEYWORDTYPE, KEYWORDTYPE_},
  {"Appraisal", _APPRAISAL, APPRAISAL_},
  {"Code", _CODE, CODE_},
  {"Duration", _DURATION, DURATION_},
  {"StartDate", _STARTDATE, STARTDATE_},
  {"AccessRestriction", _ACCESSRESTRICTION, ACCESSRESTRICTION_},
  {"Document", _DOCUMENT, DOCUMENT_},
  {"Attachment", _ATTACHMENT, ATTACHMENT_},
  {"Control", _CONTROL, CONTROL_},
  {"Copy", _COPY, COPY_},
  {"Issue", _ISSUE, ISSUE_},
  {"Purpose", _PURPOSE, PURPOSE_},
  {"Type", _TYPE, TYPE_},
  {"", 0, 0}
};

Attribut attributs[] = {
  {"xmlns", ATTR_XMLNS},
  {"algorithme", ATTR_ALGORITHME},
  {"listVersionID", ATTR_LISTVERSIONID},
  {"filename", ATTR_FILENAME},
  {"", 0}
};

// Accumulates text across several contiguous calls to characters()
// until flushCaracters() pushes it to the parser as one TEXT token.
static char buffer[512] = {0};

/* Append `string` to the file-scope accumulator `buffer`, writing a
 * backslash in front of every double quote.
 *
 * Input that does not fit is silently dropped. A character is only copied
 * when all the bytes it needs (two for an escaped quote) fit in front of
 * the terminating NUL, so an escape sequence is never split and nothing is
 * written past the end of `buffer`.
 */
static void addCharacters(char* string)
{
  const size_t last = sizeof(buffer) - 1; // index reserved for the final NUL
  const char* src = string;
  size_t used = 0;
  size_t need = 0;

  if (env.debugLexer) {
    logMain(LOG_DEBUG, "add characters: %s", string);
  }

  used = strlen(buffer);

  // escape sequence for double quotes
  while (*src != 0) {
    need = (*src == '"') ? 2 : 1;
    if (used + need > last) break;

    if (need == 2) {
      buffer[used++] = '\\';
    }
    buffer[used++] = *src++;
  }

  buffer[used] = 0;
}

/* Push the accumulated text, if any, to the parser as a TEXT token and
 * empty the accumulator. At most 255 characters are kept in the token. */
static void flushCaracters(SaxData* data)
{
  if (buffer[0] != 0) {
    strncpy(data->tokenValue.string, buffer, 256);
    data->tokenValue.string[255] = 0;

    if (env.debugLexer) {
      logMain(LOG_INFO, "characters: %s", buffer);
    }

    // hand the TEXT token over to bison
    pushParser (data->ps, TEXT, &data->tokenValue, data->bisonData);

    // ready for the next run of characters
    buffer[0] = 0;
  }
}

/* SAX callback for the start of the document: only traces the event when
 * lexer debugging is enabled. */
void startDocument (void *user_data)
{
  if (!env.debugLexer) {
    return;
  }

  logMain(LOG_INFO, "%s", "start document");
}

void endDocument (void *user_data)
{
  SaxData* data = user_data;

  flushCaracters(data);

  if (env.debugLexer) {
    logMain(LOG_INFO, "%s", "end document");
  }

  // push $end lexem to bison
  pushParser (data->ps, 0, 0, data->bisonData);
}

/* SAX callback for character data found between tags.
 *
 * ch is not NUL-terminated; only its first `len` bytes are valid.
 * At most 255 bytes of a chunk are kept. Chunks that start with a newline,
 * or that contain only blanks (spaces and tabs), are ignored; anything else
 * is appended to the accumulator through addCharacters().
 *
 * NOTE(review): a chunk starting with '\n' is dropped entirely, including
 * any text that follows the newline in the same chunk - confirm that this
 * is intended.
 */
void characters (void *user_data, const xmlChar *ch, int len)
{
  char text[256]; // local copy; deliberately not named like the file-scope buffer
  const char* ptr = 0;

  (void)user_data;

  // nothing to read: do not dereference ch
  if (ch == 0 || len <= 0) {
    return;
  }

  if (*ch == '\n') {
    return;
  }

  if (len > 255) len = 255;

  strncpy(text, (const char*)ch, len);
  text[len] = 0;

  // remove blank text we found outside of tags
  // (cast: <ctype.h> functions are undefined on negative char values,
  //  which bytes of multi-byte UTF-8 sequences are when char is signed)
  ptr = text;
  while (*ptr != 0 && isblank((unsigned char)*ptr)) ++ptr;
  if (text + len == ptr) {
    return;
  }

  addCharacters(text);
}

/* SAX callback for an opening tag.
 *
 * Flushes pending text, pushes the element's opening token, then pushes
 * one ATTR_ token (carrying the attribute value) per known attribute.
 * Unknown elements and attributes are reported with LOG_ERR and skipped.
 *
 * attrs is a NULL-terminated array of name/value pairs (or NULL).
 * Attribute values are truncated to 255 characters: the copy assumes
 * tokenValue.string holds at least 256 bytes, the same bound
 * flushCaracters() relies on.
 */
void startElement (void *user_data, const xmlChar *name, const xmlChar **attrs)
{
  SaxData* data = user_data;
  Entity* e = entities;
  Attribut* a = 0;
  const xmlChar* value = 0;

  flushCaracters(data);

  if (env.debugLexer) {
    logMain(LOG_INFO, "start element: %s", name);
  }

  // look for the element; the table ends with an empty name
  while (*e->name && strcmp(e->name, (char*)name)) ++e;
  if (*e->name) {
    // Push _ELEMENT token to Bison
    pushParser (data->ps, e->openVal, 0, data->bisonData);
  }
  else {
    logMain(LOG_ERR, "Sax: unfound %s entity!", name);
  }

  while (attrs && *attrs) {
    // a name without value would make us step over the array terminator
    value = *(attrs+1);
    if (!value) break;

    a = attributs;
    while (*a->name && strcmp(a->name, (char*)*attrs)) ++a;
    if (*a->name) {
      // bounded copy: the value comes from the parsed file
      strncpy(data->tokenValue.string, (char*)value, 256);
      data->tokenValue.string[255] = 0;

      if (env.debugLexer) {
        logMain(LOG_INFO, "start element, attribute: %s",
                data->tokenValue.string);
      }

      // push ATTR_ token to Bison
      pushParser (data->ps, a->val, &data->tokenValue, data->bisonData);
    }
    else {
      logMain(LOG_ERR, "Sax: unfound %s attribute!", *attrs);
    }
    attrs+=2;
  }
}

/* SAX callback for a closing tag: flushes pending text, then pushes the
 * element's closing token. Unknown elements are reported with LOG_ERR. */
void endElement (void *user_data, const xmlChar *name)
{
  SaxData* sax = user_data;
  Entity* entry = 0;

  flushCaracters(sax);

  if (env.debugLexer) {
    logMain(LOG_INFO, "end element: %s", name);
  }

  // the table ends with an empty name
  for (entry = entities; *entry->name; ++entry) {
    if (strcmp(entry->name, (char*)name) == 0) {
      // ELEMENT_ token goes to Bison
      pushParser (sax->ps, entry->closeVal, 0, sax->bisonData);
      return;
    }
  }

  logMain(LOG_ERR, "Sax: unfound %s entity!", name);
}

/* SAX callback resolving an entity name: logs the request and answers with
 * libxml2's predefined entity of that name.
 * Not installed at the moment: its assignment in parse_xml_file() is
 * commented out. */
xmlEntityPtr getEntity (void *user_data, const xmlChar *name)
{
  xmlEntityPtr predefined;

  logMain(LOG_NOTICE, "Sax: get entity '%s'", name);

  predefined = xmlGetPredefinedEntity (name);
  return predefined;
}

/* libxml2 warning callback: forwards the message to the log handler at
 * LOG_WARNING level.
 *
 * The format handed to logEmitFunc() is "[%s %s:%i] " followed by msg.
 * It is built with snprintf() so that it is always NUL-terminated, even
 * when msg does not fit (it is then truncated).
 *
 * NOTE(review): the prefix holds three conversions while only __FILE__ and
 * __LINE__ are passed before `args`, and `args` is a va_list handed over as
 * an ordinary argument - check this against logEmitFunc()'s prototype.
 */
static void
warning(void *user_data, const char *msg, ...) {
    va_list args;
    char buff[255];

    (void)user_data;

    // "%%" keeps the prefix conversions literal for logEmitFunc()
    snprintf(buff, sizeof buff, "[%%s %%s:%%i] %s", msg);

    va_start (args, msg);
    logEmitFunc(env.logHandler, LOG_WARNING, buff, __FILE__, __LINE__, 
		args);
    va_end (args);
}

/* libxml2 error callback: forwards the message to the log handler at
 * LOG_ERR level.
 *
 * The format handed to logEmitFunc() is "[%s %s:%i] " followed by msg.
 * It is built with snprintf() so that it is always NUL-terminated, even
 * when msg does not fit (it is then truncated).
 *
 * NOTE(review): the prefix holds three conversions while only __FILE__ and
 * __LINE__ are passed before `args`, and `args` is a va_list handed over as
 * an ordinary argument - check this against logEmitFunc()'s prototype.
 */
static void
error(void *user_data, const char *msg, ...) {
    va_list args;
    char buff[255];

    (void)user_data;

    // "%%" keeps the prefix conversions literal for logEmitFunc()
    snprintf(buff, sizeof buff, "[%%s %%s:%%i] %s", msg);

    va_start (args, msg);
    logEmitFunc(env.logHandler, LOG_ERR, buff, __FILE__, __LINE__, 
		args);
    va_end (args);
}

/* libxml2 fatal error callback: forwards the message to the log handler at
 * LOG_EMERG level.
 *
 * The format handed to logEmitFunc() is "[%s %s:%i] " followed by msg.
 * It is built with snprintf() so that it is always NUL-terminated, even
 * when msg does not fit (it is then truncated).
 *
 * NOTE(review): the prefix holds three conversions while only __FILE__ and
 * __LINE__ are passed before `args`, and `args` is a va_list handed over as
 * an ordinary argument - check this against logEmitFunc()'s prototype.
 */
static void
fatalError(void *user_data, const char *msg, ...) {
    va_list args;
    char buff[255];

    (void)user_data;

    // "%%" keeps the prefix conversions literal for logEmitFunc()
    snprintf(buff, sizeof buff, "[%%s %%s:%%i] %s", msg);

    va_start (args, msg);
    logEmitFunc(env.logHandler, LOG_EMERG, buff, __FILE__, __LINE__, 
		args);
    va_end (args);
}

/* Parse an XML file with libxml2's SAX interface, pushing one token per
 * SAX event to the parser created by createParser().
 *
 * bisonData: context forwarded to every pushParser() call
 * filename:  path of the XML file to parse
 * Returns TRUE on success, FALSE if the parser cannot be created or if
 * libxml2 reports a failure.
 *
 * The parser handle is released on every path.
 */
int
parse_xml_file (BisonData* bisonData, const char *filename)
{
  int rc = FALSE;
  SaxData data;
  xmlSAXHandler saxHandler;

  logMain(LOG_NOTICE, "parse '%s'", filename);

  memset (&saxHandler, 0, (sizeof(xmlSAXHandler)));
  saxHandler.startDocument = startDocument;
  saxHandler.endDocument = endDocument;
  saxHandler.characters = characters;
  saxHandler.startElement = startElement;
  saxHandler.endElement = endElement;  
  //saxHandler.getEntity = getEntity;
  saxHandler.warning =warning;
  saxHandler.error = error;
  saxHandler.fatalError = fatalError;

  memset (&data, 0, (sizeof(SaxData)));
  data.bisonData = bisonData;

  // forget text left over by a previous parse that stopped before flushing
  buffer[0] = 0;

  data.ps = createParser ();
  if (!data.ps) {
    logMain(LOG_ERR, "%s", "Sax: cannot create the parser");
    goto error;
  }

  // xmlSAXUserParseFile returns 0 on success and a non-zero (possibly
  // positive) error number otherwise
  if (xmlSAXUserParseFile (&saxHandler, &data, filename) != 0) {
    goto error;
  }

  rc = TRUE;
 error:
  if (data.ps) {
    destroyParser (data.ps);
  }
  return rc;
}
