/*
 *  xmlt.c - XML data structure.
 *           This file is part of the FreeLCD package.
 *
 *  $Id: xmlt.c,v 1.9 2004/06/20 12:38:35 unicorn Exp $
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *  Copyright (c) 2002, 2003, Jeroen van den Berg <unicorn@hippie.nu>
 */

/** \file xmlt.c */

#if HAVE_CONFIG_H
# include <config.h>
#endif

#if HAVE_STRING_H
# include <string.h>
#else
# if HAVE_STRINGS_H
#  include <strings.h>
# endif
#endif

#include <assert.h>
#include <expat.h>
#include <stdio.h>

#include "dictionary.h"
#include "entities.h"
#include "xmlt.h"
#include "debug.h"

/** The state information that is internally used for parsing XML.
 *
 *  One instance is allocated by xmlt_create_context() and registered with
 *  Expat as user data, so every parser callback in this file receives it
 *  as its first argument.  The fields marked "used while parsing" are
 *  (re)initialized by _context_init(). */
typedef struct
{
  XML_Parser parser;            /**< The Expat parser */
  xml_node *root_node;          /**< First tag in the document is root; set
                                     back to NULL once the finished document
                                     has been handed to the callback */
  xml_node *curr_node;          /**< Current node, used while parsing */
  void (*doc_finished) (void *, xml_node *);/**< Callback for delivering
                                                 finished documents */
  void *user_callback_data;     /**< Data is passed to callback function */
  dictionary *known_tags;       /**< The dictionary with the known tags,
                                     used for looking up a tag's index */
  dictionary *known_attributes; /**< Same thing for attributes */
  int depth;                    /**< Tree depth, used while parsing; bumped
                                     on every start tag and dropped on every
                                     end tag */
  unsigned char curr_is_cdata;  /**< Working with character data at the
                                     moment, used while parsing */
  unsigned char valid;          /**< Document is still valid; cleared on an
                                     unrecoverable parse error */
  unsigned char reset;          /**< Deferred parser context reset flag,
                                     raised when the root element closes */
  size_t parsed;                /**< Total parsed bytes so far (since the
                                     last context initialization) */
}
xml_context;


/*---------------------------------------------------- _create_xml_node --*/
/** Allocate a tree node and give every field a defined starting value.
 *
 *  \param type  Kind of node to create.
 *  \param tag   Tag index to store in the node.
 *  \return The freshly allocated node; it has no parent, no children, no
 *          attributes and no character data yet. */
static xml_node *
_create_xml_node (xml_node_t type, int tag)
{
  xml_node *node = g_malloc (sizeof *node);

  node->type       = type;
  node->tag        = tag;
  node->parent     = NULL;
  node->children   = NULL;
  node->attributes = NULL;
  node->cdata      = NULL;

  return node;
}

/*----------------------------------------------------- _cleanup_string --*/
/** Copy \a text into \a dest while normalizing its whitespace.
 *
 *  Newlines, carriage returns and tabs are turned into spaces, runs of
 *  spaces are collapsed into a single space, and spaces at the very start
 *  or end of the text are dropped.  The result is NOT zero terminated.
 *
 *  \param text  Input characters; need not be zero terminated.
 *  \param dest  Output buffer with room for at least \a len characters.
 *  \param len   Number of characters to read from \a text.
 *  \return The number of characters written to \a dest (never more than
 *          \a len; zero if the input holds nothing but whitespace). */
static size_t
_cleanup_string (const char *text, char *dest, size_t len)
{
  size_t i;
  size_t newlen = 0;

  assert (text != NULL);
  assert (dest != NULL);

  for (i = 0; i < len; ++i)
    {
      char c = text[i];

      if (c == '\n' || c == '\r' || c == '\t')
        c = ' ';

      /* Don't put spaces at the beginning, or after another space.  The
       * emptiness test also keeps dest[newlen - 1] from reading in front
       * of the buffer. */
      if (c == ' ' && (newlen == 0 || dest[newlen - 1] == ' '))
        continue;

      dest[newlen++] = c;
    }

  /* Don't store trailing spaces. */
  while (newlen > 0 && dest[newlen - 1] == ' ')
    --newlen;

  return newlen;
}

/*------------------------------------------------------- _attr_compare --*/
/** GSList search predicate that matches an attribute by its index.
 *
 *  \param data     List element, an xml_attribute.
 *  \param compare  Pointer to the int attribute index being looked for.
 *  \return 0 when the element carries the wanted index, 1 otherwise. */
static gint
_attr_compare (gconstpointer data, gconstpointer compare)
{
  const xml_attribute *candidate = data;
  const int *wanted = compare;

  if (candidate->attribute == *wanted)
    return 0;

  return 1;
}

/*------------------------------------------------------ _rescan_attrib --*/
/** GSList visitor that tries to resolve one unknown attribute.
 *
 *  Attributes that already have an index are left alone.  For an unknown
 *  attribute the stored name is looked up in the dictionary; on a hit the
 *  index is filled in and the name string is released.
 *
 *  \param data      List element, an xml_attribute.
 *  \param userdata  The dictionary to look the attribute name up in. */
static void
_rescan_attrib (gpointer data, gpointer userdata)
{
  xml_attribute *attr = data;
  dictionary *dict = userdata;
  int *found;

  if (attr->attribute != XML_UNKNOWN_ATTRIBUTE)
    return;

  found = (int *) dict_lookup (dict, attr->unknown_attrib);
  if (found == NULL)
    return;

  attr->attribute = *found;
  g_free (attr->unknown_attrib);
  attr->unknown_attrib = NULL;
}

/*------------------------------------------------------ _handle_entity --*/
/** External entity reference handler that ignores the reference.
 *
 *  None of the arguments are looked at; the handler only tells Expat that
 *  parsing may go on.
 *
 *  \return Always XML_STATUS_OK. */
static int
_handle_entity (XML_Parser p, const XML_Char *context, const XML_Char *base,
                const XML_Char *sys_id, const XML_Char *publicid)
{
  (void) p;
  (void) context;
  (void) base;
  (void) sys_id;
  (void) publicid;

  return XML_STATUS_OK;
}

/*------------------------------------------------------ _start_element --*/
static void
_start_element (void *cntxt, const char *name, const char **attr)
{
  xml_node *new_node = _create_xml_node (NODE, XML_UNKNOWN_TAG);
  xml_context *context = (xml_context *)cntxt;
  int *node_t;
  
  /*  Look up the node name in the dictionary. If it's not there, mark
   *  it simply as an unknown node.
   */
  node_t = dict_lookup (context->known_tags, name);
  if (node_t == NULL)
    {
      new_node->cdata = g_strdup (name);
    }
  else
    {
      new_node->tag = *(int *) node_t;
      new_node->cdata = NULL;
    }

  if (context->root_node == NULL)
    {
      debug ("new XML document");
      context->root_node = new_node;
    }
  else
    {
      new_node->parent = context->curr_node;
      context->curr_node->children = 
        g_slist_append (context->curr_node->children, new_node);
    }

  while (attr != NULL && *attr != NULL)
    {
      xml_attribute *new_attr = g_malloc (sizeof (xml_attribute));
  
      if (node_t == NULL)
        {
          /* Never replace attributes in unknown tags, even if they are
           * in the dictionary. This could mess up the bookkeeping of
           * other modules that rescan subsections of the document. */

          new_attr->attribute = XML_UNKNOWN_ATTRIBUTE;
          new_attr->unknown_attrib = g_strdup (*attr);
        }
      else
        {
          int *attr_t = dict_lookup (context->known_attributes, *attr);

          if (attr_t == NULL)
            {
              new_attr->attribute = XML_UNKNOWN_ATTRIBUTE;
              new_attr->unknown_attrib = g_strdup (*attr);
            }
          else
            {
              new_attr->attribute = *(int*)attr_t;
              new_attr->unknown_attrib = NULL;
            }
        }
      
      new_attr->value = g_strdup(*(attr + 1));
      new_node->attributes = g_slist_append (new_node->attributes, new_attr);
      attr += 2;
    }
  
  context->curr_node = new_node;
  context->curr_is_cdata = 0;
  ++context->depth;
}

/*-------------------------------------------------------- _end_element --*/
/** Expat end-tag handler: step back up the tree.
 *
 *  When the closing tag belongs to the root element, the finished document
 *  is delivered through the doc_finished callback, the context lets go of
 *  it, and a parser reset is requested.  For any other element the parent
 *  simply becomes the current node again.
 *
 *  \param cntxt  The xml_context that was registered as user data.
 *  \param name   Element name (unused). */
static void
_end_element (void *cntxt, const char *name)
{
  xml_context *context = (xml_context *) cntxt;

  (void) name;

  context->depth -= 1;

  if (context->depth != 0)
    {
      context->curr_node = context->curr_node->parent;
      context->curr_is_cdata = 0;
      return;
    }

  debug ("xml doc completed");
  context->doc_finished (context->user_callback_data, context->root_node);
  context->root_node = NULL;
  context->reset = 1;
}

/*----------------------------------------------------- _character_data --*/
static void
_character_data (void *cntxt, const char *text, int len)
{
  size_t      newlen;
  char        *newtext;
  xml_node    *new_node;
  xml_context *context = (xml_context *) cntxt;
  GSList     **siblings = NULL;

  assert (cntxt != NULL);
  assert (text != NULL);
  
  newtext = g_malloc (len + 1); /* plus one for 0 character */
  newlen = _cleanup_string (text, newtext, len);
  assert (newlen <= len);
  if (newlen == 0)
    {
      g_free (newtext);
      return;
    }
  
  newtext[newlen] = '\0';

  siblings = &(context->curr_node->children);
  if (context->curr_is_cdata)
    {
      /*  The parser was already handling character data. Just add this
       *  cdata to what was already there.
       */
      xml_node *last_node = (xml_node*) g_slist_last(*siblings)->data;
      size_t cdata_len = strlen (last_node->cdata);

      last_node->cdata = g_realloc (last_node->cdata, cdata_len + newlen + 1);
      memcpy (last_node->cdata + cdata_len, newtext, newlen);
      g_free (newtext);
      last_node->cdata[cdata_len + cdata_len + newlen + 1] = '\0';
    }
  else
    {
      new_node = _create_xml_node (CDATA, -1);
      new_node->cdata = newtext;
      new_node->parent = context->curr_node;
  
      *siblings = g_slist_append (*siblings, new_node);
      context->curr_is_cdata = 1;
    }
}

/*----------------------------------------------------- _free_attribute --*/
/** GSList visitor that releases one attribute and the strings it owns.
 *
 *  \param _attr     List element, an xml_attribute; must not be NULL.
 *  \param userdata  Unused. */
static void
_free_attribute (gpointer _attr, gpointer userdata)
{
  xml_attribute *victim = _attr;

  (void) userdata;
  assert (victim != NULL);

  g_free (victim->unknown_attrib);
  g_free (victim->value);
  g_free (victim);
}

/** Put a context in its "nothing parsed yet" state.
 *
 *  Clears all per-document bookkeeping and (re)installs the callbacks of
 *  this file on the context's Expat parser.  The callback, its user data
 *  and the dictionaries are left untouched.
 *
 *  \param context  The context to initialize; its parser must be valid. */
static void
_context_init (xml_context *context)
{
  XML_Parser parser = context->parser;

  context->parsed        = 0;
  context->reset         = 0;
  context->valid         = 1;
  context->curr_is_cdata = 0;
  context->depth         = 0;
  context->curr_node     = NULL;
  context->root_node     = NULL;

  XML_SetUserData (parser, context);
  XML_SetElementHandler (parser, _start_element, _end_element);
  XML_SetCharacterDataHandler (parser, _character_data);
  XML_SetExternalEntityRefHandler (parser, _handle_entity);
}


/*------------------------------------------------- xmlt_create_context --*/
/** Create a parsing context.
 *
 *  \param cb       Called with \a cb_data and the root node every time a
 *                  complete document has been parsed.
 *  \param cb_data  Opaque pointer that is passed on to \a cb.
 *  \param tags     Dictionary used for looking up tag names.
 *  \param attribs  Dictionary used for looking up attribute names.
 *  \return The new context, or NULL if the Expat parser could not be
 *          created.  Release it with xmlt_free_context(). */
void *
xmlt_create_context (void (*cb) (void *, xml_node *), void *cb_data,
                     dictionary *tags, dictionary *attribs)
{
  xml_context *context;

  debug ("xmlt_create_context");

  context = g_malloc (sizeof *context);
  context->parser = XML_ParserCreate (NULL);
  if (context->parser == NULL)
    {
      /* Don't leak the half-built context when Expat gives up. */
      g_free (context);
      return NULL;
    }

  context->user_callback_data = cb_data;
  context->known_tags         = tags;
  context->known_attributes   = attribs;
  context->doc_finished       = cb;

  _context_init (context);

  return context;
}

/*---------------------------------------------------------- xmlt_parse --*/
/** Work out how many bytes at the start of the current buffer have been
 *  dealt with by Expat.
 *
 *  Expat reports a byte index relative to everything it was fed since the
 *  context was initialized; context->parsed is subtracted to make it
 *  relative to the current buffer.  When Expat has no position to report
 *  (negative index), or the position lies outside the current buffer, the
 *  whole buffer is treated as consumed.  This keeps the caller from
 *  stepping outside the buffer.
 *
 *  \param context  The parsing context.
 *  \param len      Number of bytes in the current buffer.
 *  \return A value between 0 and \a len, inclusive. */
static size_t
_consumed_bytes (const xml_context *context, size_t len)
{
  size_t idx;

  if (XML_GetCurrentByteIndex (context->parser) < 0)
    return len;

  idx = (size_t) XML_GetCurrentByteIndex (context->parser);
  if (idx < context->parsed || idx - context->parsed > len)
    return len;

  return idx - context->parsed;
}

/** Feed a chunk of data to the parser.
 *
 *  The data may hold part of a document, or several documents in a row;
 *  the callback given to xmlt_create_context() is invoked for every
 *  document that is completed.
 *
 *  \param _context  Context created by xmlt_create_context().
 *  \param data      The bytes to parse.
 *  \param len       Number of bytes in \a data.
 *  \return 1 on success, 0 if the context is (or just became) invalid. */
int
xmlt_parse (void *_context, const char *data, size_t len)
{
  xml_context *context = (xml_context *) _context;
  size_t skip;

  assert (context != NULL);

  /* Check this before looking at the parser: after a failed reset the
   * context is invalid and has no parser at all. */
  if (!context->valid)
    return 0;

  assert (context->parser != NULL);
  assert (data != NULL);

  while (len > 0)
    {
      if (!XML_Parse (context->parser, data, len, 0))
        {
          /* Slight problem here; Expat doesn't like it when there's more
           * than one document in a stream, but that is exactly the kind
           * of thing FreeLCD uses.  "Junk after document element" is
           * therefore handled below, by resetting the parser context and
           * continuing where the error manifested. */

          if (XML_GetErrorCode (context->parser)
              != XML_ERROR_JUNK_AFTER_DOC_ELEMENT)
            {
              /* Any other error really is an error. */
              context->valid = 0;
              return 0;
            }
        }
      else if (!context->reset)
        {
          /* Everything went well. Exit the loop. */
          context->parsed += len;
          break;
        }

      /* Either a new document starts in the middle of this buffer, or a
       * document was completed without any troubles.  In both cases the
       * context needs to be reset before the remaining data is parsed. */

      skip = _consumed_bytes (context, len);
      len  -= skip;
      data += skip;

      if (!xmlt_reset_context (context))
        return 0;
    }

  return 1;
}

/*-------------------------------------------------- xmlt_reset_context --*/
int
xmlt_reset_context (void *_context)
{
  xml_context *context = (xml_context *)_context;
  assert (context);

  if (!XML_ParserReset (context->parser, 0))
    {
      /* Resetting the parser failed for some reason. Try again, the good
       * old-fashioned way. */
      XML_ParserFree (context->parser);
      context->parser = XML_ParserCreate (0);
      if (context->parser == NULL)
        {
          context->valid = 0;
          return 0;
        }
    }

  _context_init (context);

  return 1;
}

/*--------------------------------------------------- xmlt_free_context --*/
/** Destroy a parsing context.
 *
 *  Releases the document that is still held by the context (if any), the
 *  Expat parser, and the context itself.
 *
 *  \param _context  Context created by xmlt_create_context(); NULL is
 *                   accepted and ignored. */
void
xmlt_free_context (void *_context)
{
  xml_context *context = _context;

  if (context != NULL)
    {
      debug ("xmlt_free_context");
      xmlt_free_document (context->root_node);
      XML_ParserFree (context->parser);
      g_free (context);
    }
}

/*-------------------------------------------------- xmlt_free_document --*/
/** Release a document tree, or a subtree of it.
 *
 *  Frees the node itself, its attributes, its character data, and does
 *  the same for all of its descendants.
 *
 *  \param doc  Root of the (sub)tree to free; NULL is accepted and
 *              ignored.  The pointer must not be used afterwards. */
void
xmlt_free_document (xml_node *doc)
{
  GSList *iter;

  if (doc == NULL)
    return;

  debug ("xmlt_free_document");

  g_slist_foreach (doc->attributes, _free_attribute, NULL);
  g_slist_free (doc->attributes);
  doc->attributes = NULL;

  /* Every child is a full xml_node, whether it holds an element or
   * character data, so all of them are released recursively.  A node
   * without children or attributes is handled fine by the recursion. */
  for (iter = doc->children; iter != NULL; iter = g_slist_next (iter))
    xmlt_free_document ((xml_node *) iter->data);

  /* The list cells themselves are owned by this node as well. */
  g_slist_free (doc->children);
  doc->children = NULL;

  g_free (doc->cdata);
  g_free (doc);
}

/*------------------------------------------------------- xmlt_get_next --*/
/** Step to the next node of a (sub)tree, children first.
 *
 *  The first child of \a iter is returned if it has one.  Otherwise the
 *  search climbs towards the root, returning the first "next sibling" that
 *  is found on the way.  The climb stops at \a doc, so the walk never
 *  leaves the subtree below \a doc.
 *
 *  \param iter  The current position; must not be NULL.
 *  \param doc   Root of the (sub)tree being walked; must not be NULL.
 *  \return The next node, or NULL when the walk is over. */
xml_node *
xmlt_get_next (xml_node *iter, xml_node *doc)
{
  assert (iter != NULL);
  assert (doc != NULL);

  if (iter->children != NULL)
    return (xml_node *) iter->children->data;

  for (; iter->parent != NULL; iter = iter->parent)
    {
      GSList *self;

      if (iter == doc)
        break;

      self = g_slist_find (iter->parent->children, iter);
      if (self->next != NULL)
        return (xml_node *) self->next->data;
    }

  /* Either the subtree is exhausted, or the top of the tree was reached. */
  return NULL;
}

/*------------------------------------------------ xmlt_get_next_shallow --*/
/** Step to the next sibling of a node, without descending or climbing.
 *
 *  \param iter  The current position; must not be NULL.
 *  \return The node that follows \a iter in its parent's list of children,
 *          or NULL if there is none.  A node without a parent has no
 *          siblings, so NULL is returned for it as well. */
xml_node *
xmlt_get_next_shallow (xml_node *iter)
{
  GSList *self;

  assert (iter);

  /* The root node is not in anybody's list of children. */
  if (iter->parent == NULL)
    return NULL;

  self = g_slist_find (iter->parent->children, iter);
  if (self == NULL || self->next == NULL)
    return NULL;

  return (xml_node *) self->next->data;
}

/*------------------------------------------------ xmlt_rescan_document --*/
/** Try to resolve the unknown tags of a document with another dictionary.
 *
 *  Every element of the (sub)tree that is still marked as unknown is
 *  looked up in \a tags.  On a hit, the node gets the tag index from the
 *  dictionary and, if \a attribs is given, its unknown attributes are
 *  looked up in \a attribs too.  Nodes that stay unknown are not touched.
 *
 *  \param doc      Root of the (sub)tree to rescan; may be NULL.
 *  \param tags     Dictionary with tag names.
 *  \param attribs  Dictionary with attribute names, or NULL to leave all
 *                  attributes alone. */
void
xmlt_rescan_document (xml_node *doc, dictionary *tags, dictionary *attribs)
{
  xml_node *node;

  for (node = doc; node != NULL; node = xmlt_get_next (node, doc))
    {
      int *known;

      if (node->type != NODE || node->tag != XML_UNKNOWN_TAG)
        continue;

      /*  The name of an unknown tag is kept as a string in 'cdata'. */
      known = dict_lookup (tags, node->cdata);
      if (known == NULL)
        continue;

      /*  Found it: store the index instead, the string is of no further
       *  use.
       */
      node->tag = *known;
      g_free (node->cdata);
      node->cdata = NULL;

      if (attribs != NULL)
        g_slist_foreach (node->attributes, _rescan_attrib, attribs);
    }
}

/*----------------------------------------------------------- xmlt_find --*/
/** Find the next node with a certain tag on a single level of the tree.
 *
 *  \param doc   The document; must not be NULL.
 *  \param iter  NULL to start the search at the node xmlt_get_next()
 *               yields for \a doc, or a previous result to continue the
 *               search among the siblings after it.
 *  \param tag   The tag index to look for.
 *  \return The first matching node, or NULL if there is no match. */
xml_node *
xmlt_find (xml_node *doc, xml_node* iter, int tag)
{
  xml_node *cursor;

  assert (doc != NULL);

  cursor = (iter == NULL) ? xmlt_get_next (doc, doc)
                          : xmlt_get_next_shallow (iter);

  for (; cursor != NULL; cursor = xmlt_get_next_shallow (cursor))
    {
      if (cursor->tag == tag)
        break;
    }

  return cursor;
}

/*-------------------------------------------------------- xmlt_find_if --*/
/** Find the next node that is accepted by a predicate, on a single level
 *  of the tree.
 *
 *  \param doc      The document.
 *  \param iter     NULL to start the search at the node xmlt_get_next()
 *                  yields for \a doc, or a previous result to continue the
 *                  search among the siblings after it.
 *  \param compare  Predicate; a non-zero return value accepts the node.
 *  \return The first accepted node, or NULL if there is none. */
xml_node *
xmlt_find_if (xml_node *doc, xml_node* iter, int(*compare)(xml_node*))
{
  xml_node *cursor;

  cursor = (iter != NULL) ? xmlt_get_next_shallow (iter)
                          : xmlt_get_next (doc, doc);

  for (; cursor != NULL; cursor = xmlt_get_next_shallow (cursor))
    {
      if (compare (cursor))
        break;
    }

  return cursor;
}

/*------------------------------------------------------- xmlt_for_each --*/
/** Call a function for every node of a (sub)tree.
 *
 *  The nodes are visited in the order in which xmlt_get_next() yields
 *  them, starting with \a doc itself.
 *
 *  \param doc        Root of the (sub)tree; must not be NULL.
 *  \param user_data  Opaque pointer that is passed on to \a func.
 *  \param func       The function to call. */
void
xmlt_for_each (xml_node *doc, void *user_data, void (*func)(void*, xml_node*))
{
  xml_node *node = doc;

  assert (doc);

  for (;;)
    {
      func (user_data, node);

      node = xmlt_get_next (node, doc);
      if (!node)
        break;
    }
}

/*----------------------------------------------------- xmlt_get_attrib --*/
/** Look up the value of an attribute of a node.
 *
 *  \param node       The node to inspect.
 *  \param attribute  Index of the wanted attribute.
 *  \return The attribute's value, which stays owned by the node, or NULL
 *          if the node has no attribute with this index. */
const char*
xmlt_get_attrib (xml_node *node, int attribute)
{
  GSList *hit = g_slist_find_custom (node->attributes, &attribute,
                                     _attr_compare);

  if (hit == NULL)
    return NULL;

  return ((xml_attribute *) hit->data)->value;
}

/*------------------------------------------------ xmlt_get_first_cdata --*/
/** Get the 'cdata' string of the first child of a node.
 *
 *  \param node  The node to inspect; may be NULL.
 *  \return The 'cdata' field of the first child, which stays owned by the
 *          tree, or NULL if \a node is NULL or has no children. */
const char*
xmlt_get_first_cdata (xml_node *node)
{
  if (node != NULL && node->children != NULL)
    {
      xml_node *head = (xml_node *) node->children->data;

      if (head)
        return head->cdata;
    }

  return NULL;
}


#ifdef UNIT_TEST_XMLT_C

/* UNIT_CFLAGS -O2 -lexpat -lglib-2.0 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include */
/* UNIT_EXTRA  ../common/dictionary.c */

/** Document callback for the unit test: remember the finished document.
 *
 *  \param userdata  Address of the xml_node pointer to store it in.
 *  \param document  The finished document. */
void test_callback (void *userdata, xml_node *document)
{
  xml_node **slot = (xml_node **) userdata;

  *slot = document;
}

/** Unit test helper: advance the iterator and insist on an element node
 *  with the given tag.  The process exits with status 1 on a mismatch.
 *
 *  \param doc_iter  In/out: the iterator to advance.
 *  \param doc       The document being walked.
 *  \param tag       The tag index the next node must have. */
void
test_next_tag (xml_node **doc_iter, xml_node *doc, int tag)
{
  xml_node *node;

  *doc_iter = xmlt_get_next (*doc_iter, doc);

  if (!doc_iter || !*doc_iter)
    {
      printf ("xmlt_get_next() failed, iterator is NULL\n");
      exit (1);
    }

  node = *doc_iter;

  if (node->type != NODE)
    {
      printf ("xmlt_get_next() failed, got CDATA, expected NODE\n");
      exit (1);
    }

  if (node->tag != tag)
    {
      printf ("xmlt_get_next() failed, tag is %i instead of %i\n",
              node->tag, tag);
      exit (1);
    }
}

/** Unit test helper: advance the iterator and insist on a character data
 *  node with the given text.  The process exits with status 1 on a
 *  mismatch.
 *
 *  \param doc_iter  In/out: the iterator to advance.
 *  \param doc       The document being walked.
 *  \param cdata     The text the next node must hold. */
void
test_next_cdata (xml_node **doc_iter, xml_node *doc, const char* cdata)
{
  xml_node *node;

  *doc_iter = xmlt_get_next (*doc_iter, doc);

  if (!doc_iter || !*doc_iter)
    {
      printf ("xmlt_get_next() failed, iterator is NULL\n");
      exit (1);
    }

  node = *doc_iter;

  if (node->type != CDATA)
    {
      printf ("xmlt_get_next() failed, got NODE, expected CDATA\n");
      exit (1);
    }

  if (strcmp (node->cdata, cdata) != 0)
    {
      printf ("xmlt_get_next() failed, cdata is '%s' instead of '%s'.\n",
              node->cdata, cdata);
      exit (1);
    }
}

/** Unit test entry point.
 *
 *  Parses one fixed document, walks the resulting tree node by node while
 *  checking tags, character data and attributes, then rescans the tree
 *  with a second pair of dictionaries and walks it again.  Returns 0 when
 *  every check passes; any failed check exits with status 1. */
int main (int argc, char **argv)
{
  /* The dictionaries below hand out pointers into these arrays, so the
   * array elements are the tag and attribute indices seen in the tree. */
  int tag_array[]  = { 1, 2, 3, 4, 5 };
  int attr_array[] = { 10, 20, 30, 40, 50 };
  xml_node *finished_doc = 0;
  xml_node *doc_iter;
  const char* attr_value;
  
  /* NOTE(review): the entries of both dictionaries are listed in
   * alphabetical order -- presumably dict_lookup() depends on that;
   * confirm against dictionary.c before adding entries. */
  dict_pair tag_dict[] = 
    {
      { "four",  tag_array + 3 },
      { "one",   tag_array + 0 },
      { "three", tag_array + 2 },
      { "two",   tag_array + 1 }
    };
  
  dict_pair attr_dict[] = 
    {
        { "cuatro", attr_array + 3 },
        { "dos",    attr_array + 1 },
        { "tres",   attr_array + 2 },
        { "uno",    attr_array + 0 }
    };
  
  /* The names in the rescan dictionaries are deliberately missing from
   * the ones above, so they are unknown after the initial parse. */
  dict_pair rescan_tag_dict[] =
    {
        { "rescanned", tag_array + 4 }
    };
    
  dict_pair rescan_attr_dict[] =
    {
        { "rescan", attr_array + 4 }
    };
    
  dictionary tags  = { tag_dict , sizeof (tag_dict) / sizeof (dict_pair) };
  dictionary attrs = { attr_dict, sizeof (attr_dict) / sizeof (dict_pair) };
  
  dictionary r_tags  = { rescan_tag_dict , 
                         sizeof (rescan_tag_dict) / sizeof (dict_pair) };
  dictionary r_attrs = { rescan_attr_dict, 
                         sizeof (rescan_attr_dict) / sizeof (dict_pair) };

  int read;
  
  char *xmldata = "<one><two>Inside two</two><three uno=\"een\" dos=\"twee\">" \
    "<four tres=\"drie\" cuatro=\"vier\">Inside four</four></three>" \
    "<rescanned rescan=\"foo\" />Closing</one>";
  
  /* test_callback() stores the finished document in finished_doc. */
  xml_context* context = xmlt_create_context (test_callback, &finished_doc, 
                                              &tags, &attrs);
  
  if (!context)
    {
      printf ("xmlt_create_context() failed\n");
      exit (1);
    }

  if (!xmlt_parse (context, xmldata, strlen (xmldata)))
    {
      printf ("xmlt_parse() failed\n");
      exit (1);
    }

  /* The context gave up the document when it called the callback, so the
   * tree in finished_doc is still used after the context is gone. */
  xmlt_free_context (context);

  if (!finished_doc)
    {
      printf ("xmlt_parse() failed, no document returned\n");
      exit (1);
    }

  /* First pass: the root must be <one>, and it must not have a parent. */
  doc_iter = finished_doc;
  if (doc_iter->tag != tag_array[0])
    {
      printf ("xmlt_parse() failed, root tag is %i instead of %i\n",
              doc_iter->tag, tag_array[0]);
      exit (1);
    }
  
  if (doc_iter->parent != 0)
    {
      printf ("xmlt_parse() failed, first tag is not root tag\n");
      exit (1);
    }
  
  test_next_tag (&doc_iter, finished_doc, tag_array[1]);
  
  /* <two> has no attributes at all, so this lookup has to fail. */
  attr_value = xmlt_get_attrib (doc_iter, attr_array[0]);
  if (attr_value)
    {
      printf ("xmlt_get_attribute() returned something.\n");
      exit (1);
    }
  
  test_next_cdata(&doc_iter, finished_doc, "Inside two");
  test_next_tag(&doc_iter, finished_doc, tag_array[2]);

  /* <three> carries uno="een" and dos="twee". */
  attr_value = xmlt_get_attrib (doc_iter, attr_array[0]);
  if (!attr_value)
    {
      printf ("xmlt_get_attribute() didn't return anything.\n");
      exit (1);
    }
  
  if (strcmp (attr_value, "een"))
    {
      printf ("xmlt_get_attribute() returned '%s' instead of 'een'.\n",
              attr_value);
      exit (1);
    }
      
  attr_value = xmlt_get_attrib (doc_iter, attr_array[1]);
  if (!attr_value)
    {
      printf ("iterator is at tag %i.\n", doc_iter->tag);
      printf ("xmlt_get_attribute() didn't return anything (2).\n");
      exit (1);
    }
  
  if (strcmp (attr_value, "twee"))
    {
      printf ("xmlt_get_attribute() returned '%s' instead of 'twee'.\n",
              attr_value);
      exit (1);
    }
      
  test_next_tag (&doc_iter, finished_doc, tag_array[3]);
  test_next_cdata (&doc_iter, finished_doc, "Inside four");
  test_next_tag (&doc_iter, finished_doc, XML_UNKNOWN_TAG);

  /* An unknown tag keeps its name as a string in 'cdata'. */
  if (strcmp (doc_iter->cdata, "rescanned"))
      exit (1);
  
  test_next_cdata (&doc_iter, finished_doc, "Closing");
  
  /* Second pass: after the rescan, <rescanned> and its attribute must be
   * known; everything else must be the way it was. */
  xmlt_rescan_document (finished_doc, &r_tags, &r_attrs);
  doc_iter = finished_doc;
  
  test_next_tag (&doc_iter, finished_doc, tag_array[1]);
  test_next_cdata (&doc_iter, finished_doc, "Inside two");
  test_next_tag (&doc_iter, finished_doc, tag_array[2]);
  test_next_tag (&doc_iter, finished_doc, tag_array[3]);
  test_next_cdata (&doc_iter, finished_doc, "Inside four");
  test_next_tag (&doc_iter, finished_doc, tag_array[4]);

  if (doc_iter->cdata != NULL)
    {
      printf ("Unknown tag in doc_iter->cdata was not cleaned up "
              "after rescan.\n");
      exit (1);
    }

  attr_value = xmlt_get_attrib (doc_iter, attr_array[4]);
  if (attr_value == NULL)
    {
      printf ("At tag %i\n", doc_iter->tag);
      printf ("xmlt_get_attribute() didn't return anything (3).\n");
      exit (1);
    }
  
  if (strcmp (attr_value, "foo"))
      {
        printf ("xmlt_get_attribute() returned '%s' instead of 'foo'.\n",
                attr_value);
        exit (1);
      }

  test_next_cdata (&doc_iter, finished_doc, "Closing");

  return 0;
}

#endif
