/* $Id: selfsyncio.c 658 2006-05-13 14:50:30Z jim $
   teebu - An archiving tool
   Copyright (C) 2006 Jim Farrand

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program; if not, write to the Free Software Foundation, Inc., 51
   Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */


#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "logging.h"
#include "terminatedint.h"
#include "selfsyncio.h"

/* Yield the larger of A and B (A when they compare equal).  The selected
   argument is evaluated twice, so do not pass expressions with side
   effects. */
#define MAX(a,b) ((a) >= (b) ? (a) : (b))

/* Private state of one self syncing output stream. */
struct selfsyncio_out_data
{
  /* Stream that receives the encoded bytes. */
  out_stream_t out_base;
  /* Number of bytes in out_escape. */
  int out_escape_len;
  /* When true, out_base is released together with this stream. */
  bool out_own_base;
  /* The escape sequence.  Only the pointer is stored, the bytes are not
     copied. */
  const char *out_escape;
  /* Count of complete escapes seen in the data but not yet written. */
  uint32_t out_pending_escapes;
  /* Number of leading escape bytes matched by data not yet written. */
  uint32_t out_pending_partial;
  /* When true, a mark is also forwarded to out_base after being encoded. */
  bool out_pass_marks;
};

/* Destructor for a self syncing output stream: releases the base stream
   when this stream owns it, then frees the private state. */
static void
selfsyncio_release_out (void *uncast_data)
{
  struct selfsyncio_out_data *const state = uncast_data;
  assert (state);

  if (state->out_own_base)
    {
      release_out (state->out_base);
    }

  free (state);
}

/* Count how many leading bytes of BUF's data (looking at no more than
   AMOUNT bytes) can be treated as free of escapes.

   Returns the offset at which a complete escape, or a partial escape cut
   off by the end of the examined data, begins.  If a partial match is
   broken by a mismatching byte, the offset of that mismatching byte is
   returned, so the scan never restarts inside one call.  AMOUNT must be
   greater than zero. */
static int
check_buffer_for_escape (int escape_len, const char *escape, iobuffer_t * buf,
                         size_t amount)
{
  assert (amount > 0);

  const char *bytes = iobuffer_data_pointer (buf);

  int clean = 0;                // bytes known to precede any escape match
  int matched = 0;              // escape bytes matched so far

  for (int pos = 0; pos < amount; pos++)
    {
      if (bytes[pos] != escape[matched])
        {
          // A broken partial match: everything before this byte is safe
          if (matched > 0)
            return pos;

          clean = pos + 1;
          continue;
        }

      // One more byte of the escape matched; stop on a complete escape
      matched++;
      if (matched == escape_len)
        return clean;
    }

  return clean;
}

/* Write out whatever escape data is being held back.

   Complete escapes are written as one copy of the escape followed by the
   encoded repeat count; a held back partial escape is written as the
   matching prefix of the escape.  Each counter is reset to zero once its
   bytes have been written.  Returns the first error reported by the base
   stream, or OUTPUT_OK. */
// TODO: Refactor this so it takes data and sets pending_escapes etc
static output_err_t
output_pending_escapes (struct selfsyncio_out_data *data)
{
  assert (data);

  iobuffer_t iobuf;
  output_err_t err;

  if (data->out_pending_escapes > 0)
    {
      DEBUGF ("Outputting escapes: %" PRIu32, data->out_pending_escapes);

      // First the escape itself ...
      init_iobuffer_with (&iobuf, data->out_escape_len, data->out_escape_len,
                          (char *) data->out_escape);
      err = output_all (data->out_base, &iobuf);
      if (OUTPUT_OK != err)
        return err;

      // ... then the repeat count, encoded one byte per call until the
      // encoder reports there is nothing more to emit
      char count_bytes[UINT32_MAX_BYTES];
      uint32_t remaining = data->out_pending_escapes;
      int count_len = 0;
      while (encode_uint32_byte (&count_bytes[count_len++], &remaining))
        continue;

      // Only the first count_len bytes of the array hold data
      init_iobuffer_with (&iobuf, UINT32_MAX_BYTES, count_len, count_bytes);
      err = output_all (data->out_base, &iobuf);
      if (OUTPUT_OK != err)
        return err;

      data->out_pending_escapes = 0;
    }

  if (data->out_pending_partial > 0)
    {
      DEBUGF ("Outputting partial escape: %" PRIu32,
              data->out_pending_partial);

      // The partial escape is just a prefix of the escape
      init_iobuffer_with (&iobuf, data->out_pending_partial,
                          data->out_pending_partial,
                          (char *) data->out_escape);
      err = output_all (data->out_base, &iobuf);
      if (OUTPUT_OK != err)
        return err;

      data->out_pending_partial = 0;
    }

  return OUTPUT_OK;
}

/* Handle data that starts with, or continues, an escape.

   Bytes of BUF (at most AMOUNT) are consumed for as long as they keep
   following the escape sequence, updating the pending escape counters.  At
   the first byte that breaks the sequence, the bytes before it are marked
   as taken and the pending escapes are written out; the breaking byte is
   left in BUF.  If all AMOUNT bytes follow the sequence they are marked as
   taken and nothing is written yet. */
static output_err_t
output_escape_next (struct selfsyncio_out_data *data,
                    iobuffer_t * buf, const size_t amount)
{
  assert (data);

  const int escape_len = data->out_escape_len;
  const char *escape = data->out_escape;
  const char *bytes = iobuffer_data_pointer (buf);

  for (size_t taken = 0; taken < amount; taken++)
    {
      if (bytes[taken] != escape[data->out_pending_partial])
        {
          // The data left the escape sequence.  Something must be pending,
          // otherwise this function should not have been called.
          assert (data->out_pending_escapes || data->out_pending_partial);

          // Consume everything before the diverging byte
          iobuffer_mark_taken (buf, taken);

          DEBUG ("Diverged from escape");

          return output_pending_escapes (data);
        }

      // Another escape byte matched; roll over on a complete escape
      data->out_pending_partial++;
      if (data->out_pending_partial == escape_len)
        {
          data->out_pending_escapes++;
          data->out_pending_partial = 0;
        }
    }

  // Every examined byte belonged to the escape sequence: swallow them all
  // and keep the counters for the next call
  iobuffer_mark_taken (buf, amount);

  DEBUG ("Buffered potential escape");

  return OUTPUT_OK;
}

/* Output callback of a selfsyncio stream: encode up to AMOUNT bytes of
   BUF.

   With no escape pending, the escape free prefix of the data is passed
   straight to the base stream; if the data begins with a (possible)
   escape, it is absorbed into the pending counters instead.  With an
   escape pending, the data is first checked for a continuation of that
   escape.  Returns OUTPUT_ERR_BAD when the stream state is missing. */
static output_err_t
selfsyncio_output_limited (void *uncast_data, iobuffer_t * buf,
                           const size_t amount)
{
  struct selfsyncio_out_data *data =
    (struct selfsyncio_out_data *) uncast_data;

  if (!data)
    return OUTPUT_ERR_BAD;

  if (!data->out_pending_escapes && !data->out_pending_partial)
    {
      // Nothing held back: find out how much can go out untouched
      const size_t clean_len =
        check_buffer_for_escape (data->out_escape_len, data->out_escape, buf,
                                 amount);
      if (0 != clean_len)
        {
          // Pass the clean prefix on; any escape is handled on a later call
          DEBUGF ("Outputting escape free data: %zu", clean_len);
          return output_limited (data->out_base, buf, clean_len);
        }

      // The data opens with a potential escape
      assert (*iobuffer_data_pointer (buf) == *data->out_escape);

      return output_escape_next (data, buf, amount);
    }

  // Escape data is left over from an earlier call; see whether this data
  // continues it
  const size_t size_before = iobuffer_data_size (buf);
  const output_err_t err = output_escape_next (data, buf, amount);
  if (OUTPUT_OK != err)
    return err;

  if (iobuffer_data_size (buf) != size_before)
    return OUTPUT_OK;

  // Nothing was consumed, so go round again
  return selfsyncio_output_limited (uncast_data, buf, amount);
}

/* Mark callback of a selfsyncio stream.

   Any held back escape data is written first.  The mark itself is encoded
   as the escape followed by a single zero byte.  When out_pass_marks is
   set, a mark is then also emitted on the base stream.  Returns
   OUTPUT_ERR_BAD when the stream state is missing, otherwise the first
   error encountered or OUTPUT_OK. */
static output_err_t
selfsyncio_output_mark (void *uncast_data)
{
  struct selfsyncio_out_data *data =
    (struct selfsyncio_out_data *) uncast_data;

  if (!data)
    return OUTPUT_ERR_BAD;

  if (data->out_pending_escapes || data->out_pending_partial)
    {
      const output_err_t flush_err = output_pending_escapes (data);
      if (OUTPUT_OK != flush_err)
        return flush_err;
    }

  iobuffer_t piece;

  // The escape; the cast drops const but the bytes are only read
  init_iobuffer_with (&piece, data->out_escape_len, data->out_escape_len,
                      (char *) data->out_escape);
  output_err_t err = output_all (data->out_base, &piece);
  if (OUTPUT_OK != err)
    return err;

  // A zero count after the escape is what identifies a mark
  const char zero = '\0';
  init_iobuffer_with (&piece, 1, 1, (char *) &zero);
  err = output_all (data->out_base, &piece);
  if (OUTPUT_OK != err)
    return err;

  if (data->out_pass_marks)
    return output_mark (data->out_base);

  return OUTPUT_OK;
}

/* Close callback of a selfsyncio output stream.

   Held back escape data is written out, then the base stream is closed.
   The base stream is closed even if writing the held back data failed; in
   that case the write error is the one returned.  Returns OUTPUT_ERR_BAD
   when the stream state is missing. */
static output_err_t
selfsyncio_close_out (void *uncast_data)
{
  struct selfsyncio_out_data *data =
    (struct selfsyncio_out_data *) uncast_data;

  if (!data)
    return OUTPUT_ERR_BAD;

  output_err_t flush_err = OUTPUT_OK;
  if (data->out_pending_escapes || data->out_pending_partial)
    flush_err = output_pending_escapes (data);

  const output_err_t close_err = close_out (data->out_base);

  return (OUTPUT_OK != flush_err) ? flush_err : close_err;
}

/* Callback table handed to open_out for self syncing output streams. */
static out_stream_type_t selfsyncio_out_type = {
  .output_limited = selfsyncio_output_limited,
  .output_mark = selfsyncio_output_mark,        // Writes the escape followed by a zero byte
  .flush = NULL,                // Unimplemented
  .close_out = selfsyncio_close_out,
  .release_out = selfsyncio_release_out
};

/* Check the given escape is valid.  An escape must be at least 1 byte long
 * and must not contain the same byte value twice.
 *
 * Returns true when ESCAPE can be used.  Returns false when ESCAPE is NULL,
 * when ESCAPE_LEN is zero or negative, or when some byte value occurs more
 * than once in the first ESCAPE_LEN bytes of ESCAPE. */
static bool
check_escape (int escape_len, const char *escape)
{
  if (escape_len <= 0 || !escape)
    return false;

  // One flag per possible byte value, so a single pass finds any repeat
  bool seen[UCHAR_MAX + 1] = { false };

  for (int i = 0; i < escape_len; i++)
    {
      // Index by the unsigned value: plain char may be negative
      const unsigned char c = (unsigned char) escape[i];

      if (seen[c])
        return false;
      seen[c] = true;
    }

  return true;
}

/* Create a self syncing output stream that writes to BASE_OUTS.

   ESCAPE (ESCAPE_LEN bytes) must satisfy check_escape.  Only the pointer
   is kept, so the bytes must remain valid while the stream is in use.
   OUT_PASS_MARKS makes marks also be emitted on BASE_OUTS; OWN_BASE makes
   BASE_OUTS be released along with the new stream.

   Returns NULL when BASE_OUTS is missing, the escape is invalid, or memory
   cannot be allocated; otherwise the result of open_out. */
out_stream_t
selfsync_open_out (int escape_len, const char *escape, out_stream_t base_outs,
                   bool out_pass_marks, bool own_base)
{
  if (!base_outs)
    return NULL;

  if (!check_escape (escape_len, escape))
    return NULL;

  struct selfsyncio_out_data *state = malloc (sizeof *state);
  if (!state)
    return NULL;

  *state = (struct selfsyncio_out_data) {
    .out_base = base_outs,
    .out_escape_len = escape_len,
    .out_own_base = own_base,
    .out_escape = escape,
    .out_pending_escapes = 0,
    .out_pending_partial = 0,
    .out_pass_marks = out_pass_marks
  };

  return open_out (&selfsyncio_out_type, state);
}

/* Private state of one self syncing input stream. */
struct selfsyncio_in_data
{
  /* Stream the encoded bytes are read from. */
  in_stream_t in_base;
  /* in_own_base: in_base is released together with this stream.
     in_pass_marks: consuming a mark also calls input_mark on in_base.
     in_pending_mark: a mark was decoded while reading data and has not yet
     been consumed through the mark callback. */
  bool in_own_base, in_pass_marks, in_pending_mark;
  /* in_escape_len: number of bytes in in_escape.
     in_pending_escapes: decoded repeat count of escapes still to be handed
     to the reader.
     in_escape_next: index into in_escape of the next byte to hand out. */
  int in_escape_len, in_pending_escapes, in_escape_next;
  /* The escape sequence.  Only the pointer is stored. */
  const char *in_escape;
  /* Encoded bytes read from in_base and not yet decoded.  Its data block
     is freed when the stream is released. */
  iobuffer_t in_buffer;
};

/* We can't accept marks in the underlying input stream.  Use this macro to
 * convert marks to failures: INPUT_MARK becomes INPUT_ERR_FAILED, every
 * other value is passed through unchanged.  The argument is evaluated
 * twice when it is not INPUT_MARK. */
#define BASE_IN_ERR(x) ((INPUT_MARK == (x)) ? INPUT_ERR_FAILED : (x))

/* Read from IN_BASE into INTERNAL_BUFFER until it holds enough bytes for a
   complete escape plus the longest encoded count that can follow it, or
   until the base stream reports something other than INPUT_OK.

   PENDING_IN and SPACE_IN are the buffer's data size and free size as seen
   by the caller.  If the data is too short and the free space cannot hold
   the missing bytes, the buffer is shunted first.  Returns the result of
   the last read. */
static input_err_t
refill_buffer (in_stream_t in_base,
               iobuffer_t * internal_buffer, int escape_len, int pending_in,
               int space_in)
{
  // Room for the escape and a 32bit mark terminated int after it
  const int wanted = escape_len + UINT32_MAX_BYTES;

  if (pending_in < wanted && space_in < (wanted - pending_in))
    space_in += iobuffer_shunt (internal_buffer);

  for (;;)
    {
      const input_err_t err = input (in_base, internal_buffer);
      const int have = iobuffer_data_size (internal_buffer);

      if (INPUT_OK != err || have >= wanted)
        return err;
    }
}

/* Input callback of a selfsyncio stream: decode up to AMOUNT bytes into
 * BUF.
 *
 * - If a decoded mark has not been consumed yet, INPUT_MARK is returned and
 *   nothing is read.
 * - If decoded escapes are still owed to the reader, copies of the escape
 *   are written into BUF until AMOUNT bytes were produced or the owed
 *   escapes run out.
 * - Otherwise the internal buffer is refilled if needed and examined.
 *   Escape free data is copied to BUF.  Data starting with an escape has
 *   the escape and the count after it decoded: a zero count is a mark
 *   (INPUT_MARK is returned), any other count is the number of literal
 *   escapes to hand to the reader.
 *
 * Returns INPUT_ERR_BAD when the stream state is missing.  Errors from the
 * base stream are returned with marks converted to failures; an end of file
 * from the base stream is only reported once the internal buffer is empty.
 * AMOUNT must be greater than zero. */
static input_err_t
selfsyncio_input_limited (void *uncast_data, iobuffer_t * buf, size_t amount)
{
  struct selfsyncio_in_data *data = (struct selfsyncio_in_data *) uncast_data;

  if (!data)
    return INPUT_ERR_BAD;

  if (data->in_pending_mark)
    {
      return INPUT_MARK;
    }
  else if (data->in_pending_escapes || data->in_escape_next)
    {
      // there are pending escapes, write them into the buffer

      char *char_data = iobuffer_free_pointer (buf);
      int pos;
      for (pos = 0; pos < amount; pos++)
        {
          // Grab a character from the escape and add it to the buffer
          char_data[pos] = data->in_escape[data->in_escape_next];
          if (data->in_escape_len == ++(data->in_escape_next))
            {
              // Finished one copy of the escape
              data->in_escape_next = 0;
              if (0 == --(data->in_pending_escapes))
                {
                  // That was the last owed escape; count the byte just
                  // written, since the loop increment is skipped by break
                  pos++;
                  break;
                }
            }
        }

      assert (pos > 0);
      iobuffer_mark_added (buf, pos);

      // We filled amount bytes, or we ate all the pending escapes
      // Either way, all is good.
      return INPUT_OK;
    }
  else
    {
      // If we have less that 1 escape worth of data,
      // OR, User wants more than we have, and there is space in the buffer
      iobuffer_t *internal_buffer = &(data->in_buffer);

      int pending_in_buffer = iobuffer_data_size (internal_buffer),
        space_in_buffer = iobuffer_free_size (internal_buffer);

      if ((pending_in_buffer < (data->in_escape_len + UINT32_MAX_BYTES))
          || (pending_in_buffer < amount && space_in_buffer > 0))
        {

          input_err_t err = refill_buffer (data->in_base, internal_buffer,
                                           data->in_escape_len,
                                           pending_in_buffer,
                                           space_in_buffer);

          // End of file is not an error while buffered data remains
          pending_in_buffer = iobuffer_data_size (internal_buffer);
          if (INPUT_OK != err && (INPUT_EOF != err || pending_in_buffer == 0))
            {
              return BASE_IN_ERR (err);
            }
        }

      // Never look at more than is actually buffered
      if (pending_in_buffer < amount)
        amount = pending_in_buffer;

      assert (amount > 0);

      // Check for an escape in the data up to required amount
      const int escape_free_len =
        check_buffer_for_escape (data->in_escape_len, data->in_escape,
                                 internal_buffer, amount);

      if (0 == escape_free_len)
        {
          // There is potential escape right at the start of the data, we need to deal with it
          char *char_data = iobuffer_data_pointer (internal_buffer);
          assert (*char_data == *data->in_escape);

          // Refill buffer guaranteed that we have enough data to decode a
          // full escape, unless we are at the end of the file

          // Check we have the full escape and at least one count byte
          int in_pending = iobuffer_data_size (internal_buffer);
          if (in_pending < (data->in_escape_len + 1))
            {
              // We are at the end of the file, so this data may look like
              // an escape but isn't; pass it through as plain data.  amount
              // was clamped to the buffered size above, so this hands over
              // no more than the caller asked for.
              iobuffer_copy_limited (internal_buffer, buf, amount);
              return INPUT_OK;
            }

          int used;
          for (used = 0; used < data->in_escape_len; used++)
            {
              if (char_data[used] != data->in_escape[used])
                {
                  // The data is not really an escape!  We can output this much
                  if (used < amount)
                    amount = used;
                  iobuffer_copy_limited (internal_buffer, buf, amount);
                  return INPUT_OK;
                }
            }

          // Decode the int after the escape
          uint32_t x = 0;
          for (used = data->in_escape_len; used < in_pending; used++)
            {
              if (!decode_uint32_byte
                  (&x, used - data->in_escape_len, char_data + used))
                {
                  // Last byte of the count; include it in what was used
                  used++;
                  break;
                }
            }

          assert (used <= in_pending);

          iobuffer_mark_taken (internal_buffer, used);

          // If 0, this is a mark, so set the mark bit and return
          if (0 == x)
            {
              data->in_pending_mark = true;
              DEBUGF ("Mark found: %d bytes", used);
              return INPUT_MARK;
            }
          // Otherwise, this is escape data, set the appropriate flags
          // and retry the input

          DEBUGF ("Escapes found: %" PRIu32 " repeats, %d bytes", x, used);
          data->in_pending_escapes = x;
          return selfsyncio_input_limited (uncast_data, buf, amount);
        }
      else
        {
          // There is some non-escape data in the buffer, so output it
          iobuffer_copy_limited (internal_buffer, buf, escape_free_len);
          return INPUT_OK;
        }
    }
}

/* Mark callback of a selfsyncio input stream: consume a mark.
 *
 * If a mark was already decoded while reading data, it is cleared.
 * Otherwise the next buffered bytes must be the escape followed by a zero
 * count; they are then removed from the internal buffer.  In both cases
 * input_mark is also called on the base stream when in_pass_marks is set,
 * and its result is returned.
 *
 * Returns INPUT_ERR_NO_MARK when escapes are still owed to the reader or
 * the next data is not a mark, INPUT_ERR_BAD when the stream state is
 * missing, and errors from the base stream with marks converted to
 * failures. */
static input_err_t
selfsyncio_input_mark (void *uncast_data)
{
  struct selfsyncio_in_data *data = (struct selfsyncio_in_data *) uncast_data;
  if (!data)
    return INPUT_ERR_BAD;

  if (data->in_pending_mark)
    {
      // Flag is set, so a mark has been encountered during input
      data->in_pending_mark = false;
      DEBUG ("Mark skipped");
      if (data->in_pass_marks)
        return input_mark (data->in_base);

      return INPUT_OK;
    }

  if (data->in_pending_escapes || data->in_escape_next)
    {
      // If there are escapes pending, there cannot be a mark next
      DEBUG ("No mark - escapes pending");
      return INPUT_ERR_NO_MARK;
    }

  iobuffer_t *internal_buffer = &(data->in_buffer);
  char *char_data = iobuffer_data_pointer (internal_buffer);

  int pending_in_buffer = iobuffer_data_size (internal_buffer),
    space_in_buffer = iobuffer_free_size (internal_buffer);

  if ((pending_in_buffer < (data->in_escape_len + UINT32_MAX_BYTES)))
    {
      input_err_t err = refill_buffer (data->in_base, internal_buffer,
                                       data->in_escape_len, pending_in_buffer,
                                       space_in_buffer);

      // The refill may have moved the data, so fetch the pointer again
      char_data = iobuffer_data_pointer (internal_buffer);
      pending_in_buffer = iobuffer_data_size (internal_buffer);
      // End of file is not an error while buffered data remains
      if (INPUT_OK != err && (INPUT_EOF != err || pending_in_buffer == 0))
        {
          return BASE_IN_ERR (err);
        }
    }
  // Refill buffer guaranteed that we have enough data to decode a
  // full escape, unless we are at the end of the file

  // A mark is the escape plus at least one count byte.  Without the count
  // byte a bare escape at the end of the input would decode as count 0 and
  // be mistaken for a mark.
  int in_pending = iobuffer_data_size (internal_buffer);
  if (in_pending < (data->in_escape_len + 1))
    {
      // Not enough data left in the file for there to be a mark
      return INPUT_ERR_NO_MARK;
    }

  int used;
  for (used = 0; used < data->in_escape_len; used++)
    {
      if (char_data[used] != data->in_escape[used])
        {
          // The data is not really an escape!
          return INPUT_ERR_NO_MARK;
        }
    }

  // Decode the int after the escape
  uint32_t x = 0;
  for (used = data->in_escape_len; used < in_pending; used++)
    {
      if (!decode_uint32_byte
          (&x, used - data->in_escape_len, char_data + used))
        {
          // Last byte of the count; include it in what was used
          used++;
          break;
        }
    }

  assert (used <= in_pending);

  // A non-zero count means escaped data, not a mark; leave it buffered
  if (0 != x)
    return INPUT_ERR_NO_MARK;

  iobuffer_mark_taken (internal_buffer, used);
  DEBUGF ("Mark found: %d bytes", used);
  // Wrong... this is why pass marks should only be used for stats
  if (data->in_pass_marks)
    return input_mark (data->in_base);
  return INPUT_OK;
}

/* Recover callback of a selfsyncio input stream: forwards the request to
   the base stream.  Returns INPUT_ERR_BAD when the stream state is
   missing. */
static input_err_t
my_input_recover (void *uncast_data)
{
  struct selfsyncio_in_data *const state = uncast_data;

  if (!state)
    {
      return INPUT_ERR_BAD;
    }

  DEBUG ("Error recovery");

  return input_recover (state->in_base);
}

static input_err_t
selfsyncio_close_in (void *uncast_data)
{
  struct selfsyncio_in_data *data = (struct selfsyncio_in_data *) uncast_data;
  if (!data)
    return OUTPUT_ERR_BAD;
  return close_in (data->in_base);
}

/* Destructor for a self syncing input stream: frees the internal buffer's
   data block, releases the base stream when this stream owns it, then
   frees the private state. */
static void
selfsyncio_release_in (void *uncast_data)
{
  assert (uncast_data);

  struct selfsyncio_in_data *const state = uncast_data;

  free (iobuffer_data_block (&state->in_buffer));

  if (state->in_own_base)
    {
      release_in (state->in_base);
    }

  free (state);
}

/* Callback table handed to open_in for self syncing input streams. */
static in_stream_type_t selfsyncio_in_type = {
  .input_limited = selfsyncio_input_limited,
  .input_mark = selfsyncio_input_mark,
  .close_in = selfsyncio_close_in,
  .release_in = selfsyncio_release_in,
  // Forwards to input_recover on the base stream
  .input_recover = my_input_recover
};

/* Create a self syncing input stream that reads from BASE_INS.
 *
 * ESCAPE (ESCAPE_LEN bytes) must satisfy check_escape.  Only the pointer is
 * kept, so the bytes must remain valid while the stream is in use.
 * BUFFER_SIZE is the size in bytes of the internal buffer allocated for
 * encoded data.  PASS_MARKS makes consuming a mark also call input_mark on
 * BASE_INS; OWN_BASE makes BASE_INS be released along with the new stream.
 *
 * Returns NULL when BASE_INS is missing, the escape is invalid, or memory
 * cannot be allocated; otherwise the result of open_in. */
in_stream_t
selfsync_open_in (int escape_len, const char *escape, size_t buffer_size,
                  in_stream_t base_ins, bool pass_marks, bool own_base)
{
  // Same argument checks as selfsync_open_out
  if (!base_ins || !check_escape (escape_len, escape))
    return NULL;

  struct selfsyncio_in_data *data = malloc (sizeof *data);
  if (!data)
    return NULL;

  char *buffer_block = malloc (buffer_size);
  if (!buffer_block)
    {
      // Don't leak the state or hand out a stream with no buffer
      free (data);
      return NULL;
    }
  init_iobuffer_with (&(data->in_buffer), buffer_size, 0, buffer_block);

  data->in_base = base_ins;
  data->in_escape = escape;
  data->in_escape_len = escape_len;
  data->in_pending_escapes = 0;
  data->in_escape_next = 0;
  data->in_pending_mark = false;
  data->in_own_base = own_base;
  data->in_pass_marks = pass_marks;

  return open_in (&selfsyncio_in_type, data);
}
