/* $Id: datapairstore.c 718 2006-06-01 17:19:53Z jim $
   teebu - An archiving tool
   Copyright (C) 2006 Jim Farrand

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program; if not, write to the Free Software Foundation, Inc., 51
   Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */


#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <limits.h>

#include "logging.h"
#include "size.h"
#include "terminatedint.h"
#include "datapairstore.h"
#include "fixedlenio.h"
#include "selfsyncio.h"
#include "gpgio.h"
#include "gzipio.h"
#include "bzip2io.h"
#include "nomarkio.h"
#include "remarkio.h"

/* Escape sequence passed to selfsync_open_out / selfsync_open_in, and its
 * length in bytes (not counting the terminating NUL).  Keep the two in
 * step. */
#define ESCAPE_SEQ "*#"
#define ESCAPE_LEN 2

/**
 * Limitation: Although the format itself supports data of any size, the
 * current implementation is limited to 2^64 bytes of data.
 */

/* State of the writing side of a data pair store. */
struct dps_out
{
  /* Selfsyncio stream wrapped around the stream given to open_dps_out. */
  out_stream_t out_base;
  /* out_base with chunk filters (gpg/gzip/bzip2) stacked on top.  Identical
   * to out_base until enable_output_filters adds a filter. */
  out_stream_t out_filtered;
  /* out_filtered wrapped in a remarker; names, lengths and small data are
   * written through this stream. */
  out_stream_t out_main;
  /* A stream handle opened to write a large chunk.  NULL if no handle is
   * opened. */
  out_stream_t out_data;
  /* Chunk size passed to remarkio_open_out whenever out_main is built. */
  off_t out_chunk_size;
};

/**
 * Create a data pair store writer on top of out_base.
 *
 * out_base is wrapped in a selfsync stream, and that in turn in a remark
 * stream using chunk_size.  No compression or encryption filter is active
 * until enable_output_filters is called.
 *
 * Returns the new handle, or NULL if one of the wrapper streams could not
 * be opened.  Allocation failure of the handle itself is reported through
 * MEMFAILED ().
 */
dps_out_t
open_dps_out (out_stream_t out_base, off_t chunk_size)
{
  dps_out_t dps = malloc (sizeof (struct dps_out));
  if (!dps)
    MEMFAILED ();

  dps->out_base = selfsync_open_out (ESCAPE_LEN, ESCAPE_SEQ, out_base, true, true);
  if (!dps->out_base)
    {
      free (dps);
      LOG (ERROR, "Couldn't open selfsync stream");
      return NULL;
    }

  // These are the same until compression etc enabled
  dps->out_filtered = dps->out_base;

  dps->out_chunk_size = chunk_size;
  dps->out_data = NULL;

  dps->out_main = remarkio_open_out (dps->out_filtered, dps->out_chunk_size, false);
  if (!dps->out_main)
    {
      LOG (ERROR, "Couldn't open remark stream");
      /* NOTE(review): the selfsync wrapper in dps->out_base is not torn down
       * here because the ownership implied by its open flags is not visible
       * from this file -- confirm whether it should be released as well. */
      free (dps);
      return NULL;
    }

  return dps;
}

/**
 * Close the main (remark) output stream of the store.
 *
 * Returns true when close_out reports OUTPUT_OK, false otherwise.
 */
bool
close_dps_out (dps_out_t dps)
{
  if (OUTPUT_OK != close_out (dps->out_main))
    return false;

  return true;
}

/**
 * Release the main output stream and free the store handle.
 *
 * Passing NULL is a no-op.  Only out_main is released explicitly here;
 * NOTE(review): presumably the wrapped streams are released through it --
 * confirm against the stream implementations.
 */
void
release_dps_out (dps_out_t dps)
{
  /* Check before touching any member: the handle may be NULL. */
  if (!dps)
    return;

  release_out (dps->out_main);
  free (dps);
}

/**
 * Rebuild the output filter chain of the store.
 *
 * The current filtered stream is flushed and a mark is written to the base
 * stream.  Starting again from out_base, the requested filters are stacked
 * in the order gpg (when gpg_passphrase_path is non-NULL), gzip, bzip2, and
 * a fresh remark stream is put on top as out_main.
 *
 * Must not be called while a big pair is open (asserted).
 *
 * Returns true on success.  On failure false is returned and out_filtered
 * keeps whatever filters were stacked before the failing step; out_main is
 * only replaced once every filter has opened.
 *
 * NOTE(review): the previous out_main is overwritten without being closed
 * or released here -- confirm that this is intended.
 */
bool
enable_output_filters (dps_out_t dps, bool gzip, bool bzip2,
                       const char *gpg_passphrase_path)
{
  assert (!dps->out_data);

  flush (dps->out_filtered);
  output_mark (dps->out_base);

  /* Restart the chain from the unfiltered selfsync stream. */
  dps->out_filtered = dps->out_base;

  if (gpg_passphrase_path)
    {
      out_stream_t encrypted = gpgio_open_out (dps->out_filtered, gpg_passphrase_path, true);
      if (!encrypted)
        {
          LOG (ERROR, "Couldn't open gpg stream");
          return false;
        }

      LOG (VERBOSE, "Enabling gpg encryption");
      dps->out_filtered = encrypted;
    }

  if (gzip)
    {
      out_stream_t gzipped = gzipio_open_out (5, dps->out_filtered, true);
      if (!gzipped)
        {
          LOG (ERROR, "Couldn't open gzip stream");
          return false;
        }

      LOG (VERBOSE, "Enabling gzip compression");
      dps->out_filtered = gzipped;
    }

  if (bzip2)
    {
      out_stream_t bzipped = bzip2io_open_out (5, dps->out_filtered, true);
      if (!bzipped)
        {
          LOG (ERROR, "Couldn't open bzip2 stream");
          return false;
        }

      LOG (VERBOSE, "Enabling bzip2 compression");
      dps->out_filtered = bzipped;
    }

  /* The remarker always sits on top of the finished filter chain. */
  dps->out_main = remarkio_open_out (dps->out_filtered, dps->out_chunk_size, false);
  if (dps->out_main)
    return true;

  LOG (ERROR, "Couldn't open remark stream");
  return false;
}

/**
 * Encode x with encode_uint64_byte, one byte per call, and write the
 * resulting bytes to outs.
 *
 * Returns true when output_all reports OUTPUT_OK.
 */
static bool
output_terminatedint (out_stream_t outs, uint64_t x)
{
  char encoded[UINT64_MAX_BYTES];
  int used = 0;
  bool more;

  /* The encoder consumes x as it goes; it reports whether another byte is
   * needed.  At least one byte is always produced. */
  do
    {
      more = encode_uint64_byte (&encoded[used], &x);
      used++;
    }
  while (more);

  iobuffer_t iobuf;
  init_iobuffer_with (&iobuf, used, used, encoded);
  if (OUTPUT_OK != output_all (outs, &iobuf))
    return false;

  return true;
}

/**
 * Write the header of a pair to out_main: the encoded length of the name
 * followed by the name_len bytes of name.
 *
 * name_len must be non-zero (asserted); when assertions are compiled out a
 * zero length writes just the length field.
 *
 * Returns true on success, false (after logging) on the first failed write.
 */
static bool
output_pair_name_size (dps_out_t dps, const char *name, size_t name_len)
{
  assert (name_len > 0);

  if (!output_terminatedint (dps->out_main, name_len))
    {
      LOG (ERROR,
           "Error: Failed to output name len (in output_small_pair_size)");
      return false;
    }

  if (0 == name_len)
    return true;

  iobuffer_t name_buf;
  init_iobuffer_with (&name_buf, name_len, name_len, (char *) name);
  if (OUTPUT_OK == output_all (dps->out_main, &name_buf))
    return true;

  LOG (ERROR,
       "Error: Failed to output name (in output_small_pair_size)");
  return false;
}

/**
 * Write a complete pair (name, data length, data) to the store in one call.
 *
 * name / name_len give the pair name, data / data_len the payload; a
 * data_len of zero writes only the length field.
 *
 * Returns false without writing anything while a big-pair data stream is
 * still open, and false (after logging) when any write fails.
 */
bool
output_small_pair_size (dps_out_t dps,
                        const char *name, size_t name_len,
                        const char *data, size_t data_len)
{
  /* Refuse while a big-pair data stream is still open. */
  if (dps->out_data)
    return false;

  if (!output_pair_name_size (dps, name, name_len))
    return false;

  DEBUGF ("Outputting small pair: %.10s (%zu) / %zubytes", name, name_len,
          data_len);

  if (!output_terminatedint (dps->out_main, data_len))
    {
      LOG (ERROR,
           "Error: Failed to output data len (in output_small_pair_size)");
      return false;
    }

  if (0 == data_len)
    return true;

  iobuffer_t payload;
  init_iobuffer_with (&payload, data_len, data_len, (char *) data);
  if (OUTPUT_OK == output_all (dps->out_main, &payload))
    return true;

  LOG (ERROR,
       "Error: Failed to output data (in output_small_pair_size)");
  return false;
}

/**
 * Write a mark to the main output stream.
 *
 * A stream that answers OUTPUT_ERR_UNSUPPORTED is treated as success; any
 * other error is logged and reported as false.
 */
bool
dps_output_mark (dps_out_t dps)
{
  const output_err_t status = output_mark (dps->out_main);

  if (OUTPUT_OK == status || OUTPUT_ERR_UNSUPPORTED == status)
    return true;

  LOG (ERROR, "Error: Failed to output mark");
  return false;
}

/**
 * Convenience wrapper around output_small_pair_size for NUL-terminated
 * strings: the lengths of name and data are taken with strlen.
 *
 * dps, name and data must all be non-NULL (asserted).
 */
bool
output_small_pair (dps_out_t dps, const char *name, const char *data)
{
  assert (dps);
  assert (name);
  assert (data);

  return output_small_pair_size (dps, name, strlen (name),
                                 data, strlen (data));
}

/**
 * Start a pair whose data is streamed by the caller.
 *
 * Writes the name header and the data length to out_main, then opens a
 * fixed-length stream of data_len bytes on top of out_main, remembers it in
 * dps->out_data and returns it.  The caller writes the data to the returned
 * stream and finishes with output_end_big_pair.
 *
 * Returns NULL if another big pair is still open, if the header could not
 * be written, or if the fixed-length stream could not be opened.
 */
out_stream_t
output_big_pair_size (dps_out_t dps, const char *name, size_t name_len,
                      size_t data_len)
{
  assert (dps);

  /* Same guard as output_small_pair_size: starting a new pair while a data
   * stream is open would overwrite (and so lose) the open handle. */
  if (dps->out_data)
    return NULL;

  DEBUGF ("Outputting big pair: %.10s (%zu) / %zubytes", name, name_len,
          data_len);

  if (!output_pair_name_size (dps, name, name_len))
    return NULL;

  if (!output_terminatedint (dps->out_main, data_len))
    {
      LOG (ERROR, "Error: Failed to output data len (in output_big_pair)");
      return NULL;
    }

  return dps->out_data = fixedlenio_open_out (dps->out_main, data_len);
}

/**
 * Convenience wrapper around output_big_pair_size for a NUL-terminated
 * name (asserted non-NULL); the name length is taken with strlen.
 */
out_stream_t
output_big_pair (dps_out_t dps, const char *name, size_t data_len)
{
  assert (name);

  const size_t name_len = strlen (name);
  return output_big_pair_size (dps, name, name_len, data_len);
}

/**
 * Finish the big pair started with output_big_pair / output_big_pair_size.
 *
 * The data stream is closed if it is still open, then released, and
 * dps->out_data is reset to NULL.
 *
 * Returns false when no big pair is open or when closing fails; in the
 * latter case the stream is left in place in dps->out_data.
 */
bool
output_end_big_pair (dps_out_t dps)
{
  assert (dps);

  out_stream_t big = dps->out_data;
  if (!big)
    return false;

  if (is_out_open (big) && OUTPUT_OK != close_out (big))
    return false;

  release_out (big);
  dps->out_data = NULL;

  return true;
}

/* State of the reading side of a data pair store. */
struct dps_in
{
  /* The caller's stream with every wrapper added so far: selfsync, the
   * optional stat layer, and any filters from enable_input_filters. */
  in_stream_t in_base;
  /* Stream that pair headers and data are read from; currently always set
   * to the same stream as in_base. */
  in_stream_t in_main;
  /* True once dps_input_pair has read a header, until the data of that pair
   * has been read, skipped or abandoned by recovery. */
  bool in_name_read;
  /* Allocated size of in_name_buffer (max_name_len + 1). */
  size_t in_name_buffer_len;
  /* NUL-terminated name of the current pair. */
  char *in_name_buffer;
  /* Data length announced by the header of the current pair. */
  size_t in_data_size;
  /* Fixed-length stream handed out by dps_input_big_data; NULL when no big
   * data stream is open. */
  in_stream_t in_data;
  /* Allocated size of in_data_buffer; 0 when nothing is allocated. */
  size_t in_data_buffer_len;
  /* Buffer for dps_input_small_data, (re)allocated on demand; may be NULL. */
  char *in_data_buffer;
};

/**
 * Create a data pair store reader on top of in_base.
 *
 * in_base is wrapped in a selfsync stream and, when stats is non-NULL, in
 * a stat stream bound to stats.  A name buffer of max_name_len + 1 bytes is
 * allocated, so names of up to max_name_len bytes can be read.
 *
 * Returns the new handle, or NULL if a wrapper stream could not be opened;
 * in that case the memory allocated here is freed again.  Allocation
 * failures are reported through MEMFAILED ().
 */
dps_in_t
open_dps_in (in_stream_t in_base, size_t max_name_len, iostat_t *stats)
{
  dps_in_t dps = malloc (sizeof (struct dps_in));
  if (!dps)
    MEMFAILED ();

  dps->in_name_buffer = malloc (max_name_len + 1);
  if (!dps->in_name_buffer)
    MEMFAILED ();

  dps->in_base = in_base;
  {
    in_stream_t selfsync_in =
      selfsync_open_in (ESCAPE_LEN, ESCAPE_SEQ, 4096, dps->in_base,
                        false, true);
    if (!selfsync_in)
      {
        LOG (ERROR, "Couldn't open selfsync stream");
        free (dps->in_name_buffer);
        free (dps);
        return NULL;
      }
    dps->in_base = selfsync_in;
  }

  // Output stats
  if (stats)
    {
      in_stream_t stat_ins = statio_open_in (dps->in_base, stats, true, true);
      if (!stat_ins)
        {
          LOG (ERROR, "Couldn't open stat stream");
          /* NOTE(review): the selfsync wrapper in dps->in_base is not torn
           * down here because the ownership implied by its open flags is
           * not visible from this file -- confirm whether it should be
           * released as well. */
          free (dps->in_name_buffer);
          free (dps);
          return NULL;
        }

      dps->in_base = stat_ins;
    }


  LOG (DEBUG, "Created selfsyncing input layer");

  // The nomark layer is currently disabled; in_main is simply in_base.
  // dps->in_main = nomarkio_open_in(dps->in_base, false) ;
  dps->in_main = dps->in_base;

  LOG(DEBUG, "Created nomark input layer") ;

  dps->in_name_buffer_len = max_name_len + 1;
  dps->in_data_buffer = NULL;
  dps->in_data_buffer_len = 0;
  dps->in_name_read = false;
  dps->in_data = NULL;

  return dps;
}

/**
 * Close the reading side of the store.  Nothing is closed at present; the
 * function always reports success.
 */
bool
close_dps_in (dps_in_t dps)
{
  (void) dps;                   /* intentionally unused */
  return true;
}

/**
 * Free the name buffer, the small-data buffer (if one was allocated) and
 * the handle itself.  dps must be non-NULL (asserted).  The streams held by
 * the handle are not touched here.
 */
void
release_dps_in (dps_in_t dps)
{
  assert (dps);

  /* free (NULL) is a no-op, so the data buffer needs no guard. */
  free (dps->in_data_buffer);
  free (dps->in_name_buffer);
  free (dps);
}

/**
 * Read one terminated integer from in_base into *x, one byte at a time,
 * feeding each byte to decode_uint64_byte until it reports the end.
 *
 * The caller is expected to initialise *x (the callers in this file set it
 * to 0 before the call).
 *
 * Returns true on success.  Returns false when a read fails, in which case
 * the stream error is stored in *in_err if in_err is non-NULL, or when the
 * encoding runs past UINT64_MAX_BYTES bytes, in which case *in_err is left
 * untouched.
 */
static bool
input_size (in_stream_t in_base, uint64_t * x, input_err_t * in_err)
{
  iobuffer_t iobuf;
  char buffer;
  init_iobuffer_with (&iobuf, 1, 0, &buffer);

  int pos = 0;
  char in_c;
  do
    {
      /* The writer never emits more than UINT64_MAX_BYTES bytes for one
       * integer, so valid positions are 0 .. UINT64_MAX_BYTES - 1.  Reject
       * an over-long encoding before the decoder is handed an
       * out-of-range position. */
      if (pos >= UINT64_MAX_BYTES)
        return false;

      input_err_t err = input_all (in_base, &iobuf);
      if (INPUT_OK != err)
        {
          if (in_err)
            *in_err = err;
          return false;
        }

      in_c = *iobuffer_data_pointer (&iobuf);
      /* Consume the byte so the one-byte buffer can be refilled. */
      iobuffer_mark_taken (&iobuf, 1);
    }
  while (decode_uint64_byte (x, pos++, &in_c));

  return true;
}

/**
 * Read the header of the next pair: name length, name and data length.
 *
 * On success the name is available through dps_input_pair_name, the data
 * length through dps_input_data_size, and the data itself must then be
 * read, streamed or skipped before the next header is read.
 *
 * *in_err (when in_err is non-NULL) is set to INPUT_OK on entry and
 * overwritten with the stream error when a read fails.
 *
 * Returns false when a big data stream is still open, when a read fails,
 * when the name length is zero or does not fit the name buffer, or when the
 * data length cannot be represented in a size_t.  In the non-read-failure
 * cases *in_err stays INPUT_OK.
 */
bool
dps_input_pair (dps_in_t dps, input_err_t * in_err)
{
  assert (dps);
  if (in_err)
    *in_err = INPUT_OK;

  if (dps->in_data)
    return false;

  uint64_t len = 0;
  if (!input_size (dps->in_main, &len, in_err))
    return false;

  if (0 == len || len >= SIZE_MAX)
    return false;

  // Don't try and align for next pair.  If name is too long, maybe length is
  // also wrong, causing us to suck up potentially recoverable data
  if (len >= dps->in_name_buffer_len)
    return false;

  iobuffer_t iobuf;
  init_iobuffer_with (&iobuf, len, 0, dps->in_name_buffer);

  input_err_t err = input_all (dps->in_main, &iobuf);
  if (in_err)
    *in_err = err;
  if (INPUT_OK != err)
      return false;

  dps->in_name_buffer[len] = '\0';

  len = 0;
  if (!input_size (dps->in_main, &len, in_err))
    return false;

  /* in_data_size is a size_t; where size_t is narrower than 64 bits an
   * unchecked assignment would silently truncate the length and
   * desynchronise the stream. */
  if (len > SIZE_MAX)
    return false;

  dps->in_data_size = len;
  dps->in_name_read = true;

  DEBUGF ("Aligned pair: %.10s (%zu) / %zubytes", dps->in_name_buffer,
          strlen (dps->in_name_buffer), dps->in_data_size);

  return true;
}

/**
 * Return the NUL-terminated name of the current pair, or NULL when no pair
 * header is pending.  The pointer refers to the handle's own buffer.
 */
char *
dps_input_pair_name (dps_in_t dps)
{
  assert (dps);

  return dps->in_name_read ? dps->in_name_buffer : NULL;
}

/**
 * Return the data length announced by the current pair's header.  A header
 * must have been read with dps_input_pair (asserted).
 */
size_t
dps_input_data_size (dps_in_t dps)
{
  assert (dps);
  assert (dps->in_name_read);

  const size_t pending = dps->in_data_size;
  return pending;
}

/**
 * Read the whole data part of the current pair into the handle's own
 * buffer and return it, NUL-terminated.
 *
 * The buffer is reallocated when it is too small for the data plus the
 * terminator.  On success the pair counts as consumed.
 *
 * Returns NULL when no header is pending, when the data length leaves no
 * room for the terminator, when allocation fails, or when the read fails.
 */
char *
dps_input_small_data (dps_in_t dps)
{
  assert (dps);

  if (!dps->in_name_read)
    return NULL;

  const size_t want = dps->in_data_size;

  // Use >= to leave room for terminator
  if (want >= SIZE_MAX)
    return NULL;

  // Grow the buffer when data plus terminator does not fit
  if (want >= dps->in_data_buffer_len)
    {
      free (dps->in_data_buffer);       // free (NULL) is a no-op
      dps->in_data_buffer = malloc (want + 1);
      if (!dps->in_data_buffer)
        {
          dps->in_data_buffer_len = 0;
          return NULL;
        }
      dps->in_data_buffer_len = want + 1;
    }

  if (want > 0)
    {
      iobuffer_t iobuf;
      init_iobuffer_with (&iobuf, want, 0, dps->in_data_buffer);
      if (INPUT_OK != input_all (dps->in_main, &iobuf))
        return NULL;
    }

  dps->in_data_buffer[want] = '\0';

  // The pair has been consumed
  dps->in_name_read = false;

  DEBUG ("Input small data");
  return dps->in_data_buffer;
}

/**
 * Open a fixed-length stream over the data part of the current pair.
 *
 * The stream is remembered in dps->in_data and returned; the caller reads
 * from it and finishes with dps_finish_big_input.
 *
 * Returns NULL when no header is pending, when a big data stream is already
 * open, or when the fixed-length stream cannot be opened.
 */
in_stream_t
dps_input_big_data (dps_in_t dps)
{
  assert (dps);

  if (!dps->in_name_read || dps->in_data)
    return NULL;

  DEBUG ("Inputting big pair");

  dps->in_data = fixedlenio_open_in (dps->in_main, dps->in_data_size);
  return dps->in_data;
}

/**
 * Finish the big data stream opened with dps_input_big_data.
 *
 * The stream is closed if it is still open, then released; the pair then
 * counts as consumed.
 *
 * Returns false when no big data stream is open or when closing fails; in
 * the latter case the stream stays in dps->in_data.
 */
bool
dps_finish_big_input (dps_in_t dps)
{
  assert (dps);

  in_stream_t big = dps->in_data;
  if (!big)
    return false;

  if (is_in_open (big) && INPUT_OK != close_in (big))
    return false;

  release_in (big);
  dps->in_data = NULL;
  dps->in_name_read = false;

  DEBUG ("Done inputting big pair");
  return true;
}

/**
 * Skip over the data part of the current pair without reading it.  A header
 * must be pending (asserted).
 *
 * Returns true when the skip succeeded, after which the pair counts as
 * consumed; returns false, leaving the state untouched, otherwise.
 */
bool
dps_skip_data_in (dps_in_t dps)
{
  assert (dps);
  assert (dps->in_name_read);

  if (INPUT_OK == skip_in (dps->in_main, dps->in_data_size))
    {
      dps->in_name_read = false;

      DEBUG ("Skipped pair data");

      return true;
    }

  return false;
}

/**
 * Call input_mark on the main input stream.
 *
 * Returns true when it reports INPUT_OK, false otherwise.
 */
bool
dps_input_mark (dps_in_t dps)
{
  assert (dps);

  if (INPUT_OK == input_mark (dps->in_main))
    {
      DEBUG ("Input mark");
      return true;
    }

  return false;
}

/**
 * Skip everything on the main input stream with skip_all.  Must not be
 * called while a big data stream is open (asserted).
 *
 * Returns true only when skip_all stops with INPUT_MARK.
 */
bool
dps_skip_chunk (dps_in_t dps)
{
  assert (dps);
  assert (!dps->in_data);

  const input_err_t stopped_by = skip_all (dps->in_main);
  return INPUT_MARK == stopped_by;
}

/**
 * Try to recover the main input stream after an error.
 *
 * When input_recover succeeds, a partially read big data stream is closed
 * and released and any pending pair header is dropped.
 *
 * Returns false (after logging a warning) when recovery fails; the reader
 * state is then left unchanged.
 */
bool
dps_input_recovery (dps_in_t dps)
{
  const input_err_t recover_status = input_recover (dps->in_main);

  if (INPUT_OK == recover_status)
    {
      if (dps->in_data)
        {
          LOG (VERBOSE, "Closing partially read stream to recover from error");
          close_and_release_in (dps->in_data);
          dps->in_data = NULL;
        }
      dps->in_name_read = false;

      return true;
    }

  LOGF (WARNING, "Error recovery failed with: %s",
        INPUT_ERR_NAME (recover_status));
  return false;
}

/**
 * Stack the requested decoding filters on top of the input stream.
 *
 * A mark is first consumed from in_base; the filters are then added in the
 * order gpg, gzip, bzip2 and in_main is switched to the resulting stream.
 * gpg requires a non-NULL gpg_passphrase_path.
 *
 * Must not be called while a big data stream is open (asserted).
 *
 * Returns true on success.  On failure false is returned; in_base keeps the
 * filters stacked before the failing step while in_main is left unchanged.
 */
bool
enable_input_filters (dps_in_t dps, bool gzip, bool bzip2, bool gpg, const char *gpg_passphrase_path)
{
  assert (!dps->in_data);

  /* input_mark reports an input status, so compare against INPUT_OK (as
   * dps_input_mark does) rather than the output constant. */
  if (INPUT_OK != input_mark (dps->in_base))
    {
      LOG (ERROR, "Enabling filters but no mark in stream");
      return false;
    }

  if(gpg)
    {
      if (!gpg_passphrase_path)
        {
          LOG(FATAL, "Fatal: Archive is encrypted but no passphrase specified");
          return false;
        }

      in_stream_t gpgio_ins = gpgio_open_in (dps->in_base, gpg_passphrase_path, true);
      if (!gpgio_ins)
        {
          LOG (ERROR, "Couldn't open gpg stream");
          return false;
        }

      LOG (NORMAL, "Enabled gpg decryption");
      dps->in_base = gpgio_ins;
    }


  if (gzip)
    {
      in_stream_t gzipio_ins = gzipio_open_in (dps->in_base, true);
      if (!gzipio_ins)
        {
          LOG (ERROR, "Couldn't open gzip stream");
          return false;
        }

      LOG (NORMAL, "Enabled gzip decompression");
      dps->in_base = gzipio_ins;
    }

  if (bzip2)
    {
      in_stream_t bzip2io_ins = bzip2io_open_in (dps->in_base, true);
      if (!bzip2io_ins)
        {
          LOG (ERROR, "Couldn't open bzip2 stream");
          return false;
        }

      LOG (NORMAL, "Enabled bzip2 decompression");
      dps->in_base = bzip2io_ins;
    }

  /* Pairs are read through the fully filtered stream from here on. */
  dps->in_main = dps->in_base;

  return true;
}

