/* $Id: filestore_output.c 736 2006-06-13 18:45:39Z jim $
   teebu - An archiving tool
   Copyright (C) 2006 Jim Farrand

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program; if not, write to the Free Software Foundation, Inc., 51
   Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */


#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <assert.h>
#include <unistd.h>
#include <fcntl.h>

#include "datapairstore.h"
#include "logging.h"
#include "uidcache.h"
#include "md5sumio.h"
#include "sha1sumio.h"
#include "terminatedint.h"
#include "config.h"
#include "unixio.h"
#include "rng.h"
#include "source_checksum.h"

#include "filestore_common.h"
#include "filestore_output.h"

// Version string written as the value of the start-of-archive tag: package
// name, package version and source checksum, separated by single spaces.
#define FILESTORE_VERSION (PACKAGE " " VERSION " " SOURCE_MD5_CHECKSUM)

// Size of the filter flag string built in setup_filters: at most one
// compression flag, one encryption flag, plus the NUL terminator.
#define FLAG_BUFFER_LEN    3

/* State of an archive that is being written.  Allocated and filled in by
   open_filestore_out; torn down by release_filestore_out, which frees the
   pair store, both checksum buffers and the uid cache but leaves out_stats
   alone. */
struct filestore_out
{
  dps_out_t       out_dps;      // Underlying pair store
  filestore_id_t  out_id;       // ID of this archive
  iostat_t        *out_stats;   // Stats tracked here (taken from the params, may be NULL)
  char            *out_md5sum;  // } Buffer for storing checksums
  char            *out_sha1sum; // } or NULL if checksum is not required
  uidcache_t      out_uidcache; // Cache for looking up usernames etc
};

/* Fill PARAMS with the defaults for writing an archive to OUT_STREAM: no
   stats, no label, no passphrase, a chunk size of 32 * 1024 * 1024 and every
   boolean option switched off. */
void
init_filestore_out_params (filestore_out_params_t *params, out_stream_t out_stream)
{
  // Destination and chunking
  params->fop_out_stream = out_stream;
  params->fop_chunk_size = 32 * 1024 * 1024;

  // Optional extras, all absent by default
  params->fop_iostats = NULL;
  params->fop_label = NULL;
  params->fop_gpg_passphrase = NULL;

  // Compression
  params->fop_gzip = false;
  params->fop_bzip2 = false;

  // Per-file checksums
  params->fop_md5sum = false;
  params->fop_sha1sum = false;

  // Header handling and layout
  params->fop_ignore_header_error = false;
  params->fop_info_after_filters = false;
  params->fop_id_after_filters = false;
}

/* Build a new archive ID out of ID_LEN random bytes.  Each round shifts the
   value accumulated so far up by one byte and ORs in the low byte of
   random_long(). */
filestore_id_t
allocate_filestore_id()
{
  filestore_id_t id = 0;
  int bytes_left = ID_LEN;

  while (bytes_left-- > 0)
    id = (id << 8) | (random_long() & 0xFF);

  return id;
}

/* Serialise the low ID_LEN bytes of ID into BUFFER, which must have room for
   ID_LEN bytes.  The buffer is filled from its end backwards with the least
   significant byte first, so buffer[0] ends up holding the most significant
   byte (big-endian byte order).  No terminator is written. */
static void
format_filestore_id (filestore_id_t id, char *buffer)
{
  char *cursor = buffer + ID_LEN;

  while (cursor != buffer)
    {
      *--cursor = (char)(id & 0xFF);
      id >>= 8;
    }
}

/* Set up compression and encryption on a new filestore.

   Writes the chunk filter tag (a possibly empty string of filter flags),
   enables the requested output filters on the pair store and, when a GPG
   passphrase is configured, writes the archive ID once more after the
   filters have been enabled.

   Returns true on success and false on failure.  FSO is never released
   here: it remains owned by the caller, which releases it when this function
   fails.  Releasing it here as well would free it twice. */
static bool
setup_filters (filestore_out_params_t *params, filestore_out_t fso)
{
  char flags[FLAG_BUFFER_LEN];
  int flags_used = 0;

  // Create the flags string.  Only one compression flag is recorded; gzip
  // wins if both compressors are requested.
  if (params->fop_gzip)
    flags[flags_used++] = GZIP_FILTER_FLAG;
  else if (params->fop_bzip2)
    flags[flags_used++] = BZIP2_FILTER_FLAG;

  if (params->fop_gpg_passphrase)
    flags[flags_used++] = GPG_FILTER_FLAG;

  // There must still be room for the terminator.
  assert (flags_used < FLAG_BUFFER_LEN);
  flags[flags_used] = '\0';

  // Always output this tag, even if there are no filters.  It also serves as
  // a useful "end of meta-data" marker.
  if (!output_small_pair_size (fso->out_dps,
                               TAG_TEXT (CHUNK_FILTER_TAG),
                               TAG_LEN (CHUNK_FILTER_TAG),
                               flags, strlen (flags)))
    {
      LOG (ERROR, "Failed to output chunk filter tag");
      return false;
    }

  if (!enable_output_filters (fso->out_dps, params->fop_gzip, params->fop_bzip2,
                              params->fop_gpg_passphrase))
    {
      LOG (ERROR,"Failed to enable output filters");
      return false;
    }

  if (params->fop_gpg_passphrase)
    {
      // For encrypted archives, always write a second copy of the ID inside
      // the archive (after the filters), to prevent tampering
      char id_buffer[ID_LEN];
      format_filestore_id (fso->out_id, id_buffer);

      if (!output_small_pair_size (fso->out_dps,
                                   TAG_TEXT (ARCHIVE_ID_TAG),
                                   TAG_LEN (ARCHIVE_ID_TAG),
                                   id_buffer, ID_LEN))
        {
          LOG (ERROR, "Failed to output id");
          return false;
        }

      // The caller has not logged the ID yet in this configuration.
      if (params->fop_id_after_filters)
        {
          LOGF (NORMAL, "Archive ID: %" PRIx64, fso->out_id);
        }
    }

  return true;
}

/* Write the archive-level information pairs: a timestamp taken from the
   current time and, if the parameters carry one, the archive label.
   Returns false (after logging) as soon as either write fails, true
   otherwise. */
static bool
output_filestore_info (filestore_out_params_t *params, filestore_out_t fso)
{
  char timestamp[FORMATTED_TIME_BUFFER_SIZE];
  format_time (time(NULL), timestamp, false);

  bool wrote_timestamp =
    output_small_pair_size (fso->out_dps, TAG_TEXT (ARCHIVE_TIMESTAMP_TAG),
                            TAG_LEN (ARCHIVE_TIMESTAMP_TAG), timestamp,
                            FORMATTED_TIME_DATA_SIZE);
  if (!wrote_timestamp)
    {
      LOG (ERROR, "Failed to output archive timestamp tag");
      return false;
    }

  // The label is optional; nothing more to do without one.
  if (!params->fop_label)
    return true;

  bool wrote_label =
    output_small_pair_size (fso->out_dps,
                            TAG_TEXT (ARCHIVE_LABEL_TAG),
                            TAG_LEN (ARCHIVE_LABEL_TAG),
                            params->fop_label, strlen(params->fop_label));
  if (!wrote_label)
    {
      LOG (ERROR, "Failed to output label");
      return false;
    }

  return true;
}

/* Create a filestore writer from PARAMS and emit the archive header.

   Header layout, in order:
     - the start-of-archive tag carrying FILESTORE_VERSION;
     - timestamp/label, unless fop_info_after_filters is set;
     - the archive ID, unless fop_id_after_filters is set;
     - the chunk filter tag, after which the filters are enabled
       (see setup_filters);
     - timestamp/label, if fop_info_after_filters is set.

   Returns the new filestore, or NULL on any failure.  On failure everything
   allocated so far has already been released. */
filestore_out_t
open_filestore_out (filestore_out_params_t *params)
{
  // Zero-fill the structure so that every pointer member starts out null.
  // release_filestore_out inspects all of them, and it is called from the
  // early failure paths below before some members have been assigned.
  filestore_out_t fso = calloc (1, sizeof (struct filestore_out));
  if (!fso)
    return NULL;                // out of memory!

  fso->out_id = allocate_filestore_id();
  fso->out_stats = params->fop_iostats;

  // Checksum buffers are only allocated when the checksum is requested;
  // a NULL buffer means "checksum not required".
  if (params->fop_md5sum)
    {
      fso->out_md5sum = malloc (MD5_SUM_LEN);
      if (!fso->out_md5sum)
        {
          release_filestore_out (fso);
          return NULL;
        }
    }

  if (params->fop_sha1sum)
    {
      fso->out_sha1sum = malloc (SHA1_SUM_LEN);
      if (!fso->out_sha1sum)
        {
          release_filestore_out (fso);
          return NULL;
        }
    }

  fso->out_uidcache = create_uidcache ();
  if (!fso->out_uidcache)
    {
      release_filestore_out (fso);
      return NULL;              // couldn't create idcache!
    }

  fso->out_dps = open_dps_out (params->fop_out_stream, params->fop_chunk_size);
  if (!fso->out_dps)
    {
      release_filestore_out (fso);
      return NULL;              // couldn't open datapairstore!
    }

  // Output start of archive marker
  if (!output_small_pair_size (fso->out_dps,
                               TAG_TEXT (TEEBU_FILESTORE_TAG),
                               TAG_LEN (TEEBU_FILESTORE_TAG),
                               FILESTORE_VERSION, strlen(FILESTORE_VERSION)))
    {
      release_filestore_out (fso);
      LOG (ERROR, "Failed to output filestore tag");
      return NULL;
    }

  if (!params->fop_info_after_filters
      && !output_filestore_info (params, fso))
    {
      release_filestore_out (fso);
      LOG (ERROR, "Failed to output archive timestamp tag");
      return NULL;
    }

  // FIXME: Do stats on these failures

  if (!params->fop_id_after_filters)
    {
      char id_buffer[ID_LEN];
      format_filestore_id (fso->out_id, id_buffer);

      if (!output_small_pair_size (fso->out_dps,
                                   TAG_TEXT (ARCHIVE_ID_TAG),
                                   TAG_LEN (ARCHIVE_ID_TAG),
                                   id_buffer, ID_LEN))
        {
          release_filestore_out (fso);
          LOG (ERROR, "Failed to output id");
          return NULL;
        }
      LOGF (NORMAL, "Archive ID: %" PRIx64, fso->out_id);
    }

  if (!setup_filters(params, fso))
    {
      release_filestore_out (fso);
      LOG (ERROR, "Failed to setup filters");
      return NULL;
    }

  if (params->fop_info_after_filters
      && !output_filestore_info (params, fso))
    {
      release_filestore_out (fso);
      return NULL;
    }

  return fso;
}

/* Close the pair store underlying FSO and return the result of
   close_dps_out.  FSO itself is not freed here. */
bool
close_filestore_out (filestore_out_t fso)
{
  bool closed = close_dps_out (fso->out_dps);
  return closed;
}

/* Output the metadata pairs shared by files, directories and links.

   In order: permission bits (only if PERMISSIONS), the st_ctime and st_mtime
   timestamps (only if TIMES), owner name, group name and finally the path
   with all leading '/' characters removed.  An owner or group that cannot be
   looked up is skipped with a VERBOSE message rather than treated as an
   error.

   Returns true on success, false (after logging) if any write to the pair
   store fails. */
static bool
output_file_info (filestore_out_t fso, path_t path, struct stat *input_stat,
                  bool permissions, bool times)
{
  const char *paths = path_str (path);

  // File permissions
  if (permissions)
    {
      mode_t perms = input_stat->st_mode & SAVE_PERMS;
      char file_mode_buffer[UINT32_MAX_BYTES];
      size_t file_mode_size = encode_uint32 (file_mode_buffer, perms);
      if (!output_small_pair_size (fso->out_dps,
                                   TAG_TEXT (UNIX_PERM_TAG),
                                   TAG_LEN (UNIX_PERM_TAG),
                                   file_mode_buffer, file_mode_size))
        {
          LOG (ERROR, "Failed to output file permissions");
          return false;
        }
    }

  if (times)
    {
      // The buffers are FORMATTED_TIME_BUFFER_SIZE, the size
      // output_filestore_info hands to format_time, because the formatted
      // text is also logged with %s below.  Only FORMATTED_TIME_DATA_SIZE
      // bytes go into the archive.
      // NOTE(review): presumably the extra space holds a terminating NUL --
      // confirm against format_time.

      // "Creation" time.  Note that st_ctime is the time of the last status
      // change, not a true creation time.
      char creation_time_buffer[FORMATTED_TIME_BUFFER_SIZE] ;
      format_time(input_stat->st_ctime, creation_time_buffer, false) ;
      if (!output_small_pair_size (fso->out_dps,
                                  TAG_TEXT (CREATION_TIME_TAG),
                                  TAG_LEN (CREATION_TIME_TAG),
                                  creation_time_buffer, FORMATTED_TIME_DATA_SIZE))
        {
          LOG (ERROR, "Failed to output creation time");
          return false;
        }

      LOGF(DEBUG, "Output creation time: %s", creation_time_buffer) ;

      // Modification time
      char modification_time_buffer[FORMATTED_TIME_BUFFER_SIZE] ;
      format_time(input_stat->st_mtime, modification_time_buffer, false) ;
      if (!output_small_pair_size (fso->out_dps,
                                  TAG_TEXT (MODIFICATION_TIME_TAG),
                                  TAG_LEN (MODIFICATION_TIME_TAG),
                                  modification_time_buffer, FORMATTED_TIME_DATA_SIZE))
        {
          LOG (ERROR, "Failed to output modification time");
          return false;
        }

      LOGF(DEBUG, "Output modification time: %s", modification_time_buffer) ;
    }

  // Owner
  {
    const char *username = lookup_username_from_uid (fso->out_uidcache,
                                                     input_stat->st_uid);

    if (!username)
      {
        LOGF (VERBOSE, "Couldn't lookup username for: %lu",
              (unsigned long)input_stat->st_uid);
      }
    else
      {
        // Write the file owner
        if (!output_small_pair_size (fso->out_dps,
                                     TAG_TEXT (FILE_OWNER_TAG),
                                     TAG_LEN (FILE_OWNER_TAG),
                                     username, strlen(username)))
          {
            LOG (ERROR, "Failed to output file owner");
            return false;
          }
        LOGF (DEBUG, "Wrote file owner: %s", username);
      }

  }

  // Group
  {
    const char *groupname = lookup_groupname_from_gid (fso->out_uidcache,
                                                       input_stat->st_gid);

    if (!groupname)
      {
        LOGF (VERBOSE, "Couldn't lookup groupname for: %lu",
              (unsigned long)input_stat->st_gid);
      }
    else
      {
        // Write the file group
        if (!output_small_pair_size (fso->out_dps,
                                     TAG_TEXT (FILE_GROUP_TAG),
                                     TAG_LEN (FILE_GROUP_TAG),
                                     groupname, strlen(groupname)))
          {
            LOG (ERROR, "Failed to output file group");
            return false;
          }
        LOGF (DEBUG, "Wrote file group: %s", groupname);
      }

  }

  // Path, stored without any leading slashes
  unsigned skip = 0;
  while ('/' == paths[skip])
    skip++;

  if (!output_small_pair_size (fso->out_dps,
                               TAG_TEXT (PATH_TAG),
                               TAG_LEN (PATH_TAG),
                               paths + skip, strlen (paths) - skip))
    {
      LOG (ERROR, "Failed to output path");
      return false;
    }

  return true;
}

// TODO: Pass stats down into this function, as the return code is not specific
// enough to do sensible stats.
// NOTE(review): STATS is already a parameter here; the TODO above may be
// stale -- confirm and remove.

/* Output one regular file to the filestore.

   Writes, in order: the file type tag, the common file info (permissions,
   times, owner, group, path), the file data, the MD5 and/or SHA1 checksum
   pairs if those are enabled, and finally a pair store mark.  The data is
   read through a chain of input streams: the raw file descriptor, then the
   optional MD5, SHA1 and stats wrappers.

   Returns true when the file was written, and also when it could not be
   opened or stat'ed (those input errors are counted in STATS but are not
   fatal, and nothing has been written to the archive at that point).
   Returns false on any other failure. */
bool
filestore_output_file (filestore_out_t fso, path_t path, filestore_stats_t *stats)
{
  assert (fso);
  const char *paths = path_str (path);

  if (fso->out_stats)
      merge_stats_path (fso->out_stats, path);

  LOGF (VERBOSE, "Writing:\t%s", paths);

  int input_fd = open (paths, O_RDONLY);
  if (-1 == input_fd)
    {
      LOGF (ERROR, "Error: Couldn't open file: %s", paths);
      STAT_INCR (stats, STAT_INPUT_ERROR_OPENING_FILE);
      return true;              // Not fatal, as we can continue
    }

  struct stat input_stat;
  if (-1 == fstat (input_fd, &input_stat))
    {
      close (input_fd);
      LOGF (ERROR, "Error: Couldn't stat file: %s", paths);
      STAT_INCR (stats, STAT_INPUT_ERROR_STATING_FILE);
      return true;              // Not fatal, as we can continue
    }

  // FileType: This tag is always first for files
  if (!output_small_pair_size (fso->out_dps,
                               TAG_TEXT (FILETYPE_TAG),
                               TAG_LEN (FILETYPE_TAG), &regfiletype, 1))
    {
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      close (input_fd);
      return false;
    }

  if (!output_file_info (fso, path, &input_stat, true, true))
    {
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      close (input_fd);
      return false;
    }

  // From here on the descriptor is reached through data_in, which always
  // refers to the outermost stream of the chain built so far.
  in_stream_t data_in = unixio_open_in_fd (input_fd);
  if (!data_in)
    {
      STAT_INCR (stats, STAT_INPUT_ERROR_OPENING_FILE);
      close (input_fd);
      return false;
    }

  if (fso->out_md5sum)
    {
      in_stream_t md5sum_in =
        md5sumio_open_in (data_in, true, fso->out_md5sum);
      if (!md5sum_in)
        {
          STAT_INCR (stats, STAT_INPUT_ERROR_OPENING_FILE);
          close_and_release_in (data_in);
          return false;
        }
      data_in = md5sum_in;
    }

  if (fso->out_sha1sum)
    {
      in_stream_t sha1sum_in =
        sha1sumio_open_in (data_in, true, fso->out_sha1sum);
      if (!sha1sum_in)
        {
          STAT_INCR (stats, STAT_INPUT_ERROR_OPENING_FILE);
          close_and_release_in (data_in);
          return false;
        }
      data_in = sha1sum_in;
    }

  if (fso->out_stats)
    {
      in_stream_t stat_in =
        statio_open_in (data_in, fso->out_stats, false, true);
      // Test the stream that was just opened, not the one it wraps:
      // data_in is known to be valid here and is what must be cleaned up.
      if (!stat_in)
        {
          STAT_INCR (stats, STAT_INPUT_ERROR_OPENING_FILE);
          close_and_release_in (data_in);
          return false;
        }
      data_in = stat_in;
    }

  // Filedata
  out_stream_t data_outs = output_big_pair_size (fso->out_dps,
                                                 TAG_TEXT (FILEDATA_TAG),
                                                 TAG_LEN (FILEDATA_TAG),
                                                 input_stat.st_size);

  if (!data_outs)
    {
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      close_in (data_in);
      release_in (data_in);
      return false;
    }

  input_err_t in_err;
  output_err_t out_err;
  if (!copy_stream (4096, NULL, &in_err, &out_err, data_in, data_outs))
    {
      close_in (data_in);
      release_in (data_in);
      output_end_big_pair (fso->out_dps);
      LOGF (ERROR,
            "Error: Couldn't copy file stream for %s: in_err = %s, out_err = %s",
            paths, INPUT_ERR_NAME (in_err), OUTPUT_ERR_NAME (out_err));

      // Attribute the failure to whichever side(s) reported an error.
      if (INPUT_OK != in_err)
        STAT_INCR (stats, STAT_INPUT_ERROR);
      if (OUTPUT_OK != out_err)
        STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);

      return false;
    }

  if (!output_end_big_pair (fso->out_dps))
    {
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      close_and_release_in (data_in);
      return false;
    }

  if (INPUT_OK != close_and_release_in (data_in))
    {
      STAT_INCR (stats, STAT_INPUT_ERROR);
      return false;
    }

  // The checksum buffers are written only after the input chain has been
  // closed.
  if (fso->out_md5sum)
    {
      LOGF (VERBOSE, "Writing MD5:\t%s", fso->out_md5sum);
      if (!output_small_pair_size (fso->out_dps,
                                   TAG_TEXT (MD5SUM_TAG),
                                   TAG_LEN (MD5SUM_TAG),
                                   fso->out_md5sum, MD5_SUM_LEN - 1))
        {
          STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
          return false;
        }
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_FILE_CHECKSUM_MD5);
    }

  if (fso->out_sha1sum)
    {
      LOGF (VERBOSE, "Writing SHA1:\t%s", fso->out_sha1sum);
      if (!output_small_pair_size (fso->out_dps,
                                   TAG_TEXT (SHA1SUM_TAG),
                                   TAG_LEN (SHA1SUM_TAG),
                                   fso->out_sha1sum, SHA1_SUM_LEN - 1))
        {
          STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
          return false;
        }
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_FILE_CHECKSUM_SHA1);
    }

  if (!dps_output_mark (fso->out_dps))
    {
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      return false;
    }

  LOGF (DEBUG, "Written:\t%s", paths);
  STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_FILE_OK);
  return true;
}


/* Output a symbolic link to the filestore.

   Writes the file type tag, the common file info (owner, group and path
   only: no permissions or times), the link target and a pair store mark.

   Everything that can fail on the input side (reading the link target,
   lstat) is done before the first pair is written, so a link that is skipped
   leaves nothing behind in the archive.

   Returns true when the link was written, and also when it had to be
   skipped because of an input error (counted in STATS, not fatal).
   Returns false if writing to the archive failed. */
bool
filestore_output_link (filestore_out_t fso, path_t path, filestore_stats_t *stats)
{
  assert (fso);

  const char *paths = path_str (path);
  bool ret = false;

  char *link = read_link (path);
  // NOTE(review): read_link's failure value is not visible from this file;
  // NULL is assumed.  The target is later passed to strlen, so it must not
  // be NULL beyond this point.
  if (!link)
    {
      LOGF (ERROR, "Error: Couldn't read link: %s", paths);
      STAT_INCR (stats, STAT_INPUT_ERROR);
      return true;              // Not fatal, as we can continue
    }

  LOGF (VERBOSE, "Link:\t%s\t -> %s", paths, link);

  struct stat input_stat;
  if (-1 == lstat (paths, &input_stat))
    {
      STAT_INCR (stats, STAT_INPUT_ERROR_STATING_LINK);
      LOGF (ERROR, "Error: Couldn't stat link: %s", paths);
      ret = true;               // Not fatal, as we can continue
      goto done;
    }

  // FileType first, then the shared info, the target and the closing mark.
  // The chain stops at the first write that fails.
  if (!output_small_pair_size (fso->out_dps,
                               TAG_TEXT (FILETYPE_TAG),
                               TAG_LEN (FILETYPE_TAG), &softlinkfiletype, 1)
      || !output_file_info (fso, path, &input_stat, false, false)
      || !output_small_pair_size (fso->out_dps,
                                  TAG_TEXT (LINK_TAG),
                                  TAG_LEN (LINK_TAG), link, strlen (link))
      || !dps_output_mark (fso->out_dps))
    {
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      goto done;
    }

  LOGF (DEBUG, "Written:\t%s", paths);
  STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_LINK_OK);
  ret = true;

done:
  free (link);
  return ret;
}

/* Output a directory entry and, if RECURSE is set, everything inside it.

   The directory is opened and lstat'ed before anything is written, so a
   directory that is skipped because of an input error leaves nothing behind
   in the archive.  Then the file type tag and the common file info
   (permissions, times, owner, group, path) are written.  When recursing,
   each entry other than "." and ".." is handed to filestore_output_path
   together with FILE_CALLBACK and CALLBACK_DATA; the walk stops at the first
   fatal error.

   NOTE(review): unlike files and links, no dps_output_mark is emitted for
   the directory entry itself -- confirm this is intentional.

   Returns false iff a fatal error occurred.  Failing to open or stat the
   directory is counted in STATS but is not fatal. */
bool
filestore_output_dir (filestore_out_t fso, filestore_stats_t * stats,
                      path_t path, bool recurse, void (*file_callback)
                      (void *, path_t),
                      void *callback_data)
{
  assert (fso);

  const char *paths = path_str (path);
  LOGF (VERBOSE, "Directory:\t%s", paths);

  DIR *dir = opendir (paths);
  if (!dir)
    {
      LOGF (ERROR, "Couldn't open directory: %s", paths);
      STAT_INCR (stats, STAT_INPUT_ERROR_OPENING_DIR);
      return true;              // Not fatal, as we can continue
    }

  struct stat input_stat;
  if (-1 == lstat (paths, &input_stat))
    {
      closedir (dir);
      LOGF (ERROR, "Error: Couldn't stat directory: %s", paths);
      STAT_INCR (stats, STAT_INPUT_ERROR_STATING_DIR);

      return true;              // Not fatal, as we can continue
    }

  // FileType
  if (!output_small_pair_size (fso->out_dps,
                               TAG_TEXT (FILETYPE_TAG),
                               TAG_LEN (FILETYPE_TAG), &dirfiletype, 1))
    {
      closedir (dir);
      LOGF (ERROR, "Couldn't output directory filetype: %s", paths);
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      return false;
    }

  // output_file_info only fails when a write to the archive fails, so this
  // is an output error rather than an input error.
  if (!output_file_info (fso, path, &input_stat, true, true))
    {
      closedir (dir);
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_ERROR);
      return false;
    }

  bool ret = true;
  if (recurse)
    {
      struct dirent *dirent;
      // Check ret first so that no further entry is read after a fatal error.
      while (ret && (dirent = readdir (dir)))
        {
          if (0 == strcmp (".", dirent->d_name)
              || 0 == strcmp ("..", dirent->d_name))
            {
              DEBUGF ("Skipping:\t%s", dirent->d_name);
              continue;
            }

          path_t new_path = copy_path (path);
          path_add (new_path, dirent->d_name);
          ret =
            filestore_output_path (fso, stats, new_path, true, file_callback,
                                   callback_data);
          release_path (new_path);
        }
    }

  // A failed close is only worth a warning; it does not change the result.
  if (-1 == closedir (dir))
    {
      LOGF (WARNING, "Couldn't close directory: %s", paths);
      STAT_INCR (stats, STAT_INPUT_ERROR);
    }

  if (ret)
    {
      STAT_INCR (stats, STAT_ARCHIVE_OUTPUT_DIR_OK);
    }
  return ret;
}

/* Archive whatever PATH refers to, dispatching on the type reported by
   lstat: regular files, directories and symbolic links are handled, anything
   else is skipped with a warning.  RECURSE is passed on to
   filestore_output_dir.  FILE_CALLBACK, if non-NULL, is invoked with
   CALLBACK_DATA and PATH after a regular file or link has been attempted,
   whatever the outcome.  Stats are incremented for each file.  Returns false
   iff a fatal error occurred; a failed lstat is not fatal. */
bool
filestore_output_path (filestore_out_t fso, filestore_stats_t * stats,
                       path_t path, bool recurse, 
                       void (*file_callback) (void *, path_t),
                       void *callback_data)
{
  assert (fso);

  const char *name = path_str (path);
  DEBUGF ("Processing:\t%s", name);

  struct stat st;
  if (-1 == lstat (name, &st))
    {
      LOGF (WARNING, "Couldn't stat:\t%s", name);
      STAT_INCR (stats, STAT_INPUT_ERROR_STATING_FILE);
      return true;
    }

  const bool is_file = S_ISREG (st.st_mode);
  const bool is_link = S_ISLNK (st.st_mode);

  // Directories take care of their own contents.
  if (S_ISDIR (st.st_mode))
    return filestore_output_dir (fso, stats, path, recurse, file_callback,
                                 callback_data);

  if (!is_file && !is_link)
    {
      LOGF (WARNING, "Skipping unhandled file file:\t%s", name);
      STAT_INCR (stats,  STAT_ARCHIVE_OUTPUT_SKIP_UNHANDLED);
      return true;
    }

  // Files and links share the same shape: output, then notify the caller.
  bool ok = is_file ? filestore_output_file (fso, path, stats)
                    : filestore_output_link (fso, path, stats);
  if (file_callback)
    (*file_callback) (callback_data, path);

  return ok;
}

/* Free FSO together with what it owns: the pair store (if one was opened),
   both checksum buffers and the uid cache (if one was created).  out_stats
   is left untouched.  FSO must not be NULL and must not be used after this
   call. */
void
release_filestore_out (filestore_out_t fso)
{
  if (fso->out_dps)
    {
      release_dps_out (fso->out_dps);
    }

  // free accepts NULL, so the checksum buffers need no guard.
  free (fso->out_md5sum);
  free (fso->out_sha1sum);

  if (fso->out_uidcache)
    {
      release_uidcache (fso->out_uidcache);
    }

  free (fso);
}

