/*  Tarlz - Archiver with multimember lzip compression
    Copyright (C) 2013-2018 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _FILE_OFFSET_BITS 64

#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
#include <sys/types.h>
#if defined(__GNU_LIBRARY__)
#include <sys/sysmacros.h>		// for makedev
#endif
#include <lzlib.h>

#include "arg_parser.h"
#include "lzip.h"
#include "tarlz.h"


namespace {

// Exit status accumulated from non-fatal problems found while decoding.
// It is set to 2 whenever damaged data is skipped and raised to 1 when a
// requested name is not found; decode() returns it if nothing worse happened.
int gretval = 0;

// Creates every missing directory leading to the last component of 'name'.
// The last component itself is never created. Trailing slashes and runs of
// consecutive slashes are ignored.
// Returns false if an existing prefix is not a directory or if a directory
// can't be created; returns true otherwise (also when there is no parent
// directory part at all).
bool make_path( const std::string & name )
  {
  const mode_t dir_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
  const std::string::size_type npos = std::string::npos;

  // find where the parent directory part of 'name' ends
  std::string::size_type stop = name.find_last_not_of( '/' );	// end of last component
  if( stop == npos ) return true;		// empty or only slashes
  stop = name.rfind( '/', stop );		// slash before last component
  if( stop == npos ) return true;		// no directory part
  stop = name.find_last_not_of( '/', stop );	// end of parent directory
  if( stop == npos ) return true;		// parent is the root
  ++stop;					// one past the parent's last char

  for( std::string::size_type pos = 0; pos < stop; )
    {
    if( name[pos] == '/' ) { ++pos; continue; }	// separators
    while( pos < stop && name[pos] != '/' ) ++pos;	// end of this component
    const std::string prefix = name.substr( 0, pos );
    struct stat st;
    if( stat( prefix.c_str(), &st ) != 0 )
      { if( mkdir( prefix.c_str(), dir_mode ) != 0 ) return false; }
    else if( !S_ISDIR( st.st_mode ) ) return false;
    }
  return true;
  }


// Returns in buf the first rd bytes of the second lzip member or
// the first 512 bytes of the second tar member, and sets islz if lzip member.
// archive_read calls this when the first block read from the archive is
// neither a lzip header, a valid tar header, nor a block of zeros.
//   infd  file descriptor the archive is read from
//   buf   holds the bytes read so far; never filled beyond header_size bytes
//   rd    in: number of valid bytes in buf; out: valid bytes left in buf
//   islz  out: true if a lzip header was found, false if a tar header was
// Returns true when a header has been found. Returns false after reporting
// a read error, or when fewer than header_size bytes could be read (eof).
bool skip_first_member( const int infd, uint8_t * const buf,
                        int & rd, bool & islz )
  {
  while( true )
    {
    // look for an 'L' that verify_prefix accepts for the rd - i bytes left
    // (presumably a complete or truncated lzip magic -- confirm in lzip.h)
    for( int i = 0; i < rd; ++i )
      if( buf[i] == 'L' && (*(Lzip_header *)( buf + i )).verify_prefix( rd - i ) )
        {
        const int ts = rd - i;				// tail size
        std::memmove( buf, buf + i, ts );	// move candidate to start of buf
        // the tail is long enough to hold the whole magic; accept it
        if( ts >= (int)sizeof lzip_magic )
          { rd = ts; islz = true; return true; }
        // only part of the magic was seen; read more data and check again
        int rd2 = readblock( infd, buf + ts, header_size - ts );
        if( rd2 != header_size - ts && errno )
          { show_error( "Error reading archive", errno ); return false; }
        if( ts + rd2 >= min_member_size &&
            (*(Lzip_header *)buf).verify_magic() )
          { rd = ts + rd2; islz = true; return true; }
        // false alarm: discard the tail, keep the new data and refill buf
        std::memmove( buf, buf + ts, rd2 );
        int rd3 = readblock( infd, buf + rd2, header_size - rd2 );
        if( rd3 != header_size - rd2 && errno )
          { show_error( "Error reading archive", errno ); return false; }
        rd = rd2 + rd3; i = -1;		// rescan buf from its first byte
        }
    if( rd < header_size ) return false;		// eof
    // no lzip header in this block; maybe it is a valid tar header
    if( rd == header_size && verify_ustar_chksum( buf ) )
      { islz = false; return true; }
    // nothing recognizable; try the next block
    rd = readblock( infd, buf, header_size );
    if( rd != header_size && errno )
      { show_error( "Error reading archive", errno ); return false; }
    }
  }


// Returns true if the first 'size' bytes of buf are all zero.
// A size of zero or less yields true.
inline bool block_is_zero( const uint8_t * const buf, const int size )
  {
  int pos = 0;
  while( pos < size && buf[pos] == 0 ) ++pos;	// skip leading zeros
  return pos >= size;				// reached the end?
  }


bool archive_read( const int infd, uint8_t * const buf, const int size )
  {
  static LZ_Decoder * decoder = 0;
  static bool first_call = true;
  static bool at_eof = false;

  if( first_call )					// check format
    {
    first_call = false;
    if( size != header_size )
      internal_error( "size != header_size on first call." );
    int rd = readblock( infd, buf, size );
    if( rd != size && errno )
      { show_error( "Error reading archive", errno ); return false; }
    bool islz =
      ( rd >= min_member_size && (*(Lzip_header *)buf).verify_magic() );
    const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
    const bool iseof =
      ( !islz && !istar && rd == size && block_is_zero( buf, size ) );
    if( !islz && !istar && !iseof )
      {
      show_error( "This does not look like a tar archive." );
      show_error( "Skipping to next header." );
//      std::fprintf( stderr, "%07o\n", ustar_chksum( buf ) );
      gretval = 2;
      if( !skip_first_member( infd, buf, rd, islz ) ) return false;
      }
    if( !islz ) return true;				// uncompressed
    decoder = LZ_decompress_open();			// compressed
    if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
      { show_error( "Not enough memory." );
        LZ_decompress_close( decoder ); return false; }
    if( LZ_decompress_write( decoder, buf, rd ) != rd )
      internal_error( "library error (LZ_decompress_write)." );
    if( !archive_read( infd, buf, size ) ) return false;
    if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return true;
    show_error( "This does not look like a tar archive." );
    show_error( "Skipping to next header." );
    gretval = 2;
    if( LZ_decompress_sync_to_member( decoder ) < 0 )
      internal_error( "library error (LZ_decompress_sync_to_member)." );
    }

  if( !decoder )					// uncompressed
    { if( readblock( infd, buf, size ) == size ) return true;
      show_error( "Archive ends unexpectedly." ); return false; }
  const int ibuf_size = 16384;
  uint8_t ibuf[ibuf_size];
  int sz = 0;
  while( sz < size )
    {
    if( !at_eof && LZ_decompress_write_size( decoder ) > 0 )
      {
      const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) );
      const int rd = readblock( infd, ibuf, rsize );
      if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
        internal_error( "library error (LZ_decompress_write)." );
      if( rd < rsize )
        {
        at_eof = true; LZ_decompress_finish( decoder );
        if( errno )
          { show_error( "Error reading archive", errno ); return false; }
        }
      }
    const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
    if( rd < 0 )
      {
      show_error( "Skipping to next header." );
      gretval = 2;
      if( LZ_decompress_sync_to_member( decoder ) < 0 )
        internal_error( "library error (LZ_decompress_sync_to_member)." );
      continue;
      }
    if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
     { LZ_decompress_close( decoder );
       show_error( "Archive ends unexpectedly." ); return false; }
    sz += rd;
    if( sz == size && LZ_decompress_finished( decoder ) == 1 &&
        LZ_decompress_close( decoder ) < 0 )
      { show_error( "LZ_decompress_close failed." ); return false; }
    }
  return true;
  }


const char * mode_string( const Tar_header header )
  {
  static char buf[11];
  const Typeflag typeflag = (Typeflag)header[typeflag_o];

  std::memcpy( buf, "----------", sizeof buf - 1 );
  switch( typeflag )
    {
    case tf_regular: break;
    case tf_link: buf[0] = 'h'; break;
    case tf_symlink: buf[0] = 'l'; break;
    case tf_chardev: buf[0] = 'c'; break;
    case tf_blockdev: buf[0] = 'b'; break;
    case tf_directory: buf[0] = 'd'; break;
    case tf_fifo: buf[0] = 'p'; break;
    case tf_hiperf: buf[0] = 'C'; break;
    default: buf[0] = '?';
    }
  const mode_t mode = strtoul( header + mode_o, 0, 8 );		// 12 bits
  const bool setuid = mode & S_ISUID;
  const bool setgid = mode & S_ISGID;
  const bool sticky = mode & S_ISVTX;
  if( mode & S_IRUSR ) buf[1] = 'r';
  if( mode & S_IWUSR ) buf[2] = 'w';
  if( mode & S_IXUSR ) buf[3] = setuid ? 's' : 'x';
  else if( setuid ) buf[3] = 'S';
  if( mode & S_IRGRP ) buf[4] = 'r';
  if( mode & S_IWGRP ) buf[5] = 'w';
  if( mode & S_IXGRP ) buf[6] = setgid ? 's' : 'x';
  else if( setgid ) buf[6] = 'S';
  if( mode & S_IROTH ) buf[7] = 'r';
  if( mode & S_IWOTH ) buf[8] = 'w';
  if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
  else if( sticky ) buf[9] = 'T';
  return buf;
  }


// Returns the owner of the member described by 'header' as "user/group".
// The names stored in the header are used if both are present; otherwise
// the numeric ids (stored in octal) are printed in decimal.
// The string lives in a static buffer overwritten by the next call.
const char * user_group_string( const Tar_header header )
  {
  enum { bufsize = uname_l + 1 + gname_l + 1 };
  static char buf[bufsize];

  if( header[uname_o] && header[gname_o] )
    // the name fields need not be 0-terminated, so limit each one to the
    // width of its field (the same widths that size the buffer)
    snprintf( buf, bufsize, "%.*s/%.*s", (int)uname_l, header + uname_o,
              (int)gname_l, header + gname_o );
  else
    {
    // unsigned, to match the "%u" conversions below
    const unsigned uid = strtoul( header + uid_o, 0, 8 );
    const unsigned gid = strtoul( header + gid_o, 0, 8 );
    snprintf( buf, bufsize, "%u/%u", uid, gid );
    }
  return buf;
  }


// Prints to stdout the name of the member described by 'extended' and
// 'header', then flushes stdout.
// Nothing is printed if verbosity < vlevel. If verbosity == vlevel only the
// name is printed; if verbosity > vlevel a long line is printed with mode,
// owner, size, modification time, name and link destination (if any).
void show_member_name( const Extended & extended, const Tar_header header,
                       const int vlevel )
  {
  if( verbosity < vlevel ) return;
  if( verbosity > vlevel )
    {
    const time_t mtime = strtoull( header + mtime_o, 0, 8 );	// 33 bits
    // localtime returns a null pointer if the time can't be represented,
    // which a malformed mtime field may cause; fall back to the epoch and,
    // as a last resort, to an all-zero date instead of dereferencing it
    struct tm zero_tm;
    const struct tm * tm = localtime( &mtime );
    if( !tm )
      {
      const time_t epoch = 0;
      tm = localtime( &epoch );
      if( !tm )
        { std::memset( &zero_tm, 0, sizeof zero_tm ); tm = &zero_tm; }
      }
    const Typeflag typeflag = (Typeflag)header[typeflag_o];
    const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
    const char * const link_string = !islink ? "" :
                         ( ( typeflag == tf_link ) ? " link to " : " -> " );
    std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
                 mode_string( header ), user_group_string( header ),
                 extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
                 tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
                 link_string, !islink ? "" : extended.linkpath.c_str() );
    }
  else std::printf( "%s\n", extended.path.c_str() );
  std::fflush( stdout );
  }


int list_member( const int infd, const Extended & extended,
                 const Tar_header header, const bool skip )
  {
  if( !skip ) show_member_name( extended, header, 0 );

  const unsigned bufsize = 32 * header_size;
  uint8_t buf[bufsize];
  unsigned long long rest = extended.size;
  const int rem = extended.size % header_size;
  const int padding = rem ? header_size - rem : 0;
  while( rest > 0 )
    {
    const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
    if( !archive_read( infd, buf, rsize ) ) return 2;
    if( rest < bufsize ) break;
    rest -= rsize;
    }
  return 0;
  }


// Returns true if any slash-separated component of 'filename' is exactly
// "..".
bool contains_dotdot( const char * const filename )
  {
  bool at_start = true;		// *p is the first char of a component
  for( const char * p = filename; *p; ++p )
    {
    if( at_start && p[0] == '.' && p[1] == '.' &&
        ( p[2] == 0 || p[2] == '/' ) ) return true;
    at_start = ( *p == '/' );
    }
  return false;
  }


int extract_member( const int infd, const Extended & extended,
                    const Tar_header header )
  {
  const char * const filename = extended.path.c_str();
  if( contains_dotdot( filename ) )
    {
    show_file_error( filename, "Contains a '..' component, skipping." );
    return list_member( infd, extended, header, true );
    }
  const mode_t mode = strtoul( header + mode_o, 0, 8 );		// 12 bits
  const time_t mtime = strtoull( header + mtime_o, 0, 8 );	// 33 bits
  const Typeflag typeflag = (Typeflag)header[typeflag_o];
  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
  int outfd = -1;

  show_member_name( extended, header, 1 );
  std::remove( filename );
  make_path( filename );
  switch( typeflag )
    {
    case tf_regular:
    case tf_hiperf:
      outfd = open_outstream( filename );
      if( outfd < 0 ) return 2;
      chmod( filename, mode );			// ignore errors
      break;
    case tf_link:
    case tf_symlink:
      {
      const char * const linkname = extended.linkpath.c_str();
/*      if( contains_dotdot( linkname ) )
        {
        show_file_error( filename,
          "Link destination contains a '..' component, skipping." );
        return list_member( infd, extended, header, false );
        }*/
      const bool hard = typeflag == tf_link;
      if( ( hard && link( linkname, filename ) != 0 ) ||
          ( !hard && symlink( linkname, filename ) != 0 ) )
        {
        if( verbosity >= 0 )
          std::fprintf( stderr, "Can't %slink file '%s' to '%s': %s.\n",
                        hard ? "" : "sym", linkname, filename,
                        std::strerror( errno ) );
        return 2;
        }
      } break;
    case tf_directory:
      if( mkdir( filename, mode ) != 0 && errno != EEXIST )
        {
        show_file_error( filename, "Can't create directory", errno );
        return 2;
        }
      break;
    case tf_chardev:
    case tf_blockdev:
      {
      const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ),
                                    strtoul( header + devminor_o, 0, 8 ) );
      const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
      if( mknod( filename, dmode, dev ) != 0 )
        {
        show_file_error( filename, "Can't create device node", errno );
        return 2;
        }
      break;
      }
    case tf_fifo:
      if( mkfifo( filename, mode ) != 0 && errno != EEXIST )
        {
        show_file_error( filename, "Can't create FIFO file", errno );
        return 2;
        }
      break;
    default:
      if( verbosity >= 0 )
        std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n",
                      typeflag, filename );
      return 2;
    }

  const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 );
  const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 );
  if( !islink && chown( filename, uid, gid ) != 0 &&
      errno != EPERM && errno != EINVAL )
    {
    show_file_error( filename, "Can't change file owner", errno );
    return 2;
    }

  const unsigned bufsize = 32 * header_size;
  uint8_t buf[bufsize];
  unsigned long long rest = extended.size;
  const int rem = extended.size % header_size;
  const int padding = rem ? header_size - rem : 0;
  while( rest > 0 )
    {
    const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
    if( !archive_read( infd, buf, rsize ) )
      { if( outfd >= 0 ) { close( outfd ); std::remove( filename ); }
        return 2; }
    const int wsize = ( rest >= bufsize ) ? bufsize : rest;
    if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
      { show_file_error( filename, "Error writing file", errno ); return 2; }
    rest -= wsize;
    }
  if( outfd >= 0 && close( outfd ) != 0 )
    { show_file_error( filename, "Error closing file", errno ); return 2; }
  if( !islink )
    {
    struct utimbuf t;
    t.actime = mtime;
    t.modtime = mtime;
    utime( filename, &t );			// ignore errors
    }
  return 0;
  }


// Returns a pointer to the part of 'filename' that follows any leading
// sequence of '/' and "./", or "." if nothing follows it.
// The first time something is removed, a message showing the removed
// prefix is reported.
const char * remove_leading_slash( const char * const filename )
  {
  static bool first_post = true;	// message not yet shown
  const char * p = filename;

  for( ;; )
    {
    if( p[0] == '/' ) ++p;
    else if( p[0] == '.' && p[1] == '/' ) p += 2;
    else break;
    }
  if( first_post && p != filename )
    {
    first_post = false;
    const std::string removed( filename, p - filename );
    const std::string msg =
      "Removing leading '" + removed + "' from member names.";
    show_error( msg.c_str() );
    }
  return ( *p != 0 ) ? p : ".";
  }


// Returns true if 'name' begins with the non-empty string 'dir' and the
// match ends at a directory boundary: either 'dir' ends with a slash or
// the char following the match in 'name' is a slash.
bool compare_prefix_dir( const char * const dir, const char * const name )
  {
  const std::size_t dlen = std::strlen( dir );
  if( dlen == 0 || std::strncmp( dir, name, dlen ) != 0 ) return false;
  return dir[dlen-1] == '/' || name[dlen] == '/';
  }


// Returns true if the two file names are equal once the trailing slashes
// of both have been dropped.
bool compare_tslash( const char * const name1, const char * const name2 )
  {
  std::size_t len1 = std::strlen( name1 );
  std::size_t len2 = std::strlen( name2 );
  while( len1 > 0 && name1[len1-1] == '/' ) --len1;
  while( len2 > 0 && name2[len2-1] == '/' ) --len2;
  return len1 == len2 && std::memcmp( name1, name2, len1 ) == 0;
  }

} // end namespace


// Reads from the archive the extended records announced by 'header' and
// stores in this object the values of the records "path", "linkpath" and
// "size". If a "GNU.crc32" record is found, it is verified and crc_present
// is set. Other records are ignored.
// Each record has the form "<length> <keyword>=<value>\n", where <length>
// is the decimal size of the whole record.
// Unless 'permissive' is true, a repeated record is an error.
// Returns true if all the records were read and parsed without error.
// NOTE(review): archive_read takes the size as an int, so a bufsize larger
// than INT_MAX would be narrowed in the call below.
bool Extended::parse( const int infd, const Tar_header header,
                      const bool permissive )
  {
  const unsigned long long edsize = strtoull( header + size_o, 0, 8 );
  const unsigned long long bufsize = round_up( edsize );
  if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
    return false;				// overflow or no extended data
  // The extra byte holds a terminating 0 so that the numeric conversions
  // below can never scan past the end of the buffer.
  char * const buf = new char[bufsize+1];	// extended records buffer
  buf[bufsize] = 0;
  if( !archive_read( infd, (uint8_t *)buf, bufsize ) ) goto error;
  for( unsigned long long pos = 0; pos < edsize; )	// parse records
    {
    char * tail;
    const unsigned long long rsize = strtoull( buf + pos, &tail, 10 );
    // strtoull skips leading whitespace, so the length field may end beyond
    // the record it describes; reject that case, or the length of the rest
    // of the record computed below would wrap around
    if( rsize == 0 || rsize > edsize - pos ||
        tail >= buf + pos + rsize - 1 || tail[0] != ' ' ||
        buf[pos+rsize-1] != '\n' ) goto error;
    ++tail;	// point to keyword
    // length of (keyword + '=' + value) without the final newline
    const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail;
    if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
      { if( path.size() && !permissive ) goto error;
        path.assign( tail + 5, rest - 5 ); }
    else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
      { if( linkpath.size() && !permissive ) goto error;
        linkpath.assign( tail + 9, rest - 9 ); }
    else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
      {
      if( size != 0 && !permissive ) goto error;
      size = 0;
      for( unsigned long long i = 5; i < rest; ++i )
        {
        if( tail[i] < '0' || tail[i] > '9' ) goto error;
        const unsigned digit = tail[i] - '0';
        // test before multiplying; comparing the result with the previous
        // value does not detect every wraparound
        if( size > ( ~0ULL - digit ) / 10 ) goto error;	// overflow
        size = size * 10 + digit;
        }
      if( size < 1ULL << 33 ) goto error;	// size fits in ustar header
      }
    else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
      {
      if( crc_present && !permissive ) goto error;
      if( rsize != 22 ) goto error;	// "22 GNU.crc32=" + 8 digits + '\n'
      char * t;
      const uint32_t stored_crc = strtoul( tail + 10, &t, 16 );
      if( t - tail - 10 != 8 || t[0] != '\n' ) goto error;
      const uint32_t computed_crc =
        crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
      crc_present = true;
      if( stored_crc != computed_crc ) goto error;
      }
    pos += rsize;
    }
  delete[] buf;
  return true;
error:
  delete[] buf;
  return false;
  }


// Lists or extracts the members of a tar archive.
//   archive_name  name of the archive; if empty, standard input is read
//   parser        parsed command line; arguments with code 0 are names of
//                 members, arguments with code 'C' are directories to
//                 change to
//   filenames     if > 0, only members matching one of the names given in
//                 the command line are processed; the rest are skipped
//   listing       list the members instead of extracting them
//   missing_crc   treat extended records without a CRC as a fatal error
//   permissive    tolerate consecutive extended headers; also passed to
//                 Extended::parse
// Returns 0 on success. Returns 1 if the archive can't be opened or a
// directory can't be entered, 2 if the archive can't be read or is
// rejected, the status of the failing member if a member fails, or
// gretval if only non-fatal problems were found.
int decode( const std::string & archive_name, const Arg_parser & parser,
            const int filenames, const bool listing, const bool missing_crc,
            const bool permissive )
  {
  const int infd = archive_name.size() ?
                   open_instream( archive_name ) : STDIN_FILENO;
  if( infd < 0 ) return 1;

  // execute -C options and mark filenames to be extracted or listed
  std::vector< bool > name_pending( parser.arguments(), false );
  for( int i = 0; i < parser.arguments(); ++i )
    {
    const int code = parser.code( i );
    if( code == 'C' && !listing )	// directories are not entered when listing
      {
      const char * const dir = parser.argument( i ).c_str();
      if( chdir( dir ) != 0 )
        { show_file_error( dir, "Error changing working directory", errno );
          return 1; }
      }
    if( !code ) name_pending[i] = true;		// a name not yet found
    }

  Extended extended;		// metadata from extended records
  int retval = 0;
  bool prev_extended = false;	// prev header was extended
  bool skipping = false;	// "Skipping" message already shown
  while( true )			// process one member per iteration
    {
    uint8_t buf[header_size];
    if( !archive_read( infd, buf, header_size ) ) return 2;
    if( !verify_ustar_chksum( buf ) )
      {
      // a block of zeros ends the archive; anything else is skipped
      if( block_is_zero( buf, header_size ) ) break;
      gretval = 2;
      if( !skipping )
        { skipping = true; show_error( "Skipping to next header." ); }
      continue;
      }
    skipping = false;

    const char * const header = (const char *)buf;
    const Typeflag typeflag = (Typeflag)header[typeflag_o];
    if( typeflag == tf_extended )
      {
      // load the extended records; they are applied to the next header
      if( prev_extended && !permissive )
        { show_error( "Format violation: consecutive extended headers found."
                      /*" Use --permissive."*/, 0, true ); return 2; }
      if( !extended.parse( infd, header, permissive ) )
        { show_error( "Error in extended records. Skipping to next header." );
          extended.reset(); gretval = 2; }
      else if( !extended.crc_present && missing_crc )
        { show_error( "Missing CRC in extended records.", 0, true ); return 2; }
      prev_extended = true;
      continue;
      }
    prev_extended = false;

    // take from the header the link name not given by the extended records
    if( extended.linkpath.empty() )
      {
      for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
        extended.linkpath += header[linkname_o+i];
      while( extended.linkpath.size() > 1 &&		// trailing '/'
             extended.linkpath[extended.linkpath.size()-1] == '/' )
        extended.linkpath.resize( extended.linkpath.size() - 1 );
      }

    // take from the header the file name not given by the extended records;
    // it is stored as an optional prefix, a slash and a name
    if( extended.path.empty() )
      {
      char stored_name[prefix_l+1+name_l+1];
      int len = 0;
      while( len < prefix_l && header[prefix_o+len] )
        { stored_name[len] = header[prefix_o+len]; ++len; }
      if( len && header[name_o] ) stored_name[len++] = '/';
      for( int i = 0; i < name_l && header[name_o+i]; ++i )
        { stored_name[len] = header[name_o+i]; ++len; }
      while( len > 0 && stored_name[len-1] == '/' ) --len;	// trailing '/'
      stored_name[len] = 0;
      extended.path = remove_leading_slash( stored_name );
      }
    const char * const filename = extended.path.c_str();

    // if names were given, skip the member unless it is one of them or is
    // located under one of them
    bool skip = filenames > 0;
    if( skip )
      for( int i = 0; i < parser.arguments(); ++i )
        if( parser.code( i ) == 0 &&
            ( compare_prefix_dir( parser.argument( i ).c_str(), filename ) ||
              compare_tslash( filename, parser.argument( i ).c_str() ) ) )
          { skip = false; name_pending[i] = false; break; }

    // take from the header the size not given by the extended records;
    // only for file types that carry data
    if( extended.size == 0 &&
        ( typeflag == tf_regular || typeflag == tf_hiperf ) )
      extended.size = strtoull( header + size_o, 0, 8 );

    if( listing || skip )
      retval = list_member( infd, extended, header, skip );
    else
      retval = extract_member( infd, extended, header );
    extended.reset();		// extended records apply to one member only
    if( retval ) return retval;
    }

  // report the names given in the command line that matched no member
  for( int i = 0; i < parser.arguments(); ++i )
    if( parser.code( i ) == 0 && name_pending[i] )
      {
      show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
      if( gretval < 1 ) gretval = 1;
      }
  if( !retval && gretval )
    { show_error( "Exiting with failure status due to previous errors." );
      retval = gretval; }
  return retval;
  }
