/*
    humanunzip - restores files compressed by humanzip
    Copyright (C) 2007 Matthew Strait

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
*/
using namespace std;

#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <sys/stat.h>
#include "humanzip.h"

// Program-wide option flags.  handle_cmdline() sets them from the command
// line switches; the rest of the file only reads them, except that getrt()
// forces keeporig on when an input file does not parse cleanly.
static int keeporig = 0; // keep the input files? (-k, implied by -c)
static int tostdout = 0; // write to stdout? (-c)
static int overwrite = 0; // overwrite existing files? (-f)

// Parses the command line.
//
// Sets the file-level flags overwrite (-f), keeporig (-k, -c) and tostdout
// (-c), then appends every remaining non-option argument to `filenames`.
// If no file was named, "/dev/stdin" is appended so that the caller can
// treat standard input like any other input file.
//
// On -h, -? or an option getopt() does not recognize, the usage text is
// written to stderr and the process exits with status 1.
static void handle_cmdline(vector<string> & filenames, int argc, char ** argv)
{
  // None of the switches takes an argument.  '?' is listed so that an
  // explicit "-?" reaches the usage branch without a getopt diagnostic.
  const char * opts = "fckh?";

  // getopt() returns an int.  The result must not be narrowed to char:
  // where char is unsigned the end-of-options value -1 would never compare
  // equal and every invocation would end up in the usage branch.
  int opt;
  while((opt = getopt(argc, argv, opts)) != -1){
    switch(opt)
    {
      case 'f':
        overwrite = 1;
        break;
      case 'c':
        keeporig = 1;
        tostdout = 1;
        break;
      case 'k':
        keeporig = 1;
        break;
      case 'h':
      case '?':
      default:
        cerr << "humanunzip v" << HUMANVERSION << " © 2007 Matthew Strait\n"
        << "humanunzip comes with ABSOLUTELY NO WARRANTY. "
        << "This is free software\n"
        << "and you may redistribute it under the terms of the GPLv2.\n"
        << "\n"
        << "Syntax: humanunzip file.hz\n"
        << "\n"
        << "If no file is specified, humanunzip reads from stdin.\n"
        << "The uncompressed version is written to stdout. Options:\n"
        << "\n"
        << "-k\tKeep the compressed files instead of deleting them.\n"
        << "-c\tSend output to stdout and keep compresed files.\n"
        << "-f\tForce overwriting of existing output files.\n"
        << "-h\tPrint this help and exit.\n"
        << "\n";
        exit(1);
    }
  }

  // getopt() leaves optind at the first non-option argument.
  for(int i = optind; i < argc; i++)
    filenames.push_back(argv[i]);

  if(filenames.size() == 0)
    filenames.push_back("/dev/stdin");
}

// Reports a malformed key line on stderr, including the byte offset that
// was passed in, and hands back -1 so that a caller can simply write
// "return invalidkey(...)".
static int invalidkey(const string & line, int pos)
{
  const int failure = -1;
  cerr << "Got invalid key line at byte " << pos;
  cerr << ": " << line << endl;
  return failure;
}

// Returns a copy of `s` in which the visible line-break markers are turned
// back into control characters: every "¶" (matched as a 2-byte sequence)
// becomes '\n' and every "⁋" (matched as a 3-byte sequence) becomes '\r'.
// All other bytes are copied through unchanged.
static string convertlinebreaks(const string & s)
{
  string converted;
  string::size_type at = 0;

  while(at < s.size()){
    // compare() clamps the length at the end of the string, so a marker
    // that is cut off by the end of `s` simply fails to match.
    if(s.compare(at, 2, "¶") == 0){
      converted += '\n';
      at += 2;
      continue;
    }
    if(s.compare(at, 3, "⁋") == 0){
      converted += '\r';
      at += 3;
      continue;
    }
    converted += s[at];
    ++at;
  }

  return converted;
}

// True for bytes 0x00-0x7f.  Done on the unsigned value so that the answer
// does not depend on whether plain char is signed on this platform.
static bool isplainascii(char ch)
{
  return static_cast<unsigned char>(ch) < 0x80;
}

// Parses one line of the replacement table, laid out as
//
//   <lower>/<upper> - <search>
//
// <lower> and <upper> are runs of non-ASCII bytes, 2 to 4 bytes long each;
// <search> is the rest of the line (at least 2 bytes) with its line-break
// markers expanded by convertlinebreaks().
//
// The three parts are appended to sr.replacelower, sr.replaceupper and
// sr.search, so `sr` is expected to be freshly constructed.
//
// Returns 1 on success.  On a malformed line the problem is reported
// through invalidkey() and -1 is returned; `sr` may then be partly filled.
static int parsekeyline(searchreplace & sr, const string & line)
{
  unsigned int i = 0;

  // <lower>: non-ASCII bytes up to the '/' separator.
  while(1){
    if(i >= line.size() || (isplainascii(line[i]) && line[i] != '/') ||
       sr.replacelower.size() > 4) return invalidkey(line, i);
    if(line[i] == '/'){ i++; break; }
    sr.replacelower += line[i];
    i++;
  }

  // <upper>: non-ASCII bytes up to the ' ' separator.  Bailing out here
  // matters: without the return, a line with no space would be scanned
  // past its end.
  while(1){
    if(i >= line.size() || (isplainascii(line[i]) && line[i] != ' ') ||
       sr.replaceupper.size() > 4) return invalidkey(line, i);
    if(line[i] == ' '){ i++; break; }
    sr.replaceupper += line[i];
    i++;
  }

  // The literal "- " that follows the space after <upper>.
  if(i>=line.size() || line[i] != '-') return invalidkey(line, i);
  i++;
  if(i>=line.size() || line[i] != ' ') return invalidkey(line, i);
  i++;

  // Everything that is left is the original text.
  sr.search += line.substr(i);

  if(sr.replacelower.size() < 2 || sr.replaceupper.size() < 2 ||
     sr.search.size()       < 2) return invalidkey(line, -1);

  sr.search = convertlinebreaks(sr.search);

  return 1;
}

// Returns 0 on sucess, -1 on error.  Sends back the last line that caused
// an error, if it was expected to be a key line.
static int getrt(string & errorline, vector<searchreplace> & rt,
  ifstream & infile, const string & filename)
{
  int returnvalue = 0;
  errorline = "";
  string line;

  getline(infile, line);
  if(line != "ĦŨӍĄŅŽȈƤǷΈÐ"){
    // Maybe the user removed the magic because he didn't like how it looked
    cerr << filename << " doesn't look like it is humanzipped.\n"
            "I'll try unzipping it anyway, keeping the original.\n";
    keeporig = 1;
    returnvalue = -1;

    // try parsing the top line as a key line
    searchreplace sr;
    if(-1 == parsekeyline(sr, line)){
      cerr << "I'll try unzipping it anyway, keeping the original.\n";
      errorline = line;
    }
    else
      rt.push_back(sr);
  }

  // handle key lines
  while(getline(infile, line)){
    if(line == "") break; // end of table

    searchreplace sr;
    if(-1 == parsekeyline(sr, line)){
      cerr << "I'll try unzipping it anyway, keeping the original.\n";
      errorline = line;
      returnvalue = -1;
      keeporig = 1;
    }
    else
      rt.push_back(sr);
  }
  return returnvalue;
}

static string findsearch(string replace, const vector<searchreplace> & rt)
{
  for(unsigned int i = 0; i < rt.size(); i++){
    // Even though humanzip only writes lines with the first letter raised
    // let's do this symmetrically so that we can tolerate changes.
    if(replace == rt[i].replacelower) return lowerfirstletter(rt[i].search);
    if(replace == rt[i].replaceupper) return raisefirstletter(rt[i].search);
  }

  cerr << "Didn't find original string!\n";
  return replace;
}

// Reads one multi-byte UTF-8 sequence from `infile` and returns its bytes.
//
// `c` must be the byte currently at the head of the stream (i.e. what
// peek() returned); its high bits decide whether 2, 3 or 4 bytes are read.
//
// Returns "" after printing a diagnostic when
//  - `c` cannot start a multi-byte sequence (the offending byte is then
//    removed from the stream so that the caller always makes progress), or
//  - the input ends in the middle of the sequence.
static string getutf8(ifstream & infile, char c)
{
  string uchar;
  int width = 1;

  if     ((c & 0xe0) == 0xc0) width = 2;
  else if((c & 0xf0) == 0xe0) width = 3;
  else if((c & 0xf8) == 0xf0) width = 4;

  if(width == 1){
    cerr << "Got bad UTF-8 character!\n";
    // Drop the byte; leaving it in place would make a peek()-driven caller
    // see the very same byte again and again.
    infile.get(c);
    return "";
  }

  for(int i = 0; i < width; i++){
    // A failed get() leaves c untouched, so it must not be appended.
    if(!infile.get(c)){
      cerr << "Got bad UTF-8 character!\n";
      return "";
    }
    uchar += c;
  }

  return uchar;
}

// Derives the output file name from the input file name:
// "hello.hz" becomes "hello"; any name that does not end in ".hz"
// (including names shorter than the suffix) gets ".out" appended.
// A name that consists of nothing but ".hz" also gets ".out" appended,
// because stripping the suffix would leave an empty file name.
static string mkfn(const string & filename)
{
  const string suffix = ".hz";

  // The length check comes first: size()-3 on a shorter name would wrap
  // around and make the suffix lookup throw std::out_of_range.
  if(filename.size() > suffix.size() &&
     filename.compare(filename.size() - suffix.size(), suffix.size(),
                      suffix) == 0)
    return filename.substr(0, filename.size() - suffix.size());

  return filename + ".out";
}

// Copies the body of a humanzipped stream from `infile` to `outfile`,
// expanding replacements on the way: ASCII bytes (0x00-0x7f) are copied
// as they are, anything else is read as one UTF-8 sequence by getutf8()
// and replaced by whatever findsearch() returns for it.
//
// Returns 1 on success and -1 if at least one sequence could not be read
// (getutf8() returned an empty string).
static int doreplacement(const vector<searchreplace> & rt, ifstream & infile,
  ofstream & outfile)
{
  int returnvalue = 1;

  while(1){
    // peek() returns an int so that end-of-file can be told apart from
    // every possible byte value; narrowing it to char would confuse the
    // byte 0xff with EOF (or never see EOF where char is unsigned).
    const int next = infile.peek();
    if(next == ifstream::traits_type::eof()) break;

    if(next < 0x80){
      outfile.put(static_cast<char>(infile.get()));
      continue;
    }

    const string replace = getutf8(infile, static_cast<char>(next));
    // Look the replacement up once; a second lookup would only repeat the
    // "not found" diagnostic.
    const string search = findsearch(replace, rt);
    if(search == ""){
      returnvalue = -1;
      // getutf8() hands back "" for a byte that cannot start a sequence.
      // If that byte is still at the head of the stream, discard it here,
      // otherwise this loop would peek it forever.
      if(infile.peek() == next) infile.get();
    }
    outfile << search;
  }

  return returnvalue;
}

// Checks whether the uncompressed version of `filename` may be written.
// Returns 1 if it may, 0 (after printing the reason) if it may not.
//
// Nothing is checked when the output goes to stdout, which is the case
// both with -c and when the input is "/dev/stdin".  Otherwise the output
// name is derived with mkfn(); the check fails if that file exists and -f
// was not given, or if it cannot be opened for writing.
//
// Note that the writability probe opens the file with ofstream, which
// creates it (or truncates an existing one).
static int checkoutfile(const string & filename)
{
  // stdout had better always be ok.  Input from stdin is sent to stdout
  // as well, so there is no "/dev/stdin.out" to probe in that case.
  if(tostdout || filename == "/dev/stdin") return 1;

  string outfilename = mkfn(filename);

  // if the file already exists and we're not willing to overwrite it, fail
  struct stat buffer;
  if(!overwrite && (-1 != stat(outfilename.c_str(), &buffer))){
    cerr << "Output file " << outfilename << " already exists, skipping.\n";
    return 0;
  }

  // if the file does not exist, or it does and we're willing to overwrite
  // it, check if we can actually write to it
  ofstream outfile(outfilename.c_str());
  if(!outfile.is_open()){
    cerr << "Couldn't open " << outfilename << " for writing!\n";
    return 0;
  }

  outfile.close();
  return 1;
}


// Uncompresses one file.  Returns 0 on success, 1 on failure.
//
// The output goes to /dev/stdout when -c was given or the input is
// "/dev/stdin", and to the name produced by mkfn() otherwise.  The
// replacement table is read with getrt() and the body expanded with
// doreplacement().
//
// The input file is removed afterwards only if all of the following hold:
// the user did not ask to keep it, it is a real file rather than stdin,
// every step succeeded, and the output was written without error.
static int doit(const string & filename)
{
  int returnvalue = 0;
  const bool fromstdin = (filename == "/dev/stdin");

  if(!checkoutfile(filename)) return 1;

  ifstream infile(filename.c_str());
  if(!infile.is_open()){
    cerr << "Couldn't open " << filename << ".  Skipping...\n";
    return 1;
  }

  string outfilename;
  if(fromstdin || tostdout) outfilename = "/dev/stdout";
  else outfilename = mkfn(filename);
  ofstream outfile(outfilename.c_str());
  if(!outfile.is_open()){
    cerr << "Couldn't open " << outfilename << " for writing\n";
    return 1;
  }

  vector<searchreplace> replacementtable;
  string errorline = "";
  if(-1 == getrt(errorline, replacementtable, infile, filename))
    returnvalue = 1;

  // Was expecting a key line, got this.  Maybe the blank line was deleted?
  // Pass it through so that the text is not lost.
  if(returnvalue == 1 && errorline != "") outfile << errorline << endl;

  if(-1 == doreplacement(replacementtable, infile, outfile))
    returnvalue = 1;

  // Push everything out before deciding about the input file: a write
  // error (full disk, closed pipe) only shows up in the stream state.
  outfile.close();
  if(outfile.fail()){
    cerr << "Error while writing " << outfilename << "\n";
    returnvalue = 1;
  }

  // Never delete the only good copy after a failure, and never try to
  // unlink the stdin device node.
  if(!keeporig && !fromstdin && returnvalue == 0) unlink(filename.c_str());
  return returnvalue;
}

// Entry point: parses the command line, then uncompresses every named
// file (or stdin if none was named).  The exit status is the number of
// files that failed, capped at 255.
int main(int argc, char ** argv)
{
  int nfailed = 0;
  vector<string> filenames;
  handle_cmdline(filenames, argc, argv);
  for(unsigned int i = 0; i < filenames.size(); i++)
    nfailed += doit(filenames[i]);

  // Only the low 8 bits of the exit status reach the parent process, so an
  // uncapped count of e.g. 256 failures would be reported as success.
  return nfailed > 255 ? 255 : nfailed;
}
