/*  GNU Moe - My Own Editor
    Copyright (C) 2005, 2006 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <cctype>
#include <string>
#include <vector>

#include "buffer.h"
#include "iso_8859.h"
#include "regex.h"


namespace Regex {

// Set of characters described by a bracket expression such as "[a-z]" or
// "[^0-9]" found at the beginning of a pattern.
//
class Set
  {
  int _parsed_len;    // chars of the pattern used by the expression; 0 if malformed
  bool in;            // false if the expression begins with '^' (negated set)
  std::string data;   // chars listed in the expression, with ranges expanded

public:
  Set( const std::string & regex ) throw();

  // Returns true if 'ch' is accepted by the set.
  // A set that could not be parsed accepts no char at all.
  bool includes( const unsigned char ch ) const throw()
    {
    if( _parsed_len == 0 ) return false;
    const bool listed = ( data.find( ch ) != std::string::npos );
    return ( listed == in );
    }

  // Number of chars of the pattern forming the expression, from the
  // opening '[' to the closing ']', or 0 if the expression is malformed.
  int parsed_len() const throw() { return _parsed_len; }
  };


// Parses the bracket expression found at the beginning of 'regex'.
// The expression begins with '[', optionally followed by '^' to negate
// the set, and ends at the first ']' that is not the first listed char.
// Chars may be given literally, as backslash escapes, or as ranges 'a-z'.
// If the expression is malformed (too short, no closing ']', or invalid
// escape), '_parsed_len' is left at 0.
//
Set::Set( const std::string & regex ) throw()
  : _parsed_len( 0 ), in( true )
  {
  if( regex.size() < 3 || regex[0] != '[' ) return;

  unsigned int pos = 1;
  if( regex[pos] == '^' ) { in = false; ++pos; }
  // a ']' in first position is a member of the set, not its end
  if( regex[pos] == ']' ) { data += ']'; ++pos; }

  bool closed = false;
  while( pos < regex.size() )
    {
    unsigned char first = regex[pos];
    if( first == ']' ) { ++pos; closed = true; break; }
    if( first == '\\' )
      {
      int len;
      const int code = ISO_8859::escape( std::string( regex, pos + 1 ), &len );
      if( code < 0 ) break;
      first = code; pos += len;   // 'pos' is now at the last char of the escape
      }

    // "x-y" is a range unless the '-' is the last char before the ']'
    const bool is_range = ( pos + 2 < regex.size() &&
                            regex[pos+1] == '-' && regex[pos+2] != ']' );
    if( !is_range ) data += first;
    else
      {
      pos += 2;
      unsigned char last = regex[pos];
      if( last == '\\' )
        {
        int len;
        const int code = ISO_8859::escape( std::string( regex, pos + 1 ), &len );
        if( code < 0 ) break;
        last = code; pos += len;
        }
      // an inverted range (first > last) adds nothing to the set
      for( int c = first; c <= last; ++c ) data += static_cast< char >( c );
      }
    ++pos;
    }
  if( closed ) _parsed_len = pos;
  }


// Returns true if substring of 'buffer' beginning at 'p' matches 'regex'.
// If found, sets 'p' after found substring and returns in 'pieces' the
// text matching every \*, \? etc from the regular expression.
// If not found, 'p' is not modified and 'pieces' is restored to the size
// it had on entry.
// An empty 'regex' always matches and consumes no text.
// If 'icase' is true, literal and escaped chars are compared ignoring
// case. Character sets (\[...]) are always compared exactly.
//
bool match_regex( const Buffer & buffer, Point & p,
                  const std::string & regex,
                  std::vector< std::string > & pieces, bool icase ) throw()
  {
  if( regex.size() == 0 ) return true;
  Point p1 = p;   // working position; copied to 'p' only on success
  const unsigned int pieces_old_size = pieces.size();
  bool fail = false;

  for( unsigned int i = 0; i < regex.size() && !fail; ++i )
    {
    unsigned char ch1 = regex[i];
    if( ch1 != '\\' )   // normal (not escaped) char
      {
      // NOTE(review): pgetc presumably returns the char at 'p1' and
      // advances 'p1', or a negative value at end of buffer.
      int ch2 = buffer.pgetc( p1 );
      if( ch2 < 0 || ( !icase && ch1 != ch2 ) ||
          ( icase && ISO_8859::tolower( ch1 ) != ISO_8859::tolower( ch2 ) ) )
        { fail = true; break; }
      continue;
      }

    if( ++i >= regex.size() ) { fail = true; break; }   // trailing backslash
    ch1 = regex[i];
    switch( ch1 )   // ch1 is first (maybe only) of an escape sequence
      {
      // Zero-width assertions; they consume no text and produce no piece.
      case '^': if( p1 != buffer.bol( p1 ) || p1 == buffer.eof() ) fail = true; break;
      case '$': if( p1 != buffer.eol( p1 ) ) fail = true; break;
      case '<': if( !buffer.pisbow( p1 ) ) fail = true; break;
      case '>': if( !buffer.piseow( p1 ) ) fail = true; break;
      case '?': {   // Any single char
                int ch2 = buffer.pgetc( p1 );
                if( ch2 < 0 ) { fail = true; break; }
                pieces.push_back( std::string( 1, ch2 ) );
                } break;
      case '*': {   // Find shortest matching sequence (never crosses a newline)
                const int ps = pieces.size();
                // The rest of the regex is the same on every iteration; build it once.
                const std::string rest( regex, i + 1 );
                std::string piece;
                while( !match_regex( buffer, p1, rest, pieces, icase ) )
                  {
                  int ch2 = buffer.pgetc( p1 );
                  if( ch2 < 0 || ch2 == '\n' ) { fail = true; break; }
                  piece += ch2;
                  }
                // The recursive call has matched the rest of the regex and may
                // have appended its own pieces; ours goes before them.
                if( !fail ) { pieces.insert( pieces.begin() + ps, piece ); i = regex.size(); }
                } break;
      case 'c': {   // Text from 'p1' through the matching delimiter
                Point begin = p1;
                if( !buffer.set_to_matching_delimiter( p1, true, true ) || !buffer.pseek( p1, 1 ) )
                  { fail = true; break; }
                std::string piece;
                buffer.to_string( begin, p1, piece );
                pieces.push_back( piece );
                } break;
      case '[': {   // Single char belonging to a set
                Set set( std::string( regex, i ) );   // Set of characters to match
                int ch2 = buffer.pgetc( p1 );
                // A malformed set includes nothing, so it fails here too.
                if( ch2 < 0 || !set.includes( ch2 ) ) { fail = true; break; }
                i += ( set.parsed_len() - 1 );   // leave 'i' at the closing ']'
                pieces.push_back( std::string( 1, ch2 ) );
                } break;
      case '+': {   // Shortest (maybe empty) sequence of the following char or set
                if( ++i >= regex.size() ) { fail = true; break; }
                const int ps = pieces.size();
                std::string piece;   // Find shortest matching sequence
                ch1 = regex[i];
                if( ch1 == '\\' )
                  {
                  if( i + 1 < regex.size() && regex[i+1] == '[' )
                    {
                    Set set( std::string( regex, i + 1 ) );   // Set of characters to match
                    if( !set.parsed_len() ) { fail = true; break; }
                    i += set.parsed_len();   // 'i' is now at the closing ']'
                    const std::string rest( regex, i + 1 );
                    while( !match_regex( buffer, p1, rest, pieces, icase ) )
                      {
                      int ch2 = buffer.pgetc( p1 );
                      if( ch2 < 0 || !set.includes( ch2 ) ) { fail = true; break; }
                      piece += ch2;
                      }
                    if( !fail ) { pieces.insert( pieces.begin() + ps, piece ); i = regex.size(); }
                    break;
                    }
                  int len, cht = ISO_8859::escape( std::string( regex, i + 1 ), &len );
                  if( cht < 0 ) { fail = true; break; }
                  ch1 = cht; i += len;   // 'i' is now at the last char of the escape
                  }
                const std::string rest( regex, i + 1 );
                while( !match_regex( buffer, p1, rest, pieces, icase ) )
                  {
                  int ch2 = buffer.pgetc( p1 );
                  if( ch2 < 0 ||
                      ( icase && ISO_8859::tolower( ch1 ) != ISO_8859::tolower( ch2 ) ) ||
                      ( !icase && ch1 != ch2 ) ) { fail = true; break; }
                  piece += ch2;
                  }
                if( !fail ) { pieces.insert( pieces.begin() + ps, piece ); i = regex.size(); }
                } break;
      case 'w': {   // Find shortest matching sequence of whitespace
                const int ps = pieces.size();
                const std::string rest( regex, i + 1 );
                std::string piece;
                while( !match_regex( buffer, p1, rest, pieces, icase ) )
                  {
                  int ch2 = buffer.pgetc( p1 );
                  if( ch2 < 0 || !std::isspace( ch2 ) ) { fail = true; break; }
                  piece += ch2;
                  }
                if( !fail ) { pieces.insert( pieces.begin() + ps, piece ); i = regex.size(); }
                } break;
      default: {   // Escaped literal char
               int len, cht = ISO_8859::escape( std::string( regex, i ), &len );
               if( cht < 0 ) { fail = true; break; }
               ch1 = cht;
               int ch2 = buffer.pgetc( p1 );
               if( ch2 < 0 ||
                   ( icase && ISO_8859::tolower( ch1 ) != ISO_8859::tolower( ch2 ) ) ||
                   ( !icase && ch1 != ch2 ) ) { fail = true; break; }
               i += ( len - 1 );   // leave 'i' at the last char of the escape
               }
      }
    }
  if( !fail ) p = p1;
  else if( pieces_old_size != pieces.size() ) pieces.resize( pieces_old_size );
  return !fail;
  }

} // end namespace Regex


// Searches 'buffer' from 'p1' ('p1-1' if backward) for 'regex'.
// If found, sets 'p1' at the first char of, and 'p2' after the
// found substring and returns in 'pieces' the text matching every \*,
// \? etc from the regular expression.
// An empty 'regex' is reported as found without touching 'p1', 'p2' or
// 'pieces'. Otherwise 'pieces' is emptied before searching.
// If 'icase' is true the search ignores case.
//
bool Regex::find( const Buffer & buffer, Point & p1, Point & p2,
                  const std::string & regex, std::vector< std::string > & pieces,
                  bool icase, bool backward ) throw()
  {
  if( regex.size() == 0 ) return true;
  if( pieces.size() ) pieces.clear();

  // Length of the literal text preceding the first escape sequence.
  // std::string::find returns npos when there is no backslash; in that
  // case the whole regex is literal text. (Storing npos in an int made
  // the test below fail, so plain-text patterns never used the text
  // search and the 'same_size' shortcuts were unreachable.)
  std::string::size_type bs = regex.find( '\\' );
  if( bs == std::string::npos ) bs = regex.size();
  if( bs > 0 )
    {
    // Locate the literal header with a plain text search, then try the
    // full regex only at the places where the header was found.
    // NOTE(review): assumes rfind_text leaves 'p1' at the first char of
    // the found text and find_text leaves it just after, as the position
    // fixups below imply - confirm against Buffer.
    const std::string header( regex, 0, bs );
    const bool same_size = ( header.size() == regex.size() );
    if( backward ) while( true )
      {
      if( !buffer.rfind_text( p1, header, icase ) ) return false;
      p2 = p1;
      if( same_size ) { buffer.pseek( p2, header.size() ); return true; }
      if( match_regex( buffer, p2, regex, pieces, icase ) ) return true;
      if( !buffer.pprev( p1 ) ) return false;
      }
    else while( true )
      {
      if( !buffer.find_text( p1, header, icase ) ) return false;
      // move 'p1' back to the first char of the header
      p2 = p1; buffer.pseek( p1, -static_cast< int >( header.size() ) );
      if( same_size ) return true; else p2 = p1;
      if( match_regex( buffer, p2, regex, pieces, icase ) ) return true;
      if( !buffer.pnext( p1 ) || p1 == buffer.eof() ) return false;
      }
    }

  // The regex begins with an escape sequence; try every position in turn.
  if( backward ) while( true )
    {
    if( !buffer.pprev( p1 ) ) return false; else p2 = p1;
    if( match_regex( buffer, p2, regex, pieces, icase ) ) return true;
    }
  else while( true )
    {
    p2 = p1;
    if( match_regex( buffer, p2, regex, pieces, icase ) ) return true;
    else if( !buffer.pnext( p1 ) || p1 == buffer.eof() ) return false;
    }
  }


// Replaces the block at [p1,p2) of 'buffer' with the text produced by
// expanding 'regex':
//   \0 to \9   the corresponding element of 'pieces'
//   \&         the text originally found at [p1,p2)
//   \other     the char produced by ISO_8859::escape
//   any other char stands for itself.
// Returns false, leaving 'buffer' unmodified by this function, if 'regex'
// ends in a lone backslash, refers to a piece that does not exist, or
// contains an invalid escape sequence. Also returns false if the buffer
// rejects the replacement.
// If there is no error, returns true and sets 'p2' after the newly
// inserted text.
//
bool Regex::replace( Buffer & buffer, const Point & p1, Point & p2,
                     const std::string & regex,
                     const std::vector< std::string > & pieces ) throw()
  {
  const Basic_buffer matched( buffer, p1, p2 );   // copy of the text being replaced
  Basic_buffer tmp;                               // replacement text is built here
  Point p = tmp.bof();

  unsigned int i = 0;
  while( i < regex.size() )
    {
    unsigned char ch = regex[i++];
    if( ch != '\\' ) { tmp.pputc( p, ch ); continue; }   // normal char

    if( i >= regex.size() ) return false;   // nothing follows the backslash
    ch = regex[i];   // first (maybe only) char of an escape sequence
    if( std::isdigit( ch ) )
      {
      const unsigned int n = ch - '0';
      if( n >= pieces.size() ) return false;
      const std::string & piece = pieces[n];
      for( unsigned int j = 0; j < piece.size(); ++j )
        tmp.pputc( p, piece[j] );
      ++i;
      }
    else if( ch == '&' )
      {
      if( !matched.empty() )
        tmp.pputb( p, matched, matched.bof(), matched.eof()  );
      ++i;
      }
    else
      {
      int len;
      int cht = ISO_8859::escape( std::string( regex, i ), &len );
      if( cht < 0 ) return false;
      tmp.pputc( p, cht );
      i += len;   // skip the whole escape sequence
      }
    }

  if( !buffer.replace( p1, p2, tmp, tmp.bof(), tmp.eof() ) ) return false;
  return true;
  }


// Returns true if 'regex' matches 'name'
// The whole of 'name' must be matched. In 'regex':
//   ?       matches any single char
//   *       matches any sequence of chars
//   [...]   matches one char belonging to the set
//   \x      matches the char produced by the escape sequence
//   any other char matches itself.
// Returns false if either 'regex' or 'name' is empty.
//
bool Regex::match_filename( const std::string & regex, const std::string & name ) throw()
  {
  if( name.size() == 0 || regex.size() == 0 ) return false;
  unsigned int i2 = 0;   // index of the next unmatched char of 'name'
  bool fail = false;

  for( unsigned int i1 = 0; i1 < regex.size() && !fail; ++i1 )
    {
    unsigned char ch1 = regex[i1];
    switch( ch1 )
      {
      case '?': if( ++i2 > name.size() ) fail = true; break;
      case '*': {
                // a trailing '*' matches whatever remains of 'name'
                if( i1 + 1 >= regex.size() ) { i2 = name.size(); break; }
                if( regex[i1+1] == '*' ) break;   // "**" is the same as "*"
                // Try the rest of the pattern against every suffix of
                // 'name', shortest skipped prefix first.
                const std::string tail( regex, i1 + 1 );
                for( ; i2 < name.size(); ++i2 )
                  if( match_filename( tail, std::string( name, i2 ) ) )
                    break;
                if( i2 >= name.size() ) { fail = true; break; }
                // the recursive call has matched everything that was left
                i1 = regex.size(); i2 = name.size();
                } break;
      case '[': {
                if( i2 >= name.size() ) { fail = true; break; }
                unsigned char ch2 = name[i2++];
                Set set( std::string( regex, i1 ) );   // Set of characters to match
                // a malformed set includes nothing, so it fails here
                if( set.includes( ch2 ) ) i1 += ( set.parsed_len() - 1 );
                else fail = true;
                } break;
      default: if( ch1 == '\\' )
                 {
                 int len, cht = ISO_8859::escape( std::string( regex, i1 + 1 ), &len );
                 if( cht < 0 ) { fail = true; break; }
                 // 'len' counts the chars following the backslash, so this
                 // leaves 'i1' at the last char of the escape sequence.
                 ch1 = cht; i1 += len;
                 }
               // compare as unsigned chars so that chars >= 128 can match
               // where plain char is signed
               if( i2 >= name.size() ||
                   ch1 != static_cast< unsigned char >( name[i2++] ) ) fail = true;
               break;
      }
    }
  if( !fail && i2 < name.size() ) fail = true;   // unmatched chars remain in 'name'
  return !fail;
  }
