/*  GNU OCRAD - Optical Character Recognition program
    Copyright (C) 2003 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <cstdio>
#include <list>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "block.h"
#include "character.h"
#include "profile.h"
#include "feats.h"
#include "iso_8859_1.h"


// First recognition attempt; does not rely on context.
// Selects the recognizer that matches the number of blocks in the
// character. No recognizer is invoked for characters made of 0 blocks
// or of more than 3 blocks.
void Character::recognize1( int charbox_vcenter ) throw()
  {
  switch( _block_list.size() )
    {
    case 1: recognize11( charbox_vcenter ); break;
    case 2: recognize12( charbox_vcenter ); break;
    case 3: recognize13(); break;
    default: break;
    }
  }


// Recognizes 1 block characters.
// 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghklmnopqrstuvwxyz
// #$&'()*+,-./<>@[\]^_`{|}~
void Character::recognize11( int charbox_vcenter ) throw()
  {
  const Block & b = _block_list.front();
  if( b.block_list().size() == 0 ) recognize110( charbox_vcenter );
  else if( b.block_list().size() == 1 ) recognize111();
  else if( b.block_list().size() == 2 ) recognize112();
  }


// Recognizes 1 block characters without holes.
// 12357CEFGHIJKLMNSTUVWXYZcfhklmnrstuvwxyz
// '()*+,-./<>@[\]^_`{|}~
//
// Runs a fixed sequence of feature tests on the block and stops at the
// first one that yields a character, which is added as a guess with
// value 0. A few results ('u', 'n', 'h', 'k', and a bottom profile with
// 4 minima) are examined further for two merged letters ('tr', 'rr',
// 'fl', 'rf', 'rt', 'rm'); in that case only_guess( 0, b.left() ) is
// called and each letter is added with a column as its value
// (presumably the right limit of that letter -- confirm against
// only_guess/add_guess).
// 'charbox_vcenter' is passed to test_easy() and test_line(), and is
// compared against the vertical extent of the block.
// If no test matches, the function returns without adding a guess.
void Character::recognize110( int charbox_vcenter ) throw()
  {
  const Block & b = _block_list.front();
  Features f( b );
  unsigned char ch = f.test_easy( charbox_vcenter );
  if( ch )
    {
    // A '.' that is wider than tall and vertically includes
    // charbox_vcenter is also offered as '-' ('.' gets value 1, '-' gets
    // value 0).
    if( ch == '.' && b.width() > b.height() && b.v_includes( charbox_vcenter ) )
      { add_guess( ch, 1 ); add_guess( '-', 0 ); return; }
    add_guess( ch, 0 ); return;
    }

  // Specialized tests, tried in this order; the first nonzero result is
  // added as a guess and ends the recognition.
  ch = f.test_CEFIJLlT();   if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_frst();       if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_G();          if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_c();          if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_235Esz();     if( ch ) { add_guess( ch, 0 ); return; }
  // The result of this test is not accepted immediately because a 'u'
  // may in fact be two merged letters.
  ch = f.test_HKMNUuvwYy();
  if( ch == 'u' && f.lp.isvpit() )	// Looks for merged 'tr'
    {
    int col = b.seek_left( b.vcenter(), b.right() );
    if( col < b.hpos( 90 ) && !b.escape_top( b.vcenter(), col ) )
      {
      col = b.seek_left( b.vcenter(), col - 1, false );
      // Moves 'col' leftwards (never past b.hpos( 40 )) while the column
      // does not qualify as a cut point; the loop body is empty.
      while( --col > b.hpos( 40 ) &&
             ( b.seek_top( b.vcenter(), col ) > b.top() ||
               f.hp[col-b.left()] > b.height() / 10 ) );
      if( col > b.hpos( 40 ) )
        { only_guess( 0, b.left() ); add_guess( 't', col );
        add_guess( 'r', b.right() ); return; }
      }
    }
  if( ch ) { add_guess( ch, 0 ); return; }
  // f.bp is presumably the bottom profile of the block (TODO confirm in
  // Features); this branch requires exactly 2 minima in it.
  if( f.bp.minima( b.height() / 8 ) == 2 )
    {
    ch = f.test_hknwx();
    if( ch == 'n' )	// Looks for '"' or merged 'rr' or 'fl'
      {
      // The bottom of the block is not below charbox_vcenter.
      if( b.bottom() <= charbox_vcenter ) { add_guess( '"', 0 ); return; }
      // Wider than tall: candidate for merged 'rr', split at b.hcenter().
      if( b.width() > b.height() )
        {
        int col = b.seek_left( b.vcenter(), b.right() );
        if( col < b.hpos( 90 ) && !b.escape_top( b.vcenter(), col ) )
          { only_guess( 0, b.left() ); add_guess( 'r', b.hcenter() );
          add_guess( 'r', b.right() ); return; }
        }
      // Merged 'fl': for decreasing rows from b.vcenter() to just before
      // b.vpos( 25 ), 'd' is the distance from b.hcenter() to the column
      // returned by seek_left. 'bar' is set once 'd' falls below half of
      // the largest value seen so far; if 'd' later becomes similar to
      // that maximum again, a cut column is searched for.
      int dmax = 0; bool bar = false;
      for( int row = b.vcenter(); row > b.vpos( 25 ); --row )
        {
        int d = b.hcenter() - b.seek_left( row, b.hcenter() );
        if( d > dmax ) dmax = d;
        else if( 2 * d < dmax && dmax > 2 ) bar = true;
        if( bar && Ocrad::similar( d, dmax, 25 ) )
          {
          // First column from b.hcenter() up to 'limit' for which
          // seek_bottom from the center row stops before b.bottom().
          int col, limit = b.seek_right( b.vcenter(), b.hcenter() );
          for( col = b.hcenter(); col <= limit; ++col )
            if( b.seek_bottom( b.vcenter(), col ) < b.bottom() ) break;
          if( b.left() < col && col < b.right() )
            { only_guess( 0, b.left() ); add_guess( 'f', col - 1 );
            add_guess( 'l', b.right() ); return; }
          }
        }
      }
    else if( ch == 'h' )	// Looks for merged 'rf'
      {
      if( b.seek_right( b.top() + 1, b.left() ) > b.hcenter() )
        {
        // Cut at the column between b.hpos(40) and b.hpos(60) having the
        // smallest value in f.hp; col stays 0 if no column qualifies.
        int col = 0, hmin = f.hp.range() + 1;
        for( int i = b.hpos(40); i <= b.hpos(60); ++i )
          if( f.hp[i-b.left()] < hmin )
            { hmin = f.hp[i-b.left()]; col = i; }
        if( col > b.left() && col < b.right() )
          { only_guess( 0, b.left() ); add_guess( 'r', col - 1 );
          add_guess( 'f', b.right() ); return; }
        }
      }
    else if( ch == 'k' )	// Looks for merged 'rt'
      {
      if( b.seek_right( b.top() + 1, b.left() ) > b.hcenter() )
        {
        only_guess( 0, b.left() ); add_guess( 'r', b.hcenter() );
        add_guess( 't', b.right() ); return;
        }
      }
    // No merged pair was found (or 'ch' is another letter): accept the
    // result of test_hknwx() as is.
    if( ch ) { add_guess( ch, 0 ); return; }
    }
  if( f.bp.minima() == 3 ) { add_guess( 'm', 0 ); return; }
  if( f.bp.minima() == 4 )	// Looks for merged 'rm'
    {
    // Three successive seek_right calls along row b.bottom() - 1, each
    // starting one column past the previous result; the final column is
    // used as the cut point.
    int col = b.seek_right( b.bottom() - 1, b.left() );
    col = b.seek_right( b.bottom() - 1, col + 1, false );
    col = b.seek_right( b.bottom() - 1, col + 1 );
    if( col > b.left() && col < b.right() )
      { only_guess( 0, b.left() ); add_guess( 'r', col );
      add_guess( 'm', b.right() ); return; }
    }

  if( f.tp.minima() == 3 ) { add_guess( 'w', 0 ); return; }
  // More than twice as wide as tall, with 2 minima in both f.tp and f.bp.
  if( b.width() > 2 * b.height() && f.tp.minima() == 2 && f.bp.minima() == 2 )
    { add_guess( '~', 0 ); return; }
  // More than twice as tall as wide, with a convex f.rp.
  if( f.rp.isconvex() && b.height() > 2 * b.width() )
    { add_guess( ')', 0 ); return; }

  ch = f.test_line( charbox_vcenter );
  if( ch ) { add_guess( ch, 0 ); return; }

  // Last resort; if it also fails no guess is added.
  ch = f.test_misc();
  if( ch ) { add_guess( ch, 0 ); return; }
  }


// Recognizes 1 block characters with 1 hole.
// 0469ADOPQRabdegopq#
void Character::recognize111() throw()
  {
  Block & b = _block_list.front();
  const Block & h = b.block_list().front();
  Features f( b );
  int top_delta = h.top() - b.top(), bottom_delta = b.bottom() - h.bottom();

  if( std::abs( top_delta - bottom_delta ) <= 2 ||
      Ocrad::similar( top_delta, bottom_delta, 40 ) ) // hole is vertically centered
    {
    unsigned char ch = f.test_4ADQao();
    if( ch )
      {
      if( ch == 'Q' ) add_guess( 'a', 1 );
      add_guess( ch, 0 );
      }
    return;
    }

  if( top_delta < bottom_delta )	// hole is high
    {
    unsigned char ch = f.test_49ARegpq();
    if( ch ) add_guess( ch, 0 );
    return;
    }

  if( top_delta > bottom_delta )	// hole is low
    {
    unsigned char ch = f.test_6abd();
    if( ch )
      {
      add_guess( ch, 0 );
      if( ch == ISO_8859_1::SOACUTE )
        {
        int row = h.top() - ( b.bottom() - h.bottom() ) - 1;
        if( row <= b.top() || row + 1 >= h.top() ) return;
        Block b1( b, *b.blockmap(), b.id() );
        b.top( row + 1 ); b1.bottom( row );
        _block_list.push_back( b1 );
        }
      }
    }
  }


// Recognizes 1 block characters with 2 holes.
// 8BQg$&
void Character::recognize112() throw()
  {
  const Block & b = _block_list.front();
  const Block & h1 = b.block_list().front();		// upper hole
  const Block & h2 = b.block_list().back();		// lower hole
  int a1 = h1.area();
  int a2 = h2.area();
//  Features f( b );

  if( h1.includes_vcenter( h2 ) && h2.includes_vcenter( h1 ) )
    { add_guess( 'm', 0 ); return; }

  Profile lp( b, Profile::left );
  Profile rp( b, Profile::right );
  Profile bp( b, Profile::bottom );
  if( Ocrad::similar( a1, a2, 50 ) )		// I don't like this
    {
    int hdiff;
    if( b.bottom_hook( &hdiff ) && hdiff > b.height() / 2 )
      if( b.top_hook( &hdiff ) && hdiff > b.height() / 2 )
        { add_guess( 's', 0 ); return; }
  
    if( lp.isflats() ) { add_guess( 'B', 0 ); return; }

    int col1 = h1.seek_left( h1.bottom(), h1.right() + 1 ) - 1;
    int col2 = h2.seek_right( h2.top(), h2.left() - 1 ) + 1;
    if( col1 <= col2 )
      { if( lp.isconvex() ) add_guess( 'e', 1 ); add_guess( '$', 0 ); return; }

    if( b.hcenter() > h1.hcenter() && b.hcenter() > h2.hcenter() &&
        ( b.hcenter() >= h1.right() || b.hcenter() >= h2.right() ) )
      { add_guess( '&', 0 ); return; }
    for( int row = h1.bottom() + 1; row < h2.top(); ++row )
      if( b.id( row, hcenter() ) == 0 ) { add_guess( 'g', 0 ); return; }
    if( bp.isconvex() )
      { if( lp.isconvex() ) add_guess( 'e', 1 ); add_guess( '8', 0 ); return; }
    if( lp.minima() == 2 && rp.minima() == 1 ) { add_guess( 'a', 0 ); return; }
    add_guess( 'B', 1 ); add_guess( 'a', 0 ); return;
    }
  if( a1 > a2 )
    {
    if( !h1.v_overlaps( h2 ) ) { add_guess( 'g', 0 ); return; }
    add_guess( 'Q', 0 ); return;
    }
  add_guess( '&', 0 );
  }


// Recognizes 2 block characters.
// ij!%:;=?|
//
// b1 is the front (lower) block and b2 the back (upper) block. The
// decision is driven by how the areas of both blocks compare:
//   - similar with tolerance 10: '=', '|' or ':'.
//   - similar with tolerance 60: ':', ';', PLUSMIN, 'g' or '%'.
//   - lower block larger: dotted characters ('i', 'j', IQUEST, IEXCLAM),
//     a merged 'ri', or the character recognized in b1 composed with an
//     accent type derived from b2.
//   - otherwise (upper block larger): '!', '?', MASCORD or FEMIORD.
// Several paths return without adding any guess.
// 'charbox_vcenter' is only passed to Features::test_solid().
void Character::recognize12( int charbox_vcenter ) throw()
  {
  const Block & b1 = _block_list.front();		// lower block
  const Block & b2 = _block_list.back();		// upper block
  int a1 = b1.area();
  int a2 = b2.area();
  Features f1( b1 );
  Features f2( b2 );

  // Both blocks have nearly the same area.
  if( Ocrad::similar( a1, a2, 10 ) )
    {
    // Here width() and height() are those of the whole character.
    if( width() > height() || Ocrad::similar( width(), height(), 50 ) )
      { add_guess( '=', 0 ); return; }
    if( b2.height() >= 2 * b2.width() ) { add_guess( '|', 0 ); return; }
    add_guess( ':', 0 ); return;
    }
  // Areas are roughly comparable.
  if( Ocrad::similar( a1, a2, 60 ) )
    {
    if( f2.test_solid( charbox_vcenter ) == '.' )
      {
      if( f1.test_solid( charbox_vcenter ) == '.' )
        { add_guess( ':', 0 ); return; }
      if( b1.height() > b2.height() ) { add_guess( ';', 0 ); return; }
      }
    // A lower block that tests as '-' or '_' gives PLUSMIN.
    unsigned char ch = f1.test_solid( charbox_vcenter );
    if( ch == '-' || ch == '_' )
      { add_guess( ISO_8859_1::PLUSMIN, 0 ); return; }
    // NOTE(review): after 'g' is guessed, control falls through to the
    // '%' test below, so both guesses may be added -- confirm this is
    // intended.
    if( b1.includes_hcenter( b2 ) && b2.includes_hcenter( b1 ) )
      { if( b1.blocks() && b2.blocks() ) add_guess( 'g', 0 ); else return; }
    if( b1.hcenter() > b2.hcenter() ) { add_guess( '%', 0 ); return; }
    return;
    }
  // The lower block is clearly the larger one.
  if( a1 > a2 )
    {
    unsigned char ch = f2.test_solid( charbox_vcenter );
    // An upper block tested as '-' is treated as '.' when its height is
    // more than half its width.
    if( ch == '-' && 2 * b2.height() > b2.width() ) ch = '.';	//FIXME
    // Lower block without inner blocks, topped by a dot or an apostrophe.
    if( !b1.blocks() && ( ch == '.' || ch == '\'' ) )
      {
      // Looks for merged 'ri'
      if( f1.bp.minima( b1.height() / 4 ) == 2 &&
          b1.top() > b2.bottom() && b1.hcenter() < b2.left() )
        {
        // Each block is recognized on its own as a separate character;
        // the split is tried only if they give exactly 'n' and '.'.
        Character c1( b1 ); c1.recognize1( vcenter() );
        Character c2( b2 ); c2.recognize1( vcenter() );
        if( c1.guesses() == 1 && c1.guess( 0 ).ch == 'n' &&
            c2.guesses() == 1 && c2.guess( 0 ).ch == '.' )
          {
          // First column from b1.hcenter() up to 'limit' for which
          // seek_bottom from the center row stops before b1.bottom().
          int col, limit = b1.seek_right( b1.vcenter(), b1.hcenter() );
          for( col = b1.hcenter(); col <= limit; ++col )
            if( b1.seek_bottom( b1.vcenter(), col ) < b1.bottom() ) break;
          if( b1.left() < col && col < b1.right() )
            { only_guess( 0, b1.left() ); add_guess( 'r', col - 1 );
            add_guess( 'i', b1.right() ); return; }
          }
        }
      // The remaining dotted characters require exactly 1 minimum in
      // f1.bp; otherwise no guess is added.
      if( f1.bp.minima( b1.height() / 4 ) != 1 ) return;

        // Bare block that only limits the scope of 'hdiff'.
        {
        int hdiff;
        if( b1.bottom_hook( &hdiff ) && std::abs( hdiff ) >= b1.height() / 2 )
          {
          // The sign of 'hdiff' selects between 'j' (positive) and
          // IQUEST or 'i' (negative).
          if( hdiff > 0 && f1.rp.increasing( f1.rp.pos( 75 ) ) )
            { add_guess( 'j', 0 ); return; }
          if( hdiff < 0 )
            {
            if( f1.wp.max() > 2 * f2.wp.max() && f1.lp.minima() == 1 )
              { add_guess( ISO_8859_1::IQUEST, 0 ); return; }
            add_guess( 'i', 0 ); return;
            }
          }
        }

      if( f1.tp.minima() == 1 )
        {
        // Both blocks have a similar maximum in their 'wp' profiles.
        if( Ocrad::similar( f1.wp.max(), f2.wp.max(), 20 ) )
          {
          if( 3 * f1.wp[f1.wp.pos(10)] < 2 * f2.wp.max() )
            { add_guess( ISO_8859_1::IEXCLAM, 0 ); return; }
          add_guess( 'i', 0 ); return;
          }
        if( 3 * f1.wp.max() > 4 * f2.wp.max() &&
            b1.seek_bottom( b1.vcenter(), b1.hpos( 10 ) ) < b1.bottom() &&
            f1.rp.increasing( f1.rp.pos( 75 ) ) )
          { add_guess( 'j', 0 ); return; }
        add_guess( 'i', 0 ); return;
        }
      }

    // Accented character: chooses an accent type from the upper block
    // ('\'' by default, '^' if f2.bp has 2 minima, '`' if f2.rp is
    // straight with negative slope), recognizes the lower block alone
    // and composes both. If composing yields 0, or the lower block gets
    // no guess, the accent type itself is added as the guess.
    int slope;
    unsigned char atype = '\'';
    if( f2.bp.minima() == 2 ) atype = '^';
    else if( f2.rp.straight( &slope ) && slope < 0 ) atype = '`';
    Character c( b1 );
    c.recognize1( c.vcenter() );
    if( c.guesses() ) ch = ISO_8859_1::compose( c.guess( 0 ).ch, atype );
    else ch = 0;
    if( ch ) add_guess( ch, 0 ); else add_guess( atype , 0 );
    return;
    }
  // Remaining case: the areas are not similar and a1 is not larger than
  // a2, i.e. the upper block is the larger one.
  unsigned char ch = f1.test_solid( charbox_vcenter );
  if( ch == '.' )
    {
    if( Ocrad::similar( b1.width(), b2.width(), 50 ) ) { add_guess( '!', 0 ); return; }
    add_guess( '?', 0 ); return;
    }
  if( ch == '-' || ch == '_' )
    {
    // Upper block with exactly one hole above a line: MASCORD when the
    // hole has similar margins at both sides, else FEMIORD.
    if( b2.block_list().size() == 1 )
      {
      const Block & h = b2.block_list().front();
      if( Ocrad::similar( h.left() - b2.left(), b2.right() - h.right(), 40 ) )
        { add_guess( ISO_8859_1::MASCORD, 0 ); return; }
      add_guess( ISO_8859_1::FEMIORD, 0 ); return;
      }
    }
  }


// Recognizes 3 block characters.
// %
void Character::recognize13() throw()
  {
  const Block & b1 = _block_list.front();
  Features f1( b1 );
  if( f1.test_solid( vcenter() ) == '.' ) add_guess( ISO_8859_1::DIV, 0 );
  Character c( b1 );
  c.recognize1( c.vcenter() );
  if( c.guesses() )
    switch( c.guess( 0 ).ch )
      {
      case 'A': add_guess( ISO_8859_1::CADIAER, 0 ); return;
      case 'E': add_guess( ISO_8859_1::CEDIAER, 0 ); return;
      case 'I': add_guess( ISO_8859_1::CIDIAER, 0 ); return;
      case 'O': add_guess( ISO_8859_1::CODIAER, 0 ); return;
      case 'V':
      case 'U': add_guess( ISO_8859_1::CUDIAER, 0 ); return;
      case 'a': add_guess( ISO_8859_1::SADIAER, 0 ); return;
      case 'e': add_guess( ISO_8859_1::SEDIAER, 0 ); return;
      case '|':
      case 'l':
      case 'i': add_guess( ISO_8859_1::SIDIAER, 0 ); return;
      case 'o': add_guess( ISO_8859_1::SODIAER, 0 ); return;
      case 'v':
      case 'u': add_guess( ISO_8859_1::SUDIAER, 0 ); return;
      }
  }
