/*
   Copyright (C) 2007, 2012 Oleksiy Chernyavskyy

   This file is part of XDClient.

   XDClient is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   XDClient is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with XDClient.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _GNU_SOURCE

#include <stdio.h>
#include <wchar.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
#include "regexp_utf8.h"

/* Number of entries in the per-character lookup table used for [] blocks. */
#define NCHARS 0xFFFF /* UNICODE Plane 0 */
/* Constant used for the endless `while (true)` loops in this file. */
#define true 1

/* Recursive matcher: tests re_text starting at rpos against data starting at
   dpos; returns 1 on match, 0 otherwise. */
int reg_test_r(wchar_t *data, int dpos, wchar_t *re_text, int rpos, unsigned flags, retr_t *retr);
/* Sets the first NCHARS entries of enum_chars to 0. */
void enum_clean(wchar_t *enum_chars);

/* Test 8-bit (multibyte) data against a wide-character regular expression.

   data8 is converted to a temporary wide-character string with mbstowcs()
   (i.e. according to the current locale) and then handed to reg_test().

   data8    NUL-terminated multibyte string to test; NULL or empty input
            yields 0 without touching retr.
   re_text  wide-character pattern, passed through to reg_test().
   flags    RE_* flags, passed through to reg_test().
   retr     optional result record, passed through to reg_test().

   Returns the result of reg_test(), or 0 when the input is NULL/empty,
   memory cannot be allocated, or the conversion fails. */
int reg_test_8(char *data8, wchar_t *re_text, unsigned flags, retr_t *retr)
{
  int ret;
  size_t len;
  size_t converted;
  wchar_t *udata;

  if (data8 == NULL)
    return 0;

  len = strlen(data8);
  if (len == 0)
    return 0;

  /* Every wide character consumes at least one input byte, so len + 1
     elements always hold the converted text plus its terminator.
     calloc() checks the element-count multiplication for overflow and
     zero-fills, so the buffer is terminated in every case. */
  udata = calloc(len + 1, sizeof *udata);
  if (udata == NULL) {
    fwprintf(stderr, L"regexp: error: out of memory\n");
    return 0;
  }

  /* mbstowcs() reports an invalid multibyte sequence as (size_t)-1. */
  converted = mbstowcs(udata, data8, len + 1);
  if (converted == (size_t) -1 || converted == 0) {
    fwprintf(stderr, L"regexp: error: couldn't onvert from mbs to wcs\n");
    free(udata);
    return 0;
  }

  ret = reg_test(udata, re_text, flags, retr);
  free(udata);
  return ret;
}

/* Wrapper around reg_test_r(): decides where in data the match may start.

   data     wide-character text; only the part up to the first L'\0' or
            L'\n' is used as a possible start position.
   re_text  wide-character pattern.
   flags    RE_* flags, forwarded to reg_test_r().
   retr     optional result record; its is_set field is cleared first.

   - A pattern starting with '^' is matched only at position 0 of data,
     with the caret itself skipped.
   - With RE_PREFIX_DASH the pattern is likewise matched only at position 0.
   - Otherwise every start position in data is tried in turn.

   Returns 1 when a match is found, 0 otherwise (including NULL data or
   NULL re_text). */
int reg_test(wchar_t *data, wchar_t *re_text, unsigned flags, retr_t *retr)
{
  int i;

  if (retr)
    retr->is_set = 0;

  /* reg_test_r() has the same guard, but this function dereferences both
     pointers before it would get there. */
  if (!data || !re_text) {
    if (retr)
      retr->cmp_is_valid = 0;
    return 0;
  }

  if (re_text[0] == L'^')
    return reg_test_r(data, 0, re_text, 1, flags, retr);
  else if (flags & RE_PREFIX_DASH)
    return reg_test_r(data, 0, re_text, 0, flags, retr);

  for (i = 0; data[i] != L'\0' && data[i] != L'\n'; i++)
  {
    if (reg_test_r(data, i, re_text, 0, flags, retr))
      return 1;
  }
  return 0;
}


int reg_test_r(wchar_t *data, int dpos, wchar_t *re_text, int rpos, unsigned flags, retr_t *retr)
{
  int i;
  wint_t wc, wc1, wc2;
  wchar_t enum_chars[NCHARS];
  unsigned re_len;


  if (retr)
	retr->cmp_is_valid = 0;

  if (!data || !re_text)
	return 0;

  if (re_text[rpos] == L'*' || re_text[rpos] == L'+' || re_text[rpos] == L'?')
	return 0 ;

  while (true)
  {	
	if (flags & RE_SKIP_END_SPC) {
	  if (iswblank(data[dpos])) {
		i=dpos;
		while(iswblank(data[i]))
		  i++;
		if (data[i] == L'\0' || data[i] == L'\n')
		  dpos = i;
	  }

	  if (iswblank(re_text[rpos])) {
		i=rpos;
		while(iswblank(re_text[i]) || re_text[i] == L'*' || re_text[i] == L'+' || re_text[i] == L'?')
		  i++;
		if (re_text[i] == L'\0' || re_text[i] == L'$')
		  rpos = i;
	  }
	}

	if (! re_text[rpos]) {
	  if (retr && !retr->is_set) {
		retr->cmp_is_valid = 1;
		if (data[dpos] == L'\0' || data[dpos] == L'\n')
		  retr->cmp_res = 0;
		else
		  retr->cmp_res = -1;
	  }

	  if (flags & RE_SUFFIX_WEND) {
		if (!iswblank(data[dpos]) && data[dpos] != L'\0' && data[dpos] != L'\n') {
		  return 0;
		}
	  }

	  if (flags & RE_SUFFIX_DOLLAR) {
		if (data[dpos] == L'\0' || data[dpos] == L'\n')
		  return 1;
		else
		  return 0;
	  } else {
		return 1;
	  }
	}

	if (flags & RE_COMP_SPACES) {
	  if (iswblank(data[dpos])) {
		while(iswblank(data[dpos]))
		  dpos++;
		dpos--;
	  }
	  if (iswblank(re_text[rpos])) {
		while(iswblank(re_text[rpos]))
		  rpos++;
		rpos--;
	  }
	}

	if (flags & RE_SKIP_NONALPHA) {
	  while(!iswalnum(data[dpos]) && !iswblank(data[dpos]) && data[dpos] != L'\0' && data[dpos] != L'\n')
		dpos++;
	}

	if (flags & RE_SKIP_START_SPC) {
	  if (dpos == 0) {
		while (iswblank(data[dpos]))
		  dpos++;
	  }
	  if (rpos == 0 || (rpos == 1 && re_text[0] == L'^')) {
		if (iswblank(re_text[rpos])) {
		  while (iswblank(re_text[rpos]))
			rpos++;
		  wc = re_text[rpos];
		  while (wc == L'*' || wc == L'+' || wc == L'?') {
			rpos++;
			wc = re_text[rpos];
		  }
		}
	  }
	}


	/* check '.' (any symbol) */
	if (re_text[rpos] == L'.')
	{
	  if (flags & RE_STOP_WILDCARD) {
		if (retr && !retr->is_set) {
		  retr->cmp_is_valid = 1;
		  if (data[dpos] == L'\0' || data[dpos] == L'\n')
			retr->cmp_res = 0;
		  else
			retr->cmp_res = -1;
		}
		return 1;
	  }

	  if (retr) {
		retr->cmp_is_valid = 0;
		retr->is_set = 1;
	  }

	  rpos++;
	  if (re_text[rpos] == L'*')
	  {
		rpos++;
		while(true)
		{
		  if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
			return 1;
		  if (data[dpos] && data[dpos] != L'\n')
			dpos++;
		  else
			return 0;
		} 
	  }
	  else if (re_text[rpos] == L'+')
	  {
		rpos++;
		while(true)
		{
		  if (data[dpos] && data[dpos] != L'\n')
			dpos++;
		  else
			return 0;
		  if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
			return 1;
		}
	  }
	  else if (re_text[rpos] == L'?')
	  {
		rpos++;
		if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
		  return 1;
		if (data[dpos] && data[dpos] != L'\n')
		  dpos++;
		else
		  return 0;
	  }
	  else
	  { 
		if (data[dpos] && data[dpos] != L'\n')
		  dpos++;
		else
		  return 0;
	  }
	}
	/* [] block */
	else if (re_text[rpos] == L'[')
	{
	  if (flags & RE_STOP_WILDCARD) {
		if (retr && !retr->is_set) {
		  retr->cmp_is_valid = 1;
		  if (data[dpos] == L'\0' || data[dpos] == L'\n')
			retr->cmp_res = 0;
		  else
			retr->cmp_res = -1;
		}
		return 1;
	  }

	  if (retr) {
		retr->cmp_is_valid = 0;
		retr->is_set = 1;
	  }

	  rpos++;
	  re_len = wcslen(re_text);

	  enum_clean(enum_chars);

	  while(re_text[rpos] != L']')
	  {
		if(rpos < re_len-2 && re_text[rpos+1] == L'-' && re_text[rpos] != L'[' && re_text[rpos+2] != L']')
		{
		  if (re_text[rpos] <= re_text[rpos+2])
		  {
			wc1 = re_text[rpos];
			wc2 = re_text[rpos+2];
		  } else {
			wc1 = re_text[rpos+2];
			wc2 = re_text[rpos];
		  }
		  for (; wc1 <= wc2; wc1++)
		  {
			wc = wc1;
			if (flags & RE_IGN_CASE)
			  wc = towlower(wc);
			enum_chars[wc] = 1;
		  }
		  rpos+=3;
		} else {
		  wc = re_text[rpos];
		  if (flags & RE_IGN_CASE)
			wc = towlower(wc);
		  enum_chars[wc] = 1;
		  rpos++;
		}
	  }

	  rpos++;

	  if (re_text[rpos] == L'*')
	  {
		rpos++;
		while(true)
		{
		  if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
			return 1 ;
		  if (!data[dpos] || data[dpos] == L'\n')
			return 0;
		  wc = data[dpos];
		  if (flags & RE_IGN_CASE)
			wc = towlower(wc);

		  if (! enum_chars[wc]) {
			if (flags & RE_SKIP_NONALPHA) {
			  if (iswalnum(wc) || iswblank(wc))
				return 0;
			} else {
			  return 0;
			}
		  }
		  dpos++; 
		} 
	  }
	  else if (re_text[rpos] == L'+')
	  {
		rpos++;
		while(true)
		{
		  if (!data[dpos] || data[dpos] == L'\n')
			return 0;
		  wc = data[dpos];
		  if (flags & RE_IGN_CASE)
			wc = towlower(wc);

		  if (! enum_chars[wc]) {
			if (flags & RE_SKIP_NONALPHA) {
			  if (iswalnum(wc) || iswblank(wc))
				return 0;
			} else {
			  return 0;
			}
		  }

		  dpos++; 
		  if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
			return 1 ;
		}
	  }
	  else if (re_text[rpos] == L'?')
	  {
		rpos++;
		if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
		  return 1 ;

		if (!data[dpos] || data[dpos] == L'\n')
		  return 0;
		wc = data[dpos];
		if (flags & RE_IGN_CASE)
		  wc = towlower(wc);

		if (! enum_chars[wc]) {
		  if (flags & RE_SKIP_NONALPHA) {
			if (iswalnum(wc) || iswblank(wc))
			  return 0;
		  } else {
			return 0;
		  }
		}
		dpos++; 
	  }
	  else
	  { 
		if (!data[dpos] || data[dpos] == L'\n')
		  return 0;
		wc = data[dpos];
		if (flags & RE_IGN_CASE)
		  wc = towlower(wc);
		if (! enum_chars[wc])
		  return 0 ;
		dpos++; 
	  }
	}
	/* check end of line '$' */
	else if (re_text[rpos] == L'$')
	{
	  if (data[dpos] == L'\0' || data[dpos] == L'\n')
		return 1;
	  else
		return 0;
	}
	/* check ordinary (non-special) character */
	else
	{
	  if (re_text[rpos] == L'\\') rpos++ ;

	  if (flags & RE_SKIP_NONALPHA) {
		while(!iswalnum(re_text[rpos]) && !iswblank(re_text[rpos]) && re_text[rpos] != L'\0')
		  rpos++;
	  }

	  wc2 = re_text[rpos];
	  if (flags & RE_IGN_CASE)
		wc2 = towlower(wc2);

	  rpos++;

	  wc = re_text[rpos];
	  if (wc == L'*' || wc == L'+' || wc == L'?') {
		if (flags & RE_STOP_WILDCARD) {
		  if (retr && !retr->is_set) {
			retr->cmp_is_valid = 1;
			if (data[dpos] == L'\0' || data[dpos] == L'\n')
			  retr->cmp_res = 0;
			else
			  retr->cmp_res = -1;

			retr->is_set = 1;
		  }
		  return 1;
		}
	  }

	  if (wc != L'*' && wc != L'?') {
		if (data[dpos] == L'\0' || data[dpos] == L'\n') {
		  if (retr && !retr->is_set) {
			retr->cmp_is_valid = 1;
			retr->cmp_res = 1;
		  }
		  return 0;
		}
	  }

	  if (re_text[rpos] == L'*') {
		rpos++;
		while(true)
		{
		  if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
			return 1 ;

		  wc1 = data[dpos];
		  if ((flags & RE_SKIP_NONALPHA) && !iswalnum(wc1) && !iswblank(wc1)) {
			dpos++;
		  } else {
			if (flags & RE_IGN_CASE)
			  wc1 = towlower(wc1);
			if (wc1 == wc2)
			  dpos++;
			else
			  return 0 ;
		  }
		}
	  } else if (re_text[rpos] == L'+') {
		rpos++;
		while(true)
		{
		  wc1 = data[dpos];
		  if ((flags & RE_SKIP_NONALPHA) && !iswalnum(wc1) && !iswblank(wc1)) {
			dpos++;
		  } else {
			if (flags & RE_IGN_CASE)
			  wc1 = towlower(wc1);
			if (wc1 == wc2)
			  dpos++;
			else
			  return 0 ;
		  }

		  if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
			return 1 ;
		}
	  } else if (re_text[rpos] == L'?') {
		rpos++;
		if (reg_test_r(data, dpos, re_text, rpos, flags, NULL))
		  return 1 ;

		wc1 = data[dpos];
		if (flags & RE_IGN_CASE)
		  wc1 = towlower(wc1);
		if (wc1 == wc2)
		  dpos++;
		else
		  return 0 ;
	  } else {
		wc1 = data[dpos];
		if (flags & RE_IGN_CASE)
		  wc1 = towlower(wc1);
		if (wc1 == wc2) {
		  dpos++;
		} else {
		  if (retr && !retr->is_set) {
			retr->cmp_is_valid = 1;
			if (wc1 < wc2)
			  retr->cmp_res = 1;
			else
			  retr->cmp_res = -1;
			retr->is_set = 1;
		  }
		  return 0 ;
		}
	  }
	}

	/* do we need the following? */
	/*	if (! *data || *data == L'\n') return 0 ; */
  } 
}

/* Reset a character lookup table: the first NCHARS entries of enum_chars
   are set to 0.  The caller must provide at least NCHARS entries. */
void enum_clean(wchar_t *enum_chars)
{
  wmemset(enum_chars, L'\0', NCHARS);
}

/* Tell whether str begins with a wildcard atom.

   Returns 1 when the first character of str is '.' or '[', and 0
   otherwise (including when str is NULL or empty). */
int is_regexp_start(wchar_t *str)
{
  if (str == NULL)
    return 0;

  switch (str[0]) {
  case L'.':
  case L'[':
    return 1;
  default:
    return 0;
  }
}

