/*
* utf8_util.c

Copyright (C) 2008 Alessandro Vesely

This file is part of Ipqbdb.

Ipqbdb is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Ipqbdb is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Ipqbdb.  If not, see <http://www.gnu.org/licenses/>.

*/
#include "utf8_util.h"

/*
* begin_utf8: find a position where a string can be cut without splitting
* a utf-8 multibyte sequence.
*
* begin: first byte of the string; the result never precedes it.
* end:   desired cut position; when it lies past begin it is dereferenced,
*        so it must point to a readable byte (the terminating 0 is fine).
*
* Returns end, or the nearest earlier position that does not hold a utf-8
* continuation byte (10xxxxxx), but never a position before begin.
* Nothing is written here: a caller that wants to truncate stores a 0 at
* the returned position.
*/
char* begin_utf8(char *begin, char *end)
{
	/*
	* Test the bound before stepping back, so that a continuation byte
	* sitting at begin itself cannot push the result to begin - 1.
	*/
	while (end > begin && ((unsigned char)*end & 0xc0) == 0x80)
		--end;
	return end;
}

#if defined TEST_MAIN
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
* Strip white space from both ends of s, in place.
*
* Trailing white space bytes are overwritten with 0; leading white space
* is skipped by returning a pointer to the first byte that is not white
* space (or to the terminating 0 if there is none).
*/
static char *trim(char *s)
{
	unsigned char *head = (unsigned char *)s;
	unsigned char *tail = head + strlen(s);

	/* skip leading white space */
	for (; head < tail && isspace(*head); ++head)
		;

	/* zero out trailing white space, last byte first */
	while (head < tail && isspace(tail[-1]))
		*--tail = 0;

	return (char *)head;
}

/*
* Test driver: each command line argument names a text file.
*
* Every line read is trimmed and echoed with its line number.  For lines
* that are neither empty nor starting with '#', begin_utf8() is called for
* each byte offset j, and the output shows the offset it returned together
* with the text before and after that position.
*
* Lines longer than the buffer are read by fgets() in pieces, each piece
* being handled (and counted) as a line of its own.
*
* Returns 0 if every file could be opened, 1 otherwise.
*/
int main(int argc, char *argv[])
{
	int i, rc = 0;

	for (i = 1; i < argc; ++i)
	{
		FILE *fp = fopen(argv[i], "r");
		if (fp == NULL)
		{
			/* say why the file was skipped instead of ignoring it */
			perror(argv[i]);
			rc = 1;
			continue;
		}

		char buf[4096], *s;
		int line = 0;

		printf("testing %s\n", argv[i]);
		while ((s = fgets(buf, sizeof buf, fp)) != NULL)
		{
			size_t l, j;
			s = trim(s);
			printf("\n\nline %d: %s\n", ++line, s);
			if (s[0] == '#' || s[0] == 0)
				continue;
			l = strlen(s);
			for (j = 0; j < l; ++j)
			{
				char *t = begin_utf8(s, s + j);
				/* the offset fits in an int: buf holds 4096 bytes */
				int off = (int)(t - s);
				/* %d, not %u: the argument is a (signed) int */
				printf("%3zu -> %3d:>%.*s<HERE>%s\n",
					j, off, off, s, t);
			}
		}
		fclose(fp);
	}
	return rc;
}
#endif
