/* Public domain: use this file as you wish. NO WARRANTY. */
/* Written in 2013 by Alexandru Cojocaru <xojoc@gmx.com> */

/* TODO: compress, UTF-8, UTF-{16,32}?
		 reorder switch cases based on dstructs frequencies
		 use p->endp
		 split/merge
		 pprint/compress
		 pjson_check is broken, find a (fast) hack
		 parser.strcp?
		 use a sentinel to avoid extra checks
		 benchmark zero copy technique vs pjson
		 fastcgi
		 fix err reporting (again :))
		 encstr
		 [[]: broken 
*/
#include "pjson.h"
/* Unconditionally enables the stack-usage instrumentation guarded by
   `#ifdef DEBUG' in this file. */
#define DEBUG
#ifdef DEBUG
/* Set by gettoken() to ___tos minus the address of one of its locals. */
static size_t ___stacksz;
/* Set by main() (STANDALONE builds) to the address of one of its locals. */
static size_t ___tos;
#endif
/* Short aliases used throughout this file. */
typedef unsigned int uint;
typedef unsigned char uchar;
/* Evaluate V and discard the result; marks a name as intentionally unused. */
#define USED(v)((void)v)
/*
 * Advance p->ptr past every byte that isspace() classifies as whitespace.
 * If the end of the buffer (p->endp) is reached, p->err is set to PJSON_EOF;
 * otherwise p->err is left untouched.
 */
static void
skipspace (struct pjson_parser *p)
{
	/* Check the bound before dereferencing so the byte at p->endp is never
	   read, and hand isspace() an unsigned char: passing a negative char
	   value to a <ctype.h> function is undefined behaviour. */
	while (p->ptr < p->endp && isspace ((unsigned char) *(p->ptr)))
		++p->ptr;
	if (p->ptr == p->endp)
		p->err = PJSON_EOF;
}
/*
 * Allocate a token through the parser's malloc hook, zero it and tag it
 * with type Y.  On allocation failure p->err becomes PJSON_EMEM and NULL
 * is returned.
 */
static struct pjson_token *
__newtoken (struct pjson_parser *p, enum pjson_type y)
{
	struct pjson_token *fresh = p->malloc (sizeof *fresh);

	if (fresh != NULL) {
		memset (fresh, 0, sizeof *fresh);
		fresh->type = y;
		return fresh;
	}

	p->err = PJSON_EMEM;
	return NULL;
}
/* Single static token returned by __newtoken_ck() and malloc_dummy(). */
static struct pjson_token dummy_tok;

/*
 * Drop-in replacement for __newtoken() that performs no allocation:
 * both arguments are ignored and the address of dummy_tok is returned.
 */
static struct pjson_token *
__newtoken_ck (struct pjson_parser *p, enum pjson_type y)
{
	(void) p;
	(void) y;

	return &dummy_tok;
}
/* Token constructor used by the newtoken_emem() macro.  It is a function
   pointer so the implementation can be swapped; it is initialised to
   __newtoken. */
static struct pjson_token *
(*newtoken)(struct pjson_parser *p, enum pjson_type y) = __newtoken;
/* See Unicode Corrigendum #1: http://www.unicode.org/versions/corrigenda.html */
/* NOTE(review): `expect' is not used anywhere in the visible source.  Its
   expansion wraps an `if' statement in parentheses, which is not a valid C
   expression, so it would fail to compile if it were ever expanded.  It also
   relies on variables named `w' and `b' being in scope at the use site. */
#define expect(lr,hr) (if (!(*w >= lr && *w <= hr)){encutf8 (b, 0xFFFD); return;})
/*
 * Encode the Unicode code point V as UTF-8 at *B and advance *B past the
 * bytes written (1 to 4 of them).
 *
 * Values that are not Unicode scalar values -- anything above U+10FFFF and
 * the surrogate range U+D800..U+DFFF -- are replaced by U+FFFD REPLACEMENT
 * CHARACTER (3 bytes), so the output is always well-formed UTF-8.
 *
 * The caller must guarantee room for the encoded bytes at *B.
 */
static void
encutf8 (char **b, unsigned int v)
{
	/* Work on unsigned bytes: the lead/continuation markers (0xC0, 0x80,
	   ...) do not fit in a signed char, and casting the constant instead
	   of the whole expression only worked through implicit truncation. */
	unsigned char *w = (unsigned char *) *b;

	if (v > 0x10FFFF || (v >= 0xD800 && v <= 0xDFFF))
		v = 0xFFFD;

	if (v <= 0x007F)
		*w++ = (unsigned char) v;
	else if (v <= 0x07FF) {
		*w++ = (unsigned char) (0xC0 | (v >> 6));
		*w++ = (unsigned char) (0x80 | (v & 0x3F));
	}
	else if (v <= 0xFFFF) {
		*w++ = (unsigned char) (0xE0 | (v >> 12));
		*w++ = (unsigned char) (0x80 | ((v >> 6) & 0x3F));
		*w++ = (unsigned char) (0x80 | (v & 0x3F));
	}
	else {
		*w++ = (unsigned char) (0xF0 | (v >> 18));
		*w++ = (unsigned char) (0x80 | ((v >> 12) & 0x3F));
		*w++ = (unsigned char) (0x80 | ((v >> 6) & 0x3F));
		*w++ = (unsigned char) (0x80 | (v & 0x3F));
	}
	*b = (char *) w;
}
/* Helper macros for the token parser.  All of them refer to a variable
   named `p' (the struct pjson_parser *) that must be in scope at the use
   site.  The first three are written as GNU statement expressions. */

/* Record a hard error with message STR and return NULL from the caller. */
#define tokerr(STR) ({do {p->err = PJSON_ERR; p->strerr = STR; return NULL;} while(0);})
/* Record premature end of input and return NULL from the caller. */
#define tokeof() ({do {p->err = PJSON_EOF; return NULL;} while (0);})
/* Record a warning with message STR; execution continues. */
#define tokwarn(STR) ({do {p->err = PJSON_WARN; p->strerr = STR;} while (0);})
/* Skip whitespace, returning NULL from the caller on PJSON_EOF.  Expands to
   TWO statements, so it must not be the unbraced body of an if/else/loop. */
#define skipspace_eof() skipspace (p); if (p->err == PJSON_EOF) return NULL
/* Expression `newtoken (p, y)' followed by a separate statement that returns
   NULL from the caller when p->err is PJSON_EMEM.  Also expands to two
   statements; intended for use as `x = newtoken_emem (y);'. */
#define newtoken_emem(y) newtoken (p, y); if (p->err == PJSON_EMEM) return NULL
/* See: RFC4627 */
static struct pjson_token *
gettoken (struct pjson_parser *p)
{
#ifdef DEBUG
	int ___x = 0;
	___stacksz = ___tos - (size_t) &___x;
#endif
	char *l;
	size_t llen;
	struct pjson_token *tok;

	switch (*(p->ptr)) {
	case '{':;
		struct pjson_token *obj = tok = newtoken_emem (PJSON_OBJECT);
		++p->ptr;
		skipspace_eof ();
		if (*(p->ptr) == '}') { /* {} empty object */
			++p->ptr;
			return obj;
		}

		while (1) {
			skipspace_eof ();

			tok->key = gettoken (p);
			if (!tok->key)
				return NULL;
			if (tok->key->type != PJSON_STRING)
				tokerr ("object key must be a string");

			skipspace_eof ();

			if (*(p->ptr) != ':')
				tokerr ("`:' was espected after key");

			++p->ptr;
			skipspace_eof ();
			
			tok->val = gettoken (p);
			if (!tok->val)
				return NULL;

			skipspace_eof ();

			if (*(p->ptr) == ',') {
				tok = tok->next = newtoken_emem (PJSON_OBJECT);
				++p->ptr;
			}
			else if (*(p->ptr) == '}') {
				++p->ptr;
				return obj;
			}
			else 
				tokerr ("`,' or `}' was expected after object member");
		}
	case '[':;
		struct pjson_token *arr = tok = newtoken_emem (PJSON_ARRAY);
		++p->ptr;
		skipspace_eof ();
		if (*(p->ptr) == ']') { /* [] empty array */
			++p->ptr;
			return arr;
		}

		while (1) {
			skipspace_eof ();

			tok->val = gettoken (p);
			if (!tok->val)
				return NULL;

			skipspace_eof ();

			if (*(p->ptr) == ',') {
				tok = tok->next = newtoken_emem (PJSON_ARRAY);
				++p->ptr;
			}
			else if (*(p->ptr) == ']') {
				++p->ptr;
				return arr;
			}
			else 
				tokerr ("`,' or `]' was expected after array member");
		}		
	case '\"':;	
		struct pjson_token *s = newtoken_emem (PJSON_STRING);
		++p->ptr;
		char *q = p->ptr;
		while (q < p->endp) {
			if (*q == '\"') {
				char *w = q - 1;
				while (*w == '\\')
					--w;
				if (((q-w) & 1) == 1)
					break;
			}
			++q;
		}

		if (q == p->endp)
			tokeof ();

		s->strsz = q - p->ptr;
		s->str = p->malloc (s->strsz + 1);
		if (!(s->str)) {
			p->err = PJSON_EMEM;
			return NULL;
		}
		memcpy (s->str, p->ptr, s->strsz);
		s->str[s->strsz] = '\0';
		p->ptr = q + 1;

		char *b = q = s->str;
		char cc[] = { [0] = '\b', [4] = '\f', [12] = '\n', [16] = '\r', [18] = '\t' };
		uint16_t l_sur = 0;
		while (q < s->str + s->strsz) {
			if (*q != '\\') {
				++q;
				++b;
				continue;
			}
			++q;
			switch (*q) {
			case 'b': case 'f': case 'n': case 'r': case 't':
				*b++ = cc[(uchar)*q];
				break;
			case 'u':;
				uint v = 0;
				int i;
				for (i = 0; i < 4 && (q + i) < (s->str + s->strsz); ++i) {
					v <<= 4;
					if (toupper (q[i]) >= 'A' && 'F' <= toupper (q[i]))
						v += toupper (q[i]) - 'A';
					else if (isdigit (q[i]))
						v += q[i] - '0';
					else
						break;
				}
				if (i != 4) {
					*b++ = *q;
					break;
				}
				q += 4;
				if (v >= 0xD800 && v <= 0xDBFF) {
					if (l_sur != 0) {
						tokwarn ("two consecutive leading surrogates");
						encutf8 (&b, 0xFFFD);
					}
					l_sur = v;
					break;
				}
				else if (v >= 0xDC00 && v <= 0xDFFF) {
					if (l_sur == 0) {
						tokwarn ("leading surrogate expected before a trailing one");
						encutf8 (&b, 0xFFFD);
						break;
					}
					v = ((l_sur - 0xD800) & 0xDBFF) +
						((v - 0xDC00) & 0xDFFF) +
						0x10000;
					l_sur = 0;
				}
				encutf8 (&b, v);
				break;
			default:
				*b++ = *q;
			}
			if (l_sur != 0) {
				tokwarn ("leading surrogate without a trailing one");
				encutf8 (&b, 0xFFFD);
			}
			++q; /* add this to each logic */
		}
		s->strsz = q - s->str + 1;
		return s;
	case '-': case '0' ... '9':;
		struct pjson_token *n = newtoken_emem (PJSON_NUMBER);
		int hassign = 0;
		if (*(p->ptr) == '-') {
			hassign = 1;
			++p->ptr;
		}

		if (!isdigit (*(p->ptr)))
			tokerr ("`[1-9][0-9]* or 0' was expected");		
		if (*(p->ptr) == '0' && isdigit (*(p->ptr+1)))
			tokerr ("leading zero not allowed");

		while (isdigit (*(p->ptr))) {
			n->num *= 10;
			n->num += *(p->ptr) - '0';
			++p->ptr;
		}

		if (*(p->ptr) == '.') {
			++p->ptr;
			if (!isdigit (*(p->ptr)))
				tokerr ("`[0-9]+' was expected after decimal point");
			float f = 0;
			while (isdigit (*(p->ptr))) {
				f += *(p->ptr) - '0';
				f /= 10;
				++p->ptr;
			}
			n->num += f;
		}
		
		if (tolower (*(p->ptr)) == 'e') {
			++p->ptr;
			int ehassign = 0;
			if (*(p->ptr) == '-') {
				ehassign = 1;
				++p->ptr;
			}
			else if (*(p->ptr) == '+')
				++p->ptr;
			if (!isdigit (*(p->ptr)))
				tokerr ("`[0-9]+' was expected after exponention");
			uint e = 0;
			while (isdigit (*(p->ptr))) {
				e *= 10;
				e += *(p->ptr) - '0';
				++p->ptr;
			}
			uint d = 1;
			for (size_t i = 0; i < e; ++i)
				d *= 10;
			if (ehassign)
				n->num /= d;
			else
				n->num *= d;
		}
		
		n->num *= hassign ? -1 : 1;
		return n;
	case 't':
		l = "true";
		llen = 4;
		goto cmp;
	case 'f':
		l = "false";
		llen = 5;
		goto cmp;
	case 'n':
		l = "null";
		llen = 4;
		goto cmp;
cmp:
	if (p->ptr + llen >= p->endp)
		tokeof ();
	if (memcmp (p->ptr, l, llen) == 0) {
		struct pjson_token *l = newtoken_emem (PJSON_LITERAL);
		l->lit = *(p->ptr);
		p->ptr += llen;
		return l;
	}
	default:
		tokerr ("garbage");
	}
}
/*
 * Prepare parser P to read the LEN bytes starting at BUF.
 *
 * The allocator hooks are set to the C library's malloc/free and the
 * pretty-printer output stream to stdout.  No other field of *P is written
 * here.
 */
void
pjson_init_parser (struct pjson_parser *p, char *buf, size_t len)
{
	/* Allocation hooks. */
	p->free = free;
	p->malloc = malloc;

	/* Destination used by pjson_pprint(). */
	p->pp_out = stdout;

	/* Input window: [buf, buf + len). */
	p->endp = buf + len;
	p->buf = buf;
}
/*
 * Parse the buffer attached to P and return the root token, or NULL on
 * failure.
 *
 * p->err and p->strerr describe the outcome:
 *   - PJSON_BARG when the buffer is NULL or empty;
 *   - PJSON_ERR when the first non-blank byte is neither `{' nor `[';
 *   - whatever gettoken() reports otherwise, with a message filled in here
 *     for PJSON_EOF and PJSON_EMEM.
 */
struct pjson_token *
pjson_parse (struct pjson_parser *p)
{
	struct pjson_token *root = NULL;

	if (p->buf == NULL || p->buf == p->endp) {
		p->strerr = "buffer can't be empty";
		p->err = PJSON_BARG;
		return NULL;
	}

	/* Reset the parser state for a fresh run over the buffer. */
	p->strerr = NULL;
	p->err = 0;
	p->ptr = p->buf;

	skipspace (p);
	if (p->err != PJSON_EOF) {
		char first = *(p->ptr);

		if (!(first == '{' || first == '['))
			tokerr ("A JSON text must be an object or an array");

		root = gettoken (p);
	}

	/* Attach a message to the error kinds that do not carry one. */
	switch (p->err) {
	case PJSON_EOF:
		p->strerr = "stream ended prematurly";
		break;
	case PJSON_EMEM:
		p->strerr = "out of virtual memory";
		break;
	default:
		break;
	}
	return root;
}
/*
 * Release token T and everything reachable from it through the parser's
 * free hook.  A NULL T is ignored.
 *
 * Objects and arrays are chains linked by `next'; the whole chain is walked
 * and treated according to the type of its first token.  For objects both
 * `val' and `key' of every link are freed, for arrays only `val'.
 */
void
pjson_free (struct pjson_parser *p, struct pjson_token *t)
{
	if (t == NULL)
		return;

	if (t->type == PJSON_OBJECT || t->type == PJSON_ARRAY) {
		int with_keys = (t->type == PJSON_OBJECT);
		struct pjson_token *link = t;

		while (link != NULL) {
			struct pjson_token *following;

			pjson_free (p, link->val);
			if (with_keys)
				pjson_free (p, link->key);
			following = link->next;
			p->free (link);
			link = following;
		}
	}
	else if (t->type == PJSON_STRING) {
		p->free (t->str);
		p->free (t);
	}
	else if (t->type == PJSON_NUMBER || t->type == PJSON_LITERAL) {
		p->free (t);
	}
}
/*
 * malloc-compatible hook: a request for exactly sizeof (struct pjson_token)
 * bytes is answered with the address of the static dummy_tok; any other
 * size is forwarded to malloc().
 */
static void *
malloc_dummy (size_t s)
{
	return (s == sizeof (struct pjson_token)) ? (void *) &dummy_tok : malloc (s);
}
/* free-compatible hook that deliberately releases nothing. */
static void
free_dummy (void *t)
{
	(void) t;
}
/*
 * Validate the buffer attached to P without keeping the token tree.
 * Returns the resulting p->err (0 when the text parsed cleanly).
 *
 * The earlier version swapped in malloc_dummy/free_dummy so that every
 * token shared one static struct.  That was broken: all tokens aliased each
 * other, and string buffers (whose size differs from sizeof (struct
 * pjson_token)) were really allocated and then never freed.  The text is
 * now parsed with the parser's own allocator hooks and the result is
 * released straight away.
 */
enum pjson_err
pjson_check (struct pjson_parser *p)
{
	struct pjson_token *t = pjson_parse (p);

	/* pjson_free() accepts NULL and does not touch p->err. */
	pjson_free (p, t);

	/* The dummy hooks are no longer used here; reference them so that
	   they do not trigger unused-function warnings. */
	USED (malloc_dummy);
	USED (free_dummy);

	return p->err;
}
/* Write the NUL-terminated string S to STREAM as a quoted JSON string,
   escaping quotes, backslashes and control characters so that the output
   is valid JSON again.  A NULL S is printed as an empty string. */
static void
pprint_string (const char *s, FILE *stream)
{
	putc ('"', stream);
	for (; s && *s; ++s) {
		unsigned char c = (unsigned char) *s;

		switch (c) {
		case '"':  fputs ("\\\"", stream); break;
		case '\\': fputs ("\\\\", stream); break;
		case '\b': fputs ("\\b", stream); break;
		case '\f': fputs ("\\f", stream); break;
		case '\n': fputs ("\\n", stream); break;
		case '\r': fputs ("\\r", stream); break;
		case '\t': fputs ("\\t", stream); break;
		default:
			if (c < 0x20)
				fprintf (stream, "\\u%04x", (unsigned int) c);
			else
				putc (c, stream);
			break;
		}
	}
	putc ('"', stream);
}

/*
 * Pretty-print the token tree R to STREAM.
 *
 * INDENT is the column at which the value's closing bracket is placed;
 * members of objects and arrays are written one per line at INDENT + 4.
 * A trailing newline is written only when INDENT is 0, i.e. for the
 * top-level value.  A NULL R prints nothing.
 */
void
__pprint (struct pjson_token *r, FILE *stream, int indent)
{
	if (!r)
		return;

	switch (r->type) {
	case PJSON_OBJECT: case PJSON_ARRAY: {
		int is_obj = (r->type == PJSON_OBJECT);
		int first = 1;

		putc (is_obj ? '{' : '[', stream);
		for (; r; r = r->next) {
			/* An empty container is a single link with no key/val. */
			if (is_obj ? !r->key : !r->val)
				continue;
			fprintf (stream, "%s\n%*s", first ? "" : ",", indent + 4, "");
			first = 0;
			if (is_obj) {
				__pprint (r->key, stream, indent + 4);
				fputs (": ", stream);
			}
			__pprint (r->val, stream, indent + 4);
		}
		if (!first)
			fprintf (stream, "\n%*s", indent, "");
		putc (is_obj ? '}' : ']', stream);
		break;
	}
	case PJSON_STRING:
		pprint_string (r->str, stream);
		break;
	case PJSON_NUMBER:
		fprintf (stream, "%g", r->num);
		break;
	case PJSON_LITERAL:
		switch (r->lit) {
		case 't':
			fputs ("true", stream);
			break;
		case 'f':
			fputs ("false", stream);
			break;
		case 'n':
			fputs ("null", stream);
			break;
		}
		break;
	}

	if (indent == 0)
		putc ('\n', stream);
}
/*
 * Pretty-print token tree T to the parser's output stream (p->pp_out),
 * starting at indentation 0.  Does nothing when T is NULL.
 */
void
pjson_pprint (struct pjson_parser *p, struct pjson_token *t)
{
	if (t != NULL)
		__pprint (t, p->pp_out, 0);
}
/*
 * Add to *C the number of bytes of struct pjson_token storage reachable
 * from T: one token per scalar, one per object/array link, plus one for
 * every object key.  String payloads are not counted.
 *
 * A NULL T contributes nothing.  This matters because an empty array or
 * object has a NULL `val', and because callers may pass the NULL result of
 * a failed parse.
 */
static void
countsz (struct pjson_token *t, size_t *c)
{
	if (!t)
		return;

	enum pjson_type y = t->type;
	switch (y) {
	case PJSON_OBJECT: case PJSON_ARRAY:
		while (t) {
			*c += sizeof (*t);
			if (t->key && (y == PJSON_OBJECT))
				*c += sizeof (*t);
			countsz (t->val, c);
			t = t->next;
		}
		break;
	default:
		*c += sizeof (*t);
		break;
	}
}
#ifdef STANDALONE
#include <stdio.h>
#include <stdint.h>
#include <errno.h>
#include <error.h>
#include <alloca.h>
#include <string.h>
#include <sys/stat.h>
#include <limits.h>
#include <getopt.h>
/* Mode selected on the command line: set by main() to 'p', 'c' or 'v'. */
static char flag;
/* Value given to -g/--generate (300 when the option has no argument). */
static size_t gnum;
/* Long option table for getopt_long().  --help and --version have no short
   form and are reported as CHAR_MAX + 1 and CHAR_MAX + 2 respectively. */
static struct option const longopts[] =
{
	{"pprint", no_argument, NULL, 'p'},
	{"compress", no_argument, NULL, 'c'},
	{"validate", no_argument, NULL, 'v'},
	{"generate", optional_argument, NULL, 'g'},
	{"help", no_argument, NULL, CHAR_MAX + 1},
	{"version", no_argument, NULL, CHAR_MAX + 2},
	{NULL, 0, NULL, 0}
};
/*
 * Print the help text to standard output and terminate the process with
 * exit status 1.  PN is the program name shown in the synopsis lines.
 * Does not return.
 */
void
usage (char *pn)
{
	printf ("Usage: %s -p [FILE]\n"
		"  or:  %s -c [FILE]\n"
		"  or:  %s -v [FILE]\n"
		"\n"
		"  or:  %s -g [NUM]\n",
		pn, pn, pn, pn);

	puts ("\n"
	      "Parse or validate a JSON FILE. Or generate\n"
	      "a random one containing NUM objects.\n");

	puts ("\t-p, --pprint\t\tpretty print the JSON FILE\n"
	      "\t-c, --compress\t\tcompress (i.e. remove spaces) JSON FILE\n"
	      "\t-v, --validate\t\tcheck the formedness of JSON FILE\n"
	      "\n"
	      "\t-g, --generate\t\tgenerate a JSON file containing NUM objects\n"
	      "\n"
	      "\t--help\t\t\t\tdisplay this help and exit\n"
	      "\t--version\t\t\toutput version information and exit\n"
	      "\n"
	      "If FILE is not specified, read standard input");

	puts ("\n"
	      "Report pjson bugs to " PACKAGE_BUGREPORT "\n"
	      "Pjson home page: <" PACKAGE_URL ">");

	exit (1);
}
int
main (int argc, char *argv[])
{
#ifdef DEBUG
	int ___x = 0;
	___tos = (size_t) &___x;
#endif
/*
	char *a = alloca (4);
	memset (a, 0, 4);
	encutf8 (&a, 0x10FFFF);
	printf ("%d %d %d %d\n", a[-4], a[-3], a[-2], a[-1]);
*/
	FILE *f = stdin;
	size_t fsz = 0;
	while (1) {
		int c = getopt_long (argc, argv, "pcvg:", longopts, NULL);
		
		if (c == -1)
			break;

		switch (c) {
		case 'p': case 'c': case 'v':
			if (optarg) {
				f = fopen (optarg, "r");
				if (!f)
					error (1, errno, "can't open file");
				struct stat st;
				if (stat (optarg, &st) == 0)
					fsz = st.st_size;
			}
			flag = c;
			break;
		case 'g':
			if (optarg)
				gnum = strtol (optarg, NULL, 10);
			else
				gnum = 300;
			break;
		case CHAR_MAX+1:
			usage (argv[0]);
			break;
		case CHAR_MAX+2:
			puts ("\
"PACKAGE_NAME" "PACKAGE_VERSION"\n\
This program is in the PUBLIC DOMAIN, use it as you wish.\n\
This is Free Software: you are free to change and redistribute it.\n\
There is NO WARRANTY, to the extent permitted by law.\n\
\n\
Written by Alexandru Cojocaru.\
");
			exit (0);
		case '?':
			printf ("unkown option `%c'\n", optopt);
			break;
		default:
			abort ();
		}
	}
	
	if (flag || (argc == 1)) {
		struct pjson_parser p;
		pjson_init_parser (&p, NULL, 0);
		if (argc == 1)
			flag = 'p';
		size_t rc;
		size_t nr = 0;
		size_t bufsz = fsz ? fsz : 1000;
		char *buf = malloc (bufsz);
		if (!buf)
			error (1, errno, "out of virtual memory");
		while ((rc = fread (buf + nr, 1, fsz ? fsz : 1000, f)) != 0) {
			nr += rc;
			if (!fsz) {
				bufsz += rc;
				buf = realloc (buf, bufsz);
				if (!buf)
					error (1, errno, "out of virtual memory");
			}
		}
		printf ("buf allocd: %zd\n", bufsz);
		p.buf = buf;
		p.endp = buf + nr;
		
		if (flag == 'p' || flag == 'c') {
			struct pjson_token *t = pjson_parse (&p);
			if (t)
				pjson_pprint (&p, t);
		}
		else {
			struct pjson_token *t = pjson_parse (&p);
			size_t tc = 0;
			countsz (t, &tc);
			printf ("%zd\n", tc);
			if (p.err == 0) {
				puts ("valid");
				exit (0);
			}
		}

		if (p.err) {
			/* Error or warning. */
			if (p.err == PJSON_ERR || p.err == PJSON_EOF)
				error (1, 0, "can't parse: %s\n", p.strerr);
			else if (p.err == PJSON_EMEM)
				/* Using `errno' here is ok, since we don't call
				   any other function when we fail to alloc mem. */
				error (1, errno, "out of virtual memory");
			else if (p.err == PJSON_WARN)
				error (0, 0, "warning: %s", p.strerr);
		}
		
		if (fclose (f) < 0)
			error (1, errno, "can't close file");	
	}
	else {
		size_t co = 0;
		while (co < gnum) {
			putchar ('[');
			++co;
		}
		while (co > 0) {
			putchar (']');
			--co;
		}
	}
		
	USED (__newtoken_ck);
	char *j[] = {"", "{}", "[]", " {}", "{\"a\" : 4e2}", "{\"1\" : {\"1a\": \"2a\"}}", "[\"arr\", true]"};

	return 0;
}
#endif /* STANDALONE */