/*
 * 	Random Access Machine.
 * 	Program parser.
 *
 * 	Copyright (C) 2002, 2003  Dmitry Rutsky	<rutsky@school.ioffe.rssi.ru>
 * 	
 * 	This program is free software; you can redistribute it and/or modify
 * 	it under the terms of the GNU General Public License as published by
 * 	the Free Software Foundation; either version 2 of the License, or
 * 	(at your option) any later version.
 *
 * 	This program is distributed in the hope that it will be useful,
 * 	but WITHOUT ANY WARRANTY; without even the implied warranty of
 * 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * 	GNU General Public License for more details.
 *
 * 	You should have received a copy of the GNU General Public License
 * 	along with this program; if not, write to the 
 * 	Free Software Foundation, Inc.,
 * 	59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * 	TODO:
 * 	All this code looks too complicated (and a big mess somewhere),
 * 	and quite limited.
 * 	It probably should be rewritten using Bison sometime, as most
 * 	normal interpreters are.  Now it's as buggy as it is compact.
 *
 * 	Well, probably it is not so bad as it looks.  (It sounds like
 * 	I'm just too lazy to learn Bison ;-)  --DR)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <gmp.h>

#include "str_read.h"
#include "ram.h"

/* ========	Parser data	======== */

/* One line of the program text, stored as a node of a singly linked list. */
typedef struct _TextLine {
   char *line;			/* Text of the line */
   struct _TextLine *next;	/* Following line, NULL for the last one */
} TextLine;

/* A label definition found in the program text (linked list node). */
typedef struct _Label {
   char *name;			/* Label name */
   unsigned int operator;	/* Number of the instruction the label
				   marks; the first instruction is 1 */
   struct _Label *next;		/* Next label in the list */
} Label;

/* A parsed instruction waiting to be linked into the program
   (linked list node). */
typedef struct _Instruction {
   RAM_Instruction *instruction;	/* The instruction being built */

   const char *reference;	/* Name of the label to link the parameter
				   to, or NULL if no label is used */

   char *text_reference;	/* Corresponding line of the program
				   text -- is it still needed? */

   unsigned int line;		/* Number of the source line, counted
				   from 1.  It is needed for the error
				   messages of the `link' stage; the
				   matching index stored in the
				   instruction_line array is line - 1. */

   struct _Instruction *next;	/* Next node of the list */
} Instruction;

/* Whole state of one parser run. */
typedef struct {
   /* Program being parsed */
   RAM_Program *program;

   /* Linked list of the labels */
   Label *labels;

   /* Linked list of the instructions.  New instructions are put at the
      head, so the list runs from the last instruction to the first. */
   Instruction *instructions;

   /* Linked list of the text lines: head and tail.  New lines are
      appended at the tail, though there is no special reason for
      that direction.
      TODO: unify it with the instruction list. */
   TextLine *text_start, *text_end;

   /* Number of the current line and of the current instruction */
   unsigned int line_no, instruction;
} ParserData;

/* Entry of the mnemonic table: instruction name and its type. */
typedef struct {
   const char *name;			/* Mnemonic as written in programs */
   RAM_InstructionType instruction;	/* Type the mnemonic stands for */
} InstructionName;

static const char *INPUT_DELIMITERS = " \t\n";
static InstructionName instruction_names [] =
{
   {"read", RAM_READ},
   {"write", RAM_WRITE},
   {"load", RAM_LOAD},
   {"store", RAM_STORE},
   {"add", RAM_ADD},
   {"neg", RAM_NEG},
   {"half", RAM_HALF},
   {"jump", RAM_JUMP},
   {"jgtz", RAM_JGTZ},
   {"halt", RAM_HALT},
   {NULL, RAM_NONE}
};

/* Bit flags naming the kinds of argument an instruction may take;
   they are OR-ed together into the `allowed' set of parse_argument. */
static const int CONSTANT_PARAMETER         = 1 << 0;
static const int INSTRUCTION_PARAMETER      = 1 << 1;
static const int POINTER_PARAMETER          = 1 << 2;
static const int INDIRECT_POINTER_PARAMETER = 1 << 3;

/* ========	Parser data constructors and destructors	======== */

//
//	This good stuff is used mostly inappropriately in the parser functions.
//	

// Allocate a zero-filled TextLine.
// An allocation failure is reported with err_fatal_perror, exactly as
// the other constructors of this file do.  The previous call to
// err_error_perror looked non-fatal, which would have let a NULL pointer
// reach text_line_new_by and be dereferenced there.
static TextLine *text_line_new (void)
{
   TextLine *tl = calloc (1, sizeof *tl);

   if (!tl)
      err_fatal_perror ("calloc", "TextLine memory allocation failure");

   return tl;
}

// Make a new text line node that refers to `line'.
// Only the pointer is stored, the string itself is not copied.
static inline TextLine *text_line_new_by (char *line)
{
   TextLine *node = text_line_new ();

   node -> line = line;
   return node;
}

// Release the node only; the string it refers to is left alone.
static inline void text_line_struct_delete (TextLine *tl)
{
   free (tl);
}
/*
static void text_line_list_delete (TextLine *tl)
{
   TextLine *t;

   while (tl)
   {
      if (tl -> line)
	 free (tl -> line);

      t = (tl -> next);
      free (tl);
      tl = t;
   }
}
*/
// Allocate a zero-filled Label.  An allocation failure is reported
// with err_fatal_perror.
static Label *label_new ()
{
   Label *label = calloc (1, sizeof *label);

   if (label == NULL)
      err_fatal_perror ("calloc", "Label memory allocation failure");

   return label;
}

// Release the Label node only; its name is not freed here.
static inline void label_struct_delete (Label *l)
{
   free (l);
}

// Release a whole list of labels: every node together with its name.
// `l' may be NULL (empty list).
static void label_list_delete (Label *l)
{
   Label *following;

   for (; l != NULL; l = following)
   {
      following = (l -> next);

      free (l -> name);		// free (NULL) does nothing
      label_struct_delete (l);
   }
}

// Allocate a zero-filled Instruction node.  An allocation failure is
// reported with err_fatal_perror.
static Instruction *instruction_new ()
{
   Instruction *node = calloc (1, sizeof *node);

   if (node == NULL)
      err_fatal_perror ("calloc", "Instruction memory allocation failure");

   return node;
}

// Release the Instruction node only; nothing it points to is freed.
static inline void instruction_struct_delete (Instruction *i)
{
   free (i);
}
/*
static void instruction_list_delete (Instruction *i)
{
   Instruction *t;

   while (i)
   {
      if (i -> instruction)
         ram_instruction_delete (i -> instruction);

      t = (i -> next);
      free (t);
      i = t;
   }
}
*/
// Allocate a zero-filled ParserData: empty lists, counters at zero.
// An allocation failure is reported with err_fatal_perror.
static ParserData *parser_data_new ()
{
   ParserData *data = calloc (1, sizeof *data);

   if (data == NULL)
      err_fatal_perror ("calloc", "ParserData memory allocation failure");

   return data;
}

// Release the ParserData struct only; the program and the lists it
// refers to are not touched.
static inline void parser_data_struct_delete (ParserData *pd)
{
   free (pd);
}

/*	Unrequired function
static void parser_data_delete (ParserData *pd)
{
   if (pd -> program)
      ram_program_delete (pd -> program);
   
   if (pd -> labels)
      label_list_delete (pd -> labels);

   if (pd -> instructions)
      instruction_list_delete (pd -> instructions);

   if (pd -> text_start)
      text_line_list_delete (pd -> text_start);

   parser_data_struct_delete (pd);
}
*/

/* ========	Parser functions	======== */

// Destructive tokenizer in the spirit of strtok, which is also able to
// put the string back into its initial state.
//
// (*s) is the position to scan from; NULL means that nothing is left,
// and NULL is returned at once.  Leading delimiters are skipped, the
// token is terminated by writing '\0' over the delimiter that follows
// it, and the token is returned.  Then (*s) is moved just past that
// delimiter, or set to NULL if the token reached the end of the string.
// NULL is returned (and (*s) set to NULL) when only delimiters remained.
//
// (*delimiter) keeps the character overwritten by the previous call:
// if it differs from '\0', it is written back at (*s) - 1 before
// scanning, so after the last `get_token' call the string is restored.
// It must be set to '\0' before the first call.
static char *get_token (char **s, const char *delimiters, char *delimiter)
{
   char *start = *s;
   char *stop;

   if (start == NULL)
      return NULL;

   // Undo the termination made by the previous call
   if (*delimiter != '\0')
      start [-1] = *delimiter;

   // Skip delimiters before the token
   start += strspn (start, delimiters);
   if (*start == '\0')
   {
      *s = NULL;
      return NULL;
   }

   stop = start + strcspn (start, delimiters);
   *delimiter = *stop;

   if (*stop == '\0')
      *s = NULL;
   else
   {
      *stop = '\0';
      *s = stop + 1;
   }

   return start;
}

// Report the error message `error' for program line `line' through
// err_error.
static void parse_error (unsigned int line, const char *error)
{
   // `line' is unsigned, so the matching conversion is %u, not %d
   err_error ("line %u: %s", line, error);
}

// Report the error message `error' followed by the string `param' for
// program line `line' through err_error.
// Note that `error' is printed verbatim: it is not a format string, so
// any `%s' inside it is not replaced by `param'.
static inline void parse_err_with_string
		(unsigned int line, const char *error, const char *param)
{
   // `line' is unsigned, so the matching conversion is %u, not %d
   err_error ("line %u: %s %s", line, error, param);
}

// Look the mnemonic `s' up in instruction_names, ignoring the letter
// case.  Returns the instruction type, or RAM_NONE if `s' is not a
// known mnemonic.
// strcasecmp is declared by POSIX in <strings.h>.
static RAM_InstructionType get_instruction (const char *s)
{
   const InstructionName *in;

   // The table is closed by the entry with the NULL name
   for (in = instruction_names; (in -> name) != NULL; in ++)
      if (strcasecmp ((in -> name), s) == 0)
	 return (in -> instruction);

   return RAM_NONE;
}

// Parse the argument of an instruction and store it into
// (i -> instruction): parameter and parameter_type are set, or
// (i -> reference) if the argument of a jump is a label name.
//
// Accepted forms are `value' (constant, or jump target), `[value]'
// (pointer) and `[[value]]' (indirect pointer); white space is allowed
// after each `['.  'allowed' is a bit set indicating what kind of args
// are allowed.  A numeric jump target must be positive.
//
// `argument' is modified while parsing, but it is restored before
// returning.  It can be NULL, which is an error.
// Return value is zero if parse_argument failed.
static int parse_argument (char *argument, Instruction *i, int allowed)
{
   int brackets = 0;
   int address_parsed = 0;	// Parameter holds a parsed [..] value
   char old;
   char *end;

   if (! argument)
      return 0;

   // The casts to unsigned char keep the <ctype.h> calls defined for
   // characters with negative values.
   while (*argument == '[')
   {
      brackets ++;
      do
	 argument ++;
      while (isspace ((unsigned char) *argument));
   }

   // Cut the value off temporarily; `old' keeps the overwritten char
   end = argument;
   while (*end == '-' || *end == '_' || isalnum ((unsigned char) *end))
      end ++;
   old = *end;
   (*end) = '\0';

   if (brackets &&
	(allowed & (INDIRECT_POINTER_PARAMETER | POINTER_PARAMETER)))
   {
      mpz_init (i -> instruction -> parameter);
      if (gmp_sscanf
	(argument, "%Zd", &(i -> instruction -> parameter)) != 1)
      {
	 mpz_clear (i -> instruction -> parameter);
	 goto error;
      }
      address_parsed = 1;
   }

   if (brackets == 0 && (allowed & INSTRUCTION_PARAMETER))
   {
      mpz_init (i -> instruction -> parameter);
      if (gmp_sscanf
	(argument, "%Zd", &(i -> instruction -> parameter)) != 1)
      {
	 // Not a number, so it is a label to be linked later
	 mpz_clear (i -> instruction -> parameter);
	 (i -> reference) = strdup (argument);
	 if (!(i -> reference))
	    goto error;
      }
      else if (mpz_sgn (i -> instruction -> parameter) <= 0)
      {
	 mpz_clear (i -> instruction -> parameter);
	 goto error;
      }

      (i -> instruction -> parameter_type) = RAM_INSTRUCTION;
   }
   else if (brackets == 2 && (allowed & INDIRECT_POINTER_PARAMETER))
      (i -> instruction -> parameter_type) = RAM_INDIRECT_POINTER;
   else if (brackets == 1 && (allowed & POINTER_PARAMETER))
      (i -> instruction -> parameter_type) = RAM_POINTER;
   else if (brackets == 0 && (allowed & CONSTANT_PARAMETER))
   {
      (i -> instruction -> parameter_type) = RAM_CONSTANT;
      if (mpz_init_set_str ((i -> instruction -> parameter),
		      argument, 10))
	 goto error;
   }
   else
   {
      // This form of argument is not allowed here.  No parameter type
      // was set, so do not leave an initialized value behind, and
      // restore the argument like any other failure does.
      if (address_parsed)
	 mpz_clear (i -> instruction -> parameter);
      goto error;
   }

   // Every opening bracket must be matched by a closing one
   (*end) = old;
   while (brackets)
   {
      if (!(*end))
	 return 0;

      if ((*end) == ']')
	 brackets --;

      end ++;
   }

   return 1;

error:
   (*end) = old;
   return 0;
}

// Parse a single instruction, optionally preceded by a label.
// Returns `true' if it was read successfully; errors are reported with
// parse_error.
//
// `token' should be the result of the last get_token call, (*line) and
// `delimiter' the state left by that call.  `line_ref' should point to
// the current line; it is recorded as the instruction's text line.
//
// A token which is not a mnemonic is taken as a label for the
// instruction being parsed, and the mnemonic is expected to follow.
// The new instruction is put at the head of (pd -> instructions), and
// (pd -> instruction) is incremented in any case.
static int parse_instruction (ParserData *pd, char *token,
				char **line, char *delimiter,
				char *line_ref)
{
   RAM_InstructionType type;
   Instruction *i;
   int parse_result = 1;

   (pd -> instruction) ++;

   type = get_instruction (token);
   if (type == RAM_NONE)
   {
      Label *l;

      l = label_new ();
      (l -> operator) = (pd -> instruction);
      (l -> name) = strdup (token);
      // A NULL name would crash the label lookup of the `link' stage
      if (!(l -> name))
	 err_fatal_perror ("strdup", "Label name memory allocation failure");

      (l -> next) = (pd -> labels);
      (pd -> labels) = l;

      token = get_token (line, INPUT_DELIMITERS, delimiter);
      if (!token)
      {
	 parse_error ((pd -> line_no), "no instruction found <1>");
	 return 0;
      }
      type = get_instruction (token);
      if (type == RAM_NONE)
      {
	 parse_error ((pd -> line_no), "no instruction found <2>");
	 return 0;
      }
   }

   i = instruction_new ();
   (i -> line) = (pd -> line_no);
   (i -> next) = (pd -> instructions);
   (pd -> instructions) = i;

   (i -> instruction) = ram_instruction_new ();
   (i -> instruction -> instruction) = type;

   // Take the argument, then run through the rest of the line.  The
   // second scan restores the delimiter after the argument, so `token'
   // then extends up to the end of the line; parse_argument finds the
   // end of the value itself.
   token = get_token (line, INPUT_DELIMITERS, delimiter);
   while (get_token (line, "\n", delimiter)); // And the rest of the line

   switch (type)
   {
   case RAM_LOAD:
   case RAM_ADD:
      parse_result = parse_argument (token, i, CONSTANT_PARAMETER |
			 POINTER_PARAMETER |
			 INDIRECT_POINTER_PARAMETER);
      break;
   case RAM_STORE:
      parse_result = parse_argument (token, i, POINTER_PARAMETER |
			 INDIRECT_POINTER_PARAMETER);
      break;
   case RAM_JUMP:
   case RAM_JGTZ:
      parse_result = parse_argument (token, i, INSTRUCTION_PARAMETER);
      break;
   default:
      // The other instructions take no argument
      break;
   }

   if (! parse_result)
   {
      parse_error ((pd -> line_no), "error in argument");
      return 0;
   }

   (i -> text_reference) = line_ref;

   return 1;
}

// The `link' stage: move the parsed instructions from the list
// (pd -> instructions) into the array (pd -> program -> instructions),
// resolve the label references, and release the list.
//
// The list runs from the last instruction to the first, so the array
// is filled from its end.  (pd -> program -> n) is set to the number of
// instructions.
//
// `instruction_line' can be NULL.  Otherwise, for every instruction the
// index of its text line (line number minus one) is stored there.
//
// Returns `true' if there were no errors.  Every unresolved label is
// reported; the whole list is processed even then, so that all the
// instructions end up owned by the program and nothing is left in the
// list.
static int format_instructions (ParserData *pd, int *instruction_line)
{
   int ok = 1;
   unsigned int index = (pd -> instruction);

   (pd -> program -> instructions) = (RAM_Instruction **) calloc
			((pd -> instruction), sizeof (RAM_Instruction *));
   // calloc may return NULL for an empty program, which is not an error
   if (!(pd -> program -> instructions) && (pd -> instruction))
       err_fatal_perror ("calloc",
		  "(program -> instructions) memory allocation failure");

   while (pd -> instructions)
   {
      Instruction *current = (pd -> instructions);
      RAM_Instruction *ri = (current -> instruction);

      index --;
      (pd -> program -> instructions) [index] = ri;
      (current -> instruction) = NULL;

      if (instruction_line)
	 instruction_line [index] = (current -> line) - 1;

      if (current -> reference)
      {
	 Label *l;

	 // TODO: inefficient label lookup here.
	 for (l = (pd -> labels); l; l = (l -> next))
	    if ((l -> name) &&
		! strcmp ((current -> reference), (l -> name)))
	       break;

	 if (l)
	    mpz_init_set_ui ((ri -> parameter), (l -> operator));
	 else
	 {
	    parse_err_with_string ((current -> line),
				   "label not found:",
				   (current -> reference));
	    // parse_argument has cleared the parameter when it recorded
	    // the label; initialize it to leave the instruction in the
	    // same state as a linked one.
	    mpz_init (ri -> parameter);
	    ok = 0;
	 }

	 // The name was allocated by strdup and is not needed any more
	 free ((char *) (current -> reference));
      }

      (pd -> instructions) = (current -> next);
      instruction_struct_delete (current);
   }

   (pd -> program -> n) = (pd -> instruction);

   return ok;
}

// Read a program from `f' line by line, parse it and link it.
// Returns the program, or NULL if there were errors (they are reported
// by the parser functions).  Parsing stops at the first erroneous
// line, though the remaining lines are still read.
//
// If `text' is non-NULL, this argument will be considered as pointer to
// allocated RAM_Text struct, and will be set to the program text:
// (text -> text) gets the lines, without the trailing '\n',
// (text -> text_size) their number, and (text -> instruction_line) the
// line index of every instruction.  The lines are not freed then, they
// belong to `text'.
//
// NOTE(review): if parsing fails, the list of the instructions parsed
// so far does not seem to be released anywhere -- confirm.
RAM_Program *ram_program_parse (FILE *f, RAM_Text *text)
{
   ParserData *pd;
   RAM_Program *program;
   char *line = NULL;

   int ok = 1;

   pd = parser_data_new ();

   (pd -> program) = ram_program_new ();

   while ((line = read_line_and_return_it (f)))
   {
      char *token, *s;
      char delimiter = '\0';

      if (text)
      {
	 // Append the line to the text list
	 TextLine *tl = text_line_new_by (line);

	 if (pd -> text_end)
	    (pd -> text_end -> next) = tl;
	 else
	    (pd -> text_start) = tl;
	 (pd -> text_end) = tl;
      }
      (pd -> line_no) ++;

      s = line;
      token = get_token (&s, " \n\t:", &delimiter);

      // Skip empty lines and `//' comments
      if (token && strncmp (token, "//", 2))
	 ok = ok && parse_instruction (pd, token, &s, &delimiter, line);

      if (! text)
      {
	 free (line);
	 line = NULL;
      }
      else
      {
	 size_t length;

	 // Restore the line changed by the previous get_token calls
	 get_token (&s, "", &delimiter);

	 // Remove '\n' at the end, but only if it is really there:
	 // the last line of a file may come without it.
	 length = strlen (line);
	 if (length && line [length - 1] == '\n')
	    line [length - 1] = '\0';
      }
   }

   if (text)
   {
      TextLine *tl;
      char **t;

      // For both arrays NULL is an error only if something was
      // requested: a zero-sized allocation may return NULL as well.
      (text -> text) = (char **) calloc ((pd -> line_no), sizeof (char *));
      (text -> text_size) = (pd -> line_no);
      if (!(text -> text) && (pd -> line_no))
	 err_fatal_perror ("calloc",
			 "(text -> text) memory allocation failure");

      // Zero-filled, as it is left unset if parsing has failed
      (text -> instruction_line) = (int *) calloc ((pd -> instruction),
					      sizeof (int));
      if (!(text -> instruction_line) && (pd -> instruction))
	 err_fatal_perror ("calloc",
		"(text -> instruction_line) memory allocation failure");

      // Move the lines from the list into the array
      t = (text -> text);
      for (tl = (pd -> text_start); tl != NULL; t ++)
      {
	 TextLine *tl_next = (tl -> next);

	 *t = (tl -> line);
	 text_line_struct_delete (tl);

	 tl = tl_next;
      }
   }

   ok = ok &&
	   format_instructions (pd, text ? (text -> instruction_line) : NULL);

   label_list_delete (pd -> labels);

   if (! ok)
   {
      ram_program_delete (pd -> program);
      (pd -> program) = NULL;
   }

   program = (pd -> program);
   (pd -> program) = NULL;

   parser_data_struct_delete (pd);

   return program;
}

