/* Copyright (C) 1999, 2000, 2001 Simon Patarin, INRIA

This file is part of Pandora, the Flexible Monitoring Platform.

Pandora is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

Pandora is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Pandora; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <libpandora/global.h> 

extern "C" {
#include <ctype.h>
}

#include <iostream>
#include "httpscancomponent.h"
#include <libpandora/http_date.h>
#include <libpandora/encoding.h>
#include <pandora_components/httppacket.h>
#include <pandora_components/tcppacket.h>
#include <pandora_components/ippacket.h>

// Registers this component with the framework.
// NOTE(review): presumably declares TCPPacket as the input packet type and
// HTTPPacket as the output type -- confirm against the component_export
// macro definition.
component_export(HTTPScanComponent, TCPPacket, HTTPPacket);

// Class-wide mutex: parseHeaders() holds it around the header scanner
// calls in builds where NO_FLEX is not set.
Mutex HTTPScanComponent::mx;

// Header-value callbacks.  Their definitions live at the end of this file
// inside the NO_FLEX section; doParse() calls them through a header_action
// entry as (*ha->action)(this, ha->var, value).
static void get_date(HTTPScanComponent *, void *, const char *);
static void get_text(HTTPScanComponent *, void *, const char *);
static void get_integer(HTTPScanComponent *, void *, const char *);
static void get_pragma(HTTPScanComponent *, void *, const char *);
static void get_mime(HTTPScanComponent *, void *, const char *);
static void get_enc(HTTPScanComponent *, void *, const char *);
static void get_cctrl(HTTPScanComponent *, void *, const char *);
static void get_inc(HTTPScanComponent *, void *, const char *);

#if NO_FLEX
#include "header_action.cc"
#endif

// Consumes one TCP segment and advances the HTTP scanner over its payload:
// looks for the start of a request/response, parses its headers, and ends
// the HTTP packet in progress when the message (or the flow) is complete.
//
// pkt is downcast to TCPPacket without a runtime check.
// Returns tcpp->eof on the fast-mode response shortcut, false otherwise.
bool HTTPScanComponent::add(Packet *pkt)
{
  tcpp = static_cast<TCPPacket *>(pkt);
  // locatePacket is expected to leave the associated IPPacket in ipp
  // (asserted just below); its _data buffer is what the match*/parse*
  // methods read the payload from.
  locatePacket(IPPacket, ipp, tcpp);
  pandora_assert(ipp != NULL);
  ipdata = &(ipp->_data);

  // Remember that a gap was reported; cleanup() clears the flag.
  if (tcpp->gap) gap = true;
  tcpp->refer();

  bool eof = tcpp->eof;
  
  if (fast) {
#if 1
    // Fast mode: response-side segments are dropped without scanning.
    // NOTE(review): this early return has no tcpp->release() to balance
    // the refer() above, unlike the normal exit path at the bottom of the
    // function -- confirm this does not leak a reference.
    if (flowType == HTTPPacket::response) {
      cleanPacket(tcpp);
      cleanPacket(msg);
      return eof;
    }
#endif
  }
  
  SEQ_GO(0);
  
  // Nothing to scan in an empty segment, nor in a one-byte segment that
  // carries eof.
  if (tcpp->length == 0) goto finished;
  if (eof && (tcpp->length == 1)) goto finished;

  while (keepGoing()) {
    int off = -1;
    //pandora_debug(data << ", " << (msg != NULL ? msg->cl : -2));
    // No message in progress, or the current one already has complete
    // headers: look for the start line of a new message.
    if ((msg == NULL) || (msg->hdrOK)) {
      off = matchFlow();
      //pandora_debug("match " << off);
      // No start line found: stop scanning; otherwise step over it.
      if (off <= 0) goto finished;
      else SEQ_MOVE(off);
    }
    
    // if we have seen the end of the previous message, a gap may have
    // prevented us from seeing the next one...
    if ( msg == NULL ) goto finished;

    // Fast mode never parses headers: mark them as done right away.
    if (fast) msg->hdrOK = true;
    
    if (!msg->hdrOK) parseHeaders();
  }
  
  
 finished:
  if (msg != NULL) adjustHTTPpkt();

  SEQ_GO(tcpp->length);
  // With complete headers, the message is ended here when it is a request
  // other than POST, or when msg->cl is non-negative and equals
  // ACT_MSG_LEN (presumably: the announced content length has been fully
  // received -- confirm against the macro definition).
  if ((msg != NULL) && (msg->hdrOK)) {
    if (((msg->type == HTTPPacket::request)  
	 && (msg->method != HTTPPacket::POST))
	|| ((msg->cl >= 0)  && (msg->cl == ACT_MSG_LEN))) {
      //pandora_debug("finish1 " << actSeq);
      endHTTPpkt(!eof);
    }
  }
  
  // NOTE(review): on eof endHTTPpkt(false) is called unconditionally, so
  // it may run right after the call above and/or with msg == NULL --
  // confirm endHTTPpkt() tolerates both.
  if (eof) {
    //pandora_debug("finish2 " << actSeq);
    endHTTPpkt(false);
  }
  //pandora_debug("end add");

  // Balance the refer() taken on entry, then drop the segment.
  tcpp->release();
  cleanPacket(tcpp);
  
  return false;
  //return eof;
}


// Resets the scanner state: ends the HTTP packet still in progress (if
// any), resets the three reassembly buffers and clears the flow type,
// sequence/length counters and gap flag.
void HTTPScanComponent::cleanup(void)
{
  if (msg != NULL) {
    endHTTPpkt(false);
  }
  // endHTTPpkt() must have dropped the message.
  pandora_assert(msg == NULL);

  hdrbuf.reset();
  reqbuf.reset();
  respbuf.reset();

  flowType = HTTPPacket::unknown;
  actLength = 0;
  actSeq = 0;
  gap = false;
}


// Parses header lines of the message in progress (msg), starting at
// payload offset `data`.  Bytes held in hdrbuf from an earlier call are
// put in front of the new payload first.  Stores the scanner's result in
// msg->hdrOK and calls endHeader() when it is true.
void HTTPScanComponent::parseHeaders(void)
{
  pandora_assert( msg != NULL );

  unmatched = 0;
  char *ptr = NULL;
  char *_ptr = (char *)(ipdata->getData() + data);
  int roff = 0;
  int len = ipdata->getLength() - data;
  
  if (hdrbuf.isEmpty()) {
    ptr = _ptr;
  } else {
    // NOTE(review): restore() appears to return the number of saved bytes
    // that now precede the payload at ptr; the scan length grows by that
    // amount and the position is moved back to match -- confirm.
    roff = hdrbuf.restore(&ptr, _ptr, len);
    len += roff;
    SEQ_MOVE(-roff);
    pandora_assert( ptr != NULL );
  }
  
#if NO_FLEX
  // Hand-written scanner.
  msg->hdrOK = doParse(ptr, len);
#else
  // Scanner generated elsewhere; every call sequence is serialised through
  // the class-wide mutex.
  mx.lock();
  init_header_buffer(ptr, len);
  msg->hdrOK = headerlex(this); 
  release_header_buffer();
  mx.unlock();
#endif
  
  SEQ_UPDATE;
  
  // A non-zero `unmatched` marks where unconsumed input starts (doParse()
  // sets it for an incomplete trailing line): keep the bytes from there up
  // to `data` for the next call.
  if (unmatched > 0) {
    pandora_assert (data >= unmatched);
    hdrbuf.save((char *)(ipdata->getData() + unmatched), 
		data - unmatched);
  }
  
  if (msg->hdrOK) endHeader();
  
  hdrbuf.clean();
}

// Tries to recognise an HTTP request line ("METHOD url HTTP/x.y\n") at the
// current payload offset, with any bytes held in reqbuf put in front of
// the payload first.
//
// On success a new HTTP packet is started (newHTTPpkt()) and its method,
// type, version and url are filled in.  The version stays 0 when the
// request line carries no '/' after the URL.
//
// Returns the number of payload bytes up to and including the newline
// that ends the request line (bytes restored from reqbuf are not counted),
// or -1 when no complete request line was recognised.
int HTTPScanComponent::matchRequest(void)
{
  int off = -1, roff = 0;
  HTTPPacket::http_method_t method = HTTPPacket::UNDEF;
  int skip = -1;
  char *_ptr = (char *)(ipdata->getData() + data);
  int len = ipdata->getLength() - data;
  char *ptr = NULL;
  int version = 0;

  if (reqbuf.isEmpty()) {
    ptr = _ptr;
  } else {
    roff = reqbuf.restore(&ptr, _ptr, len);
    len += roff;
    pandora_assert( ptr != NULL );
  }

  // Too short to hold even the shortest method token.  This must be
  // checked before any byte of the buffer is inspected: the fix-up below
  // reads ptr[0] and ptr[2].
  if (len < 4) {
    return -1;
  }

  // the following line is an ugly fix for a nasty bug occurring when
  // requests are pipelined because flex puts a \0 after the last character
  // matched.
  if (ptr[0] == '\0') ptr[0] = ((ptr[2] == 'A') ? 'H' : 'G');

  // match() is expected to return a positive number of bytes to skip when
  // the method token is present.
  switch (ptr[0]) {
  case 'G':
    if ((skip = match(ptr, len, "GET ", 4, &reqbuf)) > 0) {
      method = HTTPPacket::GET;
      goto finished;
    }
    break;

  case 'P':
    if ((skip = match(ptr, len, "POST ", 5, &reqbuf)) > 0) {
      method = HTTPPacket::POST;
      goto finished;
    }

    if ((skip = match(ptr, len, "PUT ", 4, &reqbuf)) > 0) {
      method = HTTPPacket::PUT;
      goto finished;
    }
    break;

  case 'H':
    if ((skip = match(ptr, len, "HEAD ", 5, &reqbuf)) > 0) {
      method = HTTPPacket::HEAD;
      goto finished;
    }
    break;

  case 'T':
    if ((skip = match(ptr, len, "TRACE ", 6, &reqbuf)) > 0) {
      method = HTTPPacket::TRACE;
      goto finished;
    }
    break;

  case 'D':
    if ((skip = match(ptr, len, "DELETE ", 7, &reqbuf)) > 0) {
      method = HTTPPacket::DELETE;
      goto finished;
    }
    break;

  default:
    goto clean;
  }

 finished:
  if (skip > 0) {
    // Skip blanks between the method and the URL, never looking at or
    // beyond ptr[len].
    while ((skip < len)
           && isspace(static_cast<unsigned char>(ptr[skip]))) {
      ++skip;
    }

    // The URL runs up to the next space.
    char *endurl = NULL;
    if (skip < len) {
      endurl = (char *) memchr(ptr + skip, ' ', len - skip);
    }
    if (endurl == NULL) {
      //pandora_debug("cannot find end of URL");
#ifdef PERSIST
      reqbuf.save(ptr, len);
      goto clean;
#else
      return -1;
#endif
    }

    char *endline = (char *) memchr(endurl, '\n', len - (endurl - ptr));
    if (endline == NULL) {
      //pandora_debug("cannot find end of line");
#ifdef PERSIST
      reqbuf.save(ptr, len);
      goto clean;
#else
      return -1;
#endif
    }

    // The protocol version follows the first '/' after the URL.  Only the
    // request line itself is searched, and a missing '/' leaves the
    // version at 0 instead of handing an invalid pointer to parseVersion().
    char *slash = (char *) memchr(endurl, '/', endline - endurl);
    if (slash != NULL) {
      version = parseVersion(slash + 1);
    }
    off = (endline - ptr) + 1 - roff;

#ifdef PERSIST
    // The new packet starts where the restored bytes began.
    actSeq -= roff;
    newHTTPpkt();
    actSeq += roff;
#else
    newHTTPpkt();
#endif
    msg->method = method;
    msg->type = HTTPPacket::request;
    msg->version = version;
    msg->url.init(ptr+skip, (endurl - ptr) - skip);
  }

 clean:
#ifdef PERSIST
  reqbuf.clean();
#endif
  //pandora_debug("match request return: " << off);
  return off;
}

int HTTPScanComponent::matchResponse(void)
{
  int off = -1, roff = 0;
  char *_ptr = (char *)(ipdata->getData() + data);
  int len = ipdata->getLength() - data;
  char *ptr = NULL;

  pandora_assert (len >= 0);
  if (len > 65535) {
    pandora_warning("big len, skipping");
    return -1;
  }
  
  if (respbuf.isEmpty()) {
    ptr = _ptr;
  } else {
    //pandora_debug("restore " << tcpp);
    roff = respbuf.restore(&ptr, _ptr, len);
    len += roff;
    //pandora_debug(roff);
    pandora_assert( ptr != NULL );
  }
  
  if (len < 7) {
    //CERR(LOG_DEBUG) << "resp save(" << len << "): " << *tcpp;
    //respbuf.save(ptr, len);
    return -1;
  }
  
  // why ptr == NULL sometimes?...
  if (ptr == NULL) return -1;
  if (strncmp(ptr, "HTTP/1.", 7) == 0) {
    char *ptr2 = (char *) memchr(ptr+7, '\n', len - 7);
    
    if (ptr2 !=NULL) {
      off = (ptr2-ptr)+1-roff;
    } else {
      respbuf.save(ptr, len);
      return -1;
    }

    int version = parseVersion(ptr+5); // 5 == strlen("HTTP/");
    u_short code = atoi(ptr+9); // 9 == strlen("HTTP/x.y ");
    if ((code > 999) || (code < 100)) goto finished;
    actSeq -= roff;
    newHTTPpkt();
    actSeq += roff;
    msg->type = HTTPPacket::response;
    msg->code = code;
    msg->version = version;
  }

 finished:
  respbuf.clean();
  return off;
}

#if NO_FLEX
// Hand-written header scanner (NO_FLEX builds).
//
// Walks the len bytes at str line by line.  For every "Name: value" line
// the name is looked up with in_word_set() and, when it is known, the
// associated action is called with the value (leading and trailing blanks
// removed).  A header with an empty value is reported as an empty string.
// Lines without a colon are skipped.  Scanning stops at the first line
// shorter than two bytes ("\n" or "\r\n"), which ends the header block.
//
// The buffer is modified in place: a '\0' is written after each header
// name and after each value.
//
// Side effects: `data` is advanced by the number of bytes consumed; when
// the input ends in the middle of a line, `unmatched` is set to the offset
// at which that incomplete line starts.
//
// Returns non-zero when the end of the header block was reached.
int HTTPScanComponent::doParse(char *str, int len)
{
  const char *const end = str + len;
  char *ptr;

  bool got_attr = false;   // a ':' was seen on the current line
  bool skip = false;       // between the ':' and the first value byte
  bool finished = false;

  const char *beg_hdr = str;
  char *end_hdr = NULL;
  const char *beg_data = NULL;
  char *end_data = NULL;

  for (ptr = str; ptr < end; ++ptr) {
    switch (*ptr) {
    case '\n':
      if (!got_attr) {
        if (ptr - beg_hdr < 2) {
          //pandora_debug("FINISHED at byte " << (ptr - str));
          ++ptr;
          finished = true;
          goto exit;
        }
      } else {
        *end_hdr = '\0';
        if (beg_data != NULL) {
          // end_data is always set together with beg_data.
          *end_data = '\0';
        } else {
          // Empty value ("Name:\n"): hand the action an empty string
          // (the terminator just written) rather than a NULL pointer.
          beg_data = end_hdr;
        }
        //pandora_debug('[' << beg_hdr << "] [" << beg_data << ']');
        header_action *ha = in_word_set(beg_hdr, end_hdr - beg_hdr);
        if (ha != NULL) {
          (*ha->action)(this, ha->var, beg_data);
        }
      }

      // Start a fresh line.  `skip` is cleared as well so that an empty
      // value cannot make the next line's value start at its header name.
      beg_hdr = ptr + 1;
      beg_data = end_data = NULL;
      got_attr = skip = false;
      break;

    case ' ': case '\t': case '\r':
      break;

    case ':':
      if (!got_attr) {
        end_hdr = ptr;
        got_attr = skip = true;
        break;
      }
      // else fall through: a later ':' is part of the value

    default:
      if (skip) {
        beg_data = ptr;
        skip = false;
      }
      end_data = ptr + 1;
      break;
    }
  }

 exit:
  data += (ptr - str);
  if (!finished && beg_hdr < end) unmatched = data - (end - beg_hdr);
  return finished;
}

// Header action: runs the value through http_date_to_time() and stores
// the result in the time_t member of the current message selected by var.
void get_date(HTTPScanComponent *comp, void *var, const char *data)
{
  typedef time_t HTTPPacket::*member_t;
  const member_t field = (member_t) var;
  comp->msg->*field = http_date_to_time(data);
}

// Header action: initialises the text member of the current message
// selected by var from the header value.
void get_text(HTTPScanComponent *comp, void *var, const char *data)
{
  typedef text HTTPPacket::*member_t;
  const member_t field = (member_t) var;
  text &target = comp->msg->*field;
  target.init(data);
}

// Header action: converts the header value with atoi() and stores it in
// the int member of the current message selected by var.
void get_integer(HTTPScanComponent *comp, void *var, const char *data)
{
  typedef int HTTPPacket::*member_t;
  const member_t field = (member_t) var;
  const int value = atoi(data);
  comp->msg->*field = value;
}

// Header action: sets the pragma member of the current message selected
// by var to no_cache when the value is "no-cache" (compared without
// regard to case), and to unset for any other value.
void get_pragma(HTTPScanComponent *comp, void *var, const char *data)
{
  typedef HTTPPacket::http_pragma_t HTTPPacket::*member_t;
  const member_t field = (member_t) var;

  if (strcasecmp(data, "no-cache") == 0) {
    comp->msg->*field = HTTPPacket::no_cache;
  } else {
    comp->msg->*field = HTTPPacket::unset;
  }
}

// Header action: stores comp->MIME->encode(value) in the int member of
// the current message selected by var.  Does nothing when the component
// has no MIME encoder.
void get_mime(HTTPScanComponent *comp, void *var, const char *data)
{
  if (comp->MIME != NULL) {
    typedef int HTTPPacket::*member_t;
    const member_t field = (member_t) var;
    comp->msg->*field = comp->MIME->encode(data);
  }
}

// Header action: stores comp->ENC->encode(value) in the int member of the
// current message selected by var.  Does nothing when the component has
// no ENC encoder.
void get_enc(HTTPScanComponent *comp, void *var, const char *data)
{
  if (comp->ENC != NULL) {
    typedef int HTTPPacket::*member_t;
    const member_t field = (member_t) var;
    comp->msg->*field = comp->ENC->encode(data);
  }
}

// Header action placeholder: the header value is ignored and nothing is
// stored.
void get_cctrl(HTTPScanComponent *comp, void *var, const char *data)
{
  (void) comp;
  (void) var;
  (void) data;
}

// Header action: increments the int member of the current message
// selected by var; the header value itself is not looked at.
void get_inc(HTTPScanComponent *comp, void *var, const char *data)
{
  typedef int HTTPPacket::*member_t;
  const member_t field = (member_t) var;
  int &counter = comp->msg->*field;
  ++counter;
}
#endif
