(* Library for escaping and decoding.
 * Copyright (C) 2003-2004 Merjis Ltd. (http://www.merjis.com/)
 * Copyright (C) 1997 Daniel de Rauglaudre, INRIA
 * Copyright (C) 1998 Jean-Christophe FILLIATRE
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: cgi_escape.ml,v 1.1 2004/02/26 12:10:51 rwmj Exp $
 *)

(* Code from wserver.ml, (C) 1997 Daniel de Rauglaudre, INRIA. *)

let hexa_digit x =
  if x >= 10 then Char.chr (Char.code 'A' + x - 10)
  else Char.chr (Char.code '0' + x)

let hexa_val conf =
  match conf with
    | '0'..'9' -> Char.code conf - Char.code '0'
    | 'a'..'f' -> Char.code conf - Char.code 'a' + 10
    | 'A'..'F' -> Char.code conf - Char.code 'A' + 10
    | _ -> 0

let raw_decode s =
  let rec need_decode i =
    if i < String.length s then
      match s.[i] with
	| '%' | '+' -> true
	| _ -> need_decode (succ i)
    else false
  in
  let rec compute_len i i1 =
    if i < String.length s then
      let i =
        match s.[i] with
          | '%' when i + 2 < String.length s -> i + 3
          | _ -> succ i
      in
	compute_len i (succ i1)
    else i1
  in
  let rec copy_decode_in s1 i i1 =
    if i < String.length s then
      let i =
        match s.[i] with
          | '%' when i + 2 < String.length s ->
              let v = hexa_val s.[i + 1] * 16 + hexa_val s.[i + 2] in
		s1.[i1] <- Char.chr v; i + 3
          | '+' -> s1.[i1] <- ' '; succ i
          | x -> s1.[i1] <- x; succ i
      in
	copy_decode_in s1 i (succ i1)
    else s1
  in
  if need_decode 0 then
    let len = compute_len 0 0 in
    let s1 = String.create len in
    copy_decode_in s1 0 0
  else 
    s

let decode s =
  let rs = raw_decode s in
  let rec strip_heading_and_trailing_spaces s =
    if String.length s > 0 then
      if s.[0] == ' ' then
        strip_heading_and_trailing_spaces
          (String.sub s 1 (String.length s - 1))
      else if s.[String.length s - 1] == ' ' then
        strip_heading_and_trailing_spaces
          (String.sub s 0 (String.length s - 1))
      else 
	s
    else 
      s
  in
  strip_heading_and_trailing_spaces rs

(* special characters must be encoded. According to RFC 1738 they are: *)

let special = function 
  | '\000'..'\031' | '\127'..'\255'                      (* non US ASCII *)
  | '<' | '>' | '"' | '#' | '%'                          (* space should be here, but its encoding uses only one char *)
  | '{' | '}' | '|' | '\\' | '^' | '~' | '[' | ']' | '`' (* unsafe *)
  | ';' | '/' | '?' | ':' | '@' | '=' | '&'              (* reserved *)
      -> true
  | '+' -> true
  | _ -> false
      
(* '"' *)
      
let encode s =
  let rec need_code i =
    if i < String.length s then
      match s.[i] with
        | ' ' -> true
	| x -> if special x then true else need_code (succ i)
    else false
  in
  let rec compute_len i i1 =
    if i < String.length s then
      let i1 = if special s.[i] then i1 + 3 else succ i1 in
	compute_len (succ i) i1
    else i1
  in
  let rec copy_code_in s1 i i1 =
    if i < String.length s then
      let i1 =
        match s.[i] with
          | ' ' -> s1.[i1] <- '+'; succ i1
          | c ->
              if special c then
		begin
                  s1.[i1] <- '%';
                  s1.[i1 + 1] <- hexa_digit (Char.code c / 16);
                  s1.[i1 + 2] <- hexa_digit (Char.code c mod 16);
                  i1 + 3
		end
              else begin s1.[i1] <- c; succ i1 end
      in
      copy_code_in s1 (succ i) i1
    else 
      s1
  in
  if need_code 0 then
    let len = compute_len 0 0 in copy_code_in (String.create len) 0 0
  else 
    s

(* Types of escaping performed by the escape function. *)
type escape_t = EscapeNone | EscapeUrl | EscapeHtml | EscapeHtmlTag
              | EscapeHtmlTextarea

let amp_re = Pcre.regexp "&"
let lt_re = Pcre.regexp "<"
let gt_re = Pcre.regexp ">"
let quot_re = Pcre.regexp "\""
let eol_re = Pcre.regexp "\r?\n"

let escape escape_type str =
  let replace rex templ = Pcre.replace ~rex ~templ in

  match escape_type with
    EscapeNone -> str
  | EscapeUrl -> encode str
  | EscapeHtml ->
      let str = replace amp_re "&amp;" str in
      let str = replace lt_re "&lt;" str in
      let str = replace gt_re "&gt;" str in
      let str = replace quot_re "&quot;" str in
      let str = replace eol_re "<br/>" str in
      str
  | EscapeHtmlTag ->
      let str = replace amp_re "&amp;" str in
      let str = replace lt_re "&lt;" str in
      let str = replace gt_re "&gt;" str in
      let str = replace quot_re "&quot;" str in
      let str = replace eol_re "&#13;&#10;" str in
      str
  | EscapeHtmlTextarea ->
      let str = replace amp_re "&amp;" str in
      let str = replace lt_re "&lt;" str in
      let str = replace gt_re "&gt;" str in
      let str = replace quot_re "&quot;" str in
      str

let escape_url = escape EscapeUrl
let escape_html = escape EscapeHtml
let escape_html_tag = escape EscapeHtmlTag
let escape_html_textarea = escape EscapeHtmlTextarea
