;;;; string.lisp --- FFI definitions for libxml-clisp

;;; Copyright (C) 2009 N. Raghavendra.  All rights reserved.
;;; 
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; 1. Redistributions of source code must retain the above copyright
;;;    notice, this list of conditions and the following disclaimer.
;;; 2. Redistributions in binary form must reproduce the above
;;;    copyright notice, this list of conditions and the following
;;;    disclaimer in the documentation and/or other materials provided
;;;    with the distribution.
;;; 
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

;;; N. Raghavendra <raghu@retrotexts.net>
;;; 
;;; Created: 2009-07-08
;;; 
;;; $Hg: string.lisp,v ae6366215b15 2009-09-03T20:01:35+05:30 raghu $

(in-package "NET.RETROTEXTS.LIBXML-CLISP")



(defun make-xml-string (&optional address)
  "Return an XML String with address ADDRESS."
  (make-instance 'xml-string :address address))


;; FIXME: Doesn't work with UTF-16.
(defun address-to-string (address encoding &key (start 0) end)
  "Return the string encoded by the octet sequence at ADDRESS.
The string is encoded according to ENCODING.  START and END denote the
bounding indices of the octet sequence.  If END is nil, then the octet
sequence is assumed to be terminated by the zero octet."
  (if (null-address-p address)
    ""
    ;; Use `ext:letf' since `custom:*foreign-encoding*' is a
    ;; symbol-macro place.
    (ext:letf ((custom:*foreign-encoding* encoding))
      (if (and (integerp end) (< start end))
        (foreign-value (foreign-variable address
                                         (parse-c-type
                                          `(c-array-max character
                                                        ,(- end start)))))
        (address-as-foreign-string address)))))

(defun xml-string-to-string (xml-string &key (start 0) end)
  "Return the Unicode string that is represented by XML-STRING.
START and END denote the bounding indices of the octet sequence
wrapped in XML-STRING."
  (address-to-string (item-address xml-string) charset:utf-8
                     :start start :end end))

(defun write-string-from-address (address encoding
                                  &key (start 0) end (stream *xml-output*))
  "Write the string encoded by the octet sequence at ADDRESS to STREAM.
The string is encoded according to ENCODING.  START and END denote the
bounding indices of the octet sequence.  If END is nil, then the octet
sequence is assumed to be terminated by the zero octet."
  (write-string (address-to-string address encoding :start start :end end)
                stream))

(defun char-utf8-octet-length (character)
  "Return the number of octets in the UTF-8 encoding of CHARACTER."
  (let ((code-point (char-code character)))
    (cond ((< code-point 128) 1)   ;[0,    2^7)
          ((< code-point 2048) 2)  ;[2^7,  2^11)
          ((< code-point 65536) 3) ;[2^11, 2^16)
          (t 4))))                 ;[2^16, 17*2^16)

(defun utf8-octet-length (string)
  "Return the number of octets in the UTF-8 encoding of STRING."
  (loop for char across string
     sum (char-utf8-octet-length char)))

(defun string-to-xml-string (string &key (start 0) end)
  "Return an XML String representing STRING.
START and END denote the bounding character indices of STRING."
  ;; Use `ext:letf' since `custom:*foreign-encoding*' is a
  ;; symbol-macro place.
  (ext:letf ((custom:*foreign-encoding* charset:utf-8)
             (substring (subseq string start end)))
    (make-xml-string
     (foreign-address
      (allocate-deep 'character substring
                     :count (1+ (utf8-octet-length substring)))))))

(defun xml-string (object)
  "Return the XML String described by OBJECT.
OBJECT must be an XML String designator, i.e., an XML String, an
address, or a string designator."
  (etypecase object
    (xml-string object)
    (address (make-xml-string object))
    ((or string symbol character)
      (string-to-xml-string (string object)))))

(defmacro with-xml-string ((xml-string object) &body body)
  "Evaluate BODY using the XML String described by OBJECT.
OBJECT must be an XML String designator.  During the evaluation, the
variable XML-STRING is bound to the XML String described by OBJECT.
That XML String has dynamic extent, which ends when the form is
exited."
  (let ((created (gensym)))
    `(let* ((,created nil)
            (,xml-string (xml-string ,object)))
       (unwind-protect (progn (setf ,created t)
                              ,@body)
         (when ,created
           (typecase ,object
             ((and (not xml-string) (not address))
               ;; The address of XML-STRING is owned by us.
               (free-item ,xml-string))))))))

(defun replace-entities (document string)
  "Replace entity references in STRING with their values.
The document DOCUMENT provides the context for the replacement.
STRING must be an XML String designator.  Also replaces non-ASCII
characters in STRING with the corresponding character entity
references.  Returns the resulting string."
  (if (null-item-p document)
    (error 'empty-document-error :document document)
    (with-xml-string (xstring string)
      (if (null-item-p xstring)
        (error 'null-item-error :item xstring)
        (let ((result (make-xml-string ($xml-encode-entities-reentrant
                                        (item-address document)
                                        (item-address xstring)))))
          (prog1
            (xml-string-to-string result)
            (free-item result)))))))



;;; Local Variables:
;;; mode: lisp
;;; comment-column: 32
;;; End:

;;;; string.lisp ends here