;;; x001                          -*- mode: scheme; coding: iso-8859-1 -*-

;; Copyright (C) 2007, 2009-2011, 2014, 2019-2021 Thien-Thi Nguyen
;; Portions Copyright (C) 1999, 2000 Thierry Bzecourt
;;
;; This is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; This software is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

(cond-expand (guile-2.2 (use-modules (rnrs bytevectors)
                                     (ice-9 iconv)))
             (else #f))

(use-modules
 ((ice-9 regex) #:select (string-match
                          match:substring))
 ((mixp expat) #:prefix E: #:select (make-xml-encoding
                                     expat-version
                                     get-feature-list
                                     parser-create
                                     set-encoding
                                     hset!
                                     set-hash-salt
                                     set-base
                                     get-base
                                     parse
                                     stop-parser
                                     resume-parser
                                     get-parsing-status
                                     set-param-entity-parsing
                                     error-symbol
                                     get-locus
                                     get-attribute-info
                                     error-string))
 ((mixp utils) #:prefix U: #:select (parse-data
                                     utf8->latin1
                                     utf8->ucs2
                                     xml->tree)))

(define testfile
  (let ((srcdir (getenv "srcdir")))
    (lambda (filename)
      (in-vicinity srcdir filename))))

(define (fs s . args)
  (apply simple-format #f s args))

(define verbose? (equal? "1" (getenv "VERBOSE")))

(define (fso s . args)
  (and verbose? (apply simple-format #t s args)))

(define (fse s . args)
  (apply simple-format (current-error-port) s args))

(define (accumulator)
  (let* ((ls (list #f))
         (tp ls))
    (lambda stuff
      (cond ((null? stuff) (cdr ls))
            (else (set-cdr! tp stuff)
                  (set! tp (last-pair tp)))))))

(define (sixty-seven-and-eight-encoding)
  (let* ((mapping (make-vector 256 67))
         (convert (lambda (s) (trace "convert") 68))
         (release (lambda () (trace "release"))))
    (E:make-xml-encoding mapping convert release)))

(define memcheck? (equal? "1" (getenv "MEMCHECK")))

(cond (memcheck? (gc) (gc) (gc) (sleep 5)))

(define (bad-parse? symbol)
  (eq? 'XML_STATUS_ERROR symbol))

(define (err-sym-n-str parser)
  (let ((sym (E:error-symbol parser)))
    (fs "~A (~A)"
        sym (E:error-string sym))))

;; A sample XML document used in several tests
(define my-document
  "<?xml version='1.0' encoding='ISO-8859-1' standalone='yes'?>
<!DOCTYPE doc [<!ENTITY program.name 'gexpat'>
               <!NOTATION SomeNotation PUBLIC 'nota'>]>
<!-- Comment for the sample document -->
<doc> <title> A sample XML document for &program.name; </title>

<chapter name='Chapter 1' id='1'>Contents of Chapter 1.<br/> </chapter>

<![CDATA[ba l> dsq>]]>

<chapter name='Chapter 2' id='2'>Contents of Chapter 2.</chapter>

</doc>")

(define (err s . args)
  (apply fse s args)
  (newline (current-error-port)))

(define trace (if (equal? "1" (getenv "TRACE"))
                  (lambda (s . args)
                    (apply simple-format #t s args)
                    (newline))
                  noop))

(define count (make-object-property))

(define test-counter 0)
(define test-failed (accumulator))
(set! (count test-failed) 0)

(define (failed++ desc failed s . args)
  (test-failed (list test-counter desc))
  (set! (count test-failed) (1+ (count test-failed)))
  (apply err (string-append "FAIL[~S]: ~A - ~A. " s)
         test-counter desc (failed) args))

(define (good desc . failed)
  (fso "PASS[~A]: ~A~A~%"
       test-counter desc
       (if (null? failed)
           ""
           (fs " - ~A" (car failed)))))

(define-macro (check expression desc failed-msg)
  `(let ((where (fs "evaluating: ~S" ',expression))
         (thunk (lambda () ,expression))
         (failed (lambda () ,failed-msg)))
     (define (bad s . args)
       (apply failed++ ,desc failed s args))
     (set! test-counter (1+ test-counter))
     (let ((res (catch #t thunk (lambda x
                                  (if (and (not (null? x))
                                           (pair? (car x)))
                                      (caar x)
                                      x)))))
       (cond ((eq? res #t)
              (good ,desc))
             ((eq? res #f)
              (bad "Result #f from ~A" where))
             (else
              (bad "Exception `~S' received while ~A" res where))))))

(define-macro (check-exc expression exc desc failed-msg)
  `(let ((during (fs "while evaluating: ~S" ',expression))
         (thunk (lambda () ,expression))
         (failed (lambda () ,failed-msg)))
     (define (bad s . args)
       (apply failed++ ,desc failed s args))
     (set! test-counter (1+ test-counter))
     (let ((res (catch #t thunk (lambda args (car args)))))
       (cond ((memq res '(#t #f))
              (bad "Missing exception `~S' ~A" ,exc during))
             ((eq? res ,exc)
              (good ,desc (failed)))
             (else
              (bad "Received unexpected answer `~S' ~A" res during))))))

(define TESTS '())

(define-macro (test description . body)
  `(set! TESTS (append! TESTS (list (lambda () ,description ,@body)))))

(define indent "")
(define (indent-increase)
  (set! indent (string-append indent "    ")))
(define (indent-decrease)
  (set! indent (substring indent 0 (- (string-length indent) 4))))

(test "Test version info"
  (let ((v (E:expat-version)))
    (fso "(expat-version) => ~S~%" v)
    (check (and (list? v)
                (= 4 (length v))
                (call-with-values (lambda () (apply values v))
                  (lambda (major minor micro str)
                    (let ((m (string-match "([0-9]+)\\.([0-9]+)\\.([0-9]+)$"
                                           str)))
                      (define (chk num ref)
                        (= num (string->number (match:substring m ref))))
                      (and (chk major 1)
                           (chk minor 2)
                           (chk micro 3))))))
           "valid version information"
           (fs "strange expat-version return value: ~S" v))))

(test "Test features"
  (let ((features (E:get-feature-list)))
    (fso "features: ~S~%" features)
    (check (list? features)
           "features list"
           (fs "get-features-list did not return a list, instead: ~S"
               features))
    (let ((first (car features)))
      (fso "first feature: ~S~%" first)
      (check (and (pair? first)
                  (string? (car first))
                  (string=? "EXPAT_VERSION" (car first)))
             (let ((v (cdr first)))
               (fs "first element is the libexpat version (#x~A => v~A)"
                   (number->string v 16)
                   (fs "~A.~A.~A" (ash v -16)
                       (logand (ash v -8) #xff)
                       (logand v #xff))))
             (fs "bad first element: ~S" first)))))

(test "Test handlers"
  ;; create the parser, and set the handlers
  (let* ((my-parser (E:parser-create))
         (v (E:get-locus my-parser)))
    (E:hset!
     my-parser

     'element-start
     (lambda (name atts)
       (E:get-locus my-parser v)
       (trace "~A ELEMENT START '~A' - attributes ~S @~A"
              indent name atts v)
       (and (string=? "chapter" name)
            (pair? atts)
            (string=? "Chapter 1" (assoc-ref atts "name"))
            (E:stop-parser my-parser #t))
       (indent-increase))

     'element-end
     (lambda (name)
       (indent-decrease)
       (trace "~A ELEMENT END '~A'"
              indent name))

     'character-data
     (lambda (value)
       (trace "~A CHARACTER DATA '~A' "
              indent (if (equal? "\n" value)
                         "\\n"
                         value)))

     'comment
     (lambda (value)
       (trace "~A COMMENT '~A' "
              indent value))

     'cdata-section-start
     (lambda ()
       (trace "~A CDATA START "
              indent)
       (indent-increase))

     'cdata-section-end
     (lambda ()
       (indent-decrease)
       (trace "~A CDATA END "
              indent)))

    (let ((res (E:set-hash-salt my-parser 42)))
      (check res
             "set-hash-salt at the right time ok"
             "set-hash-salt at the right timem not ok"))

    ;; parse the document, and print the errors, if any
    ;; (do it in two phases, to test stop/resume)
    (let ((res-parse (E:parse my-parser my-document #t)))
      (check (not (bad-parse? res-parse))
             "parse (w/ stop)"
             (fs "Parser error: ~A"
                 (err-sym-n-str my-parser))))
    (let ((status (E:get-parsing-status my-parser)))
      (fso "(get-parsing-status) => ~S~%" status)
      (check (eq? 'XML_SUSPENDED (car status))
             (fs "parsing ~A" (car status))
             (fs "parsing ~A" (car status))))
    (let ((res (E:set-hash-salt my-parser 42)))
      (check (not res)
             "set-hash-salt at the wrong time ok"
             "set-hash-salt at the wrong timem not ok"))
    (let ((res-resume (E:resume-parser my-parser)))
      (check (not (bad-parse? res-resume))
             "resume parse"
             (fs "Parser error: ~A"
                 (err-sym-n-str my-parser))))))

(test "Test get-attribute-info"
  (let ((p (E:parser-create))
        (doc "<element a1='v1' a2='v2'>text</element>")
        (expected #2u64((9 11 13 15) (17 19 21 23)))
        (ainf #:UNSET))
    (E:hset! p
             'element-start
             (lambda (name atts)
               (set! ainf (E:get-attribute-info p))))
    (check (eq? 'XML_STATUS_OK (E:parse p doc))
           "parse ok"
           (fs "parse failed: ~A" (err-sym-n-str p)))
    (check (or (not ainf)
               (and (typed-array? ainf 'u64)
                    (equal? '((0 1) (0 3)) (array-shape ainf))
                    (equal? expected ainf)))
           (fs "got ~S" ainf)
           (fs "expected ~S but got ~S" expected ainf))))

(test "Try to parse a not well-formed document"
  (let ((bad-xml "<doc>dfssfd</do>")
        (badness 13)) ;;; -------^
    (trace "Parsing a not well-formed document with E:parse...")
    (let ((p (E:parser-create)))
      (let ((res (E:set-encoding p "ISO-8859-1")))
        (check (eq? 'XML_STATUS_OK res)
               "Able to do set-encoding"
               (fs "set-encoding failed: ~A" (err-sym-n-str p))))
      (E:hset! p 'character-data (lambda (c)
                                   '()))
      (let ((res (E:parse p bad-xml #t)))
        (check (bad-parse? res)
               "Parse bad XML"
               "Parser did not return an error on not well-formed document")
        (if (equal? res 0)
            (let ((locus (E:get-locus p)))
              (check (eqv? es 'XML_ERROR_TAG_MISMATCH)
                     "Expect an 'XML_ERROR_TAG_MISMATCH error"
                     (fs "Expecting ~A received ~A"
                         'XML_ERROR_TAG_MISMATCH
                         (err-sym-n-str p)))
              (let ((want (vector 1 badness 0 badness)))
                (check (equal? want locus)
                       "Locus of badness"
                       (fs "Should be ~S but got ~S" want locus)))))))
    (trace "Parsing a not well-formed document with U:parse-data...")
    (check-exc (call-with-input-string bad-xml U:parse-data)
               'XML_ERROR_TAG_MISMATCH
               "Expect an 'XML_ERROR_TAG_MISMATCH exception"
               "Parser did not return the expected exception on not well-formed document")))

(test "Test doctype, entities, notations"
  (let ((xml-doc "<!DOCTYPE doc [
<!NOTATION vrml PUBLIC 'VRML 2'>
<!ENTITY Antarctica SYSTEM 'http://www.antarctica.net' NDATA vrml>
<!ATTLIST World src ENTITY #REQUIRED>]>
<doc>
 <World src='Antarctica' />
</doc>")
        (my-parser (E:parser-create))
        (acc (accumulator)))

    (define (more handler)
      (lambda args
        (acc (cons handler args))))

    (E:hset! my-parser
             'doctype-decl-start   (more 'doctype-decl-start)
             'doctype-decl-end     (more 'doctype-decl-end)
             'notation-decl        (more 'notation-decl)
             'entity-decl          (more 'entity-decl))
    (let ((res-parse (E:parse my-parser xml-doc #t)))
      (check (not (bad-parse? res-parse))
             "entities"
             (fs "Parser error: ~A"
                 (err-sym-n-str my-parser)))
      (check (equal? (acc)
                     '((doctype-decl-start
                        "doc" #f #f #t)
                       (notation-decl
                        "vrml" #f #f "VRML 2")
                       (entity-decl
                        "Antarctica" #f #f #f "http://www.antarctica.net" #f "vrml")
                       (doctype-decl-end)))
             "entities"
             (fs "Handler error: composed the following user data: ~S"
                 (acc))))))

(test "Test the error string reporting"
  (let ((msg (E:error-string 'XML_ERROR_UNCLOSED_TOKEN)))
    (check (> (string-length msg) 1)
           "error string not empty"
           (fs "Returned message was `~A'" msg))))

(test "Create and print an encoding"
  (let ((encoding (sixty-seven-and-eight-encoding)))
    (trace "~S" encoding)
    #t))

(test "Use a document with characters which have a wrong encoding"
  ;; Wrong encoding in the document should raise an error.
  (let* ((doc "<?xml version='1.0' encoding='US-ASCII'
standalone='yes'?> <body>Les Franais ne sont pas ascii</body>")
         (my-parser (E:parser-create))
         (res-parse (E:parse my-parser doc #t)))
    (check (bad-parse? res-parse)
           "Expect an encoding error"
           "Error, expecting an encoding error")
    (if (bad-parse? res-parse)
        (let ((es (E:error-symbol my-parser)))
          (check (eqv? es 'XML_ERROR_INVALID_TOKEN)
                 "Ok, got an INVALID TOKEN error as expected"
                 (fs "Error: expecting ~A, got ~A"
                     'XML_ERROR_INVALID_TOKEN
                     (err-sym-n-str my-parser)))))))

';;; DISABLED.  TODO: unknown encoding handlers are not easy to test.
(test "Test parsing of unknown encoding"
  (let ((my-parser (E:parser-create "thb-encoding")))
    (E:hset! my-parser
             'unknown-encoding
             (lambda (name)
               (if (string=? name "thb-encoding")
                   (let ((encoding (sixty-seven-and-eight-encoding)))
                     (trace "Recognized encoding ~S" name)
                     encoding))))
    (let ((res-parse (E:parse my-parser my-document #t)))
      (check (not (bad-parse? res-parse))
             "Unknown encoding handler"
             (fs "Parser error: ~A"
                 (err-sym-n-str my-parser))))))

(test "Use the base"
  (let ((my-parser (E:parser-create))
        (my-base "My base"))
    (E:hset! my-parser
             'notation-decl
             (lambda (notation-name base system-id public-id)
               (trace " NOTATION: ~A=~S, ~A=~S, ~A=~S, ~A=~S "
                      'notation-name notation-name
                      'base base
                      'system-id system-id
                      'public-id public-id)
               (check (equal? my-base base)
                      "in handler"
                      (fs "expected ~S but got ~S" my-base base))))
    (let ((base (E:get-base my-parser)))
      (check (not base)
             "initially #f"
             (fs "expected ~S but got ~S" #f base)))
    (E:set-base my-parser my-base)
    (let ((base (E:get-base my-parser)))
      (check (equal? my-base base)
             "from E:get-base"
             (fs "expected ~S but got ~S" my-base base)))
    (let ((res-parse (E:parse my-parser my-document #t)))
      (check (not (bad-parse? res-parse))
             "parse w/ base"
             (fs "Parser error: ~A"
                 (err-sym-n-str my-parser))))))

(define (unicode-string . bytes)
  (cond-expand (guile-2.2 (bytevector->string
                           (u8-list->bytevector bytes)
                           "UTF-8"))
               (else (map integer->char bytes))))

(test "Test the encoding conversion functions"
  (let ((from "an ASCII string"))
    (check (equal? (U:utf8->latin1 from) from)
           "U:utf8->latin1 on ASCII"
           (fs "error with ascii string ~A != ~A~%"
               (U:utf8->latin1 from) from)))
  (let ((from (unicode-string #x42 #xc3 #xa9 #x7a #x65
                              #x63 #x6f #x75 #x72 #x74))
        (to "Bzecourt"))
    (check (equal? (U:utf8->latin1 from) to)
           "U:utf8->latin1 on Latin1"
           "error with Latin1 string~%"))
  ;; From rfc-2279
  (let ((from (map integer->char '(#x41 #xE2 #x89 #xA2 #xCE #x91 #x2E)))
        (to (map (lambda (c) (map integer->char c))
                 '((#x00 #x41) (#x22 #x62) (#x03 #x91) (#x00 #x2E)))))
    (check (equal? (U:utf8->ucs2 from) to)
           "U:utf8->ucs2"
           "error with rfc-2279 sample #1~%"))
  (let ((from (map integer->char
                   '(#xED #x95 #x9C #xEA #xB5 #xAD #xEC #x96 #xB4)))
        (to (map (lambda (c) (map integer->char c))
                 '((#xD5 #x5C) (#xAD #x6D) (#xC5 #xB4)))))
    (check (equal? (U:utf8->ucs2 from) to)
           "U:utf8->ucs2"
           "error with rfc-2279 sample #2~%"))
  (let ((from (map integer->char
                   '(#xE6 #x97 #xA5 #xE6 #x9C #xAC #xE8 #xAA #x9E)))
        (to (map (lambda (c) (map integer->char c))
                 '((#x65 #xE5) (#x67 #x2C) (#x8A #x9E)))))
    (check (equal? (U:utf8->ucs2 from) to)
           "U:utf8->ucs2"
           "error with rfc-2279 sample #3~%"))
  (check-exc (let ((from (map integer->char '(#xFF #xFF))))
               (U:utf8->ucs2 from))
             'invalid-utf8
             "U:utf8->ucs2 on invalid input"
             "An error was expected with invalid UTF-8 input (1)")
  (check-exc (U:utf8->ucs2 (map integer->char '(#xC0 #x80)))
             'invalid-utf8
             "U:utf8->ucs2 on invalid input"
             "An error was expected with invalid UTF-8 input (2)")
  (check-exc (U:utf8->ucs2 (map integer->char '(#xED #xA1 #x8C #xED #xBE #xB4)))
             'no-ucs2
             "U:utf8->ucs2 on invalid input"
             "An error was expected with invalid UTF-8 input (3)"))

(test "Test the XML to tree conversion"
  (let ((document "<foo><bar>Some text</bar><void/></foo>"))
    (let ((xml-tree (call-with-input-string document
                                            U:xml->tree))
          (expected '((element ("foo" ())
                        (element ("bar" ())
                          (character-data "Some text"))
                        (element ("void" ()))))))
      (check (equal? xml-tree expected)
             "xml->tree"
             (fs "tree built from XML different from expected tree ~A~%!=~%~A"
                 xml-tree expected))))
  (let* ((file (testfile "REC-xml-20081126.xml"))
         (xml-tree (call-with-input-file file U:xml->tree)))
    (define (nth n ls)
      (list-ref ls n))
    (check (equal? (nth 1 (nth 37 (nth 22 (nth 15 (nth 1 xml-tree)))))
                   '("div2" (("id" . "sec-predefined-ent"))))
           "check an element in the tree"
           "failed")))

(test "Test the DTD parsing feature"
  (let ((my-document
         (fs "<!DOCTYPE numbers SYSTEM ~S>
<numbers>&numbers;</numbers>" (testfile "test.dtd")))
        (my-parser (E:parser-create))
        (str ""))
    ;; Make sure sub.inc is available in cwd.
    (let* ((sub "sub.inc")
           (here (in-vicinity "." sub)))
      (or (file-exists? here)
          (copy-file (testfile sub) here)))
    (E:set-param-entity-parsing
     my-parser 'XML_PARAM_ENTITY_PARSING_ALWAYS)
    (E:hset! my-parser
             'character-data
             (lambda (value)
               (set! str (string-append str value)))
             'external-entity-ref
             (lambda (context base system-id public-id)
               (open-file system-id "r")))
    (let ((res-parse (E:parse my-parser
                              my-document
                              #t)))
      (check (not (bad-parse? res-parse))
             "parse"
             (fs "Parser error: ~A"
                 (err-sym-n-str my-parser)))
      (check (equal? str "0, 2, 4, 6, 8, 1, 3, 5, 7, 9")
             "check the entity expansion"
             "failed"))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Main program : executes test functions, and catches the errors
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(let ((errors-list '()))
  (for-each (lambda (fn)
              (let* ((failed-count-before (count test-failed))
                     (name (procedure-documentation fn)))
                (fso "* ~A~%" name)
                (fn)
                (if (= (count test-failed) failed-count-before)
                    (fso "=> ok")
                    (fso "=> not ok: ~A tests failed"
                         (- (count test-failed)
                            failed-count-before)))
                (fso "~%~%")))
            TESTS)
  (if (zero? (count test-failed))
      (fso "All tests are successful~%")
      (begin
        (fso "The following tests failed:~%")
        (for-each (lambda (t)
                    (fso "\t~A: ~A~%" (car t) (cadr t)))
                  (test-failed))
        (exit #f))))

(cond (memcheck? (gc) (sleep 5)
                 (gc) (sleep 5)
                 (gc) (sleep 5)
                 (gc) (sleep 5)))

;;; x001 ends here
