From d59edcd022a73e0ec9460d95a092f050edf411b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Mon, 10 Apr 2023 22:46:01 +0200 Subject: Add (sxml namespaced). --- doc/ref/guile.texi | 1 + doc/ref/guile/sxml.texi | 97 ++++++++++++++++ module/sxml/namespaced.scm | 257 +++++++++++++++++++++++++++++++++++++++++ tests/test/sxml-namespaced.scm | 170 +++++++++++++++++++++++++++ 4 files changed, 525 insertions(+) create mode 100644 doc/ref/guile/sxml.texi create mode 100644 module/sxml/namespaced.scm create mode 100644 tests/test/sxml-namespaced.scm diff --git a/doc/ref/guile.texi b/doc/ref/guile.texi index a6c5ebe4..970e8dee 100644 --- a/doc/ref/guile.texi +++ b/doc/ref/guile.texi @@ -10,6 +10,7 @@ @include guile/base64.texi @include guile/web.texi @include guile/vcomponent.texi +@include guile/sxml.texi @node Errors and Conditions @section Errors and Conditions diff --git a/doc/ref/guile/sxml.texi b/doc/ref/guile/sxml.texi new file mode 100644 index 00000000..8b0246f7 --- /dev/null +++ b/doc/ref/guile/sxml.texi @@ -0,0 +1,97 @@ +@node sxml namespaced +@section Namespaced SXML + +Namespaces is a variant to ``regular'' SXML. Difference being that +instead of representing XML-tags as symbols, they are instead actual +objects. + +For example +@example +`(a (b "Content")) +@end example + +Would be represented as +@example +`(,(xml 'a) + (,(xml 'b) + "Content")) +@end example + +@defun namespaced-sxml->sxml tree [namespace-prefixes='()] +Takes a tree of namespaced-sxml, and optionally an assoc list from +namespace symbols, to prefered prefix. + +Returns a sxml tree, with xmlns:=namespace attributes +@end defun + +@defun namespaced-sxml->xml tree [namespaces='()] [port='(current-output-port)] +Serializes the namespaced sxml tree to port. @var{namespaces} should +be an association list from namespace symbols, to prefered prefixes. +@end defun + +@defun namespaced-sxml->sxml/namespaces tree [namespace-prefixes='()] +Returns two values: +@itemize +@item An SXML tree (which doesn't have namespace attributes) +@item an association list from namespace symbols, to used prefixes. +@end itemize +@end defun + +@c xml->namespcaed-sxml and sxml->namespaced-sxml don't share +@c implementation, despite doing almost the same thing. This is since +@c xml->namespaced-sxml directly uses the ssax parser, giving us great +@c controll, while sxml->namespaced-sxml attempt to look at symbols. + +@defun xml->namespaced-sxml port-or-string +Reads xml from port, and return a namespaced SXML tree. +@end defun + +@defun sxml->namespaced-sxml tree namespaces +Converts a ``regular'' SXML tree into a namespaced sxml tree. +@var{namespaces} must be an association list which maps each prefix +used in @var{tree} onto a full XML namespace. +@end defun + +@defun xml tag +@defunx xml ns tag [attrs] +@anchor{xml-tag} + A single XML element, suitable to go as the car of a list to + create a full object. + + @var{xml} is a shorthand to @code{make-xml-element}, which + either takes just a tag (for non-namespaced elements), or a + namespace, a tag, and a list of attributes. + + @itemize + @item @var{tag} should be a symbol. + @item @var{ns} should be a symbol. + @item @var{attrs} should be a hash table. + @end itemize + + @defun make-xml-element tagname namespace attributes + @end defun + + @defun xml-element? x + @end defun + + @defun xml-element-tagname el + @end defun + + @defun xml-element-namespace el + @end defun + + @defun xml-element-attributes el + @end defun +@end defun + + +@defun make-pi-element tag body + @defun pi-element? x + @end defun + + @defun pi-tag pi + @end defun + + @defun pi-body pi + @end defun +@end defun diff --git a/module/sxml/namespaced.scm b/module/sxml/namespaced.scm new file mode 100644 index 00000000..746b98bb --- /dev/null +++ b/module/sxml/namespaced.scm @@ -0,0 +1,257 @@ +(define-module (sxml namespaced) + :use-module (sxml ssax) + :use-module (sxml util) + :use-module (ice-9 match) + :use-module (srfi srfi-1) + :use-module (srfi srfi-9) + :use-module (srfi srfi-9 gnu) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (hnh util state-monad) + :use-module ((hnh util io) :select (->port)) + :export (xml->namespaced-sxml + namespaced-sxml->xml + namespaced-sxml->sxml + namespaced-sxml->sxml/namespaces + sxml->namespaced-sxml + xml + + make-xml-element + xml-element? + xml-element-tagname xml-element-namespace xml-element-attributes + + make-pi-element + pi-element? + pi-tag + pi-body + )) + +;; XML processing instruction elements (and other things with identical syntax) +;; For example: would be encoded as +;; (make-pi-element 'xml "version=\"1.0\" encoding=\"utf-8\"") +;; tag should always be a symbol +;; body should always be a string +(define-record-type + (make-pi-element tag body) + pi-element? + (tag pi-tag) + (body pi-body)) + + +(define-record-type + (make-xml-element tagname namespace attributes) + xml-element? + (tagname xml-element-tagname) + (namespace xml-element-namespace) + (attributes xml-element-attributes)) + + +(define xml + (case-lambda + ((tag) (make-xml-element tag #f '())) + ((ns tag) (make-xml-element tag ns '())) + ((ns tag attrs) (make-xml-element tag ns attrs)))) + + +(define* (parser key: trim-whitespace?) + (ssax:make-parser + + ;; DOCTYPE + ;; (lambda (port docname systemid internal-subset? seed) + ;; (format (current-error-port) + ;; "doctype: port=~s, docname=~s, systemid=~s, internal-subset?=~s, seed=~s~%" + ;; port docname systemid internal-subset? seed) + ;; (values #f '() '() seed)) + + ;; UNDECL-ROOT + ;; (lambda (elem-gi seed) + ;; (format (current-error-port) "Undecl-root: ~s~%" elem-gi) + ;; (values #f '() '() seed)) + + ;; DECL-ROOT + ;; (lambda (elem-gi seed) + ;; (format (current-error-port) "Decl-root: ~s~%" elem-gi) + ;; seed) + + NEW-LEVEL-SEED + (lambda (elem-gi attributes namespaces expected-content seed) + (cons + (list + (match elem-gi + ((ns . tag) (make-xml-element tag ns attributes)) + (tag (make-xml-element tag #f attributes)))) + seed)) + + FINISH-ELEMENT + (lambda (elem-gi attributes namespaces parent-seed seed) + (match seed + (((self . self-children) (parent . children) . rest) + `((,parent (,self ,@(reverse self-children)) ,@children) + ,@rest)))) + + CHAR-DATA-HANDLER + (lambda (str1 str2 seed) + (define s + (if trim-whitespace? + (string-trim-both (string-append str1 str2)) + (string-append str1 str2))) + (cond ((string-null? s) seed) + (else + (match seed + (((parent . children) . rest) + `((,parent ,(string-append str1 str2) + ,@children) + ,@rest)))))) + + PI + ((*DEFAULT* . (lambda (port pi-tag seed) + (let ((body (ssax:read-pi-body-as-string port))) + (match seed + (((parent . children) . rest) + `((,parent ,(make-pi-element pi-tag body) ,@children) + ,@rest))))))) + )) + + +(define* (xml->namespaced-sxml port-or-string key: (trim-whitespace? #t)) + (match (with-ssax-error-to-port + (current-error-port) + (lambda () ((parser trim-whitespace?: trim-whitespace?) + (->port port-or-string) + '((*TOP*))))) + ((('*TOP* . items)) + `(*TOP* ,@(reverse items))))) + +(define (pi-element->sxml pi) + `(*PI* ,(pi-tag pi) ,(pi-body pi))) + + + +(define (ns-pair->attribute pair) + (let ((fqdn short (car+cdr pair))) + (list (string->symbol (format #f "xmlns:~a" short)) + (symbol->string fqdn)))) + +;; Takes an association list from full namespace names (as symbols), to their +;; short forms, and returns a list containing xmlns:x-attributes suitable for +;; splicing into scheme's "regular" sxml. +(define (ns-alist->attributes ns) + (map ns-pair->attribute ns)) + + + +(define (get-prefix ns) + (do namespaces <- (get) + (cond ((assq-ref namespaces ns) => return) + (else (do prefix = (gensym "ns") + (put (acons ns prefix namespaces)) + (return prefix)))))) + + +(define (xml-element->sxml el) + (do tag <- (cond ((xml-element-namespace el) + => (lambda (ns) + (do pre <- (get-prefix ns) + (return + (string->symbol + (format #f "~a:~a" pre (xml-element-tagname el))))))) + (else (return (xml-element-tagname el)))) + (return + (lambda (children) + (cond ((null? (xml-element-attributes el)) + `(,tag ,@children)) + (else + `(,tag (@ ,@(map (lambda (p) + (call-with-values (lambda () (car+cdr p)) list)) + (xml-element-attributes el))) + ,@children))))))) + +(define (sxml->xml-element el namespaces) + (lambda (children) + (let ((tag-symb attrs + (match el + ((tag ('@ attrs ...)) + (values tag (map (lambda (p) (apply cons p)) attrs))) + ((tag) (values tag '()))))) + (let ((parts (string-split (symbol->string tag-symb) #\:))) + (cons (case (length parts) + ((1) (xml #f (string->symbol (car parts)) attrs)) + ((2) + (cond ((assoc-ref namespaces (string->symbol (car parts))) + => (lambda (ns) (xml ns (string->symbol (cadr parts)) attrs))) + (else (scm-error 'missing-namespace "sxml->xml-element" + "Unknown namespace prefix encountered: ~s (on tag ~s)" + (list (car parts) (cadr parts)) + #f)))) + (else (scm-error 'misc-error "sxml->xml-element" + "Invalid QName: more than one colon ~s" + (list tag-symb) #f))) + children))))) + + +(define (namespaced-sxml->sxml* tree) + (cond ((null? tree) (return tree)) + ((string? tree) (return tree)) + ((pi-element? tree) (return (pi-element->sxml tree))) + ((not (pair? tree)) (return tree)) + ((car tree) symbol? + => (lambda (symb) + (case symb + ((*TOP*) (do children <- (sequence (map namespaced-sxml->sxml* + (cdr tree))) + + (return (cons '*TOP* children)))) + (else (return tree))))) + ((xml-element? (car tree)) + (do proc <- (xml-element->sxml (car tree)) + children <- (sequence (map namespaced-sxml->sxml* (cdr tree))) + (return (proc children)))) + + ;; list of xml-element? + (else (scm-error 'misc-error "namespaced-sxml->sxml*" + "Unexpected token in tree: ~s" + (list tree) + #f)))) + + +;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix. +;; Returns a sxml tree, with xmlns:=namespace attributes +(define* (namespaced-sxml->sxml tree optional: (namespace-prefixes '())) + (let ((tree ns ((namespaced-sxml->sxml* tree) namespace-prefixes))) + ((get-root-element tree) + (lambda (root) + (add-attributes root (ns-alist->attributes ns)))))) + +(define* (namespaced-sxml->xml tree key: + (namespaces '()) + (port (current-output-port))) + ((@ (sxml simple) sxml->xml) (namespaced-sxml->sxml tree namespaces) port)) + +;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix. +;; Returns two values: a sxml tree without declared namespaces +;; and a association list from namespace symbols, to used prefixes +(define* (namespaced-sxml->sxml/namespaces tree optional: (namespace-prefixes '())) + ((namespaced-sxml->sxml* tree) namespace-prefixes)) + +;; Takes an sxml tree, and an association list from prefixes to namespaces +;; Returns a namespaced sxml tree +(define (sxml->namespaced-sxml tree namespaces) + (match tree + (('*PI* tag body) (make-pi-element tag body)) + (('*TOP* rest ...) + `(*TOP* ,@(map (lambda (r) (sxml->namespaced-sxml r namespaces)) + rest))) + ((el ('@ attrs ...) rest ...) + ((sxml->xml-element `(,el (@ ,@attrs)) namespaces) + (map (lambda (el) (sxml->namespaced-sxml el namespaces)) + rest))) + ((el rest ...) + ((sxml->xml-element `(,el) namespaces) + (map (lambda (el) (sxml->namespaced-sxml el namespaces)) + rest))) + (atom atom))) + +;;; TODO read intro-comment in SSAX file +;;; TODO Figure out how to still use (sxml match) and (sxml xpath) with these +;;; new trees (probably rewriting to a "regular" sxml tree, and keeping +;;; a strict mapping of namespaces) diff --git a/tests/test/sxml-namespaced.scm b/tests/test/sxml-namespaced.scm new file mode 100644 index 00000000..55d52798 --- /dev/null +++ b/tests/test/sxml-namespaced.scm @@ -0,0 +1,170 @@ +(define-module (test sxml-namespaced) + :use-module (srfi srfi-64) + :use-module (srfi srfi-64 test-error) + :use-module (srfi srfi-88) + :use-module (ice-9 match) + :use-module (sxml namespaced) + :use-module (hnh util state-monad) + ) + +;;; TODO tests with attributes + +(define (ns x) + (string->symbol (format #f "http://example.com/~a" x))) + +(define (namespaced-symbol ns symb) + (string->symbol (format #f "~a:~a" ns symb))) + + + +(test-group "XML constructor utility procedure" + (test-equal "3 args" + (make-xml-element 'tagname 'namespace 'attributes) + (xml 'namespace 'tagname 'attributes)) + + (test-equal "2 args" + (make-xml-element 'tagname 'namespace '()) + (xml 'namespace 'tagname)) + + (test-equal "1 args" + (make-xml-element 'tagname #f '()) + (xml 'tagname))) + + + +(test-group "xml->namespaced-sxml" + + (test-equal + `(*TOP* (,(xml 'tag))) + (xml->namespaced-sxml "")) + + (test-equal + `(*TOP* (,(xml 'ns1 'tag))) + (xml->namespaced-sxml "")) + + (test-equal + `(*TOP* (,(xml 'ns2 'tag))) + (xml->namespaced-sxml "")) + + (test-equal + `(*TOP* (,(xml 'ns2 'tag) + (,(xml 'ns1 'tag)))) + (xml->namespaced-sxml "")) + + (test-equal "PI are passed directly" + `(*TOP* ,(make-pi-element 'xml "encoding=\"utf-8\" version=\"1.0\"") + (,(xml 'tag))) + (xml->namespaced-sxml "")) + + (test-equal "Document with whitespace in it" + `(*TOP* ,(make-pi-element 'xml "") + (,(xml 'root) + " " + (,(xml 'a)) + )) + (xml->namespaced-sxml " " + trim-whitespace?: #f)) + + ;; TODO is this expected? xml->sxml discards it. + (test-equal "Whitespace before root is kept" + `(*TOP* ,(make-pi-element 'xml "") + (,(xml 'root))) + (xml->namespaced-sxml " "))) + + + +;;; NOTE that sxml->namespaced-sxml currently ignores any existing xmlns +;;; attributes, since xml->sxml doesn't have those. +(test-group "sxml->namespaced-sxml" + (test-equal "Simplest" + `(,(xml 'a)) (sxml->namespaced-sxml '(a) '())) + (test-equal "With *TOP*" + `(*TOP* (,(xml 'a))) (sxml->namespaced-sxml '(*TOP* (a)) '())) + (test-equal "Simplest with namespace" + `(,(xml (ns 1) 'a)) + (sxml->namespaced-sxml '(x:a) + `((x . ,(ns 1))))) + (test-equal "With pi" + `(*TOP* ,(make-pi-element 'xml "test") + (,(xml 'a))) + (sxml->namespaced-sxml + `(*TOP* + (*PI* xml "test") + (a)) + '())) + (test-error "With unknown namespace" + 'missing-namespace + (sxml->namespaced-sxml '(x:a) '()))) + + + +(test-group "namespaced-sxml->*" + + ;; /namespaces is the most "primitive" one + (test-group "/namespaces" + (test-group "Without namespaces" + (call-with-values + (lambda () + (namespaced-sxml->sxml/namespaces + `(*TOP* + (,(xml 'a))))) + (lambda (tree namespaces) + (test-equal `(*TOP* (a)) tree) + (test-equal '() namespaces)))) + + (test-group "With namespaces" + (call-with-values + (lambda () + (namespaced-sxml->sxml/namespaces + `(*TOP* + (,(xml (ns 1) 'a) + (,(xml (ns 2) 'a)) + (,(xml 'a)))))) + (lambda (tree nss) + (test-eqv 2 (length nss)) + (test-equal + `(*TOP* + (,(namespaced-symbol (assoc-ref nss (ns 1)) 'a) + (,(namespaced-symbol (assoc-ref nss (ns 2)) 'a)) + (a))) + tree)))) + + (test-group "*PI*" + (call-with-values + (lambda () + (namespaced-sxml->sxml/namespaces + `(*TOP* + ,(make-pi-element 'xml "test") + (,(xml 'a))))) + (lambda (tree namespaces) + (test-equal '() namespaces) + (test-equal `(*TOP* (*PI* xml "test") + (a)) + tree))))) + + (test-group "namespaced-sxml->sxml" + (test-equal "Without namespaces" + '(*TOP* (a (@))) + (namespaced-sxml->sxml `(*TOP* (,(xml 'a))))) + + (test-group "With namespaces" + (match (namespaced-sxml->sxml `(*TOP* (,(xml (ns 1) 'a)))) + ;; (ns 1) hard coded to work with match + (`(*TOP* (,el (@ (,key "http://example.com/1")))) + (let ((el-pair (string-split (symbol->string el) #\:)) + (key-pair (string-split (symbol->string key) #\:))) + (test-equal "a" (cadr el-pair)) + (test-equal "xmlns" (car key-pair)) + (test-equal (car el-pair) (cadr key-pair)))) + (any + (test-assert (format #f "Match failed: ~s" any) #f)))))) + +;; (namespaced-sxml->xml) +;; Literal strings + + +(test-error "Namespaces x is missing, note error" + 'parser-error + (xml->namespaced-sxml "" + ; `((x . ,(ns 1))) + )) -- cgit v1.2.3