aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2023-04-10 22:46:01 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2023-04-10 23:45:29 +0200
commitd59edcd022a73e0ec9460d95a092f050edf411b1 (patch)
treee71cb7f642f7231c73c4227ca43e4d998d1d697a
parentAdd (sxml util). (diff)
downloadcalp-d59edcd022a73e0ec9460d95a092f050edf411b1.tar.gz
calp-d59edcd022a73e0ec9460d95a092f050edf411b1.tar.xz
Add (sxml namespaced).
-rw-r--r--doc/ref/guile.texi1
-rw-r--r--doc/ref/guile/sxml.texi97
-rw-r--r--module/sxml/namespaced.scm257
-rw-r--r--tests/test/sxml-namespaced.scm170
4 files changed, 525 insertions, 0 deletions
diff --git a/doc/ref/guile.texi b/doc/ref/guile.texi
index a6c5ebe4..970e8dee 100644
--- a/doc/ref/guile.texi
+++ b/doc/ref/guile.texi
@@ -10,6 +10,7 @@
@include guile/base64.texi
@include guile/web.texi
@include guile/vcomponent.texi
+@include guile/sxml.texi
@node Errors and Conditions
@section Errors and Conditions
diff --git a/doc/ref/guile/sxml.texi b/doc/ref/guile/sxml.texi
new file mode 100644
index 00000000..8b0246f7
--- /dev/null
+++ b/doc/ref/guile/sxml.texi
@@ -0,0 +1,97 @@
+@node sxml namespaced
+@section Namespaced SXML
+
+Namespaces is a variant to ``regular'' SXML. Difference being that
+instead of representing XML-tags as symbols, they are instead actual
+objects.
+
+For example
+@example
+`(a (b "Content"))
+@end example
+
+Would be represented as
+@example
+`(,(xml 'a)
+ (,(xml 'b)
+ "Content"))
+@end example
+
+@defun namespaced-sxml->sxml tree [namespace-prefixes='()]
+Takes a tree of namespaced-sxml, and optionally an assoc list from
+namespace symbols, to prefered prefix.
+
+Returns a sxml tree, with xmlns:<prefix>=namespace attributes
+@end defun
+
+@defun namespaced-sxml->xml tree [namespaces='()] [port='(current-output-port)]
+Serializes the namespaced sxml tree to port. @var{namespaces} should
+be an association list from namespace symbols, to prefered prefixes.
+@end defun
+
+@defun namespaced-sxml->sxml/namespaces tree [namespace-prefixes='()]
+Returns two values:
+@itemize
+@item An SXML tree (which doesn't have namespace attributes)
+@item an association list from namespace symbols, to used prefixes.
+@end itemize
+@end defun
+
+@c xml->namespcaed-sxml and sxml->namespaced-sxml don't share
+@c implementation, despite doing almost the same thing. This is since
+@c xml->namespaced-sxml directly uses the ssax parser, giving us great
+@c controll, while sxml->namespaced-sxml attempt to look at symbols.
+
+@defun xml->namespaced-sxml port-or-string
+Reads xml from port, and return a namespaced SXML tree.
+@end defun
+
+@defun sxml->namespaced-sxml tree namespaces
+Converts a ``regular'' SXML tree into a namespaced sxml tree.
+@var{namespaces} must be an association list which maps each prefix
+used in @var{tree} onto a full XML namespace.
+@end defun
+
+@defun xml tag
+@defunx xml ns tag [attrs]
+@anchor{xml-tag}
+ A single XML element, suitable to go as the car of a list to
+ create a full object.
+
+ @var{xml} is a shorthand to @code{make-xml-element}, which
+ either takes just a tag (for non-namespaced elements), or a
+ namespace, a tag, and a list of attributes.
+
+ @itemize
+ @item @var{tag} should be a symbol.
+ @item @var{ns} should be a symbol.
+ @item @var{attrs} should be a hash table.
+ @end itemize
+
+ @defun make-xml-element tagname namespace attributes
+ @end defun
+
+ @defun xml-element? x
+ @end defun
+
+ @defun xml-element-tagname el
+ @end defun
+
+ @defun xml-element-namespace el
+ @end defun
+
+ @defun xml-element-attributes el
+ @end defun
+@end defun
+
+
+@defun make-pi-element tag body
+ @defun pi-element? x
+ @end defun
+
+ @defun pi-tag pi
+ @end defun
+
+ @defun pi-body pi
+ @end defun
+@end defun
diff --git a/module/sxml/namespaced.scm b/module/sxml/namespaced.scm
new file mode 100644
index 00000000..746b98bb
--- /dev/null
+++ b/module/sxml/namespaced.scm
@@ -0,0 +1,257 @@
+(define-module (sxml namespaced)
+ :use-module (sxml ssax)
+ :use-module (sxml util)
+ :use-module (ice-9 match)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-9)
+ :use-module (srfi srfi-9 gnu)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (hnh util state-monad)
+ :use-module ((hnh util io) :select (->port))
+ :export (xml->namespaced-sxml
+ namespaced-sxml->xml
+ namespaced-sxml->sxml
+ namespaced-sxml->sxml/namespaces
+ sxml->namespaced-sxml
+ xml
+
+ make-xml-element
+ xml-element?
+ xml-element-tagname xml-element-namespace xml-element-attributes
+
+ make-pi-element
+ pi-element?
+ pi-tag
+ pi-body
+ ))
+
+;; XML processing instruction elements (and other things with identical syntax)
+;; For example: <?xml version="1.0" encoding="utf-8"?> would be encoded as
+;; (make-pi-element 'xml "version=\"1.0\" encoding=\"utf-8\"")
+;; tag should always be a symbol
+;; body should always be a string
+(define-record-type <pi-element>
+ (make-pi-element tag body)
+ pi-element?
+ (tag pi-tag)
+ (body pi-body))
+
+
+(define-record-type <xml-element>
+ (make-xml-element tagname namespace attributes)
+ xml-element?
+ (tagname xml-element-tagname)
+ (namespace xml-element-namespace)
+ (attributes xml-element-attributes))
+
+
+(define xml
+ (case-lambda
+ ((tag) (make-xml-element tag #f '()))
+ ((ns tag) (make-xml-element tag ns '()))
+ ((ns tag attrs) (make-xml-element tag ns attrs))))
+
+
+(define* (parser key: trim-whitespace?)
+ (ssax:make-parser
+
+ ;; DOCTYPE
+ ;; (lambda (port docname systemid internal-subset? seed)
+ ;; (format (current-error-port)
+ ;; "doctype: port=~s, docname=~s, systemid=~s, internal-subset?=~s, seed=~s~%"
+ ;; port docname systemid internal-subset? seed)
+ ;; (values #f '() '() seed))
+
+ ;; UNDECL-ROOT
+ ;; (lambda (elem-gi seed)
+ ;; (format (current-error-port) "Undecl-root: ~s~%" elem-gi)
+ ;; (values #f '() '() seed))
+
+ ;; DECL-ROOT
+ ;; (lambda (elem-gi seed)
+ ;; (format (current-error-port) "Decl-root: ~s~%" elem-gi)
+ ;; seed)
+
+ NEW-LEVEL-SEED
+ (lambda (elem-gi attributes namespaces expected-content seed)
+ (cons
+ (list
+ (match elem-gi
+ ((ns . tag) (make-xml-element tag ns attributes))
+ (tag (make-xml-element tag #f attributes))))
+ seed))
+
+ FINISH-ELEMENT
+ (lambda (elem-gi attributes namespaces parent-seed seed)
+ (match seed
+ (((self . self-children) (parent . children) . rest)
+ `((,parent (,self ,@(reverse self-children)) ,@children)
+ ,@rest))))
+
+ CHAR-DATA-HANDLER
+ (lambda (str1 str2 seed)
+ (define s
+ (if trim-whitespace?
+ (string-trim-both (string-append str1 str2))
+ (string-append str1 str2)))
+ (cond ((string-null? s) seed)
+ (else
+ (match seed
+ (((parent . children) . rest)
+ `((,parent ,(string-append str1 str2)
+ ,@children)
+ ,@rest))))))
+
+ PI
+ ((*DEFAULT* . (lambda (port pi-tag seed)
+ (let ((body (ssax:read-pi-body-as-string port)))
+ (match seed
+ (((parent . children) . rest)
+ `((,parent ,(make-pi-element pi-tag body) ,@children)
+ ,@rest)))))))
+ ))
+
+
+(define* (xml->namespaced-sxml port-or-string key: (trim-whitespace? #t))
+ (match (with-ssax-error-to-port
+ (current-error-port)
+ (lambda () ((parser trim-whitespace?: trim-whitespace?)
+ (->port port-or-string)
+ '((*TOP*)))))
+ ((('*TOP* . items))
+ `(*TOP* ,@(reverse items)))))
+
+(define (pi-element->sxml pi)
+ `(*PI* ,(pi-tag pi) ,(pi-body pi)))
+
+
+
+(define (ns-pair->attribute pair)
+ (let ((fqdn short (car+cdr pair)))
+ (list (string->symbol (format #f "xmlns:~a" short))
+ (symbol->string fqdn))))
+
+;; Takes an association list from full namespace names (as symbols), to their
+;; short forms, and returns a list containing xmlns:x-attributes suitable for
+;; splicing into scheme's "regular" sxml.
+(define (ns-alist->attributes ns)
+ (map ns-pair->attribute ns))
+
+
+
+(define (get-prefix ns)
+ (do namespaces <- (get)
+ (cond ((assq-ref namespaces ns) => return)
+ (else (do prefix = (gensym "ns")
+ (put (acons ns prefix namespaces))
+ (return prefix))))))
+
+
+(define (xml-element->sxml el)
+ (do tag <- (cond ((xml-element-namespace el)
+ => (lambda (ns)
+ (do pre <- (get-prefix ns)
+ (return
+ (string->symbol
+ (format #f "~a:~a" pre (xml-element-tagname el)))))))
+ (else (return (xml-element-tagname el))))
+ (return
+ (lambda (children)
+ (cond ((null? (xml-element-attributes el))
+ `(,tag ,@children))
+ (else
+ `(,tag (@ ,@(map (lambda (p)
+ (call-with-values (lambda () (car+cdr p)) list))
+ (xml-element-attributes el)))
+ ,@children)))))))
+
+(define (sxml->xml-element el namespaces)
+ (lambda (children)
+ (let ((tag-symb attrs
+ (match el
+ ((tag ('@ attrs ...))
+ (values tag (map (lambda (p) (apply cons p)) attrs)))
+ ((tag) (values tag '())))))
+ (let ((parts (string-split (symbol->string tag-symb) #\:)))
+ (cons (case (length parts)
+ ((1) (xml #f (string->symbol (car parts)) attrs))
+ ((2)
+ (cond ((assoc-ref namespaces (string->symbol (car parts)))
+ => (lambda (ns) (xml ns (string->symbol (cadr parts)) attrs)))
+ (else (scm-error 'missing-namespace "sxml->xml-element"
+ "Unknown namespace prefix encountered: ~s (on tag ~s)"
+ (list (car parts) (cadr parts))
+ #f))))
+ (else (scm-error 'misc-error "sxml->xml-element"
+ "Invalid QName: more than one colon ~s"
+ (list tag-symb) #f)))
+ children)))))
+
+
+(define (namespaced-sxml->sxml* tree)
+ (cond ((null? tree) (return tree))
+ ((string? tree) (return tree))
+ ((pi-element? tree) (return (pi-element->sxml tree)))
+ ((not (pair? tree)) (return tree))
+ ((car tree) symbol?
+ => (lambda (symb)
+ (case symb
+ ((*TOP*) (do children <- (sequence (map namespaced-sxml->sxml*
+ (cdr tree)))
+
+ (return (cons '*TOP* children))))
+ (else (return tree)))))
+ ((xml-element? (car tree))
+ (do proc <- (xml-element->sxml (car tree))
+ children <- (sequence (map namespaced-sxml->sxml* (cdr tree)))
+ (return (proc children))))
+
+ ;; list of xml-element?
+ (else (scm-error 'misc-error "namespaced-sxml->sxml*"
+ "Unexpected token in tree: ~s"
+ (list tree)
+ #f))))
+
+
+;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix.
+;; Returns a sxml tree, with xmlns:<prefix>=namespace attributes
+(define* (namespaced-sxml->sxml tree optional: (namespace-prefixes '()))
+ (let ((tree ns ((namespaced-sxml->sxml* tree) namespace-prefixes)))
+ ((get-root-element tree)
+ (lambda (root)
+ (add-attributes root (ns-alist->attributes ns))))))
+
+(define* (namespaced-sxml->xml tree key:
+ (namespaces '())
+ (port (current-output-port)))
+ ((@ (sxml simple) sxml->xml) (namespaced-sxml->sxml tree namespaces) port))
+
+;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix.
+;; Returns two values: a sxml tree without declared namespaces
+;; and a association list from namespace symbols, to used prefixes
+(define* (namespaced-sxml->sxml/namespaces tree optional: (namespace-prefixes '()))
+ ((namespaced-sxml->sxml* tree) namespace-prefixes))
+
+;; Takes an sxml tree, and an association list from prefixes to namespaces
+;; Returns a namespaced sxml tree
+(define (sxml->namespaced-sxml tree namespaces)
+ (match tree
+ (('*PI* tag body) (make-pi-element tag body))
+ (('*TOP* rest ...)
+ `(*TOP* ,@(map (lambda (r) (sxml->namespaced-sxml r namespaces))
+ rest)))
+ ((el ('@ attrs ...) rest ...)
+ ((sxml->xml-element `(,el (@ ,@attrs)) namespaces)
+ (map (lambda (el) (sxml->namespaced-sxml el namespaces))
+ rest)))
+ ((el rest ...)
+ ((sxml->xml-element `(,el) namespaces)
+ (map (lambda (el) (sxml->namespaced-sxml el namespaces))
+ rest)))
+ (atom atom)))
+
+;;; TODO read intro-comment in SSAX file
+;;; TODO Figure out how to still use (sxml match) and (sxml xpath) with these
+;;; new trees (probably rewriting to a "regular" sxml tree, and keeping
+;;; a strict mapping of namespaces)
diff --git a/tests/test/sxml-namespaced.scm b/tests/test/sxml-namespaced.scm
new file mode 100644
index 00000000..55d52798
--- /dev/null
+++ b/tests/test/sxml-namespaced.scm
@@ -0,0 +1,170 @@
+(define-module (test sxml-namespaced)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-64 test-error)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 match)
+ :use-module (sxml namespaced)
+ :use-module (hnh util state-monad)
+ )
+
+;;; TODO tests with attributes
+
+(define (ns x)
+ (string->symbol (format #f "http://example.com/~a" x)))
+
+(define (namespaced-symbol ns symb)
+ (string->symbol (format #f "~a:~a" ns symb)))
+
+
+
+(test-group "XML constructor utility procedure"
+ (test-equal "3 args"
+ (make-xml-element 'tagname 'namespace 'attributes)
+ (xml 'namespace 'tagname 'attributes))
+
+ (test-equal "2 args"
+ (make-xml-element 'tagname 'namespace '())
+ (xml 'namespace 'tagname))
+
+ (test-equal "1 args"
+ (make-xml-element 'tagname #f '())
+ (xml 'tagname)))
+
+
+
+(test-group "xml->namespaced-sxml"
+
+ (test-equal
+ `(*TOP* (,(xml 'tag)))
+ (xml->namespaced-sxml "<tag/>"))
+
+ (test-equal
+ `(*TOP* (,(xml 'ns1 'tag)))
+ (xml->namespaced-sxml "<tag xmlns='ns1'/>"))
+
+ (test-equal
+ `(*TOP* (,(xml 'ns2 'tag)))
+ (xml->namespaced-sxml "<x:tag xmlns='ns1' xmlns:x='ns2'/>"))
+
+ (test-equal
+ `(*TOP* (,(xml 'ns2 'tag)
+ (,(xml 'ns1 'tag))))
+ (xml->namespaced-sxml "<x:tag xmlns='ns1' xmlns:x='ns2'><tag/></x:tag>"))
+
+ (test-equal "PI are passed directly"
+ `(*TOP* ,(make-pi-element 'xml "encoding=\"utf-8\" version=\"1.0\"")
+ (,(xml 'tag)))
+ (xml->namespaced-sxml "<?xml encoding=\"utf-8\" version=\"1.0\"?><tag/>"))
+
+ (test-equal "Document with whitespace in it"
+ `(*TOP* ,(make-pi-element 'xml "")
+ (,(xml 'root)
+ " "
+ (,(xml 'a))
+ ))
+ (xml->namespaced-sxml "<?xml?><root> <a/></root>"
+ trim-whitespace?: #f))
+
+ ;; TODO is this expected? xml->sxml discards it.
+ (test-equal "Whitespace before root is kept"
+ `(*TOP* ,(make-pi-element 'xml "")
+ (,(xml 'root)))
+ (xml->namespaced-sxml "<?xml?> <root/>")))
+
+
+
+;;; NOTE that sxml->namespaced-sxml currently ignores any existing xmlns
+;;; attributes, since xml->sxml doesn't have those.
+(test-group "sxml->namespaced-sxml"
+ (test-equal "Simplest"
+ `(,(xml 'a)) (sxml->namespaced-sxml '(a) '()))
+ (test-equal "With *TOP*"
+ `(*TOP* (,(xml 'a))) (sxml->namespaced-sxml '(*TOP* (a)) '()))
+ (test-equal "Simplest with namespace"
+ `(,(xml (ns 1) 'a))
+ (sxml->namespaced-sxml '(x:a)
+ `((x . ,(ns 1)))))
+ (test-equal "With pi"
+ `(*TOP* ,(make-pi-element 'xml "test")
+ (,(xml 'a)))
+ (sxml->namespaced-sxml
+ `(*TOP*
+ (*PI* xml "test")
+ (a))
+ '()))
+ (test-error "With unknown namespace"
+ 'missing-namespace
+ (sxml->namespaced-sxml '(x:a) '())))
+
+
+
+(test-group "namespaced-sxml->*"
+
+ ;; /namespaces is the most "primitive" one
+ (test-group "/namespaces"
+ (test-group "Without namespaces"
+ (call-with-values
+ (lambda ()
+ (namespaced-sxml->sxml/namespaces
+ `(*TOP*
+ (,(xml 'a)))))
+ (lambda (tree namespaces)
+ (test-equal `(*TOP* (a)) tree)
+ (test-equal '() namespaces))))
+
+ (test-group "With namespaces"
+ (call-with-values
+ (lambda ()
+ (namespaced-sxml->sxml/namespaces
+ `(*TOP*
+ (,(xml (ns 1) 'a)
+ (,(xml (ns 2) 'a))
+ (,(xml 'a))))))
+ (lambda (tree nss)
+ (test-eqv 2 (length nss))
+ (test-equal
+ `(*TOP*
+ (,(namespaced-symbol (assoc-ref nss (ns 1)) 'a)
+ (,(namespaced-symbol (assoc-ref nss (ns 2)) 'a))
+ (a)))
+ tree))))
+
+ (test-group "*PI*"
+ (call-with-values
+ (lambda ()
+ (namespaced-sxml->sxml/namespaces
+ `(*TOP*
+ ,(make-pi-element 'xml "test")
+ (,(xml 'a)))))
+ (lambda (tree namespaces)
+ (test-equal '() namespaces)
+ (test-equal `(*TOP* (*PI* xml "test")
+ (a))
+ tree)))))
+
+ (test-group "namespaced-sxml->sxml"
+ (test-equal "Without namespaces"
+ '(*TOP* (a (@)))
+ (namespaced-sxml->sxml `(*TOP* (,(xml 'a)))))
+
+ (test-group "With namespaces"
+ (match (namespaced-sxml->sxml `(*TOP* (,(xml (ns 1) 'a))))
+ ;; (ns 1) hard coded to work with match
+ (`(*TOP* (,el (@ (,key "http://example.com/1"))))
+ (let ((el-pair (string-split (symbol->string el) #\:))
+ (key-pair (string-split (symbol->string key) #\:)))
+ (test-equal "a" (cadr el-pair))
+ (test-equal "xmlns" (car key-pair))
+ (test-equal (car el-pair) (cadr key-pair))))
+ (any
+ (test-assert (format #f "Match failed: ~s" any) #f))))))
+
+;; (namespaced-sxml->xml)
+;; Literal strings
+
+
+(test-error "Namespaces x is missing, note error"
+ 'parser-error
+ (xml->namespaced-sxml "<x:a xmlns:y=\"http://example.com/1\"><x:b/></x:a>"
+ ; `((x . ,(ns 1)))
+ ))