aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2023-12-09 00:14:34 +0100
committerHugo Hörnquist <hugo@lysator.liu.se>2023-12-09 00:59:16 +0100
commitc6939d3bc658347f4ee1b1b687f91e9428a59b6d (patch)
tree512fc9382abe107540f9ab4a52872ca83aef43f4
parentRename vcomponent in create module to vcomponent-create. (diff)
downloadcalp-c6939d3bc658347f4ee1b1b687f91e9428a59b6d.tar.gz
calp-c6939d3bc658347f4ee1b1b687f91e9428a59b6d.tar.xz
Rework namespaced sxml type. Still incomplete.
-rw-r--r--module/hnh/util/table.scm14
-rw-r--r--module/sxml/namespaced.scm304
-rw-r--r--tests/unit/sxml/sxml-namespaced.scm108
3 files changed, 277 insertions, 149 deletions
diff --git a/module/hnh/util/table.scm b/module/hnh/util/table.scm
index 67125c8d..94d3c110 100644
--- a/module/hnh/util/table.scm
+++ b/module/hnh/util/table.scm
@@ -16,6 +16,8 @@
(tree-remove . table-remove)
(tree->list . table->list)
(tree? . table?)
+ (tree-empty? . table-empty?)
+ (tree-equal? . table-equal?)
(serialize-tree . serialize-table)
(alist->tree . alist->table)))
@@ -55,6 +57,14 @@
(or (tree-node? x)
(tree-terminal? x)))
+(define (tree-empty? x)
+ (tree-terminal? x))
+
+(define (tree-equal? a b)
+ (or (and (tree-terminal? a) (tree-terminal? b))
+ (tree-equal? (left a) (left b))
+ (tree-equal? (right a) (right b))))
+
;;; A lens
;;; This function (tree-focus)
;;; returns a function (f),
@@ -70,11 +80,11 @@
(cond ((tree-terminal? tree) ;; new node
(tree-node key: k value: (op 'not-a-value)))
((eq? k (key tree)) ;; this node
- (value tree (op (value truee))))
+ (value tree (op (value tree))))
(else
(if (symbol<? k (key tree))
(lens-compose left* (tree-focus (left tree) k))
- (lens-compose right* (tree-focus (right tree k))))))))
+ (lens-compose right* (tree-focus (right tree) k)))))))
(define (tree-put tree k v)
(cond ((tree-terminal? tree) (tree-node key: k value: v))
diff --git a/module/sxml/namespaced.scm b/module/sxml/namespaced.scm
index 9fbcdf08..1c2eb322 100644
--- a/module/sxml/namespaced.scm
+++ b/module/sxml/namespaced.scm
@@ -2,18 +2,23 @@
:use-module (sxml ssax)
:use-module (sxml util)
:use-module (ice-9 match)
+ :use-module (ice-9 pretty-print)
:use-module (srfi srfi-1)
:use-module (srfi srfi-71)
:use-module (srfi srfi-88)
+ :use-module (hnh util)
:use-module (hnh util state-monad)
:use-module ((hnh util io) :select (->port))
:use-module (hnh util object)
:use-module (hnh util type)
+ :use-module (hnh util table)
+ :use-module ((hnh util lens) :select (car* cdr* modify lens-compose))
:export (xml->namespaced-sxml
namespaced-sxml->xml
namespaced-sxml->sxml
namespaced-sxml->sxml/namespaces
sxml->namespaced-sxml
+
xml
attribute
@@ -22,6 +27,11 @@
xml-element-tagname
xml-element-namespace
xml-element-attributes
+ xml-element-children
+
+ xml-document
+ xml-document-root
+ xml-document-pis
pi-element
pi-element?
@@ -29,6 +39,14 @@
pi-body
))
+(define (car+cadr p) (values (car p) (cadr p)))
+(define (2list->pair l)
+ (call-with-values (lambda () (car+cadr l)) cons))
+
+(define (attributes->table attributes)
+ (alist->table (map 2list->pair attributes)))
+
+
;; XML processing instruction elements (and other things with identical syntax)
;; For example: <?xml version="1.0" encoding="utf-8"?> would be encoded as
;; (make-pi-element 'xml "version=\"1.0\" encoding=\"utf-8\"")
@@ -42,94 +60,151 @@
(pi-tag type: symbol?)
(pi-body type: string?))
-(define-type (xml-element)
+
+(define (serialize-xml-element el)
+ `(xml-element tag: ,(xml-element-tagname el)
+ ns: ,(xml-element-namespace el)
+ attributes: ,(serialize-table (xml-element-attributes el))
+ children: (list ,@(map (lambda (e)
+ (cond ((xml-element? e)
+ (serialize-xml-element e))
+ (else e)))
+ (xml-element-children el)))))
+
+(define-type (xml-element printer: (lambda (r p)
+ (pretty-print (serialize-xml-element r)
+ p)))
(xml-element-tagname type: symbol?
keyword: tag)
(xml-element-namespace type: (or false? symbol?)
default: #f
keyword: ns)
- (xml-element-attributes type: (list-of (pair-of symbol? string?))
- default: '()
- keyword: attributes))
+ (xml-element-attributes type: table?
+ ;; (table-of symbol? string?)
+ ;; (table-of symbol? any?)
+ default: (table)
+ keyword: attributes)
+ (xml-element-children type: (list-of (or string?
+ xml-element?
+ pi-element?))
+ keyword: children
+ default: '()))
+
+
+(define-type (xml-document)
+ (xml-document-root type: xml-element?
+ keyword: root)
+ (xml-document-pis type: (list-of pi-element?)
+ keyword: pi
+ default: '()))
+
(define xml
(case-lambda
- ((tag) (xml-element tag: tag))
- ((ns tag) (xml-element tag: tag ns: ns))
- ((ns tag attrs) (xml-element tag: tag ns: ns attributes: attrs))))
+ ((tag) (lambda children (xml-element children: children tag: tag)))
+ ((ns tag) (lambda children (xml-element children: children tag: tag ns: ns)))
+ ((ns tag attrs) (lambda children (xml-element children: children tag: tag ns: ns
+ attributes: (attributes->table attrs))))))
(define (attribute xml attr)
(assoc-ref (xml-element-attributes xml) attr))
+(define (add-child child el)
+ (xml-element-children el (cons child (xml-element-children el))))
+
+(define (make-stack) (list))
+
+(define push cons)
+(define peek car)
+(define pop car+cdr)
+
+;;; Lens focusing the first element of list matching predicate
+(define (find* predicate)
+ (lambda (list)
+ (lambda (f)
+ (let loop ((rem list))
+ (cond ((null? rem) '())
+ ((predicate (car rem))
+ (cons (f (car rem))
+ (cdr rem)))
+ (else (cons (car rem)
+ (loop (cdr rem)))))))))
(define* (parser key: trim-whitespace?)
- (ssax:make-parser
-
- ;; DOCTYPE
- ;; (lambda (port docname systemid internal-subset? seed)
- ;; (format (current-error-port)
- ;; "doctype: port=~s, docname=~s, systemid=~s, internal-subset?=~s, seed=~s~%"
- ;; port docname systemid internal-subset? seed)
- ;; (values #f '() '() seed))
-
- ;; UNDECL-ROOT
- ;; (lambda (elem-gi seed)
- ;; (format (current-error-port) "Undecl-root: ~s~%" elem-gi)
- ;; (values #f '() '() seed))
-
- ;; DECL-ROOT
- ;; (lambda (elem-gi seed)
- ;; (format (current-error-port) "Decl-root: ~s~%" elem-gi)
- ;; seed)
-
- NEW-LEVEL-SEED
- (lambda (elem-gi attributes namespaces expected-content seed)
- (cons
- (list
+ (ssax:make-parser
+
+ ;; DOCTYPE
+ ;; (lambda (port docname systemid internal-subset? seed)
+ ;; (format (current-error-port)
+ ;; "doctype: port=~s, docname=~s, systemid=~s, internal-subset?=~s, seed=~s~%"
+ ;; port docname systemid internal-subset? seed)
+ ;; (values #f '() '() seed))
+
+ ;; UNDECL-ROOT
+ ;; (lambda (elem-gi seed)
+ ;; (format (current-error-port) "Undecl-root: ~s~%" elem-gi)
+ ;; (values #f '() '() seed))
+
+ ;; DECL-ROOT
+ ;; (lambda (elem-gi seed)
+ ;; (format (current-error-port) "Decl-root: ~s~%" elem-gi)
+ ;; seed)
+
+ NEW-LEVEL-SEED
+ (lambda (elem-gi attrs namespaces expected-content seed)
+ (push
(match elem-gi
- ((ns . tag) (xml-element tag: tag ns: ns attributes: attributes))
- (tag (xml-element tag: tag attributes: attributes))))
- seed))
-
- FINISH-ELEMENT
- (lambda (elem-gi attributes namespaces parent-seed seed)
- (match seed
- (((self . self-children) (parent . children) . rest)
- `((,parent (,self ,@(reverse self-children)) ,@children)
- ,@rest))))
-
- CHAR-DATA-HANDLER
- (lambda (str1 str2 seed)
- (define s
- (if trim-whitespace?
- (string-trim-both (string-append str1 str2))
- (string-append str1 str2)))
- (cond ((string-null? s) seed)
- (else
- (match seed
- (((parent . children) . rest)
- `((,parent ,(string-append str1 str2)
- ,@children)
- ,@rest))))))
-
- PI
- ((*DEFAULT* . (lambda (port pi-tag seed)
- (let ((body (ssax:read-pi-body-as-string port)))
- (match seed
- (((parent . children) . rest)
- `((,parent ,(pi-element pi-tag body) ,@children)
- ,@rest)))))))
- ))
+ ((ns . tag) (xml-element tag: tag attributes: (attributes->table attrs) ns: ns))
+ (tag (xml-element tag: tag attributes: (attributes->table attrs))))
+ seed))
+
+ FINISH-ELEMENT
+ (lambda (elem-gi attributes namespaces parent-seed seed)
+ (let ((head tail (pop seed)))
+ (modify tail
+ (find* xml-element?)
+ (lambda (parent)
+ (add-child
+ (modify head xml-element-children* reverse)
+ parent)))))
+
+ CHAR-DATA-HANDLER
+ (lambda (s1 s2 seed)
+ (define s
+ (if trim-whitespace?
+ (string-trim-both (string-append s1 s2))
+ (string-append s1 s2)))
+ (if (null? s)
+ seed
+ (modify seed (lens-compose (find* xml-element?)
+ xml-element-children*)
+ (lambda (ch) (cons s ch)))))
+
+
+ PI
+ ((*DEFAULT* . (lambda (port pi-tag seed)
+ (let ((body (ssax:read-pi-body-as-string port)))
+ (modify seed
+ (find* xml-element?)
+ (lambda (parent)
+ (add-child (pi-element pi-tag body)
+ parent)))))))))
(define* (xml->namespaced-sxml port-or-string key: (trim-whitespace? #t))
- (match (with-ssax-error-to-port
- (current-error-port)
- (lambda () ((parser trim-whitespace?: trim-whitespace?)
- (->port port-or-string)
- '((*TOP*)))))
- ((('*TOP* . items))
- `(*TOP* ,@(reverse items)))))
+ (define result
+ (with-ssax-error-to-port
+ (current-error-port)
+ (lambda () ((parser trim-whitespace?: trim-whitespace?)
+ (->port port-or-string)
+ (list ((xml 'ROOT)))))))
+
+ (let ((roots pis
+ (partition xml-element?
+ (-> result peek xml-element-children))))
+ (xml-document
+ root: (car roots)
+ pi: (reverse pis))))
(define (pi-element->sxml pi)
`(*PI* ,(pi-tag pi) ,(pi-body pi)))
@@ -165,15 +240,16 @@
(string->symbol
(format #f "~a:~a" pre (xml-element-tagname el)))))))
(else (return (xml-element-tagname el))))
+ children <- (sequence (map namespaced-sxml->sxml*
+ (xml-element-children el)))
(return
- (lambda (children)
- (cond ((null? (xml-element-attributes el))
- `(,tag ,@children))
- (else
- `(,tag (@ ,@(map (lambda (p)
- (call-with-values (lambda () (car+cdr p)) list))
- (xml-element-attributes el)))
- ,@children)))))))
+ (cond ((table-empty? (xml-element-attributes el))
+ `(,tag ,@children))
+ (else
+ `(,tag (@ ,@(map (lambda (p) (call-with-values (lambda () (car+cdr p)) list))
+ (table->list
+ (xml-element-attributes el))))
+ ,@children))))))
(define (sxml->xml-element el namespaces)
(lambda (children)
@@ -200,24 +276,15 @@
(define (namespaced-sxml->sxml* tree)
- (cond ((null? tree) (return tree))
- ((string? tree) (return tree))
- ((pi-element? tree) (return (pi-element->sxml tree)))
- ((not (pair? tree)) (return tree))
- ((car tree) symbol?
- => (lambda (symb)
- (case symb
- ((*TOP*) (do children <- (sequence (map namespaced-sxml->sxml*
- (cdr tree)))
-
- (return (cons '*TOP* children))))
- (else (return tree)))))
- ((xml-element? (car tree))
- (do proc <- (xml-element->sxml (car tree))
- children <- (sequence (map namespaced-sxml->sxml* (cdr tree)))
- (return (proc children))))
-
- ;; list of xml-element?
+ (cond ((string? tree) (return tree))
+ ((xml-element? tree) (xml-element->sxml tree))
+ ((pi-element? tree) (return (pi-element->sxml tree)))
+ ((xml-document? tree)
+ (do pis <- (sequence (map namespaced-sxml->sxml*
+ (xml-document-pis tree)))
+ el <- (namespaced-sxml->sxml*
+ (xml-document-root tree))
+ (return `(*TOP* ,@pis ,el))))
(else (scm-error 'misc-error "namespaced-sxml->sxml*"
"Unexpected token in tree: ~s"
(list tree)
@@ -244,22 +311,51 @@
(define* (namespaced-sxml->sxml/namespaces tree optional: (namespace-prefixes '()))
((namespaced-sxml->sxml* tree) namespace-prefixes))
+
+
+
+(define (split-namespace symb)
+ (apply values (map string->symbol
+ (string-split (symbol->string symb) #\:))))
+
;; Takes an sxml tree, and an association list from prefixes to namespaces
;; Returns a namespaced sxml tree
(define (sxml->namespaced-sxml tree namespaces)
(match tree
(('*PI* tag body) (pi-element tag body))
(('*TOP* rest ...)
- `(*TOP* ,@(map (lambda (r) (sxml->namespaced-sxml r namespaces))
- rest)))
+ (let ((groups
+ (group-by (lambda (x)
+ (cond ((xml-element? x) 'el)
+ ((pi-element? x) 'pi)
+ (else #f)))
+ (map (lambda (r) (sxml->namespaced-sxml r namespaces))
+ rest))))
+ ;; NOTE should multiple roots be allowed? sxml->xml allows it.
+ ;; NOTE should a warning be emitted if the #f groups isn't empty?
+ (xml-document root: (car (assoc-ref groups 'el))
+ pi: (or (assoc-ref groups 'pi) '()))))
+
((el ('@ attrs ...) rest ...)
- ((sxml->xml-element `(,el (@ ,@attrs)) namespaces)
- (map (lambda (el) (sxml->namespaced-sxml el namespaces))
- rest)))
+ (apply (call-with-values (lambda () (split-namespace el))
+ (case-lambda ((tag)
+ (xml (assoc-ref namespaces #f)
+ tag attrs))
+ ((ns tag)
+ (cond ((assoc-ref namespaces ns)
+ => (lambda (ns) (xml ns tag attrs)))
+ (else (scm-error
+ 'missing-namespace
+ "sxml->xml-element"
+ "Unknown namespace prefix encountered: ~s (on tag ~s)"
+ (list ns tag)
+ #f))))))
+ (map (lambda (el) (sxml->namespaced-sxml el namespaces))
+ rest)))
+
((el rest ...)
- ((sxml->xml-element `(,el) namespaces)
- (map (lambda (el) (sxml->namespaced-sxml el namespaces))
- rest)))
+ (sxml->namespaced-sxml `(,el (@) ,@rest) namespaces))
+
(atom atom)))
;;; TODO read intro-comment in SSAX file
diff --git a/tests/unit/sxml/sxml-namespaced.scm b/tests/unit/sxml/sxml-namespaced.scm
index 52f86303..18e35225 100644
--- a/tests/unit/sxml/sxml-namespaced.scm
+++ b/tests/unit/sxml/sxml-namespaced.scm
@@ -9,6 +9,15 @@
;;; TODO tests with attributes
+(test-equal
+ (pi-element 'xml "version=\"1.0\"")
+ (sxml->namespaced-sxml
+ '(*PI* xml "version=\"1.0\"")
+ '()))
+
+
+
+
(define (ns x)
(string->symbol (format #f "http://example.com/~a" x)))
@@ -19,56 +28,64 @@
(test-group "XML constructor utility procedure"
(test-equal "3 args"
- (xml-element tag: 'tagname ns: 'namespace attributes: '())
- (xml 'namespace 'tagname '()))
+ (xml-element tag: 'tagname ns: 'namespace)
+ ((xml 'namespace 'tagname '())))
(test-equal "2 args"
- (xml-element tag: 'tagname ns: 'namespace attributes: '())
- (xml 'namespace 'tagname))
+ (xml-element tag: 'tagname ns: 'namespace)
+ ((xml 'namespace 'tagname)))
(test-equal "1 args"
- (xml-element tag: 'tagname attributes: '())
- (xml 'tagname)))
+ (xml-element tag: 'tagname)
+ ((xml 'tagname))))
+
+;;; TODO Attributes
+;;; TODO children
(test-group "xml->namespaced-sxml"
- (test-equal
- `(*TOP* (,(xml 'tag)))
+ (test-equal "Without namespace"
+ (xml-document
+ root: ((xml 'tag)))
(xml->namespaced-sxml "<tag/>"))
- (test-equal
- `(*TOP* (,(xml 'ns1 'tag)))
+ (test-equal "With default namespace"
+ (xml-document
+ root: ((xml 'ns1 'tag)))
(xml->namespaced-sxml "<tag xmlns='ns1'/>"))
- (test-equal
- `(*TOP* (,(xml 'ns2 'tag)))
+ (test-equal "With unused default namespace"
+ (xml-document
+ root: ((xml 'ns2 'tag)))
(xml->namespaced-sxml "<x:tag xmlns='ns1' xmlns:x='ns2'/>"))
- (test-equal
- `(*TOP* (,(xml 'ns2 'tag)
- (,(xml 'ns1 'tag))))
+ (test-equal "With multiple namespaces"
+ (xml-document
+ root: ((xml 'ns2 'tag)
+ ((xml 'ns1 'tag))))
(xml->namespaced-sxml "<x:tag xmlns='ns1' xmlns:x='ns2'><tag/></x:tag>"))
(test-equal "PI are passed directly"
- `(*TOP* ,(pi-element 'xml "encoding=\"utf-8\" version=\"1.0\"")
- (,(xml 'tag)))
- (xml->namespaced-sxml "<?xml encoding=\"utf-8\" version=\"1.0\"?><tag/>"))
+ (xml-document
+ pi: (list (pi-element 'xml "encoding=\"utf-8\" version=\"1.0\""))
+ root: ((xml 'tag)))
+ (xml->namespaced-sxml "<?xml encoding=\"utf-8\" version=\"1.0\"?><tag/>"))
(test-equal "Document with whitespace in it"
- `(*TOP* ,(pi-element 'xml "")
- (,(xml 'root)
- " "
- (,(xml 'a))
- ))
+ (xml-document
+ pi: (list (pi-element 'xml ""))
+ root: ((xml 'root)
+ " "
+ ((xml 'a))))
(xml->namespaced-sxml "<?xml?><root> <a/></root>"
trim-whitespace?: #f))
- ;; TODO is this expected? xml->sxml discards it.
- (test-equal "Whitespace before root is kept"
- `(*TOP* ,(pi-element 'xml "")
- (,(xml 'root)))
+ (test-equal "Whitespace before root is discarded kept"
+ (xml-document
+ pi: (list (pi-element 'xml ""))
+ root: ((xml 'root)))
(xml->namespaced-sxml "<?xml?> <root/>")))
@@ -77,21 +94,27 @@
;;; attributes, since xml->sxml doesn't have those.
(test-group "sxml->namespaced-sxml"
(test-equal "Simplest"
- `(,(xml 'a)) (sxml->namespaced-sxml '(a) '()))
+ ((xml 'a))
+ (sxml->namespaced-sxml '(a) '()))
+
(test-equal "With *TOP*"
- `(*TOP* (,(xml 'a))) (sxml->namespaced-sxml '(*TOP* (a)) '()))
+ (xml-document root: ((xml 'a)))
+ (sxml->namespaced-sxml '(*TOP* (a)) '()))
+
(test-equal "Simplest with namespace"
- `(,(xml (ns 1) 'a))
+ ((xml (ns 1) 'a))
(sxml->namespaced-sxml '(x:a)
`((x . ,(ns 1)))))
+
(test-equal "With pi"
- `(*TOP* ,(pi-element 'xml "test")
- (,(xml 'a)))
+ (xml-document root: ((xml 'a))
+ pi: (list (pi-element 'xml "test")))
(sxml->namespaced-sxml
`(*TOP*
(*PI* xml "test")
(a))
'()))
+
(test-error "With unknown namespace"
'missing-namespace
(sxml->namespaced-sxml '(x:a) '())))
@@ -106,8 +129,7 @@
(call-with-values
(lambda ()
(namespaced-sxml->sxml/namespaces
- `(*TOP*
- (,(xml 'a)))))
+ (xml-document root: ((xml 'a)))))
(lambda (tree namespaces)
(test-equal `(*TOP* (a)) tree)
(test-equal '() namespaces))))
@@ -116,10 +138,10 @@
(call-with-values
(lambda ()
(namespaced-sxml->sxml/namespaces
- `(*TOP*
- (,(xml (ns 1) 'a)
- (,(xml (ns 2) 'a))
- (,(xml 'a))))))
+ (xml-document
+ root: ((xml (ns 1) 'a)
+ ((xml (ns 2) 'a))
+ ((xml 'a))))))
(lambda (tree nss)
(test-eqv 2 (length nss))
(test-equal
@@ -133,9 +155,9 @@
(call-with-values
(lambda ()
(namespaced-sxml->sxml/namespaces
- `(*TOP*
- ,(pi-element 'xml "test")
- (,(xml 'a)))))
+ (xml-document
+ pi: (list (pi-element 'xml "test"))
+ root: ((xml 'a)))))
(lambda (tree namespaces)
(test-equal '() namespaces)
(test-equal `(*TOP* (*PI* xml "test")
@@ -145,10 +167,10 @@
(test-group "namespaced-sxml->sxml"
(test-equal "Without namespaces"
'(*TOP* (a (@)))
- (namespaced-sxml->sxml `(*TOP* (,(xml 'a)))))
+ (namespaced-sxml->sxml (xml-document root: ((xml 'a)))))
(test-group "With namespaces"
- (match (namespaced-sxml->sxml `(*TOP* (,(xml (ns 1) 'a))))
+ (match (namespaced-sxml->sxml (xml-document root: ((xml (ns 1) 'a))))
;; (ns 1) hard coded to work with match
(`(*TOP* (,el (@ (,key "http://example.com/1"))))
(let ((el-pair (string-split (symbol->string el) #\:))