aboutsummaryrefslogtreecommitdiff
path: root/module/sxml/namespaced.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/sxml/namespaced.scm')
-rw-r--r--module/sxml/namespaced.scm266
1 files changed, 266 insertions, 0 deletions
diff --git a/module/sxml/namespaced.scm b/module/sxml/namespaced.scm
new file mode 100644
index 00000000..e5a334da
--- /dev/null
+++ b/module/sxml/namespaced.scm
@@ -0,0 +1,266 @@
+(define-module (sxml namespaced)
+ :use-module (sxml ssax)
+ :use-module (sxml util)
+ :use-module (ice-9 match)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-9)
+ :use-module (srfi srfi-9 gnu)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (hnh util state-monad)
+ :use-module ((hnh util io) :select (->port))
+ :export (xml->namespaced-sxml
+ namespaced-sxml->xml
+ namespaced-sxml->sxml
+ namespaced-sxml->sxml/namespaces
+ sxml->namespaced-sxml
+ xml
+ attribute
+
+ make-xml-element
+ xml-element?
+ xml-element-tagname
+ xml-element-namespace
+ xml-element-attributes
+
+ make-pi-element
+ pi-element?
+ pi-tag
+ pi-body
+ ))
+
+;; XML processing instruction elements (and other things with identical syntax)
+;; For example: <?xml version="1.0" encoding="utf-8"?> would be encoded as
+;; (make-pi-element 'xml "version=\"1.0\" encoding=\"utf-8\"")
+;; tag should always be a symbol
+;; body should always be a string
+(define-record-type <pi-element>
+ (make-pi-element tag body)
+ pi-element?
+ (tag pi-tag)
+ (body pi-body))
+
+
+(define-record-type <xml-element>
+ (make-xml-element tagname namespace attributes)
+ xml-element?
+ (tagname xml-element-tagname)
+ (namespace xml-element-namespace)
+ (attributes xml-element-attributes))
+
+
+(define xml
+ (case-lambda
+ ((tag) (make-xml-element tag #f '()))
+ ((ns tag) (make-xml-element tag ns '()))
+ ((ns tag attrs) (make-xml-element tag ns attrs))))
+
+(define (attribute xml attr)
+ (assoc-ref (xml-element-attributes xml) attr))
+
+
+(define* (parser key: trim-whitespace?)
+ (ssax:make-parser
+
+ ;; DOCTYPE
+ ;; (lambda (port docname systemid internal-subset? seed)
+ ;; (format (current-error-port)
+ ;; "doctype: port=~s, docname=~s, systemid=~s, internal-subset?=~s, seed=~s~%"
+ ;; port docname systemid internal-subset? seed)
+ ;; (values #f '() '() seed))
+
+ ;; UNDECL-ROOT
+ ;; (lambda (elem-gi seed)
+ ;; (format (current-error-port) "Undecl-root: ~s~%" elem-gi)
+ ;; (values #f '() '() seed))
+
+ ;; DECL-ROOT
+ ;; (lambda (elem-gi seed)
+ ;; (format (current-error-port) "Decl-root: ~s~%" elem-gi)
+ ;; seed)
+
+ NEW-LEVEL-SEED
+ (lambda (elem-gi attributes namespaces expected-content seed)
+ (cons
+ (list
+ (match elem-gi
+ ((ns . tag) (make-xml-element tag ns attributes))
+ (tag (make-xml-element tag #f attributes))))
+ seed))
+
+ FINISH-ELEMENT
+ (lambda (elem-gi attributes namespaces parent-seed seed)
+ (match seed
+ (((self . self-children) (parent . children) . rest)
+ `((,parent (,self ,@(reverse self-children)) ,@children)
+ ,@rest))))
+
+ CHAR-DATA-HANDLER
+ (lambda (str1 str2 seed)
+ (define s
+ (if trim-whitespace?
+ (string-trim-both (string-append str1 str2))
+ (string-append str1 str2)))
+ (cond ((string-null? s) seed)
+ (else
+ (match seed
+ (((parent . children) . rest)
+ `((,parent ,(string-append str1 str2)
+ ,@children)
+ ,@rest))))))
+
+ PI
+ ((*DEFAULT* . (lambda (port pi-tag seed)
+ (let ((body (ssax:read-pi-body-as-string port)))
+ (match seed
+ (((parent . children) . rest)
+ `((,parent ,(make-pi-element pi-tag body) ,@children)
+ ,@rest)))))))
+ ))
+
+
+(define* (xml->namespaced-sxml port-or-string key: (trim-whitespace? #t))
+ (match (with-ssax-error-to-port
+ (current-error-port)
+ (lambda () ((parser trim-whitespace?: trim-whitespace?)
+ (->port port-or-string)
+ '((*TOP*)))))
+ ((('*TOP* . items))
+ `(*TOP* ,@(reverse items)))))
+
+(define (pi-element->sxml pi)
+ `(*PI* ,(pi-tag pi) ,(pi-body pi)))
+
+
+
+(define (ns-pair->attribute pair)
+ (let ((fqdn short (car+cdr pair)))
+ (list (string->symbol (format #f "xmlns:~a" short))
+ (symbol->string fqdn))))
+
+;; Takes an association list from full namespace names (as symbols), to their
+;; short forms, and returns a list containing xmlns:x-attributes suitable for
+;; splicing into scheme's "regular" sxml.
+(define (ns-alist->attributes ns)
+ (map ns-pair->attribute ns))
+
+
+
+(define (get-prefix ns)
+ (do namespaces <- (get)
+ (cond ((assq-ref namespaces ns) => return)
+ (else (do prefix = (gensym "ns")
+ (put (acons ns prefix namespaces))
+ (return prefix))))))
+
+
+(define (xml-element->sxml el)
+ (do tag <- (cond ((xml-element-namespace el)
+ => (lambda (ns)
+ (do pre <- (get-prefix ns)
+ (return
+ (string->symbol
+ (format #f "~a:~a" pre (xml-element-tagname el)))))))
+ (else (return (xml-element-tagname el))))
+ (return
+ (lambda (children)
+ (cond ((null? (xml-element-attributes el))
+ `(,tag ,@children))
+ (else
+ `(,tag (@ ,@(map (lambda (p)
+ (call-with-values (lambda () (car+cdr p)) list))
+ (xml-element-attributes el)))
+ ,@children)))))))
+
+(define (sxml->xml-element el namespaces)
+ (lambda (children)
+ (let ((tag-symb attrs
+ (match el
+ ((tag ('@ attrs ...))
+ (values tag (map (lambda (p) (apply cons p)) attrs)))
+ ((tag) (values tag '())))))
+ (let ((parts (string-split (symbol->string tag-symb) #\:)))
+ (cons (case (length parts)
+ ((1) (xml (assoc-ref namespaces #f)
+ (string->symbol (car parts)) attrs))
+ ((2)
+ (cond ((assoc-ref namespaces (string->symbol (car parts)))
+ => (lambda (ns) (xml ns (string->symbol (cadr parts)) attrs)))
+ (else (scm-error 'missing-namespace "sxml->xml-element"
+ "Unknown namespace prefix encountered: ~s (on tag ~s)"
+ (list (car parts) (cadr parts))
+ #f))))
+ (else (scm-error 'misc-error "sxml->xml-element"
+ "Invalid QName: more than one colon ~s"
+ (list tag-symb) #f)))
+ children)))))
+
+
+(define (namespaced-sxml->sxml* tree)
+ (cond ((null? tree) (return tree))
+ ((string? tree) (return tree))
+ ((pi-element? tree) (return (pi-element->sxml tree)))
+ ((not (pair? tree)) (return tree))
+ ((car tree) symbol?
+ => (lambda (symb)
+ (case symb
+ ((*TOP*) (do children <- (sequence (map namespaced-sxml->sxml*
+ (cdr tree)))
+
+ (return (cons '*TOP* children))))
+ (else (return tree)))))
+ ((xml-element? (car tree))
+ (do proc <- (xml-element->sxml (car tree))
+ children <- (sequence (map namespaced-sxml->sxml* (cdr tree)))
+ (return (proc children))))
+
+ ;; list of xml-element?
+ (else (scm-error 'misc-error "namespaced-sxml->sxml*"
+ "Unexpected token in tree: ~s"
+ (list tree)
+ #f))))
+
+
+;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix.
+;; Returns a sxml tree, with xmlns:<prefix>=namespace attributes
+(define* (namespaced-sxml->sxml tree optional: (namespace-prefixes '()))
+ (let ((tree ns ((namespaced-sxml->sxml* tree) namespace-prefixes)))
+ ((get-root-element tree)
+ (lambda (root)
+ (add-attributes root (ns-alist->attributes ns))))))
+
+(define* (namespaced-sxml->xml tree key:
+ (namespaces '())
+ (port (current-output-port)))
+ ((@ (sxml simple) sxml->xml)
+ (namespaced-sxml->sxml tree namespaces) port))
+
+;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix.
+;; Returns two values: a sxml tree without declared namespaces
+;; and a association list from namespace symbols, to used prefixes
+(define* (namespaced-sxml->sxml/namespaces tree optional: (namespace-prefixes '()))
+ ((namespaced-sxml->sxml* tree) namespace-prefixes))
+
+;; Takes an sxml tree, and an association list from prefixes to namespaces
+;; Returns a namespaced sxml tree
+(define (sxml->namespaced-sxml tree namespaces)
+ (match tree
+ (('*PI* tag body) (make-pi-element tag body))
+ (('*TOP* rest ...)
+ `(*TOP* ,@(map (lambda (r) (sxml->namespaced-sxml r namespaces))
+ rest)))
+ ((el ('@ attrs ...) rest ...)
+ ((sxml->xml-element `(,el (@ ,@attrs)) namespaces)
+ (map (lambda (el) (sxml->namespaced-sxml el namespaces))
+ rest)))
+ ((el rest ...)
+ ((sxml->xml-element `(,el) namespaces)
+ (map (lambda (el) (sxml->namespaced-sxml el namespaces))
+ rest)))
+ (atom atom)))
+
+;;; TODO read intro-comment in SSAX file
+;;; TODO Figure out how to still use (sxml match) and (sxml xpath) with these
+;;; new trees (probably rewriting to a "regular" sxml tree, and keeping
+;;; a strict mapping of namespaces)
+