diff options
author | Hugo Hörnquist <hugo@lysator.liu.se> | 2023-04-10 22:46:01 +0200 |
---|---|---|
committer | Hugo Hörnquist <hugo@lysator.liu.se> | 2023-04-10 23:45:29 +0200 |
commit | d59edcd022a73e0ec9460d95a092f050edf411b1 (patch) | |
tree | e71cb7f642f7231c73c4227ca43e4d998d1d697a /module | |
parent | Add (sxml util). (diff) | |
download | calp-d59edcd022a73e0ec9460d95a092f050edf411b1.tar.gz calp-d59edcd022a73e0ec9460d95a092f050edf411b1.tar.xz |
Add (sxml namespaced).
Diffstat (limited to '')
-rw-r--r-- | module/sxml/namespaced.scm | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/module/sxml/namespaced.scm b/module/sxml/namespaced.scm new file mode 100644 index 00000000..746b98bb --- /dev/null +++ b/module/sxml/namespaced.scm @@ -0,0 +1,257 @@ +(define-module (sxml namespaced) + :use-module (sxml ssax) + :use-module (sxml util) + :use-module (ice-9 match) + :use-module (srfi srfi-1) + :use-module (srfi srfi-9) + :use-module (srfi srfi-9 gnu) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (hnh util state-monad) + :use-module ((hnh util io) :select (->port)) + :export (xml->namespaced-sxml + namespaced-sxml->xml + namespaced-sxml->sxml + namespaced-sxml->sxml/namespaces + sxml->namespaced-sxml + xml + + make-xml-element + xml-element? + xml-element-tagname xml-element-namespace xml-element-attributes + + make-pi-element + pi-element? + pi-tag + pi-body + )) + +;; XML processing instruction elements (and other things with identical syntax) +;; For example: <?xml version="1.0" encoding="utf-8"?> would be encoded as +;; (make-pi-element 'xml "version=\"1.0\" encoding=\"utf-8\"") +;; tag should always be a symbol +;; body should always be a string +(define-record-type <pi-element> + (make-pi-element tag body) + pi-element? + (tag pi-tag) + (body pi-body)) + + +(define-record-type <xml-element> + (make-xml-element tagname namespace attributes) + xml-element? + (tagname xml-element-tagname) + (namespace xml-element-namespace) + (attributes xml-element-attributes)) + + +(define xml + (case-lambda + ((tag) (make-xml-element tag #f '())) + ((ns tag) (make-xml-element tag ns '())) + ((ns tag attrs) (make-xml-element tag ns attrs)))) + + +(define* (parser key: trim-whitespace?) + (ssax:make-parser + + ;; DOCTYPE + ;; (lambda (port docname systemid internal-subset? seed) + ;; (format (current-error-port) + ;; "doctype: port=~s, docname=~s, systemid=~s, internal-subset?=~s, seed=~s~%" + ;; port docname systemid internal-subset? seed) + ;; (values #f '() '() seed)) + + ;; UNDECL-ROOT + ;; (lambda (elem-gi seed) + ;; (format (current-error-port) "Undecl-root: ~s~%" elem-gi) + ;; (values #f '() '() seed)) + + ;; DECL-ROOT + ;; (lambda (elem-gi seed) + ;; (format (current-error-port) "Decl-root: ~s~%" elem-gi) + ;; seed) + + NEW-LEVEL-SEED + (lambda (elem-gi attributes namespaces expected-content seed) + (cons + (list + (match elem-gi + ((ns . tag) (make-xml-element tag ns attributes)) + (tag (make-xml-element tag #f attributes)))) + seed)) + + FINISH-ELEMENT + (lambda (elem-gi attributes namespaces parent-seed seed) + (match seed + (((self . self-children) (parent . children) . rest) + `((,parent (,self ,@(reverse self-children)) ,@children) + ,@rest)))) + + CHAR-DATA-HANDLER + (lambda (str1 str2 seed) + (define s + (if trim-whitespace? + (string-trim-both (string-append str1 str2)) + (string-append str1 str2))) + (cond ((string-null? s) seed) + (else + (match seed + (((parent . children) . rest) + `((,parent ,(string-append str1 str2) + ,@children) + ,@rest)))))) + + PI + ((*DEFAULT* . (lambda (port pi-tag seed) + (let ((body (ssax:read-pi-body-as-string port))) + (match seed + (((parent . children) . rest) + `((,parent ,(make-pi-element pi-tag body) ,@children) + ,@rest))))))) + )) + + +(define* (xml->namespaced-sxml port-or-string key: (trim-whitespace? #t)) + (match (with-ssax-error-to-port + (current-error-port) + (lambda () ((parser trim-whitespace?: trim-whitespace?) + (->port port-or-string) + '((*TOP*))))) + ((('*TOP* . items)) + `(*TOP* ,@(reverse items))))) + +(define (pi-element->sxml pi) + `(*PI* ,(pi-tag pi) ,(pi-body pi))) + + + +(define (ns-pair->attribute pair) + (let ((fqdn short (car+cdr pair))) + (list (string->symbol (format #f "xmlns:~a" short)) + (symbol->string fqdn)))) + +;; Takes an association list from full namespace names (as symbols), to their +;; short forms, and returns a list containing xmlns:x-attributes suitable for +;; splicing into scheme's "regular" sxml. +(define (ns-alist->attributes ns) + (map ns-pair->attribute ns)) + + + +(define (get-prefix ns) + (do namespaces <- (get) + (cond ((assq-ref namespaces ns) => return) + (else (do prefix = (gensym "ns") + (put (acons ns prefix namespaces)) + (return prefix)))))) + + +(define (xml-element->sxml el) + (do tag <- (cond ((xml-element-namespace el) + => (lambda (ns) + (do pre <- (get-prefix ns) + (return + (string->symbol + (format #f "~a:~a" pre (xml-element-tagname el))))))) + (else (return (xml-element-tagname el)))) + (return + (lambda (children) + (cond ((null? (xml-element-attributes el)) + `(,tag ,@children)) + (else + `(,tag (@ ,@(map (lambda (p) + (call-with-values (lambda () (car+cdr p)) list)) + (xml-element-attributes el))) + ,@children))))))) + +(define (sxml->xml-element el namespaces) + (lambda (children) + (let ((tag-symb attrs + (match el + ((tag ('@ attrs ...)) + (values tag (map (lambda (p) (apply cons p)) attrs))) + ((tag) (values tag '()))))) + (let ((parts (string-split (symbol->string tag-symb) #\:))) + (cons (case (length parts) + ((1) (xml #f (string->symbol (car parts)) attrs)) + ((2) + (cond ((assoc-ref namespaces (string->symbol (car parts))) + => (lambda (ns) (xml ns (string->symbol (cadr parts)) attrs))) + (else (scm-error 'missing-namespace "sxml->xml-element" + "Unknown namespace prefix encountered: ~s (on tag ~s)" + (list (car parts) (cadr parts)) + #f)))) + (else (scm-error 'misc-error "sxml->xml-element" + "Invalid QName: more than one colon ~s" + (list tag-symb) #f))) + children))))) + + +(define (namespaced-sxml->sxml* tree) + (cond ((null? tree) (return tree)) + ((string? tree) (return tree)) + ((pi-element? tree) (return (pi-element->sxml tree))) + ((not (pair? tree)) (return tree)) + ((car tree) symbol? + => (lambda (symb) + (case symb + ((*TOP*) (do children <- (sequence (map namespaced-sxml->sxml* + (cdr tree))) + + (return (cons '*TOP* children)))) + (else (return tree))))) + ((xml-element? (car tree)) + (do proc <- (xml-element->sxml (car tree)) + children <- (sequence (map namespaced-sxml->sxml* (cdr tree))) + (return (proc children)))) + + ;; list of xml-element? + (else (scm-error 'misc-error "namespaced-sxml->sxml*" + "Unexpected token in tree: ~s" + (list tree) + #f)))) + + +;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix. +;; Returns a sxml tree, with xmlns:<prefix>=namespace attributes +(define* (namespaced-sxml->sxml tree optional: (namespace-prefixes '())) + (let ((tree ns ((namespaced-sxml->sxml* tree) namespace-prefixes))) + ((get-root-element tree) + (lambda (root) + (add-attributes root (ns-alist->attributes ns)))))) + +(define* (namespaced-sxml->xml tree key: + (namespaces '()) + (port (current-output-port))) + ((@ (sxml simple) sxml->xml) (namespaced-sxml->sxml tree namespaces) port)) + +;; Takes a tree of namespaced-sxml, and optionally an assoc list from namespace symbols, to prefered prefix. +;; Returns two values: a sxml tree without declared namespaces +;; and a association list from namespace symbols, to used prefixes +(define* (namespaced-sxml->sxml/namespaces tree optional: (namespace-prefixes '())) + ((namespaced-sxml->sxml* tree) namespace-prefixes)) + +;; Takes an sxml tree, and an association list from prefixes to namespaces +;; Returns a namespaced sxml tree +(define (sxml->namespaced-sxml tree namespaces) + (match tree + (('*PI* tag body) (make-pi-element tag body)) + (('*TOP* rest ...) + `(*TOP* ,@(map (lambda (r) (sxml->namespaced-sxml r namespaces)) + rest))) + ((el ('@ attrs ...) rest ...) + ((sxml->xml-element `(,el (@ ,@attrs)) namespaces) + (map (lambda (el) (sxml->namespaced-sxml el namespaces)) + rest))) + ((el rest ...) + ((sxml->xml-element `(,el) namespaces) + (map (lambda (el) (sxml->namespaced-sxml el namespaces)) + rest))) + (atom atom))) + +;;; TODO read intro-comment in SSAX file +;;; TODO Figure out how to still use (sxml match) and (sxml xpath) with these +;;; new trees (probably rewriting to a "regular" sxml tree, and keeping +;;; a strict mapping of namespaces) |