From e29e14c309cb93a19eb48cf4ba1a597edf975595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Wed, 8 Jul 2020 02:30:04 +0200 Subject: Import sxml->html from dthompson. --- module/sxml/html.scm | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 module/sxml/html.scm diff --git a/module/sxml/html.scm b/module/sxml/html.scm new file mode 100644 index 00000000..ab3d21c2 --- /dev/null +++ b/module/sxml/html.scm @@ -0,0 +1,364 @@ +;; Copyright © 2015 David Thompson +;; +;; This library is free software; you can redistribute it and/or +;; modify it under the terms of the GNU Lesser General Public License +;; as published by the Free Software Foundation; either version 3 of +;; the License, or (at your option) any later version. +;; +;; This library is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; Lesser General Public License for more details. +;; +;; You should have received a copy of the GNU Lesser General Public +;; License along with this library. If not, see +;; . + +;; https://dthompson.us/rendering-html-with-sxml-and-gnu-guile.html + +(define-module (sxml html) + #:use-module (sxml simple) + #:use-module (srfi srfi-26) + #:use-module (ice-9 match) + #:use-module (ice-9 format) + #:use-module (ice-9 hash-table) + #:export (sxml->html)) + +(define %void-elements + '(area + base + br + col + command + embed + hr + img + input + keygen + link + meta + param + source + track + wbr)) + +(define (void-element? tag) + "Return #t if TAG is a void element." + (pair? (memq tag %void-elements))) + +(define %escape-chars + (alist->hash-table + '((#\" . "quot") + (#\& . "amp") + (#\' . "apos") + (#\< . "lt") + (#\> . "gt") + (#\¡ . "iexcl") + (#\¢ . "cent") + (#\£ . "pound") + (#\¤ . "curren") + (#\¥ . "yen") + (#\¦ . "brvbar") + (#\§ . "sect") + (#\¨ . "uml") + (#\© . "copy") + (#\ª . "ordf") + (#\« . "laquo") + (#\¬ . "not") + (#\® . "reg") + (#\¯ . "macr") + (#\° . "deg") + (#\± . "plusmn") + (#\² . "sup2") + (#\³ . "sup3") + (#\´ . "acute") + (#\µ . "micro") + (#\¶ . "para") + (#\· . "middot") + (#\¸ . "cedil") + (#\¹ . "sup1") + (#\º . "ordm") + (#\» . "raquo") + (#\¼ . "frac14") + (#\½ . "frac12") + (#\¾ . "frac34") + (#\¿ . "iquest") + (#\À . "Agrave") + (#\Á . "Aacute") + (#\ . "Acirc") + (#\à . "Atilde") + (#\Ä . "Auml") + (#\Å . "Aring") + (#\Æ . "AElig") + (#\Ç . "Ccedil") + (#\È . "Egrave") + (#\É . "Eacute") + (#\Ê . "Ecirc") + (#\Ë . "Euml") + (#\Ì . "Igrave") + (#\Í . "Iacute") + (#\Î . "Icirc") + (#\Ï . "Iuml") + (#\Ð . "ETH") + (#\Ñ . "Ntilde") + (#\Ò . "Ograve") + (#\Ó . "Oacute") + (#\Ô . "Ocirc") + (#\Õ . "Otilde") + (#\Ö . "Ouml") + (#\× . "times") + (#\Ø . "Oslash") + (#\Ù . "Ugrave") + (#\Ú . "Uacute") + (#\Û . "Ucirc") + (#\Ü . "Uuml") + (#\Ý . "Yacute") + (#\Þ . "THORN") + (#\ß . "szlig") + (#\à . "agrave") + (#\á . "aacute") + (#\â . "acirc") + (#\ã . "atilde") + (#\ä . "auml") + (#\å . "aring") + (#\æ . "aelig") + (#\ç . "ccedil") + (#\è . "egrave") + (#\é . "eacute") + (#\ê . "ecirc") + (#\ë . "euml") + (#\ì . "igrave") + (#\í . "iacute") + (#\î . "icirc") + (#\ï . "iuml") + (#\ð . "eth") + (#\ñ . "ntilde") + (#\ò . "ograve") + (#\ó . "oacute") + (#\ô . "ocirc") + (#\õ . "otilde") + (#\ö . "ouml") + (#\÷ . "divide") + (#\ø . "oslash") + (#\ù . "ugrave") + (#\ú . "uacute") + (#\û . "ucirc") + (#\ü . "uuml") + (#\ý . "yacute") + (#\þ . "thorn") + (#\ÿ . "yuml") + (#\Œ . "OElig") + (#\œ . "oelig") + (#\Š . "Scaron") + (#\š . "scaron") + (#\Ÿ . "Yuml") + (#\ƒ . "fnof") + (#\ˆ . "circ") + (#\˜ . "tilde") + (#\Α . "Alpha") + (#\Β . "Beta") + (#\Γ . "Gamma") + (#\Δ . "Delta") + (#\Ε . "Epsilon") + (#\Ζ . "Zeta") + (#\Η . "Eta") + (#\Θ . "Theta") + (#\Ι . "Iota") + (#\Κ . "Kappa") + (#\Λ . "Lambda") + (#\Μ . "Mu") + (#\Ν . "Nu") + (#\Ξ . "Xi") + (#\Ο . "Omicron") + (#\Π . "Pi") + (#\Ρ . "Rho") + (#\Σ . "Sigma") + (#\Τ . "Tau") + (#\Υ . "Upsilon") + (#\Φ . "Phi") + (#\Χ . "Chi") + (#\Ψ . "Psi") + (#\Ω . "Omega") + (#\α . "alpha") + (#\β . "beta") + (#\γ . "gamma") + (#\δ . "delta") + (#\ε . "epsilon") + (#\ζ . "zeta") + (#\η . "eta") + (#\θ . "theta") + (#\ι . "iota") + (#\κ . "kappa") + (#\λ . "lambda") + (#\μ . "mu") + (#\ν . "nu") + (#\ξ . "xi") + (#\ο . "omicron") + (#\π . "pi") + (#\ρ . "rho") + (#\ς . "sigmaf") + (#\σ . "sigma") + (#\τ . "tau") + (#\υ . "upsilon") + (#\φ . "phi") + (#\χ . "chi") + (#\ψ . "psi") + (#\ω . "omega") + (#\ϑ . "thetasym") + (#\ϒ . "upsih") + (#\ϖ . "piv") + (#\  . "ensp") + (#\  . "emsp") + (#\  . "thinsp") + (#\– . "ndash") + (#\— . "mdash") + (#\‘ . "lsquo") + (#\’ . "rsquo") + (#\‚ . "sbquo") + (#\“ . "ldquo") + (#\” . "rdquo") + (#\„ . "bdquo") + (#\† . "dagger") + (#\‡ . "Dagger") + (#\• . "bull") + (#\… . "hellip") + (#\‰ . "permil") + (#\′ . "prime") + (#\″ . "Prime") + (#\‹ . "lsaquo") + (#\› . "rsaquo") + (#\‾ . "oline") + (#\⁄ . "frasl") + (#\€ . "euro") + (#\ℑ . "image") + (#\℘ . "weierp") + (#\ℜ . "real") + (#\™ . "trade") + (#\ℵ . "alefsym") + (#\← . "larr") + (#\↑ . "uarr") + (#\→ . "rarr") + (#\↓ . "darr") + (#\↔ . "harr") + (#\↵ . "crarr") + (#\⇐ . "lArr") + (#\⇑ . "uArr") + (#\⇒ . "rArr") + (#\⇓ . "dArr") + (#\⇔ . "hArr") + (#\∀ . "forall") + (#\∂ . "part") + (#\∃ . "exist") + (#\∅ . "empty") + (#\∇ . "nabla") + (#\∈ . "isin") + (#\∉ . "notin") + (#\∋ . "ni") + (#\∏ . "prod") + (#\∑ . "sum") + (#\− . "minus") + (#\∗ . "lowast") + (#\√ . "radic") + (#\∝ . "prop") + (#\∞ . "infin") + (#\∠ . "ang") + (#\∧ . "and") + (#\∨ . "or") + (#\∩ . "cap") + (#\∪ . "cup") + (#\∫ . "int") + (#\∴ . "there4") + (#\∼ . "sim") + (#\≅ . "cong") + (#\≈ . "asymp") + (#\≠ . "ne") + (#\≡ . "equiv") + (#\≤ . "le") + (#\≥ . "ge") + (#\⊂ . "sub") + (#\⊃ . "sup") + (#\⊄ . "nsub") + (#\⊆ . "sube") + (#\⊇ . "supe") + (#\⊕ . "oplus") + (#\⊗ . "otimes") + (#\⊥ . "perp") + (#\⋅ . "sdot") + (#\⋮ . "vellip") + (#\⌈ . "lceil") + (#\⌉ . "rceil") + (#\⌊ . "lfloor") + (#\⌋ . "rfloor") + (#\〈 . "lang") + (#\〉 . "rang") + (#\◊ . "loz") + (#\♠ . "spades") + (#\♣ . "clubs") + (#\♥ . "hearts") + (#\♦ . "diams")))) + +(define (string->escaped-html s port) + "Write the HTML escaped form of S to PORT." + (define (escape c) + (let ((escaped (hash-ref %escape-chars c))) + (if escaped + (format port "&~a;" escaped) + (display c port)))) + (string-for-each escape s)) + +(define (object->escaped-html obj port) + "Write the HTML escaped form of OBJ to PORT." + (string->escaped-html + (call-with-output-string (cut display obj <>)) + port)) + +(define (attribute-value->html value port) + "Write the HTML escaped form of VALUE to PORT." + (if (string? value) + (string->escaped-html value port) + (object->escaped-html value port))) + +(define (attribute->html attr value port) + "Write ATTR and VALUE to PORT." + (format port "~a=\"" attr) + (attribute-value->html value port) + (display #\" port)) + +(define (element->html tag attrs body port) + "Write the HTML TAG to PORT, where TAG has the attributes in the +ist ATTRS and the child nodes in BODY." + (format port "<~a" tag) + (for-each (match-lambda + ((attr value) + (display #\space port) + (attribute->html attr value port))) + attrs) + (if (and (null? body) (void-element? tag)) + (display " />" port) + (begin + (display #\> port) + (for-each (cut sxml->html <> port) body) + (format port "" tag)))) + +(define (doctype->html doctype port) + (format port "" doctype)) + +(define* (sxml->html tree #:optional (port (current-output-port))) + "Write the serialized HTML form of TREE to PORT." + (match tree + (() *unspecified*) + (('doctype type) + (doctype->html type port)) + ;; Unescaped, raw HTML output + (('raw html) + (display html port)) + (((? symbol? tag) ('@ attrs ...) body ...) + (element->html tag attrs body port)) + (((? symbol? tag) body ...) + (element->html tag '() body port)) + ((nodes ...) + (for-each (cut sxml->html <> port) nodes)) + ((? string? text) + (string->escaped-html text port)) + ;; Render arbitrary Scheme objects, too. + (obj (object->escaped-html obj port)))) -- cgit v1.2.3