diff options
author | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-06-13 12:09:16 +0200 |
---|---|---|
committer | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-06-13 12:09:16 +0200 |
commit | 9d4ce0b515fd71dc38fb24db77be9572ebf0df64 (patch) | |
tree | 3d0b005c4ab79577fe4847210e78a54f310dbebf /tests/validate-html | |
parent | Cleanup of zic. (diff) | |
parent | Replace some .tagName with instanceof. (diff) | |
download | calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.gz calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.xz |
Merge html-validator.
Adds an HTML validator which checks the soundness of our generated
document, both before and after JavaScript has run (thanks to Selenium).
This merge also fixes the initial problems, meaning that the HTML should
validate as of this commit.
Diffstat (limited to 'tests/validate-html')
-rw-r--r-- | tests/validate-html/.gitignore | 2 | ||||
-rwxr-xr-x | tests/validate-html/fetch_data.py | 46 | ||||
-rwxr-xr-x | tests/validate-html/run-validator.scm | 84 |
3 files changed, 132 insertions, 0 deletions
diff --git a/tests/validate-html/.gitignore b/tests/validate-html/.gitignore
new file mode 100644
index 00000000..1ac40fc2
--- /dev/null
+++ b/tests/validate-html/.gitignore
@@ -0,0 +1,2 @@
+*.xhtml
+geckodriver.log
diff --git a/tests/validate-html/fetch_data.py b/tests/validate-html/fetch_data.py
new file mode 100755
index 00000000..14ecca75
--- /dev/null
+++ b/tests/validate-html/fetch_data.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import subprocess
+import urllib.request
+
+from selenium import webdriver
+from selenium.webdriver.firefox.options import Options
+
+def fetch_rendered(url, port):
+    options = Options()
+    options.add_argument('--headless')
+    driver = webdriver.Firefox(options=options)
+
+    driver.get(url)
+    page_source = driver.page_source
+
+    # TODO check encoding from driver
+    page_encoded = page_source.encode('utf-8')
+
+    cmd = subprocess.run(['xmllint', '--format', '-'],
+                         input=page_encoded,
+                         capture_output=True)
+
+    if cmd.returncode == 0:
+        port.write(cmd.stdout)
+    else:
+        port.write(page_encoded)
+
+def fetch_raw(url, port):
+    response = urllib.request.urlopen(url)
+    data = response.read()
+    port.write(data)
+
+url = 'http://localhost:8080/week/2022-03-31.html'
+
+with open('raw.xhtml', 'wb') as f:
+    fetch_raw(url, f)
+
+# with open('raw.html', 'wb') as f:
+#     fetch_raw(f'{url}?html', f)
+
+with open('selenium.xhtml', 'wb') as f:
+    fetch_rendered(url, f)
+
+# with open('selenium.html', 'wb') as f:
+#     fetch_rendered(f'{url}?html', f)
diff --git a/tests/validate-html/run-validator.scm b/tests/validate-html/run-validator.scm
new file mode 100755
index 00000000..7e3c9f76
--- /dev/null
+++ b/tests/validate-html/run-validator.scm
@@ -0,0 +1,84 @@
+#!/usr/bin/bash
+# -*- mode: scheme; geiser-scheme-implementation: guile -*-
+here=$(dirname $(realpath $0))
+
+. "$(dirname "$(dirname "$here")")/env"
+
+exec $GUILE -e main -s "$0" -- "$@"
+!#
+
+(use-modules (sxml simple)
+             ((sxml xpath) :select (sxpath))
+             (sxml match)
+             (rnrs lists)
+             (ice-9 regex)
+             (ice-9 popen)
+             (ice-9 format)
+             ((hnh util) :select (group-by ->)))
+
+(define (error-string error)
+  (cond (((sxpath '(// nu:message)) error)
+         (negate null?) => (compose sxml->string car))
+        (else "")))
+
+(define (ignore-rule error)
+  (string-match "Element (calendar|icalendar) not allowed as child"
+                (error-string error)))
+
+(define (group-by-file entries)
+  (group-by (sxpath '(// @ url))
+            entries))
+
+(define (display-entry entry)
+  (sxml-match
+   entry
+   [(nu:error (@ (last-line ,last-line)
+                 (first-column ,first-column)
+                 (last-column ,last-column))
+              (nu:message ,msg ...)
+              (nu:extract ,extract ...))
+    (format #t " - ERROR - ~a:~a-~a - ~a - ~a~%"
+            last-line first-column last-column
+            (sxml->string `(nu:message ,@msg))
+            (sxml->string `(nu:extract ,@extract)))]
+
+   [(nu:info (@ (last-line ,last-line)
+                (first-column ,first-column)
+                (last-column ,last-column)
+                (type ,type))
+             (nu:message ,msg ...)
+             (nu:extract ,extract ...))
+    (format #t " - ~5a - ~a:~a-~a - ~a - ~a~%"
+            type last-line first-column last-column
+            (sxml->string `(nu:message ,@msg))
+            (sxml->string `(nu:extract ,@extract)))]))
+
+(define (main args)
+  (define pipe (open-pipe* OPEN_READ "html5validator"
+                           "--format" "xml"
+                           ;; "--verbose"
+                           "--show-warnings"
+                           "--"
+                           "selenium.xhtml"
+                           "raw.xhtml"
+                           ))
+  (define data (xml->sxml pipe
+                          #:trim-whitespace? #t
+                          #:namespaces
+                          '((nu . "http://n.validator.nu/messages/")
+                            (xhtml . "http://www.w3.org/1999/xhtml"))))
+  (close-pipe pipe)
+  (let ((filtered-data
+         (filter (negate ignore-rule)
+                 ((sxpath '(// nu:messages *)) data))))
+    (if (null? filtered-data)
+        (begin
+          (display "Everything fine!")
+          (newline)
+          (exit 0))
+        (begin
+          (for-each (lambda (group)
+                      (format #t "~a~%" (-> group car (assoc-ref 'url) car))
+                      (for-each display-entry (cadr group)))
+                    (group-by-file filtered-data))
+          (exit 1)))))