From 3a305ffce4ccdf505a3f3c81cee0df55020d5b4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?=
Date: Sun, 12 Jun 2022 03:08:57 +0200
Subject: Add html validator.

---
 tests/validate-html/.gitignore        |  2 +
 tests/validate-html/fetch_data.py     | 72 ++++++++++++++++++++++++++++++
 tests/validate-html/run-validator.scm | 84 +++++++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+)
 create mode 100644 tests/validate-html/.gitignore
 create mode 100755 tests/validate-html/fetch_data.py
 create mode 100755 tests/validate-html/run-validator.scm

diff --git a/tests/validate-html/.gitignore b/tests/validate-html/.gitignore
new file mode 100644
index 00000000..1ac40fc2
--- /dev/null
+++ b/tests/validate-html/.gitignore
@@ -0,0 +1,2 @@
+*.xhtml
+geckodriver.log
diff --git a/tests/validate-html/fetch_data.py b/tests/validate-html/fetch_data.py
new file mode 100755
index 00000000..14ecca75
--- /dev/null
+++ b/tests/validate-html/fetch_data.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+"""Fetch the page under test in the two forms the validator checks.
+
+Writes the response body as served to ``raw.xhtml`` and the page source
+reported by headless Firefox to ``selenium.xhtml``, both in the current
+directory.
+"""
+
+import subprocess
+import urllib.request
+
+from selenium import webdriver
+from selenium.webdriver.firefox.options import Options
+
+
+def fetch_rendered(url, port):
+    """Load *url* in headless Firefox and write its page source to *port*.
+
+    The source is encoded as UTF-8 and passed through ``xmllint --format``.
+    When xmllint is missing or exits non-zero the unformatted bytes are
+    written instead.  *port* must accept bytes.
+    """
+    options = Options()
+    options.add_argument('--headless')
+    driver = webdriver.Firefox(options=options)
+
+    try:
+        driver.get(url)
+        page_source = driver.page_source
+    finally:
+        # End the browser session even when loading fails; otherwise every
+        # run leaves a headless Firefox behind.
+        driver.quit()
+
+    # TODO check encoding from driver
+    page_encoded = page_source.encode('utf-8')
+
+    try:
+        cmd = subprocess.run(['xmllint', '--format', '-'],
+                             input=page_encoded,
+                             capture_output=True)
+    except FileNotFoundError:
+        # Formatting is best effort: a missing xmllint is treated like a
+        # failing one.
+        cmd = None
+
+    if cmd is not None and cmd.returncode == 0:
+        port.write(cmd.stdout)
+    else:
+        port.write(page_encoded)
+
+
+def fetch_raw(url, port):
+    """Write the response body for *url*, unmodified, to *port*."""
+    # The context manager closes the connection once the body is read.
+    with urllib.request.urlopen(url) as response:
+        port.write(response.read())
+
+
+url = 'http://localhost:8080/week/2022-03-31.html'
+
+with open('raw.xhtml', 'wb') as f:
+    fetch_raw(url, f)
+
+# with open('raw.html', 'wb') as f:
+#     fetch_raw(f'{url}?html', f)
+
+with open('selenium.xhtml', 'wb') as f:
+    fetch_rendered(url, f)
+
+# with open('selenium.html', 'wb') as f:
+#     fetch_rendered(f'{url}?html', f)
diff --git a/tests/validate-html/run-validator.scm b/tests/validate-html/run-validator.scm
new file mode 100755
index 00000000..7e3c9f76
--- /dev/null
+++ b/tests/validate-html/run-validator.scm
@@ -0,0 +1,107 @@
+#!/usr/bin/bash
+# -*- mode: scheme; geiser-scheme-implementation: guile -*-
+# Shell preamble: source the env file two directories above this script,
+# then re-run this same file under $GUILE with main as the entry point.
+# The path is quoted so a checkout directory containing whitespace works.
+here=$(dirname "$(realpath "$0")")
+
+. "$(dirname "$(dirname "$here")")/env"
+
+exec $GUILE -e main -s "$0" -- "$@"
+!#
+
+;;; Runs html5validator on selenium.xhtml and raw.xhtml in the current
+;;; directory and prints the messages it reports, grouped by file.
+;;; Exits with status 0 when nothing is left after filtering, 1 otherwise.
+
+(use-modules (sxml simple)
+             ((sxml xpath) :select (sxpath))
+             (sxml match)
+             (rnrs lists)
+             (ice-9 regex)
+             (ice-9 popen)
+             (ice-9 format)
+             ((hnh util) :select (group-by ->)))
+
+;; Return the text of the first nu:message found below ERROR, or the
+;; empty string when ERROR contains none.
+(define (error-string error)
+  (cond (((sxpath '(// nu:message)) error)
+         (negate null?) => (compose sxml->string car))
+        (else "")))
+
+;; Non-false for messages that should not be reported: the validator's
+;; complaint about calendar and icalendar elements used as children.
+(define (ignore-rule error)
+  (string-match "Element (calendar|icalendar) not allowed as child"
+                (error-string error)))
+
+;; Group ENTRIES by their url attribute.  main reads each group as a key
+;; followed by the list of entries sharing that key.
+(define (group-by-file entries)
+  (group-by (sxpath '(// @ url))
+            entries))
+
+;; Print one report line for ENTRY: label, position, message text and
+;; the source extract.
+(define (display-entry entry)
+  (sxml-match
+   entry
+   [(nu:error (@ (last-line ,last-line)
+                 (first-column ,first-column)
+                 (last-column ,last-column))
+              (nu:message ,msg ...)
+              (nu:extract ,extract ...))
+    (format #t " - ERROR - ~a:~a-~a - ~a - ~a~%"
+            last-line first-column last-column
+            (sxml->string `(nu:message ,@msg))
+            (sxml->string `(nu:extract ,@extract)))]
+
+   ;; For nu:info the type attribute takes the place of the ERROR label.
+   [(nu:info (@ (last-line ,last-line)
+                (first-column ,first-column)
+                (last-column ,last-column)
+                (type ,type))
+             (nu:message ,msg ...)
+             (nu:extract ,extract ...))
+    (format #t " - ~5a - ~a:~a-~a - ~a - ~a~%"
+            type last-line first-column last-column
+            (sxml->string `(nu:message ,@msg))
+            (sxml->string `(nu:extract ,@extract)))]
+
+   ;; Any entry of another shape is printed as-is, so that one unexpected
+   ;; message cannot cut the report short with a match failure.
+   [,other
+    (format #t " - ~s~%" other)]))
+
+;; Entry point.  ARGS is unused; the files to validate are fixed.
+(define (main args)
+  (define pipe (open-pipe* OPEN_READ "html5validator"
+                           "--format" "xml"
+                           ;; "--verbose"
+                           "--show-warnings"
+                           "--"
+                           "selenium.xhtml"
+                           "raw.xhtml"
+                           ))
+  ;; The validator's XML report is parsed straight from its stdout.
+  (define data (xml->sxml pipe
+                          #:trim-whitespace? #t
+                          #:namespaces
+                          '((nu . "http://n.validator.nu/messages/")
+                            (xhtml . "http://www.w3.org/1999/xhtml"))))
+  (close-pipe pipe)
+  (let ((filtered-data
+         (filter (negate ignore-rule)
+                 ((sxpath '(// nu:messages *)) data))))
+    (if (null? filtered-data)
+        (begin
+          (display "Everything fine!")
+          (newline)
+          (exit 0))
+        (begin
+          (for-each (lambda (group)
+                      (format #t "~a~%" (-> group car (assoc-ref 'url) car))
+                      (for-each display-entry (cadr group)))
+                    (group-by-file filtered-data))
+          (exit 1)))))
-- 
cgit v1.2.3