diff options
author | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-06-13 12:09:16 +0200 |
---|---|---|
committer | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-06-13 12:09:16 +0200 |
commit | 9d4ce0b515fd71dc38fb24db77be9572ebf0df64 (patch) | |
tree | 3d0b005c4ab79577fe4847210e78a54f310dbebf /tests/validate-html/fetch_data.py | |
parent | Cleanup of zic. (diff) | |
parent | Replace some .tagName with instanceof. (diff) | |
download | calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.gz calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.xz |
Merge html-validator.
Adds an HTML validator which checks the soundness of our generated
document, both before and after JavaScript has run (thanks to Selenium).
This merge also fixes the initial problems, meaning that the HTML should
validate as of this commit.
Diffstat (limited to 'tests/validate-html/fetch_data.py')
-rwxr-xr-x | tests/validate-html/fetch_data.py | 46 |
1 files changed, 46 insertions, 0 deletions
#!/usr/bin/env python
"""Fetch the generated week page for HTML validation.

Two snapshots of the same URL are written to the current directory:

* ``raw.xhtml``      -- the bytes exactly as served over HTTP.
* ``selenium.xhtml`` -- the document as serialized by a headless Firefox
  after loading the page, pretty-printed with ``xmllint`` when possible.
"""

import subprocess
import urllib.request

from selenium import webdriver
from selenium.webdriver.firefox.options import Options


def fetch_rendered(url, port):
    """Load *url* in headless Firefox and write the resulting page to *port*.

    The page source reported by the browser is encoded as UTF-8 and piped
    through ``xmllint --format -``.  If xmllint exits successfully its
    output is written; otherwise the unformatted page is written instead.

    *port* must be a writable binary file-like object.
    """
    options = Options()
    options.add_argument('--headless')
    driver = webdriver.Firefox(options=options)

    # Always shut the browser down, even if loading the page fails;
    # otherwise each run leaves a headless Firefox process behind.
    try:
        driver.get(url)
        page_source = driver.page_source
    finally:
        driver.quit()

    # TODO check encoding from driver
    page_encoded = page_source.encode('utf-8')

    cmd = subprocess.run(['xmllint', '--format', '-'],
                         input=page_encoded,
                         capture_output=True)

    if cmd.returncode == 0:
        port.write(cmd.stdout)
    else:
        # Best effort: keep the unformatted page so it can still be
        # inspected and validated.
        port.write(page_encoded)


def fetch_raw(url, port):
    """Download *url* and write the response body, unmodified, to *port*.

    *port* must be a writable binary file-like object.
    """
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as response:
        data = response.read()
    port.write(data)


url = 'http://localhost:8080/week/2022-03-31.html'

with open('raw.xhtml', 'wb') as f:
    fetch_raw(url, f)

# with open('raw.html', 'wb') as f:
#     fetch_raw(f'{url}?html', f)

with open('selenium.xhtml', 'wb') as f:
    fetch_rendered(url, f)

# with open('selenium.html', 'wb') as f:
#     fetch_rendered(f'{url}?html', f)