Merge html-validator.

Adds an HTML validator which checks the soundness of our generated document, both before and after JavaScript is run (thanks to Selenium). This merge also fixes the initial problems, meaning that the HTML should validate as of this commit.
author: Hugo Hörnquist <hugo@lysator.liu.se> 2022-06-13 12:09:16 +0200
committer: Hugo Hörnquist <hugo@lysator.liu.se> 2022-06-13 12:09:16 +0200
commit: 9d4ce0b515fd71dc38fb24db77be9572ebf0df64 (patch)
tree: 3d0b005c4ab79577fe4847210e78a54f310dbebf /tests/validate-html/fetch_data.py
parent: Cleanup of zic. (diff)
parent: Replace some .tagName with instanceof. (diff)
download: calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.gz
calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.xz
1 files changed, 46 insertions, 0 deletions
diff --git a/tests/validate-html/fetch_data.py b/tests/validate-html/fetch_data.py
new file mode 100755
index 00000000..14ecca75
--- /dev/null
+++ b/tests/validate-html/fetch_data.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import subprocess
+import urllib.request
+
+from selenium import webdriver
+from selenium.webdriver.firefox.options import Options
+
+def fetch_rendered(url, port):
+    options = Options()
+    options.add_argument('--headless')
+    driver = webdriver.Firefox(options=options)
+
+    driver.get(url)
+    page_source = driver.page_source
+
+    # TODO check encoding from driver
+    page_encoded = page_source.encode('utf-8')
+
+    cmd = subprocess.run(['xmllint', '--format', '-'],
+            input=page_encoded,
+            capture_output=True)
+
+    if cmd.returncode == 0:
+        port.write(cmd.stdout)
+    else:
+        port.write(page_encoded)
+
+def fetch_raw(url, port):
+    response = urllib.request.urlopen(url)
+    data = response.read()
+    port.write(data)
+    
+url = 'http://localhost:8080/week/2022-03-31.html'
+
+with open('raw.xhtml', 'wb') as f:
+    fetch_raw(url, f)
+
+# with open('raw.html', 'wb') as f:
+#     fetch_raw(f'{url}?html', f)
+
+with open('selenium.xhtml', 'wb') as f:
+    fetch_rendered(url, f)
+
+# with open('selenium.html', 'wb') as f:
+#     fetch_rendered(f'{url}?html', f)
author	Hugo Hörnquist <hugo@lysator.liu.se>	2022-06-13 12:09:16 +0200
committer	Hugo Hörnquist <hugo@lysator.liu.se>	2022-06-13 12:09:16 +0200
commit	9d4ce0b515fd71dc38fb24db77be9572ebf0df64 (patch)
tree	3d0b005c4ab79577fe4847210e78a54f310dbebf /tests/validate-html/fetch_data.py
parent	Cleanup of zic. (diff)
parent	Replace some .tagName with instanceof. (diff)
download	calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.gz calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.xz