aboutsummaryrefslogtreecommitdiff
path: root/tests/validate-html/fetch_data.py
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-06-13 12:09:16 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-06-13 12:09:16 +0200
commit9d4ce0b515fd71dc38fb24db77be9572ebf0df64 (patch)
tree3d0b005c4ab79577fe4847210e78a54f310dbebf /tests/validate-html/fetch_data.py
parentCleanup of zic. (diff)
parentReplace some .tagName with instanceof. (diff)
downloadcalp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.gz
calp-9d4ce0b515fd71dc38fb24db77be9572ebf0df64.tar.xz
Merge html-validator.
Adds an HTML validator which checks the soundness of our generated document, both before and after JavaScript is run (thanks to Selenium). This merge also fixes the initial problems, meaning that the HTML should validate as of this commit.
Diffstat (limited to 'tests/validate-html/fetch_data.py')
-rwxr-xr-xtests/validate-html/fetch_data.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/tests/validate-html/fetch_data.py b/tests/validate-html/fetch_data.py
new file mode 100755
index 00000000..14ecca75
--- /dev/null
+++ b/tests/validate-html/fetch_data.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import subprocess
+import urllib.request
+
+from selenium import webdriver
+from selenium.webdriver.firefox.options import Options
+
def fetch_rendered(url, port):
    """Write the page source of `url`, as seen by headless Firefox, to `port`.

    The page is loaded through Selenium's Firefox driver and its
    ``page_source`` is captured, encoded as UTF-8 and piped through
    ``xmllint --format -``. If xmllint exits with status 0 its output is
    written; otherwise the unformatted UTF-8 source is written instead.

    Arguments:
    url  -- address of the page to load.
    port -- writable binary file object that receives the bytes.
    """
    options = Options()
    options.add_argument('--headless')
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(url)
        page_source = driver.page_source
    finally:
        # Always shut the browser down. Without this every call leaves a
        # browser session running, even when loading the page fails.
        driver.quit()

    # TODO check encoding from driver
    page_encoded = page_source.encode('utf-8')

    cmd = subprocess.run(['xmllint', '--format', '-'],
                         input=page_encoded,
                         capture_output=True)

    if cmd.returncode == 0:
        port.write(cmd.stdout)
    else:
        # Best effort: keep the unformatted source rather than nothing
        # when xmllint rejects the document.
        port.write(page_encoded)
+
def fetch_raw(url, port):
    """Write the body of `url`, exactly as returned by urlopen, to `port`.

    Arguments:
    url  -- address to fetch; anything urllib.request.urlopen accepts.
    port -- writable binary file object that receives the bytes.
    """
    # Use the response as a context manager so the underlying connection
    # is closed even if reading or writing raises.
    with urllib.request.urlopen(url) as response:
        port.write(response.read())
+
# Page to snapshot; expects the server to be listening locally.
url = 'http://localhost:8080/week/2022-03-31.html'

# Output file -> fetcher, processed in order: first the document exactly
# as served, then the document as rendered by the browser.
#
# The `?html` variants are currently disabled. To enable them, fetch
# f'{url}?html' with fetch_raw into 'raw.html' and with fetch_rendered
# into 'selenium.html'.
targets = (
    ('raw.xhtml', fetch_raw),
    ('selenium.xhtml', fetch_rendered),
)

for filename, fetch in targets:
    with open(filename, 'wb') as out:
        fetch(url, out)