aboutsummaryrefslogtreecommitdiff
path: root/tests/validate-html/fetch_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/validate-html/fetch_data.py')
-rwxr-xr-xtests/validate-html/fetch_data.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/tests/validate-html/fetch_data.py b/tests/validate-html/fetch_data.py
new file mode 100755
index 00000000..14ecca75
--- /dev/null
+++ b/tests/validate-html/fetch_data.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import subprocess
+import urllib.request
+
+from selenium import webdriver
+from selenium.webdriver.firefox.options import Options
+
+def fetch_rendered(url, port):
+ options = Options()
+ options.add_argument('--headless')
+ driver = webdriver.Firefox(options=options)
+
+ driver.get(url)
+ page_source = driver.page_source
+
+ # TODO check encoding from driver
+ page_encoded = page_source.encode('utf-8')
+
+ cmd = subprocess.run(['xmllint', '--format', '-'],
+ input=page_encoded,
+ capture_output=True)
+
+ if cmd.returncode == 0:
+ port.write(cmd.stdout)
+ else:
+ port.write(page_encoded)
+
+def fetch_raw(url, port):
+ response = urllib.request.urlopen(url)
+ data = response.read()
+ port.write(data)
+
+url = 'http://localhost:8080/week/2022-03-31.html'
+
+with open('raw.xhtml', 'wb') as f:
+ fetch_raw(url, f)
+
+# with open('raw.html', 'wb') as f:
+# fetch_raw(f'{url}?html', f)
+
+with open('selenium.xhtml', 'wb') as f:
+ fetch_rendered(url, f)
+
+# with open('selenium.html', 'wb') as f:
+# fetch_rendered(f'{url}?html', f)