about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hugo Hörnquist <hugo@lysator.liu.se> 2023-02-07 01:30:54 +0100
committer: Hugo Hörnquist <hugo@lysator.liu.se> 2023-02-07 01:30:54 +0100
commit: e11826cf81374af28d81d12fdef3f69fd0d03eae (patch)
tree: 1642dd5f06dc3ce4163286d9e908637fad6ce428
parent: Add various TODOs (diff)
download: mu4web-e11826cf81374af28d81d12fdef3f69fd0d03eae.tar.gz
download: mu4web-e11826cf81374af28d81d12fdef3f69fd0d03eae.tar.xz
Also rewrite <script> and <a> tags, and <img> tags without cid: URIs.
-rw-r--r-- mu4web/main.py 82
-rw-r--r-- mu4web/static/content-blocked.svg 9
-rw-r--r-- mu4web/static/enable_images.js 10
3 files changed, 82 insertions, 19 deletions
diff --git a/mu4web/main.py b/mu4web/main.py
index e6b1092..981682a 100644
--- a/mu4web/main.py
+++ b/mu4web/main.py
@@ -472,25 +472,62 @@ class IMGParser(HTMLParser):
self.result = result
self.msg_id = msg_id
- def handle_starttag(self, _, attrs):
- self.result += '<img '
- for key, value in attrs:
- if key == 'src':
- if m := IMGParser.rx.match(value):
- params = urlencode({'id': self.msg_id, 'cid': m[1]})
- data = '/cid?' + params
+ def handle_starttag(self, tag, attrs):
+ # TODO this will also get called for self closing tags
+ # (<img/>), which will drop that slash. FIX
+
+ if tag == 'img':
+ # - Expand img tags with CID: url's to point to our server.
+ # These should be safe (from a tracking perspective) since
+ # they are downloaded as part of the mail.
+ # - Other images are blocked, a piece of javascript is
+ # later added to unblock them on click
+ self.result += '<img '
+ for key, value in attrs:
+ if key == 'src':
+ if m := IMGParser.rx.match(value):
+ params = urlencode({'id': self.msg_id, 'cid': m[1]})
+ key = html.escape(key)
+ data = '/cid?' + params
+ self.result += f' {key}="{data}"'
+ else:
+ # TODO Insert information about the blocked
+ # resource into the image, such as title text,
+ # original src, approximate size (and from
+ # that: infer if it's an image only used for tracking)
+ key = 'data-src'
+ data = html.escape(value, quote=True)
+ self.result += f' {key}="{data}"'
+
+ key = 'src'
+ data = '/static/content-blocked.svg'
+ self.result += f' {key}="{data}"'
else:
- data = value
- else:
- data = value
- key = html.escape(key)
- data = html.escape(data, quote=True)
-
- self.result += f'{key}="{data}"'
+ key = html.escape(key)
+ data = html.escape(value, quote=True)
+
+ self.result += f' {key}="{data}"'
+ self.result += '>'
+
+ elif tag == 'script':
+ # Keep script tag contents, but change it to text. I'm not
+ # sure how many try to inject javascript into their
+ # emails, but we don't want any of it.
+ args = ' '.join(f'{key}={value}' for (key, value) in attrs)
+ self.result += '<pre>' + html.escape(f'<script {args}>') + '</pre>'
+
+ elif tag == 'a':
+ # Add target="_parent" to all anchors. This causes links
+ # in iframe:s (where the content will probably be shown)
+ # to open in the current (top level) page, instead of
+ # inside the iframe.
+ args = ' '.join(f'{html.escape(key)}={html.escape(value)}'
+ for (key, value)
+ in [*attrs, ('target', '_parent')])
+ self.result += f'<a {args}>'
- # TODO this will also get called for self closing tags
- # (<a/>), which will drop that slash. FIX
- self.result += '>'
+ else:
+ assert False, 'Should never be reached'
@app.route('/part')
@@ -519,15 +556,22 @@ def attachement_part_page():
# on or off
result = MutableString()
- idx = 0
# Content encoding here?
source = attachement.get_content()
parser = IMGParser(result, msg_id)
- for m in re.finditer(r'< *img[^>]*>', source):
+ idx = 0
+ for m in re.finditer(r'< *(a|img|script)[^>]*>', source):
result += source[idx:m.start()]
idx = m.end()
parser.feed(m[0])
+ result += source[idx:]
+
+ # This script adds an onclick event for each image we blocked
+ # above, which unblocks it.
+ # TODO this "fails" for images wrapped in anchor tags, since
+ # the anchor tag has priority.
+ result += "\n<script src='/static/enable_images.js'></script>"
return str(result)
diff --git a/mu4web/static/content-blocked.svg b/mu4web/static/content-blocked.svg
new file mode 100644
index 0000000..4bab3a2
--- /dev/null
+++ b/mu4web/static/content-blocked.svg
@@ -0,0 +1,9 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 400 200"
+ style="background-color: black">
+ <rect x="0" y="0" width="100%" height="100%"/>
+ <text
+ x="50%" y="50%"
+ text-anchor="middle"
+ dominant-baseline="middle"
+ style="fill: red; font-size: 50px">Image blocked</text>
+</svg>
diff --git a/mu4web/static/enable_images.js b/mu4web/static/enable_images.js
new file mode 100644
index 0000000..6a85135
--- /dev/null
+++ b/mu4web/static/enable_images.js
@@ -0,0 +1,10 @@
+window.addEventListener('load', () => {
+ for (let img of document.querySelectorAll('img')) {
+ let true_source;
+ if ((true_source = img.dataset.src)) {
+ img.addEventListener('click', () => {
+ img.src = true_source;
+ })
+ }
+ }
+})