diff options
author | Hugo Hörnquist <hugo@lysator.liu.se> | 2023-02-07 01:30:54 +0100 |
---|---|---|
committer | Hugo Hörnquist <hugo@lysator.liu.se> | 2023-02-07 01:30:54 +0100 |
commit | e11826cf81374af28d81d12fdef3f69fd0d03eae (patch) | |
tree | 1642dd5f06dc3ce4163286d9e908637fad6ce428 | |
parent | Add various TODOs (diff) | |
download | mu4web-e11826cf81374af28d81d12fdef3f69fd0d03eae.tar.gz mu4web-e11826cf81374af28d81d12fdef3f69fd0d03eae.tar.xz |
Also rewrite <script> and <a> tags, as well as <img> tags whose src is not a cid: URI.
-rw-r--r-- | mu4web/main.py | 82 | ||||
-rw-r--r-- | mu4web/static/content-blocked.svg | 9 | ||||
-rw-r--r-- | mu4web/static/enable_images.js | 10 |
3 files changed, 82 insertions, 19 deletions
diff --git a/mu4web/main.py b/mu4web/main.py index e6b1092..981682a 100644 --- a/mu4web/main.py +++ b/mu4web/main.py @@ -472,25 +472,62 @@ class IMGParser(HTMLParser): self.result = result self.msg_id = msg_id - def handle_starttag(self, _, attrs): - self.result += '<img ' - for key, value in attrs: - if key == 'src': - if m := IMGParser.rx.match(value): - params = urlencode({'id': self.msg_id, 'cid': m[1]}) - data = '/cid?' + params + def handle_starttag(self, tag, attrs): + # TODO this will also get called for self closing tags + # (<img/>), which will drop that slash. FIX + + if tag == 'img': + # - Expand img tags with CID: url's to point to our server. + # These should be safe (from a tracking perspective) since + # they are downloaded as part of the mail. + # - Other images are blocked, a piece of javascript is + # later added to unblock them on click + self.result += '<img ' + for key, value in attrs: + if key == 'src': + if m := IMGParser.rx.match(value): + params = urlencode({'id': self.msg_id, 'cid': m[1]}) + key = html.escape(key) + data = '/cid?' + params + self.result += f' {key}="{data}"' + else: + # TODO Insert information about the blocked + # resource into the image, such as title text, + # original src, approximate size (and from + # that: infer if it's an image only used for tracking) + key = 'data-src' + data = html.escape(value, quote=True) + self.result += f' {key}="{data}"' + + key = 'src' + data = '/static/content-blocked.svg' + self.result += f' {key}="{data}"' else: - data = value - else: - data = value - key = html.escape(key) - data = html.escape(data, quote=True) - - self.result += f'{key}="{data}"' + key = html.escape(key) + data = html.escape(value, quote=True) + + self.result += f' {key}="{data}"' + self.result += '>' + + elif tag == 'script': + # Keep script tag contents, but change it to text. I'm not + # sure how many try to inject javascript into their + # emails, but we don't want any of it. 
+ args = ' '.join(f'{key}={value}' for (key, value) in attrs) + self.result += '<pre>' + html.escape(f'<script {args}>') + '</pre>' + + elif tag == 'a': + # Add target="_parent" to all anchors. This causes links + # in iframe:s (where the content will probably be shown) + # to open in the current (top level) page, instead of + # inside the iframe. + args = ' '.join(f'{html.escape(key)}={html.escape(value)}' + for (key, value) + in [*attrs, ('target', '_parent')]) + self.result += f'<a {args}>' - # TODO this will also get called for self closing tags - # (<a/>), which will drop that slash. FIX - self.result += '>' + else: + assert False, 'Should never be reached' @app.route('/part') @@ -519,15 +556,22 @@ def attachement_part_page(): # on or off result = MutableString() - idx = 0 # Content encoding here? source = attachement.get_content() parser = IMGParser(result, msg_id) - for m in re.finditer(r'< *img[^>]*>', source): + idx = 0 + for m in re.finditer(r'< *(a|img|script)[^>]*>', source): result += source[idx:m.start()] idx = m.end() parser.feed(m[0]) + result += source[idx:] + + # This script adds an onclick event for each image we blocked + # above, which unblocks it. + # TODO this "fails" for images wrapped in anchor tags, since + # the anchor tag has priority. 
+ result += "\n<script src='/static/enable_images.js'></script>" return str(result) diff --git a/mu4web/static/content-blocked.svg b/mu4web/static/content-blocked.svg new file mode 100644 index 0000000..4bab3a2 --- /dev/null +++ b/mu4web/static/content-blocked.svg @@ -0,0 +1,9 @@ +<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 400 200" + style="background-color: black"> + <rect x="0" y="0" width="100%" height="100%"/> + <text + x="50%" y="50%" + text-anchor="middle" + dominant-baseline="middle" + style="fill: red; font-size: 50px">Image blocked</text> +</svg> diff --git a/mu4web/static/enable_images.js b/mu4web/static/enable_images.js new file mode 100644 index 0000000..6a85135 --- /dev/null +++ b/mu4web/static/enable_images.js @@ -0,0 +1,10 @@ +window.addEventListener('load', () => { + for (let img of document.querySelectorAll('img')) { + let true_source; + if ((true_source = img.dataset.src)) { + img.addEventListener('click', () => { + img.src = true_source; + }) + } + } +}) |