From e11826cf81374af28d81d12fdef3f69fd0d03eae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Tue, 7 Feb 2023 01:30:54 +0100 Subject: Also rewrite script, a, and img tags without cid uri:s. --- mu4web/main.py | 82 ++++++++++++++++++++++++++++++--------- mu4web/static/content-blocked.svg | 9 +++++ mu4web/static/enable_images.js | 10 +++++ 3 files changed, 82 insertions(+), 19 deletions(-) create mode 100644 mu4web/static/content-blocked.svg create mode 100644 mu4web/static/enable_images.js diff --git a/mu4web/main.py b/mu4web/main.py index e6b1092..981682a 100644 --- a/mu4web/main.py +++ b/mu4web/main.py @@ -472,25 +472,62 @@ class IMGParser(HTMLParser): self.result = result self.msg_id = msg_id - def handle_starttag(self, _, attrs): - self.result += '), which will drop that slash. FIX + + if tag == 'img': + # - Expand img tags with CID: url's to point to our server. + # These should be safe (from a tracking perspective) since + # they are downloaded as part of the mail. + # - Other images are blocked, a piece of javascript is + # later added to unblock them on click + self.result += '') + '' + + elif tag == 'a': + # Add target="_parent" to all anchors. This causes links + # in iframe:s (where the content will probably be shown) + # to open in the current (top level) page, instead of + # inside the iframe. + args = ' '.join(f'{html.escape(key)}={html.escape(value)}' + for (key, value) + in [*attrs, ('target', '_parent')]) + self.result += f'' - # TODO this will also get called for self closing tags - # (), which will drop that slash. FIX - self.result += '>' + else: + assert False, 'Should never be reached' @app.route('/part') @@ -519,15 +556,22 @@ def attachement_part_page(): # on or off result = MutableString() - idx = 0 # Content encoding here? source = attachement.get_content() parser = IMGParser(result, msg_id) - for m in re.finditer(r'< *img[^>]*>', source): + idx = 0 + for m in re.finditer(r'< *(a|img|script)[^>]*>', source): result += source[idx:m.start()] idx = m.end() parser.feed(m[0]) + result += source[idx:] + + # This script adds an onclick event for each image we blocked + # above, which unblocks it. + # TODO this "fails" for images wrapped in anchor tags, since + # the anchor tag has priority. + result += "\n" return str(result) diff --git a/mu4web/static/content-blocked.svg b/mu4web/static/content-blocked.svg new file mode 100644 index 0000000..4bab3a2 --- /dev/null +++ b/mu4web/static/content-blocked.svg @@ -0,0 +1,9 @@ + + + Image blocked + diff --git a/mu4web/static/enable_images.js b/mu4web/static/enable_images.js new file mode 100644 index 0000000..6a85135 --- /dev/null +++ b/mu4web/static/enable_images.js @@ -0,0 +1,10 @@ +window.addEventListener('load', () => { + for (let img of document.querySelectorAll('img')) { + let true_source; + if ((true_source = img.dataset.src)) { + img.addEventListener('click', () => { + img.src = true_source; + }) + } + } +}) -- cgit v1.2.3