diff options
Diffstat (limited to 'mu4web/main.py')
-rw-r--r-- | mu4web/main.py | 82 |
1 files changed, 63 insertions, 19 deletions
diff --git a/mu4web/main.py b/mu4web/main.py index e6b1092..981682a 100644 --- a/mu4web/main.py +++ b/mu4web/main.py @@ -472,25 +472,62 @@ class IMGParser(HTMLParser): self.result = result self.msg_id = msg_id - def handle_starttag(self, _, attrs): - self.result += '<img ' - for key, value in attrs: - if key == 'src': - if m := IMGParser.rx.match(value): - params = urlencode({'id': self.msg_id, 'cid': m[1]}) - data = '/cid?' + params + def handle_starttag(self, tag, attrs): + # TODO this will also get called for self closing tags + # (<img/>), which will drop that slash. FIX + + if tag == 'img': + # - Expand img tags with CID: url's to point to our server. + # These should be safe (from a tracking perspective) since + # they are downloaded as part of the mail. + # - Other images are blocked, a piece of javascript is + # later added to unblock them on click + self.result += '<img ' + for key, value in attrs: + if key == 'src': + if m := IMGParser.rx.match(value): + params = urlencode({'id': self.msg_id, 'cid': m[1]}) + key = html.escape(key) + data = '/cid?' + params + self.result += f' {key}="{data}"' + else: + # TODO Insert information about the blocked + # resource into the image, such as title text, + # original src, approximate size (and from + # that: infer if it's an image only used for tracking) + key = 'data-src' + data = html.escape(value, quote=True) + self.result += f' {key}="{data}"' + + key = 'src' + data = '/static/content-blocked.svg' + self.result += f' {key}="{data}"' else: - data = value - else: - data = value - key = html.escape(key) - data = html.escape(data, quote=True) - - self.result += f'{key}="{data}"' + key = html.escape(key) + data = html.escape(value, quote=True) + + self.result += f' {key}="{data}"' + self.result += '>' + + elif tag == 'script': + # Keep script tag contents, but change it to text. I'm not + # sure how many try to inject javascript into their + # emails, but we don't want any of it. + args = ' '.join(f'{key}={value}' for (key, value) in attrs) + self.result += '<pre>' + html.escape(f'<script {args}>') + '</pre>' + + elif tag == 'a': + # Add target="_parent" to all anchors. This causes links + # in iframe:s (where the content will probably be shown) + # to open in the current (top level) page, instead of + # inside the iframe. + args = ' '.join(f'{html.escape(key)}={html.escape(value)}' + for (key, value) + in [*attrs, ('target', '_parent')]) + self.result += f'<a {args}>' - # TODO this will also get called for self closing tags - # (<a/>), which will drop that slash. FIX - self.result += '>' + else: + assert False, 'Should never be reached' @app.route('/part') @@ -519,15 +556,22 @@ def attachement_part_page(): # on or off result = MutableString() - idx = 0 # Content encoding here? source = attachement.get_content() parser = IMGParser(result, msg_id) - for m in re.finditer(r'< *img[^>]*>', source): + idx = 0 + for m in re.finditer(r'< *(a|img|script)[^>]*>', source): result += source[idx:m.start()] idx = m.end() parser.feed(m[0]) + result += source[idx:] + + # This script adds an onclick event for each image we blocked + # above, which unblocks it. + # TODO this "fails" for images wrapped in anchor tags, since + # the anchor tag has priority. + result += "\n<script src='/static/enable_images.js'></script>" return str(result) |