From 394ada5230c5a1b2890df385a9bbbf7310577a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Tue, 7 Feb 2023 00:44:52 +0100 Subject: Add support for CID linked images. --- mu4web/main.py | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/mu4web/main.py b/mu4web/main.py index 7b85a06..c945e7c 100644 --- a/mu4web/main.py +++ b/mu4web/main.py @@ -2,6 +2,9 @@ from email.message import EmailMessage from email.headerregistry import Address from urllib.parse import urlencode from datetime import datetime +import html +from html.parser import HTMLParser +import re from flask_login import ( LoginManager, login_required, @@ -431,10 +434,65 @@ def raw_message(): return flask.send_file(filename, mimetype='message/rfc822') +class MutableString: + def __init__(self): + self.str = '' + + def __iadd__(self, other): + self.str += other + return self + + def __repr__(self): + return f'MutableString("{self.str}")' + + def __str__(self): + return self.str + + +class IMGParser(HTMLParser): + """ + Rewrites HTML image tags to be safer/have more functionality. + Should only be fed the image tag. Everything else is assumed to be + directly copied externally. + + [Parameters] + result - A mutable string to which the + (possibly changed) img tag is appended. + msg_id - The Email Message ID field, used to construct some links. + """ + rx = re.compile('cid:(.*)') + + def __init__(self, result, msg_id): + super().__init__() + self.result = result + self.msg_id = msg_id + + def handle_starttag(self, _, attrs): + self.result += '), which will drop that slash. 
FIX + self.result += '>' + + @app.route('/part') @login_required def attachement_part_page(): msg_id = request.args.get('id') + raw = request.args.get('raw') if not msg_id: return "Message id required", 404 attachement_idx = int(request.args.get('idx', 0)) @@ -445,10 +503,56 @@ def attachement_part_page(): return multipart_page(msg_id, attachement, attachement_idx) + + elif not raw and attachement.get_content_type() == 'text/html': + # Rewrite the HTML, for several reasons: + # 1. Expand CID links to something we can handle + # 2. Add a.target = '_parent' to force links to open in + # the current tab (instead of staying inside the iframe) + # 3. Block external resources from loading by default + # These should come with some form of toggle for turning them + # on or off + + result = MutableString() + idx = 0 + # Content encoding here? + source = attachement.get_content() + parser = IMGParser(result, msg_id) + + for m in re.finditer(r'< *img[^>]*>', source): + result += source[idx:m.start()] + idx = m.end() + parser.feed(m[0]) + + return str(result) + else: return attachement_response(attachement) +# Returns a specific content item from the message with the given id +# RFC 2392 +# https://www.rfc-editor.org/rfc/rfc2392 +# Possibly change this to actually use that form of URIs +@app.route('/cid', methods=['GET']) +def cid(): + msg_id = request.args.get('id') + cid = request.args.get('cid') + if not msg_id: + return "Message id required", 404 + if not cid: + return "CID required", 404 + mail = cast(EmailMessage, get_mail(msg_id)) + + # .walk(), since the attachment may be a few steps down in the + # multipart/* tree + for attachment in mail.walk(): + if attachment.get('content-id') == f'<{cid}>': + return attachement_response(attachment) + + return "Object not found", 404 + + @app.route('/login', methods=['GET']) def login_page_(): returnto = request.args.get('returnto') -- cgit v1.2.3