aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hugo Hörnquist <hugo@lysator.liu.se> 2023-02-07 00:44:52 +0100
committer Hugo Hörnquist <hugo@lysator.liu.se> 2023-02-07 00:44:52 +0100
commit 394ada5230c5a1b2890df385a9bbbf7310577a1c (patch)
tree 8b8bd78615c7d5e08366764ec24af9f67c427674
parent Change info gathering to proper db reads. (diff)
download mu4web-394ada5230c5a1b2890df385a9bbbf7310577a1c.tar.gz
mu4web-394ada5230c5a1b2890df385a9bbbf7310577a1c.tar.xz
Add support for CID linked images.
-rw-r--r-- mu4web/main.py 104
1 files changed, 104 insertions, 0 deletions
diff --git a/mu4web/main.py b/mu4web/main.py
index 7b85a06..c945e7c 100644
--- a/mu4web/main.py
+++ b/mu4web/main.py
@@ -2,6 +2,9 @@ from email.message import EmailMessage
from email.headerregistry import Address
from urllib.parse import urlencode
from datetime import datetime
+import html
+from html.parser import HTMLParser
+import re
from flask_login import (
LoginManager,
login_required,
@@ -431,10 +434,65 @@ def raw_message():
return flask.send_file(filename, mimetype='message/rfc822')
class MutableString:
    """
    A string buffer which can be passed by reference and grown with +=.

    Plain str objects are immutable, so `s += x` inside a callee only
    rebinds the callee's local name. Wrapping the text in an object
    lets several holders append to the same buffer; str(obj) returns
    everything accumulated so far.
    """

    def __init__(self) -> None:
        # The accumulated text. Starts out empty.
        self.str = ''

    def __iadd__(self, other: str) -> 'MutableString':
        """Append `other` to the buffer and return this same object."""
        self.str += other
        return self

    def __repr__(self) -> str:
        # !r quotes and escapes the content, so embedded quotes and
        # newlines can't make the repr ambiguous.
        return f'MutableString({self.str!r})'

    def __str__(self) -> str:
        return self.str
+
+
class IMGParser(HTMLParser):
    """
    Rewrites HTML image tags to be safer/have more functionality.
    Should only be fed the image tag. Everything else is assumed to be
    copied directly by the caller.

    A `src` attribute using the `cid:` scheme is replaced by a link
    to the local /cid endpoint, carrying the message id and the
    content id as query parameters. All other attributes are copied
    through, with names and values HTML-escaped.

    [Parameters]
    result - A mutable string which should be appended with the
             (possibly changed) img tag.
    msg_id - The Email Message ID field, used to construct some links.
    """

    # URI schemes are case insensitive, so `CID:` is accepted as well.
    rx = re.compile(r'cid:(.*)', re.IGNORECASE)

    def __init__(self, result, msg_id):
        super().__init__()
        self.result = result
        self.msg_id = msg_id

    def handle_starttag(self, _, attrs):
        """Emit the rewritten tag for `<img ...>`."""
        self._write_tag(attrs, '>')

    def handle_startendtag(self, tag, attrs):
        """Emit the rewritten tag for `<img ... />`, keeping the slash."""
        self._write_tag(attrs, ' />')

    def _write_tag(self, attrs, closer):
        """Append `<img`, the rewritten attributes and `closer` to result."""
        parts = ['<img']
        for key, value in attrs:
            name = html.escape(key)
            if value is None:
                # Attribute without a value (`<img ismap>`). Nothing to
                # escape or rewrite, keep the bare name.
                parts.append(name)
                continue
            if key == 'src' and (m := self.rx.match(value)):
                params = urlencode({'id': self.msg_id, 'cid': m[1]})
                value = '/cid?' + params
            parts.append(f'{name}="{html.escape(value, quote=True)}"')

        # Attributes must be separated by whitespace to be valid HTML.
        self.result += ' '.join(parts) + closer
+
+
@app.route('/part')
@login_required
def attachement_part_page():
msg_id = request.args.get('id')
+ raw = request.args.get('raw')
if not msg_id:
return "Message id required", 404
attachement_idx = int(request.args.get('idx', 0))
@@ -445,10 +503,56 @@ def attachement_part_page():
return multipart_page(msg_id,
attachement,
attachement_idx)
+
+ elif not raw and attachement.get_content_type() == 'text/html':
+ # Rewrites for HTML for different reasons
+ # 1. Expand CID links to something we can handle
+ # 2. add a.target = '_parent' to force links to open in
+ # current tab (instead of keeping in iframe)
+ # 3. Block external resources from loading per default
+ # These should come with some form of toggle for turning them
+ # on or off
+
+ result = MutableString()
+ idx = 0
+ # Content encoding here?
+ source = attachement.get_content()
+ parser = IMGParser(result, msg_id)
+
+ for m in re.finditer(r'< *img[^>]*>', source):
+ result += source[idx:m.start()]
+ idx = m.end()
+ parser.feed(m[0])
+
+ return str(result)
+
else:
return attachement_response(attachement)
+# Returns specific content item from message with id
+# RFC 2392
+# https://www.rfc-editor.org/rfc/rfc2392
+# Possibly change this to actually use that form of URI:s
@app.route('/cid', methods=['GET'])
@login_required
def cid():
    """
    Serve the part of a message which carries a given Content-ID.

    Requires a logged in user, like the other routes serving message
    content.

    [Query parameters]
    id  - Message id of the mail to search in.
    cid - Content id to look for, without the surrounding angle
          brackets.

    Responds with whatever attachement_response produces for the
    first matching part, or a plain text 404 if a parameter is missing
    or no part matches.
    """
    msg_id = request.args.get('id')
    content_id = request.args.get('cid')
    if not msg_id:
        return "Message id required", 404
    if not content_id:
        return "CID required", 404
    # NOTE(review): presumably get_mail loads the message with this id;
    # how it behaves for an unknown id is not visible here.
    mail = cast(EmailMessage, get_mail(msg_id))

    # The Content-ID header wraps the id in angle brackets.
    wanted = f'<{content_id}>'

    # .walk(), since attachement may be a few steps down in the
    # multipart/* tree
    for part in mail.walk():
        header = part.get('content-id')
        # Tolerate stray whitespace around the header value.
        if header is not None and str(header).strip() == wanted:
            return attachement_response(part)

    return "Object not found", 404
+
+
@app.route('/login', methods=['GET'])
def login_page_():
returnto = request.args.get('returnto')