summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2023-03-05 10:45:29 +0100
committerHugo Hörnquist <hugo@lysator.liu.se>2023-03-05 10:45:29 +0100
commit2d54f531746d05a1b6d757db92f7f2eccbc7c8f2 (patch)
treee0fdfe99571677ad05245b90d240ef886a5ec222
parentwork (diff)
downloadvimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.gz
vimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.xz
MAJOR work on handlingar path.
-rwxr-xr-xhs/mail.py109
-rw-r--r--hs/mu.py36
-rw-r--r--hs/src/Handlingar.hs201
-rw-r--r--hs/src/Mail.hs60
-rw-r--r--hs/src/Tex.hs27
-rw-r--r--hs/src/Util.hs12
-rw-r--r--hs/vimwiki.cabal6
7 files changed, 436 insertions, 15 deletions
diff --git a/hs/mail.py b/hs/mail.py
new file mode 100755
index 0000000..03a423b
--- /dev/null
+++ b/hs/mail.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+"""
+./main <msg-id>
+> LEN "\n" json-data
+on input:
+> LEN "\n" bytes
+"""
+
+from email.message import EmailMessage
+from email.parser import BytesParser
+from uuid import uuid4
+import email.policy
+import hashlib
+import json
+import mimetypes
+import os
+import sys
+import email.contentmanager
+
+import mu
+
+def build_part_tree(part, part_map):
+ result = []
+ id = str(uuid4())
+ part_map[id] = part
+ return {
+ 'filename': part.get_filename(),
+ 'headers': {key.lower(): value for key, value in part.items()},
+ 'id': id,
+ 'parts': [build_part_tree(p, part_map) for p in part.iter_parts()],
+ 'content-type': part.get_content_type(),
+ 'charset': part.get_content_charset(),
+ }
+
+tmpdir = f'/tmp/.vimwiki-scripts-{os.getuid()}'
+
+os.umask(0o077)
+try:
+ os.mkdir(tmpdir)
+except FileExistsError:
+ os.chmod(tmpdir, 0o700)
+
+content_manager = email.contentmanager.raw_data_manager
+content_manager.add_get_handler('text', email.contentmanager.get_non_text_content)
+
+def main():
+
+ if len(sys.argv) == 1:
+ print(f'Usage: {sys.argv[0]} message-id')
+ return
+
+ id = sys.argv[1]
+
+ filename = mu.find_file(id)
+ if not filename:
+ print(f'No mail with id=[{id}]', file=sys.stderr)
+ return
+ parser = BytesParser(policy=email.policy.default)
+ with open(filename, 'rb') as f:
+ mail = parser.parse(f)
+
+
+ part_map = {}
+ bytes = json.dumps(build_part_tree(mail, part_map),
+ ensure_ascii=True).encode('ASCII')
+ sys.stdout.buffer.write(b'%i\n' % len(bytes))
+ sys.stdout.buffer.write(bytes)
+ sys.stdout.flush()
+
+ try:
+ while True:
+ part_id = input()
+ part = part_map[part_id]
+ action, *args = input().split(' ')
+ if action == 'get-bytes':
+ # TODO get_content should be able to return a bytes object
+ # directly. We want bytes to have exact length when
+ # sending
+ s = part.get_content(content_manager=content_manager)
+ # bytes = s.encode('UTF-8')
+ bytes = s
+ sys.stdout.buffer.write(b'%i\n' % len(bytes))
+ sys.stdout.buffer.write(bytes)
+
+ elif action == 'get-file':
+ s = part.get_content()
+ bytes = s
+ digest = hashlib.sha256(bytes).hexdigest()
+ filename = os.path.join(tmpdir, digest) \
+ + mimetypes.guess_extension(part.get_content_type())
+ # Write could be skiped, but then we would
+ # probably want to ensure the correct content,
+ # meaning checksuming the existing file, which
+ # would probably be slover
+ with open(filename, 'wb') as f:
+ f.write(bytes)
+ sys.stdout.buffer.write(filename.encode('UTF-8'))
+ sys.stdout.buffer.write(b'\n')
+
+ else:
+ print(f"Unknown action '{action}'", file=sys.stderr)
+ pass
+ sys.stdout.flush()
+ except EOFError:
+ pass
+
+if __name__ == '__main__':
+ main()
diff --git a/hs/mu.py b/hs/mu.py
new file mode 100644
index 0000000..5f9f5b1
--- /dev/null
+++ b/hs/mu.py
@@ -0,0 +1,36 @@
+import subprocess
+from subprocess import PIPE
+from typing import (
+ Optional,
+)
+
+class MuError(Exception):
+ codes = {
+ 1: 'General Error',
+ 2: 'No Matches',
+ 4: 'Database is corrupted'
+ }
+
+ def __init__(self, returncode: int):
+ self.returncode: int = returncode
+ self.msg: str = MuError.codes.get(returncode, 'Unknown Error')
+
+ def __repr__(self):
+ return f'MuError({self.returncode}, "{self.msg}")'
+
+ def __str__(self):
+ return repr(self)
+
+
+def find_file(id: str) -> Optional[str]:
+ cmd = subprocess.run(['mu', 'find', '-u', f'i:{id}',
+ '--fields', 'l'],
+ stdout=PIPE)
+ filename = cmd.stdout.decode('UTF-8').strip()
+
+ if cmd.returncode == 4:
+ return None
+ if cmd.returncode != 0:
+ raise MuError(cmd.returncode)
+ return filename
+
diff --git a/hs/src/Handlingar.hs b/hs/src/Handlingar.hs
index 431e7a5..2664b08 100644
--- a/hs/src/Handlingar.hs
+++ b/hs/src/Handlingar.hs
@@ -12,7 +12,10 @@ import Prelude hiding
import Text.Pandoc
( runIOorExplode
, readVimwiki
- , WriterOptions(writerTemplate, writerVariables)
+ , readHtml
+ , WriterOptions( writerTemplate
+ , writerListings
+ , writerVariables)
, PandocMonad
, PandocIO
, writePlain
@@ -23,7 +26,7 @@ import Text.Pandoc.Writers.LaTeX (writeLaTeX)
import Text.Pandoc.Templates
( compileDefaultTemplate
)
-import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Walk (walk, walkM)
import Data.Text (Text, pack, unpack, strip)
import Data.Text.IO
( putStrLn
@@ -37,8 +40,16 @@ import Text.DocTemplates (toVal, Val(SimpleVal, ListVal), Context(Context))
import qualified Data.Text.IO as T
import qualified Data.Map.Lazy as Map
+import Data.Map ((!))
import System.Posix.Directory (changeWorkingDirectory, getWorkingDirectory)
import System.Directory (makeAbsolute)
+import System.IO (Handle, hGetLine, hPutStrLn, hFlush)
+import Data.ByteString (ByteString, hGet)
+import Data.Text.Encoding (decodeLatin1, decodeUtf8)
+import System.Process (cleanupProcess)
+import Data.Maybe (fromMaybe)
+import Tex (toTex, TeX (..))
+import Data.String (IsString(fromString))
import System.FilePath
( dropFileName
@@ -46,10 +57,25 @@ import System.FilePath
, takeExtension
)
-import Network.URI (parseURI, uriScheme, uriPath)
+import Network.URI (URI, parseURI, uriScheme, uriPath)
import Network.URI.Encode (encodeWith, decode)
import Data.Set (Set)
import qualified Data.Set as Set
+import Util (joinBy, splitBy)
+
+import Mail (getMail, MailPart(..))
+
+-- TODO pandoc possibly contains a better way to handle attachements,
+-- something about media bag
+-- TODO mail attachments from any type supported by pandoc should
+-- work, fix a (MimeType -> PandocParser) function
+-- TODO input file shouldn't be locked to vimwiki
+-- TODO output other things than PDF
+-- - Plain text, requires own redering engine (pandoc plugin?)
+-- - HTML, should be multi page for each attachement
+-- - with support for RXML templates
+-- - Mail, where mail attachements are copied verbatim
+-- other attachements become mail attachments
oneOf :: (a -> Bool) -> (a -> Bool) -> a -> Bool
oneOf f g x = f x || g x
@@ -135,6 +161,10 @@ rewriteLink (Link _ is (target, "wikilink")) = do
return lnk
rewriteLink x = return x
+shorten :: Block -> Block
+shorten (BlockQuote _) = BlockQuote [ Para [ Code ("", [], []) "[...]" ] ]
+shorten x = x
+
replaceLinks :: Pandoc -> (Pandoc, [AppendixItem])
replaceLinks = flip runState [] . walkM rewriteLink
@@ -161,16 +191,147 @@ handleFile fname ".txt" = do
return . Just $ [CodeBlock ("", [], []) text]
handleFile fname ".pdf" = do
aname <- liftIO $ makeAbsolute fname
- let pagecmd = "\\thispagestyle{fancy}\\lhead{Bilaga \\Alph{section}.\\arabic{subsection}}"
- let arg = "frame,pages={-},width=\\textwidth,pagecommand=" <> pagecmd
- let lines = [ "\\phantomsection\\stepcounter{subsection}\\includepdf[" <> arg <> "]{" <> decode aname <> "}"
- ]
+ let pagecmd = [ TexCmd "thispagestyle" [] ["fancy"]
+ , TexCmd "lhead" [] ["Bilaga \\Alph{section}.\\arabic{subsection}"]]
+ let arg = [ "frame"
+ , "pages={-}"
+ , "width=\\textwidth"
+ , "pagecommand=" <> mconcat (fmap toTex pagecmd)]
+ let lines = [ TexCmd "phantomsection" [] []
+ , TexCmd "stepcounter" [] ["subsection"]
+ , TexCmd "includepdf" [joinBy "," arg] [pack . decode $ aname] ]
let inline = RawInline
(Format "latex")
- (pack $ unlines lines)
+ (mconcat $ fmap toTex lines)
return . Just $ [ Plain [ inline ] ]
handleFile fname _ = return . Just $ [Para [Str . pack $ fname]]
+findAlternative :: [MailPart] -> Maybe MailPart
+findAlternative [] = Nothing
+findAlternative (m@MailPart { contentType = contentType }:xs) = case contentType of
+ "text/html" -> Just m
+ "text/plain" -> Just m
+ _ -> findAlternative xs
+
+
+-- TODO where are these strings defined
+decoder :: String -> (ByteString -> Text)
+decoder "iso-8859-1" = decodeLatin1
+decoder "utf-8" = decodeUtf8
+-- decoder _ = decodeUtf8Lenient
+decoder _ = decodeLatin1
+
+data MailAction = GetBytes
+ | GetFile
+ deriving (Show)
+
+serialize :: MailAction -> String
+serialize GetBytes = "get-bytes"
+serialize GetFile = "get-file"
+
+mailGet :: MailAction -> String -> (Handle, Handle) -> (Handle -> IO a) -> IO a
+mailGet action id (inp, outp) handler = do
+ hPutStrLn outp id
+ hPutStrLn outp $ serialize action
+ hFlush outp
+ handler inp
+
+
+getBytes :: String -> (Handle, Handle) -> IO ByteString
+getBytes id ports = mailGet GetBytes id ports $ \inp -> do
+ count <- readIO =<< hGetLine inp
+ hGet inp count
+
+getFile :: String -> (Handle, Handle) -> IO FilePath
+getFile id ports = mailGet GetFile id ports hGetLine
+
+
+-- https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
+-- TODO differentiate between inline images (through cid:<> urls), and
+-- unrelated attachements
+handleMailBody :: (Handle, Handle) -> MailPart -> PandocIO [Block]
+handleMailBody ports mail =
+ case splitBy '/' $ contentType mail of
+ ("multipart", "alternative") -> do
+ case findAlternative $ reverse $ parts mail of
+ Just part -> handleMailBody ports part
+ Nothing -> return [ Para [ Str "Couldn't find any suitable alternative" ] ]
+ -- mixed, but any unknown should be treated as mixed
+ ("multipart", _) -> concat <$> mapM (handleMailBody ports) (parts mail)
+ ("text", "plain") -> do
+ bytes <- liftIO $ getBytes (partId mail) ports
+ let content = decoder (fromMaybe "ASCII" $ charset mail) bytes
+ return [ CodeBlock ("", [], []) content ]
+ ("text", "html") -> do
+ bytes <- liftIO $ getBytes (partId mail) ports
+ let content = decoder (fromMaybe "ASCII" $ charset mail) bytes
+ pdoc <- readHtml def content
+ let Pandoc _ blocks = walk shorten pdoc
+ return blocks
+ ("image", _) -> do
+ tmpFile <- liftIO $ getFile (partId mail) ports
+ let img = [ Plain
+ [ Image ("", [], [])
+ [Str "Image Caption?"]
+ (pack tmpFile, "") ]]
+ let figure = [ Figure ("", [], []) -- TODO figure ref
+ -- (Caption Nothing [Plain [Str . filename mail]])
+ (Caption Nothing [Plain [Str . pack . show $ filename mail]])
+ img
+ ]
+ return img
+ -- "application/pdf" -> ()
+ _ -> return [ Header 2 ("", [], []) [ Str "Attachment omitted" ]
+ , dlist [ ("Content-Type", contentType mail)
+ , ("Filename", show . filename $ mail) ]
+ ]
+
+-- TODO Titlecase the headers
+-- TODO from and to should monospace the
+-- address (but not the name)
+formatMail :: MailPart -> (Handle, Handle) -> PandocIO [Block]
+formatMail mail ports = do
+ let keys = ["from", "to", "subject", "date"]
+ let f key = ( [Str $ key <> ":"]
+ , [[Plain [ Str . pack $ headers mail ! unpack key ]]]
+ )
+ let kvs = map f keys
+
+ body <- handleMailBody ports mail
+
+ return $ DefinitionList kvs : body
+
+comment :: Text -> [Block]
+comment s = [ RawBlock (Format "latex") ("% " <> s)
+ , RawBlock (Format "html") ("<!-- " <> s <> " -->")
+ ]
+
+dlist :: [(String, String)] -> Block
+dlist xs = DefinitionList [ ( [Str . pack $ k]
+ , [[Plain [Str . pack $ v]]] )
+ | (k, v) <- xs ]
+
+handleMailLink :: URI -> PandocIO [Block]
+handleMailLink uri = do
+ let id = decode . uriPath $ uri
+ -- liftIO $ print id
+ mail' <- liftIO $ getMail id
+ -- TODO #short
+ body <- case mail' of
+ Left err -> return [ Para [ Str "From "
+ , Code ("", [], []) "getMail:" ]
+ , Para [Str . pack $ err]
+ , Para [Code ("", [], []) (pack id) ]]
+ Right (mail, proc@(Just stdin, Just stdout, _, _)) -> do
+ -- TODO "short" `in` uriFrag
+ bs <- formatMail mail (stdout, stdin)
+ liftIO $ cleanupProcess proc
+ return bs
+ -- TODO error
+ _ -> return []
+
+ return $ comment ("msg id: " <> pack id) <> body
+
-- See also: Html rebuildLinks
handleBilaga :: [Block] -> Text -> PandocIO (Maybe [Block])
handleBilaga currentPage url = case parseURI . encodeWith (`elem` uriChars) . unpack $ url of
@@ -179,7 +340,7 @@ handleBilaga currentPage url = case parseURI . encodeWith (`elem` uriChars) . un
"file:" -> do
let fname = uriPath uri
handleFile fname $ takeExtension fname
- "mail:" -> return $ Just [ Para [ Str "A mail would have gone here" ]]
+ "mail:" -> Just <$> handleMailLink uri
_ -> return $ Just [Para [Str . pack . show $ uri]]
Nothing -> case break (== '#') (unpack url) of
("", '#':frag) -> return $ getHeadingData (pack frag) currentPage
@@ -222,19 +383,29 @@ main = do
let apx = mconcat fragments
let trail = [ RawBlock (Format "latex") "\\appendix" ] <> apx
let content = Pandoc meta $ bs <> trail
- let packages = [ "pdfpages"
- , "fancyhdr"
- ]
- let headerIncludes = [ SimpleVal $ "\\usepackage{" <> package <> "}"
- | package <- packages]
+
let opts = [ ("boxlinks", toVal True)
, ("colorlinks", toVal True)
, ("papersize", SimpleVal "a4")
, ("numbersections", toVal True)
, ("lang", SimpleVal "swedish")
- , ("header-includes", ListVal headerIncludes)
+ , ("header-includes", ListVal $ fmap (SimpleVal . fromString . unpack . toTex) [
+ TexPackage "pdfpages" -- including PDF's
+ , TexPackage "fancyhdr" -- page headers on included pdf pages
+ , TempletizedTexPackage ["most"] "tcolorbox" -- for blockquotes
+ , LstSet [ ("breaklines", "true")
+ , ("basicstyle", "\\scriptsize") ]
+ , TexCmd "newtcolorbox" [] ["myquote", joinBy "," [ "breakable"
+ , "colback=red!5!white"
+ , "colframe=red!75!black" ]]
+ , TexCmd "renewenvironment" [] ("quote" : fmap toTex [
+ TexCmd "begin" [] ["myquote"]
+ , TexCmd "end" [] ["myquote"]
+ ])
+ ])
]
writeLaTeX (def { writerTemplate = Just texTemplate
+ , writerListings = True
, writerVariables = Context $ Map.fromList opts }) content
changeWorkingDirectory cwd
diff --git a/hs/src/Mail.hs b/hs/src/Mail.hs
new file mode 100644
index 0000000..505931e
--- /dev/null
+++ b/hs/src/Mail.hs
@@ -0,0 +1,60 @@
+{-# LANGUAGE DeriveGeneric
+ , OverloadedStrings #-}
+
+module Mail where
+
+import System.Process
+import Data.Aeson ( eitherDecode
+ , FromJSON
+ , ToJSON
+ , defaultOptions
+ , parseJSON
+ , genericToEncoding
+ , toEncoding
+ , withObject
+ , (.:)
+ )
+import Data.Map (Map)
+import GHC.Generics
+import Data.ByteString (hGet)
+import Data.ByteString.Lazy (fromStrict)
+import System.IO (Handle, hGetLine)
+
+data MailPart = MailPart
+ { filename :: Maybe String
+ , partId :: String
+ , headers :: Map String String
+ , parts :: [MailPart]
+ , contentType :: String
+ , charset :: Maybe String
+ } deriving (Generic, Show)
+
+instance ToJSON MailPart where
+ toEncoding = genericToEncoding defaultOptions
+
+instance FromJSON MailPart where
+ parseJSON = withObject "MailPart" $ \v -> MailPart
+ <$> v .: "filename"
+ <*> v .: "id"
+ <*> v .: "headers"
+ <*> v .: "parts"
+ <*> v .: "content-type"
+ <*> v .: "charset"
+
+type Proc = (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle)
+
+getMail :: String -> IO (Either String (MailPart, Proc))
+getMail id = do
+ let prgr = "/home/hugo/code/vimwiki-scripts/hs/mail.py" -- TODO
+ let cmd = (proc prgr [id]) { std_out = CreatePipe
+ , std_in = CreatePipe }
+ proc@(_, Just stdout, _, _) <- createProcess cmd
+
+ count <- read <$> hGetLine stdout
+ bytes <- fromStrict <$> hGet stdout count
+ case eitherDecode bytes of
+ Left err -> do
+ cleanupProcess proc
+ print bytes
+ return $ Left err
+ Right mailpart -> return $ Right (mailpart, proc)
diff --git a/hs/src/Tex.hs b/hs/src/Tex.hs
new file mode 100644
index 0000000..4ba7a0d
--- /dev/null
+++ b/hs/src/Tex.hs
@@ -0,0 +1,27 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Tex where
+
+import Data.Text (Text)
+import Util (joinBy)
+
+-- A poor mans HaTeX
+
+data TeX = TexPackage Text
+ | TempletizedTexPackage [Text] Text
+ | LstSet [(Text, Text)]
+ | TexCmd Text [Text] [Text]
+ | RawTex Text
+ deriving (Show)
+
+toTex :: TeX -> Text
+toTex (TexPackage name) = "\\usepackage{" <> name <> "}"
+toTex (TempletizedTexPackage args name) = "\\usepackage[" <> joinBy "," args <> "]{" <> name <> "}"
+toTex (LstSet kvs) = "\\lstset{" <> args <> "}"
+ where args = joinBy "," [key <> "=" <> val | (key, val) <- kvs]
+toTex (TexCmd cmd os []) = "\\" <> cmd
+ <> mconcat ["[" <> x <> "]" | x <- os] <> "{}"
+toTex (TexCmd cmd os xs) = "\\" <> cmd
+ <> mconcat ["[" <> x <> "]" | x <- os]
+ <> mconcat ["{" <> x <> "}" | x <- xs]
+toTex (RawTex t) = t
diff --git a/hs/src/Util.hs b/hs/src/Util.hs
index 06a7e86..e3c8d2f 100644
--- a/hs/src/Util.hs
+++ b/hs/src/Util.hs
@@ -4,8 +4,12 @@ module Util
, joinBy
, nullToMaybe
, firstJust
+, splitBy
) where
+swap :: (a, b) -> (b, a)
+swap (a, b) = (b, a)
+
-- Accumulate all sub-sequences,
-- For example, accumulate [1,2,3] == [[1], [1, 2], [1, 2, 3]]
accumulate :: [a] -> [[a]]
@@ -21,6 +25,14 @@ intersperse y (x:xs) = x : y : intersperse y xs
joinBy :: Monoid a => a -> [a] -> a
joinBy delim lst = mconcat $ intersperse delim lst
+splitBy' :: Eq a => a -> [a] -> ([a], [a])
+splitBy' _ [] = ([], [])
+splitBy' delim (x:xs)
+ | delim == x = (xs, [])
+ | otherwise = (x:) <$> splitBy' delim xs
+
+splitBy :: Eq a => a -> [a] -> ([a], [a])
+splitBy d = swap . splitBy' d
nullToMaybe :: (Eq a, Monoid a) => a -> Maybe a
nullToMaybe m
diff --git a/hs/vimwiki.cabal b/hs/vimwiki.cabal
index eb50315..d0a0dda 100644
--- a/hs/vimwiki.cabal
+++ b/hs/vimwiki.cabal
@@ -31,17 +31,22 @@ executable Main
Files,
Links,
Html,
+ Mail,
Handlingar,
Vimwiki.Man,
System.Home,
Util,
+ Tex,
Data.Text.Compat,
System.FilePath.Normalize
build-depends:
base >= 4.8,
+ aeson >= 2.1.1,
base64 >= 0.4,
blaze-html >= 0.9,
blaze-markup >= 0.8.2.7,
+ -- bumping this to 0.11 causes everything to go wrong
+ bytestring >= 0.10,
containers >= 0.5,
data-default >= 0.7,
directory >= 1.3.6,
@@ -58,5 +63,6 @@ executable Main
text >= 1.2.2,
unix >= 2.7.2,
uri-encode >= 1.5.0,
+ utf8-string >= 1.0.2,
xdg-basedir >= 0.2
default-language: Haskell2010