diff options
author | Hugo Hörnquist <hugo@lysator.liu.se> | 2023-03-05 10:45:29 +0100 |
---|---|---|
committer | Hugo Hörnquist <hugo@lysator.liu.se> | 2023-03-05 10:45:29 +0100 |
commit | 2d54f531746d05a1b6d757db92f7f2eccbc7c8f2 (patch) | |
tree | e0fdfe99571677ad05245b90d240ef886a5ec222 | |
parent | work (diff) | |
download | vimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.gz vimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.xz |
MAJOR work on handlingar path.
-rwxr-xr-x | hs/mail.py | 109 | ||||
-rw-r--r-- | hs/mu.py | 36 | ||||
-rw-r--r-- | hs/src/Handlingar.hs | 201 | ||||
-rw-r--r-- | hs/src/Mail.hs | 60 | ||||
-rw-r--r-- | hs/src/Tex.hs | 27 | ||||
-rw-r--r-- | hs/src/Util.hs | 12 | ||||
-rw-r--r-- | hs/vimwiki.cabal | 6 |
7 files changed, 436 insertions, 15 deletions
diff --git a/hs/mail.py b/hs/mail.py new file mode 100755 index 0000000..03a423b --- /dev/null +++ b/hs/mail.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 + +""" +./main <msg-id> +> LEN "\n" json-data +on input: +> LEN "\n" bytes +""" + +from email.message import EmailMessage +from email.parser import BytesParser +from uuid import uuid4 +import email.policy +import hashlib +import json +import mimetypes +import os +import sys +import email.contentmanager + +import mu + +def build_part_tree(part, part_map): + result = [] + id = str(uuid4()) + part_map[id] = part + return { + 'filename': part.get_filename(), + 'headers': {key.lower(): value for key, value in part.items()}, + 'id': id, + 'parts': [build_part_tree(p, part_map) for p in part.iter_parts()], + 'content-type': part.get_content_type(), + 'charset': part.get_content_charset(), + } + +tmpdir = f'/tmp/.vimwiki-scripts-{os.getuid()}' + +os.umask(0o077) +try: + os.mkdir(tmpdir) +except FileExistsError: + os.chmod(tmpdir, 0o700) + +content_manager = email.contentmanager.raw_data_manager +content_manager.add_get_handler('text', email.contentmanager.get_non_text_content) + +def main(): + + if len(sys.argv) == 1: + print(f'Usage: {sys.argv[0]} message-id') + return + + id = sys.argv[1] + + filename = mu.find_file(id) + if not filename: + print(f'No mail with id=[{id}]', file=sys.stderr) + return + parser = BytesParser(policy=email.policy.default) + with open(filename, 'rb') as f: + mail = parser.parse(f) + + + part_map = {} + bytes = json.dumps(build_part_tree(mail, part_map), + ensure_ascii=True).encode('ASCII') + sys.stdout.buffer.write(b'%i\n' % len(bytes)) + sys.stdout.buffer.write(bytes) + sys.stdout.flush() + + try: + while True: + part_id = input() + part = part_map[part_id] + action, *args = input().split(' ') + if action == 'get-bytes': + # TODO get_content should be able to return a bytes object + # directly. We want bytes to have exact length when + # sending + s = part.get_content(content_manager=content_manager) + # bytes = s.encode('UTF-8') + bytes = s + sys.stdout.buffer.write(b'%i\n' % len(bytes)) + sys.stdout.buffer.write(bytes) + + elif action == 'get-file': + s = part.get_content() + bytes = s + digest = hashlib.sha256(bytes).hexdigest() + filename = os.path.join(tmpdir, digest) \ + + mimetypes.guess_extension(part.get_content_type()) + # Write could be skiped, but then we would + # probably want to ensure the correct content, + # meaning checksuming the existing file, which + # would probably be slover + with open(filename, 'wb') as f: + f.write(bytes) + sys.stdout.buffer.write(filename.encode('UTF-8')) + sys.stdout.buffer.write(b'\n') + + else: + print(f"Unknown action '{action}'", file=sys.stderr) + pass + sys.stdout.flush() + except EOFError: + pass + +if __name__ == '__main__': + main() diff --git a/hs/mu.py b/hs/mu.py new file mode 100644 index 0000000..5f9f5b1 --- /dev/null +++ b/hs/mu.py @@ -0,0 +1,36 @@ +import subprocess +from subprocess import PIPE +from typing import ( + Optional, +) + +class MuError(Exception): + codes = { + 1: 'General Error', + 2: 'No Matches', + 4: 'Database is corrupted' + } + + def __init__(self, returncode: int): + self.returncode: int = returncode + self.msg: str = MuError.codes.get(returncode, 'Unknown Error') + + def __repr__(self): + return f'MuError({self.returncode}, "{self.msg}")' + + def __str__(self): + return repr(self) + + +def find_file(id: str) -> Optional[str]: + cmd = subprocess.run(['mu', 'find', '-u', f'i:{id}', + '--fields', 'l'], + stdout=PIPE) + filename = cmd.stdout.decode('UTF-8').strip() + + if cmd.returncode == 4: + return None + if cmd.returncode != 0: + raise MuError(cmd.returncode) + return filename + diff --git a/hs/src/Handlingar.hs b/hs/src/Handlingar.hs index 431e7a5..2664b08 100644 --- a/hs/src/Handlingar.hs +++ b/hs/src/Handlingar.hs @@ -12,7 +12,10 @@ import Prelude hiding import Text.Pandoc ( runIOorExplode , readVimwiki - , WriterOptions(writerTemplate, writerVariables) + , readHtml + , WriterOptions( writerTemplate + , writerListings + , writerVariables) , PandocMonad , PandocIO , writePlain @@ -23,7 +26,7 @@ import Text.Pandoc.Writers.LaTeX (writeLaTeX) import Text.Pandoc.Templates ( compileDefaultTemplate ) -import Text.Pandoc.Walk (walkM) +import Text.Pandoc.Walk (walk, walkM) import Data.Text (Text, pack, unpack, strip) import Data.Text.IO ( putStrLn @@ -37,8 +40,16 @@ import Text.DocTemplates (toVal, Val(SimpleVal, ListVal), Context(Context)) import qualified Data.Text.IO as T import qualified Data.Map.Lazy as Map +import Data.Map ((!)) import System.Posix.Directory (changeWorkingDirectory, getWorkingDirectory) import System.Directory (makeAbsolute) +import System.IO (Handle, hGetLine, hPutStrLn, hFlush) +import Data.ByteString (ByteString, hGet) +import Data.Text.Encoding (decodeLatin1, decodeUtf8) +import System.Process (cleanupProcess) +import Data.Maybe (fromMaybe) +import Tex (toTex, TeX (..)) +import Data.String (IsString(fromString)) import System.FilePath ( dropFileName @@ -46,10 +57,25 @@ import System.FilePath , takeExtension ) -import Network.URI (parseURI, uriScheme, uriPath) +import Network.URI (URI, parseURI, uriScheme, uriPath) import Network.URI.Encode (encodeWith, decode) import Data.Set (Set) import qualified Data.Set as Set +import Util (joinBy, splitBy) + +import Mail (getMail, MailPart(..)) + +-- TODO pandoc possibly contains a better way to handle attachements, +-- something about media bag +-- TODO mail attachments from any type supported by pandoc should +-- work, fix a (MimeType -> PandocParser) function +-- TODO input file shouldn't be locked to vimwiki +-- TODO output other things than PDF +-- - Plain text, requires own redering engine (pandoc plugin?) +-- - HTML, should be multi page for each attachement +-- - with support for RXML templates +-- - Mail, where mail attachements are copied verbatim +-- other attachements become mail attachments oneOf :: (a -> Bool) -> (a -> Bool) -> a -> Bool oneOf f g x = f x || g x @@ -135,6 +161,10 @@ rewriteLink (Link _ is (target, "wikilink")) = do return lnk rewriteLink x = return x +shorten :: Block -> Block +shorten (BlockQuote _) = BlockQuote [ Para [ Code ("", [], []) "[...]" ] ] +shorten x = x + replaceLinks :: Pandoc -> (Pandoc, [AppendixItem]) replaceLinks = flip runState [] . walkM rewriteLink @@ -161,16 +191,147 @@ handleFile fname ".txt" = do return . Just $ [CodeBlock ("", [], []) text] handleFile fname ".pdf" = do aname <- liftIO $ makeAbsolute fname - let pagecmd = "\\thispagestyle{fancy}\\lhead{Bilaga \\Alph{section}.\\arabic{subsection}}" - let arg = "frame,pages={-},width=\\textwidth,pagecommand=" <> pagecmd - let lines = [ "\\phantomsection\\stepcounter{subsection}\\includepdf[" <> arg <> "]{" <> decode aname <> "}" - ] + let pagecmd = [ TexCmd "thispagestyle" [] ["fancy"] + , TexCmd "lhead" [] ["Bilaga \\Alph{section}.\\arabic{subsection}"]] + let arg = [ "frame" + , "pages={-}" + , "width=\\textwidth" + , "pagecommand=" <> mconcat (fmap toTex pagecmd)] + let lines = [ TexCmd "phantomsection" [] [] + , TexCmd "stepcounter" [] ["subsection"] + , TexCmd "includepdf" [joinBy "," arg] [pack . decode $ aname] ] let inline = RawInline (Format "latex") - (pack $ unlines lines) + (mconcat $ fmap toTex lines) return . Just $ [ Plain [ inline ] ] handleFile fname _ = return . Just $ [Para [Str . pack $ fname]] +findAlternative :: [MailPart] -> Maybe MailPart +findAlternative [] = Nothing +findAlternative (m@MailPart { contentType = contentType }:xs) = case contentType of + "text/html" -> Just m + "text/plain" -> Just m + _ -> findAlternative xs + + +-- TODO where are these strings defined +decoder :: String -> (ByteString -> Text) +decoder "iso-8859-1" = decodeLatin1 +decoder "utf-8" = decodeUtf8 +-- decoder _ = decodeUtf8Lenient +decoder _ = decodeLatin1 + +data MailAction = GetBytes + | GetFile + deriving (Show) + +serialize :: MailAction -> String +serialize GetBytes = "get-bytes" +serialize GetFile = "get-file" + +mailGet :: MailAction -> String -> (Handle, Handle) -> (Handle -> IO a) -> IO a +mailGet action id (inp, outp) handler = do + hPutStrLn outp id + hPutStrLn outp $ serialize action + hFlush outp + handler inp + + +getBytes :: String -> (Handle, Handle) -> IO ByteString +getBytes id ports = mailGet GetBytes id ports $ \inp -> do + count <- readIO =<< hGetLine inp + hGet inp count + +getFile :: String -> (Handle, Handle) -> IO FilePath +getFile id ports = mailGet GetFile id ports hGetLine + + +-- https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html +-- TODO differentiate between inline images (through cid:<> urls), and +-- unrelated attachements +handleMailBody :: (Handle, Handle) -> MailPart -> PandocIO [Block] +handleMailBody ports mail = + case splitBy '/' $ contentType mail of + ("multipart", "alternative") -> do + case findAlternative $ reverse $ parts mail of + Just part -> handleMailBody ports part + Nothing -> return [ Para [ Str "Couldn't find any suitable alternative" ] ] + -- mixed, but any unknown should be treated as mixed + ("multipart", _) -> concat <$> mapM (handleMailBody ports) (parts mail) + ("text", "plain") -> do + bytes <- liftIO $ getBytes (partId mail) ports + let content = decoder (fromMaybe "ASCII" $ charset mail) bytes + return [ CodeBlock ("", [], []) content ] + ("text", "html") -> do + bytes <- liftIO $ getBytes (partId mail) ports + let content = decoder (fromMaybe "ASCII" $ charset mail) bytes + pdoc <- readHtml def content + let Pandoc _ blocks = walk shorten pdoc + return blocks + ("image", _) -> do + tmpFile <- liftIO $ getFile (partId mail) ports + let img = [ Plain + [ Image ("", [], []) + [Str "Image Caption?"] + (pack tmpFile, "") ]] + let figure = [ Figure ("", [], []) -- TODO figure ref + -- (Caption Nothing [Plain [Str . filename mail]]) + (Caption Nothing [Plain [Str . pack . show $ filename mail]]) + img + ] + return img + -- "application/pdf" -> () + _ -> return [ Header 2 ("", [], []) [ Str "Attachment omitted" ] + , dlist [ ("Content-Type", contentType mail) + , ("Filename", show . filename $ mail) ] + ] + +-- TODO Titlecase the headers +-- TODO from and to should monospace the +-- address (but not the name) +formatMail :: MailPart -> (Handle, Handle) -> PandocIO [Block] +formatMail mail ports = do + let keys = ["from", "to", "subject", "date"] + let f key = ( [Str $ key <> ":"] + , [[Plain [ Str . pack $ headers mail ! unpack key ]]] + ) + let kvs = map f keys + + body <- handleMailBody ports mail + + return $ DefinitionList kvs : body + +comment :: Text -> [Block] +comment s = [ RawBlock (Format "latex") ("% " <> s) + , RawBlock (Format "html") ("<!-- " <> s <> " -->") + ] + +dlist :: [(String, String)] -> Block +dlist xs = DefinitionList [ ( [Str . pack $ k] + , [[Plain [Str . pack $ v]]] ) + | (k, v) <- xs ] + +handleMailLink :: URI -> PandocIO [Block] +handleMailLink uri = do + let id = decode . uriPath $ uri + -- liftIO $ print id + mail' <- liftIO $ getMail id + -- TODO #short + body <- case mail' of + Left err -> return [ Para [ Str "From " + , Code ("", [], []) "getMail:" ] + , Para [Str . pack $ err] + , Para [Code ("", [], []) (pack id) ]] + Right (mail, proc@(Just stdin, Just stdout, _, _)) -> do + -- TODO "short" `in` uriFrag + bs <- formatMail mail (stdout, stdin) + liftIO $ cleanupProcess proc + return bs + -- TODO error + _ -> return [] + + return $ comment ("msg id: " <> pack id) <> body + -- See also: Html rebuildLinks handleBilaga :: [Block] -> Text -> PandocIO (Maybe [Block]) handleBilaga currentPage url = case parseURI . encodeWith (`elem` uriChars) . unpack $ url of @@ -179,7 +340,7 @@ handleBilaga currentPage url = case parseURI . encodeWith (`elem` uriChars) . un "file:" -> do let fname = uriPath uri handleFile fname $ takeExtension fname - "mail:" -> return $ Just [ Para [ Str "A mail would have gone here" ]] + "mail:" -> Just <$> handleMailLink uri _ -> return $ Just [Para [Str . pack . show $ uri]] Nothing -> case break (== '#') (unpack url) of ("", '#':frag) -> return $ getHeadingData (pack frag) currentPage @@ -222,19 +383,29 @@ main = do let apx = mconcat fragments let trail = [ RawBlock (Format "latex") "\\appendix" ] <> apx let content = Pandoc meta $ bs <> trail - let packages = [ "pdfpages" - , "fancyhdr" - ] - let headerIncludes = [ SimpleVal $ "\\usepackage{" <> package <> "}" - | package <- packages] + let opts = [ ("boxlinks", toVal True) , ("colorlinks", toVal True) , ("papersize", SimpleVal "a4") , ("numbersections", toVal True) , ("lang", SimpleVal "swedish") - , ("header-includes", ListVal headerIncludes) + , ("header-includes", ListVal $ fmap (SimpleVal . fromString . unpack . toTex) [ + TexPackage "pdfpages" -- including PDF's + , TexPackage "fancyhdr" -- page headers on included pdf pages + , TempletizedTexPackage ["most"] "tcolorbox" -- for blockquotes + , LstSet [ ("breaklines", "true") + , ("basicstyle", "\\scriptsize") ] + , TexCmd "newtcolorbox" [] ["myquote", joinBy "," [ "breakable" + , "colback=red!5!white" + , "colframe=red!75!black" ]] + , TexCmd "renewenvironment" [] ("quote" : fmap toTex [ + TexCmd "begin" [] ["myquote"] + , TexCmd "end" [] ["myquote"] + ]) + ]) ] writeLaTeX (def { writerTemplate = Just texTemplate + , writerListings = True , writerVariables = Context $ Map.fromList opts }) content changeWorkingDirectory cwd diff --git a/hs/src/Mail.hs b/hs/src/Mail.hs new file mode 100644 index 0000000..505931e --- /dev/null +++ b/hs/src/Mail.hs @@ -0,0 +1,60 @@ +{-# LANGUAGE DeriveGeneric + , OverloadedStrings #-} + +module Mail where + +import System.Process +import Data.Aeson ( eitherDecode + , FromJSON + , ToJSON + , defaultOptions + , parseJSON + , genericToEncoding + , toEncoding + , withObject + , (.:) + ) +import Data.Map (Map) +import GHC.Generics +import Data.ByteString (hGet) +import Data.ByteString.Lazy (fromStrict) +import System.IO (Handle, hGetLine) + +data MailPart = MailPart + { filename :: Maybe String + , partId :: String + , headers :: Map String String + , parts :: [MailPart] + , contentType :: String + , charset :: Maybe String + } deriving (Generic, Show) + +instance ToJSON MailPart where + toEncoding = genericToEncoding defaultOptions + +instance FromJSON MailPart where + parseJSON = withObject "MailPart" $ \v -> MailPart + <$> v .: "filename" + <*> v .: "id" + <*> v .: "headers" + <*> v .: "parts" + <*> v .: "content-type" + <*> v .: "charset" + +type Proc = (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) + +getMail :: String -> IO (Either String (MailPart, Proc)) +getMail id = do + let prgr = "/home/hugo/code/vimwiki-scripts/hs/mail.py" -- TODO + let cmd = (proc prgr [id]) { std_out = CreatePipe + , std_in = CreatePipe } + proc@(_, Just stdout, _, _) <- createProcess cmd + + count <- read <$> hGetLine stdout + bytes <- fromStrict <$> hGet stdout count + case eitherDecode bytes of + Left err -> do + cleanupProcess proc + print bytes + return $ Left err + Right mailpart -> return $ Right (mailpart, proc) diff --git a/hs/src/Tex.hs b/hs/src/Tex.hs new file mode 100644 index 0000000..4ba7a0d --- /dev/null +++ b/hs/src/Tex.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Tex where + +import Data.Text (Text) +import Util (joinBy) + +-- A poor mans HaTeX + +data TeX = TexPackage Text + | TempletizedTexPackage [Text] Text + | LstSet [(Text, Text)] + | TexCmd Text [Text] [Text] + | RawTex Text + deriving (Show) + +toTex :: TeX -> Text +toTex (TexPackage name) = "\\usepackage{" <> name <> "}" +toTex (TempletizedTexPackage args name) = "\\usepackage[" <> joinBy "," args <> "]{" <> name <> "}" +toTex (LstSet kvs) = "\\lstset{" <> args <> "}" + where args = joinBy "," [key <> "=" <> val | (key, val) <- kvs] +toTex (TexCmd cmd os []) = "\\" <> cmd + <> mconcat ["[" <> x <> "]" | x <- os] <> "{}" +toTex (TexCmd cmd os xs) = "\\" <> cmd + <> mconcat ["[" <> x <> "]" | x <- os] + <> mconcat ["{" <> x <> "}" | x <- xs] +toTex (RawTex t) = t diff --git a/hs/src/Util.hs b/hs/src/Util.hs index 06a7e86..e3c8d2f 100644 --- a/hs/src/Util.hs +++ b/hs/src/Util.hs @@ -4,8 +4,12 @@ module Util , joinBy , nullToMaybe , firstJust +, splitBy ) where +swap :: (a, b) -> (b, a) +swap (a, b) = (b, a) + -- Accumulate all sub-sequences, -- For example, accumulate [1,2,3] == [[1], [1, 2], [1, 2, 3]] accumulate :: [a] -> [[a]] @@ -21,6 +25,14 @@ intersperse y (x:xs) = x : y : intersperse y xs joinBy :: Monoid a => a -> [a] -> a joinBy delim lst = mconcat $ intersperse delim lst +splitBy' :: Eq a => a -> [a] -> ([a], [a]) +splitBy' _ [] = ([], []) +splitBy' delim (x:xs) + | delim == x = (xs, []) + | otherwise = (x:) <$> splitBy' delim xs + +splitBy :: Eq a => a -> [a] -> ([a], [a]) +splitBy d = swap . splitBy' d nullToMaybe :: (Eq a, Monoid a) => a -> Maybe a nullToMaybe m diff --git a/hs/vimwiki.cabal b/hs/vimwiki.cabal index eb50315..d0a0dda 100644 --- a/hs/vimwiki.cabal +++ b/hs/vimwiki.cabal @@ -31,17 +31,22 @@ executable Main Files, Links, Html, + Mail, Handlingar, Vimwiki.Man, System.Home, Util, + Tex, Data.Text.Compat, System.FilePath.Normalize build-depends: base >= 4.8, + aeson >= 2.1.1, base64 >= 0.4, blaze-html >= 0.9, blaze-markup >= 0.8.2.7, + -- bumping this to 0.11 causes everything to go wrong + bytestring >= 0.10, containers >= 0.5, data-default >= 0.7, directory >= 1.3.6, @@ -58,5 +63,6 @@ executable Main text >= 1.2.2, unix >= 2.7.2, uri-encode >= 1.5.0, + utf8-string >= 1.0.2, xdg-basedir >= 0.2 default-language: Haskell2010 |