MAJOR work on handlingar path.

author: Hugo Hörnquist <hugo@lysator.liu.se> 2023-03-05 10:45:29 +0100
committer: Hugo Hörnquist <hugo@lysator.liu.se> 2023-03-05 10:45:29 +0100
commit: 2d54f531746d05a1b6d757db92f7f2eccbc7c8f2 (patch)
tree: e0fdfe99571677ad05245b90d240ef886a5ec222
parent: work (diff)
download: vimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.gz
vimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.xz
7 files changed, 436 insertions, 15 deletions
diff --git a/hs/mail.py b/hs/mail.py
new file mode 100755
index 0000000..03a423b
--- /dev/null
+++ b/hs/mail.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+"""
+./main <msg-id>
+> LEN "\n" json-data
+on input:
+> LEN "\n" bytes
+"""
+
+from email.message import EmailMessage
+from email.parser import BytesParser
+from uuid import uuid4
+import email.policy
+import hashlib
+import json
+import mimetypes
+import os
+import sys
+import email.contentmanager
+
+import mu
+
+def build_part_tree(part, part_map):
+    result = []
+    id = str(uuid4())
+    part_map[id] = part
+    return {
+        'filename': part.get_filename(),
+        'headers': {key.lower(): value for key, value in part.items()},
+        'id': id,
+        'parts': [build_part_tree(p, part_map) for p in part.iter_parts()],
+        'content-type': part.get_content_type(),
+        'charset': part.get_content_charset(),
+    }
+
+tmpdir = f'/tmp/.vimwiki-scripts-{os.getuid()}'
+
+os.umask(0o077)
+try:
+    os.mkdir(tmpdir)
+except FileExistsError:
+    os.chmod(tmpdir, 0o700)
+
+content_manager = email.contentmanager.raw_data_manager
+content_manager.add_get_handler('text', email.contentmanager.get_non_text_content)
+
+def main():
+
+    if len(sys.argv) == 1:
+        print(f'Usage: {sys.argv[0]} message-id')
+        return
+
+    id = sys.argv[1]
+
+    filename = mu.find_file(id)
+    if not filename:
+        print(f'No mail with id=[{id}]', file=sys.stderr)
+        return
+    parser = BytesParser(policy=email.policy.default)
+    with open(filename, 'rb') as f:
+        mail = parser.parse(f)
+
+
+    part_map = {}
+    bytes = json.dumps(build_part_tree(mail, part_map), 
+                       ensure_ascii=True).encode('ASCII')
+    sys.stdout.buffer.write(b'%i\n' % len(bytes))
+    sys.stdout.buffer.write(bytes)
+    sys.stdout.flush()
+
+    try:
+        while True:
+            part_id = input()
+            part = part_map[part_id]
+            action, *args = input().split(' ')
+            if action == 'get-bytes':
+                # TODO get_content should be able to return a bytes object
+                # directly. We want bytes to have exact length when
+                # sending
+                s = part.get_content(content_manager=content_manager)
+                # bytes = s.encode('UTF-8')
+                bytes = s
+                sys.stdout.buffer.write(b'%i\n' % len(bytes))
+                sys.stdout.buffer.write(bytes)
+
+            elif action == 'get-file':
+                s = part.get_content()
+                bytes = s
+                digest = hashlib.sha256(bytes).hexdigest()
+                filename = os.path.join(tmpdir, digest) \
+                         + mimetypes.guess_extension(part.get_content_type())
+                # Write could be skiped, but then we would
+                # probably want to ensure the correct content,
+                # meaning checksuming the existing file, which
+                # would probably be slover
+                with open(filename, 'wb') as f:
+                    f.write(bytes)
+                sys.stdout.buffer.write(filename.encode('UTF-8'))
+                sys.stdout.buffer.write(b'\n')
+
+            else:
+                print(f"Unknown action '{action}'", file=sys.stderr)
+                pass
+            sys.stdout.flush()
+    except EOFError:
+        pass
+
+if __name__ == '__main__':
+    main()
diff --git a/hs/mu.py b/hs/mu.py
new file mode 100644
index 0000000..5f9f5b1
--- /dev/null
+++ b/hs/mu.py
@@ -0,0 +1,36 @@
+import subprocess
+from subprocess import PIPE
+from typing import (
+    Optional,
+)
+
+class MuError(Exception):
+    codes = {
+        1: 'General Error',
+        2: 'No Matches',
+        4: 'Database is corrupted'
+    }
+
+    def __init__(self, returncode: int):
+        self.returncode: int = returncode
+        self.msg: str = MuError.codes.get(returncode, 'Unknown Error')
+
+    def __repr__(self):
+        return f'MuError({self.returncode}, "{self.msg}")'
+
+    def __str__(self):
+        return repr(self)
+
+
+def find_file(id: str) -> Optional[str]:
+    cmd = subprocess.run(['mu', 'find', '-u', f'i:{id}',
+                          '--fields', 'l'],
+                         stdout=PIPE)
+    filename = cmd.stdout.decode('UTF-8').strip()
+
+    if cmd.returncode == 4:
+        return None
+    if cmd.returncode != 0:
+        raise MuError(cmd.returncode)
+    return filename
+
diff --git a/hs/src/Handlingar.hs b/hs/src/Handlingar.hs
index 431e7a5..2664b08 100644
--- a/hs/src/Handlingar.hs
+++ b/hs/src/Handlingar.hs
@@ -12,7 +12,10 @@ import Prelude hiding
 import Text.Pandoc
     ( runIOorExplode
     , readVimwiki
-    , WriterOptions(writerTemplate, writerVariables)
+    , readHtml
+    , WriterOptions( writerTemplate
+                   , writerListings
+                   , writerVariables)
     , PandocMonad
     , PandocIO
     , writePlain
@@ -23,7 +26,7 @@ import Text.Pandoc.Writers.LaTeX (writeLaTeX)
 import Text.Pandoc.Templates
     ( compileDefaultTemplate
     )
-import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Walk (walk, walkM)
 import Data.Text (Text, pack, unpack, strip)
 import Data.Text.IO
     ( putStrLn
@@ -37,8 +40,16 @@ import Text.DocTemplates (toVal, Val(SimpleVal, ListVal), Context(Context))
 
 import qualified Data.Text.IO as T
 import qualified Data.Map.Lazy as Map
+import Data.Map ((!))
 import System.Posix.Directory (changeWorkingDirectory, getWorkingDirectory)
 import System.Directory (makeAbsolute)
+import System.IO (Handle, hGetLine, hPutStrLn, hFlush)
+import Data.ByteString (ByteString, hGet)
+import Data.Text.Encoding (decodeLatin1, decodeUtf8)
+import System.Process (cleanupProcess)
+import Data.Maybe (fromMaybe)
+import Tex (toTex, TeX (..))
+import Data.String (IsString(fromString))
 
 import System.FilePath
     ( dropFileName
@@ -46,10 +57,25 @@ import System.FilePath
     , takeExtension
     )
 
-import Network.URI (parseURI, uriScheme, uriPath)
+import Network.URI (URI, parseURI, uriScheme, uriPath)
 import Network.URI.Encode (encodeWith, decode)
 import Data.Set (Set)
 import qualified Data.Set as Set
+import Util (joinBy, splitBy)
+
+import Mail (getMail, MailPart(..))
+
+-- TODO pandoc possibly contains a better way to handle attachements,
+--      something about media bag
+-- TODO mail attachments from any type supported by pandoc should
+--      work, fix a (MimeType -> PandocParser) function
+-- TODO input file shouldn't be locked to vimwiki
+-- TODO output other things than PDF
+--    - Plain text, requires own redering engine (pandoc plugin?)
+--    - HTML, should be multi page for each attachement
+--      - with support for RXML templates
+--    - Mail, where mail attachements are copied verbatim
+--            other attachements become mail attachments
 
 oneOf :: (a -> Bool) -> (a -> Bool) -> a -> Bool
 oneOf f g x = f x || g x
@@ -135,6 +161,10 @@ rewriteLink (Link _ is (target, "wikilink")) = do
     return lnk
 rewriteLink x = return x
 
+shorten :: Block -> Block
+shorten (BlockQuote _) = BlockQuote [ Para [ Code ("", [], []) "[...]" ] ]
+shorten x = x
+
 replaceLinks :: Pandoc -> (Pandoc, [AppendixItem])
 replaceLinks = flip runState [] . walkM rewriteLink
 
@@ -161,16 +191,147 @@ handleFile fname ".txt" = do
     return . Just $ [CodeBlock ("", [], []) text]
 handleFile fname ".pdf" = do
     aname <- liftIO $ makeAbsolute fname
-    let pagecmd = "\\thispagestyle{fancy}\\lhead{Bilaga \\Alph{section}.\\arabic{subsection}}"
-    let arg = "frame,pages={-},width=\\textwidth,pagecommand=" <> pagecmd
-    let lines = [ "\\phantomsection\\stepcounter{subsection}\\includepdf[" <> arg <> "]{" <> decode aname <> "}"
-                ]
+    let pagecmd = [ TexCmd "thispagestyle" [] ["fancy"]
+                  , TexCmd "lhead" [] ["Bilaga \\Alph{section}.\\arabic{subsection}"]]
+    let arg = [ "frame"
+              , "pages={-}"
+              , "width=\\textwidth"
+              , "pagecommand=" <> mconcat (fmap toTex pagecmd)]
+    let lines = [ TexCmd "phantomsection" [] []
+                , TexCmd "stepcounter" [] ["subsection"]
+                , TexCmd "includepdf" [joinBy "," arg] [pack . decode $ aname] ]
     let inline = RawInline
                  (Format "latex")
-                 (pack $ unlines lines)
+                 (mconcat $ fmap toTex lines)
     return . Just $ [ Plain [ inline ] ]
 handleFile fname _ = return . Just $ [Para [Str . pack $ fname]]
 
+findAlternative :: [MailPart] -> Maybe MailPart
+findAlternative [] = Nothing
+findAlternative (m@MailPart { contentType = contentType }:xs) = case contentType of
+    "text/html"  -> Just m
+    "text/plain" -> Just m
+    _            -> findAlternative xs
+
+
+-- TODO where are these strings defined
+decoder :: String -> (ByteString -> Text)
+decoder "iso-8859-1" = decodeLatin1
+decoder "utf-8"      = decodeUtf8
+-- decoder _            = decodeUtf8Lenient
+decoder _            = decodeLatin1
+
+data MailAction = GetBytes
+                | GetFile
+                deriving (Show)
+
+serialize :: MailAction -> String
+serialize GetBytes = "get-bytes"
+serialize GetFile = "get-file"
+
+mailGet :: MailAction -> String -> (Handle, Handle) -> (Handle -> IO a) -> IO a
+mailGet action id (inp, outp) handler = do
+    hPutStrLn outp id
+    hPutStrLn outp $ serialize action
+    hFlush outp
+    handler inp
+
+
+getBytes :: String -> (Handle, Handle) -> IO ByteString
+getBytes id ports = mailGet GetBytes id ports $ \inp -> do
+    count <- readIO =<< hGetLine inp
+    hGet inp count
+
+getFile :: String -> (Handle, Handle) -> IO FilePath
+getFile id ports = mailGet GetFile id ports hGetLine
+
+
+-- https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
+-- TODO differentiate between inline images (through cid:<> urls), and
+-- unrelated attachements
+handleMailBody :: (Handle, Handle) -> MailPart -> PandocIO [Block]
+handleMailBody ports mail =
+    case splitBy '/' $ contentType mail of
+        ("multipart", "alternative") -> do
+            case findAlternative $ reverse $ parts mail of
+                Just part -> handleMailBody ports part
+                Nothing   -> return [ Para [ Str "Couldn't find any suitable alternative" ] ]
+        -- mixed, but any unknown should be treated as mixed
+        ("multipart", _) -> concat <$> mapM (handleMailBody ports) (parts mail)
+        ("text", "plain") -> do
+            bytes <- liftIO $ getBytes (partId mail) ports
+            let content = decoder (fromMaybe "ASCII" $ charset mail) bytes
+            return [ CodeBlock ("", [], []) content ]
+        ("text", "html") -> do
+            bytes <- liftIO $ getBytes (partId mail) ports
+            let content = decoder (fromMaybe "ASCII" $ charset mail) bytes
+            pdoc <- readHtml def content
+            let Pandoc _ blocks = walk shorten pdoc
+            return blocks
+        ("image", _) -> do
+            tmpFile <- liftIO $ getFile (partId mail) ports
+            let img = [ Plain
+                        [ Image ("", [], [])
+                          [Str "Image Caption?"]
+                          (pack tmpFile, "") ]]
+            let figure = [ Figure ("", [], []) -- TODO figure ref
+                      -- (Caption Nothing [Plain [Str . filename mail]])
+                      (Caption Nothing [Plain [Str . pack . show $ filename mail]])
+                      img
+                      ]
+            return img
+        -- "application/pdf"       -> ()
+        _  -> return [ Header 2 ("", [], []) [ Str "Attachment omitted" ]
+                     , dlist [ ("Content-Type", contentType mail)
+                             , ("Filename", show . filename $ mail) ]
+                     ]
+
+-- TODO Titlecase the headers
+-- TODO from and to should monospace the
+--      address (but not the name)
+formatMail :: MailPart -> (Handle, Handle) -> PandocIO [Block]
+formatMail mail ports = do
+    let keys = ["from", "to", "subject", "date"]
+    let f key = ( [Str $ key <> ":"]
+                , [[Plain [ Str . pack $ headers mail ! unpack key ]]]
+                )
+    let kvs = map f keys
+
+    body <- handleMailBody ports mail
+
+    return $ DefinitionList kvs : body
+
+comment :: Text -> [Block]
+comment s = [ RawBlock (Format "latex") ("% " <> s)
+            , RawBlock (Format "html") ("<!-- " <> s <> " -->")
+            ]
+
+dlist :: [(String, String)] -> Block
+dlist xs = DefinitionList [ ( [Str . pack $ k]
+                            , [[Plain [Str . pack $ v]]] )
+                          | (k, v) <- xs ]
+
+handleMailLink :: URI -> PandocIO [Block]
+handleMailLink uri = do
+    let id = decode . uriPath $ uri
+    -- liftIO $ print id
+    mail' <- liftIO $ getMail id
+    -- TODO #short
+    body <- case mail' of
+        Left err -> return [ Para [ Str "From "
+                                    , Code ("", [], []) "getMail:"  ]
+                             , Para [Str . pack $ err]
+                             , Para [Code ("", [], []) (pack id) ]]
+        Right (mail, proc@(Just stdin, Just stdout, _, _)) -> do
+            -- TODO "short" `in` uriFrag
+            bs <- formatMail mail (stdout, stdin)
+            liftIO $ cleanupProcess proc
+            return bs
+        -- TODO error
+        _ -> return []
+
+    return $ comment ("msg id: " <> pack id) <> body
+
 -- See also: Html rebuildLinks
 handleBilaga :: [Block] -> Text -> PandocIO (Maybe [Block])
 handleBilaga currentPage url = case parseURI . encodeWith (`elem` uriChars) . unpack $ url of
@@ -179,7 +340,7 @@ handleBilaga currentPage url = case parseURI . encodeWith (`elem` uriChars) . un
                     "file:" -> do
                         let fname = uriPath uri
                         handleFile fname $ takeExtension fname
-                    "mail:" -> return $ Just [ Para [ Str "A mail would have gone here" ]]
+                    "mail:" -> Just <$> handleMailLink uri
                     _ -> return $ Just [Para [Str . pack . show $ uri]]
     Nothing  -> case break (== '#') (unpack url) of
         ("", '#':frag)   -> return $ getHeadingData (pack frag) currentPage
@@ -222,19 +383,29 @@ main = do
                 let apx = mconcat fragments
                 let trail = [ RawBlock (Format "latex") "\\appendix" ] <> apx
                 let content = Pandoc meta $ bs <> trail
-                let packages = [ "pdfpages"
-                               , "fancyhdr"
-                               ]
-                let headerIncludes = [ SimpleVal $ "\\usepackage{" <> package <> "}"
-                                     | package <- packages]
+
                 let opts = [ ("boxlinks",       toVal True)
                            , ("colorlinks",     toVal True)
                            , ("papersize",      SimpleVal "a4")
                            , ("numbersections", toVal True)
                            , ("lang",           SimpleVal "swedish")
-                           , ("header-includes",  ListVal headerIncludes)
+                           , ("header-includes",  ListVal $ fmap (SimpleVal . fromString . unpack . toTex) [
+                                 TexPackage "pdfpages" -- including PDF's
+                               , TexPackage "fancyhdr" -- page headers on included pdf pages
+                               , TempletizedTexPackage ["most"] "tcolorbox" -- for blockquotes
+                               , LstSet [ ("breaklines", "true")
+                                        , ("basicstyle", "\\scriptsize") ]
+                               , TexCmd "newtcolorbox" [] ["myquote", joinBy "," [ "breakable"
+                                                                                 , "colback=red!5!white"
+                                                                                 , "colframe=red!75!black" ]]
+                               , TexCmd "renewenvironment" [] ("quote" : fmap toTex [
+                                                                             TexCmd "begin" [] ["myquote"]
+                                                                           , TexCmd "end"   [] ["myquote"]
+                                                                           ])
+                               ])
                            ]
                 writeLaTeX (def { writerTemplate = Just texTemplate
+                                , writerListings = True
                                 , writerVariables = Context $ Map.fromList opts }) content
 
             changeWorkingDirectory cwd
diff --git a/hs/src/Mail.hs b/hs/src/Mail.hs
new file mode 100644
index 0000000..505931e
--- /dev/null
+++ b/hs/src/Mail.hs
@@ -0,0 +1,60 @@
+{-# LANGUAGE DeriveGeneric
+           , OverloadedStrings #-}
+
+module Mail where
+
+import System.Process
+import Data.Aeson ( eitherDecode
+                  , FromJSON
+                  , ToJSON
+                  , defaultOptions
+                  , parseJSON
+                  , genericToEncoding
+                  , toEncoding
+                  , withObject
+                  , (.:)
+                  )
+import Data.Map (Map)
+import GHC.Generics
+import Data.ByteString (hGet)
+import Data.ByteString.Lazy (fromStrict)
+import System.IO (Handle, hGetLine)
+
+data MailPart = MailPart
+    { filename :: Maybe String
+    , partId :: String
+    , headers :: Map String String
+    , parts :: [MailPart]
+    , contentType :: String
+    , charset :: Maybe String
+    } deriving (Generic, Show)
+
+instance ToJSON MailPart where
+  toEncoding = genericToEncoding defaultOptions
+
+instance FromJSON MailPart where
+    parseJSON = withObject "MailPart" $ \v -> MailPart
+        <$> v .: "filename"
+        <*> v .: "id"
+        <*> v .: "headers"
+        <*> v .: "parts"
+        <*> v .: "content-type"
+        <*> v .: "charset"
+
+type Proc = (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle)
+
+getMail :: String -> IO (Either String (MailPart, Proc))
+getMail id = do
+    let prgr = "/home/hugo/code/vimwiki-scripts/hs/mail.py" -- TODO
+    let cmd = (proc prgr [id]) { std_out = CreatePipe
+                               , std_in  = CreatePipe }
+    proc@(_, Just stdout, _, _) <- createProcess cmd
+
+    count <- read <$> hGetLine stdout
+    bytes <- fromStrict <$> hGet stdout count
+    case eitherDecode bytes of
+        Left err -> do
+            cleanupProcess proc
+            print bytes
+            return $ Left err
+        Right mailpart -> return $ Right (mailpart, proc)
diff --git a/hs/src/Tex.hs b/hs/src/Tex.hs
new file mode 100644
index 0000000..4ba7a0d
--- /dev/null
+++ b/hs/src/Tex.hs
@@ -0,0 +1,27 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Tex where
+
+import Data.Text (Text)
+import Util (joinBy)
+
+-- A poor mans HaTeX
+
+data TeX = TexPackage Text
+         | TempletizedTexPackage [Text] Text
+         | LstSet [(Text, Text)]
+         | TexCmd Text [Text] [Text]
+         | RawTex Text
+         deriving (Show)
+
+toTex :: TeX -> Text
+toTex (TexPackage name) = "\\usepackage{" <> name <> "}"
+toTex (TempletizedTexPackage args name) = "\\usepackage[" <> joinBy "," args <> "]{" <> name <> "}"
+toTex (LstSet kvs) = "\\lstset{" <> args <> "}"
+    where args = joinBy "," [key <> "=" <> val | (key, val) <- kvs]
+toTex (TexCmd cmd os []) = "\\" <> cmd
+                                <> mconcat ["[" <> x <> "]" | x <- os] <> "{}"
+toTex (TexCmd cmd os xs) = "\\" <> cmd
+                                <> mconcat ["[" <> x <> "]" | x <- os] 
+                                <> mconcat ["{" <> x <> "}" | x <- xs]
+toTex (RawTex t) = t
diff --git a/hs/src/Util.hs b/hs/src/Util.hs
index 06a7e86..e3c8d2f 100644
--- a/hs/src/Util.hs
+++ b/hs/src/Util.hs
@@ -4,8 +4,12 @@ module Util
 , joinBy
 , nullToMaybe
 , firstJust
+, splitBy
 ) where
 
+swap :: (a, b) -> (b, a)
+swap (a, b) = (b, a)
+
 -- Accumulate all sub-sequences,
 -- For example, accumulate [1,2,3] == [[1], [1, 2], [1, 2, 3]]
 accumulate :: [a] -> [[a]]
@@ -21,6 +25,14 @@ intersperse y (x:xs) = x : y : intersperse y xs
 joinBy :: Monoid a => a -> [a] -> a
 joinBy delim lst = mconcat $ intersperse delim lst
 
+splitBy' :: Eq a => a -> [a] -> ([a], [a])
+splitBy' _ [] = ([], [])
+splitBy' delim (x:xs)
+    | delim == x = (xs, [])
+    | otherwise = (x:) <$> splitBy' delim xs
+
+splitBy :: Eq a => a -> [a] -> ([a], [a])
+splitBy d = swap . splitBy' d
 
 nullToMaybe :: (Eq a, Monoid a) => a -> Maybe a
 nullToMaybe m
diff --git a/hs/vimwiki.cabal b/hs/vimwiki.cabal
index eb50315..d0a0dda 100644
--- a/hs/vimwiki.cabal
+++ b/hs/vimwiki.cabal
@@ -31,17 +31,22 @@ executable Main
     Files,
     Links,
     Html,
+    Mail,
     Handlingar,
     Vimwiki.Man,
     System.Home,
     Util,
+    Tex,
     Data.Text.Compat,
     System.FilePath.Normalize
   build-depends:
     base                 >= 4.8,
+    aeson                >= 2.1.1,
     base64               >= 0.4,
     blaze-html           >= 0.9,
     blaze-markup         >= 0.8.2.7,
+    -- bumping this to 0.11 causes everything to go wrong
+    bytestring           >= 0.10,
     containers           >= 0.5,
     data-default         >= 0.7,
     directory            >= 1.3.6,
@@ -58,5 +63,6 @@ executable Main
     text                 >= 1.2.2,
     unix                 >= 2.7.2,
     uri-encode           >= 1.5.0,
+    utf8-string          >= 1.0.2,
     xdg-basedir          >= 0.2
   default-language:    Haskell2010
author	Hugo Hörnquist <hugo@lysator.liu.se>	2023-03-05 10:45:29 +0100
committer	Hugo Hörnquist <hugo@lysator.liu.se>	2023-03-05 10:45:29 +0100
commit	2d54f531746d05a1b6d757db92f7f2eccbc7c8f2 (patch)
tree	e0fdfe99571677ad05245b90d240ef886a5ec222
parent	work (diff)
download	vimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.gz vimwiki-scripts-2d54f531746d05a1b6d757db92f7f2eccbc7c8f2.tar.xz