summaryrefslogtreecommitdiff
path: root/hs/src/Handlingar.hs
blob: 96ee6cfdb389ff80fb51b7377d55943d3a54c528 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
{-# LANGUAGE OverloadedStrings #-}

module Handlingar
( main
) where

import Prelude hiding
    ( putStrLn
    , writeFile
    )

import Control.Monad.State.Lazy
import Data.ByteString (ByteString)
import Data.Default (def)
import Data.Map ((!))
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import Data.String (IsString(fromString))
import Data.Text (Text, pack, unpack, strip)
import Data.Text.Encoding (decodeLatin1, decodeUtf8)
import Data.Text.Encoding.Base64 (encodeBase64)
import Data.Text.IO (putStrLn, writeFile)
import Mail (getMail, MailPart(..), getBytes, getFile)
import Network.URI (URI, parseURI, uriScheme, uriPath)
import Network.URI.Encode (encodeWith, decode)
import System.Directory (makeAbsolute)
import System.FilePath (dropFileName, takeExtension, (<.>))
import System.IO (Handle)
import System.Posix.Directory (changeWorkingDirectory, getWorkingDirectory)
import System.Process (cleanupProcess)
import Tex (toTex, TeX (..))
import Text.DocTemplates (toVal, Val(SimpleVal, ListVal), Context(Context))
import Text.Pandoc
    ( runIOorExplode
    , readVimwiki
    , readHtml
    , WriterOptions( writerTemplate
                   , writerListings
                   , writerVariables)
    , PandocMonad
    , PandocIO
    , writePlain
    )
import Text.Pandoc.Builder
import Text.Pandoc.Extract (AppendixItem, getHeadingData, extractKV) 
import Text.Pandoc.Items (comment, dlist) 
import Text.Pandoc.Templates (compileDefaultTemplate)
import Text.Pandoc.Walk (walk, walkM)
import Text.Pandoc.Writers.LaTeX (writeLaTeX)
import Util (joinBy, splitBy, uncurry3, (<&>))
import qualified Data.Map.Lazy as Map
import qualified Data.Set as Set
import qualified Data.Text.IO as T

-- TODO pandoc possibly contains a better way to handle attachements,
--      something about media bag
-- TODO mail attachments from any type supported by pandoc should
--      work, fix a (MimeType -> PandocParser) function
-- TODO input file shouldn't be locked to vimwiki
-- TODO output other things than PDF
--    - Plain text, requires own redering engine (pandoc plugin?)
--    - HTML, should be multi page for each attachement
--      - with support for RXML templates
--    - Mail, where mail attachements are copied verbatim
--            other attachements become mail attachments

-- Takes a list of Pandoc Blocks starting with a heading. If the first
-- element after the heading is a Definition list, then parse that to
-- kv-pairs, and return it also.
handleBlocks :: PandocMonad m => [Block] -> m (Text, [(Text, Text)], [Block])
handleBlocks (head:DefinitionList definitions:blocks) = do
    heading <- writePlain def (Pandoc nullMeta [head])
    kvs <- mapM extractKV definitions
    return (strip heading, kvs, blocks)
handleBlocks (head:blocks) = do
    heading <- writePlain def (Pandoc nullMeta [head])
    return (strip heading, [], blocks)
handleBlocks blocks = return ("Heading missing", [], blocks)

buildPrimary :: PandocMonad m
             => Pandoc
             -> m (Pandoc, [AppendixItem])
buildPrimary (Pandoc meta blocks) = do
    -- Pandoc meta blocks <- reader def text
    -- let Just blocks = getHeadingData heading blocks
    (heading, kvs, wantedBlocks') <- handleBlocks blocks
    let (pandoc, appendices) = replaceLinks $ Pandoc meta wantedBlocks'

    let pandoc' = foldl (flip . uncurry $ setMeta) pandoc
                $ ("title", heading) : kvs
    return (pandoc', appendices)


rewriteLink :: Inline -> State [AppendixItem] Inline
rewriteLink (Link _ is (target, "wikilink")) = do
    let appendixRef = encodeBase64 target
    let txt = [ Str "(bilaga"
              , RawInline (Format "latex")
                          ("~\\ref{bilaga:" <> appendixRef <> "}")
              , Str ")"
              ]
    let lnk = Link ("", [], [ ("reference-type", "ref")
                            , ("reference", "bilaga:" <> appendixRef) ])
                   (is <> [Superscript txt])
                   ( "#bilaga:" <> appendixRef, "" )
    modify ((is, appendixRef, target):)
    return lnk
rewriteLink x = return x

shorten :: Block -> Block
shorten (BlockQuote _) = BlockQuote [ Para [ Code ("", [], []) "[...]" ] ]
shorten x = x

replaceLinks :: Pandoc -> (Pandoc, [AppendixItem])
replaceLinks = flip runState [] . walkM rewriteLink

handleBilagaHeading :: [Inline] -> Text -> Block
handleBilagaHeading is ref = Header 1 ("bilaga:" <> ref, [], []) is


uriChars :: Set Char
uriChars = Set.fromList $ ":/?#"  -- Allows us to modifiy existing URL
                        <> "-._~" -- URL safe
                        <> ['A'..'Z'] <> ['a'..'z'] <> ['0'..'9']

getVimwikiPage :: FilePath -> PandocIO [Block]
getVimwikiPage path = do
    text <- liftIO $ T.readFile path
    Pandoc _ blocks <- readVimwiki def text
    return blocks

handleFile :: FilePath -> String -> PandocIO (Maybe [Block])
handleFile fname ".txt" = do
    text <- liftIO $ T.readFile fname
    return . Just $ [CodeBlock ("", [], []) text]
handleFile fname ".pdf" = do
    aname <- liftIO $ makeAbsolute fname
    let pagecmd = [ TexCmd "thispagestyle" [] ["fancy"]
                  , TexCmd "lhead" [] ["Bilaga \\Alph{section}.\\arabic{subsection}"]]
    let arg = [ "frame"
              , "pages={-}"
              , "width=\\textwidth"
              , "pagecommand=" <> mconcat (fmap toTex pagecmd)]
    let lines = [ TexCmd "phantomsection" [] []
                , TexCmd "stepcounter" [] ["subsection"]
                , TexCmd "includepdf" [joinBy "," arg] [pack . decode $ aname] ]
    let inline = RawInline
                 (Format "latex")
                 (mconcat $ fmap toTex lines)
    return . Just $ [ Plain [ inline ] ]
handleFile fname _ = return . Just $ [Para [Str . pack $ fname]]

findAlternative :: [MailPart] -> Maybe MailPart
findAlternative [] = Nothing
findAlternative (m@MailPart { contentType = contentType }:xs) = case contentType of
    "text/html"  -> Just m
    "text/plain" -> Just m
    _            -> findAlternative xs


-- TODO where are these strings defined
-- Strict Bytestring
decoder :: String -> (ByteString -> Text)
decoder "iso-8859-1" = decodeLatin1
decoder "utf-8"      = decodeUtf8
-- decoder _            = decodeUtf8Lenient
decoder _            = decodeLatin1


-- https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
-- TODO differentiate between inline images (through cid:<> urls), and
-- unrelated attachements
handleMailBody :: (Handle, Handle) -> MailPart -> PandocIO [Block]
handleMailBody ports mail =
    case splitBy '/' $ contentType mail of
        ("multipart", "alternative") -> do
            case findAlternative $ reverse $ parts mail of
                Just part -> handleMailBody ports part
                Nothing   -> return [ Para [ Str "Couldn't find any suitable alternative" ] ]
        -- mixed, but any unknown should be treated as mixed
        ("multipart", _) -> concat <$> mapM (handleMailBody ports) (parts mail)
        ("text", "plain") -> do
            bytes <- liftIO $ getBytes (partId mail) ports
            let content = decoder (fromMaybe "ASCII" $ charset mail) bytes
            return [ CodeBlock ("", [], []) content ]
        ("text", "html") -> do
            bytes <- liftIO $ getBytes (partId mail) ports
            let content = decoder (fromMaybe "ASCII" $ charset mail) bytes
            pdoc <- readHtml def content
            -- TODO renumber links
            let Pandoc _ blocks = walk shorten pdoc
            return blocks
        ("image", _) -> do
            tmpFile <- liftIO $ getFile (partId mail) ports
            let img = [ Plain
                        [ Image ("", [], [])
                          [Str "Image Caption?"]
                          (pack tmpFile, "") ]]
            let figure = [ Figure ("", [], []) -- TODO figure ref
                      -- (Caption Nothing [Plain [Str . filename mail]])
                      (Caption Nothing [Plain [Str . pack . show $ filename mail]])
                      img
                      ]
            return img
        -- "application/pdf"       -> ()
        _  -> return [ Header 2 ("", [], []) [ Str "Attachment omitted" ]
                     , dlist [ ("Content-Type", contentType mail)
                             , ("Filename", show . filename $ mail) ]
                     ]

-- TODO Titlecase the headers
-- TODO from and to should monospace the
--      address (but not the name)
formatMail :: MailPart -> (Handle, Handle) -> PandocIO [Block]
formatMail mail ports = do
    let keys = ["from", "to", "subject", "date"]
    let f key = ( [Str $ key <> ":"]
                , [[Plain [ Str . pack $ headers mail ! unpack key ]]]
                )
    let kvs = map f keys

    body <- handleMailBody ports mail

    return $ DefinitionList kvs : body


handleMailLink :: URI -> PandocIO [Block]
handleMailLink uri = do
    let id = decode . uriPath $ uri
    -- liftIO $ print id
    mail' <- liftIO $ getMail id
    -- TODO #short
    body <- case mail' of
        Left err -> return [ Para [ Str "From "
                                    , Code ("", [], []) "getMail:"  ]
                             , Para [Str . pack $ err]
                             , Para [Code ("", [], []) (pack id) ]]
        Right (mail, proc@(Just stdin, Just stdout, _, _)) -> do
            -- TODO "short" `in` uriFrag
            bs <- formatMail mail (stdout, stdin)
            liftIO $ cleanupProcess proc
            return bs
        -- TODO error
        _ -> return []

    return $ comment ("msg id: " <> pack id) <> body

-- See also: Html rebuildLinks
handleBilaga :: [Block] -> Text -> PandocIO (Maybe [Block])
handleBilaga currentPage url = case parseURI . encodeWith (`elem` uriChars) . unpack $ url of
    Just uri -> case uriScheme uri of
                    ('w':'n':'.':wikiname) -> return . Just $ [Para [Str $ "Interwiki" <> pack wikiname]]
                    "file:" -> do
                        let fname = uriPath uri
                        handleFile fname $ takeExtension fname
                    "mail:" -> Just <$> handleMailLink uri
                    _ -> return $ Just [Para [Str . pack . show $ uri]]
    Nothing  -> case break (== '#') (unpack url) of
        ("", '#':frag)   -> return $ getHeadingData (pack frag) currentPage
        (page, '#':frag) -> getVimwikiPage (page <.> "wiki") <&> getHeadingData (pack frag)
        (page, "")       -> Just <$> getVimwikiPage (page <.> "wiki")
        _                -> return $ Just [Para [Str "ERROR"]]

f :: [Block] -> [Inline] -> Text -> Text -> PandocIO [Block]
f bs is a b = do
    let bg = handleBilagaHeading is a
    mxs <- handleBilaga bs b
    case mxs of
        Just xs -> return $ bg : xs
        Nothing -> return []

-- renumber :: Int -> [Blocks] -> [Blocks]

main :: [String] -> IO ()
main args = do
    case args of
        [file, heading] -> do

            cwd <- getWorkingDirectory
            changeWorkingDirectory  (dropFileName file)

            text <- T.readFile file

            -- let workdir = dropFileName file

            tex <- runIOorExplode $ do
                texTemplate <- compileDefaultTemplate "latex"
                Pandoc m' og_bs <- readVimwiki def text
                let Just better_blocks = getHeadingData (pack heading) og_bs
                -- TODO limit to only relevant heading
                (Pandoc meta bs, appendices) <- buildPrimary (Pandoc m' better_blocks)
                -- apx <- mconcat $ mapM handleBilaga $ reverse appendicies
                -- TODO this bs should be the initial source, NOT
                -- the result of buildPrimary
                fragments <- mapM (uncurry3 (f og_bs)) $ reverse appendices
                let apx = mconcat fragments
                let trail = [ RawBlock (Format "latex") "\\appendix" ] <> apx
                let content = Pandoc meta $ bs <> trail

                let opts = [ ("boxlinks",       toVal True)
                           , ("colorlinks",     toVal True)
                           , ("papersize",      SimpleVal "a4")
                           , ("numbersections", toVal True)
                           , ("lang",           SimpleVal "swedish")
                           , ("header-includes",  ListVal $ fmap (SimpleVal . fromString . unpack . toTex) [
                                 TexPackage "pdfpages" -- including PDF's
                               , TexPackage "fancyhdr" -- page headers on included pdf pages
                               , TempletizedTexPackage ["most"] "tcolorbox" -- for blockquotes
                               , LstSet [ ("breaklines", "true")
                                        , ("basicstyle", "\\scriptsize") ]
                               , TexCmd "newtcolorbox" [] ["myquote", joinBy "," [ "breakable"
                                                                                 , "colback=red!5!white"
                                                                                 , "colframe=red!75!black" ]]
                               , TexCmd "renewenvironment" [] ("quote" : fmap toTex [
                                                                             TexCmd "begin" [] ["myquote"]
                                                                           , TexCmd "end"   [] ["myquote"]
                                                                           ])
                               ])
                           ]
                writeLaTeX (def { writerTemplate = Just texTemplate
                                , writerListings = True
                                , writerVariables = Context $ Map.fromList opts }) content

            changeWorkingDirectory cwd
            writeFile "out.tex" tex
            putStrLn "Wrote result to out.tex"
        _ -> error "Usage: ./main handlingar <input-file> <heading>"