From 52acedd6014ad73f38ca753d305ab873719158a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Wed, 11 Oct 2023 15:28:35 +0200 Subject: Handlingar to HTML work. --- hs/src/Handlingar.hs | 21 +++---- hs/src/Handlingar/Common.hs | 121 +++++++++++++++++++++++++++++++--------- hs/src/Handlingar/HtmlOutput.hs | 93 ++++++++++++++++++++++++++++++ hs/src/Handlingar/TexOutput.hs | 97 ++++++++++++++++---------------- hs/src/Text/Pandoc/Extract.hs | 6 ++ hs/src/Text/URI/Decode.hs | 17 ++++++ hs/vimwiki.cabal | 1 + 7 files changed, 268 insertions(+), 88 deletions(-) create mode 100644 hs/src/Handlingar/HtmlOutput.hs create mode 100644 hs/src/Text/URI/Decode.hs diff --git a/hs/src/Handlingar.hs b/hs/src/Handlingar.hs index cb126b7..157e95d 100644 --- a/hs/src/Handlingar.hs +++ b/hs/src/Handlingar.hs @@ -4,18 +4,14 @@ module Handlingar ( main ) where -import Prelude hiding - ( putStrLn - , writeFile - ) - -import Data.Text.IO (putStrLn, writeFile) import System.FilePath (dropFileName) import System.Posix.Directory (changeWorkingDirectory, getWorkingDirectory) import Text.Pandoc (runIOorExplode) import qualified Data.Text.IO as T +import System.FilePath (()) -import Handlingar.TexOutput (handleTex) +-- import Handlingar.TexOutput (handleTex) +import Handlingar.HtmlOutput (handleHtml) -- TODO pandoc possibly contains a better way to handle attachements, -- something about media bag @@ -37,16 +33,13 @@ main args = do case args of [file, heading] -> do - cwd <- getWorkingDirectory + here <- getWorkingDirectory changeWorkingDirectory $ dropFileName file text <- T.readFile file + -- TODO mkdir + runIOorExplode $ handleHtml text heading (here "out") - -- let workdir = dropFileName file - - tex <- runIOorExplode $ handleTex text heading - changeWorkingDirectory cwd - writeFile "out.tex" tex - putStrLn "Wrote result to out.tex" + changeWorkingDirectory here _ -> error "Usage: ./main handlingar " diff --git a/hs/src/Handlingar/Common.hs b/hs/src/Handlingar/Common.hs index dec38c6..16b5591 100644 --- a/hs/src/Handlingar/Common.hs +++ b/hs/src/Handlingar/Common.hs @@ -3,17 +3,19 @@ #-} module Handlingar.Common -( decoder +( buildPrimary +, decoder , findAlternative , formatMail , getVimwikiPage +, handleBilaga , handleBilagaHeading , handleBlocks , handleMailLink , replaceLinks -, rewriteLink , shorten , uriChars +, HandlingarOps(..) ) where import Control.Monad.State.Lazy @@ -25,27 +27,80 @@ import Data.Set (Set) import Data.Set qualified as Set import Data.Text (Text, pack, unpack, strip) import Data.Text.Encoding (decodeLatin1, decodeUtf8) -import Data.Text.Encoding.Base64 (encodeBase64) import Data.Text.IO qualified as T import Mail (getMail, MailPart(..), getBytes, getFile) import Network.URI (URI, uriPath) import Network.URI.Encode (decode) +import System.FilePath (takeExtension, (<.>)) import System.IO (Handle) import System.Process (cleanupProcess) import Text.Pandoc (readVimwiki, readHtml, PandocMonad, PandocIO, writePlain) import Text.Pandoc.Builder -import Text.Pandoc.Extract (AppendixItem, extractKV) +import Text.Pandoc.Extract (AppendixItem, extractKV, getHeadingData) import Text.Pandoc.Items (comment, dlist) import Text.Pandoc.Walk (walk, walkM) -import Util (splitBy) - - +import Util (splitBy, (<&>)) +import Network.URI (parseURI, uriScheme) +import Network.URI.Encode (encodeWith) +import Text.URI.Decode (urlDecode) + + +data HandlingarOps = HandlingarOps + -- For any file included from the base document as an appendix, + -- which originally had a "file:" or "local:" scheme, generate + -- appropriate resources for that element, and return what should + -- be included in the appendix section. + -- + { handleFile :: FilePath -- ^ Path to include, may be relative the current working directory. + -> String -- ^ File extension of the given path, with a leading period + -> PandocIO (Maybe [Block]) + -- | + , rewriteLink :: Inline -> State [AppendixItem] Inline + , destination :: FilePath + } + +-- See also: Html rebuildLinks +handleBilaga :: HandlingarOps -> [Block] -> Text -> PandocIO (Maybe [Block]) +handleBilaga ops currentPage url = case parseURI . encodeWith (`elem` uriChars) . unpack $ url of + Just uri -> case uriScheme uri of + -- A link to another wiki. + -- Currently implemented by simply mentioning + -- where to look + ('w':'n':'.':wikiname) -> return . Just $ [Para [Str $ "Interwiki" <> pack wikiname]] + -- File, may (and hopefully is) within wiki the source tree + -- Note that both "file:" and "local:" gets turned + -- into "file:" by the pandoc parser. + "file:" -> do + let (Just fname) = urlDecode $ uriPath uri + (handleFile ops) fname $ takeExtension fname + -- An included email. + "mail:" -> Just <$> handleMailLink uri + _ -> return $ Just [Para [Str . pack . show $ uri]] + -- This is an internal wiki link + Nothing -> case break (== '#') (unpack url) of + ("", '#':frag) -> return $ getHeadingData (pack frag) currentPage + (page, '#':frag) -> getVimwikiPage (page <.> "wiki") <&> getHeadingData (pack frag) + (page, "") -> Just <$> getVimwikiPage (page <.> "wiki") + _ -> return $ Just [Para [Str "ERROR"]] + + + +-- | Generate a header tag from an appendix reference handleBilagaHeading :: [Inline] -> Text -> Block handleBilagaHeading is ref = Header 1 ("bilaga:" <> ref, [], []) is --- Takes a list of Pandoc Blocks starting with a heading. If the first --- element after the heading is a Definition list, then parse that to --- kv-pairs, and return it also. +-- | Extract header, metadata, and contents from a list of Pandoc Blocks. +-- Returns a three-tuple consisting of +-- * the sections title, +-- * metadata from the section +-- * Further contents of the section +-- +-- The title is gotten by rendering the first block in the list into a +-- plain string. +-- +-- If the second argument is a definition list, then each of those +-- items will be extracted into a list of key, value pairs +-- handleBlocks :: PandocMonad m => [Block] -> m (Text, [(Text, Text)], [Block]) handleBlocks (head:DefinitionList definitions:blocks) = do heading <- writePlain def (Pandoc nullMeta [head]) @@ -62,24 +117,21 @@ shorten (BlockQuote _) = BlockQuote [ Para [ Code ("", [], []) "[...]" ] ] shorten x = x -rewriteLink :: Inline -> State [AppendixItem] Inline -rewriteLink (Link _ is (target, "wikilink")) = do - let appendixRef = encodeBase64 target - let txt = [ Str "(bilaga" - , RawInline (Format "latex") - ("~\\ref{bilaga:" <> appendixRef <> "}") - , Str ")" - ] - let lnk = Link ("", [], [ ("reference-type", "ref") - , ("reference", "bilaga:" <> appendixRef) ]) - (is <> [Superscript txt]) - ( "#bilaga:" <> appendixRef, "" ) - modify ((is, appendixRef, target):) - return lnk -rewriteLink x = return x -replaceLinks :: Pandoc -> (Pandoc, [AppendixItem]) -replaceLinks = flip runState [] . walkM rewriteLink + +-- | Replace all links in document from source to destination form. +-- The source links are either regular absolute links, or relative +-- links to other items within the source tree. This function +-- translates one such link to a link which is on a suitable form form +-- the output. +-- * For TeX this will probably be a '\ref' element +-- * For HTML a link to another page. +-- +-- Note that something which actually creates the output referenced +-- items is also needed. +replaceLinks :: (Inline -> State [AppendixItem] Inline) + -> Pandoc -> (Pandoc, [AppendixItem]) +replaceLinks rewriteLink = flip runState [] . walkM rewriteLink uriChars :: Set Char @@ -185,3 +237,18 @@ handleMailLink uri = do _ -> return [] return $ comment ("msg id: " <> pack id) <> body + + +buildPrimary :: PandocMonad m + => HandlingarOps + -> Pandoc + -> m (Pandoc, [AppendixItem]) +buildPrimary ops (Pandoc meta blocks) = do + -- Pandoc meta blocks <- reader def text + -- let Just blocks = getHeadingData heading blocks + (heading, kvs, wantedBlocks') <- handleBlocks blocks + let (pandoc, appendices) = replaceLinks (rewriteLink ops) $ Pandoc meta wantedBlocks' + + let pandoc' = foldl (flip . uncurry $ setMeta) pandoc + $ ("title", heading) : kvs + return (pandoc', appendices) diff --git a/hs/src/Handlingar/HtmlOutput.hs b/hs/src/Handlingar/HtmlOutput.hs new file mode 100644 index 0000000..29121ba --- /dev/null +++ b/hs/src/Handlingar/HtmlOutput.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE OverloadedStrings + , ImportQualifiedPost + #-} + +module Handlingar.HtmlOutput +( handleHtml +) where + +import Control.Monad.State.Lazy +import Data.Default (def) +import Text.Pandoc.Extract (AppendixItem, getHeadingData) +import Text.Pandoc (readVimwiki, PandocIO) +import Text.Pandoc.Definition (Meta(..)) +import Text.Pandoc.Builder +import Data.Text (Text, unpack, pack) +import Text.Pandoc.Writers.HTML (writeHtml5String) +import Handlingar.Common (handleBilaga, HandlingarOps(..), buildPrimary) +import System.FilePath ((), takeExtension) +import Data.Text.IO (writeFile) +import Data.Text qualified as T +import System.Directory (copyFile) +import System.Posix.Directory (changeWorkingDirectory, getWorkingDirectory) +import Control.Exception (catch) +import Data.Map.Strict qualified as Map + +import Prelude hiding (writeFile) + +handleAppendix :: HandlingarOps -> [Block] -> AppendixItem -> PandocIO [Block] +handleAppendix ops source_blocks (link_text, _, target) = do + mxs <- handleBilaga ops source_blocks target + case mxs of + Just xs -> do + liftIO $ putStrLn $ "target: " ++ show target + liftIO $ getWorkingDirectory >>= putStrLn + case takeExtension . unpack $ target of + "" -> return () + _ -> liftIO $ copyFile (unpack . T.drop 5 $ target) (destination ops (unpack . sourceToTarget' $ target)) + -- liftIO $ catch (copyFile (unpack target) (destination ops (unpack . sourceToTarget' $ target))) (\_ -> (copyFile (unpack target <> ".wiki") (destination ops (unpack . sourceToTarget' $ target)))) + + out <- writeHtml5String (def) (Pandoc (Meta Map.empty) xs) + liftIO $ writeFile (destination ops (unpack . sourceToTarget $ target)) + (out) + -- (pack . show $ xs) + return [] + Nothing -> return [] + +sourceToTarget' :: Text -> Text +sourceToTarget' source + | "file:" == T.take 5 source = proc $ T.drop 5 source + | otherwise = proc source + where proc s = T.replace "/" "-" s + +-- Convert a link wikilink from the source document to an html link in +-- the destination document +sourceToTarget :: Text -> Text +sourceToTarget source = sourceToTarget' source <> ".html" + +rewriteLinkHtml :: Inline -> State [AppendixItem] Inline +rewriteLinkHtml x@(Link _ displayed (target, "wikilink")) = do + modify ((displayed, "", target):) + return $ Link ("", [], []) + (displayed <> [Superscript [Str "(bilaga)"]]) + (sourceToTarget target, "") -- TODO write something nice for title +rewriteLinkHtml x = return x + +-- TODO this is probably where we should create the included files +handleFileHtml :: FilePath -> String -> PandocIO (Maybe [Block]) +handleFileHtml fname ".tex" = return . Just $ [] +handleFileHtml fname ".pdf" = return . Just $ [ Plain [ RawInline (Format "html") (" (sourceToTarget' . pack $ fname) <> "\">PDF Failed loading") ] ] +handleFileHtml fname _ = return . Just $ [] + +handleHtml :: Text -> String -> FilePath -> PandocIO () +handleHtml source_text heading destination = do + let ops = HandlingarOps { handleFile = handleFileHtml + , rewriteLink = rewriteLinkHtml + , destination = destination + } + + Pandoc m' og_bs <- readVimwiki def source_text + let Just better_blocks = getHeadingData (pack heading) og_bs + (Pandoc meta bs, appendices) <- buildPrimary ops (Pandoc m' better_blocks) + + liftIO $ putStrLn $ "meta: " ++ show meta + liftIO $ putStrLn $ "count appendies: " ++ show (length appendices) + -- liftIO $ putStrLn $ "count appendies: " ++ show appendices + + fragments <- mapM (handleAppendix ops og_bs) $ reverse appendices + let apx = mconcat fragments + liftIO $ putStrLn $ "apx: " ++ show apx + let content = Pandoc meta bs + + result <- writeHtml5String (def) content + liftIO $ writeFile (destination "index.html") result diff --git a/hs/src/Handlingar/TexOutput.hs b/hs/src/Handlingar/TexOutput.hs index 6028b5f..b54862f 100644 --- a/hs/src/Handlingar/TexOutput.hs +++ b/hs/src/Handlingar/TexOutput.hs @@ -12,40 +12,30 @@ import Data.Default (def) import Data.Map.Lazy qualified as Map import Data.String (IsString(fromString)) import Data.Text (Text, pack, unpack) +import Data.Text.Encoding.Base64 (encodeBase64) import Data.Text.IO qualified as T -import Handlingar.Common (handleBilagaHeading, getVimwikiPage, handleMailLink, handleBlocks, replaceLinks, uriChars) -import Network.URI (parseURI, uriScheme, uriPath) -import Network.URI.Encode (encodeWith, decode) +import Handlingar.Common (HandlingarOps, HandlingarOps(..), handleBilaga, buildPrimary, handleBilagaHeading) +-- import Network.URI (parseURI, uriScheme, uriPath) +import Network.URI.Encode (decode) import System.Directory (makeAbsolute, createDirectoryIfMissing, copyFile) -import System.FilePath (takeExtension, (<.>), takeBaseName) +import System.FilePath (takeBaseName) import System.Process (createProcess, shell, CreateProcess(cwd), waitForProcess) import Tex (toTex, TeX (..)) import Text.DocTemplates (toVal, Val(SimpleVal, ListVal), Context(Context)) -import Text.Pandoc (readVimwiki, WriterOptions(writerTemplate, writerListings, writerVariables), PandocMonad, PandocIO) +import Text.Pandoc (readVimwiki, WriterOptions(writerTemplate, writerListings, writerVariables), PandocIO) import Text.Pandoc.Builder import Text.Pandoc.Extract (AppendixItem, getHeadingData) import Text.Pandoc.Templates (compileDefaultTemplate) import Text.Pandoc.Writers.LaTeX (writeLaTeX) -import Text.URI.Decode (urlDecode) -import Util (joinBy, uncurry3, (<&>)) +import Util (joinBy) -buildPrimary :: PandocMonad m - => Pandoc - -> m (Pandoc, [AppendixItem]) -buildPrimary (Pandoc meta blocks) = do - -- Pandoc meta blocks <- reader def text - -- let Just blocks = getHeadingData heading blocks - (heading, kvs, wantedBlocks') <- handleBlocks blocks - let (pandoc, appendices) = replaceLinks $ Pandoc meta wantedBlocks' - let pandoc' = foldl (flip . uncurry $ setMeta) pandoc - $ ("title", heading) : kvs - return (pandoc', appendices) - - -handleFileTex :: FilePath -> String -> PandocIO (Maybe [Block]) +-- | Render and include an included file into a tex document. +handleFileTex :: FilePath -- ^ Path to include. May be relative the current working directory + -> String -- ^ Filename extension of the given path, with a leading period + -> PandocIO (Maybe [Block]) handleFileTex fname ".txt" = do text <- liftIO $ T.readFile fname return . Just $ [CodeBlock ("", [], []) text] @@ -83,46 +73,59 @@ handleFileTex fname _ = do -- TODO differentiate between inline images (through cid:<> urls), and -- unrelated attachements --- See also: Html rebuildLinks -handleBilagaTex :: [Block] -> Text -> PandocIO (Maybe [Block]) -handleBilagaTex currentPage url = case parseURI . encodeWith (`elem` uriChars) . unpack $ url of - Just uri -> case uriScheme uri of - ('w':'n':'.':wikiname) -> return . Just $ [Para [Str $ "Interwiki" <> pack wikiname]] - "file:" -> do - let (Just fname) = urlDecode $ uriPath uri - liftIO $ do - print $ "raw = " ++ uriPath uri - print $ "fname = " ++ fname - handleFileTex fname $ takeExtension fname - "mail:" -> Just <$> handleMailLink uri - _ -> return $ Just [Para [Str . pack . show $ uri]] - Nothing -> case break (== '#') (unpack url) of - ("", '#':frag) -> return $ getHeadingData (pack frag) currentPage - (page, '#':frag) -> getVimwikiPage (page <.> "wiki") <&> getHeadingData (pack frag) - (page, "") -> Just <$> getVimwikiPage (page <.> "wiki") - _ -> return $ Just [Para [Str "ERROR"]] - - -f :: [Block] -> [Inline] -> Text -> Text -> PandocIO [Block] -f bs is a b = do - let bg = handleBilagaHeading is a - mxs <- handleBilagaTex bs b +-- | Format an appendix for inclusion into the tex file +handleAppendix :: HandlingarOps -> [Block] -> AppendixItem -> PandocIO [Block] +handleAppendix ops source_blocks (link_text, uid, target) = do + let bg = handleBilagaHeading link_text uid + mxs <- handleBilaga ops source_blocks target case mxs of Just xs -> return $ bg : xs Nothing -> return [] +-- | Rewrite Vimwiki links for TeX +-- Internal links (in Pandoc noted as "wikilink") will be expanded +-- into "\sup{(bilaga~\ref{bilaga:})}" +-- An appendix item is added to the state, containing +-- * The displayed paret of the link +-- * The unique id +-- * The target of the link +rewriteLinkTex :: Inline -> State [AppendixItem] Inline +rewriteLinkTex (Link _ displayed (target, "wikilink")) = do + let appendixRef = encodeBase64 target + let txt = [ Str "(bilaga" + , RawInline (Format "latex") + ("~\\ref{bilaga:" <> appendixRef <> "}") + , Str ")" + ] + let lnk = Link ("", [], [ ("reference-type", "ref") + , ("reference", "bilaga:" <> appendixRef) ]) + (displayed <> [Superscript txt]) + ( "#bilaga:" <> appendixRef, "" ) + modify ((displayed, appendixRef, target):) + return lnk +rewriteLinkTex x = return x handleTex :: Text -> String -> PandocIO Text handleTex source_text heading = do + + let ops = HandlingarOps { handleFile = handleFileTex + , rewriteLink = rewriteLinkTex + -- TODO HERE + , destination = "" + } + texTemplate <- compileDefaultTemplate "latex" + -- Load base data from file Pandoc m' og_bs <- readVimwiki def source_text + -- Filter to only items under heading let Just better_blocks = getHeadingData (pack heading) og_bs -- TODO limit to only relevant heading - (Pandoc meta bs, appendices) <- buildPrimary (Pandoc m' better_blocks) + (Pandoc meta bs, appendices) <- buildPrimary ops (Pandoc m' better_blocks) -- apx <- mconcat $ mapM handleBilagaTex $ reverse appendicies -- TODO this bs should be the initial source, NOT -- the result of buildPrimary - fragments <- mapM (uncurry3 (f og_bs)) $ reverse appendices + liftIO $ putStrLn $ "appendices: " ++ show appendices + fragments <- mapM (handleAppendix ops og_bs) $ reverse appendices let apx = mconcat fragments let trail = [ RawBlock (Format "latex") "\\appendix" ] <> apx let content = Pandoc meta $ bs <> trail diff --git a/hs/src/Text/Pandoc/Extract.hs b/hs/src/Text/Pandoc/Extract.hs index 6fb6016..0bee6d5 100644 --- a/hs/src/Text/Pandoc/Extract.hs +++ b/hs/src/Text/Pandoc/Extract.hs @@ -11,6 +11,12 @@ import Text.Pandoc ( PandocMonad ) import Util (oneOf) +-- A three tuple of: +-- * The original displayed text of the link +-- * A unique id for this reference. +-- The same link should always get the same id +-- * The original target of the link. +-- Note that this will depend on the cwd of the source file. type AppendixItem = ([Inline], Text, Text) -- Find the first heading matching text in the block diff --git a/hs/src/Text/URI/Decode.hs b/hs/src/Text/URI/Decode.hs new file mode 100644 index 0000000..893c225 --- /dev/null +++ b/hs/src/Text/URI/Decode.hs @@ -0,0 +1,17 @@ +module Text.URI.Decode where + +-- Copied verbatim from +-- https://rosettacode.org/wiki/URL_decoding + +import qualified Data.Char as Char + +urlDecode :: String -> Maybe String +urlDecode [] = Just [] +urlDecode ('%':xs) = + case xs of + (a:b:xss) -> + urlDecode xss + >>= return . ((Char.chr . read $ "0x" ++ [a,b]) :) + _ -> Nothing +urlDecode ('+':xs) = urlDecode xs >>= return . (' ' :) +urlDecode (x:xs) = urlDecode xs >>= return . (x :) diff --git a/hs/vimwiki.cabal b/hs/vimwiki.cabal index 27442ab..75ec842 100644 --- a/hs/vimwiki.cabal +++ b/hs/vimwiki.cabal @@ -35,6 +35,7 @@ executable Main Handlingar, Handlingar.Common, Handlingar.TexOutput, + Handlingar.HtmlOutput, Vimwiki.Man, System.Home, Util, -- cgit v1.2.3