#!/usr/bin/env python3
"""Export one heading of a vimwiki file together with its linked attachments.

The text below the requested heading is copied to ``doc.tex`` or ``doc.txt``
(depending on ``--mode``) in the current directory.  Every ``[[url#frag|comment]]``
link in that text is replaced by its title plus an appendix letter, and the
linked resource (mail, file, or another heading of the same page) is rendered
as an appendix after the main text.
"""

import re
import os
import pwd
import subprocess
from subprocess import PIPE
from base64 import b64encode
import os.path as path
import email
from email.policy import default
from datetime import datetime
import argparse

# Module state shared by the resolvers.  main() overwrites these from the
# command line; the defaults keep the module importable without side effects.
path_base = '/'
mode = 'txt'
data = ''
sectioncounter = 0
subsectioncounter = 0

parser = argparse.ArgumentParser(
    description='parse vimwiki files',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--mode', choices=["tex", "txt"], default='txt',
                    help="Format of output")
parser.add_argument('infile', help="Wiki-file to use for input")
parser.add_argument('heading', help="Which heading to output")

# [[url#fragment|comment]] -- fragment and comment are both optional.
LINK_RE = re.compile(r"\[\[([^|#\]]*)(?:#([^|\]]*))?(?:[|]([^\]]*))?\]\]")

# ------------------------------------------------------------


def includepdf(addr, full_file):
    """Return the markup that embeds (tex) or mentions (txt) a PDF.

    addr is the name shown to the reader, full_file the path handed to
    ``\\includepdf``.  Returns an empty string for an unknown mode.
    """
    if mode == 'tex':
        pagecommand = (
            '\\thispagestyle{fancy}'
            f'\\lhead{{Bilaga \\Alph{{section}}.\\arabic{{subsection}} {texify_filename(addr)} }}'
        )
        arg = ','.join([
            'frame',
            'pages={-}',
            f'pagecommand={{{pagecommand}}}',
            'width=\\textwidth',
        ])
        return f'\\phantomsection\\stepcounter{{subsection}}\\includepdf[{arg}]{{{full_file}}}\n'
    if mode == 'txt':
        return f"Se pdf {addr}\n"
    return ''


def shorten_mail(text):
    """Collapse each run of quoted ('>') lines in a mail body to '> [...]'.

    Empty lines are kept as they are and do not end a quoted run; any other
    unquoted line does, so a later quoted run gets its own marker.
    """
    out = []
    in_quote = False
    for line in text.split('\n'):
        if not line:
            out.append('')
            continue
        if line.startswith('>'):
            if not in_quote:
                out.append('> [...]')
            in_quote = True
            continue
        in_quote = False
        out.append(line)
    return '\n'.join(out)


def texify_filename(name):
    """Escape underscores so a filename can be used in LaTeX text."""
    return name.replace('_', '\\_')


def resolve_mail(addr, frag, comment):
    """Render the mail with message-id addr, including its attachments.

    The mail file is located through ``mu find``.  frag == 'short' collapses
    quoted text in the body.  PDF attachments are written to /tmp and
    included; other attachments are inlined as text.

    Raises FileNotFoundError when mu does not know the message.
    """
    global subsectioncounter

    # Argument list instead of a shell string: the message id comes from the
    # wiki text and must not be interpreted by a shell.
    found = subprocess.run(
        ['mu', 'find', '-u', f'i:{addr}', '--fields', 'l'],
        stdout=PIPE).stdout.decode('UTF-8')
    candidates = [ln.strip() for ln in found.splitlines() if ln.strip()]
    if not candidates:
        raise FileNotFoundError(f'mu found no message with id {addr!r}')
    mailfile = candidates[0]

    with open(mailfile, 'rb') as f:
        msg = email.message_from_binary_file(f, policy=default)

    parts = []
    if mode == 'tex':
        parts.append('\\begin{verbatim}\n')

    for field in ('Date', 'From', 'To', 'Cc', 'Subject'):
        h = msg.get(field)
        if h:
            parts.append(f'{field}: {h}\n')
    parts.append('\n')

    # Only leaf text types: get_content() raises KeyError on a multipart
    # container such as multipart/related.
    body = msg.get_body(preferencelist=('plain', 'html'))
    text = body.get_content() if body is not None else ''
    parts.append(shorten_mail(text) if frag == 'short' else text)

    if mode == 'tex':
        parts.append('\\end{verbatim}\n')

    # TODO merge this logic with the resolve file logic below
    for part in msg.iter_attachments():
        filename = part.get_filename()
        if part.get_content_type() == 'application/pdf':
            safe_filename = (filename or 'attachment.pdf').translate(
                str.maketrans('/åäö', '_aao'))
            # The generated document refers to this path, so the file has
            # to outlive this script.
            with open('/tmp/' + safe_filename, 'wb') as f:
                f.write(part.get_content())
            parts.append(includepdf(safe_filename, '/tmp/' + safe_filename))
            continue

        label = filename or part.get_content_type()
        if mode == 'tex':
            parts.append(f'\\subsection{{{texify_filename(label)}}}\n')
        elif mode == 'txt':
            subsectioncounter += 1
            parts.append(f' {sectioncounter}.{subsectioncounter}. {label}\n')

        content = part.get_content()
        if isinstance(content, bytes):
            content = content.decode('utf-8', errors='replace')
        elif not isinstance(content, str):
            content = str(content)

        if max(len(s) for s in content.split('\n')) > 100:
            # Long lines would run off the page in verbatim; emit as-is.
            parts.append(content + '\n')
        else:
            if mode == 'tex':
                parts.append('\n\\begin{verbatim}')
            parts.append(content + '\n')
            if mode == 'tex':
                parts.append('\\end{verbatim}')

    return ''.join(parts)


def resolve_help(addr, frag, comment):
    """Resolver for help: links; they produce no appendix text."""
    return ''


def resolve_file(addr, frag, comment):
    """Render a file: link -- PDFs are included, anything else is read as text.

    addr is resolved relative to the directory of the wiki file.
    """
    extension = addr.split('.')[-1]
    full_file = path.join(path_base, addr)
    if extension == 'pdf':
        # The counter bookkeeping is LaTeX markup; keep it out of txt output.
        prefix = '\\stepcounter{section}\n\\phantomsection\n' if mode == 'tex' else ''
        return prefix + includepdf(addr, full_file)
    # TODO
    # elif extension == 'tex':
    #     return f'\\subfile{{{full_file}}}\n'
    with open(full_file, 'r') as f:
        return f.read() + '\n'


def resolve_default(addr, frag, comment):
    """Render a link without protocol: ``[[#Heading]]`` inlines that heading.

    Links to other wiki pages are reported on stdout and yield ''.
    """
    if addr != '':
        print(addr)
        print("Vimwiki intra-page links not yet supported")
        return ''

    page = get_heading(frag, data)
    if mode == 'tex':
        return '\\begin{verbatim}\n' + page + '\n' + '\\end{verbatim}\n'
    return page + '\n'


# Should all take (url without protocol, fragment, comment)
resolvers = {
    'mail': resolve_mail,
    'file': resolve_file,
    'help': resolve_help,
}

# ------------------------------------------------------------


def get_heading(name, data):
    """Return the text below heading name in the vimwiki source data.

    The section ends at the next heading of the same or a higher level, at a
    horizontal rule (``---``), or at the end of data.  The heading line itself
    is not part of the result.

    Raises ValueError when no heading called name exists.
    """
    # name is literal text, not a pattern: headings may contain '+', '(' etc.
    start_match = re.search(f'(?m)^(=+) {re.escape(name)} =+$', data)
    if start_match is None:
        raise ValueError(f'heading {name!r} not found')

    heading_level = len(start_match.group(1))
    tail = data[start_match.end():]
    pat = f'(?m)^(={{1,{heading_level}}} .*|---*)$'
    end_match = re.search(pat, tail)
    if end_match:
        # Drop the newline that precedes the terminating line as well.
        return tail[:end_match.start() - 1]
    return tail


# ------------------------------------------------------------


def main():
    """Parse the command line and write ``doc.<mode>`` to the current directory."""
    global mode, data, path_base, sectioncounter, subsectioncounter

    args = parser.parse_args()
    mode = args.mode
    infile = args.infile
    heading = args.heading

    with open(infile, 'r') as f:
        data = f.read()
    path_base = path.dirname(infile)

    # First GECOS field is the full name; the rest is room/phone etc.
    name = pwd.getpwuid(os.getuid()).pw_gecos.split(',')[0]

    try:
        page1 = get_heading(heading, data)
    except ValueError as exc:
        parser.error(str(exc))

    sectioncounter = 0
    subsectioncounter = 0

    bilagor = []
    body_parts = []
    pos = 0
    for bilaga_nr, match in enumerate(LINK_RE.finditer(page1), start=1):
        url = match.group(1) or ''
        frag = match.group(2) or ''
        comment = match.group(3) or ''

        body_parts.append(page1[pos:match.start()])
        pos = match.end()

        # Decode so the label is the base64 text rather than "b'...'".
        uid = b64encode((url + frag).encode('UTF-8')).decode('ascii')
        title = comment or url or frag
        body_parts.append(f'{title} (bilaga {chr(bilaga_nr + 64)})')

        proto, sep, addr = url.partition(':')
        if not sep:
            proto, addr = '', url
        print(proto, addr)

        # Headings are disabled for full PDF:s
        attach_str = ''
        if mode == 'tex':
            if proto != 'file':
                attach_str += f"\\section{{{title}}}\n\\label{{{uid}}}\n"
            else:
                attach_str += f"\\phantomsection\n\\label{{{uid}}}\n"
        elif mode == 'txt':
            sectioncounter += 1
            subsectioncounter = 0
            if proto != 'file':
                st = f"{chr(sectioncounter + 64)}. {title}"
                attach_str += f'\n{st}\n{"="*len(st)}\n'

        resolver = resolvers.get(proto, resolve_default)
        attach_str += resolver(addr, frag, comment)
        bilagor.append(attach_str)

    body_parts.append(page1[pos:])
    outstr = ''.join(body_parts)

    # Everything is resolved before the output file is opened, so a failure
    # above does not leave a truncated document behind.
    with open(f'doc.{mode}', 'w') as output:
        if mode == 'tex':
            output.write(f'''
\\documentclass[a4paper]{{article}}

\\usepackage[T1]{{fontenc}}
\\usepackage[utf8]{{inputenc}}
\\usepackage[swedish]{{babel}}
\\usepackage{{verbatim}}
\\usepackage{{fullpage}}
\\usepackage{{pdfpages}}
\\usepackage{{subfiles}}
\\usepackage{{parskip}}
\\usepackage{{fancyhdr}}
\\usepackage{{hyperref}}

\\title{{{heading}}}
\\date\\today
\\author{{{name}}}

\\begin{{document}}
\\maketitle
% \\tableofcontents
''')
        elif mode == 'txt':
            d = f'{datetime.now():%Y-%m-%d}'
            output.write(f'\n{heading.center(40)}\n{d.center(40)}\n')

        if mode == 'tex':
            output.write('\\begin{verbatim}\n')
        output.write(outstr + '\n')
        if mode == 'tex':
            output.write('\\end{verbatim}\n\\appendix\n')

        output.write('\n'.join(bilagor))

        if mode == 'tex':
            output.write('\\end{document}')


if __name__ == '__main__':
    main()