#!/usr/bin/env python3
"""Export one section of a vimwiki file as a LaTeX or plain-text document.

The section named on the command line is extracted from the wiki file, every
``[[...]]`` link in it is replaced by its title plus an appendix reference
("bilaga A", "bilaga B", ...), and the link targets (mails, files, other
headings of the same page) are rendered as appendices after the main text.
The result is written to ``doc.tex`` or ``doc.txt`` in the current directory.
"""

import argparse
import email
import hashlib
import os
import os.path as path
import pwd
import re
import subprocess
from datetime import datetime
from email.policy import default
from subprocess import PIPE

from dateutil import parser as dtparser

# Directory that `file:` links are resolved against.  The command-line entry
# point rebinds this to the directory containing the input wiki file.
path_base = '/'

# State shared between the command-line entry point and the resolvers.  The
# entry point rebinds `data` to the wiki text and maintains the counters that
# number appendices in txt mode.
data = ''
sectioncounter = 0
subsectioncounter = 0

# Mail headers copied into a rendered mail, in output order.
MAIL_HEADER_FIELDS = ('Date', 'From', 'To', 'Cc', 'Subject')

# Characters replaced when an attachment name is used as a file name in /tmp.
SAFE_FILENAME_TABLE = str.maketrans({'/': '_', 'å': 'a', 'ä': 'a', 'ö': 'o'})

# LaTeX preamble written at the top of every tex document.
TEX_PREAMBLE = '''
\\documentclass[a4paper]{article}

\\usepackage[T1]{fontenc}
\\usepackage[utf8]{inputenc}
\\usepackage[swedish]{babel}
\\usepackage{verbatim}
\\usepackage{fullpage}
\\usepackage{pdfpages}
\\usepackage{subfiles}
\\usepackage{parskip}
\\usepackage{fancyhdr}
\\usepackage{hyperref}
\\usepackage{soul}
'''

# ------------------------------------------------------------


def includepdf(mode, addr, full_file):
    """Return the markup that pulls a PDF into the document.

    mode      -- 'tex' or 'txt'.
    addr      -- name of the PDF as shown to the reader.
    full_file -- path handed to LaTeX's \\includepdf (tex mode only).

    In tex mode every page of the PDF is included, framed, with a page header
    naming the appendix.  In txt mode only a one-line pointer to the PDF is
    returned.  Raises ValueError for any other mode.
    """
    if mode == 'tex':
        pagecommand = '\\thispagestyle{fancy}'
        pagecommand += f'\\lhead{{Bilaga \\Alph{{section}}.\\arabic{{subsection}} {texify_filename(addr)} }}'
        arg = 'frame'
        arg += ',pages={-}'
        arg += f',pagecommand={{{pagecommand}}}'
        arg += ',width=\\textwidth'
        return f'\\phantomsection\\stepcounter{{subsection}}\\includepdf[{arg}]{{{full_file}}}\n'
    if mode == 'txt':
        return f"Se pdf {addr}\n"
    # Previously fell through and returned None, which only surfaced later
    # as a TypeError when the caller concatenated the result.
    raise ValueError(f'unknown mode: {mode!r}')


def shorten_mail(str):
    """Return the mail text with each run of quoted lines collapsed.

    Every run of lines starting with '>' is replaced by a single '> [...]'
    line.  Blank lines are kept and do not end a quoted run; any other line
    does, so a later quoted run gets its own marker.
    """
    out = []
    in_quote = False
    for line in str.split('\n'):
        if line.startswith('>'):
            if not in_quote:
                out.append('> [...]')
            in_quote = True
            continue
        if line:
            # A non-blank, non-quoted line ends the current quoted run.
            in_quote = False
        out.append(line)
    return '\n'.join(out)


def texify_filename(str):
    """Return the file name with underscores escaped for LaTeX."""
    return str.replace('_', '\\_')


def format_mail_headers(msg):
    """Return the MAIL_HEADER_FIELDS present in msg as 'Name: value' lines."""
    lines = []
    for field in MAIL_HEADER_FIELDS:
        value = msg.get(field)
        if value:
            lines.append(f'{field}: {value}\n')
    return ''.join(lines)


def resolve_mail(mode, addr, frag, comment):
    """Render the mail with message id `addr`, including its attachments.

    The mail file is located by running `mu find` on the message id.  The
    selected headers and the body are emitted (inside a verbatim environment
    in tex mode); when frag is 'short' quoted text in the body is collapsed
    with shorten_mail.  PDF attachments are written to /tmp and included via
    includepdf; other attachments are emitted as text under their own
    sub-heading.  `comment` is unused.

    Raises FileNotFoundError when `mu` reports no file for the message id.
    """
    global subsectioncounter

    msg_id = addr
    # Argument list rather than a shell string: the id comes from wiki text
    # and must not be interpreted by a shell.
    found = subprocess.run(
        ['mu', 'find', '-u', f'i:{msg_id}', '--fields', 'l'],
        stdout=PIPE,
    ).stdout.decode('UTF-8').strip()
    if not found:
        raise FileNotFoundError(f'no mail found for message id {msg_id!r}')
    # Use the first hit should mu report more than one path.
    mailfile = found.splitlines()[0]

    with open(mailfile, 'rb') as f:
        msg = email.message_from_binary_file(f, policy=default)

    body = msg.get_body(preferencelist=('related', 'plain', 'html'))
    # get_body returns None when no candidate part exists.
    text = body.get_content() if body is not None else ''

    outstr = ''
    if mode == 'tex':
        outstr += '\\begin{verbatim}\n'
    outstr += format_mail_headers(msg)
    outstr += '\n'
    outstr += shorten_mail(text) if frag == 'short' else text
    if mode == 'tex':
        outstr += '\\end{verbatim}\n'

    # TODO merge this logic with the resolve file logic below
    for part in msg.iter_attachments():
        # Attachments are not required to carry a file name.
        filename = part.get_filename() or 'attachment'

        if part.get_content_type() == 'application/pdf':
            safe_filename = filename.translate(SAFE_FILENAME_TABLE)
            with open('/tmp/' + safe_filename, 'wb') as f:
                f.write(part.get_content())
            outstr += includepdf(mode, safe_filename, '/tmp/' + safe_filename)
            continue

        if mode == 'tex':
            outstr += f'\\subsection{{{texify_filename(filename)}}}\n'
        elif mode == 'txt':
            subsectioncounter += 1
            outstr += f' {sectioncounter}.{subsectioncounter}. {filename}\n'

        content = part.get_content()
        if isinstance(content, bytes):
            content = content.decode('utf-8')

        if max(len(s) for s in content.split('\n')) > 100:
            # Long lines are emitted outside verbatim, as running text.
            outstr += content + '\n'
        else:
            if mode == 'tex':
                outstr += '\n\\begin{verbatim}'
            outstr += content + '\n'
            if mode == 'tex':
                outstr += '\\end{verbatim}'

    return outstr


def resolve_help(mode, addr, frag, comment):
    """Resolver for `help:` links; they contribute no appendix text."""
    return ''


def resolve_file(mode, addr, frag, comment):
    """Render the file `addr`, resolved relative to path_base.

    PDF files (extension compared case-insensitively) are included via
    includepdf; in tex mode the section counter is stepped first so the PDF
    gets its own appendix letter.  Any other file is read as text and
    returned followed by a newline.  `frag` and `comment` are unused.
    """
    extension = addr.split('.')[-1].lower()
    full_file = path.join(path_base, addr)

    if extension == 'pdf':
        s = ''
        if mode == 'tex':
            # LaTeX-only bookkeeping; must not leak into txt output.
            s += '\\stepcounter{section}\n'
            s += '\\phantomsection\n'
        s += includepdf(mode, addr, full_file)
        return s

    # TODO
    # elif extension == 'tex':
    #     return f'\\subfile{{{full_file}}}\n'
    with open(full_file, 'r') as f:
        return f.read() + '\n'


def resolve_default(mode, addr, frag, comment):
    """Resolve a link without a known protocol.

    A link with an empty address refers to the heading `frag` of the wiki
    text held in the module-level `data`; that section is returned (inside
    a verbatim environment in tex mode).  Any other address is reported on
    stdout as unsupported and yields an empty string.
    """
    if addr != '':
        print(addr)
        print("Vimwiki intra-page links not yet supported")
        return ''

    page = get_heading(frag, data)
    if mode == 'tex':
        return '\\begin{verbatim}\n' + page + '\n' + '\\end{verbatim}\n'
    return page + '\n'


# Should all take (mode string, url without protocol, fragment, comment)
# and return the complete text of the attachment, formatted according
# to mode
resolvers = {
    'mail': resolve_mail,
    'file': resolve_file,
    'help': resolve_help,
}

# ------------------------------------------------------------


def get_heading(name, data):
    """Return the text below the vimwiki heading `name` in `data`.

    The section runs from the end of the heading line to the next heading
    with the same or fewer '=' signs, or to a line of two or more dashes,
    whichever comes first; the newline preceding that line is not included.
    Without such a line the rest of `data` is returned.

    Raises ValueError when no heading called `name` exists.
    """
    # The name is literal text, not a regular expression.
    start_match = re.search(f'(?m)^(=+) {re.escape(name)} =+$', data)
    if start_match is None:
        raise ValueError(f'heading not found: {name!r}')

    heading_level = len(start_match.group(1))
    tail = data[start_match.end():]

    end_match = re.search(f'(?m)^(={{1,{heading_level}}} .*|---*)$', tail)
    if end_match:
        return tail[:max(end_match.start() - 1, 0)]
    return tail


def split_paragraphs(text):
    """Split text into paragraphs, each a list of its non-empty lines.

    Paragraphs are separated by one or more empty lines.  A final paragraph
    that is not followed by an empty line is included as well.
    """
    paragraphs = []
    paragraph = []
    for line in text.split('\n'):
        if line != '':
            paragraph.append(line)
        elif paragraph:
            paragraphs.append(paragraph)
            paragraph = []
    if paragraph:
        paragraphs.append(paragraph)
    return paragraphs


# ------------------------------------------------------------

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='parse vimwiki files',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', choices=["tex", "txt"], default='txt',
                        help="Format of output")
    parser.add_argument('infile', help="Wiki-file to use for input")
    parser.add_argument('heading', help="Which heading to output")
    args = parser.parse_args()

    mode = args.mode
    infile = args.infile
    heading = args.heading

    with open(infile, 'r') as wiki_file:
        data = wiki_file.read()
    path_base = path.dirname(infile)

    name = pwd.getpwuid(os.getuid()).pw_gecos

    # Pieces of the output document, written to disk in one go at the end.
    doc = []

    if mode == 'tex':
        doc.append(TEX_PREAMBLE)
    elif mode == 'txt':
        sectioncounter = 0
        subsectioncounter = 0

    page1 = get_heading(heading, data)

    # Find all links: [[url#fragment|comment]], every part optional.
    link_re = r"\[\[([^|#\]]*)(?:#([^|\]]*))?(?:[|]([^\]]*))?\]\]"

    bilagor = []
    text_parts = []
    pos = 0

    # Resolve all links
    for bilaga_nr, match in enumerate(re.finditer(link_re, page1), start=1):
        url = match.group(1) or ''
        frag = match.group(2) or ''
        comment = match.group(3) or ''

        text_parts.append(page1[pos:match.start()])
        pos = match.end()

        uid = hashlib.md5((url + frag).encode('UTF-8')).hexdigest()
        title = comment or url or frag
        # In the running text the link becomes its title plus the appendix
        # letter (1 -> A, 2 -> B, ...).
        # TODO use f"(bilaga~\\ref{{{uid}}})" in tex mode
        text_parts.append(title + f' (bilaga {chr(bilaga_nr + 64)})')

        proto, sep, addr = url.partition(':')
        if not sep:
            # No protocol given: the whole url is the address.
            proto, addr = '', url

        print(proto, addr)

        attach_str = ''
        if mode == 'tex':
            # Section headings are disabled for full PDF:s
            if proto != 'file':
                attach_str += f"\\section{{{title}}}\n\\label{{{uid}}}\n"
            else:
                attach_str += f"\\phantomsection\n\\label{{{uid}}}\n"
        elif mode == 'txt':
            sectioncounter += 1
            subsectioncounter = 0
            if proto != 'file':
                st = f"{chr(sectioncounter + 64)}. {title}"
                attach_str += f'\n{st}\n{"="*len(st)}\n'

        resolver = resolvers.get(proto, resolve_default)
        attach_str += resolver(mode, addr, frag, comment)

        bilagor.append(attach_str)

    text_parts.append(page1[pos:])
    outstr = ''.join(text_parts)

    options = {
        'date': '\\today' if mode == 'tex' else f'{datetime.now():%Y-%m-%d}',
        'mode': 'vimwiki',
    }

    # split the text into paragraph blocks
    paragraphs = split_paragraphs(outstr)

    # check the first paragraph for a definition list ("key :: value"), and
    # parse that into variables
    if paragraphs:
        first = paragraphs[0]
        for i, line in enumerate(first):
            if m := re.match(r'^(\w+)\s*::\s*(.*)', line):
                if m[1] == 'date':
                    dt = dtparser.parse(m[2])
                    if mode == 'tex':
                        options['date'] = f'{dt:%Y--%m--%d}'
                    else:
                        options['date'] = f'{dt:%Y-%m-%d}'
                else:
                    options[m[1]] = m[2]
            else:
                # remove handled lines, keep remaining lines in paragraphs list
                paragraphs[0] = first[i:]
                break
        else:
            # remove first paragraph from paragraphs list
            paragraphs = paragraphs[1:]

    # Unless the page declares itself to be LaTeX source ("mode :: tex"), the
    # body text of a tex document is wrapped in a verbatim environment.
    verbatim_body = options['mode'] != 'tex'

    if mode == 'tex':
        doc.append(f"""
\\title{{{heading}}}
\\date{{{options['date']}}}
\\author{{{name}}}

\\begin{{document}}
\\maketitle
% \\tableofcontents
""")
        if verbatim_body:
            doc.append('\\begin{verbatim}\n')
    elif mode == 'txt':
        d = options['date']
        doc.append(f'\n{heading.center(40)}\n{d.center(40)}\n')

    for paragraph in paragraphs:
        doc.append('\n'.join(paragraph))
        doc.append('\n\n')

    if mode == 'tex':
        if verbatim_body:
            doc.append('\\end{verbatim}\n')
        doc.append('\\appendix\n')

    doc.append('\n'.join(bilagor))

    if mode == 'tex':
        doc.append('\\end{document}')

    with open(f'doc.{mode}', 'w') as output:
        output.write(''.join(doc))