"""
Loads all puppet files in environment, parse them, and store the
parsed data in the database.
"""
# TODO write proper tests
# which especially test environments

from typing import Iterator, Union
import hashlib
import json
import os
import os.path
import subprocess
import traceback

from sqlalchemy.sql import text

from pyenc.app.model import db
from pyenc.app import model


Path = Union[str, bytes]


def find(path: Path, **kvs) -> Iterator[str]:
    """
    Wrapper around find(1).

    Arguments:
    path -- base path for the find operation

    Keyword arguments:
    any that find(1) takes, but with the leading dash (-) removed.

    Returns an iterator over the found paths, decoded as UTF-8.
    """
    cmdline = ['find', path]
    for key, value in kvs.items():
        cmdline.append(f'-{key}')
        cmdline.append(value)
    cmdline.append('-print0')
    cmd = subprocess.run(cmdline, capture_output=True, check=True)
    # -print0 NUL-separates the entries; the trailing NUL produces one
    # empty field, which the `if f` filters out.
    return (f.decode('UTF-8') for f in cmd.stdout.split(b'\0') if f)


class PuppetParseError(Exception):
    """Error holding failure result of `puppet parser dump`."""

    def __init__(self, code, msg):
        super().__init__()
        # Exit code of the puppet process.
        self.code = code
        # Captured stderr output.
        self.msg = msg

    def __repr__(self):
        return f'PuppetParseError({self.code}, {self.msg})'

    def __str__(self):
        return repr(self)


def puppet_parse(file: Path) -> str:
    """
    Run the external puppet parser and return its JSON output.

    Note that this is really slow.

    Arguments:
    file -- Path to the file to check

    Raises:
    PuppetParseError -- if the parser exits with a non-zero status.
    """
    with subprocess.Popen(
            ['puppet', 'parser', 'dump', '--format', 'json', file],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE) as cmd:
        # communicate() drains both pipes concurrently, avoiding the
        # deadlock possible when reading stdout and stderr sequentially,
        # and waits for the process to exit so returncode is valid.
        json_data, stderr = cmd.communicate()
        if cmd.returncode != 0:
            raise PuppetParseError(cmd.returncode, stderr)
        return json_data


def interpret_file(json_data: dict) -> list[str]:
    """Find all puppet class names in json-representation of file."""
    top = json_data['^']
    if top[0] == 'class':
        # A file containing a single class definition.
        tmp = top[1]['#']
        idx = tmp.index('name')
        return [tmp[idx + 1]]
    if top[0] == 'block':
        # A block may contain multiple class definitions.
        ret_value = []
        for element in top[1:]:
            if element['^'][0] == 'class':
                tmp = element['^'][1]['#']
                idx = tmp.index('name')
                ret_value.append(tmp[idx + 1])
        return ret_value
    return []


def enumerate_files(path_base, environment):
    """
    Enumerate all puppet files in a puppet environment.

    Updates the database so that the query

    >>> SELECT path FROM puppet_file WHERE environment = :environment

    Returns how the directory tree looks *right now*.
    """
    path = os.path.join(path_base, environment.name)
    # Only keep manifests: *.pp files directly inside a "manifests"
    # directory.
    files = [f for f in find(path, type='f', name='*.pp')
             if os.path.basename(os.path.dirname(f)) == 'manifests']
    try:
        for file in files:
            with open(file, 'rb') as f:
                checksum = hashlib.sha256(f.read()).hexdigest()
            # Stores the path relative to path_base.
            # This upsert works in at least postgres and sqlite3.
            db.engine.execute(text("""
            INSERT INTO puppet_file (path, environment_id, checksum)
            VALUES (:path, :environment, :checksum)
            ON CONFLICT (path, environment_id)
            DO UPDATE SET checksum = EXCLUDED.checksum
            """), {
                'path': os.path.relpath(file, path),
                'environment': environment.id,
                'checksum': checksum,
            })
    finally:
        db.session.commit()


def ensure_environment(name):
    """
    Returns a valid PuppetEnvironment object for the named environment.

    If it already exists in the database the return the existing,
    otherwise create it and return that.
    """
    environment = model \
        .Environment \
        .query \
        .where(model.Environment.name == name) \
        .one_or_none()
    if not environment:
        environment = model.Environment(name=name)
        db.session.add(environment)
        # This also updates our environment object, filling in
        # autogenerated fields.
        db.session.commit()
    return environment


def run(path_base: Path = '/etc/puppetlabs/code/environments',
        environment_name: str = 'production'):
    """
    Runs the class enumeration.

    Arguments:
    path_base        -- Path to where each puppet environment resides
    environment_name -- Which puppet environment to parse
    """
    ### Ensure that we have our environment
    environment = ensure_environment(environment_name)

    ### Identify all puppet files, and note the base of their content
    enumerate_files(path_base, environment)

    ### Find all puppet files which we haven't parsed
    subexpr = model.db.session \
        .query(model.PuppetFile.path,
               model.PuppetFile.checksum,
               # Selects any of the available environments.  Since the
               # checksum is the same the file should also be the same,
               # regardless of which environment we choose.
               model.db.func.min(model.PuppetFile.environment_id)
                    .label('env_id')) \
        .outerjoin(model.PuppetFileContent,
                   model.PuppetFile.checksum ==
                   model.PuppetFileContent.checksum) \
        .where(model.PuppetFileContent.json == None) \
        .group_by(model.PuppetFile.checksum, model.PuppetFile.path) \
        .cte()
    base = model.db.session \
        .query(subexpr.c.path, subexpr.c.checksum, model.Environment.name) \
        .join(model.Environment, model.Environment.id == subexpr.c.env_id)
    files = base.all()
    count = base.count()
    db.session.commit()

    # Parse all puppet files, and store their output into
    # puppet_file_content
    try:
        for (i, (path, checksum, env)) in enumerate(files):
            print(f'\x1b[2K{env} {path}')
            print(f'{i} / {count}', end='\r')
            full_path = os.path.join(path_base, env, path)
            try:
                item = puppet_parse(full_path)
            except PuppetParseError as e:
                print(e)
                continue

            # Check that the file we just parsed is the file we
            # expected.
            # NOTE this is technically incorrect, consider
            # | Us                     | Attacker     |
            # |------------------------|--------------|
            # | initial checksum       |              |
            # |                        | replace file |
            # | parse                  |              |
            # |                        | restore file |
            # | second checksum (this) |              |
            with open(full_path, 'rb') as f:
                current_checksum = hashlib.sha256(f.read()).hexdigest()
            if current_checksum != checksum:
                print(f'Checksum changed for {env}/{path}')
                continue

            # File parsed was file we expected to parse, add it to the
            # database
            pfc = model.PuppetFileContent(checksum=checksum, json=item)
            db.session.add(pfc)
        print('loop finished')
    finally:
        # TODO sqlite fails here, complains that the "database is locked"
        db.session.commit()

    # Interpret the parsed result of all parsed puppet files
    # This takes a few seconds
    for file in model.PuppetFile.query.where(model.PuppetFile.content).all():
        try:
            class_names = interpret_file(json.loads(file.content.json))
            for class_name in class_names:
                db.engine.execute(text("""
                INSERT INTO puppet_class (name) VALUES (:name)
                ON CONFLICT (name) DO NOTHING
                """), {'name': class_name})

                # Add class to environment (if not already there)
                # TODO this adds too much
                db.engine.execute(text("""
                INSERT INTO environment_classes (environment_id, class_id)
                SELECT :env, id FROM puppet_class
                WHERE puppet_class.name = :name
                ON CONFLICT (environment_id, class_id) DO NOTHING
                """), {'env': environment.id, 'name': class_name})

                # Add class to file mapping (if not already there)
                db.engine.execute(text("""
                INSERT INTO class_files (file_id, class_id)
                SELECT :file, id FROM puppet_class
                WHERE puppet_class.name = :name
                ON CONFLICT (file_id, class_id) DO NOTHING
                """), {'file': file.id, 'name': class_name})
        except Exception as e:
            print(f'Error for {file.id} ({file.path}) - {e}')
            traceback.print_exc()
    db.session.commit()


def gc_puppet_files():
    """
    Remove unused puppet file content.

    Removes all puppet file contents which no longer has an "owning"
    file.
    """
    # The outer DELETE cannot reference the subquery's alias (the
    # original `WHERE pfc.id IN` was invalid SQL); filter on the
    # table's own id column instead.
    db.engine.execute(text("""
    DELETE FROM puppet_file_content
    WHERE id IN (
        SELECT pfc.id
        FROM puppet_file_content pfc
        LEFT JOIN puppet_file f ON pfc.checksum = f.checksum
        WHERE f.id IS NULL
    )
    """))