From ba1516c255f7167b696ca7f4c6db2846cf6bb046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Mon, 8 Aug 2022 04:41:11 +0200 Subject: work --- .pylintrc | 9 +- import_yaml.py | 12 +- pyenc/__init__.py | 42 +++--- pyenc/api.py | 78 ---------- pyenc/app/__init__.py | 0 pyenc/app/api.py | 117 +++++++++++++++ pyenc/app/cmdline.py | 37 +++++ pyenc/app/model.py | 191 +++++++++++++++++++++++ pyenc/cmdline.py | 29 ---- pyenc/enc.py | 4 +- pyenc/enumerate_classes.py | 325 +++++++++++++++++++++++----------------- pyenc/model.py | 123 --------------- pyenc/templates/start_page.html | 2 +- setup.cfg | 6 + 14 files changed, 572 insertions(+), 403 deletions(-) delete mode 100644 pyenc/api.py create mode 100644 pyenc/app/__init__.py create mode 100644 pyenc/app/api.py create mode 100644 pyenc/app/cmdline.py create mode 100644 pyenc/app/model.py delete mode 100644 pyenc/cmdline.py delete mode 100644 pyenc/model.py create mode 100644 setup.cfg diff --git a/.pylintrc b/.pylintrc index 85507dc..d5a8416 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,6 +1,11 @@ + [MESSAGES CONTROL] disable=consider-using-f-string, - missing-module-docstring + missing-module-docstring, + too-few-public-methods, # False positives for db.Model +ignored-classes=SQLAlchemy, + SQLObject, + scoped_session [FORMAT] -good-names=f,i +good-names=f,i,e diff --git a/import_yaml.py b/import_yaml.py index 35558c8..d602cad 100755 --- a/import_yaml.py +++ b/import_yaml.py @@ -2,12 +2,10 @@ """Import extisting nodes.yaml into database""" -import json import yaml import pyenc -from pyenc.db import db -import pyenc.model as model +from pyenc.app import model app = pyenc.create_app() app.app_context().push() @@ -21,8 +19,10 @@ for fqdn, val in data.items(): h = model.Host.query.where(model.Host.fqdn == fqdn).first() if not h: h = model.Host(fqdn=fqdn) - h.environment = val.get('environment') - print(h) + h.environment = model.PuppetEnvironment \ + .query \ + .where(model.PuppetEnvironment.name == val.get('environment', 'production')) \ + .one() classes = val['classes'] if type(classes) == dict: @@ -34,4 +34,4 @@ for fqdn, val in data.items(): h.classes.extend(cls) -db.session.commit() +model.db.session.commit() diff --git a/pyenc/__init__.py b/pyenc/__init__.py index 7249936..c5f5ada 100644 --- a/pyenc/__init__.py +++ b/pyenc/__init__.py @@ -1,22 +1,29 @@ -"""App object setup for application.""" +""" +App object setup for application. +Contains the create_app() procedure for instansiating new Flask app +ins. Settings are instanciated from instance/settings.py, and extra +functionallity is pulled in from other modules. +""" + +import logging import random -import json -import yaml import flask from flask import ( Flask, request, - Response, flash, redirect, url_for ) -from . import model -from . import cmdline -from . import api +from .app import model +from .app import cmdline +from .app import api + + +logging.basicConfig(level=logging.DEBUG) def create_app(): @@ -32,12 +39,11 @@ def create_app(): app.config.from_pyfile('settings.py') - model.db.init_app(app) - for module in [ + model, cmdline, api, - ]: + ]: module.init_app(app) # not API @@ -58,20 +64,6 @@ def create_app(): flash('Classes removed') return redirect(url_for('root_page')) - - # @app.route('/enc') - # def enc(): - # fqdn = request.args.get('fqdn', 'default') - # host = model.Host.query.where(model.Host.fqdn == fqdn).first() - # if not host: - # return Response(f"No host with name {fqdn}", - # status=404) - - # out = { - # 'environment': host.environment, - # 'classes': [cls.class_name for cls in host.classes], - # } - # return Response(yaml.dump(out), - # mimetype='application/x-yaml') + # a /enc route for the classifier might be a good idea return app diff --git a/pyenc/api.py b/pyenc/api.py deleted file mode 100644 index 8fea554..0000000 --- a/pyenc/api.py +++ /dev/null @@ -1,78 +0,0 @@ -import json - -import flask -from flask import ( - Blueprint, - Response, - request, - url_for - ) -from . import model -# from . import db - -api = Blueprint('api', __name__) -# API -@api.route('/list-classes') -def list_classes(): - """Return all classes fuzzy matching q.""" - query = request.args.get('q', '') - wildcarded_query = '%{}%'.format('%'.join(query.split(' '))) - - results = \ - model \ - .PuppetClass \ - .query \ - .where(model.PuppetClass.class_name.like(wildcarded_query)) \ - .all() - print(wildcarded_query) - return Response(json.dumps([x.class_name for x in results]), - mimetype='application/json') - -@api.route('/classes-for') -def classes_for(): - """Return classes mapped to host `fqdn'.""" - fqdn = request.args.get('fqdn') - classes = [cls.class_name - for cls in model.Host.query.where(model.Host.fqdn == fqdn) - .first().classes] - return Response(json.dumps(classes), - mimetype='application/json') - -@api.route('/change-classes', methods=['POST']) -def change_classes(): - """ - Update set of active classes for node. - - Takes a json object as the POST body, which should have the keys - - fqdn :: which host to operate on - - removed :: classes which should be removed - - added :: classes which should be added - """ - j = request.json - host = model.Host.query.where(model.Host.fqdn == j['fqdn']).first() - remove_set = set(j['removed']) - - new_cls = [] - for cls in host.classes: - if cls.class_name in remove_set: - continue - new_cls.append(cls) - host.classes = new_cls - - cls = model.PuppetClass.query \ - .where(model.PuppetClass.class_name.in_(j['added'])) \ - .all() - host.classes.extend(cls) - # print(remove_set, db.db.session.dirty) - return flask.redirect(url_for('classes_for', fqdn=j['fqdn'])) - -@api.route('/hosts') -def list_hosts(): - """Return all hosts.""" - data = [x.serialize() for x in model.Host.query.all()] - return Response(json.dumps(data), - mimetype='application/json') - -def init_app(app): - """Register blueprint to app.""" - app.register_blueprint(api) diff --git a/pyenc/app/__init__.py b/pyenc/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyenc/app/api.py b/pyenc/app/api.py new file mode 100644 index 0000000..a8a37d9 --- /dev/null +++ b/pyenc/app/api.py @@ -0,0 +1,117 @@ +import flask +from flask import ( + Blueprint, + request, + url_for +) + +from . import model + + +def concatenate(listlist): + result = [] + for lst in listlist: + result.extend(lst) + return result + + +api = Blueprint('api', __name__) + + +@api.route('/classes') +def list_classes(): + """Return all classes fuzzy matching q.""" + environment = request.args.get('environment', 'production') + query = request.args.get('q', '') + wildcarded_query = '%{}%'.format('%'.join(query.split(' '))) + + result = model.db.engine.execute(model.db.text(""" + SELECT pc.class_name + FROM environment_classes ec + LEFT JOIN puppet_environment e ON ec.environment_id = e.id + LEFT JOIN puppet_class pc ON ec.class_id = pc.id + WHERE e.name = :environment + AND pc.class_name LIKE :wildcard + """), { + 'environment': environment, + 'wildcard': wildcarded_query, + }) + + return flask.json.jsonify([x for (x,) in result]) + + # results = \ + # model \ + # .PuppetClass \ + # .query \ + # .where(model.PuppetClass.class_name.like(wildcarded_query)) \ + # .where(model.PuppetClass.environments.name == environment) \ + # .all() + # print(wildcarded_query) + # return flask.json.jsonify([x.class_name for x in results]) + + +@api.route('/environments') +def list_environments(): + envs = model.PuppetEnvironment.query.all() + return flask.json.jsonify([env.name for env in envs]) + + +@api.route('/class-file') +def class_file(): + class_name = request.args.get('class') + result = model.PuppetClass.query \ + .where(model.PuppetClass.class_name == class_name) \ + .all() + return flask.json.jsonify(concatenate([ + [f.path for f in x.files] + for x in result])) + + +@api.route('/hosts') +def hosts(): + result = model.Host.query.all() + return flask.json.jsonify([x.fqdn for x in result]) + + +@api.route('/classes-for') +def classes_for(): + """Return classes mapped to host `fqdn'.""" + fqdn = request.args.get('fqdn') + classes = [cls.class_name + for cls in model.Host.query.where(model.Host.fqdn == fqdn) + .first().classes] + return flask.json.jsonify(classes) + + +@api.route('/change-classes', methods=['POST']) +def change_classes(): + """ + Update set of active classes for node. + + Takes a json object as the POST body, which should have the keys + - fqdn :: which host to operate on + - removed :: classes which should be removed + - added :: classes which should be added + """ + j = request.json + host = model.Host.query.where(model.Host.fqdn == j['fqdn']).first() + remove_set = set(j['removed']) + + new_cls = [] + for cls in host.classes: + if cls.class_name in remove_set: + continue + new_cls.append(cls) + host.classes = new_cls + + cls = model.PuppetClass.query \ + .where(model.PuppetClass.class_name.in_(j['added'])) \ + .all() + host.classes.extend(cls) + # print(remove_set, db.db.session.dirty) + return flask.redirect(url_for('classes_for', fqdn=j['fqdn'])) + + +def init_app(app): + """Register blueprint to app.""" + app.register_blueprint(api, url_prefix='/api') diff --git a/pyenc/app/cmdline.py b/pyenc/app/cmdline.py new file mode 100644 index 0000000..4e7e33f --- /dev/null +++ b/pyenc/app/cmdline.py @@ -0,0 +1,37 @@ +import click +from flask.cli import AppGroup + +app_group = AppGroup('user', help="Testt") + + +@app_group.command('enc') +@click.argument('fqdn') +def enc(fqdn): + from pyenc import enc + enc.run_enc(fqdn) + + +@app_group.command('init-db') +def initialize_database(): + from pyenc.app import model + model.db.create_all() + # model.db.session.add(model.Misc(key='db-version', value='0.1')) + # model.db.session.add(model.Misc(key='default-table', value='production')) + # model.db.session.commit() + + +@app_group.command('enumerate-classes') +@click.argument('environment') +def enumerate_classes(environment): + from pyenc import enumerate_classes as enumerate_classes_ + environment_name = environment + # TODO path should be an argument + path_base = '/var/lib/machines/busting/etc/puppetlabs/code/environments/' + enumerate_classes_.run( + path_base=path_base, + environment_name=environment_name) + + +def init_app(app): + """Add command line options to current flask app.""" + app.cli.add_command(app_group) diff --git a/pyenc/app/model.py b/pyenc/app/model.py new file mode 100644 index 0000000..f67cd10 --- /dev/null +++ b/pyenc/app/model.py @@ -0,0 +1,191 @@ +"""Database model for application.""" + +from flask_sqlalchemy import SQLAlchemy + +# db = SQLAlchemy(session_options={"autoflush": False}) +db = SQLAlchemy() + + +def init_app(app): + """Adds database bindings to a Flask App.""" + db.init_app(app) + import logging + # logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + + +host_classes = db.Table( + 'host_classes', + db.Column('host_id', db.ForeignKey('host.id'), primary_key=True), + db.Column('class_id', db.ForeignKey('puppet_class.id'), primary_key=True), +) + + +# NOTE this is non-final, and might get removed shortly +environment_classes = db.Table( + 'environment_classes', + db.Column('environment_id', db.ForeignKey('puppet_environment.id'), primary_key=True), + db.Column('class_id', db.ForeignKey('puppet_class.id'), primary_key=True), +) + + +class_files = db.Table( + 'class_files', + db.Column('class_id', db.ForeignKey('puppet_class.id'), primary_key=True), + db.Column('file_id', db.ForeignKey('puppet_file.id'), primary_key=True), + db.UniqueConstraint('class_id', 'file_id'), +) + + +# class HostClasses(db.Model): +# __tablename__ = 'host_classes' +# id = db.Column(db.Integer, primary_key=True) +# host_id = db.Column(db.Integer, db.ForeignKey('host.id'), nullable=False) +# class_id = db.Column(db.Integer, db.ForeignKey('puppet_class.id'), nullable=False) + + +class PuppetEnvironment(db.Model): + """ + A puppet environment. + + An enviromnet is a collection of modules, but here we only keep + the files of the modules, in PuppetFile. + """ + __tablename__ = 'puppet_environment' + id = db.Column(db.Integer, primary_key=True) + name = db.Column(db.Text, nullable=False, unique=True) + classes = db.relationship( + 'PuppetClass', + back_populates='environments', + secondary=environment_classes) + hosts = db.relationship( + 'Host', + back_populates='environment') + + +class Host(db.Model): + """ + Single computer. + + A computer has a name (machine.example.com.), an environment + (production) and a list of puppet classes. + + (TODO and direct values?) + """ + + __tablename__ = 'host' + id = db.Column(db.Integer, primary_key=True) + fqdn = db.Column(db.Text, nullable=False, unique=True) + environment_id = db.Column(db.Integer, db.ForeignKey(f'{PuppetEnvironment.__tablename__}.id')) + environment = db.relationship('PuppetEnvironment', back_populates='hosts') + # classes = db.relationship('HostClasses', backref='host', lazy='dynamic') + classes = db.relationship( + 'PuppetClass', + back_populates='hosts', + secondary=host_classes) + + def serialize(self): # pylint: disable=missing-function-docstring + return {column.name: self.__getattribute__(column.name) + for column in self.__table__.columns} + + +class PuppetFile(db.Model): + """ + Puppet source code file. + + Keeps track of known puppet files. Each file contains 0 to many + puppet classes. + + Each file is uniquely identified by the pair (path, environment). + """ + + __tablename__ = 'puppet_file' + id = db.Column(db.Integer, primary_key=True) + # Where we found the file (path inside environment) + # e.g. /etc/puppetlabs/code/environments// + path = db.Column(db.Text, nullable=False) + + # Puppet environment this file belongs in + environment = db.Column(db.Integer, + db.ForeignKey(f'{PuppetEnvironment.__tablename__}.id'), + nullable=False) + + # Checksum of the content, should be usable as a key in PuppetFileContent + checksum = db.Column(db.Text, nullable=False) + + # When we last read data into json + last_parse = db.Column(db.Float) + + classes = db.relationship('PuppetClass', + back_populates='files', + secondary=class_files) + content = db.relationship('PuppetFileContent', backref='file') + + __table_args__ = ( + db.UniqueConstraint('path', 'environment'), + ) + + +class PuppetFileContent(db.Model): + """ + (Parsed) contents of puppet source files. + + Separate from PuppetFile since many environments can share files, + and I don't want to store reduntand data. + """ + __tablename__ = 'puppet_file_content' + + id = db.Column(db.Integer, primary_key=True) + + file_id = db.Column(db.Integer, db.ForeignKey(f'{PuppetFile.__tablename__}.id')) + + # Checksum of the original file + checksum = db.Column(db.Text, nullable=False) + + # Output of 'puppet parser dump --format json ' + json = db.Column(db.Text, nullable=False) + + +# TODO class environment mappings? +# - the same class can exist in multiple environmentns +# - the same class in multiple environments might be different +# - the class can come or go when the file is changed (??) +# - when a node changes environment it still has its classes, but they +# refer to something else now + +# Possibly: +# nodes holds its list of classes as a list of strings +# I have tables which maps class names to files per environment + +# What happens when two different environments have separate classes +# which share a name + +class PuppetClass(db.Model): + """ + A puppet class. + + The class itself only keeps track of its name here, and mostly + ensures that only existing classes can be added to a given node/host. + """ + __tablename__ = 'puppet_class' + id = db.Column(db.Integer, primary_key=True) + class_name = db.Column(db.Text, nullable=False, unique=True) + + hosts = db.relationship( + 'Host', + back_populates='classes', + secondary=host_classes) + environments = db.relationship( + 'PuppetEnvironment', + back_populates='classes', + secondary=environment_classes) + files = db.relationship( + 'PuppetFile', + back_populates='classes', + secondary=class_files) + + +class Misc(db.Model): + __tablename__ = 'misc' + id = db.Column(db.Integer, primary_key=True) + key = db.Column(db.Text, nullable=False) + value = db.Column(db.Text) diff --git a/pyenc/cmdline.py b/pyenc/cmdline.py deleted file mode 100644 index 099018d..0000000 --- a/pyenc/cmdline.py +++ /dev/null @@ -1,29 +0,0 @@ -import click -from flask.cli import AppGroup - -app_group = AppGroup('user', help="Testt") - -@app_group.command('enc') -@click.argument('fqdn') -def enc(fqdn): - from . import enc - enc.run_enc(fqdn) - -@app_group.command('init-db') -def initialize_database(): - from . import model - model.db.create_all() - -@app_group.command('enumerate-classes') -def enumerate_classes(): - from . import enumerate_classes - environment_name = 'production' - path_base = '/var/lib/machines/busting/etc/puppetlabs/code/environments/' - enumerate_classes.run( - path_base=path_base, - environment_name=environment_name) - -def init_app(app): - """Add command line options to current flask app.""" - app.cli.add_command(app_group) - diff --git a/pyenc/enc.py b/pyenc/enc.py index 8732af2..3f7a0b2 100644 --- a/pyenc/enc.py +++ b/pyenc/enc.py @@ -5,6 +5,7 @@ Command line entry point for Puppet External Node Classifier (enc). import yaml from . import model + def run_enc(fqdn): """ Run the puppet node classifier. @@ -16,9 +17,8 @@ def run_enc(fqdn): print(f"No host with name {fqdn}") return 1 out = { - 'environment': host.environment, + 'environment': host.environment.name, 'classes': [cls.class_name for cls in host.classes], } print(yaml.dump(out)) return 0 - diff --git a/pyenc/enumerate_classes.py b/pyenc/enumerate_classes.py index 822852e..d4197bd 100644 --- a/pyenc/enumerate_classes.py +++ b/pyenc/enumerate_classes.py @@ -1,44 +1,39 @@ - """ Loads all puppet files in environment, parse them, and store the parsed data in the database. """ +# TODO write propper tests +# Which escpecially tests environments + +from typing import Union import hashlib import json import os import os.path import subprocess -import time -from sqlalchemy.sql import text - -import threading -from threading import Lock, Thread -from queue import Queue +# import time +from sqlalchemy.sql import text # import pyenc -from pyenc.model import db -from pyenc import model +from pyenc.app.model import db +from pyenc.app import model -from typing import Union, Generator - -def with_lock(lock, proc): - try: - lock.acquire() - proc() - finally: - lock.release() +Path = Union[str, bytes] -def call(proc, *args): - proc(*args) +def find(path: Path, **kvs) -> list[bytes]: + """ + Wrapper around find(1). -path = Union[str, bytes] + variables: + path -- base path for the find operation -def find(path: path, **kvs) -> list[bytes]: - """Wrapper around find(1).""" + key word args: + any that find(1) takes, but with the leading dash (-) removed. + """ cmdline = ['find', path] for key, value in kvs.items(): cmdline.append(f'-{key}') @@ -46,10 +41,13 @@ def find(path: path, **kvs) -> list[bytes]: cmdline.append('-print0') cmd = subprocess.run(cmdline, capture_output=True, check=True) - return (f for f in cmd.stdout.split(b'\0') if f) + return (f.decode('UTF-8') for f in cmd.stdout.split(b'\0') if f) class PuppetParseError(Exception): + """ + Error holding failure result of `puppet parser dump`. + """ def __init__(self, code, msg): super().__init__() self.code = code @@ -62,62 +60,39 @@ class PuppetParseError(Exception): return repr(self) -def puppet_parse(file: path) -> bytes: +def puppet_parse(file: Path) -> bytes: + """ + Runs the external puppet parser, and returns json as bytes. + + Note that this is really slow. + + file -- Path to the file to check + """ with subprocess.Popen( ['puppet', 'parser', 'dump', '--format', 'json', file], + text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as cmd: - if cmd.retuncode and cmd.returncode != 0: - raise PuppetParseError(cmd.returncode, cmd.stderr.read().decode('UTF-8')) + if cmd.returncode and cmd.returncode != 0: + raise PuppetParseError(cmd.returncode, cmd.stderr.read()) json_data = cmd.stdout.read() if (value := cmd.wait()) != 0: - raise PuppetParseError(value, cmd.stderr.read().decode('UTF-8')) + raise PuppetParseError(value, cmd.stderr.read()) return json_data -def parse_files(files: list[path]) -> Generator[model.PuppetFile]: - for i, file in enumerate(files): - try: - stat = os.stat(file) - - last_modify = stat.st_mtime - old_object = model.PuppetFile.query \ - .where(model.PuppetFile.path == file) \ - .first() - - if old_object and old_object.last_parse > last_modify: - # file unchanged since our last parse, skip - continue - - print(f'{i}/{len(files)}: {file}') - - if old_object: - puppet_file = old_object - else: - puppet_file = model.PuppetFile(path=file) - puppet_file.last_parse = time.time() - # m.json = puppet_parse(file) - - yield puppet_file - - except PuppetParseError as err: - # TODO cache error - print('Error:', err) - continue - - def interpret_file(json_data: dict) -> list[str]: - """Find all classes in json-representation of file.""" + """Find all puppet class names in json-representation of file.""" top = json_data['^'] if top[0] == 'class': tmp = top[1]['#'] idx = tmp.index('name') return [tmp[idx + 1]] # print(tmp[idx + 1]) - elif top[0] == 'block': + if top[0] == 'block': ret_value = [] for element in top[1:]: if element['^'][0] == 'class': @@ -125,111 +100,187 @@ def interpret_file(json_data: dict) -> list[str]: idx = tmp.index('name') ret_value.append(tmp[idx + 1]) return ret_value - else: - return [] + return [] +def enumerate_files(path_base, environment): + """ + Enumerate all puppete files in a puppet environment. -def enumerate_files(path_base, environment_name): + Updates the database so that the query + >>> SELECT path FROM puppet_file WHERE environment = :environment + Returns how the directory tree looks *right now*. + """ path = os.path.join(path_base, environment.name) - - files = list(find(path, type='f', name='*.pp')) + files = find(path, type='f', name='*.pp') try: - for puppet_file in parse_files(files): + for puppet_file in (model.PuppetFile(path=file) for file in files): with open(puppet_file.path, 'rb') as f: checksum = hashlib.sha256(f.read()).hexdigest() # Returns puppet_file.path, relative to path_base - puppet_file.path = os.path.relpath(puppet_file.path, path.encode('UTF-8')) - # TODO does flask want the whole environment object? - puppet_file.environment = environment.id - puppet_file.checksum = checksum - db.session.add(puppet_file) + + # This works in at least postgres and sqlite3 + db.engine.execute(text(""" + INSERT INTO puppet_file (path, environment, checksum) + VALUES (:path, :environment, :checksum) + ON CONFLICT (path, environment) + DO UPDATE SET checksum = EXCLUDED.checksum + """), { + 'path': os.path.relpath(puppet_file.path, path), + 'environment': environment.id, + 'checksum': checksum, + }) finally: db.session.commit() -def run(path_base: path, environment_name: str): +def ensure_environment(name): + """ + Returns a valid PuppetEnvironment object for the named environment. + + If it already exists in the database the return the existing, + otherwise create it and return that. + """ + environment = model \ + .PuppetEnvironment \ + .query \ + .where(model.PuppetEnvironment.name == name) \ + .one_or_none() - ### Ensure that we have oru environment - environment = model.PuppetEnvironment.query.where(model.PuppetEnvironment.name == environment_name).first() if not environment: - environment = model.PuppetEnvironment(name=environment_name) + environment = model.PuppetEnvironment(name=name) db.session.add(environment) - # TODO does this update the environment object + # This also updates our environment object, filling in + # autogenerated fieldsfields. db.session.commit() - ### Identify all puppet files, and note the base of their content - # enumerate_files(path_base, environment_name) + return environment + +def run(path_base: Path = '/etc/puppetlabs/code/environments', + environment_name: str = 'production'): + """ + Runs the class enumeration. + + Arguments: + path_base -- Path to where each puppet environment resides + environment_name -- Which puppet environment to parse + """ + + ### Ensure that we have our environment + environment = ensure_environment(environment_name) + + ### Identify all puppet files, and note the base of their content + enumerate_files(path_base, environment) ### Find all puppet files which we haven't parsed - result = db.engine.execute(text(""" - SELECT - f.id, - f.path, - f.last_parse, - f.checksum, - env.name - FROM puppet_file f - LEFT OUTER JOIN puppet_file_content c - ON f.checksum = c.checksum - LEFT JOIN puppet_environment env - ON f.environment = env.id - WHERE c.json IS NULL - """)) - - # db_lock = Lock() - threads = [] - q = Queue() - for (id, path, last, checksum, environment) in result: - print(environment, path) - # return - full_path = os.path.join(path_base.encode('UTF-8'), environment.encode('UTF-8'), path) - - with open(full_path, 'rb') as f: - current_checksum = hashlib.sha256(f.read()).hexdigest() - - if current_checksum != checksum: - print(f'Checksum changed for {environment}/{path}') - # db.engine.execute(model.PuppetFile.delete().where(model.PuppetFile.id == id)) - continue - - thread = Thread(target=lambda checksum, full_path: (checksum, puppet_parse(full_path)), - args=(checksum, full_path), - name=f'{environment}/{path}') - thread.start() - threads.append(thread) + base = model.PuppetFile \ + .query \ + .outerjoin(model.PuppetFileContent, + model.PuppetFile.checksum == model.PuppetFileContent.checksum) \ + .where(model.PuppetFileContent.json == None) # noqa: E711 - try: - # for thread in threads: - # print(f'Waiting on {thread.name}') - # thread.join() - # print(f'{thread.name} joined') - while not q.empty(): - print('Getting something from queue') - (checksum, item) = q.get() - print(checksum) - pfc = model.PuppetFileContent(checksum=checksum, json=item) - db.session.add(pfc) - q.task_done() - finally: - db.session.commit() + # count for progress bar + count = base.count() - return + result = base \ + .join(model.PuppetEnvironment) \ + .add_column(model.PuppetEnvironment.name) \ + .all() + db.session.commit() try: - for puppet_file in model.PuppetFile.query.all(): + for (i, (puppet_file, env)) in enumerate(result): + print(env, puppet_file.path) + print(f'{i} / {count}', end='\r') + + full_path = os.path.join(path_base, env, puppet_file.path) + try: - class_names = interpret_file(json.loads(os.path.join(path, puppet_file.json))) - for class_name in class_names: - db.session.add(model.PuppetClass( - class_name=class_name, - comes_from=puppet_file)) - except Exception as e: + item = puppet_parse(full_path) + except PuppetParseError as e: print(e) - print(f'Failed: {puppet_file.path}') + continue + + # Check that the file we just parsed is the file we + # expected. + # NOTE this is technically incorrect, consider + # | Us | Attacker | + # |------------------------|--------------| + # | initial checksum | | + # | | replace file | + # | parse | | + # | | restore file | + # | second checksum (this) | | + + with open(full_path, 'rb') as f: + current_checksum = hashlib.sha256(f.read()).hexdigest() + + if current_checksum != puppet_file.checksum: + print(f'Checksum changed for {env}/{puppet_file.path}') + continue + + # File parsed was file we expected to parse, addit to the + # database + pfc = model.PuppetFileContent(file_id=puppet_file.id, + checksum=puppet_file.checksum, + json=item) + db.session.add(pfc) + + print('loop finished') finally: + # TODO sqlite fails here, complains that the "database is locked" db.session.commit() + + for file_content in model.PuppetFileContent.query.all(): + try: + class_names = interpret_file(json.loads(file_content.json)) + for class_name in class_names: + # cls = model.PuppetClass(class_name=class_name) + # cls.environments.append(environment) + # cls.files.append(file_content.file) + + # Add classs (if not exists) + db.engine.execute(text(""" + INSERT INTO puppet_class (class_name) + VALUES (:name) + ON CONFLICT (class_name) DO NOTHING + """), {'name': class_name}) + + # Add class to environment (if not already there) + db.engine.execute(text(""" + INSERT INTO environment_classes (environment_id, class_id) + SELECT :env, id FROM puppet_class WHERE class_name = :name + ON CONFLICT (environment_id, class_id) DO NOTHING + """), {'env': environment.id, 'name': class_name}) + + # Add class to file mapping (if not already there) + db.engine.execute(text(""" + INSERT INTO class_files (file_id, class_id) + SELECT :file, id FROM puppet_class WHERE class_name = :name + ON CONFLICT (file_id, class_id) DO NOTHING + """), {'file': file_content.file_id, 'name': class_name}) + + except Exception as e: + print(e) + # print(f'Failed: {puppet_file.path}') + + db.session.commit() + + +def gc_puppet_files(): + """ + Remove unused puppet file content. + + Removes all puppet file contents which no longer has an "owning" file. + """ + + db.engine.execute(text(""" + DELETE FROM puppet_file_content WHERE pfc.id IN + ( SELECT pfc.id FROM puppet_file_content pfc + LEFT JOIN puppet_file f ON pfc.checksum = f.checksum + WHERE f.id IS NULL + ) """)) diff --git a/pyenc/model.py b/pyenc/model.py deleted file mode 100644 index e774014..0000000 --- a/pyenc/model.py +++ /dev/null @@ -1,123 +0,0 @@ -"""Database model for application.""" - -from flask_sqlalchemy import SQLAlchemy -import requests -import yaml - -db = SQLAlchemy() - -host_classes = db.Table( - 'host_classes', - db.Column('host_id', db.ForeignKey('host.id')), - db.Column('class_id', db.ForeignKey('puppet_class.id'))) - -# class HostClasses(db.Model): -# __tablename__ = 'host_classes' -# id = db.Column(db.Integer, primary_key=True) -# host_id = db.Column(db.Integer, db.ForeignKey('host.id'), nullable=False) -# class_id = db.Column(db.Integer, db.ForeignKey('puppet_class.id'), nullable=False) - - -class Host(db.Model): - """ - Single computer. - - A computer has a name (machine.example.com.), an environment - (production) and a list of puppet classes. - - (TODO and direct values?) - """ - - __tablename__ = 'host' - id = db.Column(db.Integer, primary_key=True) - fqdn = db.Column(db.Text, nullable=False) - environment = db.Column(db.Text) - # classes = db.relationship('HostClasses', backref='host', lazy='dynamic') - classes = db.relationship( - 'PuppetClass', - back_populates='hosts', - secondary=host_classes) - - def serialize(self): - return {column.name: self.__getattribute__(column.name) - for column in self.__table__.columns} - - -class PuppetEnvironment(db.Model): - """ - A puppet environment. - - An enviromnet is a collection of modules, but here we only keep - the files of the modules, in PuppetFile. - """ - __tablename__ = 'puppet_environment' - id = db.Column(db.Integer, primary_key=True) - name = db.Column(db.Text, nullable=False) - - -class PuppetFile(db.Model): - """ - Puppet source code file. - - Keeps track of known puppet files. Each file contains 0 to many - puppet classes. - - Each file is uniquely identified by the pair (path, environment). - """ - - __tablename__ = 'puppet_file' - id = db.Column(db.Integer, primary_key=True) - # Where we found the file - # TODO normalize this to - path = db.Column(db.Text, nullable=False) - - environment = db.Column(db.Integer, db.ForeignKey(f'{PuppetEnvironment.__tablename__}.id')) - - # Checksum of the content, should be usable as a key in - # PuppetFileContent - # TODO flask weak keys? - checksum = db.Column(db.Text) - - # When we last read data into json - last_parse = db.Column(db.Float) - # classes = db.relationship('PuppetClass', back_populates='comes_from') - classes = db.relationship('PuppetClass', backref='comes_from') - - -class PuppetFileContent(db.Model): - """ - (Parsed) contents of puppet source files. - - Separate from PuppetFile since many environments can share files, - and I don't want to store reduntand data. - """ - __tablename__ = 'puppet_file_content' - - id = db.Column(db.Integer, primary_key=True) - - # Checksum of the original file - checksum = db.Column(db.Text, nullable=False) - - # Output of 'puppet parser dump --format json ' - json = db.Column(db.Text, nullable=False) - - - -class PuppetClass(db.Model): - """ - A puppet class. - - The class itself only keeps track of its name here, and mostyl - ensures that only existing classes can be added to a given node/host. - """ - - __tablename__ = 'puppet_class' - id = db.Column(db.Integer, primary_key=True) - class_name = db.Column(db.Text, nullable=False) - # comes_from = db.relationship('PuppetFile', back_populates='classes') - comes_from_id = db.Column(db.Integer, - db.ForeignKey(f'{PuppetFile.__tablename__}.id')) - hosts = db.relationship( - 'Host', - back_populates='classes', - secondary=host_classes) diff --git a/pyenc/templates/start_page.html b/pyenc/templates/start_page.html index 268f7d1..6186e25 100644 --- a/pyenc/templates/start_page.html +++ b/pyenc/templates/start_page.html @@ -1,7 +1,7 @@ {% extends "base.html" %} {% block content %}

This certainly is a page

-
+
This should be replaced by react
{% for host in hosts %}
diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..029b063 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[mypy] +plugins = sqlalchemy.ext.mypy.plugin + +[flake8] +max-line-length = 100 +ignore = E266 # Too many leading "#" -- cgit v1.2.3