aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-08-08 04:41:11 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-08-08 04:41:11 +0200
commitba1516c255f7167b696ca7f4c6db2846cf6bb046 (patch)
treec1794abe263034300efce07bb629cb10edc5ddd5
parentwork (diff)
downloadpuppet-classifier-ba1516c255f7167b696ca7f4c6db2846cf6bb046.tar.gz
puppet-classifier-ba1516c255f7167b696ca7f4c6db2846cf6bb046.tar.xz
work
-rw-r--r--.pylintrc9
-rwxr-xr-ximport_yaml.py12
-rw-r--r--pyenc/__init__.py42
-rw-r--r--pyenc/api.py78
-rw-r--r--pyenc/app/__init__.py0
-rw-r--r--pyenc/app/api.py117
-rw-r--r--pyenc/app/cmdline.py37
-rw-r--r--pyenc/app/model.py191
-rw-r--r--pyenc/cmdline.py29
-rw-r--r--pyenc/enc.py4
-rw-r--r--pyenc/enumerate_classes.py325
-rw-r--r--pyenc/model.py123
-rw-r--r--pyenc/templates/start_page.html2
-rw-r--r--setup.cfg6
14 files changed, 572 insertions, 403 deletions
diff --git a/.pylintrc b/.pylintrc
index 85507dc..d5a8416 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,6 +1,11 @@
+
[MESSAGES CONTROL]
disable=consider-using-f-string,
- missing-module-docstring
+ missing-module-docstring,
+ too-few-public-methods, # False positives for db.Model
+ignored-classes=SQLAlchemy,
+ SQLObject,
+ scoped_session
[FORMAT]
-good-names=f,i
+good-names=f,i,e
diff --git a/import_yaml.py b/import_yaml.py
index 35558c8..d602cad 100755
--- a/import_yaml.py
+++ b/import_yaml.py
@@ -2,12 +2,10 @@
"""Import extisting nodes.yaml into database"""
-import json
import yaml
import pyenc
-from pyenc.db import db
-import pyenc.model as model
+from pyenc.app import model
app = pyenc.create_app()
app.app_context().push()
@@ -21,8 +19,10 @@ for fqdn, val in data.items():
h = model.Host.query.where(model.Host.fqdn == fqdn).first()
if not h:
h = model.Host(fqdn=fqdn)
- h.environment = val.get('environment')
- print(h)
+ h.environment = model.PuppetEnvironment \
+ .query \
+ .where(model.PuppetEnvironment.name == val.get('environment', 'production')) \
+ .one()
classes = val['classes']
if type(classes) == dict:
@@ -34,4 +34,4 @@ for fqdn, val in data.items():
h.classes.extend(cls)
-db.session.commit()
+model.db.session.commit()
diff --git a/pyenc/__init__.py b/pyenc/__init__.py
index 7249936..c5f5ada 100644
--- a/pyenc/__init__.py
+++ b/pyenc/__init__.py
@@ -1,22 +1,29 @@
-"""App object setup for application."""
+"""
+App object setup for application.
+Contains the create_app() procedure for instantiating new Flask app
+instances. Settings are instantiated from instance/settings.py, and extra
+functionality is pulled in from other modules.
+"""
+
+import logging
import random
-import json
-import yaml
import flask
from flask import (
Flask,
request,
- Response,
flash,
redirect,
url_for
)
-from . import model
-from . import cmdline
-from . import api
+from .app import model
+from .app import cmdline
+from .app import api
+
+
+logging.basicConfig(level=logging.DEBUG)
def create_app():
@@ -32,12 +39,11 @@ def create_app():
app.config.from_pyfile('settings.py')
- model.db.init_app(app)
-
for module in [
+ model,
cmdline,
api,
- ]:
+ ]:
module.init_app(app)
# not API
@@ -58,20 +64,6 @@ def create_app():
flash('Classes removed')
return redirect(url_for('root_page'))
-
- # @app.route('/enc')
- # def enc():
- # fqdn = request.args.get('fqdn', 'default')
- # host = model.Host.query.where(model.Host.fqdn == fqdn).first()
- # if not host:
- # return Response(f"No host with name {fqdn}",
- # status=404)
-
- # out = {
- # 'environment': host.environment,
- # 'classes': [cls.class_name for cls in host.classes],
- # }
- # return Response(yaml.dump(out),
- # mimetype='application/x-yaml')
+ # a /enc route for the classifier might be a good idea
return app
diff --git a/pyenc/api.py b/pyenc/api.py
deleted file mode 100644
index 8fea554..0000000
--- a/pyenc/api.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import json
-
-import flask
-from flask import (
- Blueprint,
- Response,
- request,
- url_for
- )
-from . import model
-# from . import db
-
-api = Blueprint('api', __name__)
-# API
-@api.route('/list-classes')
-def list_classes():
- """Return all classes fuzzy matching q."""
- query = request.args.get('q', '')
- wildcarded_query = '%{}%'.format('%'.join(query.split(' ')))
-
- results = \
- model \
- .PuppetClass \
- .query \
- .where(model.PuppetClass.class_name.like(wildcarded_query)) \
- .all()
- print(wildcarded_query)
- return Response(json.dumps([x.class_name for x in results]),
- mimetype='application/json')
-
-@api.route('/classes-for')
-def classes_for():
- """Return classes mapped to host `fqdn'."""
- fqdn = request.args.get('fqdn')
- classes = [cls.class_name
- for cls in model.Host.query.where(model.Host.fqdn == fqdn)
- .first().classes]
- return Response(json.dumps(classes),
- mimetype='application/json')
-
-@api.route('/change-classes', methods=['POST'])
-def change_classes():
- """
- Update set of active classes for node.
-
- Takes a json object as the POST body, which should have the keys
- - fqdn :: which host to operate on
- - removed :: classes which should be removed
- - added :: classes which should be added
- """
- j = request.json
- host = model.Host.query.where(model.Host.fqdn == j['fqdn']).first()
- remove_set = set(j['removed'])
-
- new_cls = []
- for cls in host.classes:
- if cls.class_name in remove_set:
- continue
- new_cls.append(cls)
- host.classes = new_cls
-
- cls = model.PuppetClass.query \
- .where(model.PuppetClass.class_name.in_(j['added'])) \
- .all()
- host.classes.extend(cls)
- # print(remove_set, db.db.session.dirty)
- return flask.redirect(url_for('classes_for', fqdn=j['fqdn']))
-
-@api.route('/hosts')
-def list_hosts():
- """Return all hosts."""
- data = [x.serialize() for x in model.Host.query.all()]
- return Response(json.dumps(data),
- mimetype='application/json')
-
-def init_app(app):
- """Register blueprint to app."""
- app.register_blueprint(api)
diff --git a/pyenc/app/__init__.py b/pyenc/app/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pyenc/app/__init__.py
diff --git a/pyenc/app/api.py b/pyenc/app/api.py
new file mode 100644
index 0000000..a8a37d9
--- /dev/null
+++ b/pyenc/app/api.py
@@ -0,0 +1,117 @@
+import flask
+from flask import (
+ Blueprint,
+ request,
+ url_for
+)
+
+from . import model
+
+
+def concatenate(listlist):
+    """Flatten one level: return a new list holding the items of every list in `listlist`."""
+    return [item for lst in listlist for item in lst]
+
+
+api = Blueprint('api', __name__)
+
+
+@api.route('/classes')
+def list_classes():
+ """Return all classes fuzzy matching q."""
+ environment = request.args.get('environment', 'production')
+ query = request.args.get('q', '')
+ wildcarded_query = '%{}%'.format('%'.join(query.split(' ')))
+
+ result = model.db.engine.execute(model.db.text("""
+ SELECT pc.class_name
+ FROM environment_classes ec
+ LEFT JOIN puppet_environment e ON ec.environment_id = e.id
+ LEFT JOIN puppet_class pc ON ec.class_id = pc.id
+ WHERE e.name = :environment
+ AND pc.class_name LIKE :wildcard
+ """), {
+ 'environment': environment,
+ 'wildcard': wildcarded_query,
+ })
+
+ return flask.json.jsonify([x for (x,) in result])
+
+ # results = \
+ # model \
+ # .PuppetClass \
+ # .query \
+ # .where(model.PuppetClass.class_name.like(wildcarded_query)) \
+ # .where(model.PuppetClass.environments.name == environment) \
+ # .all()
+ # print(wildcarded_query)
+ # return flask.json.jsonify([x.class_name for x in results])
+
+
+@api.route('/environments')
+def list_environments():
+ envs = model.PuppetEnvironment.query.all()
+ return flask.json.jsonify([env.name for env in envs])
+
+
+@api.route('/class-file')
+def class_file():
+    """Return, as JSON, the file paths recorded for the class named by the `class` argument."""
+    wanted = request.args.get('class')
+    matches = model.PuppetClass.query \
+        .where(model.PuppetClass.class_name == wanted) \
+        .all()
+
+    # Collect the paths of every matching class into one flat list.
+    paths = []
+    for puppet_class in matches:
+        paths.extend(f.path for f in puppet_class.files)
+    return flask.json.jsonify(paths)
+
+
+@api.route('/hosts')
+def hosts():
+ result = model.Host.query.all()
+ return flask.json.jsonify([x.fqdn for x in result])
+
+
+@api.route('/classes-for')
+def classes_for():
+    """
+    Return classes mapped to host `fqdn'.
+
+    Responds with a JSON list of class names, or with 404 when no host
+    with the given fqdn exists.
+    """
+    fqdn = request.args.get('fqdn')
+    host = model.Host.query.where(model.Host.fqdn == fqdn).first()
+    if host is None:
+        # .first() yields None for an unknown host; accessing .classes on
+        # it would raise AttributeError and surface as a 500.
+        flask.abort(404, description=f'No host with name {fqdn}')
+    return flask.json.jsonify([cls.class_name for cls in host.classes])
+
+
+@api.route('/change-classes', methods=['POST'])
+def change_classes():
+    """
+    Update set of active classes for node.
+
+    Takes a json object as the POST body, which should have the keys
+    - fqdn :: which host to operate on
+    - removed :: classes which should be removed
+    - added :: classes which should be added
+
+    Responds with 404 when the host is unknown. Otherwise the change is
+    committed and the client is redirected to the host's class list.
+    """
+    j = request.json
+    host = model.Host.query.where(model.Host.fqdn == j['fqdn']).first()
+    if host is None:
+        flask.abort(404, description=f"No host with name {j['fqdn']}")
+
+    remove_set = set(j['removed'])
+    host.classes = [cls for cls in host.classes
+                    if cls.class_name not in remove_set]
+
+    # Only attach classes the host does not already have, so the same
+    # class is never listed twice.
+    present = {cls.class_name for cls in host.classes}
+    added = model.PuppetClass.query \
+        .where(model.PuppetClass.class_name.in_(j['added'])) \
+        .all()
+    host.classes.extend(cls for cls in added
+                        if cls.class_name not in present)
+
+    # Persist the change; this handler is the only place that knows the
+    # session is dirty.
+    model.db.session.commit()
+
+    # Routes registered on a blueprint get the endpoint name
+    # '<blueprint>.<function>'; a leading dot means "this blueprint".
+    return flask.redirect(url_for('.classes_for', fqdn=j['fqdn']))
+
+
+def init_app(app):
+ """Register blueprint to app."""
+ app.register_blueprint(api, url_prefix='/api')
diff --git a/pyenc/app/cmdline.py b/pyenc/app/cmdline.py
new file mode 100644
index 0000000..4e7e33f
--- /dev/null
+++ b/pyenc/app/cmdline.py
@@ -0,0 +1,37 @@
+import click
+from flask.cli import AppGroup
+
+app_group = AppGroup('user', help="Testt")
+
+
+@app_group.command('enc')
+@click.argument('fqdn')
+def enc(fqdn):
+    """Run the node classifier for FQDN and print the result as YAML."""
+    from pyenc import enc as enc_
+    # run_enc() returns a shell-style status (non-zero when the host is
+    # unknown); hand it on as the exit status of the command instead of
+    # silently exiting with 0.
+    click.get_current_context().exit(enc_.run_enc(fqdn))
+
+
+@app_group.command('init-db')
+def initialize_database():
+ from pyenc.app import model
+ model.db.create_all()
+ # model.db.session.add(model.Misc(key='db-version', value='0.1'))
+ # model.db.session.add(model.Misc(key='default-table', value='production'))
+ # model.db.session.commit()
+
+
+@app_group.command('enumerate-classes')
+@click.argument('environment')
+@click.option(
+    '--path-base',
+    default='/var/lib/machines/busting/etc/puppetlabs/code/environments/',
+    show_default=True,
+    help='Directory which contains one sub-directory per puppet environment.')
+def enumerate_classes(environment, path_base):
+    """Enumerate the puppet classes of ENVIRONMENT into the database."""
+    from pyenc import enumerate_classes as enumerate_classes_
+    enumerate_classes_.run(
+        path_base=path_base,
+        environment_name=environment)
+
+
+def init_app(app):
+ """Add command line options to current flask app."""
+ app.cli.add_command(app_group)
diff --git a/pyenc/app/model.py b/pyenc/app/model.py
new file mode 100644
index 0000000..f67cd10
--- /dev/null
+++ b/pyenc/app/model.py
@@ -0,0 +1,191 @@
+"""Database model for application."""
+
+from flask_sqlalchemy import SQLAlchemy
+
+# db = SQLAlchemy(session_options={"autoflush": False})
+db = SQLAlchemy()
+
+
+def init_app(app):
+ """Adds database bindings to a Flask App."""
+ db.init_app(app)
+ import logging
+ # logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
+
+
+host_classes = db.Table(
+ 'host_classes',
+ db.Column('host_id', db.ForeignKey('host.id'), primary_key=True),
+ db.Column('class_id', db.ForeignKey('puppet_class.id'), primary_key=True),
+)
+
+
+# NOTE this is non-final, and might get removed shortly
+environment_classes = db.Table(
+ 'environment_classes',
+ db.Column('environment_id', db.ForeignKey('puppet_environment.id'), primary_key=True),
+ db.Column('class_id', db.ForeignKey('puppet_class.id'), primary_key=True),
+)
+
+
+class_files = db.Table(
+ 'class_files',
+ db.Column('class_id', db.ForeignKey('puppet_class.id'), primary_key=True),
+ db.Column('file_id', db.ForeignKey('puppet_file.id'), primary_key=True),
+ db.UniqueConstraint('class_id', 'file_id'),
+)
+
+
+# class HostClasses(db.Model):
+# __tablename__ = 'host_classes'
+# id = db.Column(db.Integer, primary_key=True)
+# host_id = db.Column(db.Integer, db.ForeignKey('host.id'), nullable=False)
+# class_id = db.Column(db.Integer, db.ForeignKey('puppet_class.id'), nullable=False)
+
+
+class PuppetEnvironment(db.Model):
+ """
+ A puppet environment.
+
+ An environment is a collection of modules, but here we only keep
+ the files of the modules, in PuppetFile.
+ """
+ __tablename__ = 'puppet_environment'
+ id = db.Column(db.Integer, primary_key=True)
+ name = db.Column(db.Text, nullable=False, unique=True)
+ classes = db.relationship(
+ 'PuppetClass',
+ back_populates='environments',
+ secondary=environment_classes)
+ hosts = db.relationship(
+ 'Host',
+ back_populates='environment')
+
+
+class Host(db.Model):
+ """
+ Single computer.
+
+ A computer has a name (machine.example.com.), an environment
+ (production) and a list of puppet classes.
+
+ (TODO and direct values?)
+ """
+
+ __tablename__ = 'host'
+ id = db.Column(db.Integer, primary_key=True)
+ fqdn = db.Column(db.Text, nullable=False, unique=True)
+ environment_id = db.Column(db.Integer, db.ForeignKey(f'{PuppetEnvironment.__tablename__}.id'))
+ environment = db.relationship('PuppetEnvironment', back_populates='hosts')
+ # classes = db.relationship('HostClasses', backref='host', lazy='dynamic')
+ classes = db.relationship(
+ 'PuppetClass',
+ back_populates='hosts',
+ secondary=host_classes)
+
+ def serialize(self): # pylint: disable=missing-function-docstring
+ return {column.name: self.__getattribute__(column.name)
+ for column in self.__table__.columns}
+
+
+class PuppetFile(db.Model):
+ """
+ Puppet source code file.
+
+ Keeps track of known puppet files. Each file contains 0 to many
+ puppet classes.
+
+ Each file is uniquely identified by the pair (path, environment).
+ """
+
+ __tablename__ = 'puppet_file'
+ id = db.Column(db.Integer, primary_key=True)
+ # Where we found the file (path inside environment)
+ # e.g. /etc/puppetlabs/code/environments/<environment name>/<this path>
+ path = db.Column(db.Text, nullable=False)
+
+ # Puppet environment this file belongs in
+ environment = db.Column(db.Integer,
+ db.ForeignKey(f'{PuppetEnvironment.__tablename__}.id'),
+ nullable=False)
+
+ # Checksum of the content, should be usable as a key in PuppetFileContent
+ checksum = db.Column(db.Text, nullable=False)
+
+ # When we last read data into json
+ last_parse = db.Column(db.Float)
+
+ classes = db.relationship('PuppetClass',
+ back_populates='files',
+ secondary=class_files)
+ content = db.relationship('PuppetFileContent', backref='file')
+
+ __table_args__ = (
+ db.UniqueConstraint('path', 'environment'),
+ )
+
+
+class PuppetFileContent(db.Model):
+ """
+ (Parsed) contents of puppet source files.
+
+ Separate from PuppetFile since many environments can share files,
+ and I don't want to store redundant data.
+ """
+ __tablename__ = 'puppet_file_content'
+
+ id = db.Column(db.Integer, primary_key=True)
+
+ file_id = db.Column(db.Integer, db.ForeignKey(f'{PuppetFile.__tablename__}.id'))
+
+ # Checksum of the original file
+ checksum = db.Column(db.Text, nullable=False)
+
+ # Output of 'puppet parser dump --format json <filename>'
+ json = db.Column(db.Text, nullable=False)
+
+
+# TODO class environment mappings?
+# - the same class can exist in multiple environments
+# - the same class in multiple environments might be different
+# - the class can come or go when the file is changed (??)
+# - when a node changes environment it still has its classes, but they
+# refer to something else now
+
+# Possibly:
+# nodes holds its list of classes as a list of strings
+# I have tables which maps class names to files per environment
+
+# What happens when two different environments have separate classes
+# which share a name
+
+class PuppetClass(db.Model):
+ """
+ A puppet class.
+
+ The class itself only keeps track of its name here, and mostly
+ ensures that only existing classes can be added to a given node/host.
+ """
+ __tablename__ = 'puppet_class'
+ id = db.Column(db.Integer, primary_key=True)
+ class_name = db.Column(db.Text, nullable=False, unique=True)
+
+ hosts = db.relationship(
+ 'Host',
+ back_populates='classes',
+ secondary=host_classes)
+ environments = db.relationship(
+ 'PuppetEnvironment',
+ back_populates='classes',
+ secondary=environment_classes)
+ files = db.relationship(
+ 'PuppetFile',
+ back_populates='classes',
+ secondary=class_files)
+
+
+class Misc(db.Model):
+ __tablename__ = 'misc'
+ id = db.Column(db.Integer, primary_key=True)
+ key = db.Column(db.Text, nullable=False)
+ value = db.Column(db.Text)
diff --git a/pyenc/cmdline.py b/pyenc/cmdline.py
deleted file mode 100644
index 099018d..0000000
--- a/pyenc/cmdline.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import click
-from flask.cli import AppGroup
-
-app_group = AppGroup('user', help="Testt")
-
-@app_group.command('enc')
-@click.argument('fqdn')
-def enc(fqdn):
- from . import enc
- enc.run_enc(fqdn)
-
-@app_group.command('init-db')
-def initialize_database():
- from . import model
- model.db.create_all()
-
-@app_group.command('enumerate-classes')
-def enumerate_classes():
- from . import enumerate_classes
- environment_name = 'production'
- path_base = '/var/lib/machines/busting/etc/puppetlabs/code/environments/'
- enumerate_classes.run(
- path_base=path_base,
- environment_name=environment_name)
-
-def init_app(app):
- """Add command line options to current flask app."""
- app.cli.add_command(app_group)
-
diff --git a/pyenc/enc.py b/pyenc/enc.py
index 8732af2..3f7a0b2 100644
--- a/pyenc/enc.py
+++ b/pyenc/enc.py
@@ -5,6 +5,7 @@ Command line entry point for Puppet External Node Classifier (enc).
import yaml
from . import model
+
def run_enc(fqdn):
"""
Run the puppet node classifier.
@@ -16,9 +17,8 @@ def run_enc(fqdn):
print(f"No host with name {fqdn}")
return 1
out = {
- 'environment': host.environment,
+ 'environment': host.environment.name,
'classes': [cls.class_name for cls in host.classes],
}
print(yaml.dump(out))
return 0
-
diff --git a/pyenc/enumerate_classes.py b/pyenc/enumerate_classes.py
index 822852e..d4197bd 100644
--- a/pyenc/enumerate_classes.py
+++ b/pyenc/enumerate_classes.py
@@ -1,44 +1,39 @@
-
"""
Loads all puppet files in environment, parse them, and store the
parsed data in the database.
"""
+# TODO write proper tests
+# Which especially test environments
+
+from typing import Union
import hashlib
import json
import os
import os.path
import subprocess
-import time
-from sqlalchemy.sql import text
-
-import threading
-from threading import Lock, Thread
-from queue import Queue
+# import time
+from sqlalchemy.sql import text
# import pyenc
-from pyenc.model import db
-from pyenc import model
+from pyenc.app.model import db
+from pyenc.app import model
-from typing import Union, Generator
-
-def with_lock(lock, proc):
- try:
- lock.acquire()
- proc()
- finally:
- lock.release()
+Path = Union[str, bytes]
-def call(proc, *args):
- proc(*args)
+def find(path: Path, **kvs) -> list[bytes]:
+ """
+ Wrapper around find(1).
-path = Union[str, bytes]
+ variables:
+ path -- base path for the find operation
-def find(path: path, **kvs) -> list[bytes]:
- """Wrapper around find(1)."""
+ key word args:
+ any that find(1) takes, but with the leading dash (-) removed.
+ """
cmdline = ['find', path]
for key, value in kvs.items():
cmdline.append(f'-{key}')
@@ -46,10 +41,13 @@ def find(path: path, **kvs) -> list[bytes]:
cmdline.append('-print0')
cmd = subprocess.run(cmdline, capture_output=True, check=True)
- return (f for f in cmd.stdout.split(b'\0') if f)
+ return (f.decode('UTF-8') for f in cmd.stdout.split(b'\0') if f)
class PuppetParseError(Exception):
+ """
+ Error holding failure result of `puppet parser dump`.
+ """
def __init__(self, code, msg):
super().__init__()
self.code = code
@@ -62,62 +60,39 @@ class PuppetParseError(Exception):
return repr(self)
-def puppet_parse(file: path) -> bytes:
+def puppet_parse(file: Path) -> bytes:
+ """
+ Runs the external puppet parser, and returns json as bytes.
+
+ Note that this is really slow.
+
+ file -- Path to the file to check
+ """
with subprocess.Popen(
['puppet', 'parser', 'dump', '--format', 'json', file],
+ text=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE) as cmd:
- if cmd.retuncode and cmd.returncode != 0:
- raise PuppetParseError(cmd.returncode, cmd.stderr.read().decode('UTF-8'))
+ if cmd.returncode and cmd.returncode != 0:
+ raise PuppetParseError(cmd.returncode, cmd.stderr.read())
json_data = cmd.stdout.read()
if (value := cmd.wait()) != 0:
- raise PuppetParseError(value, cmd.stderr.read().decode('UTF-8'))
+ raise PuppetParseError(value, cmd.stderr.read())
return json_data
-def parse_files(files: list[path]) -> Generator[model.PuppetFile]:
- for i, file in enumerate(files):
- try:
- stat = os.stat(file)
-
- last_modify = stat.st_mtime
- old_object = model.PuppetFile.query \
- .where(model.PuppetFile.path == file) \
- .first()
-
- if old_object and old_object.last_parse > last_modify:
- # file unchanged since our last parse, skip
- continue
-
- print(f'{i}/{len(files)}: {file}')
-
- if old_object:
- puppet_file = old_object
- else:
- puppet_file = model.PuppetFile(path=file)
- puppet_file.last_parse = time.time()
- # m.json = puppet_parse(file)
-
- yield puppet_file
-
- except PuppetParseError as err:
- # TODO cache error
- print('Error:', err)
- continue
-
-
def interpret_file(json_data: dict) -> list[str]:
- """Find all classes in json-representation of file."""
+ """Find all puppet class names in json-representation of file."""
top = json_data['^']
if top[0] == 'class':
tmp = top[1]['#']
idx = tmp.index('name')
return [tmp[idx + 1]]
# print(tmp[idx + 1])
- elif top[0] == 'block':
+ if top[0] == 'block':
ret_value = []
for element in top[1:]:
if element['^'][0] == 'class':
@@ -125,111 +100,187 @@ def interpret_file(json_data: dict) -> list[str]:
idx = tmp.index('name')
ret_value.append(tmp[idx + 1])
return ret_value
- else:
- return []
+ return []
+def enumerate_files(path_base, environment):
+ """
+ Enumerate all puppet files in a puppet environment.
-def enumerate_files(path_base, environment_name):
+ Updates the database so that the query
+ >>> SELECT path FROM puppet_file WHERE environment = :environment
+ Returns how the directory tree looks *right now*.
+ """
path = os.path.join(path_base, environment.name)
-
- files = list(find(path, type='f', name='*.pp'))
+ files = find(path, type='f', name='*.pp')
try:
- for puppet_file in parse_files(files):
+ for puppet_file in (model.PuppetFile(path=file) for file in files):
with open(puppet_file.path, 'rb') as f:
checksum = hashlib.sha256(f.read()).hexdigest()
# Returns puppet_file.path, relative to path_base
- puppet_file.path = os.path.relpath(puppet_file.path, path.encode('UTF-8'))
- # TODO does flask want the whole environment object?
- puppet_file.environment = environment.id
- puppet_file.checksum = checksum
- db.session.add(puppet_file)
+
+ # This works in at least postgres and sqlite3
+ db.engine.execute(text("""
+ INSERT INTO puppet_file (path, environment, checksum)
+ VALUES (:path, :environment, :checksum)
+ ON CONFLICT (path, environment)
+ DO UPDATE SET checksum = EXCLUDED.checksum
+ """), {
+ 'path': os.path.relpath(puppet_file.path, path),
+ 'environment': environment.id,
+ 'checksum': checksum,
+ })
finally:
db.session.commit()
-def run(path_base: path, environment_name: str):
+def ensure_environment(name):
+ """
+ Returns a valid PuppetEnvironment object for the named environment.
+
+ If it already exists in the database then return the existing one,
+ otherwise create it and return that.
+ """
+ environment = model \
+ .PuppetEnvironment \
+ .query \
+ .where(model.PuppetEnvironment.name == name) \
+ .one_or_none()
- ### Ensure that we have oru environment
- environment = model.PuppetEnvironment.query.where(model.PuppetEnvironment.name == environment_name).first()
if not environment:
- environment = model.PuppetEnvironment(name=environment_name)
+ environment = model.PuppetEnvironment(name=name)
db.session.add(environment)
- # TODO does this update the environment object
+ # This also updates our environment object, filling in
+ # autogenerated fields.
db.session.commit()
- ### Identify all puppet files, and note the base of their content
- # enumerate_files(path_base, environment_name)
+ return environment
+
+def run(path_base: Path = '/etc/puppetlabs/code/environments',
+ environment_name: str = 'production'):
+ """
+ Runs the class enumeration.
+
+ Arguments:
+ path_base -- Path to where each puppet environment resides
+ environment_name -- Which puppet environment to parse
+ """
+
+ ### Ensure that we have our environment
+ environment = ensure_environment(environment_name)
+
+ ### Identify all puppet files, and note the base of their content
+ enumerate_files(path_base, environment)
### Find all puppet files which we haven't parsed
- result = db.engine.execute(text("""
- SELECT
- f.id,
- f.path,
- f.last_parse,
- f.checksum,
- env.name
- FROM puppet_file f
- LEFT OUTER JOIN puppet_file_content c
- ON f.checksum = c.checksum
- LEFT JOIN puppet_environment env
- ON f.environment = env.id
- WHERE c.json IS NULL
- """))
-
- # db_lock = Lock()
- threads = []
- q = Queue()
- for (id, path, last, checksum, environment) in result:
- print(environment, path)
- # return
- full_path = os.path.join(path_base.encode('UTF-8'), environment.encode('UTF-8'), path)
-
- with open(full_path, 'rb') as f:
- current_checksum = hashlib.sha256(f.read()).hexdigest()
-
- if current_checksum != checksum:
- print(f'Checksum changed for {environment}/{path}')
- # db.engine.execute(model.PuppetFile.delete().where(model.PuppetFile.id == id))
- continue
-
- thread = Thread(target=lambda checksum, full_path: (checksum, puppet_parse(full_path)),
- args=(checksum, full_path),
- name=f'{environment}/{path}')
- thread.start()
- threads.append(thread)
+ base = model.PuppetFile \
+ .query \
+ .outerjoin(model.PuppetFileContent,
+ model.PuppetFile.checksum == model.PuppetFileContent.checksum) \
+ .where(model.PuppetFileContent.json == None) # noqa: E711
- try:
- # for thread in threads:
- # print(f'Waiting on {thread.name}')
- # thread.join()
- # print(f'{thread.name} joined')
- while not q.empty():
- print('Getting something from queue')
- (checksum, item) = q.get()
- print(checksum)
- pfc = model.PuppetFileContent(checksum=checksum, json=item)
- db.session.add(pfc)
- q.task_done()
- finally:
- db.session.commit()
+ # count for progress bar
+ count = base.count()
- return
+ result = base \
+ .join(model.PuppetEnvironment) \
+ .add_column(model.PuppetEnvironment.name) \
+ .all()
+ db.session.commit()
try:
- for puppet_file in model.PuppetFile.query.all():
+ for (i, (puppet_file, env)) in enumerate(result):
+ print(env, puppet_file.path)
+ print(f'{i} / {count}', end='\r')
+
+ full_path = os.path.join(path_base, env, puppet_file.path)
+
try:
- class_names = interpret_file(json.loads(os.path.join(path, puppet_file.json)))
- for class_name in class_names:
- db.session.add(model.PuppetClass(
- class_name=class_name,
- comes_from=puppet_file))
- except Exception as e:
+ item = puppet_parse(full_path)
+ except PuppetParseError as e:
print(e)
- print(f'Failed: {puppet_file.path}')
+ continue
+
+ # Check that the file we just parsed is the file we
+ # expected.
+ # NOTE this is technically incorrect, consider
+ # | Us | Attacker |
+ # |------------------------|--------------|
+ # | initial checksum | |
+ # | | replace file |
+ # | parse | |
+ # | | restore file |
+ # | second checksum (this) | |
+
+ with open(full_path, 'rb') as f:
+ current_checksum = hashlib.sha256(f.read()).hexdigest()
+
+ if current_checksum != puppet_file.checksum:
+ print(f'Checksum changed for {env}/{puppet_file.path}')
+ continue
+
+ # File parsed was file we expected to parse, add it to the
+ # database
+ pfc = model.PuppetFileContent(file_id=puppet_file.id,
+ checksum=puppet_file.checksum,
+ json=item)
+ db.session.add(pfc)
+
+ print('loop finished')
finally:
+ # TODO sqlite fails here, complains that the "database is locked"
db.session.commit()
+
+ for file_content in model.PuppetFileContent.query.all():
+ try:
+ class_names = interpret_file(json.loads(file_content.json))
+ for class_name in class_names:
+ # cls = model.PuppetClass(class_name=class_name)
+ # cls.environments.append(environment)
+ # cls.files.append(file_content.file)
+
+ # Add class (if it does not exist)
+ db.engine.execute(text("""
+ INSERT INTO puppet_class (class_name)
+ VALUES (:name)
+ ON CONFLICT (class_name) DO NOTHING
+ """), {'name': class_name})
+
+ # Add class to environment (if not already there)
+ db.engine.execute(text("""
+ INSERT INTO environment_classes (environment_id, class_id)
+ SELECT :env, id FROM puppet_class WHERE class_name = :name
+ ON CONFLICT (environment_id, class_id) DO NOTHING
+ """), {'env': environment.id, 'name': class_name})
+
+ # Add class to file mapping (if not already there)
+ db.engine.execute(text("""
+ INSERT INTO class_files (file_id, class_id)
+ SELECT :file, id FROM puppet_class WHERE class_name = :name
+ ON CONFLICT (file_id, class_id) DO NOTHING
+ """), {'file': file_content.file_id, 'name': class_name})
+
+ except Exception as e:
+ print(e)
+ # print(f'Failed: {puppet_file.path}')
+
+ db.session.commit()
+
+
+def gc_puppet_files():
+    """
+    Remove unused puppet file content.
+
+    Removes all puppet file contents which no longer have an "owning"
+    file, i.e. whose checksum matches no row in puppet_file.
+    """
+
+    # The outer DELETE has no table alias, so its column is referenced as
+    # plain "id"; the alias "pfc" only exists inside the sub-query.
+    db.engine.execute(text("""
+    DELETE FROM puppet_file_content WHERE id IN
+    ( SELECT pfc.id FROM puppet_file_content pfc
+      LEFT JOIN puppet_file f ON pfc.checksum = f.checksum
+      WHERE f.id IS NULL
+    ) """))
diff --git a/pyenc/model.py b/pyenc/model.py
deleted file mode 100644
index e774014..0000000
--- a/pyenc/model.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""Database model for application."""
-
-from flask_sqlalchemy import SQLAlchemy
-import requests
-import yaml
-
-db = SQLAlchemy()
-
-host_classes = db.Table(
- 'host_classes',
- db.Column('host_id', db.ForeignKey('host.id')),
- db.Column('class_id', db.ForeignKey('puppet_class.id')))
-
-# class HostClasses(db.Model):
-# __tablename__ = 'host_classes'
-# id = db.Column(db.Integer, primary_key=True)
-# host_id = db.Column(db.Integer, db.ForeignKey('host.id'), nullable=False)
-# class_id = db.Column(db.Integer, db.ForeignKey('puppet_class.id'), nullable=False)
-
-
-class Host(db.Model):
- """
- Single computer.
-
- A computer has a name (machine.example.com.), an environment
- (production) and a list of puppet classes.
-
- (TODO and direct values?)
- """
-
- __tablename__ = 'host'
- id = db.Column(db.Integer, primary_key=True)
- fqdn = db.Column(db.Text, nullable=False)
- environment = db.Column(db.Text)
- # classes = db.relationship('HostClasses', backref='host', lazy='dynamic')
- classes = db.relationship(
- 'PuppetClass',
- back_populates='hosts',
- secondary=host_classes)
-
- def serialize(self):
- return {column.name: self.__getattribute__(column.name)
- for column in self.__table__.columns}
-
-
-class PuppetEnvironment(db.Model):
- """
- A puppet environment.
-
- An enviromnet is a collection of modules, but here we only keep
- the files of the modules, in PuppetFile.
- """
- __tablename__ = 'puppet_environment'
- id = db.Column(db.Integer, primary_key=True)
- name = db.Column(db.Text, nullable=False)
-
-
-class PuppetFile(db.Model):
- """
- Puppet source code file.
-
- Keeps track of known puppet files. Each file contains 0 to many
- puppet classes.
-
- Each file is uniquely identified by the pair (path, environment).
- """
-
- __tablename__ = 'puppet_file'
- id = db.Column(db.Integer, primary_key=True)
- # Where we found the file
- # TODO normalize this to <path-inside-environment>
- path = db.Column(db.Text, nullable=False)
-
- environment = db.Column(db.Integer, db.ForeignKey(f'{PuppetEnvironment.__tablename__}.id'))
-
- # Checksum of the content, should be usable as a key in
- # PuppetFileContent
- # TODO flask weak keys?
- checksum = db.Column(db.Text)
-
- # When we last read data into json
- last_parse = db.Column(db.Float)
- # classes = db.relationship('PuppetClass', back_populates='comes_from')
- classes = db.relationship('PuppetClass', backref='comes_from')
-
-
-class PuppetFileContent(db.Model):
- """
- (Parsed) contents of puppet source files.
-
- Separate from PuppetFile since many environments can share files,
- and I don't want to store reduntand data.
- """
- __tablename__ = 'puppet_file_content'
-
- id = db.Column(db.Integer, primary_key=True)
-
- # Checksum of the original file
- checksum = db.Column(db.Text, nullable=False)
-
- # Output of 'puppet parser dump --format json <filename>'
- json = db.Column(db.Text, nullable=False)
-
-
-
-class PuppetClass(db.Model):
- """
- A puppet class.
-
- The class itself only keeps track of its name here, and mostyl
- ensures that only existing classes can be added to a given node/host.
- """
-
- __tablename__ = 'puppet_class'
- id = db.Column(db.Integer, primary_key=True)
- class_name = db.Column(db.Text, nullable=False)
- # comes_from = db.relationship('PuppetFile', back_populates='classes')
- comes_from_id = db.Column(db.Integer,
- db.ForeignKey(f'{PuppetFile.__tablename__}.id'))
- hosts = db.relationship(
- 'Host',
- back_populates='classes',
- secondary=host_classes)
diff --git a/pyenc/templates/start_page.html b/pyenc/templates/start_page.html
index 268f7d1..6186e25 100644
--- a/pyenc/templates/start_page.html
+++ b/pyenc/templates/start_page.html
@@ -1,7 +1,7 @@
{% extends "base.html" %}
{% block content %}
<h1>This certainly is a page</h1>
- <div id="react-base"></div>
+ <div id="react-base"><em>This should be replaced by react</em></div>
<div class="hosts">
{% for host in hosts %}
<div class="host">
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..029b063
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[mypy]
+plugins = sqlalchemy.ext.mypy.plugin
+
+[flake8]
+max-line-length = 100
+# E266: too many leading "#" for block comment
+ignore = E266