aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-08-03 16:24:08 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-08-03 16:24:08 +0200
commit57a283a5634d47f559557f966e64d621843e4035 (patch)
tree9a93ab9fe570ceb67ef01b963f084b5fa6d4c4c9
parentwork (diff)
downloadpuppet-classifier-57a283a5634d47f559557f966e64d621843e4035.tar.gz
puppet-classifier-57a283a5634d47f559557f966e64d621843e4035.tar.xz
work
-rw-r--r--.pylintrc3
-rwxr-xr-xenumerate_classes.py142
-rw-r--r--pyenc/__init__.py8
-rw-r--r--pyenc/cmdline.py29
-rw-r--r--pyenc/db.py24
-rw-r--r--pyenc/enc.py16
-rw-r--r--pyenc/enumerate_classes.py235
-rw-r--r--pyenc/model.py44
-rw-r--r--pyenc/templates/start_page.html1
9 files changed, 314 insertions, 188 deletions
diff --git a/.pylintrc b/.pylintrc
index ab6f45d..85507dc 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,3 +1,6 @@
[MESSAGES CONTROL]
disable=consider-using-f-string,
missing-module-docstring
+
+[FORMAT]
+good-names=f,i
diff --git a/enumerate_classes.py b/enumerate_classes.py
deleted file mode 100755
index c9e1c4b..0000000
--- a/enumerate_classes.py
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Loads all puppet files in environment, parse them, and store the
-parsed data in the database.
-"""
-
-import subprocess
-import json
-import os
-import time
-
-import pyenc
-from pyenc.db import db
-import pyenc.model as model
-
-
-def find(path, **kvs):
- """Wrapper around find(1)."""
- cmdline = ['find', path]
- for k, v in kvs.items():
- cmdline.append(f'-{k}')
- cmdline.append(v)
- cmdline.append('-print0')
-
- cmd = subprocess.run(cmdline, capture_output=True)
- return (f for f in cmd.stdout.split(b'\0') if f)
-
-
-class PuppetParseError(Exception):
- def __init__(self, code, msg):
- self.code = code
- self.msg = msg
-
- def __repr__(self):
- return f'PuppetParserError({self.code}, {self.msg})'
-
- def __str__(self):
- return repr(self)
-
-
-def puppet_parse(file):
- cmd = subprocess.Popen(
- ['puppet', 'parser', 'dump', '--format', 'json', file],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- if cmd.returncode and cmd.returncode != 0:
- raise PuppetParseError(cmd.returncode, cmd.stderr.read().decode('UTF-8'))
- else:
- json = cmd.stdout.read()
-
- if (value := cmd.wait()) != 0:
- raise PuppetParseError(value, cmd.stderr.read().decode('UTF-8'))
-
- return json
-
-
-def parse_files(files):
- for i, file in enumerate(files):
- try:
- st = os.stat(file)
-
- last_modify = st.st_mtime
- old_object = model.PuppetFile.query \
- .where(model.PuppetFile.path == file) \
- .first()
-
- if old_object and old_object.last_parse > last_modify:
- # file unchanged since our last parse, skip
- continue
-
- print(f'{i}/{len(files)}: {file}')
-
- if old_object:
- m = old_object
- else:
- m = model.PuppetFile(path=file)
- m.last_parse = time.time()
- m.json = puppet_parse(file)
-
- yield m
-
- except PuppetParseError as e:
- # TODO cache error
- print('Error:', e)
- continue
-
-
-def interpret_file(json_data):
- """Find all classes in json-representation of file."""
- top = json_data['^']
- if top[0] == 'class':
- tmp = top[1]['#']
- idx = tmp.index('name')
- return [tmp[idx + 1]]
- # print(tmp[idx + 1])
- elif top[0] == 'block':
- ret_value = []
- for element in top[1:]:
- if element['^'][0] == 'class':
- tmp = element['^'][1]['#']
- idx = tmp.index('name')
- ret_value.append(tmp[idx + 1])
- return ret_value
- else:
- return []
-
-
-
-
-def main():
- app = pyenc.create_app()
- app.app_context().push()
-
- path = '/var/lib/machines/busting/etc/puppetlabs/code/environments/production'
-
- files_gen = find(path, type='f', name='*.pp')
- files = [f for f in files_gen]
-
- try:
- for puppet_file in parse_files(files):
- db.session.add(puppet_file)
- finally:
- db.session.commit()
-
-
- try:
- for puppet_file in model.PuppetFile.query.all():
- try:
- class_names = interpret_file(json.loads(puppet_file.json))
- for class_name in class_names:
- db.session.add(model.PuppetClass(
- class_name=class_name,
- comes_from=puppet_file))
- except Exception as e:
- print(e)
- print(f'Failed: {puppet_file.path}')
- finally:
- db.session.commit()
-
-if __name__ == '__main__':
- main()
diff --git a/pyenc/__init__.py b/pyenc/__init__.py
index cd0d57d..7249936 100644
--- a/pyenc/__init__.py
+++ b/pyenc/__init__.py
@@ -15,8 +15,7 @@ from flask import (
)
from . import model
-from . import db
-from . import enc
+from . import cmdline
from . import api
@@ -33,9 +32,10 @@ def create_app():
app.config.from_pyfile('settings.py')
+ model.db.init_app(app)
+
for module in [
- db,
- enc,
+ cmdline,
api,
]:
module.init_app(app)
diff --git a/pyenc/cmdline.py b/pyenc/cmdline.py
new file mode 100644
index 0000000..099018d
--- /dev/null
+++ b/pyenc/cmdline.py
@@ -0,0 +1,29 @@
+import click
+from flask.cli import AppGroup
+
+app_group = AppGroup('user', help="Testt")
+
+@app_group.command('enc')
+@click.argument('fqdn')
+def enc(fqdn):
+    """Run the puppet node classifier for the node FQDN."""
+    # Imported lazily, and by name, so that the module does not shadow
+    # this function inside its own body.
+    from .enc import run_enc
+    run_enc(fqdn)
+
+@app_group.command('init-db')
+def initialize_database():
+    """Create all database tables declared by the models."""
+    # Imported lazily so that loading the command line module does not
+    # pull in the model layer.
+    from .model import db
+    db.create_all()
+
+@app_group.command('enumerate-classes')
+@click.option('--environment', 'environment_name',
+              default='production', show_default=True,
+              help='Name of the puppet environment to scan.')
+@click.option('--path-base', 'path_base',
+              default='/var/lib/machines/busting/etc/puppetlabs/code/environments/',
+              show_default=True,
+              help='Directory which contains the puppet environments.')
+def enumerate_classes(environment_name, path_base):
+    """Parse the puppet files of an environment into the database."""
+    # Imported lazily, and by name, so that the module does not shadow
+    # this function inside its own body.
+    from .enumerate_classes import run
+    run(path_base=path_base,
+        environment_name=environment_name)
+
+def init_app(app):
+    """
+    Add command line options to current flask app.
+
+    Registers the command group of this module (and thereby all
+    commands declared on it) on the command line interface of `app`.
+    """
+    app.cli.add_command(app_group)
+
diff --git a/pyenc/db.py b/pyenc/db.py
deleted file mode 100644
index 38edda1..0000000
--- a/pyenc/db.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""Database connection for application."""
-
-import click
-from flask import current_app, g
-from flask.cli import with_appcontext
-from .model import db
-
-
-@click.command('init-db')
-@with_appcontext
-def init_db_command():
- """Create database from command line."""
- # init_db()
- # print(db)
- print(db)
- db.create_all()
- click.echo('Initialized the database.')
-
-
-def init_app(app):
- """Add database (and click) to given flask app."""
- # app.teardown_appcontext(close_db)
- db.init_app(app)
- app.cli.add_command(init_db_command)
diff --git a/pyenc/enc.py b/pyenc/enc.py
index e0f3f8d..8732af2 100644
--- a/pyenc/enc.py
+++ b/pyenc/enc.py
@@ -2,21 +2,9 @@
Command line entry point for Puppet External Node Classifier (enc).
"""
-import click
import yaml
-# from flask import current_app, g
-# from flask.cli import with_appcontext, AppGroup
-from flask.cli import AppGroup
-# from .db import db
from . import model
-
-
-app_group = AppGroup('user', help="Testt")
-
-@app_group.command('enc')
-@click.argument('fqdn')
-# @with_appcontext
def run_enc(fqdn):
"""
Run the puppet node classifier.
@@ -34,7 +22,3 @@ def run_enc(fqdn):
print(yaml.dump(out))
return 0
-
-def init_app(app):
- """Add puppet enc click to current flask app."""
- app.cli.add_command(app_group)
diff --git a/pyenc/enumerate_classes.py b/pyenc/enumerate_classes.py
new file mode 100644
index 0000000..822852e
--- /dev/null
+++ b/pyenc/enumerate_classes.py
@@ -0,0 +1,235 @@
+
+"""
+Loads all puppet files in environment, parse them, and store the
+parsed data in the database.
+"""
+
+import hashlib
+import json
+import os
+import os.path
+import subprocess
+import time
+from sqlalchemy.sql import text
+
+import threading
+from threading import Lock, Thread
+from queue import Queue
+
+
+# import pyenc
+from pyenc.model import db
+from pyenc import model
+
+from typing import Union, Generator
+
+
+def with_lock(lock, proc):
+    """
+    Call proc while holding lock.
+
+    The lock is only released if it actually was acquired; a failure
+    to acquire it propagates unchanged instead of being masked by an
+    error from releasing a lock which isn't held.
+
+    :param lock: Any context manager lock, e.g. threading.Lock.
+    :param proc: Procedure of no arguments.
+    :returns: Whatever proc returns.
+    """
+    with lock:
+        return proc()
+
+
+def call(proc, *args):
+    """Apply proc to args, and return its result."""
+    return proc(*args)
+
+path = Union[str, bytes]
+
+def find(path: path, **kvs) -> list[bytes]:
+    """
+    Wrapper around find(1).
+
+    Each keyword argument is passed on as a test to find, so
+    `find(p, type='f', name='*.pp')` runs
+    `find p -type f -name '*.pp' -print0`.
+
+    :param path: Directory to start searching from.
+    :param kvs: Tests to hand to find, as {name: argument}.
+    :returns: Found paths, as bytes, in the order find printed them.
+    :raises subprocess.CalledProcessError: If find exits non-zero.
+    """
+    cmdline = ['find', path]
+    for key, value in kvs.items():
+        cmdline += [f'-{key}', value]
+    # NUL-separated output, since file names may contain newlines.
+    cmdline.append('-print0')
+
+    cmd = subprocess.run(cmdline, capture_output=True, check=True)
+    # A real list, as annotated: callers may iterate it more than once
+    # and take its length. The trailing NUL leaves an empty last
+    # element, which is dropped here.
+    return [f for f in cmd.stdout.split(b'\0') if f]
+
+
+class PuppetParseError(Exception):
+    """
+    The puppet parser failed to parse a file.
+
+    :param code: Exit status of the parser process.
+    :param msg: What the parser wrote on standard error.
+    """
+
+    def __init__(self, code, msg):
+        # Forward the arguments, so that `args` (and thereby pickling
+        # and generic exception reporting) carries them.
+        super().__init__(code, msg)
+        self.code = code
+        self.msg = msg
+
+    def __repr__(self):
+        # Use the actual class name; it was previously misspelled as
+        # 'PuppetParserError'.
+        return f'{type(self).__name__}({self.code}, {self.msg})'
+
+    def __str__(self):
+        return repr(self)
+
+
+def puppet_parse(file: path) -> bytes:
+    """
+    Parse a puppet source file with the puppet parser.
+
+    Runs `puppet parser dump --format json <file>`.
+
+    :param file: Path to the puppet source file.
+    :returns: Standard output of the parser, undecoded.
+    :raises PuppetParseError: If the parser exits non-zero. Carries
+        the exit status along with what was written to standard error.
+    """
+    # subprocess.run drains stdout and stderr concurrently. Reading
+    # them one after the other could deadlock once the parser had
+    # filled the pipe which wasn't being read.
+    cmd = subprocess.run(
+        ['puppet', 'parser', 'dump', '--format', 'json', file],
+        capture_output=True,
+        check=False)
+
+    if cmd.returncode != 0:
+        raise PuppetParseError(
+            cmd.returncode,
+            # Error output is only for display, so never let an odd
+            # byte sequence in it hide the actual parse failure.
+            cmd.stderr.decode('UTF-8', errors='replace'))
+
+    return cmd.stdout
+
+
+def parse_files(files: list[path]) -> Generator[model.PuppetFile, None, None]:
+    """
+    Find which of the given files need to be (re)parsed.
+
+    For each file, the PuppetFile with that path is looked up. Files
+    which were parsed after they last were modified are skipped. For
+    all other files the existing PuppetFile (or a fresh one, if none
+    existed) is yielded, with last_parse set to the current time.
+
+    Nothing is added to the database session here, that is up to the
+    caller.
+
+    :param files: Paths to puppet source files.
+    """
+    # Count from 1, so that the last progress line reads "n/n".
+    for i, file in enumerate(files, start=1):
+        try:
+            last_modify = os.stat(file).st_mtime
+            old_object = model.PuppetFile.query \
+                .where(model.PuppetFile.path == file) \
+                .first()
+
+            if old_object and old_object.last_parse > last_modify:
+                # file unchanged since our last parse, skip
+                continue
+
+            print(f'{i}/{len(files)}: {file}')
+
+            puppet_file = old_object or model.PuppetFile(path=file)
+            puppet_file.last_parse = time.time()
+
+            yield puppet_file
+
+        except PuppetParseError as err:
+            # Nothing in here invokes the parser at the moment, so this
+            # is only reached again once parsing is reinstated here.
+            # TODO cache error
+            print('Error:', err)
+            continue
+
+
+def interpret_file(json_data: dict) -> list[str]:
+    """
+    Collect the names of the classes declared in a parsed puppet file.
+
+    Only a class at the top of the file, or classes directly inside a
+    top level block, are considered.
+
+    :param json_data: Decoded output of the puppet parser.
+    :returns: Class names, in the order they appear in the file.
+    """
+    top = json_data['^']
+    kind = top[0]
+
+    if kind == 'class':
+        candidates = iter([top])
+    elif kind == 'block':
+        # Lazy on purpose: each element is only looked at once all
+        # elements before it have been handled in full.
+        candidates = (element['^'] for element in top[1:])
+    else:
+        return []
+
+    class_names = []
+    for node in candidates:
+        if node[0] != 'class':
+            continue
+        # The attributes are a flat [key, value, key, value, ...] list
+        attributes = node[1]['#']
+        class_names.append(attributes[attributes.index('name') + 1])
+    return class_names
+
+
+
+def enumerate_files(path_base, environment_name):
+    """
+    Record all puppet files of an environment in the database.
+
+    Finds every `*.pp` file below `<path_base>/<environment_name>`
+    and stores those which need (re)parsing, according to
+    parse_files, with their path relative to the environment
+    directory, their environment and a SHA-256 checksum of their
+    content.
+
+    Whatever was added to the session is committed even if a later
+    file fails.
+
+    :param path_base: Directory containing the puppet environments.
+    :param environment_name: Name of the environment to scan. It must
+        already be present in the database.
+    :raises ValueError: If the environment isn't in the database.
+    """
+    environment = model.PuppetEnvironment.query \
+        .where(model.PuppetEnvironment.name == environment_name) \
+        .first()
+    if environment is None:
+        raise ValueError(f'Unknown puppet environment: {environment_name!r}')
+
+    path = os.path.join(path_base, environment_name)
+
+    files = list(find(path, type='f', name='*.pp'))
+
+    try:
+        for puppet_file in parse_files(files):
+            with open(puppet_file.path, 'rb') as f:
+                checksum = hashlib.sha256(f.read()).hexdigest()
+            # Store the path relative to the environment directory.
+            # find gives bytes, while path may be either str or bytes;
+            # relpath requires both to be of the same type.
+            # NOTE(review): parse_files looks files up by the path it
+            # is given, which is not this relative path -- confirm that
+            # already known files are matched as intended.
+            puppet_file.path = os.path.relpath(os.fsencode(puppet_file.path),
+                                               os.fsencode(path))
+            # TODO does flask want the whole environment object?
+            puppet_file.environment = environment.id
+            puppet_file.checksum = checksum
+            db.session.add(puppet_file)
+
+    finally:
+        db.session.commit()
+
+
+def _sha256_of(filename) -> str:
+    """Return the hex encoded SHA-256 digest of the content of filename."""
+    with open(filename, 'rb') as f:
+        return hashlib.sha256(f.read()).hexdigest()
+
+
+def _ensure_environment(environment_name: str):
+    """Return the PuppetEnvironment with the given name, creating it if needed."""
+    environment = model.PuppetEnvironment.query \
+        .where(model.PuppetEnvironment.name == environment_name) \
+        .first()
+    if not environment:
+        environment = model.PuppetEnvironment(name=environment_name)
+        db.session.add(environment)
+    # TODO does this update the environment object
+    db.session.commit()
+    return environment
+
+
+def _parse_by_checksum(item):
+    """Parse one (checksum, filename) pair into (checksum, json, error)."""
+    checksum, filename = item
+    try:
+        return checksum, puppet_parse(filename), None
+    except PuppetParseError as err:
+        # Handed back rather than raised, so that one broken file
+        # doesn't abort the remaining ones.
+        return checksum, None, err
+
+
+def run(path_base: path, environment_name: str):
+    """
+    Parse all known puppet files which haven't been parsed yet.
+
+    Ensures that the named environment exists in the database, then
+    finds every PuppetFile (of any environment) without a matching
+    PuppetFileContent. Those whose content on disk still matches the
+    recorded checksum are run through the puppet parser, and the
+    output is stored as a PuppetFileContent keyed by that checksum.
+
+    Whatever was parsed successfully is committed, even if something
+    later on fails.
+
+    :param path_base: Directory containing the puppet environments.
+    :param environment_name: Name of the environment to ensure exists.
+    """
+    # Only needed here, so imported locally.
+    from concurrent.futures import ThreadPoolExecutor
+
+    ### Ensure that we have our environment
+    _ensure_environment(environment_name)
+
+    ### Identify all puppet files, and note the base of their content
+    # enumerate_files(path_base, environment_name)
+
+    ### Find all puppet files which we haven't parsed
+
+    result = db.engine.execute(text("""
+        SELECT
+            f.id,
+            f.path,
+            f.last_parse,
+            f.checksum,
+            env.name
+        FROM puppet_file f
+        LEFT OUTER JOIN puppet_file_content c
+            ON f.checksum = c.checksum
+        LEFT JOIN puppet_environment env
+            ON f.environment = env.id
+        WHERE c.json IS NULL
+        """))
+
+    base = os.fsencode(path_base)
+    # checksum → a file with that content. Identical files (within or
+    # between environments) are thereby only parsed and stored once.
+    pending = {}
+    for (_file_id, file_path, _last_parse, checksum, env_name) in result:
+        print(env_name, file_path)
+        # Both str and bytes are accepted for each component, but
+        # os.path.join requires them to agree.
+        full_path = os.path.join(base,
+                                 os.fsencode(env_name),
+                                 os.fsencode(file_path))
+
+        try:
+            current_checksum = _sha256_of(full_path)
+        except OSError as err:
+            # File removed (or made unreadable) since it was recorded.
+            print(f'Failed: {env_name}/{file_path}: {err}')
+            continue
+
+        if current_checksum != checksum:
+            print(f'Checksum changed for {env_name}/{file_path}')
+            # db.engine.execute(model.PuppetFile.delete().where(model.PuppetFile.id == id))
+            continue
+
+        pending.setdefault(checksum, full_path)
+
+    ### Parse them, and store the result
+
+    try:
+        # The parsers are external processes, so threads suffice to run
+        # them in parallel. The pool bounds how many run at once, and
+        # all database access stays on this thread.
+        with ThreadPoolExecutor() as pool:
+            parsed = pool.map(_parse_by_checksum, pending.items())
+            for checksum, json_data, err in parsed:
+                if err is not None:
+                    # TODO cache error
+                    print('Error:', err)
+                    continue
+                print(checksum)
+                db.session.add(model.PuppetFileContent(
+                    checksum=checksum,
+                    # The column is text, the parser output is bytes
+                    json=json_data.decode('UTF-8')))
+    finally:
+        db.session.commit()
+
+    # TODO enumerate the classes of each parsed file (interpret_file on
+    # the stored json) and record them as PuppetClass. The code which
+    # used to do this read PuppetFile.json, which no longer exists.
diff --git a/pyenc/model.py b/pyenc/model.py
index 0a240d9..e774014 100644
--- a/pyenc/model.py
+++ b/pyenc/model.py
@@ -43,26 +43,66 @@ class Host(db.Model):
for column in self.__table__.columns}
+class PuppetEnvironment(db.Model):
+    """
+    A puppet environment.
+
+    An environment is a collection of modules, but here we only keep
+    the files of the modules, in PuppetFile.
+    """
+    __tablename__ = 'puppet_environment'
+    # Primary key, referenced by PuppetFile.environment
+    id = db.Column(db.Integer, primary_key=True)
+    # Name of the environment
+    name = db.Column(db.Text, nullable=False)
+
+
class PuppetFile(db.Model):
"""
Puppet source code file.
Keeps track of known puppet files. Each file contains 0 to many
puppet classes.
+
+ Each file is uniquely identified by the pair (path, environment).
"""
__tablename__ = 'puppet_file'
id = db.Column(db.Integer, primary_key=True)
# Where we found the file
+ # TODO normalize this to <path-inside-environment>
path = db.Column(db.Text, nullable=False)
- # Output of 'puppet parser dump --format json <filename>'
- json = db.Column(db.Text, nullable=False)
+
+ environment = db.Column(db.Integer, db.ForeignKey(f'{PuppetEnvironment.__tablename__}.id'))
+
+ # Checksum of the content, should be usable as a key in
+ # PuppetFileContent
+ # TODO flask weak keys?
+ checksum = db.Column(db.Text)
+
# When we last read data into json
last_parse = db.Column(db.Float)
# classes = db.relationship('PuppetClass', back_populates='comes_from')
classes = db.relationship('PuppetClass', backref='comes_from')
+class PuppetFileContent(db.Model):
+    """
+    (Parsed) contents of puppet source files.
+
+    Separate from PuppetFile since many environments can share files,
+    and I don't want to store redundant data.
+    """
+    __tablename__ = 'puppet_file_content'
+
+    id = db.Column(db.Integer, primary_key=True)
+
+    # Checksum of the original file
+    checksum = db.Column(db.Text, nullable=False)
+
+    # Output of 'puppet parser dump --format json <filename>'
+    json = db.Column(db.Text, nullable=False)
+
+
+
class PuppetClass(db.Model):
"""
A puppet class.
diff --git a/pyenc/templates/start_page.html b/pyenc/templates/start_page.html
index 8df08a3..268f7d1 100644
--- a/pyenc/templates/start_page.html
+++ b/pyenc/templates/start_page.html
@@ -19,6 +19,7 @@
<li>
<input id="{{ id }}" type="checkbox" name="cls" value="{{ cls.class_name }}"/>
<label for="{{ id }}">{{ cls.class_name }}</label>
+ <a style="float: right; padding-left: 1em" href="/api/file?path={{cls.comes_from.path}}">Source</a>
</li>
{% endwith %}
{% endfor %}