aboutsummaryrefslogtreecommitdiff
path: root/pyenc/enumerate_classes.py
diff options
context:
space:
mode:
Diffstat (limited to 'pyenc/enumerate_classes.py')
-rw-r--r--pyenc/enumerate_classes.py235
1 files changed, 235 insertions, 0 deletions
diff --git a/pyenc/enumerate_classes.py b/pyenc/enumerate_classes.py
new file mode 100644
index 0000000..822852e
--- /dev/null
+++ b/pyenc/enumerate_classes.py
@@ -0,0 +1,235 @@
+
"""
Load all puppet files in the environment, parse them, and store the
parsed data in the database.
"""
+
+import hashlib
+import json
+import os
+import os.path
+import subprocess
+import time
+from sqlalchemy.sql import text
+
+import threading
+from threading import Lock, Thread
+from queue import Queue
+
+
+# import pyenc
+from pyenc.model import db
+from pyenc import model
+
+from typing import Union, Generator
+
+
def with_lock(lock, proc):
    """Run *proc* while holding *lock*.

    Equivalent to ``with lock: proc()`` — the lock is released even if
    *proc* raises.  (The original acquired inside the ``try`` and
    released in ``finally``, which would also attempt a release when
    the acquire itself failed; the context-manager form avoids that.)
    """
    with lock:
        proc()
+
+
def call(proc, *args):
    """Invoke *proc* with the given positional arguments.

    The callable's return value is discarded (this helper always
    returns ``None``).
    """
    proc(*args)
+
# Type alias: a filesystem path, as either text or raw bytes.
path = Union[str, bytes]
+
def find(path: path, **kvs) -> list[bytes]:
    """Wrapper around find(1).

    Each keyword argument becomes a ``-key value`` predicate on the
    command line (e.g. ``type='f'`` -> ``-type f``).

    Returns the matched paths as a list of bytes (find's raw
    NUL-separated ``-print0`` output, empty entries dropped).

    Raises subprocess.CalledProcessError if find exits non-zero.
    """
    cmdline = ['find', path]
    for key, value in kvs.items():
        cmdline.append(f'-{key}')
        cmdline.append(value)
    cmdline.append('-print0')

    cmd = subprocess.run(cmdline, capture_output=True, check=True)
    # Bug fix: the annotation promises list[bytes] but the original
    # returned a generator expression; callers take len() and iterate,
    # so materialize a real list.
    return [f for f in cmd.stdout.split(b'\0') if f]
+
+
class PuppetParseError(Exception):
    """Raised when ``puppet parser`` fails on a manifest.

    Attributes:
        code -- the puppet process's exit status.
        msg  -- its captured stderr output (decoded text).
    """

    def __init__(self, code, msg):
        super().__init__()
        self.code = code
        self.msg = msg

    def __repr__(self):
        # Bug fix: the message previously read 'PuppetParserError',
        # which is not this class's name.
        return f'PuppetParseError({self.code}, {self.msg})'

    def __str__(self):
        return repr(self)
+
+
def puppet_parse(file: path) -> bytes:
    """Parse a puppet manifest and return its JSON AST as raw bytes.

    Runs ``puppet parser dump --format json <file>`` and returns its
    stdout unmodified.

    Raises PuppetParseError (carrying puppet's exit code and decoded
    stderr) if the process exits non-zero.
    """
    with subprocess.Popen(
            ['puppet', 'parser', 'dump', '--format', 'json', file],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE) as cmd:
        # Bug fix: the original checked `cmd.retuncode` — a misspelling
        # of `returncode` that raised AttributeError — and did so before
        # the process had exited, when returncode is still None anyway.
        # Reading stdout to EOF first also avoids deadlocking on a full
        # pipe; wait() below performs the only meaningful status check.
        json_data = cmd.stdout.read()

        if (value := cmd.wait()) != 0:
            raise PuppetParseError(value, cmd.stderr.read().decode('UTF-8'))

    return json_data
+
+
def parse_files(files: list[path]) -> Generator[model.PuppetFile]:
    """Yield a PuppetFile ORM object for each file that needs (re)parsing.

    For every path in *files*:
      * skip it if the stored row's last_parse timestamp is newer than
        the file's on-disk mtime (unchanged since the last run);
      * otherwise reuse the existing row (or create a new one), stamp
        it with the current time, and yield it.

    Yielded objects are NOT added to the db session here — that is the
    caller's job.
    """
    for i, file in enumerate(files):
        try:
            stat = os.stat(file)

            last_modify = stat.st_mtime
            old_object = model.PuppetFile.query \
                .where(model.PuppetFile.path == file) \
                .first()

            if old_object and old_object.last_parse > last_modify:
                # file unchanged since our last parse, skip
                continue

            # Progress indicator: index out of total.
            print(f'{i}/{len(files)}: {file}')

            if old_object:
                puppet_file = old_object
            else:
                puppet_file = model.PuppetFile(path=file)
            puppet_file.last_parse = time.time()
            # m.json = puppet_parse(file)

            yield puppet_file

        except PuppetParseError as err:
            # TODO cache error
            # NOTE(review): with the puppet_parse call above commented
            # out, nothing in this body raises PuppetParseError — this
            # handler is currently dead; confirm before removing.
            print('Error:', err)
            continue
+
+
def interpret_file(json_data: dict) -> list[str]:
    """Find all classes in json-representation of file.

    The AST tags each node under the '^' key; a 'class' node stores its
    attributes as a flat ['key', value, ...] list under '#', where the
    value following 'name' is the class name.
    """
    top = json_data['^']
    kind = top[0]

    if kind == 'class':
        # Single class at top level: pull the value following 'name'.
        attrs = top[1]['#']
        return [attrs[attrs.index('name') + 1]]

    if kind == 'block':
        # A block wraps several nodes; collect every class child.
        names = []
        for node in top[1:]:
            inner = node['^']
            if inner[0] == 'class':
                attrs = inner[1]['#']
                names.append(attrs[attrs.index('name') + 1])
        return names

    # Any other top-level node contains no classes.
    return []
+
+
+
def enumerate_files(path_base, environment_name):
    """Stage all *.pp files under <path_base>/<environment_name> as
    PuppetFile rows.

    Skips files unchanged since their last parse (see parse_files);
    stores each file's path relative to the environment root, its
    owning environment id, and a sha256 checksum of its content.

    The session is committed even on error, so rows staged before a
    failure survive a partial run.

    Raises ValueError if the environment does not exist in the
    database.
    """
    # Bug fix: the original referenced an undefined name `environment`
    # (the parameter is `environment_name`) and always raised
    # NameError.  Resolve the ORM object from the name, as run() does.
    environment = model.PuppetEnvironment.query \
        .where(model.PuppetEnvironment.name == environment_name) \
        .first()
    if environment is None:
        raise ValueError(f'unknown environment: {environment_name}')

    path = os.path.join(path_base, environment.name)

    files = list(find(path, type='f', name='*.pp'))

    try:
        for puppet_file in parse_files(files):
            with open(puppet_file.path, 'rb') as f:
                checksum = hashlib.sha256(f.read()).hexdigest()
            # Store puppet_file.path relative to the environment root
            # (find() returned bytes, so the root must be bytes too).
            puppet_file.path = os.path.relpath(puppet_file.path, path.encode('UTF-8'))
            # TODO does flask want the whole environment object?
            puppet_file.environment = environment.id
            puppet_file.checksum = checksum
            db.session.add(puppet_file)

    finally:
        db.session.commit()
+
+
def run(path_base: path, environment_name: str):
    """Top-level driver.

    Ensures *environment_name* exists in the database, then parses (in
    background threads) every puppet file whose content is not yet in
    puppet_file_content, and persists the resulting JSON.

    path_base: directory containing one subdirectory per environment.
    """
    ### Ensure that we have our environment
    environment = model.PuppetEnvironment.query \
        .where(model.PuppetEnvironment.name == environment_name) \
        .first()
    if not environment:
        environment = model.PuppetEnvironment(name=environment_name)
        db.session.add(environment)
        # TODO does this update the environment object
        db.session.commit()

    ### Identify all puppet files, and note the base of their content
    # enumerate_files(path_base, environment_name)

    ### Find all puppet files which we haven't parsed
    result = db.engine.execute(text("""
    SELECT
        f.id,
        f.path,
        f.last_parse,
        f.checksum,
        env.name
    FROM puppet_file f
    LEFT OUTER JOIN puppet_file_content c
      ON f.checksum = c.checksum
    LEFT JOIN puppet_environment env
      ON f.environment = env.id
    WHERE c.json IS NULL
    """))

    threads = []
    q = Queue()

    def worker(checksum, full_path):
        # Parse in a background thread and hand the result back over
        # the queue; all database writes stay on the main thread.
        # Bug fix: the original target computed (checksum, json) and
        # discarded it — nothing was ever q.put(), so the drain loop
        # below never wrote anything to the database.
        try:
            q.put((checksum, puppet_parse(full_path)))
        except PuppetParseError as err:
            # TODO record the failure in the database
            print('Error:', err)

    for (row_id, file_path, last_parse, checksum, env_name) in result:
        print(env_name, file_path)
        full_path = os.path.join(path_base.encode('UTF-8'),
                                 env_name.encode('UTF-8'),
                                 file_path)

        with open(full_path, 'rb') as f:
            current_checksum = hashlib.sha256(f.read()).hexdigest()

        if current_checksum != checksum:
            # On-disk content no longer matches the database row; skip
            # rather than store JSON for stale content.
            print(f'Checksum changed for {env_name}/{file_path}')
            # db.engine.execute(model.PuppetFile.delete().where(model.PuppetFile.id == row_id))
            continue

        thread = Thread(target=worker,
                        args=(checksum, full_path),
                        name=f'{env_name}/{file_path}')
        thread.start()
        threads.append(thread)

    # Bug fix: join the workers before draining; the original polled
    # q.empty() immediately, racing the threads and usually seeing an
    # empty queue.
    for thread in threads:
        thread.join()

    try:
        while not q.empty():
            (checksum, item) = q.get()
            pfc = model.PuppetFileContent(checksum=checksum, json=item)
            db.session.add(pfc)
            q.task_done()
    finally:
        db.session.commit()

    # NOTE(review): the original continued past a bare `return` with
    # unreachable code that extracted class names via interpret_file();
    # it referenced an undefined `path` variable and could never run,
    # so it was removed.  Class extraction remains TODO.