path: root/muppet/gather.py
"""
Methods for gathering data.

Gathers information about all puppet modules, including which are
present in our environment, their metadata, and their output of
``puppet strings``.
"""

from dataclasses import dataclass
from typing import (
    Any,
)
import json
import os.path
import hashlib
from .puppet.strings import puppet_strings
from .cache import Cache


@dataclass
class ModuleEntry:
    """
    One entry in a module.

    :param name:
        Local name of the module, should always be the basename of path
    :param path:
        Absolute path in the filesystem where the module can be found.
    :param strings_output:
        Output of ``puppet strings``.
    """

    name: str
    path: str
    strings_output: bytes
    metadata: dict[str, Any]

    def file(self, path: str) -> str:
        """Return the absolute path of a path inside the module."""
        return os.path.join(self.path, path)
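
# A minimal usage sketch of ``ModuleEntry.file`` (the module path below is
# illustrative, not taken from a real environment):
#
#     entry = ModuleEntry(
#         name='stdlib',
#         path='/etc/puppetlabs/code/environments/production/modules/stdlib',
#         strings_output=b'',
#         metadata={},
#     )
#     entry.file('metadata.json')
#     # => '/etc/puppetlabs/code/environments/production/modules/stdlib/metadata.json'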


def get_puppet_strings(cache: Cache, path: str) -> bytes:
    """
    Run puppet string, but check cache first.

    The cache uses the contents of metadata.json as its key,
    so any updates without an updated metadata.json wont't be
    detected.

    Hashing the entire contents of the module was tested, but was to
    slow.
    """
    try:
        with open(os.path.join(path, 'metadata.json'), 'rb') as f:
            data = f.read()
            key = 'puppet-strings' + hashlib.sha1(data).hexdigest()
            if parsed := cache.get(key):
                result = parsed
            else:
                result = puppet_strings(path)
                cache.put(key, result)
            return result
    except FileNotFoundError:
        # TODO actually run puppet strings again.
        # This is just because without a metadata.json we always get a
        # cache miss, which is slow.
        # return puppet_strings(path)
        return b''

        # try:
        #     with open(module.file('.git/FETCH_HEAD')) as f:
        #         st = os.stat(f.fileno())
        #         st.st_mtime
        # except FileNotFoundError:
        #     pass
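
# Sketch of the cache-key scheme used above: the literal prefix
# 'puppet-strings' is concatenated directly with the SHA-1 hex digest of the
# raw metadata.json bytes (note: no separator).  For example, hashing the
# empty byte string:
#
#     hashlib.sha1(b'').hexdigest()
#     # => 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
#     'puppet-strings' + hashlib.sha1(b'').hexdigest()
#     # => 'puppet-stringsda39a3ee5e6b4b0d3255bfef95601890afd80709'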


def get_modules(cache: Cache, dir: str) -> list[ModuleEntry]:
    """
    Enumerate modules in directory.

    The directory should be the modules subdirectory of an environment,
    e.g. /etc/puppetlabs/code/environments/production/modules.
    """
    modules: list[ModuleEntry] = []

    for entry in sorted(os.scandir(dir), key=lambda d: d.name):
        # TODO Logging
        # print('- entry', entry, file=sys.stderr)
        if not entry.is_dir():
            # Skip stray files; only directories can be modules.
            continue
        name = entry.name
        path = entry.path
        strings_data = get_puppet_strings(cache, path)

        try:
            with open(os.path.join(path, 'metadata.json')) as f:
                metadata = json.load(f)
        except FileNotFoundError:
            metadata = {}

        modules.append(ModuleEntry(name, path, strings_data, metadata))

    return modules
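
# A minimal end-to-end sketch (how Cache is constructed is an assumption
# here; see cache.py for the real interface):
#
#     from muppet.cache import Cache
#     from muppet.gather import get_modules
#
#     cache = Cache(...)  # hypothetical constructor arguments
#     modules = get_modules(
#         cache, '/etc/puppetlabs/code/environments/production/modules')
#     for module in modules:
#         print(module.name, module.metadata.get('version'))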