From 2518e8a9aa1fb4871a73012395e0b141bb92d919 Mon Sep 17 00:00:00 2001
From: Lucidiot
Date: Mon, 16 Sep 2019 10:53:38 +0200
Subject: [PATCH] First implementation

---
 madeleine/__init__.py  |   7 ++
 madeleine/base.py      | 170 +++++++++++++++++++++++++++++++++++++
 madeleine/generator.py |  42 ++++++++++
 madeleine/helpers.py   |  14 ++++
 madeleine/multiple.py  | 184 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 417 insertions(+)
 create mode 100644 madeleine/__init__.py
 create mode 100644 madeleine/base.py
 create mode 100644 madeleine/generator.py
 create mode 100644 madeleine/helpers.py
 create mode 100644 madeleine/multiple.py

diff --git a/madeleine/__init__.py b/madeleine/__init__.py
new file mode 100644
index 0000000..e216321
--- /dev/null
+++ b/madeleine/__init__.py
@@ -0,0 +1,7 @@
+from madeleine.base import (  # noqa: F401
+    Component, Reference, Value,
+)
+from madeleine.generator import Generator  # noqa: F401
+from madeleine.multiple import (  # noqa: F401
+    CompoundComponent, Repeat, AllOf, OneOf, Pick,
+)
diff --git a/madeleine/base.py b/madeleine/base.py
new file mode 100644
index 0000000..0c12424
--- /dev/null
+++ b/madeleine/base.py
@@ -0,0 +1,170 @@
+from abc import ABCMeta, abstractmethod, abstractproperty
+from collections.abc import Mapping, Iterable
+from enum import Enum
+from pathlib import Path
+from objtools.registry import ClassRegistry
+import json
+import random
+import yaml
+
+
+class FileFormat(Enum):
+    """
+    File formats a component description can be loaded from.
+    """
+    JSON = 'json'
+    YAML = 'yaml'
+
+
+class ComponentRegistry(ClassRegistry):
+    """
+    Class registry which only accepts Component subclasses.
+    """
+
+    def check_value(self, value):
+        # NOTE(review): presumably called by ClassRegistry for each class
+        # being registered; confirm against the objtools documentation.
+        assert issubclass(value, Component)
+
+
+registry = ComponentRegistry()
+register = registry.register
+unregister = registry.unregister
+
+
+class ComponentMetaclass(registry.metaclass, ABCMeta):
+    """
+    Metaclass combining the registry's metaclass with ABCMeta, so that
+    components can also declare abstract methods.
+    """
+
+
+class Component(metaclass=ComponentMetaclass, register=False):
+    """
+    Describes any generator component.
+    """
+
+    def __init__(self, **data):
+        """
+        Store every keyword argument as an attribute of the component.
+        """
+        self.__dict__.update(**data)
+
+    def resolve_references(self, references):
+        """
+        Complex components might hold references to other components:
+        they should leave them unresolved until the Generator builds all the
+        components, then calls this method with all of the referencable names.
+        """
+
+    @abstractmethod
+    def generate(self):
+        """
+        Should return the result of one generation: a string, or None when
+        the component generates nothing.
+        """
+
+    @abstractproperty
+    def combinations(self):
+        """
+        Should return how many combinations a single component may have.
+        Useful to perform some checks after parsing.
+        """
+
+    @classmethod
+    def from_path(cls, path, fmt=FileFormat.YAML):
+        """
+        Load a component from the JSON or YAML file at `path`, which may be
+        a string or a Path. `fmt` must be a FileFormat.
+        When called on Component itself, the component type is guessed from
+        the file contents; when called on a subclass, the parsed mapping is
+        passed to its constructor as keyword arguments.
+        """
+        assert isinstance(fmt, FileFormat)
+        if not isinstance(path, Path):
+            path = Path(path)
+        method = json.load if fmt == FileFormat.JSON else yaml.safe_load
+        # Only keep the file open while parsing it: building the components
+        # may open more files through Include.
+        with path.open() as f:
+            data = method(f)
+        if cls is Component:
+            # Guess the component type when calling Component.from_path
+            return Component._make(data)
+        return cls(**data)
+
+    @staticmethod
+    def _make(data):
+        """
+        Guess which component subclass to use for a given component data,
+        then build and return the resulting components.
+        A string is a shorthand for a Value component, and any other
+        non-mapping iterable is turned into a list of components.
+        Raises ValueError when the description cannot be parsed.
+        """
+        if isinstance(data, str):
+            data = {'value': data}
+        if not isinstance(data, Mapping):
+            if isinstance(data, Iterable):
+                return list(map(Component._make, data))
+            raise ValueError('Component description should be a mapping')
+
+        for key, component_class in registry.items():
+            if key in data:
+                return component_class(**data)
+
+        raise ValueError('Could not parse component description')
+
+
+class Reference(Component, key='ref'):
+    """
+    A component used to temporarily hold references to other components
+    during the first pass of schema loading, when not all referenced names
+    are available, before a second pass allows resolution of all references.
+    """
+
+    @property
+    def combinations(self):
+        raise TypeError('Unresolved reference to {!r}'.format(self.ref))
+
+    def generate(self):
+        raise TypeError('Unresolved reference to {!r}'.format(self.ref))
+
+
+class Value(Component, key='value'):
+    """
+    A component that holds a string value.
+    Will return the value on every generation, unless `optional` is True,
+    in which case it may return None 50% of the time.
+    """
+
+    def __init__(self, value, optional=False, **kwargs):
+        self.value = value
+        self.optional = optional
+        super().__init__(**kwargs)
+
+    @property
+    def combinations(self):
+        # The value itself, plus None when the value is optional
+        return 1 + self.optional
+
+    def generate(self):
+        if not self.optional or random.randrange(2):
+            return self.value
+        return None
+
+
+class Include(Component, type, key='include'):
+    """
+    Automatically include another generator file.
+    """
+
+    def __new__(*args, include=None, format=None, **kwargs):
+        # No Include instance is ever built: the component loaded from the
+        # included file is returned instead. `format` may be a FileFormat or
+        # its string value, and defaults to YAML.
+        if format is None:
+            format = FileFormat.YAML
+        if isinstance(format, str):
+            format = FileFormat(format)
+        return Component.from_path(include, fmt=format)
diff --git a/madeleine/generator.py b/madeleine/generator.py
new file mode 100644
index 0000000..1951af5
--- /dev/null
+++ b/madeleine/generator.py
@@ -0,0 +1,42 @@
+from collections.abc import Mapping
+from madeleine import Component
+
+
+class Generator(Component, key='main'):
+    """
+    Root component of a generator description: builds the named components
+    and the `main` component, to which generation is delegated.
+    """
+
+    def __init__(self, **data):
+        """
+        Expects a `version` equal to '1', a `main` component description
+        and, optionally, a `components` mapping of names to descriptions.
+        """
+        assert 'version' in data, 'Missing generator spec version'
+        assert data['version'] == '1', 'Incompatible generator spec version'
+        assert 'main' in data, 'Missing main component'
+        data.setdefault('components', {})
+        assert isinstance(data['components'], Mapping), \
+            'Components should be a mapping'
+
+        self.version = data['version']
+        self.components = {}
+        for k, v in data['components'].items():
+            self.components[k] = Component._make(v)
+
+        self.main = Component._make(data['main'])
+        # Allows components to reference the main component
+        self.components.setdefault('main', self.main)
+
+        # Let components resolve all their references
+        for component in self.components.values():
+            component.resolve_references(self.components)
+        self.main.resolve_references(self.components)
+
+    @property
+    def combinations(self):
+        return self.main.combinations
+
+    def generate(self):
+        return self.main.generate()
diff --git a/madeleine/helpers.py b/madeleine/helpers.py
new file mode 100644
index 0000000..ed9e7d8
--- /dev/null
+++ b/madeleine/helpers.py
@@ -0,0 +1,14 @@
+def binom(n, k):
+    """
+    Computes the binomial coefficient using a multiplicative formula.
+    Stolen from https://stackoverflow.com/a/46778364/5990435
+    Expects 0 <= k <= n.
+    """
+    assert 0 <= k <= n
+    if k == 0 or k == n:
+        return 1
+    b = 1
+    # C(n, k) == C(n, n - k): iterate over the smaller of the two
+    for i in range(min(k, n-k)):
+        b = b * (n - i) // (i + 1)
+    return b
diff --git a/madeleine/multiple.py b/madeleine/multiple.py
new file mode 100644
index 0000000..14ef867
--- /dev/null
+++ b/madeleine/multiple.py
@@ -0,0 +1,184 @@
+from functools import reduce, partial
+from itertools import combinations, combinations_with_replacement
+from madeleine import Component, Reference
+from madeleine.helpers import binom
+import operator
+import random
+
+
+class Repeat(Component, key='repeat'):
+    """
+    Repeat another component N times. The amount of times a component should
+    be repeated can be defined exactly using the `n` parameter, or be generated
+    randomly at each generation with a `min` and `max`.
+    Setting `unique` to True will ensure every item in the result is unique.
+    CAUTION: Ensure that the repeated component can return enough different
+    values to fill the unique constraint, or the generator will end up in an
+    infinite loop.
+    """
+
+    def __init__(self,
+                 repeat,
+                 n=None,
+                 min=0,
+                 max=None,
+                 unique=False,
+                 separator=' ',
+                 **kwargs):
+        self.repeat = Component._make(repeat)
+        assert (n is not None) ^ (max is not None), 'Either set `n` or `max`'
+        self.n = n
+        self.min = min
+        self.max = max
+        self.unique = unique
+        self.separator = separator
+        super().__init__(**kwargs)
+
+    def resolve_references(self, references):
+        # NOTE(review): components referencing each other in a cycle make
+        # this recurse until a RecursionError is raised.
+        if isinstance(self.repeat, Reference):
+            self.repeat = references[self.repeat.ref]
+        self.repeat.resolve_references(references)
+
+    @property
+    def combinations(self):
+        base_combinations = self.repeat.combinations
+        if self.n is None:
+            amounts = range(self.min, self.max + 1)
+        else:
+            amounts = (self.n, )
+        if self.unique:
+            return sum(binom(base_combinations, k) for k in amounts)
+        return sum(base_combinations ** k for k in amounts)
+
+    def generate(self):
+        amount = self.n
+        if amount is None:
+            amount = random.randint(self.min, self.max)
+        results = []
+        while len(results) < amount:
+            result = self.repeat.generate()
+            if self.unique and result in results:
+                continue
+            results.append(result)
+        # Optional components may generate None, which cannot be joined
+        return self.separator.join(filter(None, results))
+
+
+class CompoundComponent(Component, register=False):
+    """
+    Abstract component for all components which hold multiple child components.
+    """
+    items_key = None
+
+    def __init__(self, **data):
+        assert self.items_key, \
+            'Missing {}.items_key attribute'.format(self.__class__.__name__)
+        self.items = list(map(Component._make, data.pop(self.items_key)))
+        super().__init__(**data)
+
+    def resolve_references(self, references):
+        # NOTE(review): components referencing each other in a cycle make
+        # this recurse until a RecursionError is raised.
+        for i, item in enumerate(self.items):
+            if isinstance(item, Reference):
+                item = self.items[i] = references[item.ref]
+            item.resolve_references(references)
+
+
+class AllOf(CompoundComponent, key='allOf'):
+    """
+    Component which simply joins all of its child components.
+    """
+    items_key = 'allOf'
+
+    def __init__(self, separator=' ', **data):
+        self.separator = separator
+        super().__init__(**data)
+
+    @property
+    def combinations(self):
+        # Start from 1 so that an empty item list has a single combination
+        return reduce(operator.mul, map(
+            operator.attrgetter('combinations'),
+            self.items,
+        ), 1)
+
+    def generate(self):
+        return self.separator.join(
+            filter(None, [c.generate() for c in self.items])
+        )
+
+
+class Pick(CompoundComponent, key='pick'):
+    """
+    Component which randomly picks N of its child components.
+    """
+    items_key = 'pick'
+
+    def __init__(self,
+                 separator=' ',
+                 unique=False,
+                 n=None,
+                 min=0,
+                 max=None,
+                 **data):
+        assert (n is not None) ^ (max is not None), 'Either set `n` or `max`'
+        self.n = n
+        self.min = min
+        self.max = max
+        self.unique = unique
+        self.separator = separator
+        super().__init__(**data)
+
+    @property
+    def combinations(self):
+        child_combinations = tuple(
+            map(operator.attrgetter('combinations'), self.items)
+        )
+        method = combinations if self.unique else combinations_with_replacement
+        if self.n is None:
+            # `max` is inclusive, as in generate()
+            pick_range = range(self.min, self.max + 1)
+        else:
+            pick_range = (self.n, )
+        # There probably is a neat formula to compute the combinations
+        # with(out) replacement and include the combinations of the subsequent
+        # draws, knowing each item has a different number of combinations,
+        # but this goes well above my understanding of Wikiversity's
+        # combinatorics course.
+        # For each possible amount k of picks, multiply the combinations of
+        # each combination of k items with(out) replacement.
+        # Picking zero items counts as one combination: the empty product.
+        product = partial(reduce, operator.mul)
+        return sum(
+            product(picked, 1)
+            for k in pick_range
+            for picked in method(child_combinations, k)
+        )
+
+    def generate(self):
+        amount = self.n
+        if amount is None:
+            amount = random.randint(self.min, self.max)
+        method = random.sample if self.unique else random.choices
+        return self.separator.join(
+            filter(None, [
+                c.generate() for c in method(self.items, k=amount)
+            ]),
+        )
+
+
+class OneOf(CompoundComponent, key='oneOf'):
+    """
+    Component which randomly picks one of its child components.
+    """
+    items_key = 'oneOf'
+
+    @property
+    def combinations(self):
+        return sum(map(operator.attrgetter('combinations'), self.items))
+
+    def generate(self):
+        return random.choice(self.items).generate()