mu/archive/1.vm/010vm.cc

901 lines
27 KiB
C++
Raw Normal View History

2016-10-22 23:33:16 +00:00
//: A Mu program is a book of 'recipes' (functions)
2015-02-18 01:14:53 +00:00
:(before "End Globals")
2015-07-05 01:22:13 +00:00
//: Each recipe is stored at a specific page number, or ordinal.
map<recipe_ordinal, recipe> Recipe;
2015-07-05 01:22:13 +00:00
//: You can also refer to each recipe by its name.
map<string, recipe_ordinal> Recipe_ordinal;
recipe_ordinal Next_recipe_ordinal = 1;
2015-02-18 01:14:53 +00:00
2015-07-05 01:22:13 +00:00
//: Ordinals are like numbers, except you can't do arithmetic on them. Ordinal
//: 1 is not less than 2, it's just different. Phone numbers are ordinals;
//: adding two phone numbers is meaningless. Here each recipe does something
//: incommensurable with any other recipe.
:(after "Types")
typedef int recipe_ordinal;
2015-07-05 01:22:13 +00:00
2015-02-18 01:14:53 +00:00
:(before "End Types")
2015-07-05 01:22:13 +00:00
// Recipes are lists of instructions. To perform or 'run' a recipe, the
// computer runs its instructions.
2015-02-18 01:14:53 +00:00
struct recipe {
recipe_ordinal ordinal;
string name;
vector<instruction> steps;
2015-04-13 03:56:45 +00:00
// End recipe Fields
2015-10-29 20:01:04 +00:00
recipe();
2015-02-18 01:14:53 +00:00
};
:(before "struct recipe")
// Each instruction is either of the form:
// product1, product2, product3, ... <- operation ingredient1, ingredient2, ingredient3, ...
2015-04-24 23:52:22 +00:00
// or just a single 'label' starting with a non-alphanumeric character
// +label
// Labels don't do anything, they're just named locations in a recipe.
2015-02-18 01:14:53 +00:00
struct instruction {
bool is_label;
string label; // only if is_label
string name; // only if !is_label
string original_string; // for error messages
recipe_ordinal operation; // get(Recipe_ordinal, name)
2015-02-18 01:14:53 +00:00
vector<reagent> ingredients; // only if !is_label
vector<reagent> products; // only if !is_label
// End instruction Fields
2015-02-18 01:14:53 +00:00
instruction();
void clear();
2016-02-19 21:57:43 +00:00
bool is_empty();
2015-02-18 01:14:53 +00:00
};
:(before "struct instruction")
2015-02-18 01:22:10 +00:00
// Ingredients and products are a single species -- a reagent. Reagents refer
// either to numbers or to locations in memory along with 'type' tags telling
// us how to interpret them. They also can contain arbitrary other lists of
// properties besides types, but we're getting ahead of ourselves.
2015-02-18 01:14:53 +00:00
struct reagent {
string original_string;
2015-02-18 01:14:53 +00:00
string name;
type_tree* type;
2016-02-24 00:30:59 +00:00
vector<pair<string, string_tree*> > properties; // can't be a map because the string_tree sometimes needs to be NULL, which can be confusing
double value;
bool initialized;
// End reagent Fields
2016-05-26 01:56:37 +00:00
reagent(const string& s);
reagent() :type(NULL), value(0), initialized(false) {}
2018-03-14 07:59:41 +00:00
reagent(type_tree* t) :type(t), value(0), initialized(false) {}
~reagent();
2015-11-28 09:28:39 +00:00
void clear();
2016-11-11 19:42:20 +00:00
reagent(const reagent& original);
reagent& operator=(const reagent& original);
2016-10-20 05:10:35 +00:00
void set_value(double v) { value = v; initialized = true; }
2015-02-18 01:14:53 +00:00
};
:(before "struct reagent")
// Types can range from a simple type ordinal, to arbitrarily complex trees of
2015-10-26 07:23:43 +00:00
// type parameters, like (map (address array character) (list number))
struct type_tree {
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
bool atom;
string name; // only if atom
type_ordinal value; // only if atom
type_tree* left; // only if !atom
type_tree* right; // only if !atom
~type_tree();
2016-11-11 19:42:20 +00:00
type_tree(const type_tree& original);
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
// atomic type ordinal
2016-07-02 05:34:50 +00:00
explicit type_tree(string name);
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
type_tree(string name, type_ordinal v) :atom(true), name(name), value(v), left(NULL), right(NULL) {}
// tree of type ordinals
type_tree(type_tree* l, type_tree* r) :atom(false), value(0), left(l), right(r) {}
2016-11-11 19:42:20 +00:00
type_tree& operator=(const type_tree& original);
bool operator==(const type_tree& other) const;
bool operator!=(const type_tree& other) const { return !operator==(other); }
bool operator<(const type_tree& other) const;
bool operator>(const type_tree& other) const { return other.operator<(*this); }
};
struct string_tree {
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
bool atom;
string value; // only if atom
string_tree* left; // only if !atom
string_tree* right; // only if !atom
~string_tree();
2016-11-11 19:42:20 +00:00
string_tree(const string_tree& original);
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
// atomic string
explicit string_tree(string v) :atom(true), value(v), left(NULL), right(NULL) {}
// tree of strings
string_tree(string_tree* l, string_tree* r) :atom(false), left(l), right(r) {}
};
// End type_tree Definition
2016-07-02 05:34:50 +00:00
:(code)
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
type_tree::type_tree(string name) :atom(true), name(name), value(get(Type_ordinal, name)), left(NULL), right(NULL) {}
2015-02-18 01:14:53 +00:00
:(before "End Globals")
// Locations refer to a common 'memory'. Each location can store a number.
map<int, double> Memory;
2017-07-09 21:34:17 +00:00
:(before "End Reset")
Memory.clear();
2015-02-18 01:14:53 +00:00
:(after "Types")
// Mu types encode how the numbers stored in different parts of memory are
2015-02-18 01:14:53 +00:00
// interpreted. A location tagged as a 'character' type will interpret the
2015-10-26 07:23:43 +00:00
// value 97 as the letter 'a', while a different location of type 'number'
2015-02-18 01:14:53 +00:00
// would not.
//
2016-10-22 23:56:07 +00:00
// Unlike most computers today, Mu stores types in a single big table, shared
// by all the Mu programs on the computer. This is useful in providing a
// seamless experience to help understand arbitrary Mu programs.
typedef int type_ordinal;
2015-02-18 01:14:53 +00:00
:(before "End Globals")
map<string, type_ordinal> Type_ordinal;
map<type_ordinal, type_info> Type;
type_ordinal Next_type_ordinal = 1;
2018-05-13 02:55:21 +00:00
type_ordinal Number_type_ordinal = 0;
type_ordinal Boolean_type_ordinal = 0;
type_ordinal Character_type_ordinal = 0;
type_ordinal Address_type_ordinal = 0;
type_ordinal Array_type_ordinal = 0;
:(code)
void setup_types() {
Type.clear(); Type_ordinal.clear();
put(Type_ordinal, "literal", 0);
Next_type_ordinal = 1;
2015-04-13 03:47:49 +00:00
// Mu Types Initialization
2018-05-13 02:55:21 +00:00
Number_type_ordinal = put(Type_ordinal, "number", Next_type_ordinal++);
get_or_insert(Type, Number_type_ordinal).name = "number";
put(Type_ordinal, "location", Number_type_ordinal); // synonym of number for addresses we'll never look up
Address_type_ordinal = put(Type_ordinal, "address", Next_type_ordinal++);
get_or_insert(Type, Address_type_ordinal).name = "address";
Boolean_type_ordinal = put(Type_ordinal, "boolean", Next_type_ordinal++);
get_or_insert(Type, Boolean_type_ordinal).name = "boolean";
Character_type_ordinal = put(Type_ordinal, "character", Next_type_ordinal++);
get_or_insert(Type, Character_type_ordinal).name = "character";
// Array types are a special modifier to any other type. For example,
// array:number or array:address:boolean.
2018-05-13 02:55:21 +00:00
Array_type_ordinal = put(Type_ordinal, "array", Next_type_ordinal++);
get_or_insert(Type, Array_type_ordinal).name = "array";
2015-04-13 03:47:49 +00:00
// End Mu Types Initialization
}
void teardown_types() {
2016-10-20 05:10:35 +00:00
for (map<type_ordinal, type_info>::iterator p = Type.begin(); p != Type.end(); ++p) {
for (int i = 0; i < SIZE(p->second.elements); ++i)
p->second.elements.clear();
}
Type_ordinal.clear();
}
:(before "End One-time Setup")
setup_types();
atexit(teardown_types);
2015-02-18 01:14:53 +00:00
:(before "End Types")
// You can construct arbitrary new types. New types are either 'containers'
// with multiple 'elements' of other types, or 'exclusive containers' containing
// one of multiple 'variants'. (These are similar to C structs and unions,
// respectively, though exclusive containers implicitly include a tag element
// recording which variant they should be interpreted as.)
2015-02-20 07:49:13 +00:00
//
// For example, storing bank balance and name for an account might require a
// container, but if bank accounts may be either for individuals or groups,
// with different properties for each, that may require an exclusive container
// whose variants are individual-account and joint-account containers.
enum kind_of_type {
2015-10-27 03:06:51 +00:00
PRIMITIVE,
CONTAINER,
EXCLUSIVE_CONTAINER
};
2015-02-18 01:14:53 +00:00
struct type_info {
string name;
kind_of_type kind;
vector<reagent> elements;
2015-04-13 03:47:49 +00:00
// End type_info Fields
type_info() :kind(PRIMITIVE) {
// End type_info Constructor
}
2015-02-18 01:14:53 +00:00
};
enum primitive_recipes {
IDLE = 0,
COPY,
// End Primitive Recipe Declarations
MAX_PRIMITIVE_RECIPES,
};
:(code)
2015-04-17 18:22:59 +00:00
//: It's all very well to construct recipes out of other recipes, but we need
//: to know how to do *something* out of the box. For the following
2016-10-22 23:56:07 +00:00
//: recipes there are only codes, no entries in the book, because Mu just knows
2015-04-17 18:22:59 +00:00
//: what to do for them.
void setup_recipes() {
Recipe.clear(); Recipe_ordinal.clear();
put(Recipe_ordinal, "idle", IDLE);
2015-04-13 03:47:49 +00:00
// Primitive Recipe Numbers
put(Recipe_ordinal, "copy", COPY);
2015-04-13 03:47:49 +00:00
// End Primitive Recipe Numbers
}
//: We could just reset the recipe table after every test, but that gets slow
//: all too quickly. Instead, initialize the common stuff just once at
2015-04-08 08:15:41 +00:00
//: startup. Later layers will carefully undo each test's additions after
//: itself.
:(before "End One-time Setup")
setup_recipes();
assert(MAX_PRIMITIVE_RECIPES < 200); // level 0 is primitives; until 199
Next_recipe_ordinal = 200;
put(Recipe_ordinal, "main", Next_recipe_ordinal++);
// Load Mu Prelude
// End Mu Prelude
:(before "End Commandline Parsing")
assert(Next_recipe_ordinal < 1000); // recipes being tested didn't overflow into test space
2017-07-09 21:34:17 +00:00
:(before "End Reset")
Next_recipe_ordinal = 1000; // consistent new numbers for each test
//: One final detail: tests can modify our global tables of recipes and types,
//: so we need some way to clean up after each test is done so it doesn't
//: influence later ones.
:(before "End Globals")
map<string, recipe_ordinal> Recipe_ordinal_snapshot;
map<recipe_ordinal, recipe> Recipe_snapshot;
map<string, type_ordinal> Type_ordinal_snapshot;
map<type_ordinal, type_info> Type_snapshot;
:(before "End One-time Setup")
save_snapshots();
2017-07-09 21:34:17 +00:00
:(before "End Reset")
restore_snapshots();
:(code)
void save_snapshots() {
Recipe_ordinal_snapshot = Recipe_ordinal;
Recipe_snapshot = Recipe;
Type_ordinal_snapshot = Type_ordinal;
Type_snapshot = Type;
// End save_snapshots
}
void restore_snapshots() {
Recipe = Recipe_snapshot;
Recipe_ordinal = Recipe_ordinal_snapshot;
2016-09-22 23:30:09 +00:00
restore_non_recipe_snapshots();
}
// when running sandboxes in the edit/ app we'll want to restore everything except recipes defined in the app
void restore_non_recipe_snapshots() {
Type_ordinal = Type_ordinal_snapshot;
Type = Type_snapshot;
// End restore_snapshots
}
//:: Helpers
2015-02-18 01:14:53 +00:00
:(code)
2015-10-29 20:01:04 +00:00
recipe::recipe() {
ordinal = -1;
2015-10-29 20:01:04 +00:00
// End recipe Constructor
}
instruction::instruction() :is_label(false), operation(IDLE) {
// End instruction Constructor
}
2017-02-07 06:52:23 +00:00
void instruction::clear() {
is_label=false;
label.clear();
name.clear();
operation=IDLE;
ingredients.clear();
products.clear();
original_string.clear();
// End instruction Clear
}
2016-02-19 21:57:43 +00:00
bool instruction::is_empty() { return !is_label && name.empty(); }
2015-02-18 01:14:53 +00:00
// Reagents have the form <name>:<type>:<type>:.../<property>/<property>/...
2016-05-26 01:56:37 +00:00
reagent::reagent(const string& s) :original_string(s), type(NULL), value(0), initialized(false) {
2015-07-28 23:38:37 +00:00
// Parsing reagent(string s)
istringstream in(s);
in >> std::noskipws;
// name and type
istringstream first_row(slurp_until(in, '/'));
first_row >> std::noskipws;
name = slurp_until(first_row, ':');
string_tree* type_names = parse_property_list(first_row);
2016-08-31 16:53:11 +00:00
// End Parsing Reagent Type Property(type_names)
type = new_type_tree(type_names);
delete type_names;
// special cases
if (is_integer(name) && type == NULL)
2016-07-02 05:34:50 +00:00
type = new type_tree("literal");
if (name == "_" && type == NULL)
2016-07-02 05:34:50 +00:00
type = new type_tree("literal");
// other properties
2016-05-26 02:33:00 +00:00
slurp_properties(in, properties);
// End Parsing reagent
}
void slurp_properties(istream& in, vector<pair<string, string_tree*> >& out) {
while (has_data(in)) {
istringstream row(slurp_until(in, '/'));
row >> std::noskipws;
string key = slurp_until(row, ':');
string_tree* value = parse_property_list(row);
2016-05-26 02:33:00 +00:00
out.push_back(pair<string, string_tree*>(key, value));
2015-02-18 01:14:53 +00:00
}
}
2015-06-14 06:13:10 +00:00
string_tree* parse_property_list(istream& in) {
skip_whitespace_but_not_newline(in);
if (!has_data(in)) return NULL;
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
string_tree* first = new string_tree(slurp_until(in, ':'));
if (!has_data(in)) return first;
string_tree* rest = parse_property_list(in);
if (!has_data(in) && rest->atom)
return new string_tree(first, new string_tree(rest, NULL));
return new string_tree(first, rest);
}
:(before "End Unit Tests")
void test_parse_property_list_atom() {
istringstream in("a");
string_tree* x = parse_property_list(in);
CHECK(x->atom);
delete x;
}
void test_parse_property_list_list() {
istringstream in("a:b");
string_tree* x = parse_property_list(in);
CHECK(!x->atom);
CHECK(x->left->atom);
CHECK_EQ(x->left->value, "a");
CHECK(!x->right->atom);
CHECK(x->right->left->atom);
CHECK_EQ(x->right->left->value, "b");
CHECK(x->right->right == NULL);
delete x;
}
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
:(code)
type_tree* new_type_tree(const string_tree* properties) {
if (!properties) return NULL;
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
if (properties->atom) {
const string& type_name = properties->value;
int value = 0;
if (contains_key(Type_ordinal, type_name))
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
value = get(Type_ordinal, type_name);
2018-05-12 14:55:59 +00:00
else if (is_integer(type_name)) // sometimes types will contain literal integers, like for the size of an array
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
value = 0;
else if (properties->value == "->") // used in recipe types
value = 0;
else
value = -1; // should never happen; will trigger errors later
return new type_tree(type_name, value);
}
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
return new type_tree(new_type_tree(properties->left),
new_type_tree(properties->right));
}
//: avoid memory leaks for the type tree
2016-05-17 19:13:52 +00:00
reagent::reagent(const reagent& other) {
original_string = other.original_string;
name = other.name;
value = other.value;
initialized = other.initialized;
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(other.properties); ++i) {
properties.push_back(pair<string, string_tree*>(other.properties.at(i).first, copy(other.properties.at(i).second)));
}
type = copy(other.type);
// End reagent Copy Constructor
}
2016-11-11 19:42:20 +00:00
type_tree::type_tree(const type_tree& original) {
atom = original.atom;
name = original.name;
value = original.value;
left = copy(original.left);
right = copy(original.right);
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
}
2016-11-11 19:42:20 +00:00
type_tree& type_tree::operator=(const type_tree& original) {
atom = original.atom;
name = original.name;
value = original.value;
2016-11-11 23:41:46 +00:00
if (left) delete left;
left = copy(original.left);
2016-11-11 23:41:46 +00:00
if (right) delete right;
right = copy(original.right);
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
return *this;
}
bool type_tree::operator==(const type_tree& other) const {
if (atom != other.atom) return false;
if (atom)
return name == other.name && value == other.value;
return (left == other.left || *left == *other.left)
&& (right == other.right || *right == *other.right);
}
// only constraint we care about: if a < b then !(b < a)
bool type_tree::operator<(const type_tree& other) const {
if (atom != other.atom) return atom > other.atom; // atoms before non-atoms
if (atom) return value < other.value;
// first location in one that's missing in the other makes that side 'smaller'
if (left && !other.left) return false;
if (!left && other.left) return true;
if (right && !other.right) return false;
if (!right && other.right) return true;
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
// now if either pointer is unequal neither side can be null
// if one side is equal that's easy
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
if (left == other.left || *left == *other.left) return right && *right < *other.right;
if (right == other.right || *right == *other.right) return left && *left < *other.left;
// if the two sides criss-cross, pick the side with the smaller lhs
if ((left == other.right || *left == *other.right)
&& (right == other.left || *right == *other.left))
return *left < *other.left;
// now the hard case: both sides are not equal
// make sure we stay consistent between (a < b) and (b < a)
// just return the side with the smallest of the 4 branches
if (*left < *other.left && *left < *other.right) return true;
if (*right < *other.left && *right < *other.right) return true;
return false;
}
:(before "End Unit Tests")
// These unit tests don't always use valid types.
void test_compare_atom_types() {
reagent a("a:address"), b("b:boolean");
CHECK(*a.type < *b.type);
CHECK(!(*b.type < *a.type));
}
void test_compare_equal_atom_types() {
reagent a("a:address"), b("b:address");
CHECK(!(*a.type < *b.type));
CHECK(!(*b.type < *a.type));
}
void test_compare_atom_with_non_atom() {
reagent a("a:address:number"), b("b:boolean");
CHECK(!(*a.type < *b.type));
CHECK(*b.type < *a.type);
}
void test_compare_lists_with_identical_structure() {
reagent a("a:address:address"), b("b:address:boolean");
CHECK(*a.type < *b.type);
CHECK(!(*b.type < *a.type));
}
void test_compare_identical_lists() {
reagent a("a:address:boolean"), b("b:address:boolean");
CHECK(!(*a.type < *b.type));
CHECK(!(*b.type < *a.type));
}
void test_compare_list_with_extra_element() {
reagent a("a:address:address"), b("b:address:address:number");
CHECK(*a.type < *b.type);
CHECK(!(*b.type < *a.type));
}
void test_compare_list_with_smaller_left_but_larger_right() {
reagent a("a:number:character"), b("b:boolean:array");
CHECK(*a.type < *b.type);
CHECK(!(*b.type < *a.type));
}
void test_compare_list_with_smaller_left_but_larger_right_identical_types() {
reagent a("a:address:boolean"), b("b:boolean:address");
CHECK(*a.type < *b.type);
CHECK(!(*b.type < *a.type));
}
:(code)
2016-11-11 19:42:20 +00:00
string_tree::string_tree(const string_tree& original) {
atom = original.atom;
value = original.value;
left = copy(original.left);
right = copy(original.right);
}
2016-05-17 19:13:52 +00:00
reagent& reagent::operator=(const reagent& other) {
original_string = other.original_string;
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(properties); ++i)
if (properties.at(i).second) delete properties.at(i).second;
properties.clear();
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(other.properties); ++i)
properties.push_back(pair<string, string_tree*>(other.properties.at(i).first, copy(other.properties.at(i).second)));
2016-05-17 19:13:52 +00:00
name = other.name;
value = other.value;
initialized = other.initialized;
if (type) delete type;
type = copy(other.type);
// End reagent Copy Operator
return *this;
}
reagent::~reagent() {
2015-11-28 09:28:39 +00:00
clear();
}
void reagent::clear() {
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(properties); ++i) {
2015-11-28 09:28:39 +00:00
if (properties.at(i).second) {
delete properties.at(i).second;
properties.at(i).second = NULL;
}
}
delete type;
2015-11-28 09:28:39 +00:00
type = NULL;
}
type_tree::~type_tree() {
delete left;
delete right;
}
string_tree::~string_tree() {
delete left;
delete right;
}
2016-08-31 20:15:29 +00:00
void append(type_tree*& base, type_tree* extra) {
if (!base) {
base = extra;
return;
}
type_tree* curr = base;
while (curr->right) curr = curr->right;
curr->right = extra;
}
void append(string_tree*& base, string_tree* extra) {
if (!base) {
base = extra;
return;
}
string_tree* curr = base;
while (curr->right) curr = curr->right;
curr->right = extra;
}
string slurp_until(istream& in, char delim) {
ostringstream out;
char c;
while (in >> c) {
if (c == delim) {
// drop the delim
break;
}
out << c;
}
return out.str();
}
2016-05-06 07:46:39 +00:00
bool has_property(const reagent& x, const string& name) {
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(x.properties); ++i) {
if (x.properties.at(i).first == name) return true;
}
return false;
}
string_tree* property(const reagent& r, const string& name) {
2016-10-20 05:10:35 +00:00
for (int p = 0; p != SIZE(r.properties); ++p) {
if (r.properties.at(p).first == name)
return r.properties.at(p).second;
}
return NULL;
}
string_tree* copy(const string_tree* x) {
if (x == NULL) return NULL;
return new string_tree(*x);
}
type_tree* copy(const type_tree* x) {
if (x == NULL) return NULL;
return new type_tree(*x);
}
:(before "End Globals")
2016-10-22 23:56:07 +00:00
extern const string Ignore(","); // commas are ignored in Mu except within [] strings
:(code)
void skip_whitespace_but_not_newline(istream& in) {
while (true) {
if (!has_data(in)) break;
else if (in.peek() == '\n') break;
else if (isspace(in.peek())) in.get();
else if (Ignore.find(in.peek()) != string::npos) in.get();
else break;
}
}
void dump_memory() {
2016-10-20 05:10:35 +00:00
for (map<int, double>::iterator p = Memory.begin(); p != Memory.end(); ++p) {
2018-07-26 05:23:14 +00:00
cerr << p->first << ": " << no_scientific(p->second) << '\n';
}
}
//:: Helpers for converting various values to string
//: Use to_string() in trace(), and try to keep it stable from run to run.
//: Use debug_string() while debugging, and throw everything into it.
//: Use inspect() only for emitting a canonical format that can be parsed back
//: into the value.
string to_string(const recipe& r) {
ostringstream out;
out << "recipe " << r.name << " [\n";
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(r.steps); ++i)
out << " " << to_string(r.steps.at(i)) << '\n';
out << "]\n";
return out.str();
}
string to_original_string(const recipe& r) {
ostringstream out;
out << "recipe " << r.name << " [\n";
2017-05-26 23:43:18 +00:00
for (int i = 0; i < SIZE(r.steps); ++i)
out << " " << to_original_string(r.steps.at(i)) << '\n';
out << "]\n";
return out.str();
}
string debug_string(const recipe& x) {
ostringstream out;
out << "- recipe " << x.name << '\n';
// Begin debug_string(recipe x)
2016-10-20 05:10:35 +00:00
for (int index = 0; index < SIZE(x.steps); ++index) {
const instruction& inst = x.steps.at(index);
out << "inst: " << to_string(inst) << '\n';
out << " ingredients\n";
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(inst.ingredients); ++i)
out << " " << debug_string(inst.ingredients.at(i)) << '\n';
out << " products\n";
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(inst.products); ++i)
out << " " << debug_string(inst.products.at(i)) << '\n';
}
return out.str();
}
string to_original_string(const instruction& inst) {
if (inst.is_label) return inst.label;
if (!inst.original_string.empty()) return inst.original_string;
ostringstream out;
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(inst.products); ++i) {
if (i > 0) out << ", ";
out << inst.products.at(i).original_string;
}
if (!inst.products.empty()) out << " <- ";
2017-04-04 07:10:47 +00:00
out << inst.name;
if (!inst.ingredients.empty()) out << ' ';
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(inst.ingredients); ++i) {
if (i > 0) out << ", ";
out << inst.ingredients.at(i).original_string;
}
return out.str();
}
string to_string(const instruction& inst) {
if (inst.is_label) return inst.label;
ostringstream out;
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(inst.products); ++i) {
if (i > 0) out << ", ";
out << to_string(inst.products.at(i));
}
if (!inst.products.empty()) out << " <- ";
out << inst.name << ' ';
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(inst.ingredients); ++i) {
if (i > 0) out << ", ";
out << to_string(inst.ingredients.at(i));
}
return out.str();
}
string to_string(const reagent& r) {
if (is_dummy(r)) return "_";
ostringstream out;
out << "{";
out << r.name << ": " << names_to_string(r.type);
if (!r.properties.empty()) {
2016-10-20 05:10:35 +00:00
for (int i = 0; i < SIZE(r.properties); ++i)
out << ", \"" << r.properties.at(i).first << "\": " << to_string(r.properties.at(i).second);
2015-02-18 01:14:53 +00:00
}
out << "}";
return out.str();
}
// special name for ignoring some products
bool is_dummy(const reagent& x) {
return x.name == "_";
}
2015-02-18 01:14:53 +00:00
string debug_string(const reagent& x) {
ostringstream out;
out << x.name << ": " << x.value << ' ' << to_string(x.type) << " -- " << to_string(x);
return out.str();
}
string to_string(const string_tree* property) {
if (!property) return "()";
ostringstream out;
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
dump(property, out);
return out.str();
2015-10-27 23:34:58 +00:00
}
void dump(const string_tree* x, ostream& out) {
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
if (!x) return;
if (x->atom) {
out << '"' << x->value << '"';
return;
}
out << '(';
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
const string_tree* curr = x;
while (curr && !curr->atom) {
dump(curr->left, out);
if (curr->right) out << ' ';
curr = curr->right;
}
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
// check for dotted list; should never happen
if (curr) {
out << ". ";
dump(curr, out);
}
out << ')';
}
string to_string(const type_tree* type) {
if (type == NULL) return "()";
ostringstream out;
dump(type, out);
return out.str();
2015-10-27 23:34:58 +00:00
}
void dump(const type_tree* x, ostream& out) {
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
if (!x) return;
if (x->atom) {
2016-02-22 23:44:27 +00:00
dump(x->value, out);
return;
}
out << '(';
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
const type_tree* curr = x;
while (curr && !curr->atom) {
dump(curr->left, out);
if (curr->right) out << ' ';
curr = curr->right;
}
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
// check for dotted list; should never happen
if (curr) {
out << ". ";
dump(curr, out);
}
out << ')';
2015-10-26 08:01:04 +00:00
}
2016-02-19 22:06:54 +00:00
void dump(type_ordinal type, ostream& out) {
if (contains_key(Type, type))
2016-02-20 16:39:10 +00:00
out << get(Type, type).name;
else
out << "?" << type;
}
string names_to_string(const type_tree* type) {
if (type == NULL) return "()"; // should never happen
ostringstream out;
dump_names(type, out);
return out.str();
}
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
void dump_names(const type_tree* x, ostream& out) {
if (!x) return;
if (x->atom) {
out << '"' << x->name << '"';
return;
}
out << '(';
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
const type_tree* curr = x;
while (curr && !curr->atom) {
dump_names(curr->left, out);
if (curr->right) out << ' ';
curr = curr->right;
}
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
// check for dotted list; should never happen
if (curr) {
out << ". ";
dump_names(curr, out);
}
out << ')';
}
string names_to_string_without_quotes(const type_tree* type) {
if (type == NULL) return "()";
ostringstream out;
dump_names_without_quotes(type, out);
return out.str();
}
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
void dump_names_without_quotes(const type_tree* x, ostream& out) {
if (!x) return;
if (x->atom) {
out << x->name;
return;
}
out << '(';
3309 Rip out everything to fix one failing unit test (commit 3290; type abbreviations). This commit does several things at once that I couldn't come up with a clean way to unpack: A. It moves to a new representation for type trees without changing the actual definition of the `type_tree` struct. B. It adds unit tests for our type metadata precomputation, so that errors there show up early and in a simpler setting rather than dying when we try to load Mu code. C. It fixes a bug, guarding against infinite loops when precomputing metadata for recursive shape-shifting containers. To do this it uses a dumb way of comparing type_trees, comparing their string representations instead. That is likely incredibly inefficient. Perhaps due to C, this commit has made Mu incredibly slow. Running all tests for the core and the edit/ app now takes 6.5 minutes rather than 3.5 minutes. == more notes and details I've been struggling for the past week now to back out of a bad design decision, a premature optimization from the early days: storing atoms directly in the 'value' slot of a cons cell rather than creating a special 'atom' cons cell and storing it on the 'left' slot. In other words, if a cons cell looks like this: o / | \ left val right ..then the type_tree (a b c) used to look like this (before this commit): o | \ a o | \ b o | \ c null ..rather than like this 'classic' approach to s-expressions which never mixes val and right (which is what we now have): o / \ o o | / \ a o o | / \ b o null | c The old approach made several operations more complicated, most recently the act of replacing a (possibly atom/leaf) sub-tree with another. That was the final straw that got me to realize the contortions I was going through to save a few type_tree nodes (cons cells). Switching to the new approach was hard partly because I've been using the old approach for so long and type_tree manipulations had pervaded everything. Another issue I ran into was the realization that my layers were not cleanly separated. Key parts of early layers (precomputing type metadata) existed purely for far later ones (shape-shifting types). Layers I got repeatedly stuck at: 1. the transform for precomputing type sizes (layer 30) 2. type-checks on merge instructions (layer 31) 3. the transform for precomputing address offsets in types (layer 36) 4. replace operations in supporting shape-shifting recipes (layer 55) After much thrashing I finally noticed that it wasn't the entirety of these layers that was giving me trouble, but just the type metadata precomputation, which had bugs that weren't manifesting until 30 layers later. Or, worse, when loading .mu files before any tests had had a chance to run. A common failure mode was running into types at run time that I hadn't precomputed metadata for at transform time. Digging into these bugs got me to realize that what I had before wasn't really very good, but a half-assed heuristic approach that did a whole lot of extra work precomputing metadata for utterly meaningless types like `((address number) 3)` which just happened to be part of a larger type like `(array (address number) 3)`. So, I redid it all. I switched the representation of types (because the old representation made unit tests difficult to retrofit) and added unit tests to the metadata precomputation. I also made layer 30 only do the minimal metadata precomputation it needs for the concepts introduced until then. In the process, I also made the precomputation more correct than before, and added hooks in the right place so that I could augment the logic when I introduced shape-shifting containers. == lessons learned There's several levels of hygiene when it comes to layers: 1. Every layer introduces precisely what it needs and in the simplest way possible. If I was building an app until just that layer, nothing would seem over-engineered. 2. Some layers are fore-shadowing features in future layers. Sometimes this is ok. For example, layer 10 foreshadows containers and arrays and so on without actually supporting them. That is a net win because it lets me lay out the core of Mu's data structures out in one place. But if the fore-shadowing gets too complex things get nasty. Not least because it can be hard to write unit tests for features before you provide the plumbing to visualize and manipulate them. 3. A layer is introducing features that are tested only in later layers. 4. A layer is introducing features with tests that are invalidated in later layers. (This I knew from early on to be an obviously horrendous idea.) Summary: avoid Level 2 (foreshadowing layers) as much as possible. Tolerate it indefinitely for small things where the code stays simple over time, but become strict again when things start to get more complex. Level 3 is mostly a net lose, but sometimes it can be expedient (a real case of the usually grossly over-applied term "technical debt"), and it's better than the conventional baseline of no layers and no scenarios. Just clean it up as soon as possible. Definitely avoid layer 4 at any time. == minor lessons Avoid unit tests for trivial things, write scenarios in context as much as possible. But within those margins unit tests are fine. Just introduce them before any scenarios (commit 3297). Reorganizing layers can be easy. Just merge layers for starters! Punt on resplitting them in some new way until you've gotten them to work. This is the wisdom of Refactoring: small steps. What made it hard was not wanting to merge *everything* between layer 30 and 55. The eventual insight was realizing I just need to move those two full-strength transforms and nothing else.
2016-09-10 01:32:52 +00:00
const type_tree* curr = x;
while (curr && !curr->atom) {
dump_names_without_quotes(curr->left, out);
if (curr->right) out << ' ';
curr = curr->right;
}
3663 - fix a refcounting bug: '(type)' != 'type' This was a large commit, and most of it is a follow-up to commit 3309, undoing what is probably the final ill-considered optimization I added to s-expressions in Mu: I was always representing (a b c) as (a b . c), etc. That is now gone. Why did I need to take it out? The key problem was the error silently ignored in layer 30. That was causing size_of("(type)") to silently return garbage rather than loudly complain (assuming 'type' was a simple type). But to take it out I had to modify types_strictly_match (layer 21) to actually strictly match and not just do a prefix match. In the process of removing the prefix match, I had to make extracting recipe types from recipe headers more robust. So far it only matched the first element of each ingredient's type; these matched: (recipe address:number -> address:number) (recipe address -> address) I didn't notice because the dotted notation optimization was actually representing this as: (recipe address:number -> address number) --- One final little thing in this commit: I added an alias for 'assert' called 'assert_for_now', to indicate that I'm not sure something's really an invariant, that it might be triggered by (invalid) user programs, and so require more thought on error handling down the road. But this may well be an ill-posed distinction. It may be overwhelmingly uneconomic to continually distinguish between model invariants and error states for input. I'm starting to grow sympathetic to Google Analytics's recent approach of just banning assertions altogether. We'll see..
2016-11-11 05:39:02 +00:00
// check for dotted list; should never happen
if (curr) {
out << ". ";
dump_names_without_quotes(curr, out);
}
out << ')';
}
2017-12-22 06:20:53 +00:00
bool is_integer(const string& s) {
return s.find_first_not_of("0123456789-") == string::npos // no other characters
&& s.find_first_of("0123456789") != string::npos // at least one digit
&& s.find('-', 1) == string::npos; // '-' only at first position
}
int to_integer(string n) {
char* end = NULL;
// safe because string.c_str() is guaranteed to be null-terminated
int result = strtoll(n.c_str(), &end, /*any base*/0);
if (*end != '\0') cerr << "tried to convert " << n << " to number\n";
assert(*end == '\0');
return result;
}
void test_is_integer() {
CHECK(is_integer("1234"));
CHECK(is_integer("-1"));
CHECK(!is_integer("234.0"));
CHECK(is_integer("-567"));
CHECK(!is_integer("89-0"));
CHECK(!is_integer("-"));
CHECK(!is_integer("1e3")); // not supported
}
2016-02-20 01:01:09 +00:00
//: helper to print numbers without excessive precision
:(before "End Types")
struct no_scientific {
double x;
explicit no_scientific(double y) :x(y) {}
};
:(code)
ostream& operator<<(ostream& os, no_scientific x) {
if (!isfinite(x.x)) {
// Infinity or NaN
os << x.x;
return os;
}
ostringstream tmp;
// more accurate, but too slow
//? tmp.precision(308); // for 64-bit numbers
tmp << std::fixed << x.x;
os << trim_floating_point(tmp.str());
return os;
}
string trim_floating_point(const string& in) {
2015-09-15 07:38:00 +00:00
if (in.empty()) return "";
2016-09-02 21:39:33 +00:00
if (in.find('.') == string::npos) return in;
int length = SIZE(in);
while (length > 1) {
if (in.at(length-1) != '0') break;
--length;
}
if (in.at(length-1) == '.') --length;
2016-09-02 21:39:33 +00:00
if (length == 0) return "0";
return in.substr(0, length);
}
void test_trim_floating_point() {
2016-09-02 21:38:49 +00:00
CHECK_EQ(trim_floating_point(""), "");
2016-09-02 21:39:33 +00:00
CHECK_EQ(trim_floating_point(".0"), "0");
2016-09-02 21:38:49 +00:00
CHECK_EQ(trim_floating_point("1.5000"), "1.5");
CHECK_EQ(trim_floating_point("1.000001"), "1.000001");
CHECK_EQ(trim_floating_point("23.000000"), "23");
CHECK_EQ(trim_floating_point("23.0"), "23");
CHECK_EQ(trim_floating_point("23."), "23");
CHECK_EQ(trim_floating_point("23"), "23");
2016-09-02 21:39:33 +00:00
CHECK_EQ(trim_floating_point("230"), "230");
2016-09-02 21:38:49 +00:00
CHECK_EQ(trim_floating_point("3.000000"), "3");
CHECK_EQ(trim_floating_point("3.0"), "3");
CHECK_EQ(trim_floating_point("3."), "3");
CHECK_EQ(trim_floating_point("3"), "3");
}
2015-04-03 17:26:15 +00:00
:(before "End Includes")
2018-07-27 00:22:23 +00:00
#include <map>
using std::map;
2016-06-02 17:40:06 +00:00
#include <utility>
2015-05-22 01:10:17 +00:00
using std::pair;
2016-06-02 17:40:06 +00:00
#include <math.h>