2273 - start expanding the type system

Current plan: (1) parsing {x: foo, y: bar} syntax for reagents; (2) parsing s-expr syntax for properties; (3) supporting reverse instructions (<-); (4) parsing s-expr syntax for recipe headers (recipe number number -> number); (5) static dispatch; (6) generic functions; (7) type-checking higher-order functions; (8) type of delimited continuations? (need more type information). First step is done, and the second partially so.
2015-10-25 11:55:35 -07:00 · 2015-10-25 11:55:35 -07:00 · a796831f3e
parent 61286c8d69
commit a796831f3e
3 changed files with 118 additions and 7 deletions
--- a/011load.cc
+++ b/011load.cc
@ -207,12 +207,6 @@ void skip_comment(istream& in) {
  }
 }

-void skip_comma(istream& in) {
-  skip_whitespace(in);
-  if (!in.eof() && in.peek() == ',') in.get();
-  skip_whitespace(in);
-}
-
 //: Warn if a recipe gets redefined, because large codebases can accidentally
 //: step on their own toes. But there'll be many occasions later where
 //: we'll want to disable the warnings.
--- a/013literal_string.cc
+++ b/013literal_string.cc
@ -109,7 +109,7 @@ if (s.at(0) == '[') {
  assert(*s.rbegin() == ']');
  // delete [] delimiters
  s.erase(0, 1);
-  s.erase(SIZE(s)-1);
+  strip_last(s);
  name = s;
  types.push_back(0);
  properties.push_back(pair<string, vector<string> >(name, vector<string>()));
@ -145,6 +145,10 @@ size_t replace(string& str, const string& from, const string& to, size_t n) {
  return result;
 }

+// Drop the final character of s in place. An empty string is left untouched.
+void strip_last(string& s) {
+  if (s.empty()) return;
+  s.erase(SIZE(s)-1);
+}
+
 :(scenario string_literal_nested)
 recipe main [
  1:address:array:character <- copy [abc [def]]
--- a/054dilated_reagent.cc
+++ b/054dilated_reagent.cc
@ -0,0 +1,113 @@
+//: An alternative syntax for reagents that permits whitespace in properties,
+//: grouped by brackets.
+
+:(scenarios load)
+:(scenario dilated_reagent)
+recipe main [
+  {1: number, foo: bar} <- copy 34
+]
+parse:   product: {name: "1", properties: ["1": "number", "foo": "bar"]}
+
+//: First augment next_word to group balanced brackets together.
+
+:(after "string next_word(istream& in)")
+  // A word opening with '(' always extends to its matching ')'. A '{' gets
+  // the same treatment only when it starts a dilated reagent; otherwise it
+  // may be a label, and we fall through to the code that follows.
+  const bool grouped_word = (in.peek() == '(') || start_of_dilated_reagent(in);
+  if (grouped_word)
+    return slurp_balanced_bracket(in);
+
+:(code)
+// Decide whether the '{' at the head of the stream opens a dilated reagent
+// rather than a label. A curly counts as a label when nothing but whitespace
+// separates it from the end of its line, so a dilated reagent has to stay on
+// a single line.
+//
+// Returns false right away if the next character isn't '{'.
+//
+// Side-effect: the '{' is put back before returning, but any whitespace
+// that followed it has already been consumed.
+bool start_of_dilated_reagent(istream& in) {
+  if (in.peek() != '{') return false;
+  in.get();  // consume the '{' so we can look past it
+  skip_whitespace(in);
+  const bool curly_ends_line = (in.peek() == '\n');
+  in.putback('{');  // restore the curly on both outcomes
+  return !curly_ends_line;
+}
+
+// Assume the first letter is an open bracket, and read everything up to and
+// including the matching close bracket. Returns the text read, brackets
+// included; if the stream runs out first, returns whatever was read so far.
+// We balance {} () and []. A '\' and the character after it are copied
+// through verbatim and never counted as brackets.
+// Note: characters are read with `in >> c`, so whitespace survives only if
+// the stream has noskipws set.
+string slurp_balanced_bracket(istream& in) {
+  ostringstream result;
+  char c;
+  list<char> open_brackets;  // stack of brackets still waiting to be closed
+  while (in >> c) {
+    if (c == '\\') {
+      // escaped character: emit it untouched, bypassing bracket matching
+      result << c;
+      if (!(in >> c)) break;
+      result << c;
+      continue;
+    }
+    // which opener (if any) the current character is supposed to close
+    char expected_open = '\0';
+    switch (c) {
+      case '(': case '[': case '{':
+        open_brackets.push_back(c);
+        break;
+      case ')': expected_open = '('; break;
+      case ']': expected_open = '['; break;
+      case '}': expected_open = '{'; break;
+      default: break;
+    }
+    if (expected_open != '\0') {
+      // Check for emptiness first: back()/pop_back() on an empty list is
+      // undefined behavior, which a closer with nothing open would trigger.
+      assert(!open_brackets.empty() && open_brackets.back() == expected_open);
+      if (!open_brackets.empty()) open_brackets.pop_back();
+    }
+    result << c;
+    if (open_brackets.empty()) break;
+  }
+  return result.str();
+}
+
+:(after "Parsing reagent(string s)")
+// A reagent starting with '{' uses the dilated syntax: read its contents as
+// alternating key/value words, recording each pair as a property with a
+// single value.
+if (s.at(0) == '{') {
+  istringstream in(s);
+  in >> std::noskipws;
+  in.get();  // skip '{'
+  while (!in.eof()) {
+    string key = next_dilated_word(in);
+    string value = next_dilated_word(in);
+    properties.push_back(pair<string, vector<string> >(key, vector<string>(1, value)));
+  }
+  // the first property row supplies the reagent's name and its type
+  const pair<string, vector<string> >& first_row = properties.at(0);
+  name = first_row.first;
+  const string& type = first_row.second.at(0);
+  // a type name we haven't seen yet gets the next free ordinal
+  // (original note: this type can't be an integer)
+  if (Type_ordinal.find(type) == Type_ordinal.end())
+    Type_ordinal[type] = Next_type_ordinal++;
+  types.push_back(Type_ordinal[type]);
+  return;
+}
+
+:(code)
+// Read the next key or value inside a dilated reagent: skip any commas at
+// the head of the stream, read one word with next_word(), then peel trailing
+// punctuation off it. Returns the empty string if nothing is left.
+string next_dilated_word(istream& in) {
+  while (in.peek() == ',') in.get();
+  string word = next_word(in);
+  while (!word.empty()) {
+    const char last = *word.rbegin();
+    // a trailing ':' marks the end of a key
+    const bool key_colon = (last == ':');
+    // a word that doesn't start with a bracket was read by next_word() from
+    // previous layers, which is oblivious about brackets and so can leave a
+    // trailing '}' on the word
+    const bool stray_curly = (*word.begin() != '{' && last == '}');
+    if (!key_colon && !stray_curly) break;
+    strip_last(word);
+  }
+  return word;
+}