2015-04-17 18:22:59 +00:00
|
|
|
//: For convenience, some instructions will take literal arrays of characters (strings).
|
2015-03-31 06:15:03 +00:00
|
|
|
//:
|
|
|
|
//: Instead of quotes, we'll use [] to delimit strings. That'll reduce the
|
2015-03-31 17:17:19 +00:00
|
|
|
//: need for escaping since we can support nested brackets. And we can also
|
|
|
|
//: imagine that 'recipe' might one day itself be defined in mu, doing its own
|
|
|
|
//: parsing.
|
2015-03-31 06:15:03 +00:00
|
|
|
|
2015-04-24 07:28:24 +00:00
|
|
|
:(scenarios load)
|
2015-04-24 17:19:03 +00:00
|
|
|
:(scenario string_literal)
|
2015-03-31 04:22:29 +00:00
|
|
|
recipe main [
|
2015-04-29 05:42:54 +00:00
|
|
|
1:address:array:character <- copy [abc def] # copy can't really take a string
|
2015-03-31 04:22:29 +00:00
|
|
|
]
|
2015-06-14 06:08:46 +00:00
|
|
|
+parse: ingredient: {name: "abc def", properties: [_: "literal-string"]}
|
2015-03-31 04:22:29 +00:00
|
|
|
|
2015-04-24 17:19:03 +00:00
|
|
|
:(scenario string_literal_with_colons)
|
2015-04-15 17:27:16 +00:00
|
|
|
recipe main [
|
2015-04-29 05:42:54 +00:00
|
|
|
1:address:array:character <- copy [abc:def/ghi]
|
2015-04-15 17:27:16 +00:00
|
|
|
]
|
2015-06-14 06:08:46 +00:00
|
|
|
+parse: ingredient: {name: "abc:def/ghi", properties: [_: "literal-string"]}
|
2015-04-15 17:27:16 +00:00
|
|
|
|
2015-03-31 04:22:29 +00:00
|
|
|
:(before "End Mu Types Initialization")
|
2015-07-04 16:40:50 +00:00
|
|
|
// Register "literal-string" under ordinal 0, the same value the string-literal
// branch of the reagent parser pushes onto 'types'.
Type_ordinal["literal-string"] = 0;
|
2015-03-31 04:22:29 +00:00
|
|
|
|
|
|
|
:(after "string next_word(istream& in)")
|
2015-04-29 05:45:38 +00:00
|
|
|
// A word that opens with '[' is a string literal: read it whole with
// slurp_quoted(), then run skip_whitespace() and skip_comment() on whatever
// follows before handing the literal back.
if (in.peek() == '[') {
  string literal = slurp_quoted(in);
  skip_whitespace(in);
  skip_comment(in);
  return literal;
}
|
2015-03-31 04:22:29 +00:00
|
|
|
|
|
|
|
:(code)
|
|
|
|
string slurp_quoted(istream& in) {
|
|
|
|
ostringstream out;
|
2015-06-14 23:11:47 +00:00
|
|
|
assert(!in.eof()); assert(in.peek() == '['); out << static_cast<char>(in.get()); // slurp the '['
|
|
|
|
if (code_string(in, out))
|
|
|
|
slurp_quoted_comment_aware(in, out);
|
|
|
|
else
|
|
|
|
slurp_quoted_comment_oblivious(in, out);
|
|
|
|
return out.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
// A string is a code string if it contains a newline before any non-whitespace
// todo: support comments before the newline. But that gets messy.
//
// Consumes leading whitespace from 'in', copying it to 'out', and stops right
// after the first newline (returning true) or right before the first
// non-whitespace character (returning false, with that character left unread).
// Also returns false if the input runs out first.
bool code_string(istream& in, ostringstream& out) {
  const int eof = istream::traits_type::eof();
  while (in.good()) {
    // Peek rather than get()+putback(): putback() clears eofbit, which would
    // leave the stream failed-but-not-eof and keep callers that loop on
    // !in.eof() spinning forever.
    const int next = in.peek();
    if (next == eof) break;
    // isspace() is only defined for values representable as unsigned char
    if (!isspace(static_cast<unsigned char>(next))) return false;
    in.get();
    out << static_cast<char>(next);
    if (next == '\n') return true;
  }
  return false;
}
|
|
|
|
|
|
|
|
// Read a regular string. Regular strings can only contain other regular
|
|
|
|
// strings.
|
2015-06-19 20:37:11 +00:00
|
|
|
void slurp_quoted_comment_oblivious(istream& in, ostringstream& out) {
|
2015-06-14 23:11:47 +00:00
|
|
|
int brace_depth = 1;
|
2015-03-31 04:22:29 +00:00
|
|
|
while (!in.eof()) {
|
|
|
|
char c = in.get();
|
2015-05-28 18:28:15 +00:00
|
|
|
if (c == '\\') {
|
2015-06-14 19:57:51 +00:00
|
|
|
out << static_cast<char>(in.get());
|
2015-05-28 18:28:15 +00:00
|
|
|
continue;
|
|
|
|
}
|
2015-03-31 04:22:29 +00:00
|
|
|
out << c;
|
2015-05-28 20:31:20 +00:00
|
|
|
if (c == '[') ++brace_depth;
|
|
|
|
if (c == ']') --brace_depth;
|
|
|
|
if (brace_depth == 0) break;
|
2015-03-31 04:22:29 +00:00
|
|
|
}
|
2015-05-30 19:34:40 +00:00
|
|
|
if (in.eof() && brace_depth > 0) {
|
2015-07-25 07:02:20 +00:00
|
|
|
raise << "unbalanced '['\n" << end();
|
2015-06-14 23:11:47 +00:00
|
|
|
out.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read a code string. Code strings can contain either code or regular strings.
|
2015-06-19 20:37:11 +00:00
|
|
|
void slurp_quoted_comment_aware(istream& in, ostringstream& out) {
|
2015-06-14 23:11:47 +00:00
|
|
|
char c;
|
|
|
|
while (in >> c) {
|
2015-06-19 20:37:11 +00:00
|
|
|
if (c == '\\') {
|
|
|
|
out << static_cast<char>(in.get());
|
|
|
|
continue;
|
|
|
|
}
|
2015-06-14 23:11:47 +00:00
|
|
|
if (c == '#') {
|
|
|
|
out << c;
|
|
|
|
while (!in.eof() && in.peek() != '\n') out << static_cast<char>(in.get());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (c == '[') {
|
|
|
|
in.putback(c);
|
|
|
|
// recurse
|
|
|
|
out << slurp_quoted(in);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
out << c;
|
2015-06-19 20:37:11 +00:00
|
|
|
if (c == ']') return;
|
2015-05-30 19:34:40 +00:00
|
|
|
}
|
2015-07-25 07:02:20 +00:00
|
|
|
raise << "unbalanced '['\n" << end();
|
2015-06-19 20:37:11 +00:00
|
|
|
out.clear();
|
2015-03-31 04:22:29 +00:00
|
|
|
}
|
2015-03-31 06:15:03 +00:00
|
|
|
|
2015-07-28 23:38:37 +00:00
|
|
|
:(after "Parsing reagent(string s)")
|
|
|
|
// A word wrapped in [] is a string literal: its contents become the reagent's
// name, its type is ordinal 0, and its only property is keyed by the name and
// tagged "literal-string".
if (s.at(0) == '[') {
  assert(*s.rbegin() == ']');
  // strip the enclosing [] delimiters
  s.erase(0, 1);
  s.erase(SIZE(s)-1);
  name = s;
  types.push_back(0);
  vector<string> tags;
  tags.push_back("literal-string");
  properties.push_back(pair<string, vector<string> >(name, tags));
  return;
}
|
2015-04-15 17:27:16 +00:00
|
|
|
|
2015-06-14 18:30:32 +00:00
|
|
|
//: Two tweaks to printing literal strings compared to other reagents:
|
|
|
|
//: a) Don't print the string twice in the representation, just put '_' in
|
|
|
|
//: the property list.
|
|
|
|
//: b) Escape newlines in the string to make it more friendly to trace().
|
|
|
|
|
2015-06-14 06:17:13 +00:00
|
|
|
:(after "string reagent::to_string()")
|
2015-08-02 05:16:09 +00:00
|
|
|
// String literals are rendered by emit_literal_string() instead of the
// generic reagent format.
if (is_literal_string(*this)) {
  return emit_literal_string(name);
}
|
|
|
|
|
|
|
|
:(code)
|
2015-08-02 05:16:09 +00:00
|
|
|
bool is_literal_string(const reagent& x) {
|
|
|
|
return !x.properties.at(0).second.empty() && x.properties.at(0).second.at(0) == "literal-string";
|
|
|
|
}
|
|
|
|
|
2015-06-14 06:17:13 +00:00
|
|
|
string emit_literal_string(string name) {
|
|
|
|
size_t pos = 0;
|
|
|
|
while (pos != string::npos)
|
|
|
|
pos = replace(name, "\n", "\\n", pos);
|
|
|
|
return "{name: \""+name+"\", properties: [_: \"literal-string\"]}";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Replace the first occurrence of 'from' found in 'str' at or after index 'n'
// with 'to'. Returns the index of that occurrence, or string::npos (leaving
// 'str' untouched) if there is none.
size_t replace(string& str, const string& from, const string& to, size_t n) {
  const size_t match = str.find(from, n);
  if (match == string::npos) return string::npos;
  str.replace(match, from.length(), to);
  return match;
}
|
|
|
|
|
2015-04-24 17:19:03 +00:00
|
|
|
:(scenario string_literal_nested)
|
2015-03-31 17:17:19 +00:00
|
|
|
recipe main [
|
2015-04-29 05:42:54 +00:00
|
|
|
1:address:array:character <- copy [abc [def]]
|
2015-03-31 17:17:19 +00:00
|
|
|
]
|
2015-06-14 06:08:46 +00:00
|
|
|
+parse: ingredient: {name: "abc [def]", properties: [_: "literal-string"]}
|
2015-04-29 05:45:38 +00:00
|
|
|
|
2015-05-28 18:28:15 +00:00
|
|
|
:(scenario string_literal_escaped)
|
|
|
|
recipe main [
|
|
|
|
1:address:array:character <- copy [abc \[def]
|
|
|
|
]
|
2015-06-14 06:08:46 +00:00
|
|
|
+parse: ingredient: {name: "abc [def", properties: [_: "literal-string"]}
|
2015-05-28 18:28:15 +00:00
|
|
|
|
2015-06-19 20:37:11 +00:00
|
|
|
:(scenario string_literal_escaped_comment_aware)
|
|
|
|
recipe main [
|
|
|
|
1:address:array:character <- copy [
|
|
|
|
abc \\\[def]
|
|
|
|
]
|
|
|
|
+parse: ingredient: {name: "\nabc \[def", properties: [_: "literal-string"]}
|
|
|
|
|
2015-04-29 05:45:38 +00:00
|
|
|
:(scenario string_literal_and_comment)
|
|
|
|
recipe main [
|
|
|
|
1:address:array:character <- copy [abc] # comment
|
|
|
|
]
|
2015-04-30 04:49:09 +00:00
|
|
|
+parse: instruction: copy
|
2015-06-14 06:08:46 +00:00
|
|
|
+parse: ingredient: {name: "abc", properties: [_: "literal-string"]}
|
2015-05-21 19:36:59 +00:00
|
|
|
+parse: product: {name: "1", properties: ["1": "address":"array":"character"]}
|
2015-04-29 18:45:43 +00:00
|
|
|
# no other ingredients
|
2015-04-29 05:45:38 +00:00
|
|
|
$parse: 3
|
2015-06-14 17:07:00 +00:00
|
|
|
|
|
|
|
:(scenario string_literal_escapes_newlines_in_trace)
|
|
|
|
recipe main [
|
|
|
|
copy [abc
|
|
|
|
def]
|
|
|
|
]
|
|
|
|
+parse: ingredient: {name: "abc\ndef", properties: [_: "literal-string"]}
|
2015-06-14 23:11:47 +00:00
|
|
|
|
|
|
|
:(scenario string_literal_can_skip_past_comments)
|
|
|
|
recipe main [
|
|
|
|
copy [
|
|
|
|
# ']' inside comment
|
|
|
|
bar
|
|
|
|
]
|
|
|
|
]
|
|
|
|
+parse: ingredient: {name: "\n # ']' inside comment\n bar\n ", properties: [_: "literal-string"]}
|
|
|
|
|
|
|
|
:(scenario string_literal_empty)
|
|
|
|
recipe main [
|
|
|
|
copy []
|
|
|
|
]
|
|
|
|
+parse: ingredient: {name: "", properties: [_: "literal-string"]}
|