mu/038new_text.cc

289 lines
7.6 KiB
C++
Raw Normal View History

2016-09-17 07:01:45 +00:00
//: Extend 'new' to handle a unicode string literal argument or 'text'.
//: A Mu text is an address to an array of characters.
:(before "End Mu Types Initialization")
put(Type_abbreviations, "text", new_type_tree("&:@:character"));
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
:(code)
// 'new' given a literal text allocates an array of characters that can be
// indexed like any other array: index 5 of "abc def" is 'e'.
void test_new_string() {
run(
"def main [\n"
" 10:text <- new [abc def]\n"
" 20:char <- index *10:text, 5\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
// number code for 'e'
"mem: storing 101 in location 20\n"
);
}
// Lengths and indices of a text count unicode characters rather than bytes:
// '«' takes more than one byte in UTF-8 but occupies a single element here.
void test_new_string_handles_unicode() {
run(
"def main [\n"
" 10:text <- new [a«c]\n"
" 20:num <- length *10:text\n"
" 21:char <- index *10:text, 1\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"mem: storing 3 in location 20\n"
// unicode for '«'
"mem: storing 171 in location 21\n"
);
}
:(before "End NEW Check Special-cases")
2016-09-17 06:52:15 +00:00
// 'new' with a literal text as its first ingredient: leave the surrounding
// NEW check via 'break' at this point
if (is_literal_text(inst.ingredients.at(0))) break;
:(before "Convert 'new' To 'allocate'")
// 'new' instructions whose first ingredient is a literal text are skipped
// here, before the conversion point; the emptiness check guards the .at(0)
if (inst.name == "new" && !inst.ingredients.empty() && is_literal_text(inst.ingredients.at(0))) continue;
:(after "case NEW" following "Primitive Recipe Implementations")
2016-09-17 06:52:15 +00:00
if (is_literal_text(current_instruction().ingredients.at(0))) {
  // The product is an address laid out as [alloc id, location of the text].
  products.resize(1);
  products.at(0).push_back(/*alloc id*/0);
  products.at(0).push_back(new_mu_text(current_instruction().ingredients.at(0).name));
  // Trace the location returned by new_mu_text (slot 1). Slot 0 is the alloc
  // id pushed above, so tracing it would always print 0.
  // NOTE(review): assumes products.at(0) starts out empty for each instruction -- confirm.
  trace(Callstack_depth+1, "mem") << "new string alloc: " << products.at(0).at(/*skip alloc id*/1) << end();
  break;
}
:(code)
2016-09-17 06:52:15 +00:00
int new_mu_text(const string& contents) {
// allocate an array just large enough for it
int string_length = unicode_length(contents);
//? Total_alloc += string_length+1;
//? ++Num_alloc;
int result = allocate(/*array length*/1 + string_length);
int curr_address = result;
++curr_address; // skip alloc id
trace(Callstack_depth+1, "mem") << "storing string length " << string_length << " in location " << curr_address << end();
2016-08-26 18:47:10 +00:00
put(Memory, curr_address, string_length);
++curr_address; // skip length
int curr = 0;
const char* raw_contents = contents.c_str();
2016-10-20 05:10:35 +00:00
for (int i = 0; i < string_length; ++i) {
uint32_t curr_character;
assert(curr < SIZE(contents));
tb_utf8_char_to_unicode(&curr_character, &raw_contents[curr]);
trace(Callstack_depth+1, "mem") << "storing string character " << curr_character << " in location " << curr_address << end();
2016-08-26 18:47:10 +00:00
put(Memory, curr_address, curr_character);
curr += tb_utf8_char_length(raw_contents[curr]);
2016-08-26 18:47:10 +00:00
++curr_address;
}
2016-10-22 23:56:07 +00:00
// Mu strings are not null-terminated in memory.
return result;
}
//: a new kind of typo
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// A literal text on a line by itself is not an instruction; it should be
// reported as an error naming the offending line.
void test_literal_text_without_instruction() {
// errors are expected here
Hide_errors = true;
run(
"def main [\n"
" [abc]\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"error: main: instruction '[abc]' has no recipe in '[abc]'\n"
);
}
//: stash recognizes texts
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// 'stash' shows a text ingredient as its characters ("abc") in the trace.
void test_stash_text() {
run(
"def main [\n"
" 1:text <- new [abc]\n"
" stash [foo:], 1:text\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"app: foo: abc\n"
);
}
2016-11-12 07:30:16 +00:00
:(before "End inspect Special-cases(r, data)")
2016-09-17 06:52:15 +00:00
// texts are inspected as their characters; slot 0 of 'data' is the alloc id,
// so the location handed to read_mu_text comes from slot 1
if (is_mu_text(r)) {
return read_mu_text(data.at(/*skip alloc id*/1));
}
2016-11-12 09:05:38 +00:00
:(before "End $print Special-cases")
else if (is_mu_text(current_instruction().ingredients.at(i))) {
  // slot 0 of the ingredient is the alloc id; the text's location follows it
  const int text_address = ingredients.at(i).at(/*skip alloc id*/1);
  cout << read_mu_text(text_address);
}
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
:(code)
// A non-ASCII character survives the round trip through 'new' and 'stash'.
void test_unicode_text() {
run(
"def main [\n"
" 1:text <- new [♠]\n"
" stash [foo:], 1:text\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"app: foo: ♠\n"
);
}
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// When a text is followed by another ingredient, 'stash' separates the two
// with a space.
void test_stash_space_after_text() {
run(
"def main [\n"
" 1:text <- new [abc]\n"
" stash 1:text, [foo]\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"app: abc foo\n"
);
}
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// Stashing '*1:text' (rather than '1:text') shows the underlying array:
// its length followed by the number code of each character.
void test_stash_text_as_array() {
run(
"def main [\n"
" 1:text <- new [abc]\n"
" stash *1:text\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"app: 3 97 98 99\n"
);
}
//: fixes way more than just stash
2016-09-17 06:52:15 +00:00
:(before "End Preprocess is_mu_text(reagent x)")
// not a text if the type can't be canonized
// NOTE(review): presumably canonize_type also rewrites x's type (e.g. for '*'
// lookups) before the text check runs -- confirm against its definition
if (!canonize_type(x)) return false;
//: Allocate more to routine when initializing a literal text
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
:(code)
// With only 3 locations handed to the routine at a time, the literal text
// doesn't fit in what is left after the first 'new', so a second range of
// memory has to be allocated to the routine.
void test_new_text_overflow() {
Initial_memory_per_routine = 3;
run(
"def main [\n"
" 10:&:num/raw <- new number:type\n"
" 20:text/raw <- new [a]\n" // not enough room in initial page, if you take the array length into account
"]\n"
);
CHECK_TRACE_CONTENTS(
"new: routine allocated memory from 1000 to 1003\n"
"new: routine allocated memory from 1003 to 1006\n"
);
}
//: helpers
:(code)
// Count the characters (not bytes) in the UTF-8 encoded string 's', stepping
// from one lead byte to the next using tb_utf8_char_length().
int unicode_length(const string& s) {
  const char* bytes = s.c_str();
  int num_characters = 0;
  // the offset is bounds-checked against the string's size before every
  // access through the raw pointer
  for (int offset = 0; offset < SIZE(s); offset += tb_utf8_char_length(bytes[offset]))
    ++num_characters;
  return num_characters;
}
2016-09-17 06:52:15 +00:00
// Read the Mu text at 'address' back into a host string.
// Returns "" for a zero address or a zero-length text.
string read_mu_text(int address) {
  if (address == 0) return "";
  // the length sits right after the alloc id
  const int length_location = address + /*alloc id*/1;
  const int num_characters = get_or_insert(Memory, length_location);
  if (num_characters == 0) return "";
  // the characters start right after the length
  return read_mu_characters(length_location + /*length*/1, num_characters);
}
// Concatenate the 'length' characters stored in Memory from location 'start'
// onwards, converting each one with to_unicode().
string read_mu_characters(int start, int length) {
  ostringstream out;
  const int limit = start + length;
  int location = start;
  while (location < limit) {
    const uint32_t code_point = static_cast<uint32_t>(get_or_insert(Memory, location));
    out << to_unicode(code_point);
    ++location;
  }
  return out.str();
}
2016-11-27 04:44:52 +00:00
//:: some miscellaneous helpers now that we have text
//: assert: perform sanity checks at runtime
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// 'assert' with a false (0) condition raises its literal text message as an
// error.
void test_assert_literal() {
// errors are expected here
Hide_errors = true;
run(
"def main [\n"
" assert 0, [this is an assert in Mu]\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"error: this is an assert in Mu\n"
);
}
// Same as the literal case, but the message is a text previously allocated
// with 'new' and passed by address.
void test_assert() {
// errors are expected here
Hide_errors = true;
run(
"def main [\n"
" 1:text <- new [this is an assert in Mu]\n"
" assert 0, 1:text\n"
"]\n"
);
CHECK_TRACE_CONTENTS(
"error: this is an assert in Mu\n"
);
}
2016-11-27 04:44:52 +00:00
//: declare the ASSERT primitive and make it available to Mu programs under
//: the name 'assert'
:(before "End Primitive Recipe Declarations")
ASSERT,
:(before "End Primitive Recipe Numbers")
put(Recipe_ordinal, "assert", ASSERT);
:(before "End Primitive Recipe Checks")
case ASSERT: {
  // Report only the first problem found: wrong ingredient count, then a bad
  // condition, then a bad message.
  if (SIZE(inst.ingredients) != 2) {
    raise << maybe(get(Recipe, r).name) << "'assert' takes exactly two ingredients rather than '" << to_original_string(inst) << "'\n" << end();
  }
  else if (!(is_mu_address(inst.ingredients.at(0)) || is_mu_scalar(inst.ingredients.at(0)))) {
    // the condition must be an address or a scalar
    raise << maybe(get(Recipe, r).name) << "'assert' requires a scalar or address for its first ingredient, but got '" << inst.ingredients.at(0).original_string << "'\n" << end();
  }
  else if (!(is_literal_text(inst.ingredients.at(1)) || is_mu_text(inst.ingredients.at(1)))) {
    // the message must be a text, either literal or in memory
    raise << maybe(get(Recipe, r).name) << "'assert' requires a text as its second ingredient, but got '" << inst.ingredients.at(1).original_string << "'\n" << end();
  }
  break;
}
:(before "End Primitive Recipe Implementations")
case ASSERT: {
  // nothing to report when the condition holds
  if (scalar_ingredient(ingredients, 0)) break;
  // the message is either spelled out in the instruction or a text in memory
  string message;
  if (is_literal_text(current_instruction().ingredients.at(1)))
    message = current_instruction().ingredients.at(1).name;
  else
    message = read_mu_text(ingredients.at(1).at(/*skip alloc id*/1));
  raise << message << '\n' << end();
  // a failed assert ends the process unless errors are being hidden
  if (!Hide_errors) exit(1);
  break;
}
//: 'cheating' by using the host system
//: declare the _READ primitive and make it available to Mu programs under
//: the name '$read'
:(before "End Primitive Recipe Declarations")
_READ,
:(before "End Primitive Recipe Numbers")
put(Recipe_ordinal, "$read", _READ);
:(before "End Primitive Recipe Checks")
case _READ: {
// no checks are performed on '$read' instructions
break;
}
:(before "End Primitive Recipe Implementations")
case _READ: {
  // Read one whitespace-delimited word from standard input and return it as a
  // newly allocated text; the text is empty if no data is available.
  skip_whitespace(cin);
  string result;
  if (has_data(cin))
    cin >> result;
  products.resize(1);
  // An address product is [alloc id, location], the same layout 'new' uses
  // for a literal text. Without the alloc id slot, readers that skip slot 0
  // (e.g. .at(/*skip alloc id*/1)) would index past the end of the product.
  products.at(0).push_back(/*alloc id*/0);
  products.at(0).push_back(new_mu_text(result));
  break;
}
:(code)
// Consume whitespace from 'in', stopping at the first non-whitespace
// character or as soon as has_data() reports that nothing is left.
void skip_whitespace(istream& in) {
  while (has_data(in) && isspace(in.peek()))
    in.get();
}