236 lines
6.8 KiB
C++
236 lines
6.8 KiB
C++
//: Allow instructions to mention literals directly.
|
||
//:
|
||
//: This layer will transparently move them to the global segment (assumed to
|
||
//: always be the second segment).
|
||
|
||
:(scenario transform_literal_string)
|
||
== code
|
||
b8/copy "test"/imm32
|
||
== data # need to manually create this for now
|
||
+transform: -- move literal strings to data segment
|
||
+transform: adding global variable '__subx_global_1' containing "test"
|
||
+transform: instruction after transform: 'b8 __subx_global_1'
|
||
|
||
//: We don't rely on any transforms running in previous layers, but this layer
|
||
//: knows about labels and global variables and will emit them for previous
|
||
//: layers to transform.
|
||
:(after "Begin Transforms")
|
||
// Begin Level-3 Transforms
|
||
// register this layer's transform, which moves string literals out of
// instructions and into global variables
Transform.push_back(transform_literal_strings);
|
||
// End Level-3 Transforms
|
||
|
||
:(before "End Globals")
|
||
// numeric suffix for the next auto-generated global ('__subx_global_<n>');
// incremented every time transform_literal_strings names a new global
int Next_auto_global = 1;
|
||
:(code)
|
||
// Replace every string literal in the code segment (segment 0) with the name
// of a freshly created global, and append the definitions of those globals to
// the data segment (segment 1), creating that segment if the program doesn't
// have one yet.
//
// Globals are named '__subx_global_<n>', with <n> taken from Next_auto_global.
// A trace line is emitted for every instruction in the code segment, whether
// or not it contained a literal.
void transform_literal_strings(program& p) {
  trace(99, "transform") << "-- move literal strings to data segment" << end();
  if (p.segments.empty()) return;
  segment& code = p.segments.at(0);
  segment new_globals;
  for (int i = 0; i < SIZE(code.lines); ++i) {
    line& inst = code.lines.at(i);
    for (int j = 0; j < SIZE(inst.words); ++j) {
      word& curr = inst.words.at(j);
      // a word without any data can't be a string literal, and at(0) would
      // throw on it
      if (curr.data.empty() || curr.data.at(0) != '"') continue;
      ostringstream global_name;
      global_name << "__subx_global_" << Next_auto_global;
      ++Next_auto_global;
      add_global_to_data_segment(global_name.str(), curr, new_globals);
      curr.data = global_name.str();
    }
    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
  }
  if (new_globals.lines.empty()) return;
  // Growing p.segments may invalidate `code`; it must not be used below.
  if (SIZE(p.segments) < 2)
    p.segments.resize(2);
  vector<line>& existing_data = p.segments.at(1).lines;
  existing_data.insert(existing_data.end(), new_globals.lines.begin(), new_globals.lines.end());
}
|
||
|
||
void add_global_to_data_segment(const string& name, const word& value, segment& data) {
|
||
trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
|
||
// emit label
|
||
data.lines.push_back(label(name));
|
||
// emit size for size-prefixed array
|
||
data.lines.push_back(line());
|
||
emit_hex_bytes(data.lines.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/);
|
||
// emit data byte by byte
|
||
data.lines.push_back(line());
|
||
line& curr = data.lines.back();
|
||
for (int i = /*skip start quote*/1; i < SIZE(value.data)-/*skip end quote*/1; ++i) {
|
||
char c = value.data.at(i);
|
||
curr.words.push_back(word());
|
||
curr.words.back().data = hex_byte_to_string(c);
|
||
curr.words.back().metadata.push_back(string(1, c));
|
||
}
|
||
}
|
||
|
||
//: Within strings, whitespace is significant. So we need to redo our instruction
|
||
//: parsing.
|
||
|
||
:(scenarios parse_instruction_character_by_character)
|
||
:(scenario instruction_with_string_literal)
a "abc  def" z  # two spaces inside string
+parse2: word: a
+parse2: word: "abc  def"
+parse2: word: z
# no other words
$parse2: 3
|
||
|
||
:(before "End Line Parsing Special-cases(line_data -> l)")
|
||
if (line_data.find('"') != string::npos) { // can cause false-positives, but we can handle them
|
||
parse_instruction_character_by_character(line_data, l);
|
||
continue;
|
||
}
|
||
|
||
:(code)
|
||
void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
|
||
if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
|
||
raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
|
||
return;
|
||
}
|
||
// parse literals
|
||
istringstream in(line_data);
|
||
in >> std::noskipws;
|
||
line result;
|
||
// add tokens (words or strings) one by one
|
||
while (has_data(in)) {
|
||
skip_whitespace(in);
|
||
if (!has_data(in)) break;
|
||
char c = in.get();
|
||
if (c == '#') break; // comment; drop rest of line
|
||
if (c == ':') break; // line metadata; skip for now
|
||
if (c == '.') {
|
||
if (!has_data(in)) break; // comment token at end of line
|
||
if (isspace(in.peek()))
|
||
continue; // '.' followed by space is comment token; skip
|
||
}
|
||
result.words.push_back(word());
|
||
if (c == '"') {
|
||
// slurp word data
|
||
ostringstream d;
|
||
d << c;
|
||
while (has_data(in)) {
|
||
in >> c;
|
||
d << c;
|
||
if (c == '"') break;
|
||
}
|
||
result.words.back().data = d.str();
|
||
// slurp metadata
|
||
ostringstream m;
|
||
while (!isspace(in.peek()) && has_data(in)) {
|
||
in >> c;
|
||
if (c == '/') {
|
||
if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
|
||
m.str("");
|
||
}
|
||
else {
|
||
m << c;
|
||
}
|
||
}
|
||
if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
|
||
}
|
||
else {
|
||
// slurp all characters until whitespace
|
||
ostringstream w;
|
||
w << c;
|
||
while (!isspace(in.peek()) && has_data(in)) { // peek can sometimes trigger eof(), so do it first
|
||
in >> c;
|
||
w << c;
|
||
}
|
||
parse_word(w.str(), result.words.back());
|
||
}
|
||
trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
|
||
}
|
||
if (!result.words.empty())
|
||
out.push_back(result);
|
||
}
|
||
|
||
void skip_whitespace(istream& in) {
|
||
while (true) {
|
||
if (has_data(in) && isspace(in.peek())) in.get();
|
||
else break;
|
||
}
|
||
}
|
||
|
||
void skip_comment(istream& in) {
|
||
if (has_data(in) && in.peek() == '#') {
|
||
in.get();
|
||
while (has_data(in) && in.peek() != '\n') in.get();
|
||
}
|
||
}
|
||
|
||
// helper for tests
|
||
void parse_instruction_character_by_character(const string& line_data) {
|
||
vector<line> out;
|
||
parse_instruction_character_by_character(line_data, out);
|
||
}
|
||
|
||
:(scenario parse2_comment_token_in_middle)
|
||
a . z
|
||
+parse2: word: a
|
||
+parse2: word: z
|
||
-parse2: word: .
|
||
# no other words
|
||
$parse2: 2
|
||
|
||
:(scenario parse2_word_starting_with_dot)
|
||
a .b c
|
||
+parse2: word: a
|
||
+parse2: word: .b
|
||
+parse2: word: c
|
||
|
||
:(scenario parse2_comment_token_at_start)
|
||
. a b
|
||
+parse2: word: a
|
||
+parse2: word: b
|
||
-parse2: word: .
|
||
|
||
:(scenario parse2_comment_token_at_end)
|
||
a b .
|
||
+parse2: word: a
|
||
+parse2: word: b
|
||
-parse2: word: .
|
||
|
||
:(scenario parse2_word_starting_with_dot_at_start)
|
||
.a b c
|
||
+parse2: word: .a
|
||
+parse2: word: b
|
||
+parse2: word: c
|
||
|
||
:(scenario parse2_metadata)
|
||
.a b/c d
|
||
+parse2: word: .a
|
||
+parse2: word: b /c
|
||
+parse2: word: d
|
||
|
||
:(scenario parse2_string_with_metadata)
|
||
a "bc def"/disp32 g
|
||
+parse2: word: a
|
||
+parse2: word: "bc def" /disp32
|
||
+parse2: word: g
|
||
|
||
:(scenario parse2_string_with_metadata_at_end)
|
||
a "bc def"/disp32
|
||
+parse2: word: a
|
||
+parse2: word: "bc def" /disp32
|
||
|
||
:(code)
|
||
// A string literal followed by metadata should parse the same way even when
// the line doesn't end in a newline.
// NOTE(review): the two expected trace lines below are adjacent string
// literals with nothing between them -- confirm that no line separator
// expected by CHECK_TRACE_CONTENTS has gone missing.
void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
  const string input_without_newline = "68/push \"test\"/f";  // no newline, which is how calls from parse() will look
  parse_instruction_character_by_character(input_without_newline);
  CHECK_TRACE_CONTENTS(
      "parse2: word: 68 /push"
      "parse2: word: \"test\" /f"
  );
}
|
||
|
||
//: Make sure slashes inside strings don't trigger adding stuff from inside the
|
||
//: string to metadata.
|
||
:(scenario parse2_string_containing_slashes)
|
||
a "bc/def"/disp32
|
||
+parse2: word: "bc/def" /disp32
|