2018-07-27 17:58:15 +00:00
|
|
|
//: Labels are defined by ending names with a ':'. This layer will compute
|
2018-07-27 18:10:18 +00:00
|
|
|
//: addresses for labels, and compute the offset for instructions using them.
|
2018-08-21 05:13:45 +00:00
|
|
|
//:
|
|
|
|
//: We won't check this, but our convention will be that jump targets will
|
|
|
|
//: start with a '$', while functions will not. Function names will never be
|
|
|
|
//: jumped to, and jump targets will never be called.
|
2018-07-27 17:58:15 +00:00
|
|
|
|
2018-08-09 06:28:58 +00:00
|
|
|
//: We're introducing non-number names for the first time, so it's worth
|
|
|
|
//: laying down some ground rules all transforms will follow, so things don't
|
|
|
|
//: get too confusing:
|
|
|
|
//: - if it starts with a digit, it's treated as a number. If it can't be
|
|
|
|
//: parsed as hex it will raise an error.
|
|
|
|
//: - if it starts with '-' it's treated as a number.
|
|
|
|
//: - if it starts with '0x' it's treated as a number.
|
|
|
|
//: - if it's two characters long, it can't be a name. Either it's a hex
|
|
|
|
//: byte, or it raises an error.
|
|
|
|
//: That's it. Names can start with any non-digit that isn't a dash. They can
|
|
|
|
//: be a single character long. 'a' is not a hex number, it's a variable.
|
|
|
|
//: Later layers may add more conventions partitioning the space of names. But
|
|
|
|
//: the above rules will remain inviolate.
|
2018-08-12 18:38:36 +00:00
|
|
|
void check_valid_name(const string& s) {
|
|
|
|
if (s.empty()) {
|
|
|
|
raise << "empty name!\n" << end();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (s.at(0) == '-')
|
|
|
|
raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
|
|
|
|
if (s.substr(0, 2) == "0x") {
|
|
|
|
raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (isdigit(s.at(0)))
|
|
|
|
raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
|
|
|
|
if (SIZE(s) == 2)
|
|
|
|
raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
|
|
|
|
}
|
2018-08-09 06:28:58 +00:00
|
|
|
|
2018-07-27 17:58:15 +00:00
|
|
|
:(scenarios transform)
|
|
|
|
:(scenario map_label)
|
|
|
|
== 0x1
|
2018-08-04 06:23:39 +00:00
|
|
|
# instruction effective address operand displacement immediate
|
|
|
|
# op subop mod rm32 base index scale r32
|
|
|
|
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
|
2018-07-27 17:58:15 +00:00
|
|
|
loop:
|
2018-08-04 06:23:39 +00:00
|
|
|
05 0x0d0c0b0a/imm32 # add to EAX
|
2018-07-27 19:33:08 +00:00
|
|
|
+transform: label 'loop' is at address 1
|
2018-07-27 17:58:15 +00:00
|
|
|
|
2018-08-05 04:29:58 +00:00
|
|
|
:(before "End Level-2 Transforms")
|
2018-07-27 19:33:08 +00:00
|
|
|
// register rewrite_labels as one of the transforms
Transform.push_back(rewrite_labels);
|
2018-07-27 17:58:15 +00:00
|
|
|
:(code)
|
2018-07-27 19:33:08 +00:00
|
|
|
// Transform: resolve labels in the first segment of 'p', which is treated as
// the code segment. Does nothing if the program has no segments.
// Runs three passes in order, and skips the remaining ones as soon as
// trace_contains_errors() reports a problem:
//   1. compute_addresses_for_labels: record where each label points
//   2. drop_labels: remove label definitions from the instructions
//   3. replace_labels_with_addresses: rewrite each use of a label
void rewrite_labels(program& p) {
  trace(99, "transform") << "-- rewrite labels" << end();
  if (p.segments.empty()) return;
  segment& code = p.segments.at(0);
  // Rewrite Labels(segment code)
  map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
  compute_addresses_for_labels(code, address);
  if (!trace_contains_errors()) {
    drop_labels(code);
    if (!trace_contains_errors()) {
      replace_labels_with_addresses(code, address);
    }
  }
}
|
|
|
|
|
2018-07-27 20:26:12 +00:00
|
|
|
void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
|
2018-07-27 17:58:15 +00:00
|
|
|
int current_byte = 0;
|
|
|
|
for (int i = 0; i < SIZE(code.lines); ++i) {
|
|
|
|
const line& inst = code.lines.at(i);
|
|
|
|
for (int j = 0; j < SIZE(inst.words); ++j) {
|
|
|
|
const word& curr = inst.words.at(j);
|
|
|
|
// hack: if we have any operand metadata left after previous transforms,
|
|
|
|
// deduce its size
|
|
|
|
// Maybe we should just move this transform to before instruction
|
|
|
|
// packing, and deduce the size of *all* operands. But then we'll also
|
|
|
|
// have to deal with bitfields.
|
|
|
|
if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
|
|
|
|
if (*curr.data.rbegin() == ':')
|
|
|
|
raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
|
|
|
|
current_byte += 4;
|
|
|
|
}
|
|
|
|
// automatically handle /disp8 and /imm8 here
|
|
|
|
else if (*curr.data.rbegin() != ':') {
|
|
|
|
++current_byte;
|
|
|
|
}
|
|
|
|
else {
|
2018-08-08 23:36:19 +00:00
|
|
|
string label = drop_last(curr.data);
|
2018-08-09 06:21:41 +00:00
|
|
|
// ensure labels look sufficiently different from raw hex
|
2018-08-12 18:38:36 +00:00
|
|
|
check_valid_name(label);
|
|
|
|
if (trace_contains_errors()) return;
|
2018-07-27 17:58:15 +00:00
|
|
|
if (contains_any_operand_metadata(curr))
|
2018-07-27 18:01:44 +00:00
|
|
|
raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
|
2018-07-27 18:05:54 +00:00
|
|
|
if (j > 0)
|
2018-07-27 17:58:15 +00:00
|
|
|
raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
|
2018-07-27 18:05:54 +00:00
|
|
|
put(address, label, current_byte);
|
2018-07-27 19:33:08 +00:00
|
|
|
trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
|
2018-07-27 18:05:54 +00:00
|
|
|
// no modifying current_byte; label definitions won't be in the final binary
|
2018-07-27 17:58:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove every word for which is_label() is true from each line of 'code'.
// The relative order of the remaining words is unchanged.
void drop_labels(segment& code) {
  const int num_lines = SIZE(code.lines);
  for (int line_index = 0; line_index < num_lines; ++line_index) {
    vector<word>& words = code.lines.at(line_index).words;
    // erase-remove: compact the non-label words to the front, then trim the tail
    words.erase(remove_if(words.begin(), words.end(), is_label), words.end());
  }
}
|
|
|
|
|
|
|
|
bool is_label(const word& w) {
|
|
|
|
return *w.data.rbegin() == ':';
|
|
|
|
}
|
|
|
|
|
2018-07-27 20:26:12 +00:00
|
|
|
// Rewrite every word in 'code' whose data is a key of 'address' into raw
// bytes holding the distance from the end of the current instruction to the
// label (label address minus the byte at which the next instruction starts).
//
// The number of bytes emitted depends on the word's metadata:
//   /disp8, /imm8   -> 1 byte
//   /disp16         -> 2 bytes
//   /disp32, /imm32 -> 4 bytes
// 1- and 2-byte offsets are range-checked as *signed* values; an offset that
// doesn't fit raises an error and emits nothing for that word.
// A label reference without any of the tags above also raises an error rather
// than silently vanishing from the instruction.
//
// All other words are copied through unchanged. Each rewritten instruction is
// logged to the trace.
void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {
  int32_t byte_next_instruction_starts_at = 0;
  for (int i = 0; i < SIZE(code.lines); ++i) {
    line& inst = code.lines.at(i);
    // offsets are relative to the end of the instruction containing them
    byte_next_instruction_starts_at += num_bytes(inst);
    line new_inst;
    for (int j = 0; j < SIZE(inst.words); ++j) {
      const word& curr = inst.words.at(j);
      if (!contains_key(address, curr.data)) {
        new_inst.words.push_back(curr);
        continue;
      }
      int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
      if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
        // the byte is interpreted as signed, so anything above 0x7f would wrap
        // around to a negative distance
        if (offset > 0x7f || offset < -0x7f)
          raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
        else
          emit_hex_bytes(new_inst, offset, 1);
      }
      else if (has_metadata(curr, "disp16")) {
        // likewise for a signed 16-bit distance
        if (offset > 0x7fff || offset < -0x7fff)
          raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
        else
          emit_hex_bytes(new_inst, offset, 2);
      }
      else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
        emit_hex_bytes(new_inst, offset, 4);
      }
      else {
        // without a size tag we can't tell how many bytes to emit
        raise << "'" << to_string(inst) << "': label '" << curr.data << "' must be tagged with one of /disp8, /imm8, /disp16, /disp32 or /imm32\n" << end();
      }
    }
    inst.words.swap(new_inst.words);
    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
  }
}
|
|
|
|
|
|
|
|
// Assumes all bitfields are packed.
|
|
|
|
uint32_t num_bytes(const line& inst) {
|
|
|
|
uint32_t sum = 0;
|
|
|
|
for (int i = 0; i < SIZE(inst.words); ++i) {
|
|
|
|
const word& curr = inst.words.at(i);
|
|
|
|
if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) // only multi-byte operands
|
|
|
|
sum += 4;
|
|
|
|
else
|
|
|
|
sum++;
|
|
|
|
}
|
|
|
|
return sum;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Join the 'data' of each word in 'inst' with single spaces, ignoring any
// metadata. Returns an empty string for a line without words.
string data_to_string(const line& inst) {
  ostringstream result;
  const int num_words = SIZE(inst.words);
  if (num_words > 0)
    result << inst.words.at(0).data;
  // every word after the first is preceded by a separator
  for (int k = 1; k < num_words; ++k)
    result << ' ' << inst.words.at(k).data;
  return result.str();
}
|
|
|
|
|
2018-08-08 23:36:19 +00:00
|
|
|
// Return a copy of 's' without its final character.
// An empty string is returned unchanged instead of stepping before begin().
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
|
|
|
|
|
2018-07-27 17:58:15 +00:00
|
|
|
//: Label definitions must be the first word on a line. No jumping inside
|
|
|
|
//: instructions.
|
|
|
|
//: They should also be the only word on a line.
|
|
|
|
//: However, you can absolutely have multiple labels map to the same address,
|
|
|
|
//: as long as they're on separate lines.
|
|
|
|
|
|
|
|
:(scenario multiple_labels_at)
|
|
|
|
== 0x1
|
2018-08-04 06:23:39 +00:00
|
|
|
# instruction effective address operand displacement immediate
|
|
|
|
# op subop mod rm32 base index scale r32
|
|
|
|
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
|
2018-07-27 20:26:12 +00:00
|
|
|
# address 1
|
2018-08-13 05:38:54 +00:00
|
|
|
loop:
|
2018-08-13 04:04:14 +00:00
|
|
|
$loop2:
|
2018-07-27 20:26:12 +00:00
|
|
|
# address 1 (labels take up no space)
|
2018-08-04 06:23:39 +00:00
|
|
|
05 0x0d0c0b0a/imm32 # add to EAX
|
2018-07-27 20:26:12 +00:00
|
|
|
# address 6
|
2018-08-13 04:04:14 +00:00
|
|
|
eb $loop2/disp8
|
2018-07-27 20:26:12 +00:00
|
|
|
# address 8
|
2018-08-13 04:04:14 +00:00
|
|
|
eb $loop3/disp8
|
2018-08-13 23:10:41 +00:00
|
|
|
# address 0xa
|
2018-08-13 04:04:14 +00:00
|
|
|
$loop3:
|
2018-08-13 05:38:54 +00:00
|
|
|
+transform: label 'loop' is at address 1
|
2018-08-13 04:04:14 +00:00
|
|
|
+transform: label '$loop2' is at address 1
|
2018-08-13 23:10:41 +00:00
|
|
|
+transform: label '$loop3' is at address a
|
2018-07-27 20:26:12 +00:00
|
|
|
# first jump is to -7
|
|
|
|
+transform: instruction after transform: 'eb f9'
|
|
|
|
# second jump is to 0 (fall through)
|
|
|
|
+transform: instruction after transform: 'eb 00'
|
2018-08-08 23:36:19 +00:00
|
|
|
|
|
|
|
:(scenario label_too_short)
|
|
|
|
% Hide_errors = true;
|
|
|
|
== 0x1
|
|
|
|
# instruction effective address operand displacement immediate
|
|
|
|
# op subop mod rm32 base index scale r32
|
|
|
|
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
|
|
|
|
xz:
|
|
|
|
05 0x0d0c0b0a/imm32 # add to EAX
|
2018-08-12 18:38:36 +00:00
|
|
|
+error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
|
2018-08-09 06:21:41 +00:00
|
|
|
|
|
|
|
:(scenario label_hex)
|
|
|
|
% Hide_errors = true;
|
|
|
|
== 0x1
|
|
|
|
# instruction effective address operand displacement immediate
|
|
|
|
# op subop mod rm32 base index scale r32
|
|
|
|
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
|
|
|
|
0xab:
|
|
|
|
05 0x0d0c0b0a/imm32 # add to EAX
|
2018-08-12 18:38:36 +00:00
|
|
|
+error: '0xab' looks like a hex number; use a different name
|
|
|
|
|
|
|
|
:(scenario label_negative_hex)
|
|
|
|
% Hide_errors = true;
|
|
|
|
== 0x1
|
|
|
|
# instruction effective address operand displacement immediate
|
|
|
|
# op subop mod rm32 base index scale r32
|
|
|
|
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
|
2018-08-13 04:04:14 +00:00
|
|
|
-a: # indent to avoid looking like a trace_should_not_contain command for this scenario
|
2018-08-12 18:38:36 +00:00
|
|
|
05 0x0d0c0b0a/imm32 # add to EAX
|
|
|
|
+error: '-a' starts with '-', which can be confused with a negative number; use a different name
|