mu/032check_operand_bounds.cc

144 lines
4.3 KiB
C++
Raw Normal View History

//:: Check that the different operands of an instruction aren't too large for their bitfields.
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
void test_check_bitfield_sizes() {
Hide_errors = true;
run(
"== code 0x1\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
"01/add 4/mod 3/rm32 1/r32\n" // add ECX to EBX
);
CHECK_TRACE_CONTENTS(
"error: '4/mod' too large to fit in bitfield mod\n"
);
}
:(before "End Globals")
// Maps an operand-type name (e.g. "mod", "imm8") to the exclusive upper bound
// on values accepted for that operand. check_operand_bounds() halves the
// bound for disp8/disp16 and for the lower limit on negative values.
map<string, uint32_t> Operand_bound;
:(before "End One-time Setup")
// Each bound is 2^(field width in bits); entries are grouped by width.
// 2-bit fields
put_new(Operand_bound, "mod", 1<<2);
put_new(Operand_bound, "scale", 1<<2);
// 3-bit fields
put_new(Operand_bound, "subop", 1<<3);
put_new(Operand_bound, "rm32", 1<<3);
put_new(Operand_bound, "r32", 1<<3);
put_new(Operand_bound, "base", 1<<3);
put_new(Operand_bound, "index", 1<<3);
// 8-bit fields
put_new(Operand_bound, "disp8", 1<<8);
put_new(Operand_bound, "imm8", 1<<8);
// 16-bit fields
put_new(Operand_bound, "disp16", 1<<16);
// no bound needed for disp32 or imm32
:(before "Pack Operands(segment code)")
// Check operand sizes ahead of the "Pack Operands(segment code)" waypoint,
// and return early if the trace contains errors afterwards.
check_operand_bounds(code);
if (trace_contains_errors()) return;
:(code)
void check_operand_bounds(const segment& code) {
trace(3, "transform") << "-- check operand bounds" << end();
for (int i = 0; i < SIZE(code.lines); ++i) {
const line& inst = code.lines.at(i);
for (int j = first_operand(inst); j < SIZE(inst.words); ++j)
check_operand_bounds(inst.words.at(j));
if (trace_contains_errors()) return; // stop at the first mal-formed instruction
}
}
void check_operand_bounds(const word& w) {
for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) {
if (!has_operand_metadata(w, p->first)) continue;
if (!looks_like_hex_int(w.data)) continue; // later transforms are on their own to do their own bounds checking
2018-07-27 20:26:12 +00:00
int32_t x = parse_int(w.data);
if (x >= 0) {
2019-03-18 05:57:42 +00:00
if (p->first == "disp8" || p->first == "disp16") {
if (static_cast<uint32_t>(x) >= p->second/2)
raise << "'" << w.original << "' too large to fit in signed bitfield " << p->first << '\n' << end();
}
else {
if (static_cast<uint32_t>(x) >= p->second)
raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
}
2018-07-27 20:26:12 +00:00
}
else {
// hacky? assuming bound is a power of 2
if (x < -1*static_cast<int32_t>(p->second/2))
raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
}
}
}
2019-03-18 05:57:42 +00:00
// 0xff is the largest non-negative value accepted for imm8 (bound 1<<8).
void test_check_bitfield_sizes_for_imm8() {
  const char* input =
      "== code 0x1\n"
      // shift ECX left
      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX 0xff/imm8";
  run(input);
  CHECK(!trace_contains_errors());
}
// 0x100 equals the imm8 bound (1<<8), so it must be rejected.
void test_check_bitfield_sizes_for_imm8_error() {
  const char* input =
      "== code 0x1\n"
      // shift ECX left
      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX 0x100/imm8";
  Hide_errors = true;
  run(input);
  CHECK_TRACE_CONTENTS(
      "error: '0x100/imm8' too large to fit in bitfield imm8\n"
  );
}
// -0x80 is the lowest negative value accepted for imm8 (-(1<<8)/2).
void test_check_bitfield_sizes_for_negative_imm8() {
  const char* input =
      "== code 0x1\n"
      // shift ECX left
      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX -0x80/imm8";
  run(input);
  CHECK(!trace_contains_errors());
}
// -0x81 is below the lowest negative value accepted for imm8, so it must be
// rejected.
void test_check_bitfield_sizes_for_negative_imm8_error() {
  const char* input =
      "== code 0x1\n"
      // shift ECX left
      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX -0x81/imm8";
  Hide_errors = true;
  run(input);
  CHECK_TRACE_CONTENTS(
      "error: '-0x81/imm8' too large to fit in bitfield imm8\n"
  );
}
// 0x7f is the largest non-negative value accepted for disp8, which is checked
// as a signed field (half of bound 1<<8).
void test_check_bitfield_sizes_for_disp8() {
  const char* input =
      "== code 0x1\n"
      // add ECX to *(EBX+0x7f)
      "01/add 1/mod/*+disp8 3/rm32 1/r32 0x7f/disp8\n";
  // not bothering to run
  transform(input);
  CHECK(!trace_contains_errors());
}
// 0x80 reaches half the disp8 bound, so it must be rejected with the
// signed-bitfield error.
void test_check_bitfield_sizes_for_disp8_error() {
  const char* input =
      "== code 0x1\n"
      // add ECX to *(EBX+0x80)
      "01/add 1/mod/*+disp8 3/rm32 1/r32 0x80/disp8\n";
  Hide_errors = true;
  run(input);
  CHECK_TRACE_CONTENTS(
      "error: '0x80/disp8' too large to fit in signed bitfield disp8\n"
  );
}
// -0x80 is the lowest negative value accepted for disp8 (-(1<<8)/2).
void test_check_bitfield_sizes_for_negative_disp8() {
  const char* input =
      "== code 0x1\n"
      // add ECX to *(EBX-0x80)
      "01/add 1/mod/*+disp8 3/rm32 1/r32 -0x80/disp8\n";
  // not bothering to run
  transform(input);
  CHECK(!trace_contains_errors());
}
// -0x81 is below the lowest negative value accepted for disp8, so it must be
// rejected.
void test_check_bitfield_sizes_for_negative_disp8_error() {
  const char* input =
      "== code 0x1\n"
      // add ECX to *(EBX-0x81)
      "01/add 1/mod/*+disp8 3/rm32 1/r32 -0x81/disp8\n";
  Hide_errors = true;
  run(input);
  CHECK_TRACE_CONTENTS(
      "error: '-0x81/disp8' too large to fit in bitfield disp8\n"
  );
}