mu/linux/bootstrap/020byte_addressing.cc

273 lines
10 KiB
C++
Raw Permalink Normal View History

//: SubX mostly deals with instructions operating on 32-bit operands, but we
//: still need to deal with raw bytes for strings and so on.
//: Unfortunately the register encodings when dealing with bytes are a mess.
//: We need a special case for them.
:(code)
// Name of the 8-bit register selected by a 3-bit register code.
// Codes 0-3 are the lowest bytes of EAX/ECX/EDX/EBX; codes 4-7 are the
// second-lowest bytes of those same four registers.
// Any other code raises an error and yields the empty string.
string rname_8bit(uint8_t r) {
  switch (r) {
  case 0: return "AL";  // lowest byte of EAX
  case 1: return "CL";  // lowest byte of ECX
  case 2: return "DL";  // lowest byte of EDX
  case 3: return "BL";  // lowest byte of EBX
  case 4: return "AH";  // second lowest byte of EAX
  case 5: return "CH";  // second lowest byte of ECX
  case 6: return "DH";  // second lowest byte of EDX
  case 7: return "BH";  // second lowest byte of EBX
  default:
    // Widen to int before printing: a bare uint8_t is streamed as a raw
    // character, which would garble the message instead of showing the code.
    raise << "invalid 8-bit register " << static_cast<int>(r) << '\n' << end();
    return "";
  }
}
// Resolve the r/m8 operand described by a ModR/M byte to a host pointer.
// Direct mode (mod == 3) points into a register; every other mode goes
// through the regular effective-address computation and points into memory.
uint8_t* effective_byte_address(uint8_t modrm) {
  const uint8_t addressing_mode = (modrm>>6);
  if (addressing_mode != 3) {
    // memory operand: nothing byte-specific about computing the address
    return mem_addr_u8(effective_address_number(modrm));
  }
  // register operand: the low 3 bits name an 8-bit register
  const uint8_t reg_code = modrm & 0x7;
  trace(Callstack_depth+1, "run") << "r/m8 is " << rname_8bit(reg_code) << end();
  return reg_8bit(reg_code);
}
// Host pointer to the byte backing an 8-bit register code.
// The low two bits of the code choose one of the first four 32-bit registers;
// bit 2 chooses between that register's lowest byte (_L) and the next one (_H).
uint8_t* reg_8bit(uint8_t rm) {
  uint8_t* const lowest_byte = reinterpret_cast<uint8_t*>(&Reg[rm & 0x3].i);
  // stepping forward one byte to reach the _H register assumes host is little-endian
  const int byte_offset = (rm & 0x4) ? 1 : 0;
  return lowest_byte + byte_offset;
}
2018-10-13 06:45:41 +00:00
2018-10-14 07:00:39 +00:00
:(before "End Initialize Op Names")
// opcode 88: description entered into the op-name table under the opcode's hex string
put_new(Name, "88", "copy r8 to r8/m8-at-r32");
2018-10-13 06:45:41 +00:00
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
:(code)
void test_copy_r8_to_mem_at_rm32() {
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
Reg[EBX].i = 0x224488ab;
Reg[EAX].i = 0x2000;
run(
"== code 0x1\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// op ModR/M SIB displacement immediate
" 88 18 \n" // copy BL to the byte at *EAX
// ModR/M in binary: 00 (indirect mode) 011 (src BL) 000 (dest EAX)
"== data 0x2000\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
"f0 cc bb aa\n"
);
CHECK_TRACE_CONTENTS(
"run: copy BL to r8/m8-at-r32\n"
"run: effective address is 0x00002000 (EAX)\n"
"run: storing 0xab\n"
);
CHECK_EQ(0xaabbccab, read_mem_u32(0x2000));
}
2018-10-13 06:45:41 +00:00
:(before "End Single-Byte Opcodes")
case 0x88: {  // copy r8 to r/m8
  const uint8_t modrm = next();
  const uint8_t src_reg = (modrm>>3)&0x7;  // middle 3 bits of ModR/M name the source r8
  trace(Callstack_depth+1, "run") << "copy " << rname_8bit(src_reg) << " to r8/m8-at-r32" << end();
  // exactly one byte moves; the bytes next to the destination are left alone
  uint8_t* const target = effective_byte_address(modrm);
  const uint8_t value = *reg_8bit(src_reg);
  *target = value;
  trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*target) << end();
  break;
}
//:
2018-10-14 07:00:39 +00:00
:(before "End Initialize Op Names")
// opcode 8a: description entered into the op-name table under the opcode's hex string
put_new(Name, "8a", "copy r8/m8-at-r32 to r8");
2018-10-13 06:45:41 +00:00
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
:(code)
void test_copy_mem_at_rm32_to_r8() {
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
Reg[EBX].i = 0xaabbcc0f; // one nibble each of lowest byte set to all 0s and all 1s, to maximize value of this test
Reg[EAX].i = 0x2000;
run(
"== code 0x1\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// op ModR/M SIB displacement immediate
" 8a 18 \n" // copy just the byte at *EAX to BL
// ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
"== data 0x2000\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
"ab ff ff ff\n" // 0xab with more data in following bytes
);
CHECK_TRACE_CONTENTS(
"run: copy r8/m8-at-r32 to BL\n"
"run: effective address is 0x00002000 (EAX)\n"
"run: storing 0xab\n"
// remaining bytes of EBX are *not* cleared
"run: EBX now contains 0xaabbccab\n"
);
}
2018-10-13 06:45:41 +00:00
:(before "End Single-Byte Opcodes")
case 0x8a: {  // copy r/m8 to r8
  const uint8_t modrm = next();
  const uint8_t dest_reg = (modrm>>3)&0x7;  // middle 3 bits of ModR/M name the destination r8
  trace(Callstack_depth+1, "run") << "copy r8/m8-at-r32 to " << rname_8bit(dest_reg) << end();
  // fetch the single source byte up front; only that one byte of the register is overwritten
  const uint8_t value = *effective_byte_address(modrm);
  trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(value) << end();
  *reg_8bit(dest_reg) = value;
  // report the whole 32-bit register that holds the byte just written
  const uint8_t enclosing_reg = dest_reg & 0x3;
  trace(Callstack_depth+1, "run") << rname(enclosing_reg) << " now contains 0x" << HEXWORD << Reg[enclosing_reg].u << end();
  break;
}
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
:(code)
void test_cannot_copy_byte_to_ESP_EBP_ESI_EDI() {
Reg[ESI].u = 0xaabbccdd;
Reg[EBX].u = 0x11223344;
run(
"== code 0x1\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// op ModR/M SIB displacement immediate
" 8a f3 \n" // copy just the byte at *EBX to 8-bit register '6'
// ModR/M in binary: 11 (direct mode) 110 (dest 8-bit 'register 6') 011 (src EBX)
);
CHECK_TRACE_CONTENTS(
// ensure 8-bit register '6' is DH, not ESI
"run: copy r8/m8-at-r32 to DH\n"
"run: storing 0x44\n"
);
// ensure ESI is unchanged
CHECK_EQ(Reg[ESI].u, 0xaabbccdd);
}
//:
:(before "End Initialize Op Names")
// opcode c6: description entered into the op-name table; per the text, only subop 0 (mov) is described
put_new(Name, "c6", "copy imm8 to r8/m8-at-r32 with subop 0 (mov)");
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
:(code)
void test_copy_imm8_to_mem_at_rm32() {
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
Reg[EAX].i = 0x2000;
run(
"== code 0x1\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
// op ModR/M SIB displacement immediate
" c6 00 dd \n" // copy to the byte at *EAX
// ModR/M in binary: 00 (indirect mode) 000 (unused) 000 (dest EAX)
"== data 0x2000\n"
5001 - drop the :(scenario) DSL I've been saying for a while[1][2][3] that adding extra abstractions makes things harder for newcomers, and adding new notations doubly so. And then I notice this DSL in my own backyard. Makes me feel like a hypocrite. [1] https://news.ycombinator.com/item?id=13565743#13570092 [2] https://lobste.rs/s/to8wpr/configuration_files_are_canary_warning [3] https://lobste.rs/s/mdmcdi/little_languages_by_jon_bentley_1986#c_3miuf2 The implementation of the DSL was also highly hacky: a) It was happening in the tangle/ tool, but was utterly unrelated to tangling layers. b) There were several persnickety constraints on the different kinds of lines and the specific order they were expected in. I kept finding bugs where the translator would silently do the wrong thing. Or the error messages sucked, and readers may be stuck looking at the generated code to figure out what happened. Fixing error messages would require a lot more code, which is one of my arguments against DSLs in the first place: they may be easy to implement, but they're hard to design to go with the grain of the underlying platform. They require lots of iteration. Is that effort worth prioritizing in this project? On the other hand, the DSL did make at least some readers' life easier, the ones who weren't immediately put off by having to learn a strange syntax. There were fewer quotes to parse, fewer backslash escapes. Anyway, since there are also people who dislike having to put up with strange syntaxes, we'll call that consideration a wash and tear this DSL out. --- This commit was sheer drudgery. Hopefully it won't need to be redone with a new DSL because I grow sick of backslashes.
2019-03-13 01:56:55 +00:00
"f0 cc bb aa\n"
);
CHECK_TRACE_CONTENTS(
"run: copy imm8 to r8/m8-at-r32\n"
"run: effective address is 0x00002000 (EAX)\n"
"run: storing 0xdd\n"
);
CHECK_EQ(0xaabbccdd, read_mem_u32(0x2000));
}
:(before "End Single-Byte Opcodes")
case 0xc6: {  // copy imm8 to r/m8
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "copy imm8 to r8/m8-at-r32" << end();
  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
  if (subop != 0) {
    cerr << "unrecognized subop for opcode c6: " << NUM(subop) << " (only 0/copy currently implemented)\n";
    exit(1);
  }
  // Decode the destination before fetching the immediate. In the instruction
  // stream the immediate comes last, after any SIB/displacement bytes, so
  // reading it straight after ModR/M would take a displacement byte for the
  // immediate.
  // NOTE(review): relies on effective_byte_address() consuming those bytes
  // via next() -- confirm against effective_address_number().
  uint8_t* dest = effective_byte_address(modrm);
  const uint8_t src = next();
  trace(Callstack_depth+1, "run") << "imm8 is 0x" << HEXBYTE << NUM(src) << end();
  // a single byte is written; neighbouring bytes of the destination are untouched
  *dest = src;
  trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*dest) << end();
  break;
}
2020-03-07 01:41:36 +00:00
//:: set flags (setcc)
:(before "End Initialize Op Names")
// Descriptions for the two-byte (0f-prefixed) setcc opcodes go in Name_0f.
// These two depend on ZF alone.
put_new(Name_0f, "94", "set r8/m8-at-rm32 to 1 if equal, if ZF is set, 0 otherwise (setcc/setz/sete)");
put_new(Name_0f, "95", "set r8/m8-at-rm32 to 1 if not equal, if ZF is not set, 0 otherwise (setcc/setnz/setne)");
2020-10-16 04:46:51 +00:00
// Ordering comparisons come in pairs: the first of each pair is phrased in
// terms of SF/OF, the "(addr, float)" one in terms of CF.
// greater
put_new(Name_0f, "9f", "set r8/m8-at-rm32 to 1 if greater, if ZF is unset and SF == OF, 0 otherwise (setcc/setg/setnle)");
put_new(Name_0f, "97", "set r8/m8-at-rm32 to 1 if greater (addr, float), if ZF is unset and CF is unset, 0 otherwise (setcc/seta/setnbe)");
// greater or equal
put_new(Name_0f, "9d", "set r8/m8-at-rm32 to 1 if greater or equal, if SF == OF, 0 otherwise (setcc/setge/setnl)");
put_new(Name_0f, "93", "set r8/m8-at-rm32 to 1 if greater or equal (addr, float), if CF is unset, 0 otherwise (setcc/setae/setnb)");
// lesser
put_new(Name_0f, "9c", "set r8/m8-at-rm32 to 1 if lesser, if SF != OF, 0 otherwise (setcc/setl/setnge)");
put_new(Name_0f, "92", "set r8/m8-at-rm32 to 1 if lesser (addr, float), if CF is set, 0 otherwise (setcc/setb/setnae)");
// lesser or equal
put_new(Name_0f, "9e", "set r8/m8-at-rm32 to 1 if lesser or equal, if ZF is set or SF != OF, 0 otherwise (setcc/setle/setng)");
put_new(Name_0f, "96", "set r8/m8-at-rm32 to 1 if lesser or equal (addr, float), if ZF is set or CF is set, 0 otherwise (setcc/setbe/setna)");
2020-03-07 01:41:36 +00:00
:(before "End Two-Byte Opcodes Starting With 0f")
case 0x94: {  // setz/sete: r/m8 <- ZF
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  // the flag value itself is what gets stored
  *target = ZF;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x95: {  // setnz/setne: r/m8 <- !ZF
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  const bool not_equal = !ZF;
  *target = not_equal;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x9f: {  // setg/setnle: r/m8 <- (!ZF and SF == OF)
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  // 'greater' per the op description: not equal, and sign agrees with overflow
  const bool greater = (SF == OF) && !ZF;
  *target = greater;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x97: {  // seta/setnbe: r/m8 <- (!CF and !ZF)
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  // neither flag may be set
  const bool above = !(CF || ZF);
  *target = above;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x9d: {  // setge/setnl: r/m8 <- (SF == OF)
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  const bool greater_or_equal = (OF == SF);
  *target = greater_or_equal;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x93: {  // setae/setnb: r/m8 <- !CF
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  const bool above_or_equal = !CF;
  *target = above_or_equal;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x9c: {  // setl/setnge: r/m8 <- (SF != OF)
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  const bool lesser = (OF != SF);
  *target = lesser;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x92: {  // setb/setnae: r/m8 <- CF
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  // the flag value itself is what gets stored
  *target = CF;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x9e: {  // setle/setng: r/m8 <- (ZF or SF != OF)
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  // equal, or sign disagrees with overflow
  const bool lesser_or_equal = (SF != OF) || ZF;
  *target = lesser_or_equal;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}
case 0x96: {  // setbe/setna: r/m8 <- (ZF or CF)
  const uint8_t modrm = next();
  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
  uint8_t* const target = effective_byte_address(modrm);
  // either flag being set is enough
  const bool below_or_equal = (CF || ZF);
  *target = below_or_equal;
  trace(Callstack_depth+1, "run") << "storing " << NUM(*target) << end();
  break;
}