subx.md distinguishes between operands and arguments. Let's use that terminology
more consistently in the sources.
This commit is contained in:
Kartik Agaram 2020-09-27 22:05:11 -07:00
parent f068bda98e
commit eee09a5607
6 changed files with 327 additions and 327 deletions

View File

@ -7,18 +7,18 @@
//: metastasizes at runtime into kilobytes of misinterpreted instructions.
//:
//: To mitigate these issues, we'll start programming in terms of logical
//: operands rather than physical bytes. Some operands are smaller than a
//: arguments rather than physical bytes. Some arguments are smaller than a
//: byte, and others may consist of multiple bytes. This layer will correctly
//: pack and order the bytes corresponding to the operands in an instruction.
//: pack and order the bytes corresponding to the arguments in an instruction.
:(before "End Help Texts")
put_new(Help, "instructions",
"Each x86 instruction consists of an instruction or opcode and some number\n"
"of operands.\n"
"Each operand has a type. An instruction won't have more than one operand of\n"
"of arguments.\n"
"Each argument has a type. An instruction won't have more than one argument of\n"
"any type.\n"
"Each instruction has some set of allowed operand types. It'll reject others.\n"
"The complete list of operand types: mod, subop, r32 (register), rm32\n"
"Each instruction has some set of allowed argument types. It'll reject others.\n"
"The complete list of argument types: mod, subop, r32 (register), rm32\n"
"(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
"imm32.\n"
"Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
@ -43,32 +43,32 @@ void test_pack_immediate_constants() {
);
}
//: NOTE(review): this listing is a rendered diff, so each renamed line shows
//: up twice: first with the old Instruction_operands spelling, then with the
//: new Instruction_arguments spelling.
//: The set holds the twelve metadata tags that count as argument types; the
//: has_*_metadata() and contains_any_*_metadata() helpers look a word's
//: metadata up in it and ignore tags that are not members.
//: complete set of valid operand types
//: complete set of valid argument types
:(before "End Globals")
set<string> Instruction_operands;
set<string> Instruction_arguments;
:(before "End One-time Setup")
Instruction_operands.insert("subop");
Instruction_operands.insert("mod");
Instruction_operands.insert("rm32");
Instruction_operands.insert("base");
Instruction_operands.insert("index");
Instruction_operands.insert("scale");
Instruction_operands.insert("r32");
Instruction_operands.insert("disp8");
Instruction_operands.insert("disp16");
Instruction_operands.insert("disp32");
Instruction_operands.insert("imm8");
Instruction_operands.insert("imm32");
Instruction_arguments.insert("subop");
Instruction_arguments.insert("mod");
Instruction_arguments.insert("rm32");
Instruction_arguments.insert("base");
Instruction_arguments.insert("index");
Instruction_arguments.insert("scale");
Instruction_arguments.insert("r32");
Instruction_arguments.insert("disp8");
Instruction_arguments.insert("disp16");
Instruction_arguments.insert("disp32");
Instruction_arguments.insert("imm8");
Instruction_arguments.insert("imm32");
//: the per-type help pages are registered together with the other help texts
:(before "End Help Texts")
init_operand_type_help();
init_argument_type_help();
:(code)
void init_operand_type_help() {
void init_argument_type_help() {
put(Help, "mod",
"2-bit operand controlling the _addressing mode_ of many instructions,\n"
"2-bit argument controlling the _addressing mode_ of many instructions,\n"
"to determine how to compute the _effective address_ to look up memory at\n"
"based on the 'rm32' operand and potentially others.\n"
"based on the 'rm32' argument and potentially others.\n"
"\n"
"If mod = 3, just operate on the contents of the register specified by rm32\n"
" (direct mode).\n"
@ -87,18 +87,18 @@ void init_operand_type_help() {
" - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
);
put(Help, "subop",
"Additional 3-bit operand for determining the instruction when the opcode\n"
"Additional 3-bit argument for determining the instruction when the opcode\n"
"is 81, 8f, d3, f7 or ff.\n"
"Can't coexist with operand of type 'r32' in a single instruction, because\n"
"Can't coexist with argument of type 'r32' in a single instruction, because\n"
"the two use the same bits.\n"
);
put(Help, "r32",
"3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
"3-bit argument specifying a register argument used directly, without any further addressing modes.\n"
);
put(Help, "rm32",
"32-bit value in register or memory. The precise details of its construction\n"
"depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
"potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
"depend on the eponymous 3-bit 'rm32' argument, the 'mod' argument, and also\n"
"potentially the 'SIB' arguments ('scale', 'index' and 'base') and a displacement\n"
"('disp8' or 'disp32').\n"
"\n"
"For complete details, spend some time with two tables in the IA-32 software\n"
@ -107,26 +107,26 @@ void init_operand_type_help() {
" - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
);
put(Help, "base",
"Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
"Additional 3-bit argument (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
"register containing an address to look up.\n"
"This address may be further modified by 'index' and 'scale' operands.\n"
"This address may be further modified by 'index' and 'scale' arguments.\n"
" effective address = base + index*scale + displacement (disp8 or disp32)\n"
"For complete details, spend some time with the IA-32 software developer's manual,\n"
"volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
"It is included in this repository as 'sib.pdf'.\n"
);
put(Help, "index",
"Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
"the 'base' operand to compute the 'effective address' at which to look up memory.\n"
"Optional 3-bit argument (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
"the 'base' argument to compute the 'effective address' at which to look up memory.\n"
" effective address = base + index*scale + displacement (disp8 or disp32)\n"
"For complete details, spend some time with the IA-32 software developer's manual,\n"
"volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
"It is included in this repository as 'sib.pdf'.\n"
);
put(Help, "scale",
"Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
"power of 2 to be multiplied to the 'index' operand before adding the result to\n"
"the 'base' operand to compute the _effective address_ to operate on.\n"
"Optional 2-bit argument (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
"power of 2 to be multiplied to the 'index' argument before adding the result to\n"
"the 'base' argument to compute the _effective address_ to operate on.\n"
" effective address = base + index * scale + displacement (disp8 or disp32)\n"
"\n"
"When scale is 0, use index unmodified.\n"
@ -156,27 +156,27 @@ void init_operand_type_help() {
);
}
//:: transform packing operands into bytes in the right order
//:: transform packing arguments into bytes in the right order
//: Registers the program-level packer in the Transform list.
//: NOTE(review): rendered diff -- the two push_back lines are the old and new
//: spelling of one statement, not two registrations.
:(after "Begin Transforms")
Transform.push_back(pack_operands);
Transform.push_back(pack_arguments);
:(code)
// Program-level transform: looks up the "code" segment and hands every line
// for which all_hex_bytes() is false to the per-line packer, tracing the
// instruction before and after. Does nothing when the program has no segments.
// NOTE(review): rendered diff -- the function header, the level-3 trace line
// and the per-line call each appear twice (old pack_operands spelling first,
// new pack_arguments spelling second).
void pack_operands(program& p) {
void pack_arguments(program& p) {
if (p.segments.empty()) return;
// NOTE(review): the result of find() is dereferenced unchecked; presumably a
// "code" segment is guaranteed whenever any segment exists -- confirm.
segment& code = *find(p, "code");
// The next comment is a tangle waypoint: the checking layer splices its call
// in with :(before "Pack Operands(segment code)"), so the old wording is
// load-bearing and was left untouched by the rename.
// Pack Operands(segment code)
trace(3, "transform") << "-- pack operands" << end();
trace(3, "transform") << "-- pack arguments" << end();
for (int i = 0; i < SIZE(code.lines); ++i) {
line& inst = code.lines.at(i);
// lines for which all_hex_bytes() holds are left exactly as written
if (all_hex_bytes(inst)) continue;
trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
pack_operands(inst);
pack_arguments(inst);
trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
}
}
void pack_operands(line& inst) {
void pack_arguments(line& inst) {
line new_inst;
add_opcodes(inst, new_inst);
add_modrm_byte(inst, new_inst);
@ -201,19 +201,19 @@ void add_modrm_byte(const line& in, line& out) {
bool emit = false;
for (int i = 0; i < SIZE(in.words); ++i) {
const word& curr = in.words.at(i);
if (has_operand_metadata(curr, "mod")) {
if (has_argument_metadata(curr, "mod")) {
mod = hex_byte(curr.data);
emit = true;
}
else if (has_operand_metadata(curr, "rm32")) {
else if (has_argument_metadata(curr, "rm32")) {
rm32 = hex_byte(curr.data);
emit = true;
}
else if (has_operand_metadata(curr, "r32")) {
else if (has_argument_metadata(curr, "r32")) {
reg_subop = hex_byte(curr.data);
emit = true;
}
else if (has_operand_metadata(curr, "subop")) {
else if (has_argument_metadata(curr, "subop")) {
reg_subop = hex_byte(curr.data);
emit = true;
}
@ -227,15 +227,15 @@ void add_sib_byte(const line& in, line& out) {
bool emit = false;
for (int i = 0; i < SIZE(in.words); ++i) {
const word& curr = in.words.at(i);
if (has_operand_metadata(curr, "scale")) {
if (has_argument_metadata(curr, "scale")) {
scale = hex_byte(curr.data);
emit = true;
}
else if (has_operand_metadata(curr, "index")) {
else if (has_argument_metadata(curr, "index")) {
index = hex_byte(curr.data);
emit = true;
}
else if (has_operand_metadata(curr, "base")) {
else if (has_argument_metadata(curr, "base")) {
base = hex_byte(curr.data);
emit = true;
}
@ -247,11 +247,11 @@ void add_sib_byte(const line& in, line& out) {
// Walks the words of `in` and, for each word tagged disp8, disp16 or disp32,
// calls emit_hex_bytes(out, word, N) with N = 1, 2 or 4 respectively.
// NOTE(review): rendered diff -- every condition appears twice (old
// has_operand_metadata spelling, then new has_argument_metadata spelling).
void add_disp_bytes(const line& in, line& out) {
for (int i = 0; i < SIZE(in.words); ++i) {
const word& curr = in.words.at(i);
if (has_operand_metadata(curr, "disp8"))
if (has_argument_metadata(curr, "disp8"))
emit_hex_bytes(out, curr, 1);
// NOTE(review): the disp16 test starts a fresh `if` instead of continuing
// with `else if` (compare the imm8/imm32 chain in add_imm_bytes), so a disp8
// word is tested against disp16/disp32 as well. The per-word metadata check
// rejects words carrying two argument types, so this is presumably only a
// redundant lookup -- confirm before changing.
if (has_operand_metadata(curr, "disp16"))
if (has_argument_metadata(curr, "disp16"))
emit_hex_bytes(out, curr, 2);
else if (has_operand_metadata(curr, "disp32"))
else if (has_argument_metadata(curr, "disp32"))
emit_hex_bytes(out, curr, 4);
}
}
@ -259,9 +259,9 @@ void add_disp_bytes(const line& in, line& out) {
// Walks the words of `in` and, for each word tagged imm8 or imm32, calls
// emit_hex_bytes(out, word, N) with N = 1 or 4 respectively. Words with
// neither tag are passed over.
// NOTE(review): rendered diff -- every condition appears twice (old
// has_operand_metadata spelling, then new has_argument_metadata spelling).
void add_imm_bytes(const line& in, line& out) {
for (int i = 0; i < SIZE(in.words); ++i) {
const word& curr = in.words.at(i);
if (has_operand_metadata(curr, "imm8"))
if (has_argument_metadata(curr, "imm8"))
emit_hex_bytes(out, curr, 1);
else if (has_operand_metadata(curr, "imm32"))
else if (has_argument_metadata(curr, "imm32"))
emit_hex_bytes(out, curr, 4);
}
}
@ -435,7 +435,7 @@ bool all_hex_bytes(const line& inst) {
}
bool is_hex_byte(const word& curr) {
if (contains_any_operand_metadata(curr))
if (contains_any_argument_metadata(curr))
return false;
if (SIZE(curr.data) != 2)
return false;
@ -444,19 +444,19 @@ bool is_hex_byte(const word& curr) {
return true;
}
// Returns true as soon as one metadata entry of `word` is a member of the
// global set of argument-type tags; returns false when none is (including
// when the word has no metadata at all).
// NOTE(review): rendered diff -- the header and the lookup line each appear
// twice (old *_operand* / Instruction_operands spelling first).
bool contains_any_operand_metadata(const word& word) {
bool contains_any_argument_metadata(const word& word) {
for (int i = 0; i < SIZE(word.metadata); ++i)
if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
if (Instruction_arguments.find(word.metadata.at(i)) != Instruction_arguments.end())
return true;
return false;
}
bool has_operand_metadata(const line& inst, const string& m) {
bool has_argument_metadata(const line& inst, const string& m) {
bool result = false;
for (int i = 0; i < SIZE(inst.words); ++i) {
if (!has_operand_metadata(inst.words.at(i), m)) continue;
if (!has_argument_metadata(inst.words.at(i), m)) continue;
if (result) {
raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
raise << "'" << to_string(inst) << "' has conflicting " << m << " arguments\n" << end();
return false;
}
result = true;
@ -464,14 +464,14 @@ bool has_operand_metadata(const line& inst, const string& m) {
return result;
}
bool has_operand_metadata(const word& w, const string& m) {
bool has_argument_metadata(const word& w, const string& m) {
bool result = false;
bool metadata_found = false;
for (int i = 0; i < SIZE(w.metadata); ++i) {
const string& curr = w.metadata.at(i);
if (Instruction_operands.find(curr) == Instruction_operands.end()) continue; // ignore unrecognized metadata
if (Instruction_arguments.find(curr) == Instruction_arguments.end()) continue; // ignore unrecognized metadata
if (metadata_found) {
raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
return false;
}
metadata_found = true;
@ -482,7 +482,7 @@ bool has_operand_metadata(const word& w, const string& m) {
// Returns a copy of the first word of `inst` whose metadata matches argument
// type `m` according to the per-word has_*_metadata() check.
// Callers are expected to have established that such a word exists: when no
// word matches, the function reaches assert(false).
// NOTE(review): if NDEBUG is defined the assert compiles away and control
// flows off the end of a non-void function, which is undefined behaviour --
// consider an explicit fallback return or abort.
// NOTE(review): rendered diff -- the `if` line appears twice (old
// has_operand_metadata spelling, then new has_argument_metadata spelling).
word metadata(const line& inst, const string& m) {
for (int i = 0; i < SIZE(inst.words); ++i)
if (has_operand_metadata(inst.words.at(i), m))
if (has_argument_metadata(inst.words.at(i), m))
return inst.words.at(i);
assert(false);
}

View File

@ -1,41 +1,41 @@
//: Since we're tagging operands with their types, let's start checking these
//: operand types for each instruction.
//: Since we're tagging arguments with their types, let's start checking these
//: argument types for each instruction.
// Runs a code segment whose only instruction is the bare opcode `cd` and
// expects the trace to contain the "missing imm8" error for it.
// NOTE(review): rendered diff -- the test name and the expected error string
// each appear twice (old "operand" wording first, new "argument" wording
// second); only one of each exists in either revision.
void test_check_missing_imm8_operand() {
void test_check_missing_imm8_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
"cd\n" // interrupt ??
);
CHECK_TRACE_CONTENTS(
"error: 'cd' (software interrupt): missing imm8 operand\n"
"error: 'cd' (software interrupt): missing imm8 argument\n"
);
}
//: Splices the checking pass into the packing transform, just ahead of its
//: "Pack Operands(segment code)" waypoint comment; the transform returns
//: early when checking has put errors in the trace.
//: NOTE(review): the waypoint text keeps the old "Operands" wording in both
//: revisions; it has to match the comment in the packing transform exactly.
//: NOTE(review): rendered diff -- the two call lines are the old and new
//: spelling of a single statement.
:(before "Pack Operands(segment code)")
check_operands(code);
check_arguments(code);
if (trace_contains_errors()) return;
:(code)
// Segment-level check: emits a level-3 trace line, then runs the per-line
// check on each line of `code` in order, returning as soon as the trace
// contains an error.
// NOTE(review): rendered diff -- the old header and old trace line come
// first, followed by the new header and new trace line; the per-line call is
// likewise shown in both spellings.
void check_operands(const segment& code) {
trace(3, "transform") << "-- check operands" << end();
void check_arguments(const segment& code) {
trace(3, "transform") << "-- check arguments" << end();
for (int i = 0; i < SIZE(code.lines); ++i) {
check_operands(code.lines.at(i));
check_arguments(code.lines.at(i));
if (trace_contains_errors()) return; // stop at the first mal-formed instruction
}
}
// Line-level check: normalises the first word with preprocess_op() and
// dispatches on its data. "0f" and "f3" go to their dedicated checkers;
// everything else goes to the (inst, op) overload.
// NOTE(review): first_argument() also treats "f2" as a prefix, but no "f2"
// branch is visible here -- possibly added by another layer; confirm.
// NOTE(review): rendered diff -- the header and each dispatch call appear
// twice (old check_operands* spelling, then new check_arguments* spelling).
void check_operands(const line& inst) {
void check_arguments(const line& inst) {
// words.at(0) is read unconditionally, so `inst` is assumed to be non-empty
word op = preprocess_op(inst.words.at(0));
if (op.data == "0f") {
check_operands_0f(inst);
check_arguments_0f(inst);
return;
}
if (op.data == "f3") {
check_operands_f3(inst);
check_arguments_f3(inst);
return;
}
check_operands(inst, op);
check_arguments(inst, op);
}
word preprocess_op(word/*copy*/ op) {
@ -54,17 +54,17 @@ void test_preprocess_op() {
CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
}
//: To check the operands for an opcode, we'll track the permitted operands
//: To check the arguments for an opcode, we'll track the permitted arguments
//: for each supported opcode in a bitvector. That way we can often compute the
//: 'received' operand bitvector for each instruction's operands and compare
//: 'received' argument bitvector for each instruction's arguments and compare
//: it with the 'expected' bitvector.
//:
//: The 'expected' and 'received' bitvectors can be different; the MODRM bit
//: in the 'expected' bitvector maps to multiple 'received' operand types in
//: in the 'expected' bitvector maps to multiple 'received' argument types in
//: an instruction. We deal in expected bitvectors throughout.
:(before "End Types")
enum expected_operand_type {
enum expected_argument_type {
// start from the least significant bit
MODRM, // more complex, may also involve disp8 or disp32
SUBOP,
@ -77,7 +77,7 @@ enum expected_operand_type {
};
:(before "End Globals")
vector<string> Operand_type_name;
map<string, expected_operand_type> Operand_type;
map<string, expected_argument_type> Operand_type;
:(before "End One-time Setup")
init_op_types();
:(code)
@ -96,160 +96,160 @@ void init_op_types() {
}
//: Permitted_arguments maps an opcode (as a string such as "f4") to the 8-bit
//: 'expected' bitvector of argument types that opcode accepts.
//: INVALID_OPERANDS is the sentinel returned by the bitvector helpers after
//: they raise a conflicting- or duplicate-type error; its name keeps the old
//: "OPERANDS" wording in both revisions.
//: NOTE(review): rendered diff -- each declaration and the init call appear
//: twice (old spelling first, new spelling second).
:(before "End Globals")
map</*op*/string, /*bitvector*/uint8_t> Permitted_operands;
const uint8_t INVALID_OPERANDS = 0xff; // no instruction uses all the operand types
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments;
const uint8_t INVALID_OPERANDS = 0xff; // no instruction uses all the argument types
:(before "End One-time Setup")
init_permitted_operands();
init_permitted_arguments();
:(code)
// Fills the opcode -> expected-bitvector table, one put() per opcode, grouped
// into lettered classes that share a bitvector.
// Bit layout, as spelled out by the column headers inside this function
// (least significant bit on the right):
//   0x01 modrm, 0x02 subop, 0x04 disp8, 0x08 disp16,
//   0x10 disp32, 0x20 imm8, 0x40 imm32
// NOTE(review): rendered diff -- every put() group is listed twice, first
// against the old Permitted_operands table and then against the new
// Permitted_arguments table; the header and the "Class A" comment are
// duplicated the same way.
void init_permitted_operands() {
//// Class A: just op, no operands
void init_permitted_arguments() {
//// Class A: just op, no arguments
// halt
put(Permitted_operands, "f4", 0x00);
put(Permitted_arguments, "f4", 0x00);
// inc
put(Permitted_operands, "40", 0x00);
put(Permitted_operands, "41", 0x00);
put(Permitted_operands, "42", 0x00);
put(Permitted_operands, "43", 0x00);
put(Permitted_operands, "44", 0x00);
put(Permitted_operands, "45", 0x00);
put(Permitted_operands, "46", 0x00);
put(Permitted_operands, "47", 0x00);
put(Permitted_arguments, "40", 0x00);
put(Permitted_arguments, "41", 0x00);
put(Permitted_arguments, "42", 0x00);
put(Permitted_arguments, "43", 0x00);
put(Permitted_arguments, "44", 0x00);
put(Permitted_arguments, "45", 0x00);
put(Permitted_arguments, "46", 0x00);
put(Permitted_arguments, "47", 0x00);
// dec
put(Permitted_operands, "48", 0x00);
put(Permitted_operands, "49", 0x00);
put(Permitted_operands, "4a", 0x00);
put(Permitted_operands, "4b", 0x00);
put(Permitted_operands, "4c", 0x00);
put(Permitted_operands, "4d", 0x00);
put(Permitted_operands, "4e", 0x00);
put(Permitted_operands, "4f", 0x00);
put(Permitted_arguments, "48", 0x00);
put(Permitted_arguments, "49", 0x00);
put(Permitted_arguments, "4a", 0x00);
put(Permitted_arguments, "4b", 0x00);
put(Permitted_arguments, "4c", 0x00);
put(Permitted_arguments, "4d", 0x00);
put(Permitted_arguments, "4e", 0x00);
put(Permitted_arguments, "4f", 0x00);
// push
put(Permitted_operands, "50", 0x00);
put(Permitted_operands, "51", 0x00);
put(Permitted_operands, "52", 0x00);
put(Permitted_operands, "53", 0x00);
put(Permitted_operands, "54", 0x00);
put(Permitted_operands, "55", 0x00);
put(Permitted_operands, "56", 0x00);
put(Permitted_operands, "57", 0x00);
put(Permitted_arguments, "50", 0x00);
put(Permitted_arguments, "51", 0x00);
put(Permitted_arguments, "52", 0x00);
put(Permitted_arguments, "53", 0x00);
put(Permitted_arguments, "54", 0x00);
put(Permitted_arguments, "55", 0x00);
put(Permitted_arguments, "56", 0x00);
put(Permitted_arguments, "57", 0x00);
// pop
put(Permitted_operands, "58", 0x00);
put(Permitted_operands, "59", 0x00);
put(Permitted_operands, "5a", 0x00);
put(Permitted_operands, "5b", 0x00);
put(Permitted_operands, "5c", 0x00);
put(Permitted_operands, "5d", 0x00);
put(Permitted_operands, "5e", 0x00);
put(Permitted_operands, "5f", 0x00);
put(Permitted_arguments, "58", 0x00);
put(Permitted_arguments, "59", 0x00);
put(Permitted_arguments, "5a", 0x00);
put(Permitted_arguments, "5b", 0x00);
put(Permitted_arguments, "5c", 0x00);
put(Permitted_arguments, "5d", 0x00);
put(Permitted_arguments, "5e", 0x00);
put(Permitted_arguments, "5f", 0x00);
// sign-extend EAX into EDX
put(Permitted_operands, "99", 0x00);
put(Permitted_arguments, "99", 0x00);
// return
put(Permitted_operands, "c3", 0x00);
put(Permitted_arguments, "c3", 0x00);
//// Class B: just op and disp8
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 0 0 |0 1 0 0
// jump
put(Permitted_operands, "eb", 0x04);
put(Permitted_operands, "72", 0x04);
put(Permitted_operands, "73", 0x04);
put(Permitted_operands, "74", 0x04);
put(Permitted_operands, "75", 0x04);
put(Permitted_operands, "76", 0x04);
put(Permitted_operands, "77", 0x04);
put(Permitted_operands, "7c", 0x04);
put(Permitted_operands, "7d", 0x04);
put(Permitted_operands, "7e", 0x04);
put(Permitted_operands, "7f", 0x04);
put(Permitted_arguments, "eb", 0x04);
put(Permitted_arguments, "72", 0x04);
put(Permitted_arguments, "73", 0x04);
put(Permitted_arguments, "74", 0x04);
put(Permitted_arguments, "75", 0x04);
put(Permitted_arguments, "76", 0x04);
put(Permitted_arguments, "77", 0x04);
put(Permitted_arguments, "7c", 0x04);
put(Permitted_arguments, "7d", 0x04);
put(Permitted_arguments, "7e", 0x04);
put(Permitted_arguments, "7f", 0x04);
// NOTE(review): the class letters jump from B to D; no "Class C" (and no
// entry using the disp16 bit, 0x08) appears in this function.
//// Class D: just op and disp32
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 0 1 |0 0 0 0
put(Permitted_operands, "e8", 0x10); // call
put(Permitted_operands, "e9", 0x10); // jump
put(Permitted_arguments, "e8", 0x10); // call
put(Permitted_arguments, "e9", 0x10); // jump
//// Class E: just op and imm8
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 1 0 |0 0 0 0
put(Permitted_operands, "cd", 0x20); // software interrupt
put(Permitted_arguments, "cd", 0x20); // software interrupt
//// Class F: just op and imm32
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 1 0 0 |0 0 0 0
put(Permitted_operands, "05", 0x40); // add
put(Permitted_operands, "2d", 0x40); // subtract
put(Permitted_operands, "25", 0x40); // and
put(Permitted_operands, "0d", 0x40); // or
put(Permitted_operands, "35", 0x40); // xor
put(Permitted_operands, "3d", 0x40); // compare
put(Permitted_operands, "68", 0x40); // push
put(Permitted_arguments, "05", 0x40); // add
put(Permitted_arguments, "2d", 0x40); // subtract
put(Permitted_arguments, "25", 0x40); // and
put(Permitted_arguments, "0d", 0x40); // or
put(Permitted_arguments, "35", 0x40); // xor
put(Permitted_arguments, "3d", 0x40); // compare
put(Permitted_arguments, "68", 0x40); // push
// copy
put(Permitted_operands, "b8", 0x40);
put(Permitted_operands, "b9", 0x40);
put(Permitted_operands, "ba", 0x40);
put(Permitted_operands, "bb", 0x40);
put(Permitted_operands, "bc", 0x40);
put(Permitted_operands, "bd", 0x40);
put(Permitted_operands, "be", 0x40);
put(Permitted_operands, "bf", 0x40);
put(Permitted_arguments, "b8", 0x40);
put(Permitted_arguments, "b9", 0x40);
put(Permitted_arguments, "ba", 0x40);
put(Permitted_arguments, "bb", 0x40);
put(Permitted_arguments, "bc", 0x40);
put(Permitted_arguments, "bd", 0x40);
put(Permitted_arguments, "be", 0x40);
put(Permitted_arguments, "bf", 0x40);
//// Class M: using ModR/M byte
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 0 0 |0 0 0 1
// add
put(Permitted_operands, "01", 0x01);
put(Permitted_operands, "03", 0x01);
put(Permitted_arguments, "01", 0x01);
put(Permitted_arguments, "03", 0x01);
// subtract
put(Permitted_operands, "29", 0x01);
put(Permitted_operands, "2b", 0x01);
put(Permitted_arguments, "29", 0x01);
put(Permitted_arguments, "2b", 0x01);
// and
put(Permitted_operands, "21", 0x01);
put(Permitted_operands, "23", 0x01);
put(Permitted_arguments, "21", 0x01);
put(Permitted_arguments, "23", 0x01);
// or
put(Permitted_operands, "09", 0x01);
put(Permitted_operands, "0b", 0x01);
put(Permitted_arguments, "09", 0x01);
put(Permitted_arguments, "0b", 0x01);
// xor
put(Permitted_operands, "31", 0x01);
put(Permitted_operands, "33", 0x01);
put(Permitted_arguments, "31", 0x01);
put(Permitted_arguments, "33", 0x01);
// compare
put(Permitted_operands, "39", 0x01);
put(Permitted_operands, "3b", 0x01);
put(Permitted_arguments, "39", 0x01);
put(Permitted_arguments, "3b", 0x01);
// copy
put(Permitted_operands, "88", 0x01);
put(Permitted_operands, "89", 0x01);
put(Permitted_operands, "8a", 0x01);
put(Permitted_operands, "8b", 0x01);
put(Permitted_arguments, "88", 0x01);
put(Permitted_arguments, "89", 0x01);
put(Permitted_arguments, "8a", 0x01);
put(Permitted_arguments, "8b", 0x01);
// swap
put(Permitted_operands, "87", 0x01);
put(Permitted_arguments, "87", 0x01);
// copy address (lea)
put(Permitted_operands, "8d", 0x01);
put(Permitted_arguments, "8d", 0x01);
//// Class N: op, ModR/M and subop (not r32)
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 0 0 |0 0 1 1
put(Permitted_operands, "8f", 0x03); // pop
put(Permitted_operands, "d3", 0x03); // shift
put(Permitted_operands, "f7", 0x03); // test/not/mul/div
put(Permitted_operands, "ff", 0x03); // jump/push/call
put(Permitted_arguments, "8f", 0x03); // pop
put(Permitted_arguments, "d3", 0x03); // shift
put(Permitted_arguments, "f7", 0x03); // test/not/mul/div
put(Permitted_arguments, "ff", 0x03); // jump/push/call
//// Class O: op, ModR/M, subop (not r32) and imm8
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 1 0 |0 0 1 1
put(Permitted_operands, "c1", 0x23); // combine
put(Permitted_operands, "c6", 0x23); // copy
put(Permitted_arguments, "c1", 0x23); // combine
put(Permitted_arguments, "c6", 0x23); // copy
//// Class P: op, ModR/M, subop (not r32) and imm32
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 1 0 0 |0 0 1 1
put(Permitted_operands, "81", 0x43); // combine
put(Permitted_operands, "c7", 0x43); // copy
put(Permitted_arguments, "81", 0x43); // combine
put(Permitted_arguments, "c7", 0x43); // copy
//// Class Q: op, ModR/M and imm32
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 1 0 0 |0 0 0 1
put(Permitted_operands, "69", 0x41); // multiply
put(Permitted_arguments, "69", 0x41); // multiply
// NOTE(review): the closing comment below keeps the old "Operands" wording in
// both revisions; presumably it is a tangle waypoint that later layers splice
// against -- confirm before renaming it.
// End Init Permitted Operands
}
@ -258,11 +258,11 @@ void init_permitted_operands() {
// SET and CLEAR are pure expressions: each evaluates to the value of
// `bitvector` with bit number `bit` turned on (SET) or off (CLEAR). Neither
// assigns back to its argument, so the caller has to store the result.
// The shifted constant is a plain int `1`.
#define SET(bitvector, bit) ((bitvector) | (1 << (bit)))
#define CLEAR(bitvector, bit) ((bitvector) & (~(1 << (bit))))
void check_operands(const line& inst, const word& op) {
void check_arguments(const line& inst, const word& op) {
if (!is_hex_byte(op)) return;
uint8_t expected_bitvector = get(Permitted_operands, op.data);
uint8_t expected_bitvector = get(Permitted_arguments, op.data);
if (HAS(expected_bitvector, MODRM)) {
check_operands_modrm(inst, op);
check_arguments_modrm(inst, op);
compare_bitvector_modrm(inst, expected_bitvector, maybe_name(op));
}
else {
@ -273,18 +273,18 @@ void check_operands(const line& inst, const word& op) {
//: Many instructions can be checked just by comparing bitvectors.
void compare_bitvector(const line& inst, uint8_t expected, const string& maybe_op_name) {
if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere
uint8_t bitvector = compute_expected_operand_bitvector(inst);
if (trace_contains_errors()) return; // duplicate operand type
if (all_hex_bytes(inst) && has_arguments(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere
uint8_t bitvector = compute_expected_argument_bitvector(inst);
if (trace_contains_errors()) return; // duplicate argument type
if (bitvector == expected) return; // all good with this instruction
for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
//? cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand
if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this argument
const string& optype = Operand_type_name.at(i);
if ((bitvector & 0x1) > (expected & 0x1))
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " operand\n" << end();
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " argument\n" << end();
else
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " operand\n" << end();
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " argument\n" << end();
// continue giving all errors for a single instruction
}
// ignore settings in any unused bits
@ -298,21 +298,21 @@ string maybe_name(const word& op) {
return " ("+s.substr(0, s.find(" ("))+')';
}
// ORs together the expected bit contributed by every word of `inst` after the
// first (the opcode at index 0 is skipped). A set of already-seen argument
// types is threaded through the per-word helper so duplicates can be reported.
// Returns INVALID_OPERANDS as soon as the trace contains an error.
// NOTE(review): the return type is uint32_t, but the callers visible in this
// file store the result in a uint8_t, so only the low 8 bits are kept.
// NOTE(review): rendered diff -- the header, the local set and the two loop
// statements each appear twice (old *_operand* spelling first).
uint32_t compute_expected_operand_bitvector(const line& inst) {
set<string> operands_found;
uint32_t compute_expected_argument_bitvector(const line& inst) {
set<string> arguments_found;
uint32_t bitvector = 0;
for (int i = /*skip op*/1; i < SIZE(inst.words); ++i) {
bitvector = bitvector | expected_bit_for_received_operand(inst.words.at(i), operands_found, inst);
if (trace_contains_errors()) return INVALID_OPERANDS; // duplicate operand type
bitvector = bitvector | expected_bit_for_received_argument(inst.words.at(i), arguments_found, inst);
if (trace_contains_errors()) return INVALID_OPERANDS; // duplicate argument type
}
return bitvector;
}
// True when `inst` has more words than the index at which its first argument
// would sit, i.e. when at least one word follows the opcode bytes.
// NOTE(review): rendered diff -- the header and the return statement each
// appear twice (old has_operands/first_operand spelling first).
bool has_operands(const line& inst) {
return SIZE(inst.words) > first_operand(inst);
bool has_arguments(const line& inst) {
return SIZE(inst.words) > first_argument(inst);
}
int first_operand(const line& inst) {
int first_argument(const line& inst) {
if (inst.words.at(0).data == "0f") return 2;
if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
if (inst.words.at(1).data == "0f")
@ -323,9 +323,9 @@ int first_operand(const line& inst) {
return 1;
}
// Scan the metadata of 'w' and return the expected bit corresponding to any operand type.
// Also raise an error if metadata contains multiple operand types.
uint32_t expected_bit_for_received_operand(const word& w, set<string>& instruction_operands, const line& inst) {
// Scan the metadata of 'w' and return the expected bit corresponding to any argument type.
// Also raise an error if metadata contains multiple argument types.
uint32_t expected_bit_for_received_argument(const word& w, set<string>& instruction_arguments, const line& inst) {
uint32_t bv = 0;
bool found = false;
for (int i = 0; i < SIZE(w.metadata); ++i) {
@ -335,65 +335,65 @@ uint32_t expected_bit_for_received_operand(const word& w, set<string>& instructi
expected_metadata = "modrm";
else if (!contains_key(Operand_type, curr)) continue; // ignore unrecognized metadata
if (found) {
raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
return INVALID_OPERANDS;
}
if (instruction_operands.find(curr) != instruction_operands.end()) {
raise << "'" << to_string(inst) << "': duplicate " << curr << " operand\n" << end();
if (instruction_arguments.find(curr) != instruction_arguments.end()) {
raise << "'" << to_string(inst) << "': duplicate " << curr << " argument\n" << end();
return INVALID_OPERANDS;
}
instruction_operands.insert(curr);
instruction_arguments.insert(curr);
bv = (1 << get(Operand_type, expected_metadata));
found = true;
}
return bv;
}
// Runs a `cd` instruction whose argument word carries two type tags
// (imm8 and imm32) and expects the "conflicting ... types" error for that
// word in the trace.
// NOTE(review): rendered diff -- the test name and the expected error string
// each appear twice (old "operand" wording first, new "argument" wording
// second).
void test_conflicting_operand_type() {
void test_conflicting_argument_type() {
Hide_errors = true;
run(
"== code 0x1\n"
"cd/software-interrupt 80/imm8/imm32\n"
);
CHECK_TRACE_CONTENTS(
"error: '80/imm8/imm32' has conflicting operand types; it should have only one\n"
"error: '80/imm8/imm32' has conflicting argument types; it should have only one\n"
);
}
//: Instructions computing effective addresses have more complex rules, so
//: we'll hard-code a common set of instruction-decoding rules.
// Runs an `81` instruction that supplies subop, rm32 and imm32 but no mod
// word, and expects the "missing mod" error in the trace.
// NOTE(review): rendered diff -- the test name and the expected error string
// each appear twice (old "operand" wording first, new "argument" wording
// second).
void test_check_missing_mod_operand() {
void test_check_missing_mod_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
"81 0/add/subop 3/rm32/ebx 1/imm32\n"
);
CHECK_TRACE_CONTENTS(
"error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand\n"
"error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod argument\n"
);
}
// Structural checks for instructions that use a ModR/M byte:
//   1. mod and rm32 must both be present;
//   2. for the listed opcodes, subop must be present and r32 absent;
//   3. when rm32 is "4" (SIB byte follows), base and index must be present
//      unless mod is 3, in which case both must be absent.
// Lines for which all_hex_bytes() holds are not checked at all.
// NOTE(review): rendered diff -- the header and every presence/absence call
// appear twice (old check_operand_* spelling, then new check_argument_*).
void check_operands_modrm(const line& inst, const word& op) {
void check_arguments_modrm(const line& inst, const word& op) {
if (all_hex_bytes(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere
check_operand_metadata_present(inst, "mod", op);
check_operand_metadata_present(inst, "rm32", op);
check_argument_metadata_present(inst, "mod", op);
check_argument_metadata_present(inst, "rm32", op);
// no check for r32; some instructions don't use it; just assume it's 0 if missing
// NOTE(review): the 'subop' help text lists opcodes 81, 8f, d3, f7 and ff,
// but d3 is missing from the condition below even though the trailing comment
// asks for the two to be kept in sync -- confirm whether d3 is omitted on
// purpose.
if (op.data == "81" || op.data == "8f" || op.data == "f7" || op.data == "ff") { // keep sync'd with 'help subop'
check_operand_metadata_present(inst, "subop", op);
check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop");
check_argument_metadata_present(inst, "subop", op);
check_argument_metadata_absent(inst, "r32", op, "should be replaced by subop");
}
// bail out before the lookups below: metadata() asserts that the word exists
if (trace_contains_errors()) return;
// NOTE(review): this is a comparison against the literal "4", whereas mod on
// the lines below goes through hex_byte(); presumably a spelling such as "04"
// would skip the SIB checks -- confirm.
if (metadata(inst, "rm32").data != "4") return;
// SIB byte checks
uint8_t mod = hex_byte(metadata(inst, "mod").data);
if (mod != /*direct*/3) {
check_operand_metadata_present(inst, "base", op);
check_operand_metadata_present(inst, "index", op); // otherwise why go to SIB?
check_argument_metadata_present(inst, "base", op);
check_argument_metadata_present(inst, "index", op); // otherwise why go to SIB?
}
else {
check_operand_metadata_absent(inst, "base", op, "direct mode");
check_operand_metadata_absent(inst, "index", op, "direct mode");
check_argument_metadata_absent(inst, "base", op, "direct mode");
check_argument_metadata_absent(inst, "index", op, "direct mode");
}
// no check for scale; 0 (2**0 = 1) by default
}
@ -401,15 +401,15 @@ void check_operands_modrm(const line& inst, const word& op) {
// same as compare_bitvector, with one additional exception for modrm-based
// instructions: they may use an extra displacement on occasion
void compare_bitvector_modrm(const line& inst, uint8_t expected, const string& maybe_op_name) {
if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere
uint8_t bitvector = compute_expected_operand_bitvector(inst);
if (trace_contains_errors()) return; // duplicate operand type
if (all_hex_bytes(inst) && has_arguments(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere
uint8_t bitvector = compute_expected_argument_bitvector(inst);
if (trace_contains_errors()) return; // duplicate argument type
// update 'expected' bitvector for the additional exception
if (has_operand_metadata(inst, "mod")) {
if (has_argument_metadata(inst, "mod")) {
int32_t mod = parse_int(metadata(inst, "mod").data);
switch (mod) {
case 0:
if (has_operand_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5)
if (has_argument_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5)
expected |= (1<<DISP32);
break;
case 1:
@ -423,25 +423,25 @@ void compare_bitvector_modrm(const line& inst, uint8_t expected, const string& m
if (bitvector == expected) return; // all good with this instruction
for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
//? cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand
if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this argument
const string& optype = Operand_type_name.at(i);
if ((bitvector & 0x1) > (expected & 0x1))
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " operand\n" << end();
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " argument\n" << end();
else
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " operand\n" << end();
raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " argument\n" << end();
// continue giving all errors for a single instruction
}
// ignore settings in any unused bits
}
// Raise a "missing <type>" error for 'inst' unless it carries metadata of the
// given type. The message quotes the whole instruction and appends
// maybe_name(op).
void check_operand_metadata_present(const line& inst, const string& type, const word& op) {
if (!has_operand_metadata(inst, type))
raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " operand\n" << end();
void check_argument_metadata_present(const line& inst, const string& type, const word& op) {
if (!has_argument_metadata(inst, type))
raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " argument\n" << end();
}
// Raise an "unexpected <type>" error for 'inst' if it carries metadata of the
// given type. 'msg' is appended in parentheses to explain why the metadata is
// not allowed here; maybe_name(op) is appended after the quoted instruction.
void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
if (has_operand_metadata(inst, type))
raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " operand (" << msg << ")\n" << end();
void check_argument_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
if (has_argument_metadata(inst, type))
raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " argument (" << msg << ")\n" << end();
}
void test_modrm_with_displacement() {
@ -461,7 +461,7 @@ void test_check_missing_disp8() {
"89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX\n" // missing disp8
);
CHECK_TRACE_CONTENTS(
"error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 operand\n"
"error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 argument\n"
);
}
@ -472,84 +472,84 @@ void test_check_missing_disp32() {
"8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX\n" // missing disp32
);
CHECK_TRACE_CONTENTS(
"error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 operand\n"
"error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 argument\n"
);
}
// An instruction with two words tagged /mod (0 and 3) is reported as having
// conflicting mod values.
void test_conflicting_operands_in_modrm_instruction() {
void test_conflicting_arguments_in_modrm_instruction() {
Hide_errors = true;
run(
"== code 0x1\n"
"01/add 0/mod 3/mod\n"
);
CHECK_TRACE_CONTENTS(
"error: '01/add 0/mod 3/mod' has conflicting mod operands\n"
"error: '01/add 0/mod 3/mod' has conflicting mod arguments\n"
);
}
// A single word tagged with two types at once (rm32 and r32) is rejected; the
// error quotes just that word.
void test_conflicting_operand_type_modrm() {
void test_conflicting_argument_type_modrm() {
Hide_errors = true;
run(
"== code 0x1\n"
"01/add 0/mod 3/rm32/r32\n"
);
CHECK_TRACE_CONTENTS(
"error: '3/rm32/r32' has conflicting operand types; it should have only one\n"
"error: '3/rm32/r32' has conflicting argument types; it should have only one\n"
);
}
// Opcode 81 given subop, mod and imm32 but no rm32 reports the missing rm32.
void test_check_missing_rm32_operand() {
void test_check_missing_rm32_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
"81 0/add/subop 0/mod 1/imm32\n"
);
CHECK_TRACE_CONTENTS(
"error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand\n"
"error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 argument\n"
);
}
// Opcode 81 given mod, rm32 and imm32 but no subop reports the missing subop.
void test_check_missing_subop_operand() {
void test_check_missing_subop_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
"81 0/mod 3/rm32/ebx 1/imm32\n"
);
CHECK_TRACE_CONTENTS(
"error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand\n"
"error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop argument\n"
);
}
// With mod 0 and rm32 4 but neither base nor index supplied, the trace
// contains the missing-base error.
void test_check_missing_base_operand() {
void test_check_missing_base_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
"81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32\n"
);
CHECK_TRACE_CONTENTS(
"error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n"
"error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base argument\n"
);
}
// With mod 0 and rm32 4, supplying a base but no index reports the missing
// index.
void test_check_missing_index_operand() {
void test_check_missing_index_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
"81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32\n"
);
CHECK_TRACE_CONTENTS(
"error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand\n"
"error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index argument\n"
);
}
// With mod 0 and rm32 4, supplying index and scale but no base still reports
// the missing base.
void test_check_missing_base_operand_2() {
void test_check_missing_base_argument_2() {
Hide_errors = true;
run(
"== code 0x1\n"
"81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32\n"
);
CHECK_TRACE_CONTENTS(
"error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n"
"error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base argument\n"
);
}
@ -560,22 +560,22 @@ void test_check_extra_displacement() {
"89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8\n"
);
CHECK_TRACE_CONTENTS(
"error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 operand\n"
"error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 argument\n"
);
}
// Tagging two words of the same instruction as r32 is reported as a
// duplicate r32.
void test_check_duplicate_operand() {
void test_check_duplicate_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
"89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32\n"
);
CHECK_TRACE_CONTENTS(
"error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 operand\n"
"error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 argument\n"
);
}
void test_check_base_operand_not_needed_in_direct_mode() {
void test_check_base_argument_not_needed_in_direct_mode() {
run(
"== code 0x1\n"
"81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32\n"
@ -590,13 +590,13 @@ void test_extra_modrm() {
"59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP\n"
);
CHECK_TRACE_CONTENTS(
"error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm operand\n"
"error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm argument\n"
);
}
//:: similarly handle multi-byte opcodes
void check_operands_0f(const line& inst) {
void check_arguments_0f(const line& inst) {
assert(inst.words.at(0).data == "0f");
if (SIZE(inst.words) == 1) {
raise << "opcode '0f' requires a second opcode\n" << end();
@ -607,10 +607,10 @@ void check_operands_0f(const line& inst) {
raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
return;
}
check_operands_0f(inst, op);
check_arguments_0f(inst, op);
}
void check_operands_f3(const line& inst) {
void check_arguments_f3(const line& inst) {
assert(inst.words.at(0).data == "f3");
if (SIZE(inst.words) == 1) {
raise << "opcode 'f3' requires a second opcode\n" << end();
@ -619,24 +619,24 @@ void check_operands_f3(const line& inst) {
word op = preprocess_op(inst.words.at(1));
if (op.data == "0f") {
word op2 = preprocess_op(inst.words.at(2));
check_operands_f3_0f(inst, op2);
check_arguments_f3_0f(inst, op2);
return;
}
if (!contains_key(Name_f3, op.data)) {
raise << "unknown 2-byte opcode 'f3 " << op.data << "'\n" << end();
return;
}
check_operands_f3(inst, op);
check_arguments_f3(inst, op);
}
// The two-byte opcode '0f 84' written without a disp32 reports the missing
// disp32.
void test_check_missing_disp32_operand() {
void test_check_missing_disp32_argument() {
Hide_errors = true;
run(
"== code 0x1\n"
" 0f 84 # jmp if ZF to ??\n"
);
CHECK_TRACE_CONTENTS(
"error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 operand\n"
"error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 argument\n"
);
}
@ -649,53 +649,53 @@ void test_0f_opcode_with_modrm() {
}
:(before "End Globals")
// Maps the opcode byte that follows '0f' to a bitvector of the argument types
// that instruction accepts. Bit positions follow the column tables below:
// 0x10 is the disp32 column and 0x01 is the modrm column.
map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f;
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_0f;
:(before "End Init Permitted Operands")
//// Class D: just op and disp32
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 0 1 |0 0 0 0
put_new(Permitted_operands_0f, "82", 0x10);
put_new(Permitted_operands_0f, "83", 0x10);
put_new(Permitted_operands_0f, "84", 0x10);
put_new(Permitted_operands_0f, "85", 0x10);
put_new(Permitted_operands_0f, "86", 0x10);
put_new(Permitted_operands_0f, "87", 0x10);
put_new(Permitted_operands_0f, "8c", 0x10);
put_new(Permitted_operands_0f, "8d", 0x10);
put_new(Permitted_operands_0f, "8e", 0x10);
put_new(Permitted_operands_0f, "8f", 0x10);
put_new(Permitted_arguments_0f, "82", 0x10);
put_new(Permitted_arguments_0f, "83", 0x10);
put_new(Permitted_arguments_0f, "84", 0x10);
put_new(Permitted_arguments_0f, "85", 0x10);
put_new(Permitted_arguments_0f, "86", 0x10);
put_new(Permitted_arguments_0f, "87", 0x10);
put_new(Permitted_arguments_0f, "8c", 0x10);
put_new(Permitted_arguments_0f, "8d", 0x10);
put_new(Permitted_arguments_0f, "8e", 0x10);
put_new(Permitted_arguments_0f, "8f", 0x10);
//// Class M: using ModR/M byte
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 0 0 |0 0 0 1
put_new(Permitted_operands_0f, "af", 0x01);
put_new(Permitted_arguments_0f, "af", 0x01);
// setcc
put_new(Permitted_operands_0f, "92", 0x01);
put_new(Permitted_operands_0f, "93", 0x01);
put_new(Permitted_operands_0f, "94", 0x01);
put_new(Permitted_operands_0f, "95", 0x01);
put_new(Permitted_operands_0f, "96", 0x01);
put_new(Permitted_operands_0f, "97", 0x01);
put_new(Permitted_operands_0f, "9c", 0x01);
put_new(Permitted_operands_0f, "9d", 0x01);
put_new(Permitted_operands_0f, "9e", 0x01);
put_new(Permitted_operands_0f, "9f", 0x01);
put_new(Permitted_arguments_0f, "92", 0x01);
put_new(Permitted_arguments_0f, "93", 0x01);
put_new(Permitted_arguments_0f, "94", 0x01);
put_new(Permitted_arguments_0f, "95", 0x01);
put_new(Permitted_arguments_0f, "96", 0x01);
put_new(Permitted_arguments_0f, "97", 0x01);
put_new(Permitted_arguments_0f, "9c", 0x01);
put_new(Permitted_arguments_0f, "9d", 0x01);
put_new(Permitted_arguments_0f, "9e", 0x01);
put_new(Permitted_arguments_0f, "9f", 0x01);
:(before "End Globals")
// Same kind of table as for the '0f' opcodes: the first map is consulted for
// the opcode following 'f3', the second for the opcode following 'f3 0f'.
// Only the 'f3 0f' table is populated in this fragment.
map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_f3;
map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_f3_0f;
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3;
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3_0f;
:(before "End Init Permitted Operands")
//// Class M: using ModR/M byte
// imm32 imm8 disp32 |disp16 disp8 subop modrm
// 0 0 0 |0 0 0 1
put_new(Permitted_operands_f3_0f, "2a", 0x01);
put_new(Permitted_operands_f3_0f, "5e", 0x01);
put_new(Permitted_arguments_f3_0f, "2a", 0x01);
put_new(Permitted_arguments_f3_0f, "5e", 0x01);
:(code)
void check_operands_0f(const line& inst, const word& op) {
uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
void check_arguments_0f(const line& inst, const word& op) {
uint8_t expected_bitvector = get(Permitted_arguments_0f, op.data);
if (HAS(expected_bitvector, MODRM)) {
check_operands_modrm(inst, op);
check_arguments_modrm(inst, op);
compare_bitvector_modrm(inst, expected_bitvector, maybe_name_0f(op));
}
else {
@ -703,10 +703,10 @@ void check_operands_0f(const line& inst, const word& op) {
}
}
void check_operands_f3(const line& inst, const word& op) {
uint8_t expected_bitvector = get(Permitted_operands_f3, op.data);
void check_arguments_f3(const line& inst, const word& op) {
uint8_t expected_bitvector = get(Permitted_arguments_f3, op.data);
if (HAS(expected_bitvector, MODRM)) {
check_operands_modrm(inst, op);
check_arguments_modrm(inst, op);
compare_bitvector_modrm(inst, expected_bitvector, maybe_name_f3(op));
}
else {
@ -714,10 +714,10 @@ void check_operands_f3(const line& inst, const word& op) {
}
}
void check_operands_f3_0f(const line& inst, const word& op) {
uint8_t expected_bitvector = get(Permitted_operands_f3_0f, op.data);
void check_arguments_f3_0f(const line& inst, const word& op) {
uint8_t expected_bitvector = get(Permitted_arguments_f3_0f, op.data);
if (HAS(expected_bitvector, MODRM)) {
check_operands_modrm(inst, op);
check_arguments_modrm(inst, op);
compare_bitvector_modrm(inst, expected_bitvector, maybe_name_f3_0f(op));
}
else {

View File

@ -1,4 +1,4 @@
//:: Check that the different operands of an instruction aren't too large for their bitfields.
//:: Check that the different arguments of an instruction aren't too large for their bitfields.
void test_check_bitfield_sizes() {
Hide_errors = true;
@ -28,22 +28,22 @@ put_new(Operand_bound, "imm8", 1<<8);
// no bound needed for imm32
:(before "Pack Operands(segment code)")
// Bounds-check the code segment at this waypoint, and return early if the
// check traced any errors.
check_operand_bounds(code);
check_argument_bounds(code);
if (trace_contains_errors()) return;
:(code)
// Apply the per-word bounds check to every instruction in 'code', visiting
// the words from index first_argument(inst) (first_operand before the rename)
// to the end of the line. Checking stops after the first instruction that
// leaves an error in the trace.
void check_operand_bounds(const segment& code) {
trace(3, "transform") << "-- check operand bounds" << end();
void check_argument_bounds(const segment& code) {
trace(3, "transform") << "-- check argument bounds" << end();
for (int i = 0; i < SIZE(code.lines); ++i) {
const line& inst = code.lines.at(i);
for (int j = first_operand(inst); j < SIZE(inst.words); ++j)
check_operand_bounds(inst.words.at(j));
for (int j = first_argument(inst); j < SIZE(inst.words); ++j)
check_argument_bounds(inst.words.at(j));
if (trace_contains_errors()) return; // stop at the first mal-formed instruction
}
}
void check_operand_bounds(const word& w) {
void check_argument_bounds(const word& w) {
for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) {
if (!has_operand_metadata(w, p->first)) continue;
if (!has_argument_metadata(w, p->first)) continue;
if (!looks_like_hex_int(w.data)) continue; // later transforms are on their own to do their own bounds checking
int32_t x = parse_int(w.data);
if (x >= 0) {

View File

@ -61,9 +61,9 @@ uint32_t num_bytes(const line& inst) {
}
int size_of(const word& w) {
if (has_operand_metadata(w, "disp32") || has_operand_metadata(w, "imm32"))
if (has_argument_metadata(w, "disp32") || has_argument_metadata(w, "imm32"))
return 4;
else if (has_operand_metadata(w, "disp16"))
else if (has_argument_metadata(w, "disp16"))
return 2;
// End size_of(word w) Special-cases
else
@ -79,7 +79,7 @@ int size_of(const word& w) {
//: (num_bytes).
//:
//: Decision: compute segment addresses before expanding labels, by being
//: aware in this layer of certain operand types that will eventually occupy
//: aware in this layer of certain argument types that will eventually occupy
//: multiple bytes.
//:
//: The layer to expand labels later hooks into num_bytes() to teach this

View File

@ -41,7 +41,7 @@ void test_Entry_label() {
if (SIZE(s) == 2) return true;
:(code)
void test_pack_immediate_ignores_single_byte_nondigit_operand() {
void test_pack_immediate_ignores_single_byte_nondigit_argument() {
Hide_errors = true;
transform(
"== code 0x1\n"
@ -54,7 +54,7 @@ void test_pack_immediate_ignores_single_byte_nondigit_operand() {
);
}
void test_pack_immediate_ignores_3_hex_digit_operand() {
void test_pack_immediate_ignores_3_hex_digit_argument() {
Hide_errors = true;
transform(
"== code 0x1\n"
@ -67,7 +67,7 @@ void test_pack_immediate_ignores_3_hex_digit_operand() {
);
}
void test_pack_immediate_ignores_non_hex_operand() {
void test_pack_immediate_ignores_non_hex_argument() {
Hide_errors = true;
transform(
"== code 0x1\n"
@ -136,17 +136,17 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>&
Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n';
for (int j = 0; j < SIZE(inst.words); ++j) {
const word& curr = inst.words.at(j);
// hack: if we have any operand metadata left after previous transforms,
// hack: if we have any argument metadata left after previous transforms,
// deduce its size
// Maybe we should just move this transform to before instruction
// packing, and deduce the size of *all* operands. But then we'll also
// packing, and deduce the size of *all* arguments. But then we'll also
// have to deal with bitfields.
if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
if (has_argument_metadata(curr, "disp32") || has_argument_metadata(curr, "imm32")) {
if (*curr.data.rbegin() == ':')
raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
current_byte += 4;
}
else if (has_operand_metadata(curr, "disp16")) {
else if (has_argument_metadata(curr, "disp16")) {
if (*curr.data.rbegin() == ':')
raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
current_byte += 2;
@ -160,8 +160,8 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>&
// ensure labels look sufficiently different from raw hex
check_valid_name(label);
if (trace_contains_errors()) return;
if (contains_any_operand_metadata(curr))
raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
if (contains_any_argument_metadata(curr))
raise << "'" << to_string(inst) << "': label definition (':') not allowed in argument\n" << end();
if (j > 0)
raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
if (Labels_file.is_open())
@ -224,21 +224,21 @@ void replace_labels_with_displacements(segment& code, const map<string, int32_t>
const word& curr = inst.words.at(j);
if (contains_key(byte_index, curr.data)) {
int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
if (has_operand_metadata(curr, "disp8")) {
if (has_argument_metadata(curr, "disp8")) {
if (displacement > 0x7f || displacement < -0x7f)
raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 signed bits\n" << end();
else
emit_hex_bytes(new_inst, displacement, 1);
}
else if (has_operand_metadata(curr, "disp16")) {
else if (has_argument_metadata(curr, "disp16")) {
if (displacement > 0x7fff || displacement < -0x7fff)
raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 signed bits\n" << end();
else
emit_hex_bytes(new_inst, displacement, 2);
}
else if (has_operand_metadata(curr, "disp32")) {
else if (has_argument_metadata(curr, "disp32")) {
emit_hex_bytes(new_inst, displacement, 4);
} else if (has_operand_metadata(curr, "imm32")) {
} else if (has_argument_metadata(curr, "imm32")) {
emit_hex_bytes(new_inst, code.start + get(byte_index, curr.data), 4);
}
}

View File

@ -1,7 +1,7 @@
//: Global variables.
//:
//: Global variables are just labels in the data segment.
//: However, they can only be used in imm32 and not disp32 operands. And they
//: However, they can only be used in imm32 and not disp32 arguments. And they
//: can't be used with jump and call instructions.
//:
//: This layer has much the same structure as rewriting labels.
@ -119,17 +119,17 @@ void replace_global_variables_in_data_segment(segment& data, const map<string, u
const word& curr = l.words.at(j);
if (!contains_key(address, curr.data)) {
if (looks_like_hex_int(curr.data)) {
if (has_operand_metadata(curr, "imm32"))
if (has_argument_metadata(curr, "imm32"))
emit_hex_bytes(new_l, curr, 4);
else if (has_operand_metadata(curr, "imm16"))
else if (has_argument_metadata(curr, "imm16"))
emit_hex_bytes(new_l, curr, 2);
else if (has_operand_metadata(curr, "imm8"))
else if (has_argument_metadata(curr, "imm8"))
emit_hex_bytes(new_l, curr, 1);
else if (has_operand_metadata(curr, "disp8"))
else if (has_argument_metadata(curr, "disp8"))
raise << "can't use /disp8 in a non-code segment\n" << end();
else if (has_operand_metadata(curr, "disp16"))
else if (has_argument_metadata(curr, "disp16"))
raise << "can't use /disp16 in a non-code segment\n" << end();
else if (has_operand_metadata(curr, "disp32"))
else if (has_argument_metadata(curr, "disp32"))
raise << "can't use /disp32 in a non-code segment\n" << end();
else
new_l.words.push_back(curr);
@ -149,13 +149,13 @@ void replace_global_variables_in_data_segment(segment& data, const map<string, u
}
// Returns true if the word 'curr' is tagged imm32. Additional accepted cases
// can be inserted at the 'End Valid Uses Of Global Variable(curr)' waypoint;
// if none of them returns, the result is false.
bool valid_use_of_global_variable(const word& curr) {
if (has_operand_metadata(curr, "imm32")) return true;
if (has_argument_metadata(curr, "imm32")) return true;
// End Valid Uses Of Global Variable(curr)
return false;
}
//:: a more complex sanity check for how we use global variables
//: requires first saving some data early before we pack operands
//: requires first saving some data early before we pack arguments
:(after "Begin Transforms")
// add correlate_disp32_with_mod to the list of transforms
Transform.push_back(correlate_disp32_with_mod);
@ -167,18 +167,18 @@ void correlate_disp32_with_mod(program& p) {
line& inst = code.lines.at(i);
for (int j = 0; j < SIZE(inst.words); ++j) {
word& curr = inst.words.at(j);
if (has_operand_metadata(curr, "disp32")
&& has_operand_metadata(inst, "mod"))
if (has_argument_metadata(curr, "disp32")
&& has_argument_metadata(inst, "mod"))
curr.metadata.push_back("has_mod");
}
}
}
:(before "End Valid Uses Of Global Variable(curr)")
// A disp32 word is a valid use of a global only if it carries the 'has_mod'
// tag, which correlate_disp32_with_mod adds to disp32 words of instructions
// that also have a mod.
if (has_operand_metadata(curr, "disp32"))
if (has_argument_metadata(curr, "disp32"))
return has_metadata(curr, "has_mod");
// todo: more sophisticated check, to ensure we don't use global variable
// addresses as a real displacement added to other operands.
// addresses as a real displacement added to other arguments.
:(code)
bool has_metadata(const word& w, const string& m) {