2017-10-18 06:55:25 +00:00
|
|
|
//:: simulated x86 registers; just a subset
|
|
|
|
//: assume segment registers are hard-coded to 0
|
|
|
|
//: no floating-point, MMX, etc. yet
|
2017-10-11 08:38:47 +00:00
|
|
|
|
|
|
|
:(before "End Types")
|
|
|
|
// Names for the simulated general-purpose registers. Each value doubles as
// the register's index into Reg[].
enum {
  EAX = 0,
  ECX = 1,
  EDX = 2,
  EBX = 3,
  ESP = 4,
  EBP = 5,
  ESI = 6,
  EDI = 7,
  NUM_INT_REGISTERS = 8,  // number of registers above; not a register itself
};
|
|
|
|
// Contents of one simulated register: the same 32 bits, readable either as
// a signed or as an unsigned number.
union reg {
  int32_t i;   // signed view
  uint32_t u;  // unsigned view
};
|
|
|
|
:(before "End Globals")
|
2017-10-12 23:55:56 +00:00
|
|
|
// The simulated general-purpose registers, one slot per register in the
// enum above NUM_INT_REGISTERS (index with EAX, ECX, ...). All start out zero.
reg Reg[NUM_INT_REGISTERS] = { {0} };
|
2017-10-11 08:38:47 +00:00
|
|
|
uint32_t EIP = 0;
|
|
|
|
:(before "End Reset")
|
2017-10-12 23:55:56 +00:00
|
|
|
bzero(Reg, sizeof(Reg));
|
2017-10-11 08:38:47 +00:00
|
|
|
EIP = 0;
|
|
|
|
|
|
|
|
//:: simulated flag registers; just a subset that we care about
|
|
|
|
|
|
|
|
:(before "End Globals")
|
2017-10-11 09:34:32 +00:00
|
|
|
bool SF = false; // sign flag
|
|
|
|
bool ZF = false; // zero flag
|
|
|
|
bool OF = false; // overflow flag
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Reset")
|
2017-10-11 09:34:32 +00:00
|
|
|
SF = ZF = OF = false;
|
2017-10-11 08:38:47 +00:00
|
|
|
|
|
|
|
//: how the flag registers are updated after each instruction
|
|
|
|
|
|
|
|
:(before "End Includes")
|
2017-10-14 04:53:00 +00:00
|
|
|
// Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
// result in 'arg1', then update flags:
//   SF: the stored result is negative
//   ZF: the stored result is zero
//   OF: the exact result doesn't fit in 'arg1' (signed overflow)
// The exact result is computed in 64 bits and then truncated; comparing the
// two is what detects overflow. (Computing it in 32 bits first would make
// the comparison always succeed, and would overflow a signed int.)
// beware: no side-effects in args; they're evaluated more than once
#define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be signed */ \
  int64_t tmp = static_cast<int64_t>(arg1) op (arg2); \
  arg1 = static_cast<int32_t>(tmp); \
  trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  SF = (arg1 < 0); \
  ZF = (arg1 == 0); \
  OF = (arg1 != tmp); \
}
|
|
|
|
|
2017-10-14 04:53:00 +00:00
|
|
|
// Apply bitwise operation 'op' to 'arg1' and 'arg2', write the result back
// into 'arg1', and derive the flags from it:
//   SF: set when the result shifted right by 31 is non-zero
//       (i.e. the top bit of a 32-bit operand)
//   ZF: set when the result is zero
//   OF: always cleared
// beware: 'arg1' is evaluated several times, so it must be free of
// side-effects
#define BINARY_BITWISE_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be unsigned */ \
  arg1 = arg1 op arg2; \
  trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  OF = false; \
  ZF = (arg1 == 0); \
  SF = ((arg1 >> 31) != 0); \
}
|
|
|
|
|
|
|
|
//:: simulated RAM
|
|
|
|
|
|
|
|
:(before "End Globals")
|
2017-10-12 23:55:56 +00:00
|
|
|
vector<uint8_t> Mem;
|
2017-10-12 16:36:55 +00:00
|
|
|
uint32_t End_of_program = 0;
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Reset")
|
2017-10-13 05:17:28 +00:00
|
|
|
Mem.clear();
|
2017-10-12 23:55:56 +00:00
|
|
|
Mem.resize(1024);
|
2017-10-12 16:36:55 +00:00
|
|
|
End_of_program = 0;
|
2017-10-15 02:18:34 +00:00
|
|
|
:(before "End Includes")
|
|
|
|
// depends on Mem being laid out contiguously (so you can't use a map, etc.)
|
|
|
|
// and on the host also being little-endian
|
|
|
|
// Store the 32-bit value 'val' into Mem starting at byte address 'addr', by
// viewing the bytes at that address as an int32_t.
// NOTE(review): Mem.at() bounds-checks only the first byte; the following
// three are written unchecked -- confirm callers keep addr+3 inside Mem.
// The expansion already ends in ';'.
#define SET_WORD_IN_MEM(addr, val) *reinterpret_cast<int32_t*>(&Mem.at(addr)) = val;
|
2017-10-11 08:38:47 +00:00
|
|
|
|
|
|
|
//:: core interpreter loop
|
|
|
|
|
|
|
|
:(scenario add_imm32_to_eax)
|
2017-10-12 16:36:55 +00:00
|
|
|
# In scenarios, programs are a series of hex bytes, each (variable-length)
|
|
|
|
# instruction on one line.
|
|
|
|
#
|
|
|
|
# x86 instructions consist of the following parts (see cheatsheet.pdf):
|
2017-10-12 23:55:56 +00:00
|
|
|
#   opcode        ModR/M                    SIB                   displacement    immediate
#   instruction   mod, reg, Reg/Mem bits    scale, index, base
#   1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
|
|
|
|
05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX
|
2017-10-12 23:03:18 +00:00
|
|
|
# All hex bytes must be exactly 2 characters each. No '0x' prefixes.
|
2017-10-12 16:36:55 +00:00
|
|
|
+load: 1 -> 05
|
|
|
|
+load: 2 -> 0a
|
|
|
|
+load: 3 -> 0b
|
|
|
|
+load: 4 -> 0c
|
|
|
|
+load: 5 -> 0d
|
|
|
|
+run: add imm32 0x0d0c0b0a to reg EAX
|
2017-10-13 05:28:06 +00:00
|
|
|
+run: storing 0x0d0c0b0a
|
2017-10-11 08:38:47 +00:00
|
|
|
|
|
|
|
:(code)
|
|
|
|
// helper for tests: load a program into memory from a textual representation
|
|
|
|
// of its bytes, and run it
|
2018-07-01 06:05:40 +00:00
|
|
|
void run(string text_bytes) {
|
|
|
|
// Begin run() For Scenarios
|
2018-07-01 07:01:13 +00:00
|
|
|
//? cerr << text_bytes << '\n';
|
2018-07-01 05:46:32 +00:00
|
|
|
load_program(text_bytes, 1); // tests always assume a starting address of 1
|
2017-10-11 08:38:47 +00:00
|
|
|
EIP = 1; // preserve null pointer
|
2017-10-12 16:36:55 +00:00
|
|
|
while (EIP < End_of_program)
|
2017-10-11 08:38:47 +00:00
|
|
|
run_one_instruction();
|
|
|
|
}
|
|
|
|
|
|
|
|
// skeleton of how x86 instructions are decoded
|
|
|
|
void run_one_instruction() {
|
|
|
|
uint8_t op=0, op2=0, op3=0;
|
2017-10-15 08:59:11 +00:00
|
|
|
trace(2, "run") << "inst: 0x" << HEXWORD << EIP << end();
|
2017-10-12 23:16:54 +00:00
|
|
|
switch (op = next()) {
|
2017-10-12 16:36:55 +00:00
|
|
|
case 0xf4: // hlt
|
|
|
|
EIP = End_of_program;
|
|
|
|
break;
|
2017-10-13 04:02:11 +00:00
|
|
|
// our first opcode
|
2017-10-12 16:36:55 +00:00
|
|
|
case 0x05: { // add imm32 to EAX
|
2017-10-11 08:38:47 +00:00
|
|
|
int32_t arg2 = imm32();
|
2017-10-12 16:36:55 +00:00
|
|
|
trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
|
2017-10-12 23:55:56 +00:00
|
|
|
BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
|
2017-10-11 08:38:47 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
// End Single-Byte Opcodes
|
|
|
|
case 0x0f:
|
|
|
|
switch(op2 = next()) {
|
2017-10-11 09:10:32 +00:00
|
|
|
// End Two-Byte Opcodes Starting With 0f
|
2017-10-11 08:38:47 +00:00
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0xf3:
|
|
|
|
switch(op2 = next()) {
|
2017-10-11 09:10:32 +00:00
|
|
|
// End Two-Byte Opcodes Starting With f3
|
2017-10-11 08:38:47 +00:00
|
|
|
case 0x0f:
|
|
|
|
switch(op3 = next()) {
|
2017-10-11 09:10:32 +00:00
|
|
|
// End Three-Byte Opcodes Starting With f3 0f
|
2017-10-11 08:38:47 +00:00
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-01 05:46:32 +00:00
|
|
|
void load_program(const string& text_bytes, uint32_t addr) {
|
2017-10-12 23:03:18 +00:00
|
|
|
istringstream in(text_bytes);
|
2018-07-01 05:46:32 +00:00
|
|
|
load_program(in, addr);
|
2018-06-30 16:41:22 +00:00
|
|
|
}
|
|
|
|
// Parse a program from 'in' -- pairs of hex digits, each pair one byte --
// and store the bytes in Mem at consecutive addresses starting from 'addr'.
// Mem is doubled in size whenever the next address would fall past its end.
//
// On success End_of_program is set to one past the last byte stored. If the
// input stops in the middle of a byte, an error is raised and the function
// returns without updating End_of_program.
void load_program(istream& in, uint32_t addr) {
  in >> std::noskipws;  // next_hex_byte deals with whitespace itself
  while (has_data(in)) {
    const char hi = next_hex_byte(in);
    if (hi == '\0') break;
    // a first digit must be followed by a second one
    const char lo = has_data(in) ? next_hex_byte(in) : '\0';
    if (lo == '\0') {
      raise << "input program truncated mid-byte\n" << end();
      return;
    }
    const uint8_t byte = to_byte(hi, lo);
    Mem.at(addr) = byte;
    trace(99, "load") << addr << " -> " << HEXBYTE << NUM(byte) << end();
    ++addr;
    if (addr >= Mem.size()) Mem.resize(Mem.size()*2);
  }
  End_of_program = addr;
}
|
|
|
|
|
2017-10-12 23:03:18 +00:00
|
|
|
// Read the next hex digit from 'in', skipping spaces, newlines and '#'
// comments (a comment runs to the end of its line).
//
// Returns the digit as a lowercase character ('0'-'9' or 'a'-'f').
// Returns '\0' when the input runs out, or when a character that is neither
// a hex digit nor whitespace is found; in the latter case an error is also
// raised.
char next_hex_byte(istream& in) {
  while (has_data(in)) {
    char c = '\0';
    in >> c;
    if (c == ' ' || c == '\n') continue;
    while (c == '#') {
      // discard the comment up to and including its newline
      bool comment_terminated = false;
      while (has_data(in)) {
        in >> c;
        if (c == '\n') {
          comment_terminated = true;
          break;
        }
      }
      // A comment that runs into the end of the input has nothing after it.
      // Without this check its last character would be mistaken for program
      // text, and a trailing '#' would make this loop spin forever.
      if (!comment_terminated) return '\0';
      // first character after the comment; may start another comment
      in >> c;
    }
    if (c == '\0') return c;
    if (c >= '0' && c <= '9') return c;
    if (c >= 'a' && c <= 'f') return c;
    if (c >= 'A' && c <= 'F') return static_cast<char>(tolower(c));
    // disallow any non-hex characters, including a '0x' prefix
    // (cast: isspace is undefined for negative char values)
    if (!isspace(static_cast<unsigned char>(c))) {
      raise << "invalid non-hex character " << NUM(c) << "\n" << end();
      break;
    }
  }
  return '\0';
}
|
|
|
|
|
|
|
|
uint8_t to_byte(char hex_byte1, char hex_byte2) {
|
|
|
|
return to_hex_num(hex_byte1)*16 + to_hex_num(hex_byte2);
|
|
|
|
}
|
|
|
|
// Convert a single hex digit character to its numeric value (0-15).
// Digits may be '0'-'9', 'a'-'f' or 'A'-'F'.
// Any other character is a programming error: it trips the assertion, and
// yields 0 in builds where assertions are compiled out.
uint8_t to_hex_num(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  assert(false);
  return 0;
}
|
|
|
|
|
2017-10-12 23:16:54 +00:00
|
|
|
inline uint8_t next() {
|
2017-10-12 23:55:56 +00:00
|
|
|
return Mem.at(EIP++);
|
2017-10-11 08:38:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// read a 32-bit immediate in little-endian order from the instruction stream
|
2017-10-12 16:36:55 +00:00
|
|
|
int32_t imm32() {
|
|
|
|
int32_t result = next();
|
2017-10-11 08:38:47 +00:00
|
|
|
result |= (next()<<8);
|
|
|
|
result |= (next()<<16);
|
|
|
|
result |= (next()<<24);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2018-01-24 10:47:49 +00:00
|
|
|
// Return the name of the register with index 'r' (0 is "EAX", 7 is "EDI").
// For any other index an error is raised and the empty string is returned.
string rname(uint8_t r) {
  static const char* const names[] = {
    "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
  };
  if (r < sizeof(names)/sizeof(names[0])) return names[r];
  // NUM(): streaming a uint8_t directly would print a raw character rather
  // than the register number
  raise << "invalid register " << NUM(r) << '\n' << end();
  return "";
}
|
|
|
|
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Includes")
|
|
|
|
#include <iomanip>
|
|
|
|
#define HEXBYTE std::hex << std::setw(2) << std::setfill('0')
|
|
|
|
#define HEXWORD std::hex << std::setw(8) << std::setfill('0')
|
2017-10-13 04:20:31 +00:00
|
|
|
// ugly that iostream doesn't print uint8_t as an integer
|
|
|
|
#define NUM(X) static_cast<int>(X)
|
2017-10-11 17:16:16 +00:00
|
|
|
#include <stdint.h>
|