2018-07-16 05:59:02 +00:00
|
|
|
//: Core data structures for simulating the SubX VM (subset of an x86 processor)
//:
//: At the lowest level ("level 1") of abstraction, SubX executes x86
//: instructions provided in the form of an array of bytes, loaded into memory
//: starting at a specific address.
|
2018-07-16 05:59:02 +00:00
|
|
|
|
|
|
|
//:: registers
//: assume segment registers are hard-coded to 0
//: no floating-point, MMX, etc. yet
|
2017-10-11 08:38:47 +00:00
|
|
|
|
|
|
|
:(before "End Types")
|
|
|
|
// Names for the eight 32-bit integer registers. Each enumerator's value is the
// register's number (the help text calls them R0 to R7; ESP is R4), and is
// used to index the register file, which has NUM_INT_REGISTERS entries.
enum {
  EAX = 0,
  ECX = 1,
  EDX = 2,
  EBX = 3,
  ESP = 4,
  EBP = 5,
  ESI = 6,
  EDI = 7,
  NUM_INT_REGISTERS = 8,  // count of the registers above; not itself a register
};
|
|
|
|
// Contents of one 32-bit integer register. Both members share the same
// storage, so an instruction can pick whichever interpretation it needs.
union reg {
  int32_t i;   // signed view
  uint32_t u;  // unsigned view
};
|
|
|
|
:(before "End Globals")
|
2017-10-12 23:55:56 +00:00
|
|
|
// The integer register file: one entry per register number, all zero to
// start with.
reg Reg[NUM_INT_REGISTERS] = { {0} };
// Address of the next instruction byte to fetch; advanced by next().
uint32_t EIP = 1;  // preserve null pointer
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Reset")
|
2017-10-12 23:55:56 +00:00
|
|
|
// Zero every integer register.
bzero(Reg, sizeof(Reg));
|
2018-07-11 03:18:45 +00:00
|
|
|
// Restore the instruction pointer to the value it is initialized with.
EIP = 1; // preserve null pointer
|
2017-10-11 08:38:47 +00:00
|
|
|
|
2018-07-20 22:50:39 +00:00
|
|
|
:(before "End Help Contents")
|
|
|
|
// Advertise the "registers" help topic in the list of help contents.
cerr << " registers\n";
|
|
|
|
:(before "End Help Texts")
|
|
|
|
// Text shown for the "registers" help topic.
put(Help, "registers",
  // general-purpose registers
  "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
  "R4 (ESP) contains the top of the stack.\n"
  "\n"
  // instruction pointer
  "There's also a register for the address of the currently executing\n"
  "instruction. It is modified by jumps.\n"
  "\n"
  // flags
  "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
  "as a side-effect:\n"
  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
  " reset if not.\n"
  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
  "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
  "The flag bits are read by conditional jumps.\n"
  "\n"
  // what's missing
  "We don't support non-integer (floating-point) registers yet.\n"
);
|
|
|
|
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Globals")
|
2018-07-16 05:59:02 +00:00
|
|
|
// the subset of x86 flag registers we care about
// Each is a single bit, updated as a side effect of arithmetic and bitwise
// instructions.
bool SF = false;  // sign flag
bool ZF = false;  // zero flag
bool OF = false;  // overflow flag
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Reset")
|
2017-10-11 09:34:32 +00:00
|
|
|
// Clear all three flags.
SF = ZF = OF = false;
|
2017-10-11 08:38:47 +00:00
|
|
|
|
|
|
|
//: how the flag registers are updated after each instruction
|
|
|
|
|
|
|
|
:(before "End Includes")
|
2017-10-14 04:53:00 +00:00
|
|
|
// Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
// result in 'arg1', then update flags:
//   SF: set if the stored result is negative
//   ZF: set if the stored result is zero
//   OF: set if the stored result differs from the exact result computed at
//       64-bit precision, i.e. the operation overflowed 32 bits
// The operands are widened to 64 bits _before_ applying 'op'. Computing
// 'arg1 op arg2' at 32 bits and widening afterwards would overflow first
// (undefined behavior for signed ints), so the comparison that sets OF could
// never reliably detect anything.
// beware: no side-effects in args ('arg1' is evaluated several times)
#define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be signed */ \
  const int64_t tmp = static_cast<int64_t>(arg1) op static_cast<int64_t>(arg2); \
  /* truncate the exact result to the 32-bit destination */ \
  arg1 = static_cast<int32_t>(tmp); \
  trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  SF = (arg1 < 0); \
  ZF = (arg1 == 0); \
  OF = (arg1 != tmp); \
}
|
|
|
|
|
2017-10-14 04:53:00 +00:00
|
|
|
// Apply bitwise operation 'op' to 'arg1' and 'arg2', leaving the result in
// 'arg1', then update flags:
//   SF: copy of bit 31 of the result
//   ZF: set if the result is zero
//   OF: always cleared
// 'arg1' is evaluated several times, so it must not have side effects.
#define BINARY_BITWISE_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be unsigned */ \
  arg1 = arg1 op arg2; \
  trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  OF = false; \
  ZF = (arg1 == 0); \
  SF = ((arg1 >> 31) != 0); \
}
|
|
|
|
|
|
|
|
//:: simulated RAM
|
|
|
|
|
|
|
|
:(before "End Globals")
|
2017-10-12 23:55:56 +00:00
|
|
|
// Simulated RAM, one element per byte.
vector<uint8_t> Mem;
// Simulated address of Mem[0]; the memory helpers subtract it from every
// address before indexing into Mem.
uint32_t Mem_offset = 0;
// Address that the 'hlt' instruction sets EIP to.
uint32_t End_of_program = 0;
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Reset")
|
2017-10-13 05:17:28 +00:00
|
|
|
Mem.clear();
|
2017-10-12 23:55:56 +00:00
|
|
|
Mem.resize(1024);
|
2018-07-09 05:35:48 +00:00
|
|
|
Mem_offset = 0;
|
2017-10-12 16:36:55 +00:00
|
|
|
End_of_program = 0;
|
2018-07-09 05:33:15 +00:00
|
|
|
:(code)
|
|
|
|
// These helpers depend on Mem being laid out contiguously (so you can't use a
|
|
|
|
// map, etc.) and on the host also being little-endian.
|
|
|
|
inline uint8_t read_mem_u8(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return Mem.at(addr-Mem_offset);
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline int8_t read_mem_i8(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return static_cast<int8_t>(Mem.at(addr-Mem_offset));
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline uint32_t read_mem_u32(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline int32_t read_mem_i32(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
inline uint8_t* mem_addr_u8(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return &Mem.at(addr-Mem_offset);
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline int8_t* mem_addr_i8(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline uint32_t* mem_addr_u32(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline int32_t* mem_addr_i32(uint32_t addr) {
|
2018-07-09 05:35:48 +00:00
|
|
|
return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
inline void write_mem_u8(uint32_t addr, uint8_t val) {
|
2018-07-09 05:35:48 +00:00
|
|
|
Mem.at(addr-Mem_offset) = val;
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline void write_mem_i8(uint32_t addr, int8_t val) {
|
2018-07-09 05:35:48 +00:00
|
|
|
Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline void write_mem_u32(uint32_t addr, uint32_t val) {
|
2018-07-09 05:35:48 +00:00
|
|
|
*reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
|
|
|
inline void write_mem_i32(uint32_t addr, int32_t val) {
|
2018-07-09 05:35:48 +00:00
|
|
|
*reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
|
2018-07-09 05:33:15 +00:00
|
|
|
}
|
2017-10-11 08:38:47 +00:00
|
|
|
|
|
|
|
//:: core interpreter loop
|
|
|
|
|
|
|
|
:(code)
|
|
|
|
// skeleton of how x86 instructions are decoded
|
|
|
|
void run_one_instruction() {
|
|
|
|
uint8_t op=0, op2=0, op3=0;
|
2017-10-15 08:59:11 +00:00
|
|
|
trace(2, "run") << "inst: 0x" << HEXWORD << EIP << end();
|
2018-07-03 23:36:37 +00:00
|
|
|
//? cerr << "inst: 0x" << EIP << '\n';
|
2017-10-12 23:16:54 +00:00
|
|
|
switch (op = next()) {
|
2017-10-12 16:36:55 +00:00
|
|
|
case 0xf4: // hlt
|
|
|
|
EIP = End_of_program;
|
|
|
|
break;
|
2017-10-11 08:38:47 +00:00
|
|
|
// End Single-Byte Opcodes
|
|
|
|
case 0x0f:
|
|
|
|
switch(op2 = next()) {
|
2017-10-11 09:10:32 +00:00
|
|
|
// End Two-Byte Opcodes Starting With 0f
|
2017-10-11 08:38:47 +00:00
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
|
2018-07-17 05:18:18 +00:00
|
|
|
DUMP("");
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0xf3:
|
|
|
|
switch(op2 = next()) {
|
2017-10-11 09:10:32 +00:00
|
|
|
// End Two-Byte Opcodes Starting With f3
|
2017-10-11 08:38:47 +00:00
|
|
|
case 0x0f:
|
|
|
|
switch(op3 = next()) {
|
2017-10-11 09:10:32 +00:00
|
|
|
// End Three-Byte Opcodes Starting With f3 0f
|
2017-10-11 08:38:47 +00:00
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
|
2018-07-17 05:18:18 +00:00
|
|
|
DUMP("");
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
|
2018-07-17 05:18:18 +00:00
|
|
|
DUMP("");
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2017-10-13 04:20:31 +00:00
|
|
|
cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
|
2018-07-17 05:18:18 +00:00
|
|
|
DUMP("");
|
2017-10-11 08:38:47 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-12 23:16:54 +00:00
|
|
|
inline uint8_t next() {
|
2018-07-09 05:33:15 +00:00
|
|
|
return read_mem_u8(EIP++);
|
2017-10-11 08:38:47 +00:00
|
|
|
}
|
|
|
|
|
2018-07-21 05:16:04 +00:00
|
|
|
//: start tracking supported opcodes
|
|
|
|
:(before "End Globals")
|
|
|
|
// Human-readable names of the supported opcodes, keyed by opcode byte. There
// is one table per opcode prefix, mirroring the nested switches in
// run_one_instruction(). `subx help opcodes` prints all four.
map</*op*/uint8_t, string> name;        // single-byte opcodes
map</*op*/uint8_t, string> name_0f;     // second byte of opcodes starting with 0f
map</*op*/uint8_t, string> name_f3;     // second byte of opcodes starting with f3
map</*op*/uint8_t, string> name_f3_0f;  // third byte of opcodes starting with f3 0f
|
|
|
|
:(before "End One-time Setup")
|
|
|
|
// Fill in the opcode-name tables as part of one-time setup.
init_op_names();
|
|
|
|
:(code)
|
|
|
|
// Register a human-readable name for every supported opcode.
// This layer only knows about 'hlt'; further registrations get inserted at
// the 'End Initialize Op Names' comment, which must stay as it is.
void init_op_names() {
  put(name, 0xf4, "halt");
  // End Initialize Op Names(name)
}
|
|
|
|
|
|
|
|
:(before "End Help Special-cases(key)")
|
|
|
|
// Help topic "opcodes": print every registered opcode with its name, one
// table at a time, each entry prefixed by the opcode bytes that lead to it.
if (key == "opcodes") {
  cerr << "Opcodes currently supported by SubX:\n";
  // single-byte opcodes
  for (map<uint8_t, string>::iterator curr = name.begin();  curr != name.end();  ++curr) {
    cerr << " " << HEXBYTE << NUM(curr->first) << ": " << curr->second << '\n';
  }
  // two-byte opcodes starting with 0f
  for (map<uint8_t, string>::iterator curr = name_0f.begin();  curr != name_0f.end();  ++curr) {
    cerr << " 0f " << HEXBYTE << NUM(curr->first) << ": " << curr->second << '\n';
  }
  // two-byte opcodes starting with f3
  for (map<uint8_t, string>::iterator curr = name_f3.begin();  curr != name_f3.end();  ++curr) {
    cerr << " f3 " << HEXBYTE << NUM(curr->first) << ": " << curr->second << '\n';
  }
  // three-byte opcodes starting with f3 0f
  for (map<uint8_t, string>::iterator curr = name_f3_0f.begin();  curr != name_f3_0f.end();  ++curr) {
    cerr << " f3 0f " << HEXBYTE << NUM(curr->first) << ": " << curr->second << '\n';
  }
  cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
  return 0;
}
|
|
|
|
:(before "End Help Contents")
|
|
|
|
// Advertise the "opcodes" help topic in the list of help contents.
cerr << " opcodes\n";
|
|
|
|
|
2017-10-11 08:38:47 +00:00
|
|
|
:(before "End Includes")
|
|
|
|
#include <iomanip>
|
|
|
|
// Stream manipulators for printing the next integer as 2 zero-padded hex
// digits. Note that std::hex and std::setfill stay in effect on the stream
// afterwards; only std::setw is reset after one insertion.
#define HEXBYTE std::hex << std::setw(2) << std::setfill('0')
|
|
|
|
// Stream manipulators for printing the next integer as 8 zero-padded hex
// digits. Note that std::hex and std::setfill stay in effect on the stream
// afterwards; only std::setw is reset after one insertion.
#define HEXWORD std::hex << std::setw(8) << std::setfill('0')
|
2017-10-13 04:20:31 +00:00
|
|
|
// ugly that iostream doesn't print uint8_t as an integer
|
|
|
|
#define NUM(X) static_cast<int>(X)
|
2017-10-11 17:16:16 +00:00
|
|
|
#include <stdint.h>
|