2018-08-05 05:59:47 +00:00
//: Beginning of "level 2": tagging bytes with metadata around what field of
//: an x86 instruction they're for.
//:
//: The x86 instruction set is variable-length, and how a byte is interpreted
//: affects later instruction boundaries. A lot of the pain in programming
//: machine code stems from computer and programmer going out of sync on what
//: a byte means. The miscommunication is usually not immediately caught, and
//: metastasizes at runtime into kilobytes of misinterpreted instructions.
//:
//: To mitigate these issues, we'll start programming in terms of logical
//: operands rather than physical bytes. Some operands are smaller than a
//: byte, and others may consist of multiple bytes. This layer will correctly
//: pack and order the bytes corresponding to the operands in an instruction.
: ( before " End Help Texts " )
put ( Help , " instructions " ,
" Each x86 instruction consists of an instruction or opcode and some number \n "
" of operands. \n "
" Each operand has a type. An instruction won't have more than one operand of \n "
" any type. \n "
" Each instruction has some set of allowed operand types. It'll reject others. \n "
" The complete list of operand types: mod, subop, r32 (register), rm32 \n "
" (register or memory), scale, index, base, disp8, disp16, disp32, imm8, \n "
" imm32. \n "
" Each of these has its own help page. Try reading 'subx help mod' next. \n "
) ;
: ( before " End Help Contents " )
cerr < < " instructions \n " ;
: ( scenario pack_immediate_constants )
= = 0x1
# instruction effective address operand displacement immediate
# op subop mod rm32 base index scale r32
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0 / 1 / 2 / 4 bytes 0 / 1 / 2 / 4 bytes
bb 0x2a / imm32 # copy 42 to EBX
+ transform : packing instruction ' bb 0x2a / imm32 '
+ transform : instruction after packing : ' bb 2 a 00 00 00 '
+ run : copy imm32 0x0000002a to EBX
//: complete set of valid operand types
: ( before " End Globals " )
set < string > Instruction_operands ;
: ( before " End One-time Setup " )
Instruction_operands . insert ( " subop " ) ;
Instruction_operands . insert ( " mod " ) ;
Instruction_operands . insert ( " rm32 " ) ;
Instruction_operands . insert ( " base " ) ;
Instruction_operands . insert ( " index " ) ;
Instruction_operands . insert ( " scale " ) ;
Instruction_operands . insert ( " r32 " ) ;
Instruction_operands . insert ( " disp8 " ) ;
Instruction_operands . insert ( " disp16 " ) ;
Instruction_operands . insert ( " disp32 " ) ;
Instruction_operands . insert ( " imm8 " ) ;
Instruction_operands . insert ( " imm32 " ) ;
: ( before " End Help Texts " )
init_operand_type_help ( ) ;
: ( code )
void init_operand_type_help ( ) {
put ( Help , " mod " ,
" 2-bit operand controlling the _addressing mode_ of many instructions, \n "
" to determine how to compute the _effective address_ to look up memory at \n "
" based on the 'rm32' operand and potentially others. \n "
" \n "
" If mod = 3, just operate on the contents of the register specified by rm32 \n "
" (direct mode). \n "
" If mod = 2, effective address is usually* rm32 + disp32 \n "
" (indirect mode with displacement). \n "
" If mod = 1, effective address is usually* rm32 + disp8 \n "
" (indirect mode with displacement). \n "
" If mod = 0, effective address is usually* rm32 (indirect mode). \n "
" (* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP). \n "
" Using it as an address gets more involved. For more details, \n "
" try reading the help pages for 'base', 'index' and 'scale'.) \n "
" \n "
" For complete details consult the IA-32 software developer's manual, table 2-2, \n "
" \" 32-bit addressing forms with the ModR/M byte \" . \n "
" https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf \n "
) ;
put ( Help , " subop " ,
" Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff. \n "
" Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits. \n "
) ;
put ( Help , " r32 " ,
" 3-bit operand specifying a register operand used directly, without any further addressing modes. \n "
) ;
put ( Help , " rm32 " ,
" 3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'. \n "
" For complete details consult the IA-32 software developer's manual, table 2-2, \n "
" \" 32-bit addressing forms with the ModR/M byte \" . \n "
" https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf \n "
) ;
put ( Help , " base " ,
" Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up. \n "
" This address may be further modified by 'index' and 'scale' operands. \n "
" effective address = base + index*scale + displacement (disp8 or disp32) \n "
" For complete details consult the IA-32 software developer's manual, table 2-3, \n "
" \" 32-bit addressing forms with the SIB byte \" . \n "
" https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf \n "
) ;
put ( Help , " index " ,
" Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory. \n "
" effective address = base + index*scale + displacement (disp8 or disp32) \n "
" For complete details consult the IA-32 software developer's manual, table 2-3, \n "
" \" 32-bit addressing forms with the SIB byte \" . \n "
" https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf \n "
) ;
put ( Help , " scale " ,
" Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on. \n "
" effective address = base + index * scale + displacement (disp8 or disp32) \n "
" For complete details consult the IA-32 software developer's manual, table 2-3, \n "
" \" 32-bit addressing forms with the SIB byte \" . \n "
" https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf \n "
) ;
put ( Help , " disp8 " ,
" 8-bit value to be added in many instructions. \n "
) ;
put ( Help , " disp16 " ,
" 16-bit value to be added in many instructions. \n "
) ;
put ( Help , " disp32 " ,
" 32-bit value to be added in many instructions. \n "
) ;
put ( Help , " imm8 " ,
" 8-bit value for many instructions. \n "
) ;
put ( Help , " imm32 " ,
" 32-bit value for many instructions. \n "
) ;
}
//:: transform packing operands into bytes in the right order
: ( before " End Transforms " )
// Begin Level-2 Transforms
Transform . push_back ( pack_operands ) ;
// End Level-2 Transforms
: ( code )
void pack_operands ( program & p ) {
if ( p . segments . empty ( ) ) return ;
segment & code = p . segments . at ( 0 ) ;
// Pack Operands(segment code)
trace ( 99 , " transform " ) < < " -- pack operands " < < end ( ) ;
for ( int i = 0 ; i < SIZE ( code . lines ) ; + + i ) {
line & inst = code . lines . at ( i ) ;
if ( all_hex_bytes ( inst ) ) continue ;
trace ( 99 , " transform " ) < < " packing instruction ' " < < to_string ( /*with metadata*/ inst ) < < " ' " < < end ( ) ;
pack_operands ( inst ) ;
trace ( 99 , " transform " ) < < " instruction after packing: ' " < < to_string ( /*without metadata*/ inst . words ) < < " ' " < < end ( ) ;
}
}
void pack_operands ( line & inst ) {
line new_inst ;
add_opcodes ( inst , new_inst ) ;
add_modrm_byte ( inst , new_inst ) ;
add_sib_byte ( inst , new_inst ) ;
add_disp_bytes ( inst , new_inst ) ;
add_imm_bytes ( inst , new_inst ) ;
inst . words . swap ( new_inst . words ) ;
}
void add_opcodes ( const line & in , line & out ) {
out . words . push_back ( in . words . at ( 0 ) ) ;
2018-08-11 17:22:51 +00:00
if ( in . words . at ( 0 ) . data = = " 0f " | | in . words . at ( 0 ) . data = = " f2 " | | in . words . at ( 0 ) . data = = " f3 " )
2018-08-05 05:59:47 +00:00
out . words . push_back ( in . words . at ( 1 ) ) ;
if ( in . words . at ( 0 ) . data = = " f3 " & & in . words . at ( 1 ) . data = = " 0f " )
out . words . push_back ( in . words . at ( 2 ) ) ;
2018-08-11 17:22:51 +00:00
if ( in . words . at ( 0 ) . data = = " f2 " & & in . words . at ( 1 ) . data = = " 0f " )
out . words . push_back ( in . words . at ( 2 ) ) ;
2018-08-05 05:59:47 +00:00
}
void add_modrm_byte ( const line & in , line & out ) {
uint8_t mod = 0 , reg_subop = 0 , rm32 = 0 ;
bool emit = false ;
for ( int i = 0 ; i < SIZE ( in . words ) ; + + i ) {
const word & curr = in . words . at ( i ) ;
if ( has_metadata ( curr , " mod " ) ) {
mod = hex_byte ( curr . data ) ;
emit = true ;
}
else if ( has_metadata ( curr , " rm32 " ) ) {
rm32 = hex_byte ( curr . data ) ;
emit = true ;
}
else if ( has_metadata ( curr , " r32 " ) ) {
reg_subop = hex_byte ( curr . data ) ;
emit = true ;
}
else if ( has_metadata ( curr , " subop " ) ) {
reg_subop = hex_byte ( curr . data ) ;
emit = true ;
}
}
if ( emit )
out . words . push_back ( hex_byte_text ( ( mod < < 6 ) | ( reg_subop < < 3 ) | rm32 ) ) ;
}
void add_sib_byte ( const line & in , line & out ) {
uint8_t scale = 0 , index = 0 , base = 0 ;
bool emit = false ;
for ( int i = 0 ; i < SIZE ( in . words ) ; + + i ) {
const word & curr = in . words . at ( i ) ;
if ( has_metadata ( curr , " scale " ) ) {
scale = hex_byte ( curr . data ) ;
emit = true ;
}
else if ( has_metadata ( curr , " index " ) ) {
index = hex_byte ( curr . data ) ;
emit = true ;
}
else if ( has_metadata ( curr , " base " ) ) {
base = hex_byte ( curr . data ) ;
emit = true ;
}
}
if ( emit )
out . words . push_back ( hex_byte_text ( ( scale < < 6 ) | ( index < < 3 ) | base ) ) ;
}
void add_disp_bytes ( const line & in , line & out ) {
for ( int i = 0 ; i < SIZE ( in . words ) ; + + i ) {
const word & curr = in . words . at ( i ) ;
if ( has_metadata ( curr , " disp8 " ) )
emit_hex_bytes ( out , curr , 1 ) ;
if ( has_metadata ( curr , " disp16 " ) )
emit_hex_bytes ( out , curr , 2 ) ;
else if ( has_metadata ( curr , " disp32 " ) )
emit_hex_bytes ( out , curr , 4 ) ;
}
}
void add_imm_bytes ( const line & in , line & out ) {
for ( int i = 0 ; i < SIZE ( in . words ) ; + + i ) {
const word & curr = in . words . at ( i ) ;
if ( has_metadata ( curr , " imm8 " ) )
emit_hex_bytes ( out , curr , 1 ) ;
else if ( has_metadata ( curr , " imm32 " ) )
emit_hex_bytes ( out , curr , 4 ) ;
}
}
void emit_hex_bytes ( line & out , const word & w , int num ) {
assert ( num < = 4 ) ;
if ( ! is_hex_int ( w . data ) ) {
out . words . push_back ( w ) ;
return ;
}
emit_hex_bytes ( out , static_cast < uint32_t > ( parse_int ( w . data ) ) , num ) ;
}
void emit_hex_bytes ( line & out , uint32_t val , int num ) {
assert ( num < = 4 ) ;
for ( int i = 0 ; i < num ; + + i ) {
out . words . push_back ( hex_byte_text ( val & 0xff ) ) ;
val = val > > 8 ;
}
}
word hex_byte_text ( uint8_t val ) {
ostringstream out ;
out < < HEXBYTE < < NUM ( val ) ;
word result ;
result . data = out . str ( ) ;
result . original = out . str ( ) + " /auto " ;
return result ;
}
string to_string ( const vector < word > & in ) {
ostringstream out ;
for ( int i = 0 ; i < SIZE ( in ) ; + + i ) {
if ( i > 0 ) out < < ' ' ;
out < < in . at ( i ) . data ;
}
return out . str ( ) ;
}
: ( scenario pack_disp8 )
= = 0x1
74 2 / disp8 # jump 2 bytes away if ZF is set
+ transform : packing instruction ' 74 2 / disp8 '
+ transform : instruction after packing : ' 74 02 '
: ( scenarios transform )
: ( scenario pack_disp8_negative )
= = 0x1
# running this will cause an infinite loop
74 - 1 / disp8 # jump 1 byte before if ZF is set
+ transform : packing instruction ' 74 - 1 / disp8 '
+ transform : instruction after packing : ' 74 ff '
: ( scenarios run )
//: helper for scenario
: ( code )
void transform ( const string & text_bytes ) {
program p ;
istringstream in ( text_bytes ) ;
parse ( in , p ) ;
if ( trace_contains_errors ( ) ) return ;
transform ( p ) ;
}
: ( scenario pack_modrm_imm32 )
= = 0x1
# instruction effective address operand displacement immediate
# op subop mod rm32 base index scale r32
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0 / 1 / 2 / 4 bytes 0 / 1 / 2 / 4 bytes
81 0 / add / subop 3 / mod / direct 3 / ebx / rm32 1 / imm32 # add 1 to EBX
+ transform : packing instruction ' 81 0 / add / subop 3 / mod / direct 3 / ebx / rm32 1 / imm32 '
+ transform : instruction after packing : ' 81 c3 01 00 00 00 '
: ( scenario pack_imm32_large )
= = 0x1
b9 0x080490a7 / imm32 # copy to ECX
+ transform : packing instruction ' b9 0x080490a7 / imm32 '
+ transform : instruction after packing : ' b9 a7 90 04 08 '
: ( scenario pack_immediate_constants_hex )
= = 0x1
# instruction effective address operand displacement immediate
# op subop mod rm32 base index scale r32
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0 / 1 / 2 / 4 bytes 0 / 1 / 2 / 4 bytes
bb 0x2a / imm32 # copy 42 to EBX
+ transform : packing instruction ' bb 0x2a / imm32 '
+ transform : instruction after packing : ' bb 2 a 00 00 00 '
+ run : copy imm32 0x0000002a to EBX
: ( scenarios transform )
: ( scenario pack_silently_ignores_non_hex )
= = 0x1
# instruction effective address operand displacement immediate
# op subop mod rm32 base index scale r32
# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0 / 1 / 2 / 4 bytes 0 / 1 / 2 / 4 bytes
bb foo / imm32 # copy foo to EBX
+ transform : packing instruction ' bb foo / imm32 '
# no change (we're just not printing metadata to the trace)
+ transform : instruction after packing : ' bb foo '
$ error : 0
: ( scenarios run )
//:: helpers
: ( code )
bool all_hex_bytes ( const line & inst ) {
for ( int i = 0 ; i < SIZE ( inst . words ) ; + + i )
if ( ! is_hex_byte ( inst . words . at ( i ) ) )
return false ;
return true ;
}
bool is_hex_byte ( const word & curr ) {
if ( contains_any_operand_metadata ( curr ) )
return false ;
if ( SIZE ( curr . data ) ! = 2 )
return false ;
if ( curr . data . find_first_not_of ( " 0123456789abcdefABCDEF " ) ! = string : : npos )
return false ;
return true ;
}
bool contains_any_operand_metadata ( const word & word ) {
for ( int i = 0 ; i < SIZE ( word . metadata ) ; + + i )
if ( Instruction_operands . find ( word . metadata . at ( i ) ) ! = Instruction_operands . end ( ) )
return true ;
return false ;
}
bool has_metadata ( const line & inst , const string & m ) {
bool result = false ;
for ( int i = 0 ; i < SIZE ( inst . words ) ; + + i ) {
if ( ! has_metadata ( inst . words . at ( i ) , m ) ) continue ;
if ( result ) {
raise < < " ' " < < to_string ( inst ) < < " ' has conflicting " < < m < < " operands \n " < < end ( ) ;
return false ;
}
result = true ;
}
return result ;
}
bool has_metadata ( const word & w , const string & m ) {
bool result = false ;
bool metadata_found = false ;
for ( int i = 0 ; i < SIZE ( w . metadata ) ; + + i ) {
const string & curr = w . metadata . at ( i ) ;
if ( ! contains_key ( Instruction_operands , curr ) ) continue ; // ignore unrecognized metadata
if ( metadata_found ) {
raise < < " ' " < < w . original < < " ' has conflicting operand types; it should have only one \n " < < end ( ) ;
return false ;
}
metadata_found = true ;
result = ( curr = = m ) ;
}
return result ;
}
word metadata ( const line & inst , const string & m ) {
for ( int i = 0 ; i < SIZE ( inst . words ) ; + + i )
if ( has_metadata ( inst . words . at ( i ) , m ) )
return inst . words . at ( i ) ;
assert ( false ) ;
}
bool is_hex_int ( const string & s ) {
if ( s . empty ( ) ) return false ;
size_t pos = 0 ;
if ( s . at ( 0 ) = = ' - ' | | s . at ( 0 ) = = ' + ' ) pos + + ;
if ( s . substr ( pos , pos + 2 ) = = " 0x " ) pos + = 2 ;
return s . find_first_not_of ( " 0123456789abcdefABCDEF " , pos ) = = string : : npos ;
}
int32_t parse_int ( const string & s ) {
istringstream in ( s ) ;
int32_t result = 0 ;
in > > std : : hex > > result ;
if ( ! in | | ! in . eof ( ) ) {
raise < < " not a number: " < < s < < ' \n ' < < end ( ) ;
return 0 ;
}
return result ;
}
string to_string ( const line & inst ) {
ostringstream out ;
for ( int i = 0 ; i < SIZE ( inst . words ) ; + + i ) {
if ( i > 0 ) out < < ' ' ;
out < < inst . words . at ( i ) . original ;
}
return out . str ( ) ;
}