2018-12-17 04:52:41 +00:00
//: Loading SubX programs from ELF binaries.
2018-09-21 17:06:17 +00:00
//: This will allow us to run them natively on a Linux kernel.
//: Based on https://github.com/kragen/stoneknifeforth/blob/702d2ebe1b/386.c
2017-12-31 06:53:08 +00:00
: ( before " End Main " )
assert ( argc > 1 ) ;
2018-06-28 04:52:09 +00:00
if ( is_equal ( argv [ 1 ] , " run " ) ) {
2018-07-17 05:18:18 +00:00
START_TRACING_UNTIL_END_OF_SCOPE ;
2019-02-25 08:17:46 +00:00
trace ( 2 , " run " ) < < " === Starting to run " < < end ( ) ;
2018-06-28 04:52:09 +00:00
assert ( argc > 2 ) ;
reset ( ) ;
2018-07-03 22:37:45 +00:00
cerr < < std : : hex ;
2018-08-30 08:15:45 +00:00
load_elf ( argv [ 2 ] , argc , argv ) ;
2018-06-28 22:54:52 +00:00
while ( EIP < End_of_program ) // weak final-gasp termination check
2018-06-28 04:52:09 +00:00
run_one_instruction ( ) ;
2018-10-25 05:52:48 +00:00
raise < < " executed past end of the world: " < < EIP < < " vs " < < End_of_program < < ' \n ' < < end ( ) ;
return 1 ;
2018-06-28 04:52:09 +00:00
}
2017-12-31 06:53:08 +00:00
:(code)
// Read the whole file at `filename` into a temporary buffer and pass it, along
// with the command-line arguments, to load_elf_contents().
//   filename: path of the ELF binary to load
//   argc, argv: the emulator's own command line, forwarded unchanged
// Failures to open, measure, allocate for or read the file are reported with
// `raise ... << die()`.
void load_elf(const string& filename, int argc, char* argv[]) {
  int fd = open(filename.c_str(), O_RDONLY);
  if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die();
  // Seek to the end to learn the file size, then rewind before reading.
  off_t size = lseek(fd, 0, SEEK_END);
  // lseek returns -1 on failure; don't let that reach malloc()/read() as a size.
  if (size < 0) raise << filename.c_str() << ": lseek" << perr() << '\n' << die();
  lseek(fd, 0, SEEK_SET);
  uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size));
  if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die();
  ssize_t read_size = read(fd, elf_contents, size);
  if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die();
  // Everything we need is in memory now; release the descriptor.
  // (Closed only after the check above so that perr() still sees read()'s errno.)
  close(fd);
  load_elf_contents(elf_contents, size, argc, argv);
  free(elf_contents);
}
2018-08-30 08:15:45 +00:00
void load_elf_contents ( uint8_t * elf_contents , size_t size , int argc , char * argv [ ] ) {
2017-12-31 06:53:08 +00:00
uint8_t magic [ 5 ] = { 0 } ;
memcpy ( magic , elf_contents , 4 ) ;
2018-06-28 22:22:13 +00:00
if ( memcmp ( magic , " \177 ELF " , 4 ) ! = 0 )
2018-06-28 23:34:47 +00:00
raise < < " Invalid ELF file; starts with \" " < < magic < < ' " ' < < die ( ) ;
2018-06-28 22:22:13 +00:00
if ( elf_contents [ 4 ] ! = 1 )
2018-06-28 23:34:47 +00:00
raise < < " Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported. \n " < < die ( ) ;
2018-06-28 22:22:13 +00:00
if ( elf_contents [ 5 ] ! = 1 )
2018-06-28 23:34:47 +00:00
raise < < " Only little-endian ELF files supported. \n " < < die ( ) ;
2018-06-28 22:22:13 +00:00
// unused: remaining 10 bytes of e_ident
uint32_t e_machine_type = u32_in ( & elf_contents [ 16 ] ) ;
if ( e_machine_type ! = 0x00030002 )
2018-06-28 23:34:47 +00:00
raise < < " ELF type/machine 0x " < < HEXWORD < < e_machine_type < < " isn't i386 executable \n " < < die ( ) ;
2018-06-28 22:22:13 +00:00
// unused: e_version. We only support version 1, and later versions will be backwards compatible.
2017-12-31 06:53:08 +00:00
uint32_t e_entry = u32_in ( & elf_contents [ 24 ] ) ;
uint32_t e_phoff = u32_in ( & elf_contents [ 28 ] ) ;
2018-06-28 22:22:13 +00:00
// unused: e_shoff
// unused: e_flags
uint32_t e_ehsize = u16_in ( & elf_contents [ 40 ] ) ;
2018-06-28 23:34:47 +00:00
if ( e_ehsize < 52 ) raise < < " Invalid binary; ELF header too small \n " < < die ( ) ;
2018-06-28 22:22:13 +00:00
uint32_t e_phentsize = u16_in ( & elf_contents [ 42 ] ) ;
uint32_t e_phnum = u16_in ( & elf_contents [ 44 ] ) ;
2018-08-14 03:43:38 +00:00
trace ( 90 , " load " ) < < e_phnum < < " entries in the program header, each " < < e_phentsize < < " bytes long " < < end ( ) ;
2018-06-28 22:22:13 +00:00
// unused: e_shentsize
// unused: e_shnum
// unused: e_shstrndx
2017-12-31 06:53:08 +00:00
2018-10-01 17:05:33 +00:00
set < uint32_t > overlap ; // to detect overlapping segments
2018-06-28 22:22:13 +00:00
for ( size_t i = 0 ; i < e_phnum ; + + i )
2018-10-01 17:05:33 +00:00
load_segment_from_program_header ( elf_contents , i , size , e_phoff + i * e_phentsize , e_ehsize , overlap ) ;
2017-12-31 06:53:08 +00:00
2018-07-17 05:18:18 +00:00
// initialize code and stack
2018-10-01 17:05:33 +00:00
assert ( overlap . find ( STACK_SEGMENT ) = = overlap . end ( ) ) ;
2018-09-29 06:08:27 +00:00
Mem . push_back ( vma ( STACK_SEGMENT ) ) ;
2018-10-01 17:05:33 +00:00
assert ( overlap . find ( AFTER_STACK ) = = overlap . end ( ) ) ;
2019-05-10 17:24:24 +00:00
// The stack grows downward.
2018-07-17 05:18:18 +00:00
Reg [ ESP ] . u = AFTER_STACK ;
Reg [ EBP ] . u = 0 ;
2017-12-31 06:53:08 +00:00
EIP = e_entry ;
2018-08-30 08:15:45 +00:00
// initialize args on stack
// no envp for now
2018-09-01 06:46:58 +00:00
// we wastefully use a separate page of memory for argv
2018-09-29 06:08:27 +00:00
Mem . push_back ( vma ( ARGV_DATA_SEGMENT ) ) ;
2018-08-30 08:15:45 +00:00
uint32_t argv_data = ARGV_DATA_SEGMENT ;
2018-09-01 06:46:58 +00:00
for ( int i = argc - 1 ; i > = /*skip 'subx_bin' and 'run'*/ 2 ; - - i ) {
push ( argv_data ) ;
2018-08-30 08:15:45 +00:00
for ( size_t j = 0 ; j < = strlen ( argv [ i ] ) ; + + j ) {
2018-10-01 17:05:33 +00:00
assert ( overlap . find ( argv_data ) = = overlap . end ( ) ) ; // don't bother comparing ARGV and STACK
2018-08-30 08:15:45 +00:00
write_mem_u8 ( argv_data , argv [ i ] [ j ] ) ;
argv_data + = sizeof ( char ) ;
2018-11-23 08:21:41 +00:00
assert ( argv_data < ARGV_DATA_SEGMENT + SEGMENT_ALIGNMENT ) ;
2018-08-30 08:15:45 +00:00
}
}
2018-09-01 06:46:58 +00:00
push ( argc - /*skip 'subx_bin' and 'run'*/ 2 ) ;
2018-08-30 08:15:45 +00:00
}
void push ( uint32_t val ) {
Reg [ ESP ] . u - = 4 ;
2019-05-12 02:11:13 +00:00
if ( Reg [ ESP ] . u < STACK_SEGMENT ) {
raise < < " The stack overflowed its segment. "
< < " Maybe SPACE_FOR_SEGMENT should be larger? "
< < " Or you need to carve out an exception for the stack segment "
2019-07-15 22:47:35 +00:00
< < " to be larger. \n " < < die ( ) ;
2019-05-12 02:11:13 +00:00
}
2019-02-25 08:17:46 +00:00
trace ( Callstack_depth + 1 , " run " ) < < " decrementing ESP to 0x " < < HEXWORD < < Reg [ ESP ] . u < < end ( ) ;
trace ( Callstack_depth + 1 , " run " ) < < " pushing value 0x " < < HEXWORD < < val < < end ( ) ;
2018-08-30 08:15:45 +00:00
write_mem_u32 ( Reg [ ESP ] . u , val ) ;
2017-12-31 06:53:08 +00:00
}
2018-10-01 17:05:33 +00:00
void load_segment_from_program_header ( uint8_t * elf_contents , int segment_index , size_t size , uint32_t offset , uint32_t e_ehsize , set < uint32_t > & overlap ) {
2018-06-28 22:22:13 +00:00
uint32_t p_type = u32_in ( & elf_contents [ offset ] ) ;
2018-08-14 03:43:38 +00:00
trace ( 90 , " load " ) < < " program header at offset " < < offset < < " : type " < < p_type < < end ( ) ;
2018-06-28 22:22:13 +00:00
if ( p_type ! = 1 ) {
2018-08-14 03:43:38 +00:00
trace ( 90 , " load " ) < < " ignoring segment at offset " < < offset < < " of non PT_LOAD type " < < p_type < < " (see http://refspecs.linuxbase.org/elf/elf.pdf) " < < end ( ) ;
2018-06-28 22:22:13 +00:00
return ;
}
uint32_t p_offset = u32_in ( & elf_contents [ offset + 4 ] ) ;
uint32_t p_vaddr = u32_in ( & elf_contents [ offset + 8 ] ) ;
2018-06-28 23:34:47 +00:00
if ( e_ehsize > p_vaddr ) raise < < " Invalid binary; program header overlaps ELF header \n " < < die ( ) ;
2018-06-28 22:22:13 +00:00
// unused: p_paddr
uint32_t p_filesz = u32_in ( & elf_contents [ offset + 16 ] ) ;
uint32_t p_memsz = u32_in ( & elf_contents [ offset + 20 ] ) ;
if ( p_filesz ! = p_memsz )
2018-09-29 06:08:27 +00:00
raise < < " Can't yet handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf) \n " < < die ( ) ;
2018-06-28 22:22:13 +00:00
if ( p_offset + p_filesz > size )
2018-06-28 23:34:47 +00:00
raise < < " Invalid binary; segment at offset " < < offset < < " is too large: wants to end at " < < p_offset + p_filesz < < " but the file ends at " < < size < < ' \n ' < < die ( ) ;
2018-11-23 08:21:41 +00:00
if ( p_memsz > = SEGMENT_ALIGNMENT ) {
raise < < " Code segment too small for SubX; for now please manually increase SEGMENT_ALIGNMENT. \n " < < end ( ) ;
2018-09-29 06:08:27 +00:00
return ;
}
2018-08-14 03:43:38 +00:00
trace ( 90 , " load " ) < < " blitting file offsets ( " < < p_offset < < " , " < < ( p_offset + p_filesz ) < < " ) to addresses ( " < < p_vaddr < < " , " < < ( p_vaddr + p_memsz ) < < ' ) ' < < end ( ) ;
2018-09-29 06:08:27 +00:00
if ( size > p_memsz ) size = p_memsz ;
Mem . push_back ( vma ( p_vaddr ) ) ;
2018-10-01 17:05:33 +00:00
for ( size_t i = 0 ; i < p_filesz ; + + i ) {
assert ( overlap . find ( p_vaddr + i ) = = overlap . end ( ) ) ;
2018-07-09 05:33:15 +00:00
write_mem_u8 ( p_vaddr + i , elf_contents [ p_offset + i ] ) ;
2018-10-01 17:05:33 +00:00
overlap . insert ( p_vaddr + i ) ;
}
2018-09-29 06:08:27 +00:00
if ( segment_index = = 0 & & End_of_program < p_vaddr + p_memsz )
2018-06-28 22:22:13 +00:00
End_of_program = p_vaddr + p_memsz ;
}
2018-07-10 14:18:36 +00:00
:(before "End Includes")
2019-05-09 16:25:01 +00:00
// Very primitive/fixed/insecure ELF segments for now.
//
//   --- inaccessible:        0x00000000 -> 0x08047fff
//   code:                    0x09000000 -> 0x09ffffff (specified in ELF binary)
//   data:                    0x0a000000 -> 0x0affffff (specified in ELF binary)
//   --- heap gets mmap'd somewhere here ---
//   stack:                   0xbdffffff -> 0xbd000000 (downward; not in ELF binary)
//   argv hack:               0xbf000000 -> 0xbfffffff (not in ELF binary)
//   --- reserved for kernel: 0xc0000000 -> ...

// lowest address of the heap region
const uint32_t START_HEAP = 0x0b000000;
// upper bound of the heap region; same address as the start of the stack segment
const uint32_t END_HEAP = 0xbd000000;
// lowest address of the stack segment
const uint32_t STACK_SEGMENT = 0xbd000000;
// first address past the stack segment; ESP starts out here
const uint32_t AFTER_STACK = 0xbe000000;
// start of the segment that holds the command-line argument strings
const uint32_t ARGV_DATA_SEGMENT = 0xbf000000;

// When updating the above memory map, don't forget to update `mmap`'s
// implementation in the 'syscalls' layer.
2019-02-23 21:35:19 +00:00
:(before "End Dump Info for Instruction")
//? dump_stack(); // slow
2018-07-10 14:18:36 +00:00
:(code)
// Trace the emulated stack, one 32-bit word per line, from the word just below
// AFTER_STACK down to ESP (which is marked), followed by a few words below
// ESP.
void dump_stack() {
  trace(Callstack_depth+1, "run") << "stack:" << end();
  // words above the current top of stack
  for (uint32_t a = AFTER_STACK-4;  a > Reg[ESP].u;  a -= 4)
    trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
  // the top of stack itself
  trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << Reg[ESP].u << " => 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << "  <=== ESP" << end();
  // the words just below ESP
  for (uint32_t a = Reg[ESP].u-4;  a > Reg[ESP].u-40;  a -= 4)
    trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
}
2017-12-31 06:53:08 +00:00
// Read a little-endian 32-bit value from the 4 bytes starting at p.
// The value is accumulated in a uint32_t so that no shift is ever performed on
// a promoted (signed) int; the top byte may have its high bit set.
inline uint32_t u32_in(uint8_t* p) {
  uint32_t result = 0;
  // most significant byte first; each step makes room for the next lower byte
  for (int i = 3;  i >= 0;  --i)
    result = (result << 8) | p[i];
  return result;
}
2018-06-28 22:22:13 +00:00
// Read a little-endian 16-bit value from the 2 bytes starting at p.
inline uint16_t u16_in(uint8_t* p) {
  uint16_t result = p[1];  // high byte
  result <<= 8;
  result |= p[0];  // low byte
  return result;
}
2018-06-28 23:34:47 +00:00
:(before "End Types")
// Empty tag type for stream output: writing `perr()` to an ostream appends
// ": " and the strerror() text for errno, if errno is set.
struct perr {};
:(code)
// Stream inserter for the `perr` tag: appends ": " followed by the system's
// description of errno. Writes nothing when errno is 0.
// Returns the stream so that insertions can be chained.
ostream& operator<<(ostream& os, perr /*unused*/) {
  if (errno == 0) return os;
  return os << ": " << strerror(errno);
}
2018-06-28 23:34:04 +00:00
:(before "End Includes")
2017-12-31 06:53:08 +00:00
# include <sys/types.h>
# include <sys/stat.h>
# include <fcntl.h>
# include <stdarg.h>
2017-12-31 18:23:08 +00:00
# include <errno.h>
2018-08-04 06:42:20 +00:00
# include <unistd.h>