2018-07-24 23:06:43 +00:00
//: The bedrock level 1 of abstraction is now done, and we're going to start
//: building levels above it that make programming in x86 machine code a
//: little more ergonomic.
2018-07-03 17:06:07 +00:00
//:
2018-07-24 23:06:43 +00:00
//: All levels will be "pass through by default". Whatever they don't
//: understand they will silently pass through to lower levels.
//:
//: Since raw hex bytes of machine code are always possible to inject, SubX is
//: not a language, and we aren't building a compiler. This is something
//: deliberately leakier. Levels exist to improve auditing, checks and error
//: messages rather than to hide low-level details.
//: Translator workflow: read the 'source' file(s). Run a series of transforms
//: on the resulting program, each passing through what it doesn't understand.
//: The final program should be just machine code, suitable to write to an ELF
//: binary.
//:
//: Higher levels usually transform code on the basis of metadata.
2018-07-01 05:46:32 +00:00
2018-08-04 22:54:51 +00:00
:(before "End Main")
if (is_equal(argv[1], "translate")) {
  // Sub-command: parse every input file named on the commandline into a
  // single program, run the transforms over it, and save the result as an
  // ELF binary at the path following '-o'.
  // Finishes with status 0 on success, and status 1 on bad arguments or as
  // soon as the trace contains errors.
  // The marker comments below and the names 'p' and 'output_filename' are
  // deliberately left untouched; presumably other layers hook in around them
  // (TODO confirm).
  START_TRACING_UNTIL_END_OF_SCOPE;
  reset();
  // Begin subx translate
  program p;
  string output_filename;
  for (int arg = /*skip 'subx translate'*/2;  arg < argc;  ++arg) {
    if (!is_equal(argv[arg], "-o")) {
      // anything that isn't the output flag is an input file to parse
      const char* input_filename = argv[arg];
      trace(2, "parse") << input_filename << end();
      ifstream fin(input_filename);
      if (!fin) {
        cerr << "could not open " << input_filename << '\n';
        return 1;
      }
      parse(fin, p);
      if (trace_contains_errors()) return 1;
      continue;
    }
    // the output flag consumes the argument right after it
    ++arg;
    if (arg >= argc) {
      print_translate_usage();
      cerr << "'-o' must be followed by a filename to write results to\n";
      exit(1);
    }
    output_filename = argv[arg];
  }
  if (p.segments.empty()) {
    print_translate_usage();
    cerr << "nothing to do; must provide at least one file to read\n";
    exit(1);
  }
  if (output_filename.empty()) {
    print_translate_usage();
    cerr << "must provide a filename to write to using '-o'\n";
    exit(1);
  }
  trace(2, "transform") << "begin" << end();
  transform(p);
  if (trace_contains_errors()) return 1;
  trace(2, "translate") << "begin" << end();
  save_elf(p, output_filename);
  if (trace_contains_errors()) {
    // don't leave a partial or invalid binary lying around
    unlink(output_filename.c_str());
    return 1;
  }
  // End subx translate
  return 0;
}
2018-06-30 16:41:22 +00:00
: ( code )
2018-10-01 05:49:24 +00:00
// Print a one-line synopsis of the 'translate' sub-command to stderr.
void print_translate_usage() {
  const char* synopsis = "Usage: subx translate file1 file2 ... -o output\n";
  cerr << synopsis;
}
2018-07-16 05:59:02 +00:00
// write out a program to a bare-bones ELF file
2018-10-01 05:49:24 +00:00
void save_elf ( const program & p , const string & filename ) {
ofstream out ( filename . c_str ( ) , ios : : binary ) ;
2019-07-25 17:40:27 +00:00
save_elf ( p , out ) ;
out . close ( ) ;
}
void save_elf ( const program & p , ostream & out ) {
2019-07-26 00:15:20 +00:00
// validation: stay consistent with the self-hosted translator
2019-07-26 00:13:01 +00:00
if ( p . entry = = 0 ) {
raise < < " no 'Entry' label found \n " < < end ( ) ;
return ;
}
2019-07-26 00:15:20 +00:00
if ( find ( p , " data " ) = = NULL ) {
raise < < " must include a 'data' segment \n " < < end ( ) ;
return ;
}
// processing
2018-07-26 16:30:00 +00:00
write_elf_header ( out , p ) ;
2018-07-16 05:59:02 +00:00
for ( size_t i = 0 ; i < p . segments . size ( ) ; + + i )
2018-07-26 16:30:00 +00:00
write_segment ( p . segments . at ( i ) , out ) ;
2018-06-30 16:41:22 +00:00
}
2018-07-26 16:30:00 +00:00
void write_elf_header ( ostream & out , const program & p ) {
2018-06-30 16:41:22 +00:00
char c = ' \0 ' ;
# define O(X) c = (X); out.write(&c, sizeof(c))
// host is required to be little-endian
# define emit(X) out.write(reinterpret_cast<const char*>(&X), sizeof(X))
//// ehdr
// e_ident
O ( 0x7f ) ; O ( /*E*/ 0x45 ) ; O ( /*L*/ 0x4c ) ; O ( /*F*/ 0x46 ) ;
O ( 0x1 ) ; // 32-bit format
O ( 0x1 ) ; // little-endian
O ( 0x1 ) ; O ( 0x0 ) ;
for ( size_t i = 0 ; i < 8 ; + + i ) { O ( 0x0 ) ; }
// e_type
O ( 0x02 ) ; O ( 0x00 ) ;
// e_machine
O ( 0x03 ) ; O ( 0x00 ) ;
// e_version
O ( 0x01 ) ; O ( 0x00 ) ; O ( 0x00 ) ; O ( 0x00 ) ;
// e_entry
2019-07-25 17:40:27 +00:00
uint32_t e_entry = p . entry ;
2019-02-19 05:48:19 +00:00
// Override e_entry
2018-06-30 16:41:22 +00:00
emit ( e_entry ) ;
// e_phoff -- immediately after ELF header
2019-02-19 05:48:19 +00:00
uint32_t e_phoff = 0x34 ;
2018-06-30 16:41:22 +00:00
emit ( e_phoff ) ;
// e_shoff; unused
2019-02-19 05:48:19 +00:00
uint32_t dummy32 = 0 ;
2018-06-30 16:41:22 +00:00
emit ( dummy32 ) ;
// e_flags; unused
emit ( dummy32 ) ;
// e_ehsize
2018-07-16 05:59:02 +00:00
uint16_t e_ehsize = 0x34 ;
2018-06-30 16:41:22 +00:00
emit ( e_ehsize ) ;
// e_phentsize
uint16_t e_phentsize = 0x20 ;
emit ( e_phentsize ) ;
// e_phnum
2018-07-16 05:59:02 +00:00
uint16_t e_phnum = SIZE ( p . segments ) ;
2018-06-30 16:41:22 +00:00
emit ( e_phnum ) ;
// e_shentsize
uint16_t dummy16 = 0x0 ;
emit ( dummy16 ) ;
// e_shnum
emit ( dummy16 ) ;
// e_shstrndx
emit ( dummy16 ) ;
2018-09-02 03:37:54 +00:00
uint32_t p_offset = /*size of ehdr*/ 0x34 + SIZE ( p . segments ) * 0x20 /*size of each phdr*/ ;
2018-07-16 05:59:02 +00:00
for ( int i = 0 ; i < SIZE ( p . segments ) ; + + i ) {
2019-05-18 07:00:18 +00:00
const segment & curr = p . segments . at ( i ) ;
2018-07-16 05:59:02 +00:00
//// phdr
// p_type
uint32_t p_type = 0x1 ;
emit ( p_type ) ;
// p_offset
emit ( p_offset ) ;
// p_vaddr
2019-05-18 07:00:18 +00:00
uint32_t p_start = curr . start ;
2018-09-01 22:58:53 +00:00
emit ( p_start ) ;
2018-07-16 05:59:02 +00:00
// p_paddr
2018-09-01 22:58:53 +00:00
emit ( p_start ) ;
2018-07-16 05:59:02 +00:00
// p_filesz
2019-05-18 07:00:18 +00:00
uint32_t size = num_words ( curr ) ;
2018-11-23 08:21:41 +00:00
assert ( p_offset + size < SEGMENT_ALIGNMENT ) ;
2018-07-16 05:59:02 +00:00
emit ( size ) ;
// p_memsz
emit ( size ) ;
// p_flags
2019-05-18 07:00:18 +00:00
uint32_t p_flags = ( curr . name = = " code " ) ? /*r-x*/ 0x5 : /*rw-*/ 0x6 ;
2018-07-16 05:59:02 +00:00
emit ( p_flags ) ;
2018-07-16 23:21:18 +00:00
2018-07-16 05:59:02 +00:00
// p_align
2018-07-16 23:21:18 +00:00
// "As the system creates or augments a process image, it logically copies
// a file's segment to a virtual memory segment. When—and if— the system
// physically reads the file depends on the program's execution behavior,
// system load, and so on. A process does not require a physical page
// unless it references the logical page during execution, and processes
// commonly leave many pages unreferenced. Therefore delaying physical
// reads frequently obviates them, improving system performance. To obtain
// this efficiency in practice, executable and shared object files must
// have segment images whose file offsets and virtual addresses are
// congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
2018-07-17 02:53:56 +00:00
uint32_t p_align = 0x1000 ; // default page size on linux
2018-07-16 05:59:02 +00:00
emit ( p_align ) ;
2018-09-01 22:58:53 +00:00
if ( p_offset % p_align ! = p_start % p_align ) {
raise < < " segment starting at 0x " < < HEXWORD < < p_start < < " is improperly aligned; alignment for p_offset " < < p_offset < < " should be " < < ( p_offset % p_align ) < < " but is " < < ( p_start % p_align ) < < ' \n ' < < end ( ) ;
2018-07-16 23:16:06 +00:00
return ;
}
2018-07-16 05:59:02 +00:00
// prepare for next segment
p_offset + = size ;
}
2018-06-30 16:41:22 +00:00
# undef O
2018-07-11 05:39:46 +00:00
# undef emit
2018-06-30 16:41:22 +00:00
}
2018-07-26 16:30:00 +00:00
void write_segment ( const segment & s , ostream & out ) {
2018-07-16 05:59:02 +00:00
for ( int i = 0 ; i < SIZE ( s . lines ) ; + + i ) {
const vector < word > & w = s . lines . at ( i ) . words ;
for ( int j = 0 ; j < SIZE ( w ) ; + + j ) {
uint8_t x = hex_byte ( w . at ( j ) . data ) ; // we're done with metadata by this point
out . write ( reinterpret_cast < const char * > ( & x ) , /*sizeof(byte)*/ 1 ) ;
}
2018-07-01 05:46:32 +00:00
}
}
2018-09-01 22:58:53 +00:00
// Count the words in segment 's', summed over all its lines.
uint32_t num_words(const segment& s) {
  uint32_t total = 0;
  int curr = 0;
  while (curr < SIZE(s.lines)) {
    total += SIZE(s.lines.at(curr).words);
    ++curr;
  }
  return total;
}
2018-07-01 06:05:40 +00:00
2018-06-30 16:41:22 +00:00
: ( before " End Includes " )
using std : : ios ;