From 924ed08aca2fe78cc4d1dd1a0538434f0846e717 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sat, 1 Feb 2020 12:14:12 -0800 Subject: [PATCH] 5968 --- README.md | 18 +++-- mu_instructions | 48 ++++++------ mu_summary | 192 ++++++++++++++++++++++++++++++++++++++++++++++++ subx_opcodes | 2 +- 4 files changed, 226 insertions(+), 34 deletions(-) create mode 100644 mu_summary diff --git a/README.md b/README.md index d0bb5992..490ab46e 100644 --- a/README.md +++ b/README.md @@ -66,8 +66,8 @@ statements in Mu translate to a single machine code instruction. Variables reside in memory by default. Programs must specify registers when they want to use them. Functions must return results in registers. Execution begins at the function `main`, which always returns its result in register `ebx`. [This post](http://akkartik.name/post/mu-2019-2) -has more details. You can see a complete list of supported instructions and -their translations in [this summary](mu_instructions). +has more details, and there's a [summary](mu_summary) of all supported +instructions. ## SubX @@ -715,19 +715,21 @@ If you're still reading, here are some more things to check out: a) Try running the tests: `./test_apps` -b) Check out the online help. Starting point: `./bootstrap` +b) There's a handy [summary](mu_instructions) of how the Mu compiler translates +instructions to SubX. -c) Familiarize yourself with `./bootstrap help opcodes`. If you program in Mu -you'll spend a lot of time with it. (It's also [in this repo](https://github.com/akkartik/mu/blob/master/subx_opcodes).) +c) Check out the online help on SubX. Starting point: `./bootstrap` + +d) Familiarize yourself with the list of opcodes supported in SubX: `./bootstrap +help opcodes`. (It's also [in this repo](https://github.com/akkartik/mu/blob/master/subx_opcodes).) [Here](https://lobste.rs/s/qglfdp/subx_minimalist_assembly_language_for#c_o9ddqk) are some tips on my setup for quickly finding the right opcode for any situation from within Vim. 
-d) Try working on [the starter exercises](https://github.com/akkartik/mu/pulls) +e) Try working on [some starter SubX exercises](https://github.com/akkartik/mu/pulls) (labelled `hello`). -e) SubX comes with some useful [syntax sugar](http://akkartik.name/post/mu-2019-1). -Check it out. +f) SubX comes with some useful [syntax sugar](http://akkartik.name/post/mu-2019-1). ## Credits diff --git a/mu_instructions b/mu_instructions index 3fd3a64e..398a21e6 100644 --- a/mu_instructions +++ b/mu_instructions @@ -127,26 +127,14 @@ compare var, n {.name="compare", .inouts=[var, n], var/reg <- multiply var2 {.name="multiply", .inouts=[var2], .outputs=[reg], .subx-name="0f af/multiply", .rm32="*(ebp+" inouts[0].stack-offset ")", .r32=outputs[0]} Jumps have a slightly simpler format. Most of the time they take no inouts or -outputs. Occasionally you give them a label for a block to jump to the start -or end of. +outputs. Occasionally you give them a label for a containing block to jump to +the start or end of. break-if-= {.name="break-if-=", .subx-name="0f 84/jump-if-= break/disp32"} break-if-= label {.name="break-if-=", .inouts=[label], .subx-name="0f 84/jump-if-=", .disp32=inouts[0] ":break"} break-if-!= {.name="break-if-!=", .subx-name="0f 85/jump-if-!= break/disp32"} break-if-!= label {.name="break-if-!=", .inouts=[label], .subx-name="0f 85/jump-if-!=", .disp32=inouts[0] ":break"} -Inequalities are similar, but have unsigned and signed variants. We assume -unsigned variants are only ever used to compare addresses. 
- -break-if-addr< {.name="break-if-addr<", .subx-name="0f 82/jump-if-addr< break/disp32"} -break-if-addr< label {.name="break-if-addr<", .inouts=[label], .subx-name="0f 82/jump-if-addr<", .disp32=inouts[0] ":break"} -break-if-addr> {.name="break-if-addr>", .subx-name="0f 87/jump-if-addr> break/disp32"} -break-if-addr> label {.name="break-if-addr>", .inouts=[label], .subx-name="0f 87/jump-if-addr>", .disp32=inouts[0] ":break"} -break-if-addr<= {.name="break-if-addr<=", .subx-name="0f 86/jump-if-addr<= break/disp32"} -break-if-addr<= label {.name="break-if-addr<=", .inouts=[label], .subx-name="0f 86/jump-if-addr<=", .disp32=inouts[0] ":break"} -break-if-addr>= {.name="break-if-addr>=", .subx-name="0f 83/jump-if-addr>= break/disp32"} -break-if-addr>= label {.name="break-if-addr>=", .inouts=[label], .subx-name="0f 83/jump-if-addr>=", .disp32=inouts[0] ":break"} - break-if-< {.name="break-if-<", .subx-name="0f 8c/jump-if-< break/disp32"} break-if-< label {.name="break-if-<", .inouts=[label], .subx-name="0f 8c/jump-if-<", .disp32=inouts[0] ":break"} break-if-> {.name="break-if->", .subx-name="0f 8f/jump-if-> break/disp32"} @@ -156,6 +144,15 @@ break-if-<= label {.name="break-if-<=", .inouts=[label], break-if->= {.name="break-if->=", .subx-name="0f 8d/jump-if->= break/disp32"} break-if->= label {.name="break-if->=", .inouts=[label], .subx-name="0f 8d/jump-if->=", .disp32=inouts[0] ":break"} +break-if-addr< {.name="break-if-addr<", .subx-name="0f 82/jump-if-addr< break/disp32"} +break-if-addr< label {.name="break-if-addr<", .inouts=[label], .subx-name="0f 82/jump-if-addr<", .disp32=inouts[0] ":break"} +break-if-addr> {.name="break-if-addr>", .subx-name="0f 87/jump-if-addr> break/disp32"} +break-if-addr> label {.name="break-if-addr>", .inouts=[label], .subx-name="0f 87/jump-if-addr>", .disp32=inouts[0] ":break"} +break-if-addr<= {.name="break-if-addr<=", .subx-name="0f 86/jump-if-addr<= break/disp32"} +break-if-addr<= label {.name="break-if-addr<=", .inouts=[label], 
.subx-name="0f 86/jump-if-addr<=", .disp32=inouts[0] ":break"} +break-if-addr>= {.name="break-if-addr>=", .subx-name="0f 83/jump-if-addr>= break/disp32"} +break-if-addr>= label {.name="break-if-addr>=", .inouts=[label], .subx-name="0f 83/jump-if-addr>=", .disp32=inouts[0] ":break"} + Finally, we repeat all the 'break' variants almost identically for 'loop' instructions. This works because the compiler inserts ':loop' labels at the start of such named blocks, and ':break' labels at the end. @@ -165,15 +162,6 @@ loop-if-= label {.name="loop-if-=", .inouts=[label], loop-if-!= {.name="loop-if-!=", .subx-name="0f 85/jump-if-!= loop/disp32"} loop-if-!= label {.name="loop-if-!=", .inouts=[label], .subx-name="0f 85/jump-if-!=", .disp32=inouts[0] ":loop"} -loop-if-addr< {.name="loop-if-addr<", .subx-name="0f 82/jump-if-addr< loop/disp32"} -loop-if-addr< label {.name="loop-if-addr<", .inouts=[label], .subx-name="0f 82/jump-if-addr<", .disp32=inouts[0] ":loop"} -loop-if-addr> {.name="loop-if-addr>", .subx-name="0f 87/jump-if-addr> loop/disp32"} -loop-if-addr> label {.name="loop-if-addr>", .inouts=[label], .subx-name="0f 87/jump-if-addr>", .disp32=inouts[0] ":loop"} -loop-if-addr<= {.name="loop-if-addr<=", .subx-name="0f 86/jump-if-addr<= loop/disp32"} -loop-if-addr<= label {.name="loop-if-addr<=", .inouts=[label], .subx-name="0f 86/jump-if-addr<=", .disp32=inouts[0] ":loop"} -loop-if-addr>= {.name="loop-if-addr>=", .subx-name="0f 83/jump-if-addr>= loop/disp32"} -loop-if-addr>= label {.name="loop-if-addr>=", .inouts=[label], .subx-name="0f 83/jump-if-addr>=", .disp32=inouts[0] ":loop"} - loop-if-< {.name="loop-if-<", .subx-name="0f 8c/jump-if-< loop/disp32"} loop-if-< label {.name="loop-if-<", .inouts=[label], .subx-name="0f 8c/jump-if-<", .disp32=inouts[0] ":loop"} loop-if-> {.name="loop-if->", .subx-name="0f 8f/jump-if-> loop/disp32"} @@ -183,10 +171,20 @@ loop-if-<= label {.name="loop-if-<=", .inouts=[label], loop-if->= {.name="loop-if->=", .subx-name="0f 8d/jump-if->= 
loop/disp32"} loop-if->= label {.name="loop-if->=", .inouts=[label], .subx-name="0f 8d/jump-if->=", .disp32=inouts[0] ":loop"} -There are also unconditional loop instructions. So far it doesn't seem like -unconditional breaks have much use. +loop-if-addr< {.name="loop-if-addr<", .subx-name="0f 82/jump-if-addr< loop/disp32"} +loop-if-addr< label {.name="loop-if-addr<", .inouts=[label], .subx-name="0f 82/jump-if-addr<", .disp32=inouts[0] ":loop"} +loop-if-addr> {.name="loop-if-addr>", .subx-name="0f 87/jump-if-addr> loop/disp32"} +loop-if-addr> label {.name="loop-if-addr>", .inouts=[label], .subx-name="0f 87/jump-if-addr>", .disp32=inouts[0] ":loop"} +loop-if-addr<= {.name="loop-if-addr<=", .subx-name="0f 86/jump-if-addr<= loop/disp32"} +loop-if-addr<= label {.name="loop-if-addr<=", .inouts=[label], .subx-name="0f 86/jump-if-addr<=", .disp32=inouts[0] ":loop"} +loop-if-addr>= {.name="loop-if-addr>=", .subx-name="0f 83/jump-if-addr>= loop/disp32"} +loop-if-addr>= label {.name="loop-if-addr>=", .inouts=[label], .subx-name="0f 83/jump-if-addr>=", .disp32=inouts[0] ":loop"} + +Finally, unconditional jumps: loop {.name="loop", .subx-name="e9/jump loop/disp32"} loop label {.name="loop", .inouts=[label], .subx-name="e9/jump", .disp32=inouts[0] ":loop"} +(So far it doesn't seem like unconditional breaks have much use.) + vim:ft=c:nowrap diff --git a/mu_summary b/mu_summary new file mode 100644 index 00000000..676b1151 --- /dev/null +++ b/mu_summary @@ -0,0 +1,192 @@ +Mu programs are lists of functions. Each function has the following form: + + fn _name_ _inouts_with_types_ -> _outputs_with_types_ { + _instructions_ + } + +Instructions may be primitives or function calls. Either way, all instructions +have one of the following forms: + + # defining variables + var _name_: _type_ + var _name_/_register_: _type_ + + # doing things with variables + _operation_ _inouts_ + _outputs_ <- _operation_ _inouts_ + +Instructions and functions may have inouts and outputs. 
Both inouts and +outputs are variables. + +As seen above, variables can be defined to live in a register, like this: + + n/eax + +Variables not assigned a register live in the stack. + +Function inouts must always be on the stack, and outputs must always be in +registers. A function call must always write to the exact registers its +definition requires. For example: + + fn foo -> x/eax: int { + ... + } + fn main { + a/eax <- foo # ok + a/ebx <- foo # wrong + } + +Primitive inouts may be on the stack or in registers, but outputs must always +be in registers. + +Functions can contain nested blocks inside { and }. Variables defined in a +block don't exist outside it. + +## Primitive instructions + +Primitive instructions currently supported in Mu: + + var/eax <- increment + var/ecx <- increment + var/edx <- increment + var/ebx <- increment + var/esi <- increment + var/edi <- increment + increment var + + var/eax <- decrement + var/ecx <- decrement + var/edx <- decrement + var/ebx <- decrement + var/esi <- decrement + var/edi <- decrement + decrement var + + var1/reg1 <- add var2/reg2 + var/reg <- add var2 + add-to var1, var2/reg + var/eax <- add n + var/reg <- add n + add-to var, n + + var1/reg1 <- sub var2/reg2 + var/reg <- sub var2 + sub-from var1, var2/reg + var/eax <- sub n + var/reg <- sub n + sub-from var, n + + var1/reg1 <- and var2/reg2 + var/reg <- and var2 + and-with var1, var2/reg + var/eax <- and n + var/reg <- and n + and-with var, n + + var1/reg1 <- or var2/reg2 + var/reg <- or var2 + or-with var1, var2/reg + var/eax <- or n + var/reg <- or n + or-with var, n + + var1/reg1 <- xor var2/reg2 + var/reg <- xor var2 + xor-with var1, var2/reg + var/eax <- xor n + var/reg <- xor n + xor-with var, n + + var/eax <- copy n + var/ecx <- copy n + var/edx <- copy n + var/ebx <- copy n + var/esi <- copy n + var/edi <- copy n + var1/reg1 <- copy var2/reg2 + copy-to var1, var2/reg + var/reg <- copy var2 + var/reg <- copy n + copy-to var, n + + compare var1, var2/reg + 
compare var1/reg, var2 + compare var/eax, n + compare var, n + + var/reg <- multiply var2 + +## Primitive jump instructions + +There are two kinds of jumps, both with many variations: `break` and `loop`. +`break` instructions jump to the end of the containing block. `loop` instructions +jump to the beginning of the containing block. + +Jumps can take an optional label starting with '$': + + loop $foo + +This instruction jumps to the beginning of the block called $foo. It must lie +somewhere inside such a block. Jumps are only legal to containing blocks. + +There are two unconditional jumps: + + loop + loop label + # unconditional break instructions don't seem useful + +The remaining jump instructions are all conditional. Conditional jumps rely on +the result of the most recently executed `compare` instruction. (To keep +programs easy to read, keep compare instructions close to the jump that uses +them.) + + break-if-= + break-if-= label + break-if-!= + break-if-!= label + +Inequalities are similar, but have unsigned and signed variants. We assume +unsigned variants are only ever used to compare addresses. 
+ + break-if-< + break-if-< label + break-if-> + break-if-> label + break-if-<= + break-if-<= label + break-if->= + break-if->= label + + break-if-addr< + break-if-addr< label + break-if-addr> + break-if-addr> label + break-if-addr<= + break-if-addr<= label + break-if-addr>= + break-if-addr>= label + +Similarly, conditional loops: + + loop-if-= + loop-if-= label + loop-if-!= + loop-if-!= label + + loop-if-< + loop-if-< label + loop-if-> + loop-if-> label + loop-if-<= + loop-if-<= label + loop-if->= + loop-if->= label + + loop-if-addr< + loop-if-addr< label + loop-if-addr> + loop-if-addr> label + loop-if-addr<= + loop-if-addr<= label + loop-if-addr>= + loop-if-addr>= label diff --git a/subx_opcodes b/subx_opcodes index e222df66..a44e2aba 100644 --- a/subx_opcodes +++ b/subx_opcodes @@ -100,7 +100,7 @@ Opcodes currently supported by SubX: 0f 8e: jump disp32 bytes away if lesser or equal (signed), if ZF is set or SF != OF (jcc/jle/jng) 0f 8f: jump disp32 bytes away if greater (signed), if ZF is unset and SF == OF (jcc/jg/jnle) 0f af: multiply rm32 into r32 (imul) -Run `subx help instructions` for details on words like 'r32' and 'disp8'. +Run `bootstrap help instructions` for details on words like 'r32' and 'disp8'. For complete details on these instructions, consult the IA-32 manual (volume 2). There's various versions of it online, such as https://c9x.me/x86. The mnemonics in brackets will help you locate each instruction.