From 2715d377b6108b0a607d9322d470bedd77c9c717 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sun, 15 Nov 2020 22:54:56 -0800 Subject: [PATCH] 7247 --- mu.md | 69 ++++++++++++++++++++++++++++++++++++++++++------- mu_instructions | 6 +++-- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/mu.md b/mu.md index bb8b68c9..a2581f9e 100644 --- a/mu.md +++ b/mu.md @@ -45,7 +45,9 @@ will provide good error messages to support you. Further down, this page enumerates all available primitives in Mu, and [a separate page](http://akkartik.github.io/mu/html/mu_instructions.html) -describes how each primitive is translated to machine code. +describes how each primitive is translated to machine code. There is also a +useful list of pre-defined functions (implemented in unsafe machine code) in [400.mu](http://akkartik.github.io/mu/html/400.mu.html) +and [vocabulary.md](vocabulary.md). ## Functions and calls @@ -471,34 +473,49 @@ Here `var2` can't live in a register. ## Array operations -Mu arrays are size-prefixed so that operations on them can check bounds as -necessary at run-time. The `length` statement returns the number of elements -in an array. +Here's an example definition of a fixed-length array: + +``` +var x: (array int 3) +``` + +The length (here `3`) must be an integer literal. We'll show how to create +dynamically-sized arrays further down. + +Arrays can be large; to avoid copying them around on every function call +you'll usually want to manage `addr`s to them. Here's an example computing the +address of an array. + +``` +var n/eax: (addr array int) <- address x +``` + +Addresses to arrays don't include the array length in their type. However, you +can obtain the length of an array like this: ``` var/reg: int <- length arr/reg: (addr array T) ``` -The `index` statement takes an `addr` to an `array` and returns an `addr` to -one of its elements, that can be read from or written to. +To operate on elements of an array, use the `index` statement: ``` var/reg: (addr T) <- index arr/reg: (addr array T), n -var/reg: (addr T) <- index arr: (array T sz), n +var/reg: (addr T) <- index arr: (array T len), n ``` The index can also be a variable in a register, with a caveat: ``` var/reg: (addr T) <- index arr/reg: (addr array T), idx/reg: int -var/reg: (addr T) <- index arr: (array T sz), idx/reg: int +var/reg: (addr T) <- index arr: (array T len), idx/reg: int ``` The caveat: the size of T must be 1, 2, 4 or 8 bytes. The x86 instruction set has complex addressing modes that can index into an array in a single instruction in these situations. -For types in general you'll need to split up the work, performing a `compute-offset` +For other sizes of T you'll need to split up the work, performing a `compute-offset` before the `index`. ``` @@ -514,6 +531,40 @@ performing any necessary bounds checking. Now the offset can be passed to var/reg: (addr T) <- index arr/reg: (addr array T), idx/reg: (offset T) ``` +## Stream operations + +A common use for arrays is as buffers. Save a few items to a scratch space and +then process them. This pattern is so common (we use it in files) that there's +special support for it with a built-in type: `stream`. + +Streams are like arrays in many ways. You can initialize them with a length: + +``` +var x: (stream int 3) +``` + +However, streams don't provide random access with an `index` instruction. +Instead, you write to them sequentially, and read back what you wrote. + +``` +read-from-stream s: (addr stream T), out: (addr T) +write-to-stream s: (addr stream T), in: (addr T) +var/eax: boolean <- stream-empty? s: (addr stream) +var/eax: boolean <- stream-full? s: (addr stream) +``` + +You can clear streams: + +``` +clear-stream f: (addr stream _) +``` + +You can also rewind them to reread what's been written: + +``` +rewind-stream f: (addr stream _) +``` + ## Compound types Primitive types can be combined together using the `type` keyword. For diff --git a/mu_instructions b/mu_instructions index 253ede30..629ba3d1 100644 --- a/mu_instructions +++ b/mu_instructions @@ -317,11 +317,11 @@ var/reg: (addr T) <- address var2: T var/reg <- index arr/rega: (addr array T), idx/regi: int | if size-of(T) is 4 or 8 => "8d/copy-address *(" rega "+" regi "<<" log2(size-of(T)) "+4) " reg "/r32" -var/reg <- index arr: (array T sz), idx/regi: int +var/reg <- index arr: (array T len), idx/regi: int => "8d/copy-address *(ebp+" regi "<<" log2(size-of(T)) "+" (arr.stack-offset + 4) ") " reg "/r32" var/reg <- index arr/rega: (addr array T), n => "8d/copy-address *(" rega "+" (n*size-of(T)+4) ") " reg "/r32" -var/reg <- index arr: (array T sz), n +var/reg <- index arr: (array T len), n => "8d/copy-address *(ebp+" (arr.stack-offset+4+n*size-of(T)) ") " reg "/r32" var/reg: (offset T) <- compute-offset arr: (addr array T), idx/regi: int # arr can be in reg or mem @@ -382,6 +382,8 @@ populate in: (addr handle array T), num # can be literal or variable on stack o populate-stream in: (addr handle stream T), num # can be literal or variable on stack or register => "(new-stream Heap " size-of(T) " " num " " in ")" +# Some miscellaneous helpers to avoid error-prone size computations + read-from-stream s: (addr stream T), out: (addr T) => "(read-from-stream " s " " out " " size-of(T) ")"