update vocabulary documentation

Top-level and linux/ now have separate vocabulary.md files.
This commit is contained in:
Kartik K. Agaram 2021-03-08 23:49:07 -08:00
parent 6508ab51cc
commit cec5ef31b3
14 changed files with 577 additions and 194 deletions

View File

@ -125,13 +125,13 @@ $set-cursor-position-on-real-screen:end:
5d/pop-to-ebp
c3/return
# Draw cursor at current location. But this is rickety:
# Not a real `show-cursor` primitive:
# - does not clear previous location cursor was shown at.
# - does not preserve what was at the cursor. Caller is responsible for
# tracking what was on the screen at this position before and passing it
# in again.
# - does not stop showing the cursor at this location when the cursor moves
show-cursor-on-real-screen: # g: grapheme
draw-cursor-on-real-screen: # g: grapheme
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
@ -141,7 +141,7 @@ show-cursor-on-real-screen: # g: grapheme
#
(cursor-position-on-real-screen) # => eax, ecx
(draw-grapheme-on-real-screen *(ebp+8) %eax %ecx 0 7)
$show-cursor-on-real-screen:end:
$draw-cursor-on-real-screen:end:
# . restore registers
59/pop-to-ecx
58/pop-to-eax
@ -156,7 +156,7 @@ $show-cursor-on-real-screen:end:
# 'draw*cursor*') print to by default.
#
# We don't bother displaying the cursor when drawing. It only becomes visible
# on show-cursor, which is quite rickety (see above)
# on draw-cursor, which is quite rickety (see above)
#
# It's up to applications to manage cursor display:
# - clean up where it used to be

6
400.mu
View File

@ -3,7 +3,7 @@ sig pixel-on-real-screen x: int, y: int, color: int
sig draw-grapheme-on-real-screen g: grapheme, x: int, y: int, color: int, background-color: int
sig cursor-position-on-real-screen -> _/eax: int, _/ecx: int
sig set-cursor-position-on-real-screen x: int, y: int
sig show-cursor-on-real-screen g: grapheme
sig draw-cursor-on-real-screen g: grapheme
# keyboard
sig read-key kbd: (addr keyboard) -> _/eax: byte
@ -26,9 +26,9 @@ sig check-next-stream-line-equal f: (addr stream byte), s: (addr array byte), ms
sig write f: (addr stream byte), s: (addr array byte)
sig write-stream f: (addr stream byte), s: (addr stream byte)
sig read-byte s: (addr stream byte) -> _/eax: byte
sig append-byte f: (addr stream byte), n: int
sig append-byte f: (addr stream byte), n: int # really just a byte, but I want to pass in literal numbers
#sig to-hex-char in/eax: int -> out/eax: int
sig append-byte-hex f: (addr stream byte), n: int
sig append-byte-hex f: (addr stream byte), n: int # really just a byte, but I want to pass in literal numbers
sig write-int32-hex f: (addr stream byte), n: int
sig write-int32-hex-bits f: (addr stream byte), n: int, bits: int
sig hex-int? in: (addr slice) -> _/eax: boolean

View File

@ -177,11 +177,11 @@ fn set-cursor-position screen: (addr screen), x: int, y: int {
copy-to *dest, src
}
fn show-cursor screen: (addr screen), g: grapheme {
fn draw-cursor screen: (addr screen), g: grapheme {
{
compare screen, 0
break-if-!=
show-cursor-on-real-screen g
draw-cursor-on-real-screen g
return
}
# fake screen

View File

@ -62,7 +62,7 @@ fn move-cursor-down screen: (addr screen) {
set-cursor-position screen, cursor-x, cursor-y
}
fn move-cursor-to-start-of-next-line screen: (addr screen) {
fn move-cursor-to-left-margin-of-next-line screen: (addr screen) {
var dummy/eax: int <- copy 0
var _height/ecx: int <- copy 0
dummy, _height <- screen-size screen

View File

@ -8,7 +8,7 @@ fn check-ints-equal _a: int, b: int, msg: (addr array byte) {
return
}
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, msg, 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
count-test-failure
}
@ -25,7 +25,7 @@ fn check _a: boolean, msg: (addr array byte) {
return
}
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, msg, 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
count-test-failure
}
@ -38,6 +38,6 @@ fn check-not _a: boolean, msg: (addr array byte) {
return
}
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, msg, 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
count-test-failure
}

View File

@ -52,7 +52,7 @@ fn check-screen-row-from screen-on-stack: (addr screen), x: int, y: int, expecte
draw-grapheme-at-cursor 0/screen, g, 3/cyan, 0/bg
move-cursor-rightward-and-downward 0/screen, 0/xmin, 0x80/xmax=screen-width
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "'", 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
}
idx <- increment
increment x
@ -120,7 +120,7 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, y: in
draw-grapheme-at-cursor 0/screen, g, 3/cyan, 0/bg
move-cursor-rightward-and-downward 0/screen, 0/xmin, 0x80/xmax=screen-width
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "'", 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
}
$check-screen-row-in-color-from:compare-colors: {
var color/eax: int <- screen-color-at-idx screen, idx
@ -144,7 +144,7 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, y: in
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, fg, 3/fg/cyan, 0/bg
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " but observed color ", 3/fg/cyan, 0/bg
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, color, 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
}
}
idx <- increment
@ -211,7 +211,7 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg:
draw-grapheme-at-cursor 0/screen, g, 3/cyan, 0/bg
move-cursor-rightward-and-downward 0/screen, 0/xmin, 0x80/xmax=screen-width
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "'", 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
break $check-screen-row-in-background-color-from:compare-graphemes
}
$check-screen-row-in-background-color-from:compare-background-colors: {
@ -236,7 +236,7 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg:
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, bg, 3/fg/cyan, 0/bg
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " but observed background-color ", 3/fg/cyan, 0/bg
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, background-color, 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
}
}
idx <- increment
@ -281,7 +281,7 @@ fn check-background-color-in-screen-row-from screen-on-stack: (addr screen), bg:
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, y, 3/fg/cyan, 0/bg
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ") to not be in background-color ", 3/fg/cyan, 0/bg
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, bg, 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
break $check-background-color-in-screen-row-from:compare-cells
}
# otherwise assert that background IS bg
@ -297,7 +297,7 @@ fn check-background-color-in-screen-row-from screen-on-stack: (addr screen), bg:
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, bg, 3/fg/cyan, 0/bg
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " but observed background-color ", 3/fg/cyan, 0/bg
draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, background-color, 3/fg/cyan, 0/bg
move-cursor-to-start-of-next-line 0/screen
move-cursor-to-left-margin-of-next-line 0/screen
}
idx <- increment
increment x

2
ex7.mu
View File

@ -14,7 +14,7 @@ fn main {
var space/eax: grapheme <- copy 0x20
set-cursor-position 0/screen, 0, 0
{
show-cursor 0/screen, space
draw-cursor 0/screen, space
var key/eax: byte <- read-key 0/keyboard
{
compare key, 0x68/h

View File

@ -1,11 +1,11 @@
# Some helpers for Mu tests.
fn check-true val: boolean, msg: (addr array byte) {
fn check val: boolean, msg: (addr array byte) {
var tmp/eax: int <- copy val
check-ints-equal tmp, 1, msg
}
fn check-false val: boolean, msg: (addr array byte) {
fn check-not val: boolean, msg: (addr array byte) {
var tmp/eax: int <- copy val
check-ints-equal tmp, 0, msg
}

View File

@ -6,18 +6,18 @@ fn test-stream {
var s: (stream int 4)
var s2/ecx: (addr stream int) <- address s
var tmp/eax: boolean <- stream-empty? s2
check-true tmp, "F - test-stream/empty?/0"
check tmp, "F - test-stream/empty?/0"
tmp <- stream-full? s2
check-false tmp, "F - test-stream/full?/0"
check-not tmp, "F - test-stream/full?/0"
# step 2: write to stream
var x: int
copy-to x, 0x34
var x2/edx: (addr int) <- address x
write-to-stream s2, x2
tmp <- stream-empty? s2
check-false tmp, "F - test-stream/empty?/1"
check-not tmp, "F - test-stream/empty?/1"
tmp <- stream-full? s2
check-false tmp, "F - test-stream/full?/1"
check-not tmp, "F - test-stream/full?/1"
# step 3: modify the value written (should make no difference)
copy-to x, 0
# step 4: read back
@ -25,9 +25,9 @@ fn test-stream {
var y2/ebx: (addr int) <- address y
read-from-stream s2, y2
tmp <- stream-empty? s2
check-true tmp, "F - test-stream/empty?/2"
check tmp, "F - test-stream/empty?/2"
tmp <- stream-full? s2
check-false tmp, "F - test-stream/full?/2"
check-not tmp, "F - test-stream/full?/2"
# we read back what was written
check-ints-equal y, 0x34, "F - test-stream"
}
@ -37,12 +37,12 @@ fn test-stream-full {
var s: (stream int 1)
var s2/ecx: (addr stream int) <- address s
var tmp/eax: boolean <- stream-full? s2
check-false tmp, "F - test-stream-full?/pre"
check-not tmp, "F - test-stream-full?/pre"
var x: int
var x2/edx: (addr int) <- address x
write-to-stream s2, x2
tmp <- stream-full? s2
check-true tmp, "F - test-stream-full?"
check tmp, "F - test-stream-full?"
}
fn test-fake-input-buffered-file {

View File

@ -620,7 +620,7 @@ fn check-buffer-contains _buf: (addr array byte), _contents: (addr array byte),
var buf/esi: (addr array byte) <- copy _buf
var contents/edi: (addr array byte) <- copy _contents
var a/eax: boolean <- string-starts-with? buf, contents
check-true a, msg
check a, msg
var len/ecx: int <- length contents
var len2/eax: int <- length buf
compare len, len2

View File

@ -8,6 +8,10 @@ kernel. To run programs under this directory, you must first `cd` into it.
Hello world!
```
See the [shared vocabulary](vocabulary.md) of data types and functions shared
by Mu programs running on Linux. Mu programs can transparently call low-level
functions written in SubX.
Some programs to try out:
* `tile`: [An experimental live-updating postfix shell environment](https://mastodon.social/@akkartik/105108305362341204)

368
linux/vocabulary.md Normal file
View File

@ -0,0 +1,368 @@
## Reference documentation on available primitives
### Data Structures
- Handles: addresses to objects allocated on the heap. They're augmented with
book-keeping to guarantee memory-safety, and so cannot be stored in registers.
See [mu.md](mu.md) for details, but in brief:
- You need `addr` values to access data they point to.
- You can't store `addr` values in other types. They're temporary.
- You can store `handle` values in other types.
- To convert `handle` to `addr`, use `lookup`.
- Reclaiming memory (currently unimplemented) invalidates all `addr`
values.
- Kernel strings: null-terminated regions of memory. Unsafe and to be avoided,
but needed for interacting with the kernel.
- Arrays: size-prefixed regions of memory containing multiple elements of a
single type. Contents are preceded by 4 bytes (32 bits) containing the
`size` of the array in bytes.
- Slices: a pair of 32-bit addresses denoting a [half-open](https://en.wikipedia.org/wiki/Interval_(mathematics))
\[`start`, `end`) interval to live memory with a consistent lifetime.
Invariant: `start` <= `end`
- Streams: strings prefixed by 32-bit `write` and `read` indexes that the next
write or read goes to, respectively.
- offset 0: write index
- offset 4: read index
- offset 8: size of array (in bytes)
- offset 12: start of array data
Invariant: 0 <= `read` <= `write` <= `size`
- File descriptors (fd): Low-level 32-bit integers that the kernel uses to
track files opened by the program.
- File: 32-bit value containing either a fd or an address to a stream (fake
file).
- Buffered files (buffered-file): Contain a file descriptor and a stream for
buffering reads/writes. Each `buffered-file` must exclusively perform either
reads or writes.
- Graphemes: 32-bit fragments of utf-8 that encode a single Unicode code-point.
- Code-points: 32-bit integers representing a Unicode character.
### 'system calls'
As I said at the top, a primary design goal of SubX (and Mu more broadly) is
to explore ways to turn arbitrary manual tests into reproducible automated
tests. SubX aims for this goal by baking testable interfaces deep into the
stack, at the OS syscall level. The idea is that every syscall that interacts
with hardware (and so the environment) should be *dependency injected* so that
it's possible to insert fake hardware in tests.
But those are big goals. Here are the syscalls I have so far:
- `write`: takes two arguments, a file `f` and an address to array `s`.
Comparing this interface with the Unix `write()` syscall shows two benefits:
1. SubX can handle 'fake' file descriptors in tests.
1. `write()` accepts a buffer and its size in separate arguments, which
requires callers to manage the two separately and so can be error-prone.
SubX's wrapper keeps the two together to increase the chances that we
never accidentally go out of array bounds.
- `read`: takes two arguments, a file `f` and an address to stream `s`. Reads
as much data from `f` as can fit in (the free space of) `s`.
Like with `write()`, this wrapper around the Unix `read()` syscall adds the
ability to handle 'fake' file descriptors in tests, and reduces the chances
of clobbering outside array bounds.
One bit of weirdness here: in tests we do a redundant copy from one stream
to another. See [the comments before the implementation](http://akkartik.github.io/mu/html/060read.subx.html)
for a discussion of alternative interfaces.
- `stop`: takes two arguments:
- `ed` is an address to an _exit descriptor_. Exit descriptors allow us to
`exit()` the program in production, but return to the test harness within
tests. That allows tests to make assertions about when `exit()` is called.
- `value` is the status code to `exit()` with.
For more details on exit descriptors and how to create one, see [the
comments before the implementation](http://akkartik.github.io/mu/html/059stop.subx.html).
- `new-segment`
Allocates a whole new segment of memory for the program, discontiguous with
both existing code and data (heap) segments. Just a more opinionated form of
[`mmap`](http://man7.org/linux/man-pages/man2/mmap.2.html).
- `allocate`: takes two arguments, an address to allocation-descriptor `ad`
and an integer `n`
Allocates a contiguous range of memory that is guaranteed to be exclusively
available to the caller. Returns the starting address to the range in `eax`.
An allocation descriptor tracks allocated vs available addresses in some
contiguous range of memory. The int specifies the number of bytes to allocate.
Explicitly passing in an allocation descriptor allows for nested memory
management, where a sub-system gets a chunk of memory and further parcels it
out to individual allocations. Particularly helpful for (surprise) tests.
- `time`: returns the time in seconds since the epoch.
- `ntime`: returns the number of nanoseconds since some arbitrary point.
Saturates at 32 bits. Useful for fine-grained measurements over relatively
short durations.
- `sleep`: sleep for some number of whole seconds and some fraction of a
second expressed in nanoseconds. Not having decimal literals can be awkward
here.
- ... _(to be continued)_
I will continue to import syscalls over time from [the old Mu VM in the parent
directory](https://github.com/akkartik/mu), which has experimented with
interfaces for the screen, keyboard, mouse, disk and network.
### Functions
The most useful functions from 400.mu and later .mu files. Look for definitions
(using `ctags`) to see type signatures.
_(Compound arguments are usually passed in by reference. Where the results are
compound objects that don't fit in a register, the caller usually passes in
allocated memory for it.)_
#### assertions for tests
- `check`: fails current test if given boolean is false (`= 0`).
- `check-not`: fails current test if given boolean isn't false (`!= 0`).
- `check-ints-equal`: fails current test if given ints aren't equal
- `check-array-equal`: fails current test if an array of ints doesn't match a
literal array passed in as a whitespace-separated string.
- `check-stream-equal`: fails current test if stream doesn't match string
- `check-next-stream-line-equal`: fails current test if next line of stream
until newline doesn't match string
Every Mu computer has a global trace that programs can write to, and that
tests can make assertions on.
- `clear-trace-stream`
- `check-trace-contains`
- `check-trace-scans-to`: like `check-trace-contains` but with an implicit,
stateful start index
#### error handling
- `error`: takes three arguments, an exit-descriptor, a file and a string (message)
Prints out the message to the file and then exits using the provided
exit-descriptor.
- `error-byte`: like `error` but takes an extra byte value that it prints out
at the end of the message.
#### numbers
- `abs`
- `repeated-shift-left`, since x86 only supports bit-shifts by constant values
- `repeated-shift-right`
- `shift-left-bytes`: shift left by `n*8` bits
- `integer-divide`
Floating point constructors, since x86 doesn't support immediate floats and Mu
doesn't yet parse floating-point literals:
- `rational`: int, int -> float
- `fill-in-rational`: int, int, (addr float)
- `fill-in-sqrt`: int, (addr float)
#### arrays and strings
- `populate`: allocates space for `n` objects of the appropriate type.
- `copy-array`: allocates enough space and writes out a copy of an array of
some type.
- `slice-to-string`: allocates space for an array of bytes and copies the
slice into it.
- `array-equal?`
- `substring`: string, start, length -> string
- `split-string`: string, delimiter -> array of strings
- `copy-array-object`
#### predicates
- `kernel-string-equal?`: compares a kernel string with a string
- `string-equal?`: compares two strings
- `stream-data-equal?`: compares a stream with a string
- `next-stream-line-equal?`: compares the next line in a stream (from its
`read` index up to the next newline) with a string
- `slice-empty?`: checks if the `start` and `end` of a slice are equal
- `slice-equal?`: compares a slice with a string
- `slice-starts-with?`: compares the start of a slice with a string
- `slice-ends-with?`: compares the end of a slice with a string
#### writing to disk
- `write`: string -> file
- Can also be used to cat a string into a stream.
- `write-stream`: stream -> file
- Can also be used to cat one stream into another.
- `write-stream-data`: stream -> file
- Like `write-stream` but ignores read index.
- `write-slice`: slice -> stream
- `append-byte`: int -> stream
- `append-byte-hex`: int -> stream
- textual representation in hex, no '0x' prefix
- `write-int`: int -> stream
- write number to stream
- `write-int32-hex`: int -> stream
- textual representation in hex, including '0x' prefix
- `write-int32-hex-buffered`: int -> buffered-file
- `write-int32-decimal`
- `write-int32-decimal-buffered`
- `write-buffered`: string -> buffered-file
- `write-slice-buffered`: slice -> buffered-file
- `flush`: buffered-file
- `write-byte-buffered`: int -> buffered-file
- `print-byte-buffered`: int -> buffered-file
- textual representation in hex, no '0x' prefix
- `print-int32-buffered`: int -> buffered-file
- textual representation in hex, including '0x' prefix
- `write-grapheme`: grapheme -> stream
- `to-grapheme`: code-point -> grapheme
- `write-float-decimal-approximate`: float, precision: int -> stream
- `new-buffered-file`
- `populate-buffered-file-containing`: string -> buffered-file
Unless otherwise stated, writes to a stream will abort the entire program if
there isn't enough room in the destination stream.
#### reading from disk
- `read`: file -> stream
- Can also be used to cat one stream into another.
- Will silently stop reading when destination runs out of space.
- `read-byte-buffered`: buffered-file -> byte
- `read-line-buffered`: buffered-file -> stream
- Will abort the entire program if there isn't enough room.
- `read-grapheme`: stream -> grapheme
- `read-grapheme-buffered`: buffered-file -> grapheme
- `read-lines`: buffered-file -> array of strings
#### non-IO operations on streams
- `populate-stream`: allocates space in a stream for `n` objects of the
appropriate type.
- Will abort the entire program if `n*b` requires more than 32 bits, where `b` is the size of each object in bytes.
- `clear-stream`: resets everything in the stream to `0` (except its `size`).
- `rewind-stream`: resets the read index of the stream to `0` without modifying
its contents.
#### reading/writing hex representations of integers
- `is-hex-int?`: slice -> boolean
- `parse-hex-int`: string -> int
- `parse-hex-int-from-slice`: slice -> int
- `is-hex-digit?`: byte -> boolean
- `parse-array-of-ints`
- `parse-array-of-decimal-ints`
#### printing to screen
All screen primitives require a screen object, which can be either the real
screen on the computer or a fake screen for tests. Mu supports a subset of
Unix terminal properties supported by almost all modern terminal emulators.
- `enable-screen-type-mode` (default)
- `enable-screen-grid-mode`
- `clear-screen`
- `screen-size`
- `move-cursor`
- `hide-cursor`
- `show-cursor`
- `print-string`: string -> screen
- `print-stream`
- `print-grapheme`
- `print-code-point`
- `print-int32-hex`
- `print-int32-decimal`
- `print-int32-decimal-right-justified`
- `print-array-of-ints-in-decimal`
- `print-float-hex`
- `print-float-decimal-approximate`: up to some precision
Printing to screen is stateful, and preserves formatting unless explicitly
manipulated.
- `reset-formatting`
- `start-color`: adjusts foreground and background
- `start-bold`
- `start-underline`
- `start-reverse-video`
- `start-blinking`
Assertions for tests:
- `screen-grapheme-at`
- `screen-color-at`
- `screen-background-color-at`
- `screen-bold-at?`
- `screen-underline-at?`
- `screen-reverse-at?`
- `screen-blink-at?`
- `check-screen-row`
- `check-screen-row-from`
- `check-screen-row-in-color`
- `check-screen-row-in-color-from`
- `check-screen-row-in-background-color`
- `check-screen-row-in-background-color-from`
- `check-screen-row-in-bold`
- `check-screen-row-in-bold-from`
- `check-screen-row-in-underline`
- `check-screen-row-in-underline-from`
- `check-screen-row-in-reverse`
- `check-screen-row-in-reverse-from`
- `check-screen-row-in-blinking`
- `check-screen-row-in-blinking-from`
#### keyboard
- `enable-keyboard-type-mode`: process keystrokes on `enter` (default mode)
- `read-line-from-real-keyboard`
- `enable-keyboard-immediate-mode`: process keystrokes as they're typed
- `read-key-from-real-keyboard`
#### tokenization
from a stream:
- `next-token`: stream, delimiter byte -> slice
- `skip-chars-matching`: stream, delimiter byte
- `skip-chars-not-matching`: stream, delimiter byte
from a slice:
- `next-token-from-slice`: start, end, delimiter byte -> slice
- Given a slice and a delimiter byte, returns a new slice inside the input
that ends at the delimiter byte.
- `skip-chars-matching-in-slice`: curr, end, delimiter byte -> new-curr (in `eax`)
- `skip-chars-not-matching-in-slice`: curr, end, delimiter byte -> new-curr (in `eax`)
#### file system
- `open`: filename, write? -> buffered-file

2
rpn.mu
View File

@ -28,7 +28,7 @@ fn main {
# read line from keyboard
clear-stream in
{
show-cursor 0/screen, space
draw-cursor 0/screen, space
var key/eax: byte <- read-key 0/keyboard
compare key, 0xa/newline
break-if-=

View File

@ -2,8 +2,15 @@
### Data Structures
- Kernel strings: null-terminated regions of memory. Unsafe and to be avoided,
but needed for interacting with the kernel.
- Handles: addresses to objects allocated on the heap. They're augmented with
book-keeping to guarantee memory-safety, and so cannot be stored in registers.
See [mu.md](mu.md) for details, but in brief:
- You need `addr` values to access data they point to.
- You can't store `addr` values in other types. They're temporary.
- You can store `handle` values in other types.
- To convert `handle` to `addr`, use `lookup`.
- Reclaiming memory (currently unimplemented) invalidates all `addr`
values.
- Arrays: size-prefixed regions of memory containing multiple elements of a
single type. Contents are preceded by 4 bytes (32 bits) containing the
@ -24,185 +31,189 @@
Invariant: 0 <= `read` <= `write` <= `size`
- File descriptors (fd): Low-level 32-bit integers that the kernel uses to
track files opened by the program.
Writes to a stream abort if it's full. Reads from a stream abort if it's
empty.
- File: 32-bit value containing either a fd or an address to a stream (fake
file).
- Graphemes: 32-bit fragments of utf-8 that encode a single Unicode code-point.
- Code-points: 32-bit integers representing a Unicode character.
- Buffered files (buffered-file): Contain a file descriptor and a stream for
buffering reads/writes. Each `buffered-file` must exclusively perform either
reads or writes.
### Functions
### 'system calls'
The most useful functions from 400.mu and later .mu files. Look for definitions
(using `ctags`) to see type signatures.
As I said at the top, a primary design goal of SubX (and Mu more broadly) is
to explore ways to turn arbitrary manual tests into reproducible automated
tests. SubX aims for this goal by baking testable interfaces deep into the
stack, at the OS syscall level. The idea is that every syscall that interacts
with hardware (and so the environment) should be *dependency injected* so that
it's possible to insert fake hardware in tests.
But those are big goals. Here are the syscalls I have so far:
- `write`: takes two arguments, a file `f` and an address to array `s`.
Comparing this interface with the Unix `write()` syscall shows two benefits:
1. SubX can handle 'fake' file descriptors in tests.
1. `write()` accepts a buffer and its size in separate arguments, which
requires callers to manage the two separately and so can be error-prone.
SubX's wrapper keeps the two together to increase the chances that we
never accidentally go out of array bounds.
- `read`: takes two arguments, a file `f` and an address to stream `s`. Reads
as much data from `f` as can fit in (the free space of) `s`.
Like with `write()`, this wrapper around the Unix `read()` syscall adds the
ability to handle 'fake' file descriptors in tests, and reduces the chances
of clobbering outside array bounds.
One bit of weirdness here: in tests we do a redundant copy from one stream
to another. See [the comments before the implementation](http://akkartik.github.io/mu/html/060read.subx.html)
for a discussion of alternative interfaces.
- `stop`: takes two arguments:
- `ed` is an address to an _exit descriptor_. Exit descriptors allow us to
`exit()` the program in production, but return to the test harness within
tests. That allows tests to make assertions about when `exit()` is called.
- `value` is the status code to `exit()` with.
For more details on exit descriptors and how to create one, see [the
comments before the implementation](http://akkartik.github.io/mu/html/059stop.subx.html).
- `new-segment`
Allocates a whole new segment of memory for the program, discontiguous with
both existing code and data (heap) segments. Just a more opinionated form of
[`mmap`](http://man7.org/linux/man-pages/man2/mmap.2.html).
- `allocate`: takes two arguments, an address to allocation-descriptor `ad`
and an integer `n`
Allocates a contiguous range of memory that is guaranteed to be exclusively
available to the caller. Returns the starting address to the range in `eax`.
An allocation descriptor tracks allocated vs available addresses in some
contiguous range of memory. The int specifies the number of bytes to allocate.
Explicitly passing in an allocation descriptor allows for nested memory
management, where a sub-system gets a chunk of memory and further parcels it
out to individual allocations. Particularly helpful for (surprise) tests.
- ... _(to be continued)_
I will continue to import syscalls over time from [the old Mu VM in the parent
directory](https://github.com/akkartik/mu), which has experimented with
interfaces for the screen, keyboard, mouse, disk and network.
### primitives built atop system calls
_(Compound arguments are usually passed in by reference. Where the results are
compound objects that don't fit in a register, the caller usually passes in
allocated memory for it.)_
- `abort`: print a message in red on the bottom left of the screen and halt
#### assertions for tests
- `check-ints-equal`: fails current test if given ints aren't equal
- `check-stream-equal`: fails current test if stream doesn't match string
- `check`: fails current test if given boolean is false (`= 0`).
- `check-not`: fails current test if given boolean isn't false (`!= 0`).
- `check-ints-equal`: fails current test if given ints aren't equal.
- `check-strings-equal`: fails current test if given strings have different bytes.
- `check-stream-equal`: fails current test if stream's data doesn't match
string in its entirety. Ignores the stream's read index.
- `check-array-equal`: fails if an array's elements don't match what's written
in a whitespace-separated string.
- `check-next-stream-line-equal`: fails current test if next line of stream
until newline doesn't match string
#### error handling
- `error`: takes three arguments, an exit-descriptor, a file and a string (message)
Prints out the message to the file and then exits using the provided
exit-descriptor.
- `error-byte`: like `error` but takes an extra byte value that it prints out
at the end of the message.
until newline doesn't match string.
#### predicates
- `kernel-string-equal?`: compares a kernel string with a string
- `string-equal?`: compares two strings
- `stream-data-equal?`: compares a stream with a string
- `handle-equal?`: checks if two handles point at the identical address. Does
not compare payloads at their respective addresses.
- `array-equal?`: checks if two arrays (of ints only for now) have identical
elements.
- `string-equal?`: compares two strings.
- `stream-data-equal?`: compares a stream with a string.
- `next-stream-line-equal?`: compares with string the next line in a stream, from
`read` index to newline
`read` index to newline.
- `slice-empty?`: checks if the `start` and `end` of a slice are equal
- `slice-equal?`: compares a slice with a string
- `slice-starts-with?`: compares the start of a slice with a string
- `slice-ends-with?`: compares the end of a slice with a string
- `slice-empty?`: checks if the `start` and `end` of a slice are equal.
- `slice-equal?`: compares a slice with a string.
- `slice-starts-with?`: compares the start of a slice with a string.
#### writing to disk
- `write`: string -> file
- Can also be used to cat a string into a stream.
- Will abort the entire program if destination is a stream and doesn't have
enough room.
- `write-stream`: stream -> file
- Can also be used to cat one stream into another.
- Will abort the entire program if destination is a stream and doesn't have
enough room.
- `write-slice`: slice -> stream
- Will abort the entire program if there isn't enough room in the
destination stream.
- `append-byte`: int -> stream
- Will abort the entire program if there isn't enough room in the
destination stream.
- `append-byte-hex`: int -> stream
- textual representation in hex, no '0x' prefix
- Will abort the entire program if there isn't enough room in the
destination stream.
- `print-int32`: int -> stream
- textual representation in hex, including '0x' prefix
- Will abort the entire program if there isn't enough room in the
destination stream.
- `write-buffered`: string -> buffered-file
- `write-slice-buffered`: slice -> buffered-file
- `flush`: buffered-file
- `write-byte-buffered`: int -> buffered-file
- `print-byte-buffered`: int -> buffered-file
- textual representation in hex, no '0x' prefix
- `print-int32-buffered`: int -> buffered-file
- textual representation in hex, including '0x' prefix
- `stream-full?`: checks if a write to a stream would abort.
- `stream-empty?`: checks if a read from a stream would abort.
#### reading from disk
- `read`: file -> stream
- Can also be used to cat one stream into another.
- Will silently stop reading when destination runs out of space.
- `read-byte-buffered`: buffered-file -> byte
- `read-line-buffered`: buffered-file -> stream
- Will abort the entire program if there isn't enough room.
#### arrays
- `populate`: allocates space for `n` objects of the appropriate type.
- `copy-array`: allocates enough space and writes out a copy of an array of
some type.
- `slice-to-string`: allocates space for an array of bytes and copies the
slice into it.
#### streams
- `populate-stream`: allocates space in a stream for `n` objects of the
appropriate type.
- `write-to-stream`: writes arbitrary objects to a stream of the appropriate
type.
- `read-from-stream`: reads arbitrary objects from a stream of the appropriate
type.
- `stream-to-array`: allocates just enough space and writes out a stream's
data between its read index (inclusive) and write index (exclusive).
#### non-IO operations on streams
- `new-stream`: allocates space for a stream of `n` elements, each occupying
`b` bytes.
- Will abort the entire program if `n*b` requires more than 32 bits.
- `clear-stream`: resets everything in the stream to `0` (except its `size`).
- `rewind-stream`: resets the read index of the stream to `0` without modifying
its contents.
- `write`: writes a string into a stream of bytes. Doesn't support streams of
other types.
- `write-stream`: concatenates one stream into another.
- `write-slice`: writes a slice into a stream of bytes.
- `append-byte`: writes a single byte into a stream of bytes.
- `append-byte-hex`: writes textual representation of lowest byte in hex to
a stream of bytes. Does not write a '0x' prefix.
- `read-byte`: reads a single byte from a stream of bytes.
#### reading/writing hex representations of integers
- `is-hex-int?`: takes a slice argument, returns boolean result in `eax`
- `parse-hex-int`: takes a slice argument, returns int result in `eax`
- `is-hex-digit?`: takes a 32-bit word containing a single byte, returns
boolean result in `eax`.
- `from-hex-char`: takes a hexadecimal digit character in `eax`, returns its
numeric value in `eax`
- `to-hex-char`: takes a single-digit numeric value in `eax`, returns its
corresponding hexadecimal character in `eax`
#### tokenization
- `write-int32-hex`
- `hex-int?`: checks if a slice contains an int in hex. Supports '0x' prefix.
- `parse-hex-int`: reads int in hex from string
- `parse-hex-int-from-slice`: reads int in hex from slice
- `parse-array-of-ints`: reads in multiple ints in hex, separated by whitespace.
- `hex-digit?`: checks if byte is in [0, 9] or [a, f] (lowercase only)
from a stream:
- `next-token`: stream, delimiter byte -> slice
- `skip-chars-matching`: stream, delimiter byte
- `skip-chars-not-matching`: stream, delimiter byte
- `write-int32-decimal`
- `parse-decimal-int`
- `parse-decimal-int-from-slice`
- `parse-decimal-int-from-stream`
- `parse-array-of-decimal-ints`
- `decimal-digit?`: checks if byte is in [0, 9]
from a slice:
- `next-token-from-slice`: start, end, delimiter byte -> slice
- Given a slice and a delimiter byte, returns a new slice inside the input
that ends at the delimiter byte.
#### printing to screen
- `skip-chars-matching-in-slice`: curr, end, delimiter byte -> new-curr (in `eax`)
- `skip-chars-not-matching-in-slice`: curr, end, delimiter byte -> new-curr (in `eax`)
All screen primitives require a screen object, which can be either the real
screen on the computer or a fake screen for tests.
The real screen on the Mu computer can currently display only ASCII characters,
though it's easy to import more of the font. There is only one font. All
graphemes are 8 pixels wide and 16 pixels tall. These constraints only apply
to the real screen.
- `draw-grapheme`: draws a single grapheme at a given coordinate, with given
foreground and background colors.
- `render-grapheme`: like `draw-grapheme`, but can also handle newlines,
assuming text is printed left-to-right, top-to-bottom.
- `draw-code-point`
- `clear-screen`
- `draw-text-rightward`: draws a single line of text, stopping when it reaches
either the provided bound or the right screen margin.
- `draw-stream-rightward`
- `draw-text-rightward-over-full-screen`: does not provide a bound.
- `draw-text-wrapping-right-then-down`: draws multiple lines of text on screen
with simplistic word-wrap (no hyphenation) within (x, y) bounds.
- `draw-stream-wrapping-right-then-down`
- `draw-text-wrapping-right-then-down-over-full-screen`
- `draw-int32-hex-wrapping-right-then-down`
- `draw-int32-hex-wrapping-right-then-down-over-full-screen`
- `draw-int32-decimal-wrapping-right-then-down`
- `draw-int32-decimal-wrapping-right-then-down-over-full-screen`
Similar primitives for writing text top-to-bottom, left-to-right.
- `draw-text-downward`
- `draw-stream-downward`
- `draw-text-wrapping-down-then-right`
- `draw-stream-wrapping-down-then-right`
- `draw-text-wrapping-down-then-right-over-full-screen`
- `draw-int32-hex-wrapping-down-then-right`
- `draw-int32-hex-wrapping-down-then-right-over-full-screen`
- `draw-int32-decimal-wrapping-down-then-right`
- `draw-int32-decimal-wrapping-down-then-right-over-full-screen`
Screens remember the current cursor position.
- `cursor-position`
- `set-cursor-position`
- `draw-grapheme-at-cursor`
- `draw-code-point-at-cursor`
- `draw-cursor`: highlights the current position of the cursor. Programs must
pass in the grapheme to draw at the cursor position, and are responsible for
clearing the highlight when the cursor moves.
- `move-cursor-left`, `move-cursor-right`, `move-cursor-up`, `move-cursor-down`.
These primitives always silently fail if the desired movement would go out
of screen bounds.
- `move-cursor-to-left-margin-of-next-line`
- `move-cursor-rightward-and-downward`: move cursor one grapheme to the right
- `draw-text-rightward-from-cursor`
- `draw-text-wrapping-right-then-down-from-cursor`
- `draw-text-wrapping-right-then-down-from-cursor-over-full-screen`
- `draw-int32-hex-wrapping-right-then-down-from-cursor`
- `draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen`
- `draw-int32-decimal-wrapping-right-then-down-from-cursor`
- `draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen`
- `draw-text-wrapping-down-then-right-from-cursor`
- `draw-text-wrapping-down-then-right-from-cursor-over-full-screen`
Assertions for tests:
- `check-screen-row`: compare a screen from the left margin of a given row
index with a string. The row index counts downward from 0 at the top of the
screen. The string can be shorter or longer than a single row, and defines the
region of interest. Strings longer than a row wrap around to the left margin
of the next screen row. Currently assumes text is printed left-to-right on
the screen.
- `check-screen-row-from`: compare a fragment of a screen (left to right, top
to bottom) starting from a given (x, y) coordinate with an expected string.
Currently assumes text is printed left-to-right and top-to-bottom on the
screen.
- `check-screen-row-in-color`: like `check-screen-row` but:
- also compares foreground color
- ignores screen locations where the expected string contains spaces
- `check-screen-row-in-color-from`
- `check-screen-row-in-background-color`
- `check-screen-row-in-background-color-from`
- `check-background-color-in-screen-row`: unlike previous functions, this
doesn't check screen contents, only background color. Ignores background
color where expected string contains spaces, and compares background color
where expected string does not contain spaces. Never compares the character
at any screen location.
- `check-background-color-in-screen-row-from`