2019-09-20 18:09:07 +00:00
|
|
|
# Toy lisp interpreter. Incomplete.
|
2019-09-07 08:27:19 +00:00
|
|
|
#
|
|
|
|
# To run:
|
2019-10-30 00:51:47 +00:00
|
|
|
# $ ./ntranslate init.linux 0*.subx apps/mulisp.subx
|
2019-09-07 08:27:19 +00:00
|
|
|
# $ ./a.elf
|
|
|
|
# 42
|
|
|
|
# => 42
|
|
|
|
# ^D
|
|
|
|
# $
|
|
|
|
|
|
|
|
== code
|
|
|
|
|
|
|
|
Entry: # run tests if necessary, a REPL if not
# Program entry point. If argv[1] is "test", runs the test suite and exits
# with *Num-test-failures as the status; otherwise runs the REPL over
# Stdin/Stdout and exits with status 0.
|
2019-10-16 02:35:19 +00:00
|
|
|
# . prologue (no push of ebp here: ebp just aliases esp so the comparisons below can read argc at *ebp and argv[1] at *(ebp+8))
|
2019-09-07 08:27:19 +00:00
|
|
|
89/<- %ebp 4/r32/esp
|
|
|
|
# initialize heap
|
2019-11-30 19:14:49 +00:00
|
|
|
(new-segment *Heap-size Heap)
|
2019-09-22 16:58:47 +00:00
|
|
|
{
|
|
|
|
# if (argc <= 1) break
|
|
|
|
81 7/subop/compare *ebp 1/imm32
|
|
|
|
7e/jump-if-lesser-or-equal break/disp8
|
|
|
|
# if (argv[1] != "test") break
|
|
|
|
(kernel-string-equal? *(ebp+8) "test") # => eax
|
|
|
|
3d/compare-eax-and 0/imm32
|
|
|
|
74/jump-if-equal break/disp8
|
|
|
|
# argv[1] is "test": run the tests instead of the REPL
|
|
|
|
(run-tests)
|
|
|
|
# syscall(exit, *Num-test-failures)
|
|
|
|
8b/-> *Num-test-failures 3/r32/ebx
|
|
|
|
eb/jump $main:end/disp8
|
|
|
|
}
|
2019-09-07 08:27:19 +00:00
|
|
|
(repl Stdin Stdout)
|
|
|
|
# syscall(exit, 0)
|
|
|
|
bb/copy-to-ebx 0/imm32
|
|
|
|
# both paths arrive here with the exit status already in ebx
$main:end:
|
|
|
|
b8/copy-to-eax 1/imm32/exit
|
|
|
|
cd/syscall 0x80/imm8
|
|
|
|
|
2019-09-08 07:33:57 +00:00
|
|
|
# Data structures
|
|
|
|
#
|
|
|
|
# Lisp is dynamically typed. Values always carry around knowledge of their
|
|
|
|
# type.
|
|
|
|
#
|
|
|
|
# There are several kinds of types in the description below, so we need a
|
|
|
|
# glossary and notational convention to disambiguate:
|
|
|
|
# lisp type: what Lisp code can see. Looks how you type it at the prompt.
|
|
|
|
# nil num char string symbol pair array
|
2019-09-12 00:49:34 +00:00
|
|
|
# type tag: the numeric code for a lisp type. All caps.
|
2019-09-08 07:33:57 +00:00
|
|
|
# NIL NUM CHAR STRING SYMBOL PAIR ARRAY
|
|
|
|
# memory type: a type specifying memory layout at the SubX level. Starts
|
|
|
|
# with a '$'.
|
2020-01-03 09:36:34 +00:00
|
|
|
# $int $array $(addr _)
|
2019-09-08 07:33:57 +00:00
|
|
|
#
|
|
|
|
# Lisp values are represented in memory by the _cell_ data structure. A cell
|
|
|
|
# is 12 bytes long:
|
|
|
|
# tag: $int (4 bytes; we're not concerned about wasting space)
|
|
|
|
# data: 8 bytes whose contents and meaning depend on tag
|
|
|
|
#
|
|
|
|
# What values of the different Lisp types look like in memory:
|
|
|
|
# - nil: cell{ tag: 0/NIL, data: 0 0 }
|
|
|
|
# - num: cell{ tag: 1/NUM, data: $int 0 }
|
|
|
|
# data contains the number
|
|
|
|
# - char: cell{ tag: 2/CHAR, data: $int 0 }
|
|
|
|
# data contains the utf-8 code of the character (no compound glyphs, no
|
|
|
|
# modifiers, etc., etc.)
|
2020-01-03 09:36:34 +00:00
|
|
|
# - string: cell{ tag: 3/STRING, data: $(addr stream byte) 0 }
|
|
|
|
# data contains an (addr array byte) containing the string in utf-8
|
|
|
|
# - symbol: cell{ tag: 4/SYMBOL, data: $(addr array byte) 0 }
|
|
|
|
# data contains an (addr array byte) containing the name of the symbol in utf-8
|
2019-09-08 07:33:57 +00:00
|
|
|
# alternatively, data could contain an index into the table of interned symbols
|
2020-01-03 09:36:34 +00:00
|
|
|
# - pair: cell{ tag: 5/PAIR, data: $(addr cell) $(addr cell) }
|
2019-09-08 07:33:57 +00:00
|
|
|
# data contains pointers to car and cdr
|
2020-01-03 09:36:34 +00:00
|
|
|
# - array: cell{ tag: 6/ARRAY, data: $tag $(addr stream data) }
|
2019-09-08 07:33:57 +00:00
|
|
|
# data contains a pointer to an array of 8-byte data fields and the common
|
|
|
|
# tag for them all
|
2019-09-07 08:34:22 +00:00
|
|
|
|
2020-01-03 09:36:34 +00:00
|
|
|
repl: # in : (addr buffered-file), out : (addr buffered-file)
# Read-eval-print loop: repeatedly reads one expression from `in`, evaluates
# it, and prints the result to `out`. Stops when lisp-read returns 0.
# eax is saved and restored, so nothing is returned to the caller.
|
2019-10-16 02:35:19 +00:00
|
|
|
# . prologue
|
2019-09-07 08:27:19 +00:00
|
|
|
55/push-ebp
|
|
|
|
89/<- %ebp 4/r32/esp
|
|
|
|
# . save registers
|
2019-09-07 08:34:22 +00:00
|
|
|
50/push-eax
|
2019-09-22 16:58:47 +00:00
|
|
|
{
|
2019-12-08 21:56:46 +00:00
|
|
|
# read from the `in` argument rather than a hard-coded global
(lisp-read *(ebp+8)) # => eax : (handle cell)
|
2019-09-22 16:58:47 +00:00
|
|
|
# if (eax == 0) break
|
|
|
|
3d/compare-eax-and 0/imm32
|
|
|
|
74/jump-if-equal break/disp8
|
|
|
|
#
|
2019-12-08 21:56:46 +00:00
|
|
|
(lisp-eval %eax) # => eax : (handle cell)
|
2019-09-22 16:58:47 +00:00
|
|
|
# print to the `out` argument rather than a hard-coded global
(lisp-print *(ebp+0xc) %eax)
|
|
|
|
eb/jump loop/disp8
|
|
|
|
}
|
2019-09-07 08:34:22 +00:00
|
|
|
$repl:end:
|
|
|
|
# . restore registers
|
|
|
|
58/pop-to-eax
|
2019-10-16 02:35:19 +00:00
|
|
|
# . epilogue
|
2019-09-07 08:34:22 +00:00
|
|
|
89/<- %esp 5/r32/ebp
|
|
|
|
5d/pop-to-ebp
|
|
|
|
c3/return
|
|
|
|
|
2019-09-08 07:33:57 +00:00
|
|
|
# numbers start with a digit and are always in hex
|
|
|
|
# characters start with a backslash
|
|
|
|
# pairs start with '('
|
|
|
|
# arrays start with '['
|
|
|
|
# symbols start with anything else but quote, backquote, unquote or splice
|
2019-09-18 18:42:08 +00:00
|
|
|
# only one s-expression per line
|
2020-01-03 09:36:34 +00:00
|
|
|
lisp-read: # in : (addr buffered-file) -> eax : (handle cell)
# Reads one line from `in` into a 512-byte stream allocated on the stack.
# Returns 0 in eax if the line is empty (s->write == 0), otherwise the
# address of that stream. No parsing happens yet.
# NOTE(review): the stream lives in this function's frame, which is released
# at $lisp-read:end before returning, so the returned address points at
# reclaimed stack memory -- confirm callers consume it before it is overwritten.
|
2019-10-16 02:35:19 +00:00
|
|
|
# . prologue
|
2019-09-07 08:34:22 +00:00
|
|
|
55/push-ebp
|
|
|
|
89/<- %ebp 4/r32/esp
|
|
|
|
# . save registers
|
2019-09-18 18:42:08 +00:00
|
|
|
51/push-ecx
|
2019-12-08 21:56:46 +00:00
|
|
|
# var s/ecx : (ref stream byte 512)
# (0x200 bytes of data below three header words pushed as size, read, write; ecx ends up pointing at the write word)
|
2019-09-07 08:27:19 +00:00
|
|
|
81 5/subop/subtract %esp 0x200/imm32
|
|
|
|
68/push 0x200/imm32/size
|
|
|
|
68/push 0/imm32/read
|
|
|
|
68/push 0/imm32/write
|
|
|
|
89/<- %ecx 4/r32/esp
|
2019-09-22 16:58:47 +00:00
|
|
|
{
|
|
|
|
# read line into s
|
|
|
|
(clear-stream %ecx)
|
|
|
|
(read-line-buffered *(ebp+8) %ecx)
|
|
|
|
# if (s->write == 0) return null
|
|
|
|
{
|
|
|
|
81 7/subop/compare *ecx 0/imm32
|
|
|
|
75/jump-if-not-equal break/disp8
|
|
|
|
b8/copy-to-eax 0/imm32/eof
|
|
|
|
eb/jump $lisp-read:end/disp8
|
|
|
|
}
|
|
|
|
# ... (parsing not written yet)
|
2019-09-22 19:45:51 +00:00
|
|
|
# the loop-back jump below is commented out, so the enclosing block currently runs exactly once
#? eb/jump loop/disp8
|
2019-09-22 16:58:47 +00:00
|
|
|
}
|
2019-09-07 08:34:22 +00:00
|
|
|
# return s
|
|
|
|
89/<- %eax 1/r32/ecx
|
|
|
|
$lisp-read:end:
|
2019-09-07 08:27:19 +00:00
|
|
|
# . reclaim locals (0x200 data bytes + 3 header words of 4 bytes each = 0x20c)
|
|
|
|
81 0/subop/add %esp 0x20c/imm32
|
|
|
|
# . restore registers
|
2019-09-18 18:42:08 +00:00
|
|
|
59/pop-to-ecx
|
2019-10-16 02:35:19 +00:00
|
|
|
# . epilogue
|
2019-09-18 18:42:08 +00:00
|
|
|
89/<- %esp 5/r32/ebp
|
|
|
|
5d/pop-to-ebp
|
|
|
|
c3/return
|
|
|
|
|
2020-01-03 09:36:34 +00:00
|
|
|
# lisp-read: in : (addr buffered-file) -> (handle cell)
|
2019-09-22 16:58:47 +00:00
|
|
|
# token tmp = next-mulisp-token(in)
|
2019-09-18 18:42:08 +00:00
|
|
|
# if is-int(tmp) return cell(tmp)
|
|
|
|
# if is-string(tmp) return cell(tmp)
|
|
|
|
# if is-pair(tmp) ...
|
|
|
|
# if is-array(tmp) ...
|
|
|
|
|
2020-01-03 09:36:34 +00:00
|
|
|
next-mulisp-token: # in : (addr buffered-file), line : (addr stream byte), result : (addr slice)
# Not implemented yet: the body below is only a prologue and epilogue, so
# calling it leaves `result` untouched. The pseudocode describes the intended
# tokenizer.
|
2019-09-18 18:42:08 +00:00
|
|
|
# pseudocode:
|
|
|
|
#   if (line->read >= line->write)
|
|
|
|
#     read-line-buffered(in, line)
|
|
|
|
#     recurse
|
|
|
|
#   if (line->data[line->read] == ' ')
|
|
|
|
#     skip-chars-matching-whitespace(line)
|
|
|
|
#     recurse
|
|
|
|
#   if (line->data[line->read] == '#')
|
|
|
|
#     read-line-buffered(in, line)
|
|
|
|
#     recurse
|
|
|
|
#   eax = line->data[line->read]
|
|
|
|
#   if (eax == '"')
|
|
|
|
#     result->start = &line->data[line->read]
|
|
|
|
#     skip-string(in)  # NOTE(review): the slice points into `line`, so this presumably should take `line` -- confirm
|
|
|
|
#     result->end = &line->data[line->read]
|
|
|
|
#     return
|
|
|
|
#   if (is-digit(eax))
|
|
|
|
#     result->start = &line->data[line->read]
|
|
|
|
#     skip-hex-int(in)  # NOTE(review): presumably should take `line`, as above -- confirm
|
|
|
|
#     result->end = &line->data[line->read]
|
|
|
|
#     return
|
|
|
|
#   if (eax in '(' ')' '[' ']')
|
|
|
|
#     result->start = &line->data[line->read]
|
|
|
|
#     ++line->read
|
|
|
|
#     result->end = &line->data[line->read]
|
|
|
|
#     return
|
|
|
|
#   else
|
|
|
|
#     result->start = &line->data[line->read]
|
|
|
|
#     skip-lisp-word(line)
|
|
|
|
#     result->end = &line->data[line->read]
|
|
|
|
#     return
|
|
|
|
#
|
2019-10-16 02:35:19 +00:00
|
|
|
# . prologue
|
2019-09-18 18:42:08 +00:00
|
|
|
55/push-ebp
|
|
|
|
89/<- %ebp 4/r32/esp
|
|
|
|
# . save registers (none yet)
|
2019-09-22 16:58:47 +00:00
|
|
|
$next-mulisp-token:end:
|
2019-09-18 18:42:08 +00:00
|
|
|
# . reclaim locals (none yet)
|
|
|
|
# . restore registers (none yet)
|
2019-10-16 02:35:19 +00:00
|
|
|
# . epilogue
|
2019-09-07 08:27:19 +00:00
|
|
|
89/<- %esp 5/r32/ebp
|
|
|
|
5d/pop-to-ebp
|
|
|
|
c3/return
|
2019-09-07 08:34:22 +00:00
|
|
|
|
2020-01-03 09:36:34 +00:00
|
|
|
new-int-cell: # in : (addr slice) -> eax : (handle cell)
# NOTE(review): placeholder label with no instructions of its own; a call here would fall through into the code that follows.
|
2019-09-18 18:42:08 +00:00
|
|
|
|
2020-01-03 09:36:34 +00:00
|
|
|
new-string-cell: # in : (addr slice) -> eax : (handle cell)
# NOTE(review): placeholder label with no instructions of its own; a call here would fall through into the code that follows.
|
2019-09-18 18:42:08 +00:00
|
|
|
|
2020-01-03 09:36:34 +00:00
|
|
|
lisp-eval: # in : (addr cell) -> eax : (handle cell)
# Placeholder evaluator: returns its argument unchanged in eax.
|
2019-10-16 02:35:19 +00:00
|
|
|
# . prologue
|
2019-09-07 08:34:22 +00:00
|
|
|
55/push-ebp
|
|
|
|
89/<- %ebp 4/r32/esp
|
|
|
|
# . save registers (none)
|
|
|
|
# eax = in
8b/-> *(ebp+8) 0/r32/eax
|
|
|
|
$lisp-eval:end:
|
|
|
|
# . restore registers (none)
|
2019-10-16 02:35:19 +00:00
|
|
|
# . epilogue
|
2019-09-07 08:34:22 +00:00
|
|
|
89/<- %esp 5/r32/ebp
|
|
|
|
5d/pop-to-ebp
|
|
|
|
c3/return
|
|
|
|
|
2020-01-03 09:36:34 +00:00
|
|
|
lisp-print: # out : (addr buffered-file), x : (addr cell)
# Writes "=> " followed by the contents of `x` to `out`, then flushes `out`.
# `x` is handed straight to write-stream-data, so for now it is treated as a
# stream of bytes rather than a parsed cell.
|
2019-10-16 02:35:19 +00:00
|
|
|
# . prologue
|
2019-09-07 08:34:22 +00:00
|
|
|
55/push-ebp
|
|
|
|
89/<- %ebp 4/r32/esp
|
|
|
|
# . save registers (none)
|
|
|
|
# write(x) -- to the `out` argument at *(ebp+8), not a hard-coded global
|
|
|
|
(write-buffered *(ebp+8) "=> ")
|
|
|
|
(write-stream-data *(ebp+8) *(ebp+0xc))
|
|
|
|
(flush *(ebp+8))
|
|
|
|
$lisp-print:end:
|
|
|
|
# . restore registers (none)
|
2019-10-16 02:35:19 +00:00
|
|
|
# . epilogue
|
2019-09-07 08:34:22 +00:00
|
|
|
89/<- %esp 5/r32/ebp
|
|
|
|
5d/pop-to-ebp
|
|
|
|
c3/return
|
|
|
|
|
|
|
|
== data
|
|
|
|
|
|
|
|
Nil:
# Statically allocated cell with tag 0 and zeroed data.
# NOTE(review): only two 4-byte words are visible here (8 bytes), while the
# data-structure notes describe a cell as a 4-byte tag plus 8 bytes of data
# -- confirm whether a second data word is needed.
|
|
|
|
0/imm32/tag
|
|
|
|
0/imm32/data
|