mu/shell/parse.mu
Kartik K. Agaram 26e9387df6 snapshot: infix
Like parenthesize, I'm copying tests over from https://github.com/akkartik/wart
Unlike parenthesize, though, I can't just transliterate the code itself.
Wart was operating on an intermediate AST representation. Here I'm all
the way down to cells. That seemed like a good idea when I embarked, but
now I'm not so sure. Operating with the right AST data structure allowed
me to more easily iterate over the elements of a list. The natural recursion
for cells is not a good fit.

This patch and the next couple is an interesting case study in what makes
Unix so effective. Yes, you have to play computer, and yes it gets verbose
and ugly. But just diff and patch go surprisingly far in helping build a
picture of the state space in my brain.

Then again, there's a steep gradient of skills here. There are people who
can visualize state spaces using diff and patch far better than me, and
people who can't do it as well as me. Nature, nurture, having different
priorities, whatever the reason. Giving some people just the right crutch
excludes others.
2021-06-22 21:23:40 -07:00

315 lines
11 KiB
Forth

fn parse-input tokens: (addr stream token), out: (addr handle cell), trace: (addr trace) {
rewind-stream tokens
var empty?/eax: boolean <- stream-empty? tokens
compare empty?, 0/false
{
break-if-=
error trace, "nothing to parse"
return
}
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, out, trace
{
compare close-paren?, 0/false
break-if-=
error trace, "')' is not a valid expression"
return
}
{
compare dot?, 0/false
break-if-=
error trace, "'.' is not a valid expression"
return
}
{
var empty?/eax: boolean <- stream-empty? tokens
compare empty?, 0/false
break-if-!=
error trace, "unexpected tokens at end; only type in a single expression at a time"
}
}
# return values:
# unmatched close-paren encountered?
# dot encountered? (only used internally by recursive calls)
fn parse-sexpression tokens: (addr stream token), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
trace-text trace, "parse", "parse"
trace-lower trace
var curr-token-storage: token
var curr-token/ecx: (addr token) <- address curr-token-storage
var empty?/eax: boolean <- stream-empty? tokens
compare empty?, 0/false
{
break-if-=
error trace, "end of stream; never found a balancing ')'"
trace-higher trace
return 1/true, 0/false
}
read-from-stream tokens, curr-token
$parse-sexpression:type-check: {
# single quote -> parse as list with a special car
var quote-token?/eax: boolean <- quote-token? curr-token
compare quote-token?, 0/false
{
break-if-=
var out/edi: (addr handle cell) <- copy _out
allocate-pair out
var out-addr/eax: (addr cell) <- lookup *out
var left-ah/edx: (addr handle cell) <- get out-addr, left
new-symbol left-ah, "'"
var right-ah/edx: (addr handle cell) <- get out-addr, right
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
trace-higher trace
return close-paren?, dot?
}
# backquote quote -> parse as list with a special car
var backquote-token?/eax: boolean <- backquote-token? curr-token
compare backquote-token?, 0/false
{
break-if-=
var out/edi: (addr handle cell) <- copy _out
allocate-pair out
var out-addr/eax: (addr cell) <- lookup *out
var left-ah/edx: (addr handle cell) <- get out-addr, left
new-symbol left-ah, "`"
var right-ah/edx: (addr handle cell) <- get out-addr, right
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
trace-higher trace
return close-paren?, dot?
}
# unquote -> parse as list with a special car
var unquote-token?/eax: boolean <- unquote-token? curr-token
compare unquote-token?, 0/false
{
break-if-=
var out/edi: (addr handle cell) <- copy _out
allocate-pair out
var out-addr/eax: (addr cell) <- lookup *out
var left-ah/edx: (addr handle cell) <- get out-addr, left
new-symbol left-ah, ","
var right-ah/edx: (addr handle cell) <- get out-addr, right
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
trace-higher trace
return close-paren?, dot?
}
# unquote-splice -> parse as list with a special car
var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token
compare unquote-splice-token?, 0/false
{
break-if-=
var out/edi: (addr handle cell) <- copy _out
allocate-pair out
var out-addr/eax: (addr cell) <- lookup *out
var left-ah/edx: (addr handle cell) <- get out-addr, left
new-symbol left-ah, ",@"
var right-ah/edx: (addr handle cell) <- get out-addr, right
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
trace-higher trace
return close-paren?, dot?
}
# dot -> return
var dot?/eax: boolean <- dot-token? curr-token
compare dot?, 0/false
{
break-if-=
trace-higher trace
return 0/false, 1/true
}
# not bracket -> parse atom
var bracket-token?/eax: boolean <- bracket-token? curr-token
compare bracket-token?, 0/false
{
break-if-!=
parse-atom curr-token, _out, trace
break $parse-sexpression:type-check
}
# open paren -> parse list
var open-paren?/eax: boolean <- open-paren-token? curr-token
compare open-paren?, 0/false
{
break-if-=
var curr/esi: (addr handle cell) <- copy _out
allocate-pair curr
var curr-addr/eax: (addr cell) <- lookup *curr
var left/edx: (addr handle cell) <- get curr-addr, left
{
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, left, trace
{
compare dot?, 0/false
break-if-=
error trace, "'.' cannot be at the start of a list"
return 1/true, dot?
}
compare close-paren?, 0/false
break-if-!=
var curr-addr/eax: (addr cell) <- lookup *curr
curr <- get curr-addr, right
var tmp-storage: (handle cell)
var tmp/edx: (addr handle cell) <- address tmp-storage
$parse-sexpression:list-loop: {
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, tmp, trace
# '.' -> clean up right here and return
compare dot?, 0/false
{
break-if-=
parse-dot-tail tokens, curr, trace
return 0/false, 0/false
}
allocate-pair curr
# ')' -> return
compare close-paren?, 0/false
break-if-!=
var curr-addr/eax: (addr cell) <- lookup *curr
var left/ecx: (addr handle cell) <- get curr-addr, left
copy-object tmp, left
#
curr <- get curr-addr, right
loop
}
}
break $parse-sexpression:type-check
}
# close paren -> return
var close-paren?/eax: boolean <- close-paren-token? curr-token
compare close-paren?, 0/false
{
break-if-=
trace-higher trace
return 1/true, 0/false
}
# otherwise abort
var stream-storage: (stream byte 0x400)
var stream/edx: (addr stream byte) <- address stream-storage
write stream, "unexpected token "
var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
rewind-stream curr-token-data
write-stream stream, curr-token-data
error-stream trace, stream
}
trace-higher trace
return 0/false, 0/false
}
fn parse-atom _curr-token: (addr token), _out: (addr handle cell), trace: (addr trace) {
trace-text trace, "parse", "parse atom"
var curr-token/ecx: (addr token) <- copy _curr-token
var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
var _curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
var curr-token-data/esi: (addr stream byte) <- copy _curr-token-data
trace trace, "parse", curr-token-data
# number
var number-token?/eax: boolean <- number-token? curr-token
compare number-token?, 0/false
{
break-if-=
rewind-stream curr-token-data
var _val/eax: int <- parse-decimal-int-from-stream curr-token-data
var val/ecx: int <- copy _val
var val-float/xmm0: float <- convert val
allocate-number _out
var out/eax: (addr handle cell) <- copy _out
var out-addr/eax: (addr cell) <- lookup *out
var dest/edi: (addr float) <- get out-addr, number-data
copy-to *dest, val-float
{
{
var should-trace?/eax: boolean <- should-trace? trace
compare should-trace?, 0/false
}
break-if-=
var stream-storage: (stream byte 0x400)
var stream/ecx: (addr stream byte) <- address stream-storage
write stream, "=> number "
var nested-trace-storage: trace
var nested-trace/edi: (addr trace) <- address nested-trace-storage
initialize-trace nested-trace, 1/only-errors, 0x10/capacity, 0/visible
print-number out-addr, stream, nested-trace
trace trace, "parse", stream
}
return
}
# default: copy either to a symbol or a stream
# stream token -> literal
var stream-token?/eax: boolean <- stream-token? curr-token
compare stream-token?, 0/false
{
break-if-=
allocate-stream _out
}
compare stream-token?, 0/false
{
break-if-!=
allocate-symbol _out
}
# copy token data
var out/eax: (addr handle cell) <- copy _out
var out-addr/eax: (addr cell) <- lookup *out
var curr-token-data-ah/ecx: (addr handle stream byte) <- get curr-token, text-data
var dest-ah/edx: (addr handle stream byte) <- get out-addr, text-data
copy-object curr-token-data-ah, dest-ah
{
{
var should-trace?/eax: boolean <- should-trace? trace
compare should-trace?, 0/false
}
break-if-=
var stream-storage: (stream byte 0x400)
var stream/ecx: (addr stream byte) <- address stream-storage
write stream, "=> symbol "
var nested-trace-storage: trace
var nested-trace/edi: (addr trace) <- address nested-trace-storage
initialize-trace nested-trace, 1/only-errors, 0x10/capacity, 0/visible
print-symbol out-addr, stream, nested-trace
trace trace, "parse", stream
}
}
fn parse-dot-tail tokens: (addr stream token), _out: (addr handle cell), trace: (addr trace) {
var out/edi: (addr handle cell) <- copy _out
var close-paren?/eax: boolean <- copy 0/false
var dot?/ecx: boolean <- copy 0/false
close-paren?, dot? <- parse-sexpression tokens, out, trace
compare close-paren?, 0/false
{
break-if-=
error trace, "'. )' makes no sense"
return
}
compare dot?, 0/false
{
break-if-=
error trace, "'. .' makes no sense"
return
}
#
var dummy: (handle cell)
var dummy-ah/edi: (addr handle cell) <- address dummy
close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
compare close-paren?, 0/false
{
break-if-!=
error trace, "cannot have multiple expressions between '.' and ')'"
return
}
compare dot?, 0/false
{
break-if-=
error trace, "cannot have two dots in a single list"
return
}
}