8 changed files with 591 additions and 22 deletions
@ -0,0 +1,69 @@
|
||||
type int-stack { |
||||
data: (handle array int) |
||||
top: int |
||||
} |
||||
|
||||
fn initialize-int-stack _self: (addr int-stack), n: int { |
||||
var self/esi: (addr int-stack) <- copy _self |
||||
var d/edi: (addr handle array int) <- get self, data |
||||
populate d, n |
||||
var top/eax: (addr int) <- get self, top |
||||
copy-to *top, 0 |
||||
} |
||||
|
||||
fn push-int-stack _self: (addr int-stack), _val: int { |
||||
var self/esi: (addr int-stack) <- copy _self |
||||
var top-addr/ecx: (addr int) <- get self, top |
||||
var data-ah/edx: (addr handle array int) <- get self, data |
||||
var data/eax: (addr array int) <- lookup *data-ah |
||||
var top/edx: int <- copy *top-addr |
||||
var dest-addr/edx: (addr int) <- index data, top |
||||
var val/eax: int <- copy _val |
||||
copy-to *dest-addr, val |
||||
add-to *top-addr, 1 |
||||
} |
||||
|
||||
fn pop-int-stack _self: (addr int-stack) -> _/eax: int { |
||||
var self/esi: (addr int-stack) <- copy _self |
||||
var top-addr/ecx: (addr int) <- get self, top |
||||
{ |
||||
compare *top-addr, 0 |
||||
break-if-> |
||||
return 0 |
||||
} |
||||
subtract-from *top-addr, 1 |
||||
var data-ah/edx: (addr handle array int) <- get self, data |
||||
var data/eax: (addr array int) <- lookup *data-ah |
||||
var top/edx: int <- copy *top-addr |
||||
var result-addr/eax: (addr int) <- index data, top |
||||
var val/eax: int <- copy *result-addr |
||||
return val |
||||
} |
||||
|
||||
fn int-stack-empty? _self: (addr int-stack) -> _/eax: boolean { |
||||
var self/esi: (addr int-stack) <- copy _self |
||||
var top-addr/ecx: (addr int) <- get self, top |
||||
compare *top-addr, 0 |
||||
{ |
||||
break-if-= |
||||
return 0/false |
||||
} |
||||
return 1/true |
||||
} |
||||
|
||||
fn int-stack-top _self: (addr int-stack) -> _/eax: int { |
||||
var self/esi: (addr int-stack) <- copy _self |
||||
var top-addr/ecx: (addr int) <- get self, top |
||||
var top/ecx: int <- copy *top-addr |
||||
{ |
||||
compare top, 0 |
||||
break-if-> |
||||
return 0 |
||||
} |
||||
top <- decrement |
||||
var data-ah/edx: (addr handle array int) <- get self, data |
||||
var data/eax: (addr array int) <- lookup *data-ah |
||||
var result-addr/eax: (addr int) <- index data, top |
||||
var val/eax: int <- copy *result-addr |
||||
return val |
||||
} |
@ -1,21 +1,390 @@
|
||||
# TODO: not really implemented yet |
||||
## insert explicit parens based on indentation |
||||
|
||||
# Design goals: |
||||
# keywords in other languages should look different from functions: def, if, while, etc. |
||||
# fully-parenthesized expressions should not be messed with |
||||
# ignore indent when lines start with parens |
||||
# ignore indent inside parens |
||||
# no modes to disable this pass |
||||
# introduce no new operators |
||||
# the language doesn't use nested lists like Scheme's `cond` |
||||
# lines with one word are never wrapped in parens |
||||
# encourage macros to explicitly insert all parens |
||||
# ignore indent inside backquote |
||||
|
||||
fn parenthesize in: (addr stream token), out: (addr stream token), trace: (addr trace) { |
||||
trace-text trace, "parenthesize", "insert parens" |
||||
trace-lower trace |
||||
var buffer-storage: (stream token 0x40) |
||||
var buffer/edi: (addr stream token) <- address buffer-storage |
||||
var curr-line-indent: int |
||||
var num-words-in-line: int |
||||
var paren-at-start-of-line?: boolean |
||||
var explicit-open-parens-storage: int |
||||
var explicit-open-parens/ebx: (addr int) <- address explicit-open-parens-storage |
||||
var implicit-open-parens-storage: int-stack |
||||
var implicit-open-parens/esi: (addr int-stack) <- address implicit-open-parens-storage |
||||
initialize-int-stack implicit-open-parens, 0x10 # potentially a major memory leak |
||||
rewind-stream in |
||||
{ |
||||
var done?/eax: boolean <- stream-empty? in |
||||
compare done?, 0/false |
||||
break-if-!= |
||||
# |
||||
var curr-token-storage: token |
||||
var curr-token/ecx: (addr token) <- address curr-token-storage |
||||
read-from-stream in, curr-token |
||||
#? dump-token-from-cursor curr-token |
||||
# update state |
||||
{ |
||||
var is-indent?/eax: boolean <- indent-token? curr-token |
||||
compare is-indent?, 0/false |
||||
break-if-= |
||||
copy-to num-words-in-line, 0 |
||||
copy-to paren-at-start-of-line?, 0/false |
||||
var tmp/eax: int <- indent-level curr-token |
||||
copy-to curr-line-indent, tmp |
||||
} |
||||
{ |
||||
var is-word?/eax: boolean <- word-token? curr-token |
||||
compare is-word?, 0/false |
||||
break-if-= |
||||
increment num-words-in-line |
||||
} |
||||
{ |
||||
compare num-words-in-line, 0 |
||||
break-if-!= |
||||
var is-open?/eax: boolean <- open-paren-token? curr-token |
||||
compare is-open?, 0/false |
||||
break-if-= |
||||
copy-to paren-at-start-of-line?, 1/true |
||||
} |
||||
# |
||||
$parenthesize:emit: { |
||||
{ |
||||
compare paren-at-start-of-line?, 0/false |
||||
break-if-= |
||||
#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "A", 7/fg 0/bg |
||||
emit-all buffer, curr-token, out, explicit-open-parens |
||||
break $parenthesize:emit |
||||
} |
||||
{ |
||||
var is-indent?/eax: boolean <- indent-token? curr-token |
||||
compare is-indent?, 0/false |
||||
break-if-= |
||||
#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "B", 7/fg 0/bg |
||||
emit-all buffer, curr-token, out, explicit-open-parens |
||||
break $parenthesize:emit |
||||
} |
||||
{ |
||||
compare num-words-in-line, 2 |
||||
break-if->= |
||||
#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "C", 7/fg 0/bg |
||||
write-to-stream buffer, curr-token |
||||
break $parenthesize:emit |
||||
} |
||||
{ |
||||
compare num-words-in-line, 2 |
||||
break-if-!= |
||||
var is-word?/eax: boolean <- word-token? curr-token |
||||
compare is-word?, 0/false |
||||
break-if-= |
||||
compare *explicit-open-parens, 0 |
||||
break-if-!= |
||||
#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "(\n", 7/fg 0/bg |
||||
var paren-storage: token |
||||
var paren-token/eax: (addr token) <- address paren-storage |
||||
initialize-token paren-token, "(" |
||||
write-to-stream out, paren-token |
||||
push-int-stack implicit-open-parens, curr-line-indent |
||||
} |
||||
emit-all buffer, curr-token, out, explicit-open-parens |
||||
} |
||||
{ |
||||
var is-indent?/eax: boolean <- indent-token? curr-token |
||||
compare is-indent?, 0/false |
||||
break-if-= |
||||
{ |
||||
# . loop check |
||||
var done?/eax: boolean <- int-stack-empty? implicit-open-parens |
||||
compare done?, 0/false |
||||
break-if-!= |
||||
var top-indent/eax: int <- int-stack-top implicit-open-parens |
||||
compare top-indent, curr-line-indent |
||||
break-if-< |
||||
# . loop body |
||||
var paren-storage: token |
||||
var paren-token/eax: (addr token) <- address paren-storage |
||||
initialize-token paren-token, ")" |
||||
write-to-stream out, paren-token |
||||
# . update |
||||
var dummy/eax: int <- pop-int-stack implicit-open-parens |
||||
loop |
||||
} |
||||
} |
||||
loop |
||||
} |
||||
emit-all buffer, 0/no-curr-token, out, explicit-open-parens |
||||
{ |
||||
# . loop check |
||||
var done?/eax: boolean <- int-stack-empty? implicit-open-parens |
||||
compare done?, 0/false |
||||
break-if-!= |
||||
# . loop body |
||||
var paren-storage: token |
||||
var paren-token/eax: (addr token) <- address paren-storage |
||||
initialize-token paren-token, ")" |
||||
write-to-stream out, paren-token |
||||
# . update |
||||
var dummy/eax: int <- pop-int-stack implicit-open-parens |
||||
loop |
||||
} |
||||
trace-higher trace |
||||
} |
||||
|
||||
fn indent-level _in: (addr token) -> _/eax: int { |
||||
var in/eax: (addr token) <- copy _in |
||||
var result/eax: (addr int) <- get in, number-data |
||||
return *result |
||||
} |
||||
|
||||
fn word-token? in: (addr token) -> _/eax: boolean { |
||||
{ |
||||
var is-indent?/eax: boolean <- indent-token? in |
||||
compare is-indent?, 0/false |
||||
break-if-!= |
||||
var is-bracket?/eax: boolean <- bracket-token? in # overzealously checks for [], but shouldn't ever encounter it |
||||
compare is-bracket?, 0/false |
||||
break-if-!= |
||||
var is-quote?/eax: boolean <- quote-token? in |
||||
compare is-quote?, 0/false |
||||
break-if-!= |
||||
var is-backquote?/eax: boolean <- backquote-token? in |
||||
compare is-backquote?, 0/false |
||||
break-if-!= |
||||
var is-unquote?/eax: boolean <- unquote-token? in |
||||
compare is-unquote?, 0/false |
||||
break-if-!= |
||||
var is-unquote-splice?/eax: boolean <- unquote-splice-token? in |
||||
compare is-unquote-splice?, 0/false |
||||
break-if-!= |
||||
return 1/true |
||||
} |
||||
return 0/false |
||||
} |
||||
|
||||
fn emit-all first: (addr stream token), second: (addr token), out: (addr stream token), explicit-open-parens: (addr int) { |
||||
rewind-stream first |
||||
{ |
||||
var done?/eax: boolean <- stream-empty? first |
||||
compare done?, 0/false |
||||
break-if-!= |
||||
var curr-token-storage: token |
||||
var curr-token/eax: (addr token) <- address curr-token-storage |
||||
read-from-stream first, curr-token |
||||
emit curr-token, out, explicit-open-parens |
||||
loop |
||||
} |
||||
clear-stream first |
||||
{ |
||||
compare second, 0 |
||||
break-if-= |
||||
emit second, out, explicit-open-parens |
||||
} |
||||
} |
||||
|
||||
fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr int) { |
||||
{ |
||||
var is-indent?/eax: boolean <- indent-token? t |
||||
compare is-indent?, 0/false |
||||
break-if-= |
||||
return |
||||
} |
||||
write-to-stream out, t |
||||
var explicit-open-parens/edi: (addr int) <- copy explicit-open-parens |
||||
{ |
||||
var is-open?/eax: boolean <- open-paren-token? t |
||||
compare is-open?, 0/false |
||||
break-if-= |
||||
increment *explicit-open-parens |
||||
} |
||||
{ |
||||
var is-close?/eax: boolean <- close-paren-token? t |
||||
compare is-close?, 0/false |
||||
break-if-= |
||||
decrement *explicit-open-parens |
||||
compare *explicit-open-parens, 0 |
||||
break-if->= |
||||
abort "emit: extra ')'" |
||||
} |
||||
} |
||||
|
||||
fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) { |
||||
rewind-stream in |
||||
{ |
||||
var done?/eax: boolean <- stream-empty? in |
||||
compare done?, 0/false |
||||
break-if-!= |
||||
var token-storage: token |
||||
var token/edx: (addr token) <- address token-storage |
||||
read-from-stream in, token |
||||
var is-skip?/eax: boolean <- skip-token? token |
||||
compare is-skip?, 0/false |
||||
loop-if-!= |
||||
var is-indent?/eax: boolean <- indent-token? token |
||||
compare is-indent?, 0/false |
||||
loop-if-!= |
||||
write-to-stream out, token # shallow copy |
||||
loop |
||||
} |
||||
trace-higher trace |
||||
} |
||||
|
||||
fn test-parenthesize { |
||||
check-parenthesize "a b c ", "(a b c)", "F - test-parenthesize/1" |
||||
check-parenthesize "a (b)", "(a (b))", "F - test-parenthesize/2" |
||||
check-parenthesize "a (b c)", "(a (b c))", "F - test-parenthesize/3" |
||||
check-parenthesize "a (b c) d", "(a (b c) d)", "F - test-parenthesize/4" |
||||
check-parenthesize "a b c\nd ef", "(a b c) (d ef)", "F - test-parenthesize/5-multiple-lines" |
||||
check-parenthesize "a b c\n d ef", "(a b c (d ef))", "F - test-parenthesize/6-indented" |
||||
check-parenthesize "a b c\n (d ef)", "(a b c (d ef))", "F - test-parenthesize/7-indented" |
||||
check-parenthesize "a b c\n (d ef)\n g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented" |
||||
check-parenthesize "a b c\n d e\n f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented" |
||||
check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment" |
||||
check-parenthesize "a b c\n d ef\n\n g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments" |
||||
check-parenthesize "a b c\n '(d ef)\n\n g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments" |
||||
check-parenthesize " a b c", "(a b c)", "F - test-parenthesize/13-initial-indent" |
||||
check-parenthesize " a b c\n 34", "(a b c) 34", "F - test-parenthesize/14-initial-indent" |
||||
check-parenthesize "def foo\n a b c\n d e\nnewdef", "(def foo (a b c) (d e)) newdef", "F - test-parenthesize/14" |
||||
check-parenthesize " a a\n a\ny", "(a a a) y", "F - test-parenthesize/15-group-before-too-much-outdent" |
||||
check-parenthesize "a `(b c)", "(a `(b c))", "F - test-parenthesize/16-backquote" |
||||
check-parenthesize "'a b c", "('a b c)", "F - test-parenthesize/17-quote" |
||||
check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote" |
||||
check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice" |
||||
check-parenthesize "a b\n 'c\n ,d\n e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words" |
||||
check-parenthesize "def foo\n#a b c\n de\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments" |
||||
} |
||||
|
||||
fn test-parenthesize-skips-lines-with-initial-parens { |
||||
check-parenthesize "(a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/1" |
||||
check-parenthesize "(a (b c))", "(a (b c))", "F - test-parenthesize-skips-lines-with-initial-parens/2" |
||||
check-parenthesize "(a () b)", "(a () b)", "F - test-parenthesize-skips-lines-with-initial-parens/3" |
||||
check-parenthesize " (a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/initial-indent" |
||||
check-parenthesize "(a b c\n bc\n def\n gh)", "(a b c bc def gh)", "F - test-parenthesize-skips-lines-with-initial-parens/outdent" |
||||
check-parenthesize "(a b c\n (def gh)\n (i j k)\n lm\n\n\n (no p))", "(a b c (def gh) (i j k) lm (no p))", "F - test-parenthesize-skips-lines-with-initial-parens/fully-parenthesized" |
||||
check-parenthesize ",(a b c)", ",(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote" |
||||
check-parenthesize ",@(a b c)", ",@(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote-splice" |
||||
check-parenthesize ",,(a b c)", ",,(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-nested-unquote" |
||||
check-parenthesize "(def foo\n #a b c\n d e)\nnew", "(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment" |
||||
check-parenthesize "`(def foo\n #a b c\n d e)\nnew", "`(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment-after-backquote" |
||||
check-parenthesize " (a b c\n d e)", "(a b c d e)", "F - test-parenthesize-skips-lines-with-initial-parens/with-indent" |
||||
check-parenthesize "def foo(a (b)\n c d)\n d e\nnew", "(def foo (a (b) c d) (d e)) new", "F - test-parenthesize-skips-lines-with-initial-parens/inside-arg-lists" |
||||
} |
||||
|
||||
fn test-parenthesize-skips-single-word-lines { |
||||
# lines usually get grouped with later indented lines |
||||
check-parenthesize "a b\n c", "(a b c)", "F - test-parenthesize-skips-single-word-lines/0" |
||||
# but single-word lines don't |
||||
check-parenthesize "a\n c", "a c", "F - test-parenthesize-skips-single-word-lines/1" |
||||
check-parenthesize "a", "a", "F - test-parenthesize-skips-single-word-lines/2" |
||||
check-parenthesize "a \nb\nc", "a b c", "F - test-parenthesize-skips-single-word-lines/3" |
||||
} |
||||
|
||||
fn check-parenthesize actual: (addr array byte), expected: (addr array byte), message: (addr array byte) { |
||||
var trace-storage: trace |
||||
var trace/edx: (addr trace) <- address trace-storage |
||||
initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible |
||||
# |
||||
var actual-buffer-storage: gap-buffer |
||||
var actual-buffer/eax: (addr gap-buffer) <- address actual-buffer-storage |
||||
initialize-gap-buffer-with actual-buffer, actual |
||||
var actual-tokens-storage: (stream token 0x40) |
||||
var actual-tokens/esi: (addr stream token) <- address actual-tokens-storage |
||||
tokenize-and-parenthesize actual-buffer, actual-tokens, trace |
||||
# |
||||
var expected-buffer-storage: gap-buffer |
||||
var expected-buffer/eax: (addr gap-buffer) <- address expected-buffer-storage |
||||
initialize-gap-buffer-with expected-buffer, expected |
||||
var expected-tokens-storage: (stream token 0x40) |
||||
var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage |
||||
tokenize-and-strip-indent expected-buffer, expected-tokens, trace |
||||
# |
||||
rewind-stream actual-tokens |
||||
check-token-streams-data-equal actual-tokens, expected-tokens, message |
||||
} |
||||
|
||||
fn check-token-streams-data-equal actual: (addr stream token), expected: (addr stream token), message: (addr array byte) { |
||||
rewind-stream actual |
||||
rewind-stream expected |
||||
{ |
||||
# loop termination checks |
||||
var actual-done?/eax: boolean <- stream-empty? actual |
||||
{ |
||||
compare actual-done?, 0/false |
||||
break-if-= |
||||
var expected-done?/eax: boolean <- stream-empty? expected |
||||
compare expected-done?, 0/false |
||||
{ |
||||
break-if-!= |
||||
# actual empty, but expected not empty |
||||
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg |
||||
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too short\n", 3/fg=cyan 0/bg |
||||
count-test-failure |
||||
return |
||||
} |
||||
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg/cyan, 0/bg |
||||
return |
||||
} |
||||
var expected-done?/eax: boolean <- stream-empty? expected |
||||
compare expected-done?, 0/false |
||||
{ |
||||
break-if-= |
||||
# actual not empty, but expected empty |
||||
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg |
||||
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too long\n", 3/fg=cyan 0/bg |
||||
count-test-failure |
||||
return |
||||
} |
||||
# loop body |
||||
var curr-token-storage: token |
||||
var curr-token/ecx: (addr token) <- address curr-token-storage |
||||
read-from-stream actual, curr-token |
||||
var expected-token-storage: token |
||||
var expected-token/edx: (addr token) <- address expected-token-storage |
||||
read-from-stream expected, expected-token |
||||
var match?/eax: boolean <- tokens-equal? curr-token, expected-token |
||||
compare match?, 0/false |
||||
{ |
||||
break-if-!= |
||||
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg |
||||
count-test-failure |
||||
return |
||||
} |
||||
loop |
||||
} |
||||
} |
||||
|
||||
fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) { |
||||
var tokens-storage: (stream token 0x400) |
||||
var tokens/edx: (addr stream token) <- address tokens-storage |
||||
tokenize in, tokens, trace |
||||
var error?/eax: boolean <- has-errors? trace |
||||
compare error?, 0/false |
||||
{ |
||||
break-if-= |
||||
return |
||||
} |
||||
parenthesize tokens, out, trace |
||||
} |
||||
|
||||
fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) { |
||||
var tokens-storage: (stream token 0x400) |
||||
var tokens/edx: (addr stream token) <- address tokens-storage |
||||
tokenize in, tokens, trace |
||||
var error?/eax: boolean <- has-errors? trace |
||||
compare error?, 0/false |
||||
{ |
||||
break-if-= |
||||
return |
||||
} |
||||
emit-non-indent-tokens tokens, out |
||||
} |
||||
|
Loading…
Reference in new issue