start guessing parentheses based on indentation

This commit is contained in:
Kartik K. Agaram 2021-06-20 21:18:38 -07:00
parent 9d7d99fe6c
commit 6e1aa99a00
3 changed files with 66 additions and 77 deletions

View File

@ -15,22 +15,22 @@ Entry:
#
#? (main 0 0 Primary-bus-secondary-drive)
#? (set-cursor-position 0 0x40 0x20)
(test-parenthesize)
(test-parenthesize-skips-lines-with-initial-parens)
(test-parenthesize-skips-single-word-lines)
#? # always first run tests
#? (run-tests)
#? (num-test-failures) # => eax
#? # call main if tests all passed
#? {
#? 3d/compare-eax-and 0/imm32
#? 75/jump-if-!= break/disp8
#? c7 0/subop/copy *Running-tests? 0/imm32/false
#? (clear-real-screen)
#? c7 0/subop/copy *Real-screen-cursor-x 0/imm32
#? c7 0/subop/copy *Real-screen-cursor-y 0/imm32
#? (main 0 0 Primary-bus-secondary-drive)
#? }
#? (test-parenthesize)
#? (test-parenthesize-skips-lines-with-initial-parens)
#? (test-parenthesize-skips-single-word-lines)
# always first run tests
(run-tests)
(num-test-failures) # => eax
# call main if tests all passed
{
3d/compare-eax-and 0/imm32
75/jump-if-!= break/disp8
c7 0/subop/copy *Running-tests? 0/imm32/false
(clear-real-screen)
c7 0/subop/copy *Real-screen-cursor-x 0/imm32
c7 0/subop/copy *Real-screen-cursor-y 0/imm32
(main 0 0 Primary-bus-secondary-drive)
}
# hang indefinitely
{

View File

@ -219,7 +219,8 @@ fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr i
}
}
fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) {
# helper for checking parenthesize
fn emit-salient-tokens in: (addr stream token), out: (addr stream token) {
rewind-stream in
{
var done?/eax: boolean <- stream-empty? in
@ -228,12 +229,15 @@ fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) {
var token-storage: token
var token/edx: (addr token) <- address token-storage
read-from-stream in, token
# skip tokens should be skipped
var is-skip?/eax: boolean <- skip-token? token
compare is-skip?, 0/false
loop-if-!=
# indent tokens should be skipped
var is-indent?/eax: boolean <- indent-token? token
compare is-indent?, 0/false
loop-if-!=
#
write-to-stream out, token # shallow copy
loop
}
@ -250,7 +254,18 @@ fn test-parenthesize {
check-parenthesize "a b c\n (d ef)\n g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented"
check-parenthesize "a b c\n d e\n f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented"
check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment"
#? a b c
#? d ef
#?
#? g
#? check-parenthesize "a b c\n d ef\n\n g", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
#? check-parenthesize "a b c\n d ef\n\n g #abc", "(a b c (d ef)) g", "F - test-parenthesize/11-comments"
check-parenthesize "a b c\n d ef\n\n g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
#? a b c
#? '(d ef)
#?
#? g #abc
#? check-parenthesize "a b c\n '(d ef)\n g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
check-parenthesize "a b c\n '(d ef)\n\n g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
check-parenthesize " a b c", "(a b c)", "F - test-parenthesize/13-initial-indent"
check-parenthesize " a b c\n 34", "(a b c) 34", "F - test-parenthesize/14-initial-indent"
@ -261,7 +276,7 @@ fn test-parenthesize {
check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote"
check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice"
check-parenthesize "a b\n 'c\n ,d\n e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words"
check-parenthesize "def foo\n#a b c\n de\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
check-parenthesize "def foo\n#a b c\n d e\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
}
fn test-parenthesize-skips-lines-with-initial-parens {
@ -306,7 +321,7 @@ fn check-parenthesize actual: (addr array byte), expected: (addr array byte), me
initialize-gap-buffer-with expected-buffer, expected
var expected-tokens-storage: (stream token 0x40)
var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage
tokenize-and-strip-indent expected-buffer, expected-tokens, trace
tokenize-salient expected-buffer, expected-tokens, trace
#
rewind-stream actual-tokens
check-token-streams-data-equal actual-tokens, expected-tokens, message
@ -348,9 +363,11 @@ fn check-token-streams-data-equal actual: (addr stream token), expected: (addr s
var curr-token-storage: token
var curr-token/ecx: (addr token) <- address curr-token-storage
read-from-stream actual, curr-token
#? dump-token-from-cursor curr-token
var expected-token-storage: token
var expected-token/edx: (addr token) <- address expected-token-storage
read-from-stream expected, expected-token
#? dump-token-from-cursor expected-token
var match?/eax: boolean <- tokens-equal? curr-token, expected-token
compare match?, 0/false
{
@ -376,7 +393,7 @@ fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), tr
parenthesize tokens, out, trace
}
fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
fn tokenize-salient in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
var tokens-storage: (stream token 0x400)
var tokens/edx: (addr stream token) <- address tokens-storage
tokenize in, tokens, trace
@ -386,5 +403,5 @@ fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), tr
break-if-=
return
}
emit-non-indent-tokens tokens, out
emit-salient-tokens tokens, out
}

View File

@ -29,9 +29,6 @@ fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace)
break-if-=
return
}
var comment?/eax: boolean <- comment-token? token
compare comment?, 0/false
loop-if-!=
var skip?/eax: boolean <- skip-token? token
compare skip?, 0/false
loop-if-!=
@ -389,23 +386,35 @@ fn test-tokenize-indent {
fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
trace-text trace, "tokenize", "next-token"
trace-lower trace
# first save an indent token
{
compare start-of-line?, 0/false
break-if-=
next-indent-token in, out, trace
trace-higher trace
return 0/not-at-start-of-line
next-indent-token in, out, trace # might not be returned
}
skip-spaces-from-gap-buffer in
var g/eax: grapheme <- peek-from-gap-buffer in
{
compare g, 0x23/comment
break-if-!=
skip-rest-of-line in
}
var g/eax: grapheme <- peek-from-gap-buffer in
{
var g/eax: grapheme <- peek-from-gap-buffer in
compare g, 0xa/newline
break-if-!=
trace-text trace, "tokenize", "newline"
g <- read-from-gap-buffer in
initialize-skip-token out
initialize-skip-token out # might drop indent if that's all there was in this line
return 1/at-start-of-line
}
{
compare start-of-line?, 0/false
break-if-=
# still here? no comment or newline?
trace-higher trace
return 0/not-at-start-of-line
}
{
var done?/eax: boolean <- gap-buffer-scan-done? in
compare done?, 0/false
@ -436,14 +445,6 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
next-stream-token in, out, trace
break $next-token:case
}
# comment
{
compare g, 0x23/comment
break-if-!=
rest-of-line in, out, trace
copy-to start-of-line?, 1/true
break $next-token:case
}
# special-case: '-'
{
compare g, 0x2d/minus
@ -530,6 +531,11 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
initialize-token out, ","
break $next-token:case
}
set-cursor-position 0/screen, 0x40 0x20
{
var foo/eax: int <- copy g
draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg
}
abort "unknown token type"
}
trace-higher trace
@ -765,37 +771,17 @@ fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) {
}
}
fn rest-of-line in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
trace-text trace, "tokenize", "comment"
var out/eax: (addr token) <- copy _out
var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
populate-stream out-data-ah, 0x40
var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
var out-data/edi: (addr stream byte) <- copy _out-data
fn skip-rest-of-line in: (addr gap-buffer) {
{
var empty?/eax: boolean <- gap-buffer-scan-done? in
compare empty?, 0/false
{
break-if-=
return
}
var g/eax: grapheme <- read-from-gap-buffer in
var done?/eax: boolean <- gap-buffer-scan-done? in
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- peek-from-gap-buffer in
compare g, 0xa/newline
break-if-=
write-grapheme out-data, g
g <- read-from-gap-buffer in # consume
loop
}
{
var should-trace?/eax: boolean <- should-trace? trace
compare should-trace?, 0/false
break-if-=
var stream-storage: (stream byte 0x80)
var stream/esi: (addr stream byte) <- address stream-storage
write stream, "=> "
rewind-stream out-data
write-stream stream, out-data
trace trace, "tokenize", stream
}
}
fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
@ -1296,20 +1282,6 @@ fn stream-token? _self: (addr token) -> _/eax: boolean {
return 1/true
}
# Report whether a token's text starts with the comment marker '#' (0x23).
# Side effect: rewinds the token's text-data stream and reads one grapheme from it.
fn comment-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  # rewind before reading so we look at the start of the text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  compare first, 0x23/hash
  {
    # anything other than '#' falls through to the false result below
    break-if-!=
    return 1/true
  }
  return 0/false
}
fn skip-token? _self: (addr token) -> _/eax: boolean {
var self/eax: (addr token) <- copy _self
var in-type/eax: (addr int) <- get self, type