From 6e1aa99a0033d7ff2ead3a56400dd6e47a80e4f5 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Sun, 20 Jun 2021 21:18:38 -0700 Subject: [PATCH] start guessing parentheses based on indentation --- mu-init.subx | 32 ++++++++--------- shell/parenthesize.mu | 27 +++++++++++--- shell/tokenize.mu | 84 +++++++++++++++---------------------------- 3 files changed, 66 insertions(+), 77 deletions(-) diff --git a/mu-init.subx b/mu-init.subx index 25c1149e..438e29e7 100644 --- a/mu-init.subx +++ b/mu-init.subx @@ -15,22 +15,22 @@ Entry: # #? (main 0 0 Primary-bus-secondary-drive) #? (set-cursor-position 0 0x40 0x20) - (test-parenthesize) - (test-parenthesize-skips-lines-with-initial-parens) - (test-parenthesize-skips-single-word-lines) -#? # always first run tests -#? (run-tests) -#? (num-test-failures) # => eax -#? # call main if tests all passed -#? { -#? 3d/compare-eax-and 0/imm32 -#? 75/jump-if-!= break/disp8 -#? c7 0/subop/copy *Running-tests? 0/imm32/false -#? (clear-real-screen) -#? c7 0/subop/copy *Real-screen-cursor-x 0/imm32 -#? c7 0/subop/copy *Real-screen-cursor-y 0/imm32 -#? (main 0 0 Primary-bus-secondary-drive) -#? } +#? (test-parenthesize) +#? (test-parenthesize-skips-lines-with-initial-parens) +#? (test-parenthesize-skips-single-word-lines) + # always first run tests + (run-tests) + (num-test-failures) # => eax + # call main if tests all passed + { + 3d/compare-eax-and 0/imm32 + 75/jump-if-!= break/disp8 + c7 0/subop/copy *Running-tests? 0/imm32/false + (clear-real-screen) + c7 0/subop/copy *Real-screen-cursor-x 0/imm32 + c7 0/subop/copy *Real-screen-cursor-y 0/imm32 + (main 0 0 Primary-bus-secondary-drive) + } # hang indefinitely { diff --git a/shell/parenthesize.mu b/shell/parenthesize.mu index f99f8c7f..91bc4334 100644 --- a/shell/parenthesize.mu +++ b/shell/parenthesize.mu @@ -219,7 +219,8 @@ fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr i } } -fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) { +# helper for checking parenthesize +fn emit-salient-tokens in: (addr stream token), out: (addr stream token) { rewind-stream in { var done?/eax: boolean <- stream-empty? in @@ -228,12 +229,15 @@ fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) { var token-storage: token var token/edx: (addr token) <- address token-storage read-from-stream in, token + # skip tokens should be skipped var is-skip?/eax: boolean <- skip-token? token compare is-skip?, 0/false loop-if-!= + # indent tokens should be skipped var is-indent?/eax: boolean <- indent-token? token compare is-indent?, 0/false loop-if-!= + # write-to-stream out, token # shallow copy loop } @@ -250,7 +254,18 @@ fn test-parenthesize { check-parenthesize "a b c\n (d ef)\n g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented" check-parenthesize "a b c\n d e\n f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented" check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment" +#? a b c +#? d ef +#? +#? g +#? check-parenthesize "a b c\n d ef\n\n g", "(a b c (d ef) g)", "F - test-parenthesize/11-comments" +#? check-parenthesize "a b c\n d ef\n\n g #abc", "(a b c (d ef)) g", "F - test-parenthesize/11-comments" check-parenthesize "a b c\n d ef\n\n g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments" +#? a b c +#? '(d ef) +#? +#? g #abc +#? check-parenthesize "a b c\n '(d ef)\n g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments" check-parenthesize "a b c\n '(d ef)\n\n g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments" check-parenthesize " a b c", "(a b c)", "F - test-parenthesize/13-initial-indent" check-parenthesize " a b c\n 34", "(a b c) 34", "F - test-parenthesize/14-initial-indent" @@ -261,7 +276,7 @@ fn test-parenthesize { check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote" check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice" check-parenthesize "a b\n 'c\n ,d\n e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words" - check-parenthesize "def foo\n#a b c\n de\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments" + check-parenthesize "def foo\n#a b c\n d e\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments" } fn test-parenthesize-skips-lines-with-initial-parens { @@ -306,7 +321,7 @@ fn check-parenthesize actual: (addr array byte), expected: (addr array byte), me initialize-gap-buffer-with expected-buffer, expected var expected-tokens-storage: (stream token 0x40) var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage - tokenize-and-strip-indent expected-buffer, expected-tokens, trace + tokenize-salient expected-buffer, expected-tokens, trace # rewind-stream actual-tokens check-token-streams-data-equal actual-tokens, expected-tokens, message @@ -348,9 +363,11 @@ fn check-token-streams-data-equal actual: (addr stream token), expected: (addr s var curr-token-storage: token var curr-token/ecx: (addr token) <- address curr-token-storage read-from-stream actual, curr-token +#? dump-token-from-cursor curr-token var expected-token-storage: token var expected-token/edx: (addr token) <- address expected-token-storage read-from-stream expected, expected-token +#? dump-token-from-cursor expected-token var match?/eax: boolean <- tokens-equal? curr-token, expected-token compare match?, 0/false { @@ -376,7 +393,7 @@ fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), tr parenthesize tokens, out, trace } -fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) { +fn tokenize-salient in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) { var tokens-storage: (stream token 0x400) var tokens/edx: (addr stream token) <- address tokens-storage tokenize in, tokens, trace @@ -386,5 +403,5 @@ fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), tr break-if-= return } - emit-non-indent-tokens tokens, out + emit-salient-tokens tokens, out } diff --git a/shell/tokenize.mu b/shell/tokenize.mu index 1675b728..ab25615f 100644 --- a/shell/tokenize.mu +++ b/shell/tokenize.mu @@ -29,9 +29,6 @@ fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) break-if-= return } - var comment?/eax: boolean <- comment-token? token - compare comment?, 0/false - loop-if-!= var skip?/eax: boolean <- skip-token? token compare skip?, 0/false loop-if-!= @@ -389,23 +386,35 @@ fn test-tokenize-indent { fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean { trace-text trace, "tokenize", "next-token" trace-lower trace + # first save an indent token { compare start-of-line?, 0/false break-if-= - next-indent-token in, out, trace - trace-higher trace - return 0/not-at-start-of-line + next-indent-token in, out, trace # might not be returned } skip-spaces-from-gap-buffer in + var g/eax: grapheme <- peek-from-gap-buffer in + { + compare g, 0x23/comment + break-if-!= + skip-rest-of-line in + } + var g/eax: grapheme <- peek-from-gap-buffer in { - var g/eax: grapheme <- peek-from-gap-buffer in compare g, 0xa/newline break-if-!= trace-text trace, "tokenize", "newline" g <- read-from-gap-buffer in - initialize-skip-token out + initialize-skip-token out # might drop indent if that's all there was in this line return 1/at-start-of-line } + { + compare start-of-line?, 0/false + break-if-= + # still here? no comment or newline? + trace-higher trace + return 0/not-at-start-of-line + } { var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false @@ -436,14 +445,6 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, next-stream-token in, out, trace break $next-token:case } - # comment - { - compare g, 0x23/comment - break-if-!= - rest-of-line in, out, trace - copy-to start-of-line?, 1/true - break $next-token:case - } # special-case: '-' { compare g, 0x2d/minus @@ -530,6 +531,11 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, initialize-token out, "," break $next-token:case } + set-cursor-position 0/screen, 0x40 0x20 + { + var foo/eax: int <- copy g + draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg + } abort "unknown token type" } trace-higher trace @@ -765,37 +771,17 @@ fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) { } } -fn rest-of-line in: (addr gap-buffer), _out: (addr token), trace: (addr trace) { - trace-text trace, "tokenize", "comment" - var out/eax: (addr token) <- copy _out - var out-data-ah/eax: (addr handle stream byte) <- get out, text-data - populate-stream out-data-ah, 0x40 - var _out-data/eax: (addr stream byte) <- lookup *out-data-ah - var out-data/edi: (addr stream byte) <- copy _out-data +fn skip-rest-of-line in: (addr gap-buffer) { { - var empty?/eax: boolean <- gap-buffer-scan-done? in - compare empty?, 0/false - { - break-if-= - return - } - var g/eax: grapheme <- read-from-gap-buffer in + var done?/eax: boolean <- gap-buffer-scan-done? in + compare done?, 0/false + break-if-!= + var g/eax: grapheme <- peek-from-gap-buffer in compare g, 0xa/newline break-if-= - write-grapheme out-data, g + g <- read-from-gap-buffer in # consume loop } - { - var should-trace?/eax: boolean <- should-trace? trace - compare should-trace?, 0/false - break-if-= - var stream-storage: (stream byte 0x80) - var stream/esi: (addr stream byte) <- address stream-storage - write stream, "=> " - rewind-stream out-data - write-stream stream, out-data - trace trace, "tokenize", stream - } } fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) { @@ -1296,20 +1282,6 @@ fn stream-token? _self: (addr token) -> _/eax: boolean { return 1/true } -fn comment-token? _self: (addr token) -> _/eax: boolean { - var self/eax: (addr token) <- copy _self - var in-data-ah/eax: (addr handle stream byte) <- get self, text-data - var in-data/eax: (addr stream byte) <- lookup *in-data-ah - rewind-stream in-data - var g/eax: grapheme <- read-grapheme in-data - compare g, 0x23/hash - { - break-if-= - return 0/false - } - return 1/true -} - fn skip-token? _self: (addr token) -> _/eax: boolean { var self/eax: (addr token) <- copy _self var in-type/eax: (addr int) <- get self, type