From 9d7d99fe6cc5a05960ef52cdfa8acefabf8e40bf Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Sun, 20 Jun 2021 20:36:47 -0700 Subject: [PATCH] snapshot This is going better than expected; just 3 failing tests among the new ones. --- 109stream-equal.subx | 2 +- 309stream.subx | 20 ++- 400.mu | 3 +- mu-init.subx | 33 ++-- shell/README.md | 58 ++++++- shell/int-stack.mu | 69 ++++++++ shell/parenthesize.mu | 373 +++++++++++++++++++++++++++++++++++++++++- shell/tokenize.mu | 55 +++++++ 8 files changed, 591 insertions(+), 22 deletions(-) create mode 100644 shell/int-stack.mu diff --git a/109stream-equal.subx b/109stream-equal.subx index 8f6cf1bf..556afd91 100644 --- a/109stream-equal.subx +++ b/109stream-equal.subx @@ -190,7 +190,7 @@ test-stream-data-equal-size-check: 5d/pop-to-ebp c3/return -# helper for later tests +# helper for tests check-stream-equal: # f: (addr stream byte), s: (addr array byte), msg: (addr array byte) # . prologue 55/push-ebp diff --git a/309stream.subx b/309stream.subx index c39a7146..61da00ae 100644 --- a/309stream.subx +++ b/309stream.subx @@ -208,7 +208,7 @@ $stream-final:end: c3/return # compare all the data in two streams (ignoring the read pointer) -streams-data-equal?: # f: (addr stream byte), s: (addr array byte) -> result/eax: boolean +streams-data-equal?: # a: (addr stream byte), b: (addr stream byte) -> result/eax: boolean # pseudocode: # awrite = a->write # if (awrite != b->write) return false @@ -295,3 +295,21 @@ $streams-data-equal?:end: 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return + +# helper for tests: passes the result of streams-data-equal?(s, expected) to check-ints-equal, expecting 1, with msg +check-streams-data-equal: # s: (addr stream _), expected: (addr stream _), msg: (addr array byte) + # . prologue + 55/push-ebp + 89/<- %ebp 4/r32/esp + # . save registers + 50/push-eax + # + (streams-data-equal? *(ebp+8) *(ebp+0xc)) # => eax + (check-ints-equal %eax 1 *(ebp+0x10)) +$check-streams-data-equal:end: + # . restore registers + 58/pop-to-eax + # . 
epilogue + 89/<- %esp 5/r32/ebp + 5d/pop-to-ebp + c3/return diff --git a/400.mu b/400.mu index a0f2d85b..05a5463f 100644 --- a/400.mu +++ b/400.mu @@ -43,8 +43,9 @@ sig count-of-events -> _/eax: int sig clear-stream f: (addr stream _) sig rewind-stream f: (addr stream _) sig stream-data-equal? f: (addr stream byte), s: (addr array byte) -> _/eax: boolean -sig streams-data-equal? f: (addr stream byte), s: (addr stream byte) -> _/eax: boolean +sig streams-data-equal? a: (addr stream byte), b: (addr stream byte) -> _/eax: boolean sig check-stream-equal f: (addr stream byte), s: (addr array byte), msg: (addr array byte) +sig check-streams-data-equal s: (addr stream _), expected: (addr stream _), msg: (addr array byte) sig next-stream-line-equal? f: (addr stream byte), s: (addr array byte) -> _/eax: boolean sig check-next-stream-line-equal f: (addr stream byte), s: (addr array byte), msg: (addr array byte) sig write f: (addr stream byte), s: (addr array byte) diff --git a/mu-init.subx b/mu-init.subx index 4bd7abc6..25c1149e 100644 --- a/mu-init.subx +++ b/mu-init.subx @@ -14,22 +14,23 @@ Entry: bd/copy-to-ebp 0/imm32 # #? (main 0 0 Primary-bus-secondary-drive) -#? (test-tokenize-indent) -#? (test-run-integer) -#? (test-run-expand-trace) - # always first run tests - (run-tests) - (num-test-failures) # => eax - # call main if tests all passed - { - 3d/compare-eax-and 0/imm32 - 75/jump-if-!= break/disp8 - c7 0/subop/copy *Running-tests? 0/imm32/false - (clear-real-screen) - c7 0/subop/copy *Real-screen-cursor-x 0/imm32 - c7 0/subop/copy *Real-screen-cursor-y 0/imm32 - (main 0 0 Primary-bus-secondary-drive) - } +#? (set-cursor-position 0 0x40 0x20) + (test-parenthesize) + (test-parenthesize-skips-lines-with-initial-parens) + (test-parenthesize-skips-single-word-lines) +#? # always first run tests +#? (run-tests) +#? (num-test-failures) # => eax +#? # call main if tests all passed +#? { +#? 3d/compare-eax-and 0/imm32 +#? 75/jump-if-!= break/disp8 +#? 
c7 0/subop/copy *Running-tests? 0/imm32/false +#? (clear-real-screen) +#? c7 0/subop/copy *Real-screen-cursor-x 0/imm32 +#? c7 0/subop/copy *Real-screen-cursor-y 0/imm32 +#? (main 0 0 Primary-bus-secondary-drive) +#? } # hang indefinitely { diff --git a/shell/README.md b/shell/README.md index 6bde168b..a9d8ea7b 100644 --- a/shell/README.md +++ b/shell/README.md @@ -62,7 +62,63 @@ Currently runs a tiny dialect of Lisp. Steps to run it from the top-level: qemu-system-i386 -m 2G -enable-kvm -hda code.img -hdb data.img ``` -*Known issues* +### Indent-sensitivity + +The Mu shell is a Lisp under the hood. However, you'll see far fewer +parentheses than in most Lisps because it can often automatically insert them +based on indentation. + +If you're already used to Lisp and always type in all parens, everything will +continue to work. In particular, paren-insertion is disabled inside explicitly +added parens. Once Mu sees a `(`, it stops trying to be smart until it sees the +matching `)`. + +I recommend removing parens sparingly: only from top-level words (`def`, `mac`, +`define`) and control-flow words (`if`, `while`, `for`, etc.). Continue using +parens for most real function calls. When in doubt, insert parens. + +The rule for when parens are inserted is: + +> Multi-word lines without leading parens are implicitly grouped with later +> indented lines. + +For example: + +``` +if (> n 0) => (if (> n 0) + 34 34) +``` + +If no indented lines follow, parens go around the single line: + +``` +f a => (f a) +f b (f b) +``` + +Lines with a single word are never wrapped in parens: + +``` +def (foo) => (def (foo) + 42 42) +``` + +Lines with a leading paren never get more parens: + +``` +def (foo x) => (def (foo x) + (print x) x (print x) x) +``` + +Putting these rules together, parens are not required around the `if` in: + +``` +if (= 1 (% x 2)) + 'odd + 'even +``` + +### Known issues * No mouse support. 
diff --git a/shell/int-stack.mu b/shell/int-stack.mu new file mode 100644 index 00000000..a3ffa6eb --- /dev/null +++ b/shell/int-stack.mu @@ -0,0 +1,69 @@ +type int-stack { + data: (handle array int) + top: int +} + +fn initialize-int-stack _self: (addr int-stack), n: int { + var self/esi: (addr int-stack) <- copy _self + var d/edi: (addr handle array int) <- get self, data + populate d, n + var top/eax: (addr int) <- get self, top + copy-to *top, 0 +} + +fn push-int-stack _self: (addr int-stack), _val: int { + var self/esi: (addr int-stack) <- copy _self + var top-addr/ecx: (addr int) <- get self, top + var data-ah/edx: (addr handle array int) <- get self, data + var data/eax: (addr array int) <- lookup *data-ah + var top/edx: int <- copy *top-addr + var dest-addr/edx: (addr int) <- index data, top + var val/eax: int <- copy _val + copy-to *dest-addr, val + add-to *top-addr, 1 +} + +fn pop-int-stack _self: (addr int-stack) -> _/eax: int { + var self/esi: (addr int-stack) <- copy _self + var top-addr/ecx: (addr int) <- get self, top + { + compare *top-addr, 0 + break-if-> + return 0 + } + subtract-from *top-addr, 1 + var data-ah/edx: (addr handle array int) <- get self, data + var data/eax: (addr array int) <- lookup *data-ah + var top/edx: int <- copy *top-addr + var result-addr/eax: (addr int) <- index data, top + var val/eax: int <- copy *result-addr + return val +} + +fn int-stack-empty? 
_self: (addr int-stack) -> _/eax: boolean { + var self/esi: (addr int-stack) <- copy _self + var top-addr/ecx: (addr int) <- get self, top + compare *top-addr, 0 + { + break-if-= + return 0/false + } + return 1/true +} + +fn int-stack-top _self: (addr int-stack) -> _/eax: int { + var self/esi: (addr int-stack) <- copy _self + var top-addr/ecx: (addr int) <- get self, top + var top/ecx: int <- copy *top-addr + { + compare top, 0 + break-if-> + return 0 + } + top <- decrement + var data-ah/edx: (addr handle array int) <- get self, data + var data/eax: (addr array int) <- lookup *data-ah + var result-addr/eax: (addr int) <- index data, top + var val/eax: int <- copy *result-addr + return val +} diff --git a/shell/parenthesize.mu b/shell/parenthesize.mu index 67b50854..f99f8c7f 100644 --- a/shell/parenthesize.mu +++ b/shell/parenthesize.mu @@ -1,21 +1,390 @@ -# TODO: not really implemented yet +## insert explicit parens based on indentation + +# Design goals: +# keywords in other languages should look different from functions: def, if, while, etc. 
+# fully-parenthesized expressions should not be messed with +# ignore indent when lines start with parens +# ignore indent inside parens +# no modes to disable this pass +# introduce no new operators +# the language doesn't use nested lists like Scheme's `cond` +# lines with one word are never wrapped in parens +# encourage macros to explicitly insert all parens +# ignore indent inside backquote + fn parenthesize in: (addr stream token), out: (addr stream token), trace: (addr trace) { trace-text trace, "parenthesize", "insert parens" trace-lower trace + var buffer-storage: (stream token 0x40) + var buffer/edi: (addr stream token) <- address buffer-storage + var curr-line-indent: int + var num-words-in-line: int + var paren-at-start-of-line?: boolean + var explicit-open-parens-storage: int + var explicit-open-parens/ebx: (addr int) <- address explicit-open-parens-storage + var implicit-open-parens-storage: int-stack + var implicit-open-parens/esi: (addr int-stack) <- address implicit-open-parens-storage + initialize-int-stack implicit-open-parens, 0x10 # potentially a major memory leak rewind-stream in { var done?/eax: boolean <- stream-empty? in compare done?, 0/false break-if-!= # + var curr-token-storage: token + var curr-token/ecx: (addr token) <- address curr-token-storage + read-from-stream in, curr-token +#? dump-token-from-cursor curr-token + # update per-line state: indent level, word count, whether the line opens with a paren + { + var is-indent?/eax: boolean <- indent-token? curr-token + compare is-indent?, 0/false + break-if-= + copy-to num-words-in-line, 0 + copy-to paren-at-start-of-line?, 0/false + var tmp/eax: int <- indent-level curr-token + copy-to curr-line-indent, tmp + } + { + var is-word?/eax: boolean <- word-token? curr-token + compare is-word?, 0/false + break-if-= + increment num-words-in-line + } + { + compare num-words-in-line, 0 + break-if-!= + var is-open?/eax: boolean <- open-paren-token? 
curr-token + compare is-open?, 0/false + break-if-= + copy-to paren-at-start-of-line?, 1/true + } + # emit curr-token, or buffer it while the line has fewer than 2 words + $parenthesize:emit: { + { + compare paren-at-start-of-line?, 0/false + break-if-= +#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "A", 7/fg 0/bg + emit-all buffer, curr-token, out, explicit-open-parens + break $parenthesize:emit + } + { + var is-indent?/eax: boolean <- indent-token? curr-token + compare is-indent?, 0/false + break-if-= +#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "B", 7/fg 0/bg + emit-all buffer, curr-token, out, explicit-open-parens + break $parenthesize:emit + } + { + compare num-words-in-line, 2 + break-if->= +#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "C", 7/fg 0/bg + write-to-stream buffer, curr-token + break $parenthesize:emit + } + { + compare num-words-in-line, 2 + break-if-!= + var is-word?/eax: boolean <- word-token? curr-token + compare is-word?, 0/false + break-if-= + compare *explicit-open-parens, 0 + break-if-!= +#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "(\n", 7/fg 0/bg + var paren-storage: token + var paren-token/eax: (addr token) <- address paren-storage + initialize-token paren-token, "(" + write-to-stream out, paren-token + push-int-stack implicit-open-parens, curr-line-indent + } + emit-all buffer, curr-token, out, explicit-open-parens + } + { + var is-indent?/eax: boolean <- indent-token? curr-token + compare is-indent?, 0/false + break-if-= + { + # . loop check + var done?/eax: boolean <- int-stack-empty? implicit-open-parens + compare done?, 0/false + break-if-!= + var top-indent/eax: int <- int-stack-top implicit-open-parens + compare top-indent, curr-line-indent + break-if-< + # . loop body + var paren-storage: token + var paren-token/eax: (addr token) <- address paren-storage + initialize-token paren-token, ")" + write-to-stream out, paren-token + # . 
update + var dummy/eax: int <- pop-int-stack implicit-open-parens + loop + } + } + loop + } + emit-all buffer, 0/no-curr-token, out, explicit-open-parens + { + # . loop check + var done?/eax: boolean <- int-stack-empty? implicit-open-parens + compare done?, 0/false + break-if-!= + # . loop body + var paren-storage: token + var paren-token/eax: (addr token) <- address paren-storage + initialize-token paren-token, ")" + write-to-stream out, paren-token + # . update + var dummy/eax: int <- pop-int-stack implicit-open-parens + loop + } + trace-higher trace +} + +fn indent-level _in: (addr token) -> _/eax: int { + var in/eax: (addr token) <- copy _in + var result/eax: (addr int) <- get in, number-data + return *result +} + +fn word-token? in: (addr token) -> _/eax: boolean { + { + var is-indent?/eax: boolean <- indent-token? in + compare is-indent?, 0/false + break-if-!= + var is-bracket?/eax: boolean <- bracket-token? in # overzealously checks for [], but shouldn't ever encounter it + compare is-bracket?, 0/false + break-if-!= + var is-quote?/eax: boolean <- quote-token? in + compare is-quote?, 0/false + break-if-!= + var is-backquote?/eax: boolean <- backquote-token? in + compare is-backquote?, 0/false + break-if-!= + var is-unquote?/eax: boolean <- unquote-token? in + compare is-unquote?, 0/false + break-if-!= + var is-unquote-splice?/eax: boolean <- unquote-splice-token? in + compare is-unquote-splice?, 0/false + break-if-!= + return 1/true + } + return 0/false +} + +fn emit-all first: (addr stream token), second: (addr token), out: (addr stream token), explicit-open-parens: (addr int) { + rewind-stream first + { + var done?/eax: boolean <- stream-empty? 
first + compare done?, 0/false + break-if-!= + var curr-token-storage: token + var curr-token/eax: (addr token) <- address curr-token-storage + read-from-stream first, curr-token + emit curr-token, out, explicit-open-parens + loop + } + clear-stream first + { + compare second, 0 + break-if-= + emit second, out, explicit-open-parens + } +} + +fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr int) { + { + var is-indent?/eax: boolean <- indent-token? t + compare is-indent?, 0/false + break-if-= + return + } + write-to-stream out, t + var explicit-open-parens/edi: (addr int) <- copy explicit-open-parens + { + var is-open?/eax: boolean <- open-paren-token? t + compare is-open?, 0/false + break-if-= + increment *explicit-open-parens + } + { + var is-close?/eax: boolean <- close-paren-token? t + compare is-close?, 0/false + break-if-= + decrement *explicit-open-parens + compare *explicit-open-parens, 0 + break-if->= + abort "emit: extra ')'" + } +} + +fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) { + rewind-stream in + { + var done?/eax: boolean <- stream-empty? in + compare done?, 0/false + break-if-!= var token-storage: token var token/edx: (addr token) <- address token-storage read-from-stream in, token + var is-skip?/eax: boolean <- skip-token? token + compare is-skip?, 0/false + loop-if-!= var is-indent?/eax: boolean <- indent-token? 
token compare is-indent?, 0/false loop-if-!= write-to-stream out, token # shallow copy loop } - trace-higher trace +} + +fn test-parenthesize { + check-parenthesize "a b c ", "(a b c)", "F - test-parenthesize/1" + check-parenthesize "a (b)", "(a (b))", "F - test-parenthesize/2" + check-parenthesize "a (b c)", "(a (b c))", "F - test-parenthesize/3" + check-parenthesize "a (b c) d", "(a (b c) d)", "F - test-parenthesize/4" + check-parenthesize "a b c\nd ef", "(a b c) (d ef)", "F - test-parenthesize/5-multiple-lines" + check-parenthesize "a b c\n d ef", "(a b c (d ef))", "F - test-parenthesize/6-indented" + check-parenthesize "a b c\n (d ef)", "(a b c (d ef))", "F - test-parenthesize/7-indented" + check-parenthesize "a b c\n (d ef)\n g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented" + check-parenthesize "a b c\n d e\n f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented" + check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment" + check-parenthesize "a b c\n d ef\n\n g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments" + check-parenthesize "a b c\n '(d ef)\n\n g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments" + check-parenthesize " a b c", "(a b c)", "F - test-parenthesize/13-initial-indent" + check-parenthesize " a b c\n 34", "(a b c) 34", "F - test-parenthesize/14-initial-indent" + check-parenthesize "def foo\n a b c\n d e\nnewdef", "(def foo (a b c) (d e)) newdef", "F - test-parenthesize/14" + check-parenthesize " a a\n a\ny", "(a a a) y", "F - test-parenthesize/15-group-before-too-much-outdent" + check-parenthesize "a `(b c)", "(a `(b c))", "F - test-parenthesize/16-backquote" + check-parenthesize "'a b c", "('a b c)", "F - test-parenthesize/17-quote" + check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote" + check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice" + check-parenthesize "a b\n 'c\n ,d\n e", "(a b 'c ,d e)", "F - 
test-parenthesize/20-quotes-are-not-words" + check-parenthesize "def foo\n#a b c\n de\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments" +} + +fn test-parenthesize-skips-lines-with-initial-parens { + check-parenthesize "(a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/1" + check-parenthesize "(a (b c))", "(a (b c))", "F - test-parenthesize-skips-lines-with-initial-parens/2" + check-parenthesize "(a () b)", "(a () b)", "F - test-parenthesize-skips-lines-with-initial-parens/3" + check-parenthesize " (a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/initial-indent" + check-parenthesize "(a b c\n bc\n def\n gh)", "(a b c bc def gh)", "F - test-parenthesize-skips-lines-with-initial-parens/outdent" + check-parenthesize "(a b c\n (def gh)\n (i j k)\n lm\n\n\n (no p))", "(a b c (def gh) (i j k) lm (no p))", "F - test-parenthesize-skips-lines-with-initial-parens/fully-parenthesized" + check-parenthesize ",(a b c)", ",(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote" + check-parenthesize ",@(a b c)", ",@(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote-splice" + check-parenthesize ",,(a b c)", ",,(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-nested-unquote" + check-parenthesize "(def foo\n #a b c\n d e)\nnew", "(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment" + check-parenthesize "`(def foo\n #a b c\n d e)\nnew", "`(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment-after-backquote" + check-parenthesize " (a b c\n d e)", "(a b c d e)", "F - test-parenthesize-skips-lines-with-initial-parens/with-indent" + check-parenthesize "def foo(a (b)\n c d)\n d e\nnew", "(def foo (a (b) c d) (d e)) new", "F - test-parenthesize-skips-lines-with-initial-parens/inside-arg-lists" +} + +fn test-parenthesize-skips-single-word-lines { + # lines usually 
get grouped with later indented lines + check-parenthesize "a b\n c", "(a b c)", "F - test-parenthesize-skips-single-word-lines/0" + # but single-word lines don't + check-parenthesize "a\n c", "a c", "F - test-parenthesize-skips-single-word-lines/1" + check-parenthesize "a", "a", "F - test-parenthesize-skips-single-word-lines/2" + check-parenthesize "a \nb\nc", "a b c", "F - test-parenthesize-skips-single-word-lines/3" +} + +fn check-parenthesize actual: (addr array byte), expected: (addr array byte), message: (addr array byte) { + var trace-storage: trace + var trace/edx: (addr trace) <- address trace-storage + initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible + # + var actual-buffer-storage: gap-buffer + var actual-buffer/eax: (addr gap-buffer) <- address actual-buffer-storage + initialize-gap-buffer-with actual-buffer, actual + var actual-tokens-storage: (stream token 0x40) + var actual-tokens/esi: (addr stream token) <- address actual-tokens-storage + tokenize-and-parenthesize actual-buffer, actual-tokens, trace + # + var expected-buffer-storage: gap-buffer + var expected-buffer/eax: (addr gap-buffer) <- address expected-buffer-storage + initialize-gap-buffer-with expected-buffer, expected + var expected-tokens-storage: (stream token 0x40) + var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage + tokenize-and-strip-indent expected-buffer, expected-tokens, trace + # + rewind-stream actual-tokens + check-token-streams-data-equal actual-tokens, expected-tokens, message +} + +fn check-token-streams-data-equal actual: (addr stream token), expected: (addr stream token), message: (addr array byte) { + rewind-stream actual + rewind-stream expected + { + # loop termination checks + var actual-done?/eax: boolean <- stream-empty? actual + { + compare actual-done?, 0/false + break-if-= + var expected-done?/eax: boolean <- stream-empty? 
expected + compare expected-done?, 0/false + { + break-if-!= + # actual empty, but expected not empty + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too short\n", 3/fg=cyan 0/bg + count-test-failure + return + } + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg/cyan, 0/bg + return + } + var expected-done?/eax: boolean <- stream-empty? expected + compare expected-done?, 0/false + { + break-if-= + # actual not empty, but expected empty + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too long\n", 3/fg=cyan 0/bg + count-test-failure + return + } + # loop body + var curr-token-storage: token + var curr-token/ecx: (addr token) <- address curr-token-storage + read-from-stream actual, curr-token + var expected-token-storage: token + var expected-token/edx: (addr token) <- address expected-token-storage + read-from-stream expected, expected-token + var match?/eax: boolean <- tokens-equal? curr-token, expected-token + compare match?, 0/false + { + break-if-!= + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg + count-test-failure + return + } + loop + } +} + +fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) { + var tokens-storage: (stream token 0x400) + var tokens/edx: (addr stream token) <- address tokens-storage + tokenize in, tokens, trace + var error?/eax: boolean <- has-errors? 
trace + compare error?, 0/false + { + break-if-= + return + } + parenthesize tokens, out, trace +} + +fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) { + var tokens-storage: (stream token 0x400) + var tokens/edx: (addr stream token) <- address tokens-storage + tokenize in, tokens, trace + var error?/eax: boolean <- has-errors? trace + compare error?, 0/false + { + break-if-= + return + } + emit-non-indent-tokens tokens, out } diff --git a/shell/tokenize.mu b/shell/tokenize.mu index 6b1cbffb..1675b728 100644 --- a/shell/tokenize.mu +++ b/shell/tokenize.mu @@ -1361,3 +1361,58 @@ fn write-token-text-data out: (addr stream byte), _self: (addr token) { rewind-stream data write-stream out, data } + +fn tokens-equal? _a: (addr token), _b: (addr token) -> _/eax: boolean { + var a/edx: (addr token) <- copy _a + var b/ebx: (addr token) <- copy _b + var a-type-addr/eax: (addr int) <- get a, type + var a-type/eax: int <- copy *a-type-addr + var b-type-addr/ecx: (addr int) <- get b, type + compare a-type, *b-type-addr + { + break-if-= + return 0/false + } + compare a-type, 2/skip + { + break-if-!= + # skip tokens have no other data + return 1/true + } + compare a-type, 3/indent + { + break-if-!= + # indent tokens are equal when their indent levels (number-data) match + var a-number-data-addr/eax: (addr int) <- get a, number-data + var a-number-data/eax: int <- copy *a-number-data-addr + var b-number-data-addr/ecx: (addr int) <- get b, number-data + compare a-number-data, *b-number-data-addr + { + break-if-= + return 0/false + } + return 1/true + } + var b-data-ah/eax: (addr handle stream byte) <- get b, text-data + var _b-data/eax: (addr stream byte) <- lookup *b-data-ah + var b-data/ebx: (addr stream byte) <- copy _b-data + var a-data-ah/eax: (addr handle stream byte) <- get a, text-data + var a-data/eax: (addr stream byte) <- lookup *a-data-ah + var data-match?/eax: boolean <- streams-data-equal? a-data, b-data + return data-match? 
+} + +fn dump-token-from-cursor _t: (addr token) { + var t/esi: (addr token) <- copy _t + var type/eax: (addr int) <- get t, type + draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *type, 7/fg 0/bg + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg + var text-ah/eax: (addr handle stream byte) <- get t, text-data + var text/eax: (addr stream byte) <- lookup *text-ah + rewind-stream text + draw-stream-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, text, 7/fg 0/bg + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg + var num/eax: (addr int) <- get t, number-data + draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *num, 7/fg 0/bg + draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "\n", 7/fg 0/bg +}