From 9d7d99fe6cc5a05960ef52cdfa8acefabf8e40bf Mon Sep 17 00:00:00 2001
From: "Kartik K. Agaram" <vc@akkartik.com>
Date: Sun, 20 Jun 2021 20:36:47 -0700
Subject: [PATCH] snapshot

This is going better than expected; just 3 failing tests among the new
ones.
---
 109stream-equal.subx  |   2 +-
 309stream.subx        |  20 ++-
 400.mu                |   3 +-
 mu-init.subx          |  33 ++--
 shell/README.md       |  58 ++++++-
 shell/int-stack.mu    |  69 ++++++++
 shell/parenthesize.mu | 373 +++++++++++++++++++++++++++++++++++++++++-
 shell/tokenize.mu     |  55 +++++++
 8 files changed, 591 insertions(+), 22 deletions(-)
 create mode 100644 shell/int-stack.mu

diff --git a/109stream-equal.subx b/109stream-equal.subx
index 8f6cf1bf..556afd91 100644
--- a/109stream-equal.subx
+++ b/109stream-equal.subx
@@ -190,7 +190,7 @@ test-stream-data-equal-size-check:
     5d/pop-to-ebp
     c3/return
 
-# helper for later tests
+# helper for tests
 check-stream-equal:  # f: (addr stream byte), s: (addr array byte), msg: (addr array byte)
     # . prologue
     55/push-ebp
diff --git a/309stream.subx b/309stream.subx
index c39a7146..61da00ae 100644
--- a/309stream.subx
+++ b/309stream.subx
@@ -208,7 +208,7 @@ $stream-final:end:
     c3/return
 
 # compare all the data in two streams (ignoring the read pointer)
-streams-data-equal?:  # f: (addr stream byte), s: (addr array byte) -> result/eax: boolean
+streams-data-equal?:  # a: (addr stream byte), b: (addr stream byte) -> result/eax: boolean
     # pseudocode:
     #   awrite = a->write
     #   if (awrite != b->write) return false
@@ -295,3 +295,21 @@ $streams-data-equal?:end:
     89/<- %esp 5/r32/ebp
     5d/pop-to-ebp
     c3/return
+
+# helper for tests
+check-streams-data-equal:  # s: (addr stream _), expected: (addr stream _), msg: (addr array byte)
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # . save registers
+    50/push-eax
+    # compare the data of the two streams, then check that the result is 1/true, passing 'msg' along
+    (streams-data-equal? *(ebp+8) *(ebp+0xc))  # => eax
+    (check-ints-equal %eax 1 *(ebp+0x10))
+$check-streams-data-equal:end:
+    # . restore registers
+    58/pop-to-eax
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
diff --git a/400.mu b/400.mu
index a0f2d85b..05a5463f 100644
--- a/400.mu
+++ b/400.mu
@@ -43,8 +43,9 @@ sig count-of-events -> _/eax: int
 sig clear-stream f: (addr stream _)
 sig rewind-stream f: (addr stream _)
 sig stream-data-equal? f: (addr stream byte), s: (addr array byte) -> _/eax: boolean
-sig streams-data-equal? f: (addr stream byte), s: (addr stream byte) -> _/eax: boolean
+sig streams-data-equal? a: (addr stream byte), b: (addr stream byte) -> _/eax: boolean
 sig check-stream-equal f: (addr stream byte), s: (addr array byte), msg: (addr array byte)
+sig check-streams-data-equal s: (addr stream _), expected: (addr stream _), msg: (addr array byte)
 sig next-stream-line-equal? f: (addr stream byte), s: (addr array byte) -> _/eax: boolean
 sig check-next-stream-line-equal f: (addr stream byte), s: (addr array byte), msg: (addr array byte)
 sig write f: (addr stream byte), s: (addr array byte)
diff --git a/mu-init.subx b/mu-init.subx
index 4bd7abc6..25c1149e 100644
--- a/mu-init.subx
+++ b/mu-init.subx
@@ -14,22 +14,23 @@ Entry:
   bd/copy-to-ebp 0/imm32
   #
 #?   (main 0 0 Primary-bus-secondary-drive)
-#?   (test-tokenize-indent)
-#?   (test-run-integer)
-#?   (test-run-expand-trace)
-  # always first run tests
-  (run-tests)
-  (num-test-failures)  # => eax
-  # call main if tests all passed
-  {
-    3d/compare-eax-and 0/imm32
-    75/jump-if-!= break/disp8
-    c7 0/subop/copy *Running-tests? 0/imm32/false
-    (clear-real-screen)
-    c7 0/subop/copy *Real-screen-cursor-x 0/imm32
-    c7 0/subop/copy *Real-screen-cursor-y 0/imm32
-    (main 0 0 Primary-bus-secondary-drive)
-  }
+#?   (set-cursor-position 0 0x40 0x20)
+  (test-parenthesize)
+  (test-parenthesize-skips-lines-with-initial-parens)
+  (test-parenthesize-skips-single-word-lines)
+#?   # always first run tests
+#?   (run-tests)
+#?   (num-test-failures)  # => eax
+#?   # call main if tests all passed
+#?   {
+#?     3d/compare-eax-and 0/imm32
+#?     75/jump-if-!= break/disp8
+#?     c7 0/subop/copy *Running-tests? 0/imm32/false
+#?     (clear-real-screen)
+#?     c7 0/subop/copy *Real-screen-cursor-x 0/imm32
+#?     c7 0/subop/copy *Real-screen-cursor-y 0/imm32
+#?     (main 0 0 Primary-bus-secondary-drive)
+#?   }
 
   # hang indefinitely
   {
diff --git a/shell/README.md b/shell/README.md
index 6bde168b..a9d8ea7b 100644
--- a/shell/README.md
+++ b/shell/README.md
@@ -62,7 +62,63 @@ Currently runs a tiny dialect of Lisp. Steps to run it from the top-level:
   qemu-system-i386 -m 2G -enable-kvm -hda code.img -hdb data.img
   ```
 
-*Known issues*
+### Indent-sensitivity
+
+The Mu shell is a Lisp under the hood. However, you'll see a lot fewer
+parentheses than in most Lisps because it can often insert them automatically
+based on indentation.
+
+If you're already used to Lisp and always type in all parens, everything will
+continue to work. In particular, paren-insertion is disabled inside explicitly
+added parens. Once Mu sees a `(`, it stops trying to be smart until it sees a
+`)`.
+
+I recommend removing parens sparingly: only from top-level forms (`def`, `mac`,
+`define`) and control-flow words (`if`, `while`, `for`, etc.). Continue using
+parens for most real function calls. When in doubt, insert parens.
+
+The rule for when parens are inserted is:
+
+> Multi-word lines without leading parens are implicitly grouped with later
+> indented lines
+
+For example:
+
+```
+if (> n 0)      =>      (if (> n 0)
+  34                      34)
+```
+
+No indented lines after? Parens go around a single line:
+
+```
+f a             =>      (f a)
+f b                     (f b)
+```
+
+Lines with a single word are never wrapped in parens:
+
+```
+def (foo)       =>      (def (foo)
+  42                      42)
+```
+
+Lines with a leading paren never get more parens:
+
+```
+def (foo x)     =>      (def (foo x)
+  (print x) x             (print x) x)
+```
+
+Putting these rules together, parens are not required around the `if` in:
+
+```
+if (= 1 (% x 2))
+  'odd
+  'even
+```
+
+### Known issues
 
 * No mouse support.
 
diff --git a/shell/int-stack.mu b/shell/int-stack.mu
new file mode 100644
index 00000000..a3ffa6eb
--- /dev/null
+++ b/shell/int-stack.mu
@@ -0,0 +1,69 @@
+type int-stack {
+  data: (handle array int)
+  top: int
+}
+
+fn initialize-int-stack _self: (addr int-stack), n: int {
+  var self/esi: (addr int-stack) <- copy _self
+  var d/edi: (addr handle array int) <- get self, data
+  populate d, n  # backing array with room for n ints
+  var top/eax: (addr int) <- get self, top
+  copy-to *top, 0  # top == 0 means the stack is empty
+}
+
+fn push-int-stack _self: (addr int-stack), _val: int {
+  var self/esi: (addr int-stack) <- copy _self
+  var top-addr/ecx: (addr int) <- get self, top
+  var data-ah/edx: (addr handle array int) <- get self, data
+  var data/eax: (addr array int) <- lookup *data-ah
+  var top/edx: int <- copy *top-addr
+  var dest-addr/edx: (addr int) <- index data, top  # slot at index 'top', i.e. just past the current topmost element
+  var val/eax: int <- copy _val
+  copy-to *dest-addr, val
+  add-to *top-addr, 1  # NOTE(review): nothing here checks 'top' against the capacity given to initialize-int-stack -- confirm overflow is caught elsewhere
+}
+
+fn pop-int-stack _self: (addr int-stack) -> _/eax: int {
+  var self/esi: (addr int-stack) <- copy _self
+  var top-addr/ecx: (addr int) <- get self, top
+  {
+    compare *top-addr, 0
+    break-if->
+    return 0  # empty stack: return 0 and leave 'top' unchanged
+  }
+  subtract-from *top-addr, 1  # 'top' now indexes the element being popped
+  var data-ah/edx: (addr handle array int) <- get self, data
+  var data/eax: (addr array int) <- lookup *data-ah
+  var top/edx: int <- copy *top-addr
+  var result-addr/eax: (addr int) <- index data, top
+  var val/eax: int <- copy *result-addr
+  return val
+}
+
+fn int-stack-empty? _self: (addr int-stack) -> _/eax: boolean {
+  var self/esi: (addr int-stack) <- copy _self
+  var top-addr/ecx: (addr int) <- get self, top
+  compare *top-addr, 0  # the stack is empty exactly when top == 0
+  {
+    break-if-=
+    return 0/false
+  }
+  return 1/true
+}
+
+fn int-stack-top _self: (addr int-stack) -> _/eax: int {
+  var self/esi: (addr int-stack) <- copy _self
+  var top-addr/ecx: (addr int) <- get self, top
+  var top/ecx: int <- copy *top-addr  # work on a copy; unlike pop-int-stack, the stack itself is not modified
+  {
+    compare top, 0
+    break-if->
+    return 0  # empty stack: return 0
+  }
+  top <- decrement  # index of the topmost element
+  var data-ah/edx: (addr handle array int) <- get self, data
+  var data/eax: (addr array int) <- lookup *data-ah
+  var result-addr/eax: (addr int) <- index data, top
+  var val/eax: int <- copy *result-addr
+  return val
+}
diff --git a/shell/parenthesize.mu b/shell/parenthesize.mu
index 67b50854..f99f8c7f 100644
--- a/shell/parenthesize.mu
+++ b/shell/parenthesize.mu
@@ -1,21 +1,390 @@
-# TODO: not really implemented yet
+## insert explicit parens based on indentation
+
+# Design goals:
+#  keywords in other languages should look different from functions: def, if, while, etc.
+#  fully-parenthesized expressions should not be messed with
+#    ignore indent when lines start with parens
+#    ignore indent inside parens
+#    no modes to disable this pass
+#  introduce no new operators
+#    the language doesn't use nested lists like Scheme's `cond`
+#    lines with one word are never wrapped in parens
+#  encourage macros to explicitly insert all parens
+#    ignore indent inside backquote
+
 fn parenthesize in: (addr stream token), out: (addr stream token), trace: (addr trace) {
   trace-text trace, "parenthesize", "insert parens"
   trace-lower trace
+  var buffer-storage: (stream token 0x40)
+  var buffer/edi: (addr stream token) <- address buffer-storage  # tokens of the current line held back until we know whether the line needs a '('
+  var curr-line-indent: int
+  var num-words-in-line: int
+  var paren-at-start-of-line?: boolean
+  var explicit-open-parens-storage: int
+  var explicit-open-parens/ebx: (addr int) <- address explicit-open-parens-storage  # net count of parens from the input emitted so far; maintained by emit
+  var implicit-open-parens-storage: int-stack
+  var implicit-open-parens/esi: (addr int-stack) <- address implicit-open-parens-storage  # indent levels of lines we inserted a '(' for and haven't closed yet
+  initialize-int-stack implicit-open-parens, 0x10  # potentially a major memory leak
   rewind-stream in
   {
     var done?/eax: boolean <- stream-empty? in
     compare done?, 0/false
     break-if-!=
     #
+    var curr-token-storage: token
+    var curr-token/ecx: (addr token) <- address curr-token-storage
+    read-from-stream in, curr-token
+#?     dump-token-from-cursor curr-token
+    # update state: an indent token starts a new line and resets the per-line counters
+    {
+      var is-indent?/eax: boolean <- indent-token? curr-token
+      compare is-indent?, 0/false
+      break-if-=
+      copy-to num-words-in-line, 0
+      copy-to paren-at-start-of-line?, 0/false
+      var tmp/eax: int <- indent-level curr-token
+      copy-to curr-line-indent, tmp
+    }
+    {
+      var is-word?/eax: boolean <- word-token? curr-token
+      compare is-word?, 0/false
+      break-if-=
+      increment num-words-in-line
+    }
+    {
+      compare num-words-in-line, 0
+      break-if-!=
+      var is-open?/eax: boolean <- open-paren-token? curr-token
+      compare is-open?, 0/false
+      break-if-=
+      copy-to paren-at-start-of-line?, 1/true
+    }
+    # emit curr-token (plus anything buffered), or buffer it, possibly inserting an implicit '(' first
+    $parenthesize:emit: {
+      {
+        compare paren-at-start-of-line?, 0/false
+        break-if-=
+#?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "A", 7/fg 0/bg
+        emit-all buffer, curr-token, out, explicit-open-parens
+        break $parenthesize:emit
+      }
+      {
+        var is-indent?/eax: boolean <- indent-token? curr-token
+        compare is-indent?, 0/false
+        break-if-=
+#?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "B", 7/fg 0/bg
+        emit-all buffer, curr-token, out, explicit-open-parens
+        break $parenthesize:emit
+      }
+      {
+        compare num-words-in-line, 2
+        break-if->=
+#?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "C", 7/fg 0/bg
+        write-to-stream buffer, curr-token  # fewer than 2 words so far: hold the token back
+        break $parenthesize:emit
+      }
+      {
+        compare num-words-in-line, 2
+        break-if-!=
+        var is-word?/eax: boolean <- word-token? curr-token
+        compare is-word?, 0/false
+        break-if-=
+        compare *explicit-open-parens, 0
+        break-if-!=
+#?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "(\n", 7/fg 0/bg
+        var paren-storage: token
+        var paren-token/eax: (addr token) <- address paren-storage
+        initialize-token paren-token, "("
+        write-to-stream out, paren-token
+        push-int-stack implicit-open-parens, curr-line-indent  # remember this line's indent so we know when to close the paren
+      }
+      emit-all buffer, curr-token, out, explicit-open-parens
+    }
+    {
+      var is-indent?/eax: boolean <- indent-token? curr-token
+      compare is-indent?, 0/false
+      break-if-=
+      {
+        # . loop check
+        var done?/eax: boolean <- int-stack-empty? implicit-open-parens
+        compare done?, 0/false
+        break-if-!=
+        var top-indent/eax: int <- int-stack-top implicit-open-parens
+        compare top-indent, curr-line-indent  # keep closing while the innermost implicit paren was opened at this indent or deeper
+        break-if-<
+        # . loop body
+        var paren-storage: token
+        var paren-token/eax: (addr token) <- address paren-storage
+        initialize-token paren-token, ")"
+        write-to-stream out, paren-token
+        # . update
+        var dummy/eax: int <- pop-int-stack implicit-open-parens
+        loop
+      }
+    }
+    loop
+  }
+  emit-all buffer, 0/no-curr-token, out, explicit-open-parens  # flush anything still buffered at end of input
+  {
+    # . loop check
+    var done?/eax: boolean <- int-stack-empty? implicit-open-parens
+    compare done?, 0/false
+    break-if-!=
+    # . loop body: close an implicit paren that is still open
+    var paren-storage: token
+    var paren-token/eax: (addr token) <- address paren-storage
+    initialize-token paren-token, ")"
+    write-to-stream out, paren-token
+    # . update
+    var dummy/eax: int <- pop-int-stack implicit-open-parens
+    loop
+  }
+  trace-higher trace
+}
+
+fn indent-level _in: (addr token) -> _/eax: int {
+  var in/eax: (addr token) <- copy _in
+  var result/eax: (addr int) <- get in, number-data  # the level is read from the token's number-data field
+  return *result
+}
+
+fn word-token? in: (addr token) -> _/eax: boolean {
+  {
+    var is-indent?/eax: boolean <- indent-token? in  # each check below breaks out to 'return 0/false' on a match
+    compare is-indent?, 0/false
+    break-if-!=
+    var is-bracket?/eax: boolean <- bracket-token? in  # overzealously checks for [], but shouldn't ever encounter it
+    compare is-bracket?, 0/false
+    break-if-!=
+    var is-quote?/eax: boolean <- quote-token? in
+    compare is-quote?, 0/false
+    break-if-!=
+    var is-backquote?/eax: boolean <- backquote-token? in
+    compare is-backquote?, 0/false
+    break-if-!=
+    var is-unquote?/eax: boolean <- unquote-token? in
+    compare is-unquote?, 0/false
+    break-if-!=
+    var is-unquote-splice?/eax: boolean <- unquote-splice-token? in
+    compare is-unquote-splice?, 0/false
+    break-if-!=
+    return 1/true  # none of the above: counts as a word
+  }
+  return 0/false
+}
+
+fn emit-all first: (addr stream token), second: (addr token), out: (addr stream token), explicit-open-parens: (addr int) {
+  rewind-stream first  # emit everything in 'first' from the beginning, in order
+  {
+    var done?/eax: boolean <- stream-empty? first
+    compare done?, 0/false
+    break-if-!=
+    var curr-token-storage: token
+    var curr-token/eax: (addr token) <- address curr-token-storage
+    read-from-stream first, curr-token
+    emit curr-token, out, explicit-open-parens
+    loop
+  }
+  clear-stream first  # leave 'first' empty for reuse by the caller
+  {
+    compare second, 0  # 'second' is optional; 0 means there is no extra token to emit
+    break-if-=
+    emit second, out, explicit-open-parens
+  }
+}
+
+fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr int) {
+  {
+    var is-indent?/eax: boolean <- indent-token? t
+    compare is-indent?, 0/false
+    break-if-=
+    return  # indent tokens are never written to 'out'
+  }
+  write-to-stream out, t
+  var explicit-open-parens/edi: (addr int) <- copy explicit-open-parens  # load the address into a register so we can update the count in place
+  {
+    var is-open?/eax: boolean <- open-paren-token? t
+    compare is-open?, 0/false
+    break-if-=
+    increment *explicit-open-parens
+  }
+  {
+    var is-close?/eax: boolean <- close-paren-token? t
+    compare is-close?, 0/false
+    break-if-=
+    decrement *explicit-open-parens
+    compare *explicit-open-parens, 0  # a negative count means a ')' with no matching '(' emitted before it
+    break-if->=
+    abort "emit: extra ')'"
+  }
+}
+
+fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) {
+  rewind-stream in
+  {
+    var done?/eax: boolean <- stream-empty? in
+    compare done?, 0/false
+    break-if-!=
     var token-storage: token
     var token/edx: (addr token) <- address token-storage
     read-from-stream in, token
+    var is-skip?/eax: boolean <- skip-token? token  # skip tokens are dropped too, just like indent tokens below
+    compare is-skip?, 0/false
+    loop-if-!=
     var is-indent?/eax: boolean <- indent-token? token
     compare is-indent?, 0/false
     loop-if-!=
     write-to-stream out, token  # shallow copy
     loop
   }
-  trace-higher trace
+}
+
+fn test-parenthesize {
+  check-parenthesize "a b c  ", "(a b c)", "F - test-parenthesize/1"  # args: input, expected output written with explicit parens, failure message
+  check-parenthesize "a (b)", "(a (b))", "F - test-parenthesize/2"
+  check-parenthesize "a (b c)", "(a (b c))", "F - test-parenthesize/3"
+  check-parenthesize "a (b c) d", "(a (b c) d)", "F - test-parenthesize/4"
+  check-parenthesize "a b c\nd ef", "(a b c) (d ef)", "F - test-parenthesize/5-multiple-lines"
+  check-parenthesize "a b c\n  d ef", "(a b c (d ef))", "F - test-parenthesize/6-indented"
+  check-parenthesize "a b c\n  (d ef)", "(a b c (d ef))", "F - test-parenthesize/7-indented"
+  check-parenthesize "a b c\n  (d ef)\n  g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented"
+  check-parenthesize "a b c\n  d e\n    f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented"
+  check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment"
+  check-parenthesize "a b c\n    d ef\n\n  g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
+  check-parenthesize "a b c\n  '(d ef)\n\n  g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
+  check-parenthesize "  a b c", "(a b c)", "F - test-parenthesize/13-initial-indent"
+  check-parenthesize "    a b c\n  34", "(a b c) 34", "F - test-parenthesize/14-initial-indent"
+  check-parenthesize "def foo\n    a b c\n  d e\nnewdef", "(def foo (a b c) (d e)) newdef", "F - test-parenthesize/14"
+  check-parenthesize "  a a\n    a\ny", "(a a a) y", "F - test-parenthesize/15-group-before-too-much-outdent"
+  check-parenthesize "a `(b c)", "(a `(b c))", "F - test-parenthesize/16-backquote"
+  check-parenthesize "'a b c", "('a b c)", "F - test-parenthesize/17-quote"
+  check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote"
+  check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice"
+  check-parenthesize "a b\n  'c\n  ,d\n  e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words"
+  check-parenthesize "def foo\n#a b c\n  d e\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
+}
+
+fn test-parenthesize-skips-lines-with-initial-parens {
+  check-parenthesize "(a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/1"  # input that already has its parens should come out with no extra ones
+  check-parenthesize "(a (b c))", "(a (b c))", "F - test-parenthesize-skips-lines-with-initial-parens/2"
+  check-parenthesize "(a () b)", "(a () b)", "F - test-parenthesize-skips-lines-with-initial-parens/3"
+  check-parenthesize "  (a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/initial-indent"
+  check-parenthesize "(a b c\n  bc\n    def\n  gh)", "(a b c bc def gh)", "F - test-parenthesize-skips-lines-with-initial-parens/outdent"
+  check-parenthesize "(a b c\n  (def gh)\n    (i j k)\n  lm\n\n\n    (no p))", "(a b c (def gh) (i j k) lm (no p))", "F - test-parenthesize-skips-lines-with-initial-parens/fully-parenthesized"
+  check-parenthesize ",(a b c)", ",(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote"
+  check-parenthesize ",@(a b c)", ",@(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote-splice"
+  check-parenthesize ",,(a b c)", ",,(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-nested-unquote"
+  check-parenthesize "(def foo\n    #a b c\n  d e)\nnew", "(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment"
+  check-parenthesize "`(def foo\n    #a b c\n  d e)\nnew", "`(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment-after-backquote"
+  check-parenthesize "  (a b c\n    d e)", "(a b c d e)", "F - test-parenthesize-skips-lines-with-initial-parens/with-indent"
+  check-parenthesize "def foo(a (b)\n    c d)\n  d e\nnew", "(def foo (a (b) c d) (d e)) new", "F - test-parenthesize-skips-lines-with-initial-parens/inside-arg-lists"  # multi-line arg list in explicit parens, inside a definition that gets implicit ones
+}
+
+fn test-parenthesize-skips-single-word-lines {
+  # lines with 2 or more words get grouped with later indented lines
+  check-parenthesize "a b\n  c", "(a b c)", "F - test-parenthesize-skips-single-word-lines/0"
+  # but single-word lines don't, and never get parens of their own
+  check-parenthesize "a\n  c", "a c", "F - test-parenthesize-skips-single-word-lines/1"
+  check-parenthesize "a", "a", "F - test-parenthesize-skips-single-word-lines/2"
+  check-parenthesize "a  \nb\nc", "a b c", "F - test-parenthesize-skips-single-word-lines/3"  # trailing spaces after the word
+}
+
+fn check-parenthesize actual: (addr array byte), expected: (addr array byte), message: (addr array byte) {
+  var trace-storage: trace
+  var trace/edx: (addr trace) <- address trace-storage
+  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
+  # tokenize 'actual' and run the tokens through parenthesize
+  var actual-buffer-storage: gap-buffer
+  var actual-buffer/eax: (addr gap-buffer) <- address actual-buffer-storage
+  initialize-gap-buffer-with actual-buffer, actual
+  var actual-tokens-storage: (stream token 0x40)
+  var actual-tokens/esi: (addr stream token) <- address actual-tokens-storage
+  tokenize-and-parenthesize actual-buffer, actual-tokens, trace
+  # tokenize 'expected', dropping skip and indent tokens but inserting no parens
+  var expected-buffer-storage: gap-buffer
+  var expected-buffer/eax: (addr gap-buffer) <- address expected-buffer-storage
+  initialize-gap-buffer-with expected-buffer, expected
+  var expected-tokens-storage: (stream token 0x40)
+  var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage
+  tokenize-and-strip-indent expected-buffer, expected-tokens, trace
+  # compare the two token streams token by token, printing 'message' on failure
+  rewind-stream actual-tokens
+  check-token-streams-data-equal actual-tokens, expected-tokens, message
+}
+
+fn check-token-streams-data-equal actual: (addr stream token), expected: (addr stream token), message: (addr array byte) {
+  rewind-stream actual
+  rewind-stream expected
+  {
+    # loop termination checks: stop as soon as either stream runs out
+    var actual-done?/eax: boolean <- stream-empty? actual
+    {
+      compare actual-done?, 0/false
+      break-if-=
+      var expected-done?/eax: boolean <- stream-empty? expected
+      compare expected-done?, 0/false
+      {
+        break-if-!=
+        # actual empty, but expected not empty
+        draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg
+        draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too short\n", 3/fg=cyan 0/bg
+        count-test-failure
+        return
+      }
+      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg/cyan, 0/bg  # both streams ran out together with no mismatch: success
+      return
+    }
+    var expected-done?/eax: boolean <- stream-empty? expected
+    compare expected-done?, 0/false
+    {
+      break-if-=
+      # actual not empty, but expected empty
+      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg
+      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too long\n", 3/fg=cyan 0/bg
+      count-test-failure
+      return
+    }
+    # loop body: read one token from each stream and compare them
+    var curr-token-storage: token
+    var curr-token/ecx: (addr token) <- address curr-token-storage
+    read-from-stream actual, curr-token
+    var expected-token-storage: token
+    var expected-token/edx: (addr token) <- address expected-token-storage
+    read-from-stream expected, expected-token
+    var match?/eax: boolean <- tokens-equal? curr-token, expected-token
+    compare match?, 0/false
+    {
+      break-if-!=
+      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg  # NOTE(review): unlike the two paths above, no reason or newline is printed after the message
+      count-test-failure
+      return
+    }
+    loop
+  }
+}
+
+fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
+  var tokens-storage: (stream token 0x400)
+  var tokens/edx: (addr stream token) <- address tokens-storage  # intermediate stream holding the raw output of tokenize
+  tokenize in, tokens, trace
+  var error?/eax: boolean <- has-errors? trace
+  compare error?, 0/false
+  {
+    break-if-=
+    return  # the trace has errors after tokenizing: nothing is written to 'out'
+  }
+  parenthesize tokens, out, trace
+}
+
+fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
+  var tokens-storage: (stream token 0x400)
+  var tokens/edx: (addr stream token) <- address tokens-storage  # intermediate stream holding the raw output of tokenize
+  tokenize in, tokens, trace
+  var error?/eax: boolean <- has-errors? trace
+  compare error?, 0/false
+  {
+    break-if-=
+    return  # the trace has errors after tokenizing: nothing is written to 'out'
+  }
+  emit-non-indent-tokens tokens, out
 }
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index 6b1cbffb..1675b728 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -1361,3 +1361,58 @@ fn write-token-text-data out: (addr stream byte), _self: (addr token) {
   rewind-stream data
   write-stream out, data
 }
+
+fn tokens-equal? _a: (addr token), _b: (addr token) -> _/eax: boolean {
+  var a/edx: (addr token) <- copy _a
+  var b/ebx: (addr token) <- copy _b
+  var a-type-addr/eax: (addr int) <- get a, type
+  var a-type/eax: int <- copy *a-type-addr
+  var b-type-addr/ecx: (addr int) <- get b, type
+  compare a-type, *b-type-addr  # tokens of different types are never equal
+  {
+    break-if-=
+    return 0/false
+  }
+  compare a-type, 2/skip
+  {
+    break-if-!=
+    # skip tokens have no other data
+    return 1/true
+  }
+  compare a-type, 3/indent
+  {
+    break-if-!=
+    # indent tokens are equal iff their number-data matches
+    var a-number-data-addr/eax: (addr int) <- get a, number-data
+    var a-number-data/eax: int <- copy *a-number-data-addr
+    var b-number-data-addr/ecx: (addr int) <- get b, number-data
+    compare a-number-data, *b-number-data-addr
+    {
+      break-if-=
+      return 0/false
+    }
+    return 1/true
+  }
+  var b-data-ah/eax: (addr handle stream byte) <- get b, text-data  # every other token type is compared by its text-data
+  var _b-data/eax: (addr stream byte) <- lookup *b-data-ah
+  var b-data/ebx: (addr stream byte) <- copy _b-data
+  var a-data-ah/eax: (addr handle stream byte) <- get a, text-data
+  var a-data/eax: (addr stream byte) <- lookup *a-data-ah
+  var data-match?/eax: boolean <- streams-data-equal? a-data, b-data
+  return data-match?
+}
+
+fn dump-token-from-cursor _t: (addr token) {
+  var t/esi: (addr token) <- copy _t  # debugging aid: draws the token's type, text-data and number-data separated by spaces, then a newline
+  var type/eax: (addr int) <- get t, type
+  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *type, 7/fg 0/bg
+  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
+  var text-ah/eax: (addr handle stream byte) <- get t, text-data
+  var text/eax: (addr stream byte) <- lookup *text-ah
+  rewind-stream text  # note: this rewinds the token's own text stream
+  draw-stream-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, text, 7/fg 0/bg
+  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
+  var num/eax: (addr int) <- get t, number-data
+  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *num, 7/fg 0/bg
+  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "\n", 7/fg 0/bg
+}