start guessing parentheses based on indentation

2021-06-20 21:18:38 -07:00 · 2021-06-20 21:18:38 -07:00 · 6e1aa99a00
parent 9d7d99fe6c
commit 6e1aa99a00
3 changed files with 66 additions and 77 deletions
--- a/mu-init.subx
+++ b/mu-init.subx
@ -15,22 +15,22 @@ Entry:
  #
 #?   (main 0 0 Primary-bus-secondary-drive)
 #?   (set-cursor-position 0 0x40 0x20)
-  (test-parenthesize)
-  (test-parenthesize-skips-lines-with-initial-parens)
-  (test-parenthesize-skips-single-word-lines)
-#?   # always first run tests
-#?   (run-tests)
-#?   (num-test-failures)  # => eax
-#?   # call main if tests all passed
-#?   {
-#?     3d/compare-eax-and 0/imm32
-#?     75/jump-if-!= break/disp8
-#?     c7 0/subop/copy *Running-tests? 0/imm32/false
-#?     (clear-real-screen)
-#?     c7 0/subop/copy *Real-screen-cursor-x 0/imm32
-#?     c7 0/subop/copy *Real-screen-cursor-y 0/imm32
-#?     (main 0 0 Primary-bus-secondary-drive)
-#?   }
+#?   (test-parenthesize)
+#?   (test-parenthesize-skips-lines-with-initial-parens)
+#?   (test-parenthesize-skips-single-word-lines)
+  # always first run tests
+  (run-tests)
+  (num-test-failures)  # => eax
+  # call main if tests all passed
+  {
+    3d/compare-eax-and 0/imm32
+    75/jump-if-!= break/disp8
+    c7 0/subop/copy *Running-tests? 0/imm32/false
+    (clear-real-screen)
+    c7 0/subop/copy *Real-screen-cursor-x 0/imm32
+    c7 0/subop/copy *Real-screen-cursor-y 0/imm32
+    (main 0 0 Primary-bus-secondary-drive)
+  }

  # hang indefinitely
  {
--- a/shell/parenthesize.mu
+++ b/shell/parenthesize.mu
@ -219,7 +219,8 @@ fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr i
  }
 }

-fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) {
+# helper for checking parenthesize
+fn emit-salient-tokens in: (addr stream token), out: (addr stream token) {
  rewind-stream in
  {
    var done?/eax: boolean <- stream-empty? in
@ -228,12 +229,15 @@ fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) {
    var token-storage: token
    var token/edx: (addr token) <- address token-storage
    read-from-stream in, token
+    # skip tokens should be skipped
    var is-skip?/eax: boolean <- skip-token? token
    compare is-skip?, 0/false
    loop-if-!=
+    # indent tokens should be skipped
    var is-indent?/eax: boolean <- indent-token? token
    compare is-indent?, 0/false
    loop-if-!=
+    #
    write-to-stream out, token  # shallow copy
    loop
  }
@ -250,7 +254,18 @@ fn test-parenthesize {
  check-parenthesize "a b c\n  (d ef)\n  g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented"
  check-parenthesize "a b c\n  d e\n    f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented"
  check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment"
+#? a b c
+#?     d ef
+#? 
+#?   g
+#?   check-parenthesize "a b c\n    d ef\n\n  g", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
+#?   check-parenthesize "a b c\n    d ef\n\n  g #abc", "(a b c (d ef)) g", "F - test-parenthesize/11-comments"
  check-parenthesize "a b c\n    d ef\n\n  g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
+#? a b c
+#?   '(d ef)
+#? 
+#?   g #abc
+#?   check-parenthesize "a b c\n  '(d ef)\n  g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
  check-parenthesize "a b c\n  '(d ef)\n\n  g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
  check-parenthesize "  a b c", "(a b c)", "F - test-parenthesize/13-initial-indent"
  check-parenthesize "    a b c\n  34", "(a b c) 34", "F - test-parenthesize/14-initial-indent"
@ -261,7 +276,7 @@ fn test-parenthesize {
  check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote"
  check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice"
  check-parenthesize "a b\n  'c\n  ,d\n  e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words"
-  check-parenthesize "def foo\n#a b c\n  de\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
+  check-parenthesize "def foo\n#a b c\n  d e\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
 }

 fn test-parenthesize-skips-lines-with-initial-parens {
@ -306,7 +321,7 @@ fn check-parenthesize actual: (addr array byte), expected: (addr array byte), me
  initialize-gap-buffer-with expected-buffer, expected
  var expected-tokens-storage: (stream token 0x40)
  var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage
-  tokenize-and-strip-indent expected-buffer, expected-tokens, trace
+  tokenize-salient expected-buffer, expected-tokens, trace
  #
  rewind-stream actual-tokens
  check-token-streams-data-equal actual-tokens, expected-tokens, message
@ -348,9 +363,11 @@ fn check-token-streams-data-equal actual: (addr stream token), expected: (addr s
    var curr-token-storage: token
    var curr-token/ecx: (addr token) <- address curr-token-storage
    read-from-stream actual, curr-token
+#?     dump-token-from-cursor curr-token
    var expected-token-storage: token
    var expected-token/edx: (addr token) <- address expected-token-storage
    read-from-stream expected, expected-token
+#?     dump-token-from-cursor expected-token
    var match?/eax: boolean <- tokens-equal? curr-token, expected-token
    compare match?, 0/false
    {
@ -376,7 +393,7 @@ fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), tr
  parenthesize tokens, out, trace
 }

-fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
+fn tokenize-salient in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
  var tokens-storage: (stream token 0x400)
  var tokens/edx: (addr stream token) <- address tokens-storage
  tokenize in, tokens, trace
@ -386,5 +403,5 @@ fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), tr
    break-if-=
    return
  }
-  emit-non-indent-tokens tokens, out
+  emit-salient-tokens tokens, out
 }
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@ -29,9 +29,6 @@ fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace)
      break-if-=
      return
    }
-    var comment?/eax: boolean <- comment-token? token
-    compare comment?, 0/false
-    loop-if-!=
    var skip?/eax: boolean <- skip-token? token
    compare skip?, 0/false
    loop-if-!=
@ -389,23 +386,35 @@ fn test-tokenize-indent {
 fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
  trace-text trace, "tokenize", "next-token"
  trace-lower trace
+  # first save an indent token
  {
    compare start-of-line?, 0/false
    break-if-=
-    next-indent-token in, out, trace
-    trace-higher trace
-    return 0/not-at-start-of-line
+    next-indent-token in, out, trace  # might not be returned
  }
  skip-spaces-from-gap-buffer in
+  var g/eax: grapheme <- peek-from-gap-buffer in
+  {
+    compare g, 0x23/comment
+    break-if-!=
+    skip-rest-of-line in
+  }
+  var g/eax: grapheme <- peek-from-gap-buffer in
  {
-    var g/eax: grapheme <- peek-from-gap-buffer in
    compare g, 0xa/newline
    break-if-!=
    trace-text trace, "tokenize", "newline"
    g <- read-from-gap-buffer in
-    initialize-skip-token out
+    initialize-skip-token out  # might drop indent if that's all there was in this line
    return 1/at-start-of-line
  }
+  {
+    compare start-of-line?, 0/false
+    break-if-=
+    # still here? no comment or newline? then return the indent token saved above
+    trace-higher trace
+    return 0/not-at-start-of-line
+  }
  {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
@ -436,14 +445,6 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
      next-stream-token in, out, trace
      break $next-token:case
    }
-    # comment
-    {
-      compare g, 0x23/comment
-      break-if-!=
-      rest-of-line in, out, trace
-      copy-to start-of-line?, 1/true
-      break $next-token:case
-    }
    # special-case: '-'
    {
      compare g, 0x2d/minus
@ -530,6 +531,11 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
      initialize-token out, ","
      break $next-token:case
    }
+    set-cursor-position 0/screen, 0x40 0x20
+    {
+      var foo/eax: int <- copy g
+      draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg
+    }
    abort "unknown token type"
  }
  trace-higher trace
@ -765,37 +771,17 @@ fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) {
  }
 }

-fn rest-of-line in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
-  trace-text trace, "tokenize", "comment"
-  var out/eax: (addr token) <- copy _out
-  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
-  populate-stream out-data-ah, 0x40
-  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
-  var out-data/edi: (addr stream byte) <- copy _out-data
+# Consume graphemes from 'in' until the next one is a newline or the scan of
+# 'in' is done. The newline itself is only peeked at, never read, so it stays
+# in 'in' for the caller to handle.
+fn skip-rest-of-line in: (addr gap-buffer) {
  {
-    var empty?/eax: boolean <- gap-buffer-scan-done? in
-    compare empty?, 0/false
-    {
-      break-if-=
-      return
-    }
-    var g/eax: grapheme <- read-from-gap-buffer in
+    var done?/eax: boolean <- gap-buffer-scan-done? in
+    compare done?, 0/false
+    break-if-!=
+    var g/eax: grapheme <- peek-from-gap-buffer in
    compare g, 0xa/newline
    break-if-=
-    write-grapheme out-data, g
+    g <- read-from-gap-buffer in  # consume
    loop
  }
-  {
-    var should-trace?/eax: boolean <- should-trace? trace
-    compare should-trace?, 0/false
-    break-if-=
-    var stream-storage: (stream byte 0x80)
-    var stream/esi: (addr stream byte) <- address stream-storage
-    write stream, "=> "
-    rewind-stream out-data
-    write-stream stream, out-data
-    trace trace, "tokenize", stream
-  }
 }

 fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
@ -1296,20 +1282,6 @@ fn stream-token? _self: (addr token) -> _/eax: boolean {
  return 1/true
 }

-fn comment-token? _self: (addr token) -> _/eax: boolean {
-  var self/eax: (addr token) <- copy _self
-  var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
-  var in-data/eax: (addr stream byte) <- lookup *in-data-ah
-  rewind-stream in-data
-  var g/eax: grapheme <- read-grapheme in-data
-  compare g, 0x23/hash
-  {
-    break-if-=
-    return 0/false
-  }
-  return 1/true
-}
-
 fn skip-token? _self: (addr token) -> _/eax: boolean {
  var self/eax: (addr token) <- copy _self
  var in-type/eax: (addr int) <- get self, type