clean up lexical categories

This commit is contained in:
Kartik K. Agaram 2021-06-22 21:43:44 -07:00
parent 59d904b4df
commit 0436ab71ea
3 changed files with 91 additions and 175 deletions

View File

@ -15,6 +15,16 @@ Entry:
#
#? (main 0 0 Primary-bus-secondary-drive)
(set-cursor-position 0 0x30 2)
(test-tokenize-number)
(test-tokenize-negative-number)
(test-tokenize-quote)
(test-tokenize-backquote)
(test-tokenize-unquote)
(test-tokenize-unquote-splice)
(test-tokenize-dotted-list)
(test-tokenize-stream-literal)
(test-tokenize-stream-literal-in-tree)
(test-tokenize-indent)
(test-infix)
#? # always first run tests
#? (run-tests)

View File

@ -1,6 +1,7 @@
fn transform-infix x-ah: (addr handle cell), trace: (addr trace) {
trace-text trace, "infix", "transform infix"
trace-lower trace
#? trace-text trace, "infix", "todo"
#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a:", 2/fg 0/bg
#? dump-cell-from-cursor-over-full-screen x-ah, 7/fg 0/bg
transform-infix-2 x-ah, trace
@ -316,91 +317,18 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
}
fn non-operator-grapheme? g: grapheme -> _/eax: boolean {
  # Exact complement of operator-grapheme?: returns 1/true when `g` is not an
  # operator grapheme and 0/false when it is. All per-character knowledge
  # lives in operator-grapheme?, so the two predicates can never disagree.
  var operator?/eax: boolean <- operator-grapheme? g
  compare operator?, 0/false
  {
    # operator? == false: skip the early exit and fall through to 1/true
    break-if-=
    return 0/false
  }
  return 1/true
}
# just a short list of operator graphemes for now
fn operator-grapheme? g: grapheme -> _/eax: boolean {
# '$' is a symbol char
# '$' is special and can be in either a symbol or operator
compare g, 0x25/percent
{
break-if-!=
@ -411,26 +339,6 @@ fn operator-grapheme? g: grapheme -> _/eax: boolean {
break-if-!=
return 1/true
}
compare g, 0x27/single-quote
{
break-if-!=
return 0/false
}
compare g, 0x60/backquote
{
break-if-!=
return 0/false
}
compare g, 0x2c/comma
{
break-if-!=
return 0/false
}
compare g, 0x40/at-sign
{
break-if-!=
return 0/false
}
compare g, 0x2a/asterisk
{
break-if-!=

View File

@ -744,88 +744,43 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
}
}
# Mu carves up the space of graphemes into 4 categories:
# whitespace
# quotes and unquotes (from a Lisp perspective; doesn't include double
# quotes or other Unicode quotes)
# operators
# symbols
# (Numbers have their own parsing rules that don't fit cleanly in this
# partition.)
#
# During tokenization operators and symbols are treated identically.
# A later phase digs into that nuance.
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
  # Returns 1/true when `g` may appear inside a symbol token, 0/false otherwise.
  # A grapheme is rejected if it belongs to one of the other lexical
  # categories (whitespace, quote/unquote, bracket), starts a comment, or is a
  # double quote. Everything else -- including operator graphemes, which the
  # tokenizer treats like symbols -- is accepted.
  var whitespace?/eax: boolean <- whitespace-grapheme? g
  compare whitespace?, 0/false
  {
    break-if-=
    return 0/false
  }
  var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g
  compare quote-or-unquote?, 0/false
  {
    break-if-=
    return 0/false
  }
  var bracket?/eax: boolean <- bracket-grapheme? g
  compare bracket?, 0/false
  {
    break-if-=
    return 0/false
  }
  compare g, 0x23/hash  # comments get filtered out
  {
    break-if-!=
    return 0/false
  }
  compare g, 0x22/double-quote  # double quotes reserved for now
  {
    break-if-!=
    return 0/false
  }
  return 1/true
}
fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
  # Returns 1/true when `g` is a tab, newline or space; 0/false otherwise.
  {
    # any match jumps past the 'no match' return below
    compare g, 9/tab
    break-if-=
    compare g, 0xa/newline
    break-if-=
    compare g, 0x20/space
    break-if-=
    # none of the whitespace graphemes matched
    return 0/false
  }
  return 1/true
}
fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
  # Returns 1/true when `g` is one of the four quote/unquote graphemes:
  # single quote, backquote, comma or at-sign; 0/false otherwise.
  {
    # any match jumps past the 'no match' return below
    compare g, 0x27/single-quote
    break-if-=
    compare g, 0x60/backquote
    break-if-=
    compare g, 0x2c/comma
    break-if-=
    compare g, 0x40/at-sign
    break-if-=
    # not a quote or unquote grapheme
    return 0/false
  }
  return 1/true
}
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
compare g, 0x28/open-paren
{