clean up lexical categories
This commit is contained in:
parent
59d904b4df
commit
0436ab71ea
10
mu-init.subx
10
mu-init.subx
|
@ -15,6 +15,16 @@ Entry:
|
|||
#
|
||||
#? (main 0 0 Primary-bus-secondary-drive)
|
||||
(set-cursor-position 0 0x30 2)
|
||||
(test-tokenize-number)
|
||||
(test-tokenize-negative-number)
|
||||
(test-tokenize-quote)
|
||||
(test-tokenize-backquote)
|
||||
(test-tokenize-unquote)
|
||||
(test-tokenize-unquote-splice)
|
||||
(test-tokenize-dotted-list)
|
||||
(test-tokenize-stream-literal)
|
||||
(test-tokenize-stream-literal-in-tree)
|
||||
(test-tokenize-indent)
|
||||
(test-infix)
|
||||
#? # always first run tests
|
||||
#? (run-tests)
|
||||
|
|
104
shell/infix.mu
104
shell/infix.mu
|
@ -1,6 +1,7 @@
|
|||
fn transform-infix x-ah: (addr handle cell), trace: (addr trace) {
|
||||
trace-text trace, "infix", "transform infix"
|
||||
trace-lower trace
|
||||
#? trace-text trace, "infix", "todo"
|
||||
#? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a:", 2/fg 0/bg
|
||||
#? dump-cell-from-cursor-over-full-screen x-ah, 7/fg 0/bg
|
||||
transform-infix-2 x-ah, trace
|
||||
|
@ -316,91 +317,18 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
|
|||
}
|
||||
|
||||
fn non-operator-grapheme? g: grapheme -> _/eax: boolean {
|
||||
## whitespace
|
||||
compare g, 9/tab
|
||||
var operator?/eax: boolean <- operator-grapheme? g
|
||||
compare operator?, 0/false
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0xa/newline
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x20/space
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
## we don't really use double quotes
|
||||
compare g, 0x22/double-quote
|
||||
{
|
||||
break-if-!=
|
||||
return 1/true
|
||||
}
|
||||
## brackets
|
||||
compare g, 0x28/open-paren
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x29/close-paren
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x5b/open-square-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x5d/close-square-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x7b/open-curly-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x7d/close-curly-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
# quotes and unquotes are like symbols for this purpose
|
||||
compare g, 0x27/single-quote
|
||||
{
|
||||
break-if-!=
|
||||
return 1/true
|
||||
}
|
||||
compare g, 0x60/backquote
|
||||
{
|
||||
break-if-!=
|
||||
return 1/true
|
||||
}
|
||||
compare g, 0x2c/comma
|
||||
{
|
||||
break-if-!=
|
||||
return 1/true
|
||||
}
|
||||
compare g, 0x40/at-sign
|
||||
{
|
||||
break-if-!=
|
||||
return 1/true
|
||||
}
|
||||
# - other punctuation
|
||||
compare g, 0x23/hash
|
||||
{
|
||||
break-if-!=
|
||||
break-if-=
|
||||
return 0/false
|
||||
}
|
||||
return 1/true
|
||||
}
|
||||
|
||||
# just a short list of operator graphemes for now
|
||||
fn operator-grapheme? g: grapheme -> _/eax: boolean {
|
||||
# '$' is a symbol char
|
||||
# '$' is special and can be in either a symbol or operator
|
||||
compare g, 0x25/percent
|
||||
{
|
||||
break-if-!=
|
||||
|
@ -411,26 +339,6 @@ fn operator-grapheme? g: grapheme -> _/eax: boolean {
|
|||
break-if-!=
|
||||
return 1/true
|
||||
}
|
||||
compare g, 0x27/single-quote
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x60/backquote
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x2c/comma
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x40/at-sign
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x2a/asterisk
|
||||
{
|
||||
break-if-!=
|
||||
|
|
|
@ -744,88 +744,43 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
|
|||
}
|
||||
}
|
||||
|
||||
# Mu carves up the space of graphemes into 4 categories:
|
||||
# whitespace
|
||||
# quotes and unquotes (from a Lisp perspective; doesn't include double
|
||||
# quotes or other Unicode quotes)
|
||||
# operators
|
||||
# symbols
|
||||
# (Numbers have their own parsing rules that don't fit cleanly in this
|
||||
# partition.)
|
||||
#
|
||||
# During tokenization operators and symbols are treated identically.
|
||||
# A later phase digs into that nuance.
|
||||
|
||||
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
|
||||
## whitespace
|
||||
compare g, 9/tab
|
||||
var whitespace?/eax: boolean <- whitespace-grapheme? g
|
||||
compare whitespace?, 0/false
|
||||
{
|
||||
break-if-=
|
||||
return 0/false
|
||||
}
|
||||
var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g
|
||||
compare quote-or-unquote?, 0/false
|
||||
{
|
||||
break-if-=
|
||||
return 0/false
|
||||
}
|
||||
var bracket?/eax: boolean <- bracket-grapheme? g
|
||||
compare bracket?, 0/false
|
||||
{
|
||||
break-if-=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x23/hash # comments get filtered out
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0xa/newline
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x20/space
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
## quotes
|
||||
compare g, 0x22/double-quote
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x60/backquote
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
## brackets
|
||||
compare g, 0x28/open-paren
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x29/close-paren
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x5b/open-square-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x5d/close-square-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x7b/open-curly-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x7d/close-curly-bracket
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
# quotes and unquotes
|
||||
compare g, 0x27/single-quote
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x60/backquote
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x2c/comma
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
compare g, 0x40/at-sign
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
}
|
||||
# - other punctuation
|
||||
compare g, 0x23/hash
|
||||
compare g, 0x22/double-quote # double quotes reserved for now
|
||||
{
|
||||
break-if-!=
|
||||
return 0/false
|
||||
|
@ -833,6 +788,49 @@ fn symbol-grapheme? g: grapheme -> _/eax: boolean {
|
|||
return 1/true
|
||||
}
|
||||
|
||||
# Is 'g' one of the whitespace graphemes: tab (9), newline (0xa) or space (0x20)?
# Returns 1/true on a match and 0/false for every other grapheme.
fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
  {
    # any match leaves this block and falls through to the 'true' result below
    compare g, 9/tab
    break-if-=
    compare g, 0xa/newline
    break-if-=
    compare g, 0x20/space
    break-if-=
    # none of the three matched
    return 0/false
  }
  return 1/true
}
|
||||
|
||||
# Is 'g' one of: single-quote (0x27), backquote (0x60), comma (0x2c) or
# at-sign (0x40)?
# Returns 1/true on a match and 0/false for every other grapheme.
fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
  {
    # any match leaves this block and falls through to the 'true' result below
    compare g, 0x27/single-quote
    break-if-=
    compare g, 0x60/backquote
    break-if-=
    compare g, 0x2c/comma
    break-if-=
    compare g, 0x40/at-sign
    break-if-=
    # none of the four matched
    return 0/false
  }
  return 1/true
}
|
||||
|
||||
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
|
||||
compare g, 0x28/open-paren
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue