# Tokenizer for an indent-sensitive Lisp-like syntax (Mu shell).
# The language is indent-sensitive.
|
|
# Each line consists of an initial indent token followed by other tokens.
|
|
# A single unit of tokenized input. The 'type' field selects which data field
# is meaningful.
type token {
  type: int
  # type 0: default (symbols, numbers, quotes, brackets); text in text-data
  # type 1: stream (string literal); contents in text-data, delimiters excluded
  text-data: (handle stream byte)
  # type 2: skip (end of line or end of file); no payload
  # type 3: indent; number of leading spaces in number-data
  number-data: int
}
|
|
|
|
# Scan all of 'in' and append its tokens to 'out'.
# Stops early (without draining 'in') as soon as an error is recorded in
# 'trace'. Skip tokens (end of line/input) are dropped rather than emitted.
# Tokens are shallow-copied into 'out'; text-data handles are shared.
fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
  trace-text trace, "tokenize", "tokenize"
  trace-lower trace
  rewind-gap-buffer in
  # next-token needs to know whether to emit an indent token first
  var at-start-of-line?/edi: boolean <- copy 1/true
  {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
    break-if-!=
    # scratch token, overwritten on every iteration
    var token-storage: token
    var token/edx: (addr token) <- address token-storage
    at-start-of-line? <- next-token in, token, at-start-of-line?, trace
    # bail out on the first traced error
    var error?/eax: boolean <- has-errors? trace
    compare error?, 0/false
    {
      break-if-=
      return
    }
    # don't propagate skip tokens to the output
    var skip?/eax: boolean <- skip-token? token
    compare skip?, 0/false
    loop-if-!=
    write-to-stream out, token # shallow-copy text-data
    loop
  }
  trace-higher trace
}
|
|
|
|
# "123 a" => indent(0 spaces), then a number token "123".
fn test-tokenize-number {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "123 a"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only (not inspected below)
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # every line begins with an indent token; here 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-number/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-number/before-indent"
  # first real token: the number 123
  read-from-stream stream, curr-token
  var number?/eax: boolean <- number-token? curr-token
  check number?, "F - test-tokenize-number"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "123", "F - test-tokenize-number: value"
}
|
|
|
|
# "-123 a" => indent(0 spaces), then a single number token "-123"
# (the leading minus is part of the number, not a separate token).
fn test-tokenize-negative-number {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "-123 a"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only (not inspected below)
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-negative-number/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-negative-number/before-indent"
  # first real token: the number -123
  read-from-stream stream, curr-token
  var number?/eax: boolean <- number-token? curr-token
  check number?, "F - test-tokenize-negative-number"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "-123", "F - test-tokenize-negative-number: value"
}
|
|
|
|
# "'(a)" => indent(0), quote, open paren, symbol 'a', close paren.
fn test-tokenize-quote {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "'(a)"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-quote/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-quote/before-indent"
  # "'"
  read-from-stream stream, curr-token
  var quote?/eax: boolean <- quote-token? curr-token
  check quote?, "F - test-tokenize-quote: quote"
  # "("
  read-from-stream stream, curr-token
  var open-paren?/eax: boolean <- open-paren-token? curr-token
  check open-paren?, "F - test-tokenize-quote: open paren"
  read-from-stream stream, curr-token # skip a
  # ")"
  read-from-stream stream, curr-token
  var close-paren?/eax: boolean <- close-paren-token? curr-token
  check close-paren?, "F - test-tokenize-quote: close paren"
}
|
|
|
|
# "`(a)" => indent(0), backquote, open paren, symbol 'a', close paren.
fn test-tokenize-backquote {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "`(a)"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-backquote/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-backquote/before-indent"
  # "`"
  read-from-stream stream, curr-token
  var backquote?/eax: boolean <- backquote-token? curr-token
  check backquote?, "F - test-tokenize-backquote: backquote"
  # "("
  read-from-stream stream, curr-token
  var open-paren?/eax: boolean <- open-paren-token? curr-token
  check open-paren?, "F - test-tokenize-backquote: open paren"
  read-from-stream stream, curr-token # skip a
  # ")"
  read-from-stream stream, curr-token
  var close-paren?/eax: boolean <- close-paren-token? curr-token
  check close-paren?, "F - test-tokenize-backquote: close paren"
}
|
|
|
|
# ",(a)" => indent(0), unquote, open paren, symbol 'a', close paren.
fn test-tokenize-unquote {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, ",(a)"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-unquote/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-unquote/before-indent"
  # ","
  read-from-stream stream, curr-token
  var unquote?/eax: boolean <- unquote-token? curr-token
  check unquote?, "F - test-tokenize-unquote: unquote"
  # "("
  read-from-stream stream, curr-token
  var open-paren?/eax: boolean <- open-paren-token? curr-token
  check open-paren?, "F - test-tokenize-unquote: open paren"
  read-from-stream stream, curr-token # skip a
  # ")"
  read-from-stream stream, curr-token
  var close-paren?/eax: boolean <- close-paren-token? curr-token
  check close-paren?, "F - test-tokenize-unquote: close paren"
}
|
|
|
|
# ",@a" => indent(0), then a single unquote-splice token
# (",@" lexes as one token, not "," followed by "@").
fn test-tokenize-unquote-splice {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, ",@a"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-unquote-splice/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-unquote-splice/before-indent"
  # ",@"
  read-from-stream stream, curr-token
  var unquote-splice?/eax: boolean <- unquote-splice-token? curr-token
  check unquote-splice?, "F - test-tokenize-unquote-splice: unquote-splice"
}
|
|
|
|
# "(a . b)" => indent(0), open paren, symbol 'a', dot, symbol 'b', close paren.
fn test-tokenize-dotted-list {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "(a . b)"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-dotted-list/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-dotted-list/before-indent"
  # "("
  read-from-stream stream, curr-token
  var open-paren?/eax: boolean <- open-paren-token? curr-token
  check open-paren?, "F - test-tokenize-dotted-list: open paren"
  read-from-stream stream, curr-token # skip a
  # "."
  read-from-stream stream, curr-token
  var dot?/eax: boolean <- dot-token? curr-token
  check dot?, "F - test-tokenize-dotted-list: dot"
  read-from-stream stream, curr-token # skip b
  # ")"
  read-from-stream stream, curr-token
  var close-paren?/eax: boolean <- close-paren-token? curr-token
  check close-paren?, "F - test-tokenize-dotted-list: close paren"
}
|
|
|
|
# double quotes with zero escaping support
# "\"abc def\"" => indent(0), then one stream token holding "abc def"
# (delimiters stripped), and nothing else.
fn test-tokenize-stream-literal {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "\"abc def\""
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-stream-literal/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-stream-literal/before-indent"
  # the string literal becomes a stream token
  read-from-stream stream, curr-token
  var stream?/eax: boolean <- stream-token? curr-token
  check stream?, "F - test-tokenize-stream-literal: type"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
  check data-equal?, "F - test-tokenize-stream-literal"
  # no further tokens
  var empty?/eax: boolean <- stream-empty? stream
  check empty?, "F - test-tokenize-stream-literal: empty?"
}
|
|
|
|
# alternative syntax for strings with balancing brackets
# "[abc def]" => indent(0), then one stream token holding "abc def"
# (brackets stripped), and nothing else.
fn test-tokenize-balanced-stream-literal {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "[abc def]"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-balanced-stream-literal/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-balanced-stream-literal/before-indent"
  # the bracketed literal becomes a stream token
  read-from-stream stream, curr-token
  var stream?/eax: boolean <- stream-token? curr-token
  # fixed copy-paste in the failure message: previously named
  # test-tokenize-stream-literal
  check stream?, "F - test-tokenize-balanced-stream-literal: type"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
  check data-equal?, "F - test-tokenize-balanced-stream-literal"
  # no further tokens
  var empty?/eax: boolean <- stream-empty? stream
  check empty?, "F - test-tokenize-balanced-stream-literal: empty?"
}
|
|
|
|
# "[abc [def]]" => indent(0), then one stream token holding "abc [def]":
# only the outermost brackets are stripped; nested brackets are preserved.
fn test-tokenize-nested-stream-literal {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "[abc [def]]"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-nested-stream-literal/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-nested-stream-literal/before-indent"
  # the bracketed literal becomes a stream token
  read-from-stream stream, curr-token
  var stream?/eax: boolean <- stream-token? curr-token
  # fixed copy-paste in the failure message: previously named
  # test-tokenize-stream-literal
  check stream?, "F - test-tokenize-nested-stream-literal: type"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc [def]"
  check data-equal?, "F - test-tokenize-nested-stream-literal"
  # no further tokens
  var empty?/eax: boolean <- stream-empty? stream
  check empty?, "F - test-tokenize-nested-stream-literal: empty?"
}
|
|
|
|
# "([abc def])" => indent(0), open paren, stream token "abc def", close paren.
# Checks that a balanced stream literal nests inside other tokens.
fn test-tokenize-stream-literal-in-tree {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "([abc def])"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # leading indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-stream-literal-in-tree/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-stream-literal-in-tree/before-indent"
  # "("
  read-from-stream stream, curr-token
  var bracket?/eax: boolean <- bracket-token? curr-token
  check bracket?, "F - test-tokenize-stream-literal-in-tree: open paren"
  # the bracketed literal becomes a stream token
  read-from-stream stream, curr-token
  var stream?/eax: boolean <- stream-token? curr-token
  check stream?, "F - test-tokenize-stream-literal-in-tree: type"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
  check data-equal?, "F - test-tokenize-stream-literal-in-tree"
  # ")"
  read-from-stream stream, curr-token
  var bracket?/eax: boolean <- bracket-token? curr-token
  check bracket?, "F - test-tokenize-stream-literal-in-tree: close paren"
  # no further tokens
  var empty?/eax: boolean <- stream-empty? stream
  check empty?, "F - test-tokenize-stream-literal-in-tree: empty?"
}
|
|
|
|
# "abc\n def" => indent(0), symbol "abc", indent(2), symbol "def".
# Checks that each line's leading spaces become an indent token.
fn test-tokenize-indent {
  # input: second line indented by two spaces
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "abc\n def"
  # output token stream
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace, errors only
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # scratch token for reading results back out
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  # first line: indent token of 0 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-indent/before-indent"
  # then "abc"
  read-from-stream stream, curr-token
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "abc", "F - test-tokenize-indent/before"
  # second line: indent token of 2 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 2/spaces, "F - test-tokenize-indent"
  # then "def"
  read-from-stream stream, curr-token
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "def", "F - test-tokenize-indent/after"
}
|
|
|
|
# caller is responsible for threading start-of-line? between calls to next-token
# 'in' may contain whitespace if start-of-line?
# Reads the next token from 'in' into 'out' and returns the new
# start-of-line? state (true after a newline or end of input).
# When start-of-line?, an indent token is staged in 'out' first; it is
# returned as-is unless the rest of the line turns out to be a comment,
# newline or end of input, in which case it's overwritten by a skip token.
fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
  trace-text trace, "tokenize", "next-token"
  trace-lower trace
  # save an indent token if necessary
  {
    compare start-of-line?, 0/false
    break-if-=
    next-indent-token in, out, trace # might not be returned
  }
  skip-spaces-from-gap-buffer in
  # '#' starts a comment that runs to end of line
  var g/eax: code-point-utf8 <- peek-from-gap-buffer in
  {
    compare g, 0x23/comment
    break-if-!=
    skip-rest-of-line in
  }
  # newline: consume it and emit a skip token
  var g/eax: code-point-utf8 <- peek-from-gap-buffer in
  {
    compare g, 0xa/newline
    break-if-!=
    trace-text trace, "tokenize", "newline"
    g <- read-from-gap-buffer in
    initialize-skip-token out # might drop indent if that's all there was in this line
    trace-higher trace
    return 1/at-start-of-line
  }
  {
    compare start-of-line?, 0/false
    break-if-=
    # still here? no comment or newline? return saved indent
    trace-higher trace
    return 0/not-at-start-of-line
  }
  # end of input: emit a skip token
  {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
    break-if-=
    trace-text trace, "tokenize", "end"
    initialize-skip-token out
    trace-higher trace
    return 1/at-start-of-line
  }
  # peek the code point that selects the token kind
  var _g/eax: code-point-utf8 <- peek-from-gap-buffer in
  var g/ecx: code-point-utf8 <- copy _g
  # trace the code point we're dispatching on
  {
    var should-trace?/eax: boolean <- should-trace? trace
    compare should-trace?, 0/false
    break-if-=
    var stream-storage: (stream byte 0x40)
    var stream/esi: (addr stream byte) <- address stream-storage
    write stream, "next: "
    var gval/eax: int <- copy g
    write-int32-hex stream, gval
    trace trace, "tokenize", stream
  }
  $next-token:case: {
    # double quotes begin streams
    {
      compare g, 0x22/double-quote
      break-if-!=
      var dummy/eax: code-point-utf8 <- read-from-gap-buffer in # skip
      next-stream-token in, out, trace
      break $next-token:case
    }
    # open square brackets begin balanced streams
    {
      compare g, 0x5b/open-square-bracket
      break-if-!=
      var dummy/eax: code-point-utf8 <- read-from-gap-buffer in # skip open bracket
      next-balanced-stream-token in, out, trace
      break $next-token:case
    }
    # other symbol char
    {
      var symbol?/eax: boolean <- symbol-code-point-utf8? g
      compare symbol?, 0/false
      break-if-=
      next-symbol-token in, out, trace
      break $next-token:case
    }
    # unbalanced close square brackets are errors
    {
      compare g, 0x5d/close-square-bracket
      break-if-!=
      error trace, "unbalanced ']'"
      return start-of-line?
    }
    # other brackets are always single-char tokens
    {
      var bracket?/eax: boolean <- bracket-code-point-utf8? g
      compare bracket?, 0/false
      break-if-=
      var g/eax: code-point-utf8 <- read-from-gap-buffer in
      next-bracket-token g, out, trace
      break $next-token:case
    }
    # quote
    {
      compare g, 0x27/single-quote
      break-if-!=
      var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume
      initialize-token out, "'"
      break $next-token:case
    }
    # backquote
    {
      compare g, 0x60/backquote
      break-if-!=
      var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume
      initialize-token out, "`"
      break $next-token:case
    }
    # unquote
    {
      compare g, 0x2c/comma
      break-if-!=
      var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume
      # check for unquote-splice
      {
        g <- peek-from-gap-buffer in
        compare g, 0x40/at-sign
        break-if-!=
        g <- read-from-gap-buffer in
        initialize-token out, ",@"
        break $next-token:case
      }
      initialize-token out, ","
      break $next-token:case
    }
    # no case matched; display the offending code point on screen, then die
    set-cursor-position 0/screen, 0x40 0x20
    {
      var foo/eax: int <- copy g
      draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg
    }
    abort "unknown token type"
  }
  trace-higher trace
  # trace the token we're returning
  {
    var should-trace?/eax: boolean <- should-trace? trace
    compare should-trace?, 0/false
    break-if-=
    var stream-storage: (stream byte 0x400) # maximum possible token size (next-stream-token)
    var stream/eax: (addr stream byte) <- address stream-storage
    write stream, "=> "
    write-token-text-data stream, out
    trace trace, "tokenize", stream
  }
  return start-of-line?
}
|
|
|
|
# Read a maximal run of symbol code points from 'in' into _out's text-data.
# Stops at end of input or at the first non-symbol code point (which is left
# unconsumed).
fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a symbol"
  trace-lower trace
  var out/eax: (addr token) <- copy _out
  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
  populate-stream out-data-ah, 0x40/max-symbol-size
  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
  var out-data/edi: (addr stream byte) <- copy _out-data
  $next-symbol-token:loop: {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
    break-if-!=
    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
    # trace the code point under consideration
    {
      # inner block scopes should-trace? (in eax); the flags set by 'compare'
      # survive past the closing brace for the break-if-= below
      {
        var should-trace?/eax: boolean <- should-trace? trace
        compare should-trace?, 0/false
      }
      break-if-=
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "tokenize", stream
    }
    # if non-symbol, return
    {
      var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
      compare symbol-code-point-utf8?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-symbol-token:loop
    }
    # consume and accumulate the code point
    var g/eax: code-point-utf8 <- read-from-gap-buffer in
    write-code-point-utf8 out-data, g
    loop
  }
  trace-higher trace
  # trace the accumulated symbol
  {
    var should-trace?/eax: boolean <- should-trace? trace
    compare should-trace?, 0/false
    break-if-=
    var stream-storage: (stream byte 0x40)
    var stream/esi: (addr stream byte) <- address stream-storage
    write stream, "=> "
    rewind-stream out-data
    write-stream stream, out-data
    trace trace, "tokenize", stream
  }
}
|
|
|
|
# Read a number token (optional leading '-' followed by decimal digits) from
# 'in' into _out's text-data. Stops at end of input or the first non-symbol
# code point. Records an error in 'trace' on a symbol code point that is not
# a decimal digit.
fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a number"
  trace-lower trace
  var out/eax: (addr token) <- copy _out
  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
  populate-stream out-data-ah, 0x40
  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
  var out-data/edi: (addr stream byte) <- copy _out-data
  # optional leading minus
  $next-number-token:check-minus: {
    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
    compare g, 0x2d/minus
    # bug fix: the compare result was previously ignored, so the first code
    # point was consumed and written unconditionally, bypassing the digit
    # validation in the loop below
    break-if-!=
    g <- read-from-gap-buffer in # consume
    write-code-point-utf8 out-data, g
  }
  $next-number-token:loop: {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
    break-if-!=
    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
    # trace the code point under consideration
    {
      # inner block scopes should-trace? (in eax); the flags set by 'compare'
      # survive past the closing brace for the break-if-= below
      {
        var should-trace?/eax: boolean <- should-trace? trace
        compare should-trace?, 0/false
      }
      break-if-=
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "tokenize", stream
    }
    # if not symbol code-point-utf8, return
    {
      var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
      compare symbol-code-point-utf8?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-number-token:loop
    }
    # if not digit code-point-utf8, abort
    {
      var digit?/eax: boolean <- decimal-digit? g
      compare digit?, 0/false
      break-if-!=
      error trace, "invalid number"
      return
    }
    trace-text trace, "tokenize", "append"
    var g/eax: code-point-utf8 <- read-from-gap-buffer in
    write-code-point-utf8 out-data, g
    loop
  }
  trace-higher trace
}
|
|
|
|
# Read a double-quoted string literal into _out as a stream token. The caller
# has already consumed the opening '"'; this consumes through the closing '"'
# but stores only the bytes between the quotes (no escaping is supported).
# Records an error in 'trace' if input ends before a closing quote.
fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "stream"
  var out/edi: (addr token) <- copy _out
  var out-type/eax: (addr int) <- get out, type
  copy-to *out-type, 1/stream
  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
  # stream tokens contain whole function definitions on boot, so we always
  # give them plenty of space
  populate-stream out-data-ah, 0x400/max-definition-size=1KB
  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
  var out-data/edi: (addr stream byte) <- copy _out-data
  {
    # running out of input before the closing quote is an error
    var empty?/eax: boolean <- gap-buffer-scan-done? in
    compare empty?, 0/false
    {
      break-if-=
      error trace, "unbalanced '\"'"
      return
    }
    # closing quote is consumed but not stored
    var g/eax: code-point-utf8 <- read-from-gap-buffer in
    compare g, 0x22/double-quote
    break-if-=
    write-code-point-utf8 out-data, g
    loop
  }
  # trace the accumulated contents
  {
    var should-trace?/eax: boolean <- should-trace? trace
    compare should-trace?, 0/false
    break-if-=
    var stream-storage: (stream byte 0x400) # max-definition-size
    var stream/esi: (addr stream byte) <- address stream-storage
    write stream, "=> "
    rewind-stream out-data
    write-stream-immutable stream, out-data
    trace trace, "tokenize", stream
  }
}
|
|
|
|
# Read a '[...]'-delimited literal into _out as a stream token. The caller has
# already consumed the opening '['; this consumes through the matching ']' but
# stores only the bytes in between. Nested '['/']' pairs are tracked via
# bracket-count and copied through verbatim.
# Records an error in 'trace' if input ends before the matching ']'.
fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "balanced stream"
  var out/edi: (addr token) <- copy _out
  var out-type/eax: (addr int) <- get out, type
  copy-to *out-type, 1/stream
  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
  # depth of nested brackets inside the literal; 0 at top level
  var bracket-count: int
  # stream tokens contain whole function definitions on boot, so we always
  # give them plenty of space
  populate-stream out-data-ah, 0x40000/max-definition-size=256KB
  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
  var out-data/edi: (addr stream byte) <- copy _out-data
  $next-balanced-stream-token:loop: {
    # running out of input before the matching ']' is an error
    var empty?/eax: boolean <- gap-buffer-scan-done? in
    compare empty?, 0/false
    {
      break-if-=
      error trace, "unbalanced '['"
      return
    }
    var g/eax: code-point-utf8 <- read-from-gap-buffer in
    # nested open bracket: deepen
    {
      compare g, 0x5b/open-square-bracket
      break-if-!=
      increment bracket-count
    }
    # close bracket: either the end of the literal or a nested close
    {
      compare g, 0x5d/close-square-bracket
      break-if-!=
      compare bracket-count, 0
      break-if-= $next-balanced-stream-token:loop
      decrement bracket-count
    }
    write-code-point-utf8 out-data, g
    loop
  }
  # trace the accumulated contents
  {
    var should-trace?/eax: boolean <- should-trace? trace
    compare should-trace?, 0/false
    break-if-=
    var stream-storage: (stream byte 0x400) # max-definition-size
    var stream/esi: (addr stream byte) <- address stream-storage
    write stream, "=> "
    rewind-stream out-data
    write-stream-immutable stream, out-data
    trace trace, "tokenize", stream
  }
}
|
|
|
|
# Store the single (already-consumed) bracket code point 'g' into _out's
# text-data as a one-character token.
fn next-bracket-token g: code-point-utf8, _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "bracket"
  var out/eax: (addr token) <- copy _out
  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
  populate-stream out-data-ah, 0x40
  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
  var out-data/edi: (addr stream byte) <- copy _out-data
  write-code-point-utf8 out-data, g
  # trace the resulting token
  {
    var should-trace?/eax: boolean <- should-trace? trace
    compare should-trace?, 0/false
    break-if-=
    var stream-storage: (stream byte 0x40)
    var stream/esi: (addr stream byte) <- address stream-storage
    write stream, "=> "
    rewind-stream out-data
    write-stream stream, out-data
    trace trace, "tokenize", stream
  }
}
|
|
|
|
# Consume code points up to — but not including — the next newline,
# or until the input is exhausted. The newline itself is left for the
# caller to handle.
fn skip-rest-of-line in: (addr gap-buffer) {
  {
    # nothing left to skip?
    var eof?/eax: boolean <- gap-buffer-scan-done? in
    compare eof?, 0/false
    break-if-!=
    # stop in front of the newline without consuming it
    var c/eax: code-point-utf8 <- peek-from-gap-buffer in
    compare c, 0xa/newline
    break-if-=
    c <- read-from-gap-buffer in # discard
    loop
  }
}
|
|
|
|
# Read a run of leading spaces from `in` into an indent token:
#   type = 3/indent, number-data = number of spaces consumed.
# Stops at the first non-space code point or end of input.
fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "indent"
  trace-lower trace
  var out/edi: (addr token) <- copy _out
  var out-type/eax: (addr int) <- get out, type
  copy-to *out-type, 3/indent
  var dest/edi: (addr int) <- get out, number-data
  copy-to *dest, 0
  {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
    break-if-!=
    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
    # optionally trace the code point we're looking at
    {
      # The inner block scopes `should-trace?` so that eax (holding `g`) is
      # saved and restored around it...
      {
        var should-trace?/eax: boolean <- should-trace? trace
        compare should-trace?, 0/false
      }
      # ...while the flags from the compare above survive the block edge.
      break-if-=
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "tokenize", stream
    }
    # if non-space, break
    compare g, 0x20/space
    break-if-!=
    g <- read-from-gap-buffer in
    increment *dest
    loop
  }
  trace-higher trace
  # optionally log the indent amount
  {
    var should-trace?/eax: boolean <- should-trace? trace
    compare should-trace?, 0/false
    break-if-=
    var stream-storage: (stream byte 0x40)
    var stream/esi: (addr stream byte) <- address stream-storage
    write stream, "=> indent "
    write-int32-hex stream, *dest
    trace trace, "tokenize", stream
  }
}
|
|
|
|
# Mu carves up the space of code-point-utf8s into 4 categories:
#   whitespace
#   quotes and unquotes (from a Lisp perspective; doesn't include double
#     quotes or other Unicode quotes)
#   operators
#   symbols
# (Numbers have their own parsing rules that don't fit cleanly in this
# partition.)
#
# During tokenization operators and symbols are treated identically.
# A later phase digs into that nuance.
|
|
|
|
# A code point can be part of a symbol precisely when it belongs to no other
# category: not whitespace, not a quote/unquote, not a bracket, not a comment
# or string delimiter.
fn symbol-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  # '#' starts a comment; comments get filtered out
  compare g, 0x23/hash
  {
    break-if-!=
    return 0/false
  }
  # '"' is reserved for now
  compare g, 0x22/double-quote
  {
    break-if-!=
    return 0/false
  }
  var ws?/eax: boolean <- whitespace-code-point-utf8? g
  compare ws?, 0/false
  {
    break-if-=
    return 0/false
  }
  var quotish?/eax: boolean <- quote-or-unquote-code-point-utf8? g
  compare quotish?, 0/false
  {
    break-if-=
    return 0/false
  }
  var bracket?/eax: boolean <- bracket-code-point-utf8? g
  compare bracket?, 0/false
  {
    break-if-=
    return 0/false
  }
  # everything else is fair game
  return 1/true
}
|
|
|
|
# Is `g` a space, newline or tab?
fn whitespace-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  # space is the common case; check it first
  compare g, 0x20/space
  {
    break-if-!=
    return 1/true
  }
  compare g, 0xa/newline
  {
    break-if-!=
    return 1/true
  }
  compare g, 9/tab
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
|
|
|
|
# Lisp-style quote/unquote markers: ' ` , @
# (Doesn't include double quotes or other Unicode quotes.)
fn quote-or-unquote-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  # quotes
  compare g, 0x27/single-quote
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x60/backquote
  {
    break-if-!=
    return 1/true
  }
  # unquotes
  compare g, 0x40/at-sign
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2c/comma
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
|
|
|
|
# Is `g` one of the six bracket characters: ( ) [ ] { } ?
fn bracket-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  # open brackets
  compare g, 0x28/open-paren
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x5b/open-square-bracket
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x7b/open-curly-bracket
  {
    break-if-!=
    return 1/true
  }
  # close brackets
  compare g, 0x29/close-paren
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x5d/close-square-bracket
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x7d/close-curly-bracket
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
|
|
|
|
# Does the text of `_self` look like a (possibly negative) decimal integer?
# An optional leading '-' is skipped; every remaining code point must be a
# decimal digit.
fn number-token? _self: (addr token) -> _/eax: boolean {
  var self/eax: (addr token) <- copy _self
  var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
  var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
  var in-data/ecx: (addr stream byte) <- copy _in-data
  rewind-stream in-data
  var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
  # if '-', read another
  {
    compare g, 0x2d/minus
    break-if-!=
    # NOTE(review): a token of just "-" reads past the end of the stream
    # here; presumably read-code-point-utf8 then yields a non-digit so we
    # return false below — confirm.
    g <- read-code-point-utf8 in-data
  }
  {
    # reject the first non-digit
    {
      var result/eax: boolean <- decimal-digit? g
      compare result, 0/false
      break-if-!=
      return 0/false
    }
    # The inner block scopes `done?` so that eax (holding `g`) is restored,
    # while the compare's flags survive to the break below.
    {
      var done?/eax: boolean <- stream-empty? in-data
      compare done?, 0/false
    }
    break-if-!=
    g <- read-code-point-utf8 in-data
    loop
  }
  # all code points were digits
  return 1/true
}
|
|
|
|
# Is the first code point of `_self`'s text a bracket?
# Stream tokens (type 1) are never bracket tokens.
# NOTE(review): skip/indent tokens (types 2/3) fall through to the lookup
# below even though their text-data may never have been populated —
# presumably callers only pass text tokens; confirm.
fn bracket-token? _self: (addr token) -> _/eax: boolean {
  var self/eax: (addr token) <- copy _self
  {
    var in-type/eax: (addr int) <- get self, type
    compare *in-type, 1/stream
    break-if-!=
    # streams are never paren tokens
    return 0/false
  }
  var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
  var in-data/eax: (addr stream byte) <- lookup *in-data-ah
  rewind-stream in-data
  var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
  var result/eax: boolean <- bracket-code-point-utf8? g
  return result
}
|
|
|
|
# Is `_self`'s entire text exactly "'"?
fn quote-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var _contents/eax: (addr stream byte) <- lookup *contents-ah
  var contents/ecx: (addr stream byte) <- copy _contents
  # compare from the start, not the current read position
  rewind-stream contents
  var result/eax: boolean <- stream-data-equal? contents, "'"
  return result
}
|
|
|
|
# Is `_self`'s entire text exactly "`"?
fn backquote-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var _contents/eax: (addr stream byte) <- lookup *contents-ah
  var contents/ecx: (addr stream byte) <- copy _contents
  # compare from the start, not the current read position
  rewind-stream contents
  var result/eax: boolean <- stream-data-equal? contents, "`"
  return result
}
|
|
|
|
# Is `_self`'s entire text exactly ","?
fn unquote-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var _contents/eax: (addr stream byte) <- lookup *contents-ah
  var contents/ecx: (addr stream byte) <- copy _contents
  # compare from the start, not the current read position
  rewind-stream contents
  var result/eax: boolean <- stream-data-equal? contents, ","
  return result
}
|
|
|
|
# Is `_self`'s entire text exactly ",@"?
fn unquote-splice-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var _contents/eax: (addr stream byte) <- lookup *contents-ah
  var contents/ecx: (addr stream byte) <- copy _contents
  # compare from the start, not the current read position
  rewind-stream contents
  var result/eax: boolean <- stream-data-equal? contents, ",@"
  return result
}
|
|
|
|
# Is `_self`'s entire text exactly "("?
fn open-paren-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var _contents/eax: (addr stream byte) <- lookup *contents-ah
  var contents/ecx: (addr stream byte) <- copy _contents
  rewind-stream contents
  var c/eax: code-point-utf8 <- read-code-point-utf8 contents
  {
    compare c, 0x28/open-paren
    break-if-=
    # first code point is not '('
    return 0/false
  }
  # '(' must be the only code point
  var result/eax: boolean <- stream-empty? contents
  return result
}
|
|
|
|
# Is `_self`'s entire text exactly ")"?
fn close-paren-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var _contents/eax: (addr stream byte) <- lookup *contents-ah
  var contents/ecx: (addr stream byte) <- copy _contents
  rewind-stream contents
  var c/eax: code-point-utf8 <- read-code-point-utf8 contents
  {
    compare c, 0x29/close-paren
    break-if-=
    # first code point is not ')'
    return 0/false
  }
  # ')' must be the only code point
  var result/eax: boolean <- stream-empty? contents
  return result
}
|
|
|
|
# Is `_self`'s entire text exactly "."?
fn dot-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var _contents/eax: (addr stream byte) <- lookup *contents-ah
  var contents/ecx: (addr stream byte) <- copy _contents
  rewind-stream contents
  var c/eax: code-point-utf8 <- read-code-point-utf8 contents
  {
    compare c, 0x2e/dot
    break-if-=
    # first code point is not '.'
    return 0/false
  }
  # '.' must be the only code point
  var result/eax: boolean <- stream-empty? contents
  return result
}
|
|
|
|
fn test-dot-token {
  # allocate a token on the heap
  var t-h: (handle token)
  var t-ah/eax: (addr handle token) <- address t-h
  allocate-token t-ah
  var t/eax: (addr token) <- lookup *t-ah
  # fill it with "."
  initialize-token t, "."
  var result/eax: boolean <- dot-token? t
  check result, "F - test-dot-token"
}
|
|
|
|
# Is `_self` a literal-stream token (type 1)?
fn stream-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var typ/eax: (addr int) <- get t, type
  compare *typ, 1/stream
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
|
|
|
|
# Is `_self` a skip token (type 2: end of line or end of file)?
fn skip-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var typ/eax: (addr int) <- get t, type
  compare *typ, 2/skip
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
|
|
|
|
# Is `_self` an indent token (type 3)?
fn indent-token? _self: (addr token) -> _/eax: boolean {
  var t/eax: (addr token) <- copy _self
  var typ/eax: (addr int) <- get t, type
  compare *typ, 3/indent
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
|
|
|
|
# Allocate a token on the heap and pre-populate its text-data stream.
fn allocate-token _self-ah: (addr handle token) {
  var self-ah/eax: (addr handle token) <- copy _self-ah
  allocate self-ah
  var self/eax: (addr token) <- lookup *self-ah
  var dest-ah/eax: (addr handle stream byte) <- get self, text-data
  populate-stream dest-ah, 0x40/max-symbol-size
}
|
|
|
|
# Reset `_self`'s text-data to contain exactly `val`.
# NOTE(review): the type and number-data fields are untouched — presumably
# callers zero-initialize the token first; confirm.
fn initialize-token _self: (addr token), val: (addr array byte) {
  var self/eax: (addr token) <- copy _self
  var dest-ah/eax: (addr handle stream byte) <- get self, text-data
  populate-stream dest-ah, 0x40  # max-symbol-size, as in allocate-token
  var dest/eax: (addr stream byte) <- lookup *dest-ah
  write dest, val
}
|
|
|
|
# Mark `_self` as a skip token (type 2: end of line or end of file).
# Skip tokens carry no other data.
fn initialize-skip-token _self: (addr token) {
  var self/eax: (addr token) <- copy _self
  var self-type/eax: (addr int) <- get self, type
  copy-to *self-type, 2/skip
}
|
|
|
|
# Append `_self`'s entire text-data to `out`.
fn write-token-text-data out: (addr stream byte), _self: (addr token) {
  var t/eax: (addr token) <- copy _self
  var contents-ah/eax: (addr handle stream byte) <- get t, text-data
  var contents/eax: (addr stream byte) <- lookup *contents-ah
  # emit from the start, not from the current read position
  rewind-stream contents
  write-stream out, contents
}
|
|
|
|
# Structural equality for tokens.
# Types must match; then skip tokens compare equal unconditionally, indent
# tokens compare their indent amount, and all other types compare text-data.
fn tokens-equal? _a: (addr token), _b: (addr token) -> _/eax: boolean {
  var a/edx: (addr token) <- copy _a
  var b/ebx: (addr token) <- copy _b
  var a-type-addr/eax: (addr int) <- get a, type
  var a-type/eax: int <- copy *a-type-addr
  var b-type-addr/ecx: (addr int) <- get b, type
  compare a-type, *b-type-addr
  {
    break-if-=
    return 0/false
  }
  compare a-type, 2/skip
  {
    break-if-!=
    # skip tokens have no other data
    return 1/true
  }
  compare a-type, 3/indent
  {
    break-if-!=
    # indent tokens compare by number-data (the indent amount)
    var a-number-data-addr/eax: (addr int) <- get a, number-data
    var a-number-data/eax: int <- copy *a-number-data-addr
    var b-number-data-addr/ecx: (addr int) <- get b, number-data
    compare a-number-data, *b-number-data-addr
    {
      break-if-=
      return 0/false
    }
    return 1/true
  }
  # all remaining types compare by text-data
  var b-data-ah/eax: (addr handle stream byte) <- get b, text-data
  var _b-data/eax: (addr stream byte) <- lookup *b-data-ah
  var b-data/ebx: (addr stream byte) <- copy _b-data
  var a-data-ah/eax: (addr handle stream byte) <- get a, text-data
  var a-data/eax: (addr stream byte) <- lookup *a-data-ah
  var data-match?/eax: boolean <- streams-data-equal? a-data, b-data
  return data-match?
}
|
|
|
|
# Debug helper: print a token's type, text and number-data at the cursor.
# NOTE(review): unconditionally looks up text-data; for a token whose
# text-data was never populated (e.g. a fresh skip/indent token) this would
# hand rewind-stream a null address — confirm callers only pass full tokens.
fn dump-token-from-cursor _t: (addr token) {
  var t/esi: (addr token) <- copy _t
  var type/eax: (addr int) <- get t, type
  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *type, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
  var text-ah/eax: (addr handle stream byte) <- get t, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  draw-stream-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, text, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
  var num/eax: (addr int) <- get t, number-data
  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *num, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "\n", 7/fg 0/bg
}
|