rename grapheme to code-point-utf8

Longer name, but it doesn't lie. We have no data structure right now for
combining multiple code points. And it makes no sense for the notion of
a grapheme to be conflated with its Unicode encoding.
This commit is contained in:
Kartik K. Agaram 2021-11-09 08:12:11 -08:00
parent d1808995b2
commit d253a31828
55 changed files with 1403 additions and 1408 deletions

View File

@ -324,7 +324,7 @@ test-write-int32-decimal-negative-multiple-digits:
# . end
c3/return
decimal-digit?: # c: grapheme -> result/eax: boolean
decimal-digit?: # c: code-point-utf8 -> result/eax: boolean
# . prologue
55/push-ebp
89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
@ -423,7 +423,7 @@ test-decimal-digit-above-9:
81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp
c3/return
to-decimal-digit: # in: grapheme -> out/eax: int
to-decimal-digit: # in: code-point-utf8 -> out/eax: int
# . prologue
55/push-ebp
89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp

4
400.mu
View File

@ -97,8 +97,8 @@ sig write-slice out: (addr stream byte), s: (addr slice)
# bad name alert
sig slice-to-string ad: (addr allocation-descriptor), in: (addr slice), out: (addr handle array byte)
sig write-int32-decimal out: (addr stream byte), n: int
sig decimal-digit? c: grapheme -> _/eax: boolean
sig to-decimal-digit in: grapheme -> _/eax: int
sig decimal-digit? c: code-point-utf8 -> _/eax: boolean
sig to-decimal-digit in: code-point-utf8 -> _/eax: int
# bad name alert
# next-word really tokenizes
# next-raw-word really reads whitespace-separated words

View File

@ -1,16 +1,11 @@
# Helpers for Unicode.
#
# Mu has no characters, only code points and graphemes.
# Code points are the indivisible atoms of text streams.
# The basic unit for rendering Unicode is the code point.
# https://en.wikipedia.org/wiki/Code_point
# Graphemes are the smallest self-contained unit of text.
# Graphemes may consist of multiple code points.
# The glyph a non-cursive font displays may represent multiple code points.
#
# Mu graphemes are always represented in utf-8, and they are required to fit
# in 4 bytes. (This can be confusing if you focus just on ASCII, where Mu's
# graphemes and code-points are identical.)
#
# Mu doesn't yet support graphemes consisting of multiple code points.
# In addition to raw code points (just integers assigned special meaning), Mu
# provides a common encoding as a convenience: code-point-utf8.
fn test-unicode-serialization-and-deserialization {
var i/ebx: int <- copy 0
@ -20,8 +15,8 @@ fn test-unicode-serialization-and-deserialization {
# but not emoji
break-if->=
var c/eax: code-point <- copy i
var _g/eax: grapheme <- to-grapheme c
var g/ecx: grapheme <- copy _g
var _g/eax: code-point-utf8 <- to-utf8 c
var g/ecx: code-point-utf8 <- copy _g
var c2/eax: code-point <- to-code-point g
compare i, c2
{
@ -51,7 +46,7 @@ fn test-unicode-serialization-and-deserialization {
}
# transliterated from tb_utf8_char_to_unicode in https://github.com/nsf/termbox
fn to-code-point in: grapheme -> _/eax: code-point {
fn to-code-point in: code-point-utf8 -> _/eax: code-point {
var g/ebx: int <- copy in
# if single byte, just return it
{
@ -61,7 +56,7 @@ fn to-code-point in: grapheme -> _/eax: code-point {
return result
}
#
var len/edx: int <- grapheme-length in
var len/edx: int <- utf8-length in
# extract bits from first byte
var b/eax: byte <- copy-byte g
var result/edi: code-point <- copy b
@ -99,16 +94,16 @@ fn to-code-point in: grapheme -> _/eax: code-point {
# transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox
# https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm
fn to-grapheme in: code-point -> _/eax: grapheme {
fn to-utf8 in: code-point -> _/eax: code-point-utf8 {
var c/eax: int <- copy in
var num-trailers/ecx: int <- copy 0
var first/edx: int <- copy 0
$to-grapheme:compute-length: {
$to-utf8:compute-length: {
# single byte: just return it
compare c, 0x7f
{
break-if->
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
# 2 bytes
@ -117,7 +112,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
break-if->
num-trailers <- copy 1
first <- copy 0xc0
break $to-grapheme:compute-length
break $to-utf8:compute-length
}
# 3 bytes
compare c, 0xffff
@ -125,7 +120,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
break-if->
num-trailers <- copy 2
first <- copy 0xe0
break $to-grapheme:compute-length
break $to-utf8:compute-length
}
# 4 bytes
compare c, 0x1fffff
@ -133,7 +128,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
break-if->
num-trailers <- copy 3
first <- copy 0xf0
break $to-grapheme:compute-length
break $to-utf8:compute-length
}
# more than 4 bytes: unsupported
compare c, 0x1fffff
@ -144,7 +139,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
}
}
# emit trailer bytes, 6 bits from 'in', first two bits '10'
var result/edi: grapheme <- copy 0
var result/edi: code-point-utf8 <- copy 0
{
compare num-trailers, 0
break-if-<=
@ -166,16 +161,16 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
return result
}
# single-byte code point have identical graphemes
fn test-to-grapheme-single-byte {
# single-byte code points have identical code-point-utf8s
fn test-to-utf8-single-byte {
var in-int/ecx: int <- copy 0
{
compare in-int, 0x7f
break-if->
var in/eax: code-point <- copy in-int
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, in-int, "F - test-to-grapheme-single-byte"
check-ints-equal out-int, in-int, "F - test-to-utf8-single-byte"
in-int <- increment
loop
}
@ -183,55 +178,55 @@ fn test-to-grapheme-single-byte {
# byte | byte | byte | byte
# smallest 2-byte utf-8
fn test-to-grapheme-two-bytes-min {
fn test-to-utf8-two-bytes-min {
var in/eax: code-point <- copy 0x80 # 10 00-0000
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0x80c2, "F - to-grapheme/2a" # 110 0-0010 10 00-0000
check-ints-equal out-int, 0x80c2, "F - to-utf8/2a" # 110 0-0010 10 00-0000
}
# largest 2-byte utf-8
fn test-to-grapheme-two-bytes-max {
fn test-to-utf8-two-bytes-max {
var in/eax: code-point <- copy 0x7ff # 1-1111 11-1111
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0xbfdf, "F - to-grapheme/2b" # 110 1-1111 10 11-1111
check-ints-equal out-int, 0xbfdf, "F - to-utf8/2b" # 110 1-1111 10 11-1111
}
# smallest 3-byte utf-8
fn test-to-grapheme-three-bytes-min {
fn test-to-utf8-three-bytes-min {
var in/eax: code-point <- copy 0x800 # 10-0000 00-0000
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0x80a0e0, "F - to-grapheme/3a" # 1110 0000 10 10-0000 10 00-0000
check-ints-equal out-int, 0x80a0e0, "F - to-utf8/3a" # 1110 0000 10 10-0000 10 00-0000
}
# largest 3-byte utf-8
fn test-to-grapheme-three-bytes-max {
fn test-to-utf8-three-bytes-max {
var in/eax: code-point <- copy 0xffff # 1111 11-1111 11-1111
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0xbfbfef, "F - to-grapheme/3b" # 1110 1111 10 11-1111 10 11-1111
check-ints-equal out-int, 0xbfbfef, "F - to-utf8/3b" # 1110 1111 10 11-1111 10 11-1111
}
# smallest 4-byte utf-8
fn test-to-grapheme-four-bytes-min {
fn test-to-utf8-four-bytes-min {
var in/eax: code-point <- copy 0x10000 # 1-0000 00-0000 00-0000
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0x808090f0, "F - to-grapheme/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000
check-ints-equal out-int, 0x808090f0, "F - to-utf8/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000
}
# largest 4-byte utf-8
fn test-to-grapheme-four-bytes-max {
fn test-to-utf8-four-bytes-max {
var in/eax: code-point <- copy 0x1fffff # 111 11-1111 11-1111 11-1111
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0xbfbfbff7, "F - to-grapheme/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111
check-ints-equal out-int, 0xbfbfbff7, "F - to-utf8/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111
}
# read the next grapheme from a stream of bytes
fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
# read the next code-point-utf8 from a stream of bytes
fn read-code-point-utf8 in: (addr stream byte) -> _/eax: code-point-utf8 {
# if at eof, return EOF
{
var eof?/eax: boolean <- stream-empty? in
@ -241,18 +236,18 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
}
var c/eax: byte <- read-byte in
var num-trailers/ecx: int <- copy 0
$read-grapheme:compute-length: {
$read-code-point-utf8:compute-length: {
# single byte: just return it
compare c, 0xc0
{
break-if->=
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
compare c, 0xfe
{
break-if-<
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
# 2 bytes
@ -260,27 +255,27 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
{
break-if->=
num-trailers <- copy 1
break $read-grapheme:compute-length
break $read-code-point-utf8:compute-length
}
# 3 bytes
compare c, 0xf0
{
break-if->=
num-trailers <- copy 2
break $read-grapheme:compute-length
break $read-code-point-utf8:compute-length
}
# 4 bytes
compare c, 0xf8
{
break-if->=
num-trailers <- copy 3
break $read-grapheme:compute-length
break $read-code-point-utf8:compute-length
}
abort "utf-8 encodings larger than 4 bytes are not yet supported"
return 0
}
# prepend trailer bytes
var result/edi: grapheme <- copy c
var result/edi: code-point-utf8 <- copy c
var num-byte-shifts/edx: int <- copy 1
{
compare num-trailers, 0
@ -297,34 +292,34 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
return result
}
fn test-read-grapheme {
fn test-read-code-point-utf8 {
var s: (stream byte 0x30)
var s2/ecx: (addr stream byte) <- address s
write s2, "aΒcde"
var c/eax: grapheme <- read-grapheme s2
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x61, "F - test grapheme/0"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x61, "F - test code-point-utf8/0"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test grapheme/1"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test code-point-utf8/1"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x63, "F - test grapheme/2"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x63, "F - test code-point-utf8/2"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x96b8e4, "F - test grapheme/3"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x96b8e4, "F - test code-point-utf8/3"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x64, "F - test grapheme/4"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x64, "F - test code-point-utf8/4"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x8c95e7, "F - test grapheme/5"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x8c95e7, "F - test code-point-utf8/5"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x65, "F - test grapheme/6"
check-ints-equal n, 0x65, "F - test code-point-utf8/6"
}
fn grapheme-length g: grapheme -> _/edx: int {
fn utf8-length g: code-point-utf8 -> _/edx: int {
{
compare g, 0xff
break-if->
@ -389,23 +384,23 @@ fn test-shift-left-bytes-5 {
check-ints-equal result, 0, "F - shift-left-bytes >4"
}
# write a grapheme to a stream of bytes
# write a code-point-utf8 to a stream of bytes
# this is like write-to-stream, except we skip leading 0 bytes
fn write-grapheme out: (addr stream byte), g: grapheme {
$write-grapheme:body: {
fn write-code-point-utf8 out: (addr stream byte), g: code-point-utf8 {
$write-code-point-utf8:body: {
var c/eax: int <- copy g
append-byte out, c # first byte is always written
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
break-if-= $write-code-point-utf8:body
append-byte out, c
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
break-if-= $write-code-point-utf8:body
append-byte out, c
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
break-if-= $write-code-point-utf8:body
append-byte out, c
}
}

View File

@ -1,4 +1,4 @@
# read up to 'len' graphemes after skipping the first 'start' ones
# read up to 'len' code-point-utf8s after skipping the first 'start' ones
fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) {
var in-stream: (stream byte 0x100)
var in-stream-addr/esi: (addr stream byte) <- address in-stream
@ -6,29 +6,29 @@ fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle a
var out-stream: (stream byte 0x100)
var out-stream-addr/edi: (addr stream byte) <- address out-stream
$substring:core: {
# skip 'start' graphemes
# skip 'start' code-point-utf8s
var i/eax: int <- copy 0
{
compare i, start
break-if->=
{
var dummy/eax: grapheme <- read-grapheme in-stream-addr
var dummy/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
compare dummy, 0xffffffff/end-of-file
break-if-= $substring:core
}
i <- increment
loop
}
# copy 'len' graphemes
# copy 'len' code-point-utf8s
i <- copy 0
{
compare i, len
break-if->=
{
var g/eax: grapheme <- read-grapheme in-stream-addr
var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
compare g, 0xffffffff/end-of-file
break-if-= $substring:core
write-grapheme out-stream-addr, g
write-code-point-utf8 out-stream-addr, g
}
i <- increment
loop
@ -85,7 +85,7 @@ fn test-substring {
check-strings-equal out, "bcde", "F - test-substring/middle-too-small"
}
fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte)) {
fn split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte)) {
var in-stream: (stream byte 0x100)
var in-stream-addr/esi: (addr stream byte) <- address in-stream
write in-stream-addr, in
@ -94,10 +94,10 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array
var curr-stream: (stream byte 0x100)
var curr-stream-addr/ecx: (addr stream byte) <- address curr-stream
$split-string:core: {
var g/eax: grapheme <- read-grapheme in-stream-addr
var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
compare g, 0xffffffff
break-if-=
#? print-grapheme-to-real-screen g
#? print-code-point-utf8-to-real-screen g
#? print-string-to-real-screen "\n"
compare g, delim
{
@ -110,7 +110,7 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array
clear-stream curr-stream-addr
loop $split-string:core
}
write-grapheme curr-stream-addr, g
write-code-point-utf8 curr-stream-addr, g
loop
}
stream-to-array tokens-stream-addr, out

View File

@ -71,7 +71,7 @@ fn initialize-screen _screen: (addr screen), width: int, height: int, pixel-grap
copy-to *dest, 0
}
# in graphemes
# in code-point-utf8s
fn screen-size _screen: (addr screen) -> _/eax: int, _/ecx: int {
var screen/esi: (addr screen) <- copy _screen
var width/eax: int <- copy 0
@ -459,7 +459,7 @@ fn clear-rect _screen: (addr screen), xmin: int, ymin: int, xmax: int, ymax: int
set-cursor-position screen, 0, 0
}
# there's no grapheme that guarantees to cover every pixel, so we'll bump down
# there's no code-point-utf8 that is guaranteed to cover every pixel, so we'll bump down
# to pixels for a real screen
fn clear-real-screen {
var y/eax: int <- copy 0

View File

@ -117,7 +117,7 @@ fn draw-text-rightward screen: (addr screen), text: (addr array byte), x: int, x
fn draw-stream-rightward screen: (addr screen), stream: (addr stream byte), x: int, xmax: int, y: int, color: int, background-color: int -> _/eax: int {
var xcurr/ecx: int <- copy x
{
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
compare g, 0xffffffff/end-of-file
break-if-=
var c/eax: code-point <- to-code-point g
@ -218,7 +218,7 @@ fn draw-stream-wrapping-right-then-down screen: (addr screen), stream: (addr str
next-c <- copy 0
break $draw-stream-wrapping-right-then-down:read-base
}
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
var _c/eax: code-point <- to-code-point g
c <- copy _c
}
@ -240,7 +240,7 @@ fn draw-stream-wrapping-right-then-down screen: (addr screen), stream: (addr str
compare done?, 0/false
break-if-!=
# read a character
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
var c/eax: code-point <- to-code-point g
# if not a combining character, save for next iteration and loop
{
@ -343,7 +343,7 @@ fn draw-int32-hex-wrapping-right-then-down screen: (addr screen), n: int, xmin:
var xcurr/edx: int <- copy x
var ycurr/ecx: int <- copy y
{
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
compare g, 0xffffffff/end-of-file
break-if-=
var c/eax: code-point <- to-code-point g
@ -398,7 +398,7 @@ fn draw-int32-decimal-wrapping-right-then-down screen: (addr screen), n: int, xm
var xcurr/edx: int <- copy x
var ycurr/ecx: int <- copy y
{
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
compare g, 0xffffffff/end-of-file
break-if-=
var c/eax: code-point <- to-code-point g
@ -466,7 +466,7 @@ fn draw-text-downward screen: (addr screen), text: (addr array byte), x: int, y:
fn draw-stream-downward screen: (addr screen), stream: (addr stream byte), x: int, y: int, ymax: int, color: int, background-color: int -> _/eax: int {
var ycurr/ecx: int <- copy y
{
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
compare g, 0xffffffff/end-of-file
break-if-=
var c/eax: code-point <- to-code-point g
@ -508,7 +508,7 @@ fn draw-stream-wrapping-down-then-right screen: (addr screen), stream: (addr str
var xcurr/edx: int <- copy x
var ycurr/ecx: int <- copy y
{
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
compare g, 0xffffffff/end-of-file
break-if-=
var c/eax: code-point <- to-code-point g

View File

@ -12,7 +12,7 @@ fn check-screen-row-from _screen: (addr screen), x: int, y: int, expected: (addr
var screen/esi: (addr screen) <- copy _screen
var failure-count/edi: int <- copy 0
var index/ecx: int <- screen-cell-index screen, x, y
# compare 'expected' with the screen contents starting at 'index', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'index', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -26,16 +26,16 @@ fn check-screen-row-from _screen: (addr screen), x: int, y: int, expected: (addr
break-if-!=
var _c/eax: code-point <- screen-code-point-at-index screen, index
var c/ebx: code-point <- copy _c
var expected-grapheme/eax: grapheme <- read-grapheme e-addr
var expected-code-point/eax: code-point <- to-code-point expected-grapheme
# compare graphemes
$check-screen-row-from:compare-graphemes: {
# if expected-code-point is space, null grapheme is also ok
var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-code-point/eax: code-point <- to-code-point expected-code-point-utf8
# compare code-point-utf8s
$check-screen-row-from:compare-code-point-utf8s: {
# if expected-code-point is space, null code-point-utf8 is also ok
{
compare expected-code-point, 0x20
break-if-!=
compare c, 0
break-if-= $check-screen-row-from:compare-graphemes
break-if-= $check-screen-row-from:compare-code-point-utf8s
}
# if (c == expected-code-point) print "."
compare c, expected-code-point
@ -79,7 +79,7 @@ fn check-screen-row-in-color screen: (addr screen), fg: int, y: int, expected: (
fn check-screen-row-in-color-from _screen: (addr screen), fg: int, y: int, x: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy _screen
var index/ecx: int <- screen-cell-index screen, x, y
# compare 'expected' with the screen contents starting at 'index', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'index', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -93,11 +93,11 @@ fn check-screen-row-in-color-from _screen: (addr screen), fg: int, y: int, x: in
break-if-!=
var _c/eax: code-point <- screen-code-point-at-index screen, index
var c/ebx: code-point <- copy _c
var expected-grapheme/eax: grapheme <- read-grapheme e-addr
var _expected-code-point/eax: code-point <- to-code-point expected-grapheme
var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var _expected-code-point/eax: code-point <- to-code-point expected-code-point-utf8
var expected-code-point/edi: code-point <- copy _expected-code-point
$check-screen-row-in-color-from:compare-cells: {
# if expected-code-point is space, null grapheme is also ok
# if expected-code-point is space, null code-point-utf8 is also ok
{
compare expected-code-point, 0x20
break-if-!=
@ -112,14 +112,14 @@ fn check-screen-row-in-color-from _screen: (addr screen), fg: int, y: int, x: in
compare color, fg
break-if-!= $check-screen-row-in-color-from:compare-cells
}
# compare graphemes
$check-screen-row-in-color-from:compare-graphemes: {
# compare code-point-utf8s
$check-screen-row-in-color-from:compare-code-point-utf8s: {
# if (c == expected-code-point) print "."
compare c, expected-code-point
{
break-if-!=
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg=cyan, 0/bg
break $check-screen-row-in-color-from:compare-graphemes
break $check-screen-row-in-color-from:compare-code-point-utf8s
}
# otherwise print an error
count-test-failure
@ -173,7 +173,7 @@ fn check-screen-row-in-background-color screen: (addr screen), bg: int, y: int,
fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y: int, x: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy _screen
var index/ecx: int <- screen-cell-index screen, x, y
# compare 'expected' with the screen contents starting at 'index', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'index', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -187,11 +187,11 @@ fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y:
break-if-!=
var _g/eax: code-point <- screen-code-point-at-index screen, index
var g/ebx: code-point <- copy _g
var expected-grapheme/eax: grapheme <- read-grapheme e-addr
var _expected-code-point/eax: code-point <- to-code-point expected-grapheme
var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var _expected-code-point/eax: code-point <- to-code-point expected-code-point-utf8
var expected-code-point/edi: code-point <- copy _expected-code-point
$check-screen-row-in-background-color-from:compare-cells: {
# if expected-code-point is space, null grapheme is also ok
# if expected-code-point is space, null code-point-utf8 is also ok
{
compare expected-code-point, 0x20
break-if-!=
@ -206,14 +206,14 @@ fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y:
compare background-color, bg
break-if-!= $check-screen-row-in-background-color-from:compare-cells
}
# compare graphemes
$check-screen-row-in-background-color-from:compare-graphemes: {
# compare code-point-utf8s
$check-screen-row-in-background-color-from:compare-code-point-utf8s: {
# if (g == expected-code-point) print "."
compare g, expected-code-point
{
break-if-!=
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg=cyan, 0/bg
break $check-screen-row-in-background-color-from:compare-graphemes
break $check-screen-row-in-background-color-from:compare-code-point-utf8s
}
# otherwise print an error
count-test-failure
@ -228,7 +228,7 @@ fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y:
draw-code-point-at-cursor-over-full-screen 0/screen, g, 3/cyan, 0/bg
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "'", 3/fg=cyan, 0/bg
move-cursor-to-left-margin-of-next-line 0/screen
break $check-screen-row-in-background-color-from:compare-graphemes
break $check-screen-row-in-background-color-from:compare-code-point-utf8s
}
$check-screen-row-in-background-color-from:compare-background-colors: {
var background-color/eax: int <- screen-background-color-at-index screen, index
@ -284,8 +284,8 @@ fn check-background-color-in-screen-row-from _screen: (addr screen), bg: int, y:
var unused?/eax: boolean <- screen-cell-unused-at-index? screen, index
compare unused?, 0/false
break-if-!=
var _expected-bit/eax: grapheme <- read-grapheme e-addr
var expected-bit/edi: grapheme <- copy _expected-bit
var _expected-bit/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-bit/edi: code-point-utf8 <- copy _expected-bit
$check-background-color-in-screen-row-from:compare-cells: {
var background-color/eax: int <- screen-background-color-at-index screen, index
# if expected-bit is space, assert that background is NOT bg
@ -336,23 +336,23 @@ fn check-background-color-in-screen-row-from _screen: (addr screen), bg: int, y:
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg=cyan, 0/bg
}
fn test-draw-single-grapheme {
fn test-draw-single-code-point-utf8 {
var _screen: screen
var screen/esi: (addr screen) <- address _screen
initialize-screen screen, 5, 4, 0/no-pixel-graphics
var dummy/eax: int <- draw-code-point screen, 0x61/a, 0/x, 0/y, 1/fg, 2/bg
check-screen-row screen, 0/y, "a", "F - test-draw-single-grapheme" # top-left corner of the screen
check-screen-row-in-color screen, 1/fg, 0/y, "a", "F - test-draw-single-grapheme-fg"
check-screen-row-in-background-color screen, 2/bg, 0/y, "a", "F - test-draw-single-grapheme-bg"
check-background-color-in-screen-row screen, 2/bg, 0/y, "x ", "F - test-draw-single-grapheme-bg2"
check-screen-row screen, 0/y, "a", "F - test-draw-single-code-point-utf8" # top-left corner of the screen
check-screen-row-in-color screen, 1/fg, 0/y, "a", "F - test-draw-single-code-point-utf8-fg"
check-screen-row-in-background-color screen, 2/bg, 0/y, "a", "F - test-draw-single-code-point-utf8-bg"
check-background-color-in-screen-row screen, 2/bg, 0/y, "x ", "F - test-draw-single-code-point-utf8-bg2"
}
fn test-draw-multiple-graphemes {
fn test-draw-multiple-code-point-utf8s {
var _screen: screen
var screen/esi: (addr screen) <- address _screen
initialize-screen screen, 0x10/rows, 4/cols, 0/no-pixel-graphics
draw-text-wrapping-right-then-down-from-cursor-over-full-screen screen, "Hello, 世界", 1/fg, 2/bg
check-screen-row screen, 0/y, "Hello, 世界", "F - test-draw-multiple-graphemes"
check-screen-row-in-color screen, 1/fg, 0/y, "Hello, 世界", "F - test-draw-multiple-graphemes-fg"
check-background-color-in-screen-row screen, 2/bg, 0/y, "xxxxxxxxx ", "F - test-draw-multiple-graphemes-bg2"
check-screen-row screen, 0/y, "Hello, 世界", "F - test-draw-multiple-code-point-utf8s"
check-screen-row-in-color screen, 1/fg, 0/y, "Hello, 世界", "F - test-draw-multiple-code-point-utf8s-fg"
check-background-color-in-screen-row screen, 2/bg, 0/y, "xxxxxxxxx ", "F - test-draw-multiple-code-point-utf8s-bg2"
}

View File

@ -1,26 +1,26 @@
# grapheme stacks are the smallest unit of editable text
# code-point-utf8 stacks are the smallest unit of editable text
type grapheme-stack {
data: (handle array grapheme)
type code-point-utf8-stack {
data: (handle array code-point-utf8)
top: int
}
fn initialize-grapheme-stack _self: (addr grapheme-stack), n: int {
var self/esi: (addr grapheme-stack) <- copy _self
var d/edi: (addr handle array grapheme) <- get self, data
fn initialize-code-point-utf8-stack _self: (addr code-point-utf8-stack), n: int {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var d/edi: (addr handle array code-point-utf8) <- get self, data
populate d, n
var top/eax: (addr int) <- get self, top
copy-to *top, 0
}
fn clear-grapheme-stack _self: (addr grapheme-stack) {
var self/esi: (addr grapheme-stack) <- copy _self
fn clear-code-point-utf8-stack _self: (addr code-point-utf8-stack) {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top/eax: (addr int) <- get self, top
copy-to *top, 0
}
fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
fn code-point-utf8-stack-empty? _self: (addr code-point-utf8-stack) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top/eax: (addr int) <- get self, top
compare *top, 0
{
@ -30,26 +30,26 @@ fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean {
return 0/false
}
fn grapheme-stack-length _self: (addr grapheme-stack) -> _/eax: int {
var self/esi: (addr grapheme-stack) <- copy _self
fn code-point-utf8-stack-length _self: (addr code-point-utf8-stack) -> _/eax: int {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top/eax: (addr int) <- get self, top
return *top
}
fn push-grapheme-stack _self: (addr grapheme-stack), _val: grapheme {
var self/esi: (addr grapheme-stack) <- copy _self
fn push-code-point-utf8-stack _self: (addr code-point-utf8-stack), _val: code-point-utf8 {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top-addr/ecx: (addr int) <- get self, top
var data-ah/edx: (addr handle array grapheme) <- get self, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var data-ah/edx: (addr handle array code-point-utf8) <- get self, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
var top/edx: int <- copy *top-addr
var dest-addr/edx: (addr grapheme) <- index data, top
var val/eax: grapheme <- copy _val
var dest-addr/edx: (addr code-point-utf8) <- index data, top
var val/eax: code-point-utf8 <- copy _val
copy-to *dest-addr, val
add-to *top-addr, 1
}
fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme {
var self/esi: (addr grapheme-stack) <- copy _self
fn pop-code-point-utf8-stack _self: (addr code-point-utf8-stack) -> _/eax: code-point-utf8 {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top-addr/ecx: (addr int) <- get self, top
{
compare *top-addr, 0
@ -57,25 +57,25 @@ fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme {
return -1
}
subtract-from *top-addr, 1
var data-ah/edx: (addr handle array grapheme) <- get self, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var data-ah/edx: (addr handle array code-point-utf8) <- get self, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
var top/edx: int <- copy *top-addr
var result-addr/eax: (addr grapheme) <- index data, top
var result-addr/eax: (addr code-point-utf8) <- index data, top
return *result-addr
}
fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) {
var src/esi: (addr grapheme-stack) <- copy _src
var data-ah/edi: (addr handle array grapheme) <- get src, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn copy-code-point-utf8-stack _src: (addr code-point-utf8-stack), dest: (addr code-point-utf8-stack) {
var src/esi: (addr code-point-utf8-stack) <- copy _src
var data-ah/edi: (addr handle array code-point-utf8) <- get src, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get src, top
var i/eax: int <- copy 0
{
compare i, *top-addr
break-if->=
var g/edx: (addr grapheme) <- index data, i
push-grapheme-stack dest, *g
var g/edx: (addr code-point-utf8) <- index data, i
push-code-point-utf8-stack dest, *g
i <- increment
loop
}
@ -84,12 +84,12 @@ fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack)
# dump stack to screen from bottom to top
# hardcoded colors:
# matching paren
fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int {
var self/esi: (addr grapheme-stack) <- copy _self
fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var matching-open-paren-index/edx: int <- get-matching-open-paren-index self, highlight-matching-open-paren?, open-paren-depth
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var x/eax: int <- copy _x
var y/ecx: int <- copy _y
var top-addr/esi: (addr int) <- get self, top
@ -100,7 +100,7 @@ fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _sel
{
var c: code-point
{
var g/eax: (addr grapheme) <- index data, i
var g/eax: (addr code-point-utf8) <- index data, i
var tmp/eax: code-point <- to-code-point *g
copy-to c, tmp
}
@ -123,7 +123,7 @@ fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _sel
}
# helper for small words
fn render-stack-from-bottom screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int {
fn render-stack-from-bottom screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int {
var _width/eax: int <- copy 0
var _height/ecx: int <- copy 0
_width, _height <- screen-size screen
@ -136,16 +136,16 @@ fn render-stack-from-bottom screen: (addr screen), self: (addr grapheme-stack),
}
# dump stack to screen from top to bottom
# optionally render a 'cursor' with the top grapheme
# optionally render a 'cursor' with the top code-point-utf8
# hard-coded colors:
# matching paren
# cursor
fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int {
var self/esi: (addr grapheme-stack) <- copy _self
fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var matching-close-paren-index/edx: int <- get-matching-close-paren-index self, render-cursor?
var data-ah/eax: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
var data-ah/eax: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var x/eax: int <- copy _x
var y/ecx: int <- copy _y
var top-addr/ebx: (addr int) <- get self, top
@ -159,7 +159,7 @@ fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self:
break-if-<
var c: code-point
{
var g/eax: (addr grapheme) <- index data, i
var g/eax: (addr code-point-utf8) <- index data, i
var tmp/eax: code-point <- to-code-point *g
copy-to c, tmp
}
@ -184,7 +184,7 @@ fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self:
#
var c: code-point
{
var g/eax: (addr grapheme) <- index data, i
var g/eax: (addr code-point-utf8) <- index data, i
var tmp/eax: code-point <- to-code-point *g
copy-to c, tmp
}
@ -196,7 +196,7 @@ fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self:
}
# helper for small words
fn render-stack-from-top screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int {
fn render-stack-from-top screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int {
var _width/eax: int <- copy 0
var _height/ecx: int <- copy 0
_width, _height <- screen-size screen
@ -208,190 +208,190 @@ fn render-stack-from-top screen: (addr screen), self: (addr grapheme-stack), x:
return x2 # y2? yolo
}
fn test-render-grapheme-stack {
fn test-render-code-point-utf8-stack {
# setup: gs = "abc"
var gs-storage: grapheme-stack
var gs/edi: (addr grapheme-stack) <- address gs-storage
initialize-grapheme-stack gs, 5
var g/eax: grapheme <- copy 0x61/a
push-grapheme-stack gs, g
var gs-storage: code-point-utf8-stack
var gs/edi: (addr code-point-utf8-stack) <- address gs-storage
initialize-code-point-utf8-stack gs, 5
var g/eax: code-point-utf8 <- copy 0x61/a
push-code-point-utf8-stack gs, g
g <- copy 0x62/b
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x63/c
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
# setup: screen
var screen-storage: screen
var screen/esi: (addr screen) <- address screen-storage
initialize-screen screen, 5, 4, 0/no-pixel-graphics
#
var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 0/y, 0/no-highlight-matching-open-paren, 0/open-paren-depth
check-screen-row screen, 0/y, "abc ", "F - test-render-grapheme-stack from bottom"
check-ints-equal x, 3, "F - test-render-grapheme-stack from bottom: result"
check-background-color-in-screen-row screen, 3/bg=reverse, 0/y, " ", "F - test-render-grapheme-stack from bottom: bg"
check-screen-row screen, 0/y, "abc ", "F - test-render-code-point-utf8-stack from bottom"
check-ints-equal x, 3, "F - test-render-code-point-utf8-stack from bottom: result"
check-background-color-in-screen-row screen, 3/bg=reverse, 0/y, " ", "F - test-render-code-point-utf8-stack from bottom: bg"
#
var x/eax: int <- render-stack-from-top screen, gs, 0/x, 1/y, 0/cursor=false
check-screen-row screen, 1/y, "cba ", "F - test-render-grapheme-stack from top without cursor"
check-ints-equal x, 3, "F - test-render-grapheme-stack from top without cursor: result"
check-background-color-in-screen-row screen, 3/bg=reverse, 1/y, " ", "F - test-render-grapheme-stack from top without cursor: bg"
check-screen-row screen, 1/y, "cba ", "F - test-render-code-point-utf8-stack from top without cursor"
check-ints-equal x, 3, "F - test-render-code-point-utf8-stack from top without cursor: result"
check-background-color-in-screen-row screen, 3/bg=reverse, 1/y, " ", "F - test-render-code-point-utf8-stack from top without cursor: bg"
#
var x/eax: int <- render-stack-from-top screen, gs, 0/x, 2/y, 1/cursor=true
check-screen-row screen, 2/y, "cba ", "F - test-render-grapheme-stack from top with cursor"
check-ints-equal x, 3, "F - test-render-grapheme-stack from top with cursor: result"
check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-grapheme-stack from top with cursor: bg"
check-screen-row screen, 2/y, "cba ", "F - test-render-code-point-utf8-stack from top with cursor"
check-ints-equal x, 3, "F - test-render-code-point-utf8-stack from top with cursor: result"
check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-code-point-utf8-stack from top with cursor: bg"
}
fn test-render-grapheme-stack-while-highlighting-matching-close-paren {
fn test-render-code-point-utf8-stack-while-highlighting-matching-close-paren {
# setup: gs = "(b)"
var gs-storage: grapheme-stack
var gs/edi: (addr grapheme-stack) <- address gs-storage
initialize-grapheme-stack gs, 5
var g/eax: grapheme <- copy 0x29/close-paren
push-grapheme-stack gs, g
var gs-storage: code-point-utf8-stack
var gs/edi: (addr code-point-utf8-stack) <- address gs-storage
initialize-code-point-utf8-stack gs, 5
var g/eax: code-point-utf8 <- copy 0x29/close-paren
push-code-point-utf8-stack gs, g
g <- copy 0x62/b
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x28/open-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
# setup: screen
var screen-storage: screen
var screen/esi: (addr screen) <- address screen-storage
initialize-screen screen, 5, 4, 0/no-pixel-graphics
#
var x/eax: int <- render-stack-from-top screen, gs, 0/x, 2/y, 1/cursor=true
check-screen-row screen, 2/y, "(b) ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren"
check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren: cursor"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren: matching paren"
check-screen-row screen, 2/y, "(b) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren"
check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren: cursor"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren: matching paren"
}
fn test-render-grapheme-stack-while-highlighting-matching-close-paren-2 {
fn test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2 {
# setup: gs = "(a (b)) c"
var gs-storage: grapheme-stack
var gs/edi: (addr grapheme-stack) <- address gs-storage
initialize-grapheme-stack gs, 0x10
var g/eax: grapheme <- copy 0x63/c
push-grapheme-stack gs, g
var gs-storage: code-point-utf8-stack
var gs/edi: (addr code-point-utf8-stack) <- address gs-storage
initialize-code-point-utf8-stack gs, 0x10
var g/eax: code-point-utf8 <- copy 0x63/c
push-code-point-utf8-stack gs, g
g <- copy 0x20/space
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x29/close-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x29/close-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x62/b
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x28/open-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x20/space
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x61/a
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x28/open-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
# setup: screen
var screen-storage: screen
var screen/esi: (addr screen) <- address screen-storage
initialize-screen screen, 5, 4, 0/no-pixel-graphics
#
var x/eax: int <- render-stack-from-top screen, gs, 0/x, 2/y, 1/cursor=true
check-screen-row screen, 2/y, "(a (b)) c ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren-2"
check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren-2: cursor"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren-2: matching paren"
check-screen-row screen, 2/y, "(a (b)) c ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2"
check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2: cursor"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2: matching paren"
}
fn test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end {
fn test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end {
# setup: gs = "(b)"
var gs-storage: grapheme-stack
var gs/edi: (addr grapheme-stack) <- address gs-storage
initialize-grapheme-stack gs, 5
var g/eax: grapheme <- copy 0x28/open-paren
push-grapheme-stack gs, g
var gs-storage: code-point-utf8-stack
var gs/edi: (addr code-point-utf8-stack) <- address gs-storage
initialize-code-point-utf8-stack gs, 5
var g/eax: code-point-utf8 <- copy 0x28/open-paren
push-code-point-utf8-stack gs, g
g <- copy 0x62/b
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x29/close-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
# setup: screen
var screen-storage: screen
var screen/esi: (addr screen) <- address screen-storage
initialize-screen screen, 5, 4, 0/no-pixel-graphics
#
var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 1/open-paren-depth
check-screen-row screen, 2/y, "(b) ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end: matching paren"
check-screen-row screen, 2/y, "(b) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end: matching paren"
}
fn test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2 {
fn test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2 {
# setup: gs = "a((b))"
var gs-storage: grapheme-stack
var gs/edi: (addr grapheme-stack) <- address gs-storage
initialize-grapheme-stack gs, 0x10
var g/eax: grapheme <- copy 0x61/a
push-grapheme-stack gs, g
var gs-storage: code-point-utf8-stack
var gs/edi: (addr code-point-utf8-stack) <- address gs-storage
initialize-code-point-utf8-stack gs, 0x10
var g/eax: code-point-utf8 <- copy 0x61/a
push-code-point-utf8-stack gs, g
g <- copy 0x28/open-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x28/open-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x62/b
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x29/close-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x29/close-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
# setup: screen
var screen-storage: screen
var screen/esi: (addr screen) <- address screen-storage
initialize-screen screen, 5, 4, 0/no-pixel-graphics
#
var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 1/open-paren-depth
check-screen-row screen, 2/y, "a((b)) ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2: matching paren"
check-screen-row screen, 2/y, "a((b)) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2: matching paren"
}
fn test-render-grapheme-stack-while-highlighting-matching-open-paren {
fn test-render-code-point-utf8-stack-while-highlighting-matching-open-paren {
# setup: gs = "(b"
var gs-storage: grapheme-stack
var gs/edi: (addr grapheme-stack) <- address gs-storage
initialize-grapheme-stack gs, 5
var g/eax: grapheme <- copy 0x28/open-paren
push-grapheme-stack gs, g
var gs-storage: code-point-utf8-stack
var gs/edi: (addr code-point-utf8-stack) <- address gs-storage
initialize-code-point-utf8-stack gs, 5
var g/eax: code-point-utf8 <- copy 0x28/open-paren
push-code-point-utf8-stack gs, g
g <- copy 0x62/b
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
# setup: screen
var screen-storage: screen
var screen/esi: (addr screen) <- address screen-storage
initialize-screen screen, 5, 4, 0/no-pixel-graphics
#
var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 0/open-paren-depth
check-screen-row screen, 2/y, "(b ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren: matching paren"
check-screen-row screen, 2/y, "(b ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren: matching paren"
}
fn test-render-grapheme-stack-while-highlighting-matching-open-paren-2 {
fn test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-2 {
# setup: gs = "a((b)"
var gs-storage: grapheme-stack
var gs/edi: (addr grapheme-stack) <- address gs-storage
initialize-grapheme-stack gs, 0x10
var g/eax: grapheme <- copy 0x61/a
push-grapheme-stack gs, g
var gs-storage: code-point-utf8-stack
var gs/edi: (addr code-point-utf8-stack) <- address gs-storage
initialize-code-point-utf8-stack gs, 0x10
var g/eax: code-point-utf8 <- copy 0x61/a
push-code-point-utf8-stack gs, g
g <- copy 0x28/open-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x28/open-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x62/b
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
g <- copy 0x29/close-paren
push-grapheme-stack gs, g
push-code-point-utf8-stack gs, g
# setup: screen
var screen-storage: screen
var screen/esi: (addr screen) <- address screen-storage
initialize-screen screen, 5, 4, 0/no-pixel-graphics
#
var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 0/open-paren-depth
check-screen-row screen, 2/y, "a((b) ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-2"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-2: matching paren"
check-screen-row screen, 2/y, "a((b) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-2"
check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-2: matching paren"
}
# return the index of the matching close-paren of the grapheme at cursor (top of stack)
# return the index of the matching close-paren of the code-point-utf8 at cursor (top of stack)
# or top index if there's no matching close-paren
fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: boolean -> _/edx: int {
var self/esi: (addr grapheme-stack) <- copy _self
fn get-matching-close-paren-index _self: (addr code-point-utf8-stack), render-cursor?: boolean -> _/edx: int {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top-addr/edx: (addr int) <- get self, top
# if not rendering cursor, return
compare render-cursor?, 0/false
@ -399,8 +399,8 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?:
break-if-!=
return *top-addr
}
var data-ah/eax: (addr handle array grapheme) <- get self, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var data-ah/eax: (addr handle array code-point-utf8) <- get self, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
var i/ecx: int <- copy *top-addr
# if stack is empty, return
compare i, 0
@ -410,7 +410,7 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?:
}
# if cursor is not '(' return
i <- decrement
var g/esi: (addr grapheme) <- index data, i
var g/esi: (addr code-point-utf8) <- index data, i
compare *g, 0x28/open-paren
{
break-if-=
@ -422,7 +422,7 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?:
{
compare i, 0
break-if-<
var g/esi: (addr grapheme) <- index data, i
var g/esi: (addr code-point-utf8) <- index data, i
compare *g, 0x28/open-paren
{
break-if-!=
@ -446,8 +446,8 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?:
# return the index of the first open-paren at the given depth
# or top index if there's no matching open-paren
fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean, depth: int -> _/edx: int {
var self/esi: (addr grapheme-stack) <- copy _self
fn get-matching-open-paren-index _self: (addr code-point-utf8-stack), control: boolean, depth: int -> _/edx: int {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top-addr/edx: (addr int) <- get self, top
# if not rendering cursor, return
compare control, 0/false
@ -455,8 +455,8 @@ fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean,
break-if-!=
return *top-addr
}
var data-ah/eax: (addr handle array grapheme) <- get self, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var data-ah/eax: (addr handle array code-point-utf8) <- get self, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
var i/ecx: int <- copy *top-addr
# if stack is empty, return
compare i, 0
@ -470,7 +470,7 @@ fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean,
{
compare i, 0
break-if-<
var g/esi: (addr grapheme) <- index data, i
var g/esi: (addr code-point-utf8) <- index data, i
compare *g, 0x29/close-paren
{
break-if-!=
@ -494,11 +494,11 @@ fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean,
# compare from bottom
# beware: modifies 'stream', which must be disposed of after a false result
fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn prefix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/ebx: int <- copy 0
{
@ -506,8 +506,8 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
break-if->=
# if curr != expected, return false
{
var curr-a/edx: (addr grapheme) <- index data, i
var expected/eax: grapheme <- read-grapheme s
var curr-a/edx: (addr code-point-utf8) <- index data, i
var expected/eax: code-point-utf8 <- read-code-point-utf8 s
{
compare expected, *curr-a
break-if-=
@ -522,11 +522,11 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
# compare from top
# beware: modifies 'stream', which must be disposed of after a false result
fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn suffix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/eax: (addr int) <- get self, top
var i/ebx: int <- copy *top-addr
i <- decrement
@ -534,8 +534,8 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
compare i, 0
break-if-<
{
var curr-a/edx: (addr grapheme) <- index data, i
var expected/eax: grapheme <- read-grapheme s
var curr-a/edx: (addr code-point-utf8) <- index data, i
var expected/eax: code-point-utf8 <- read-code-point-utf8 s
# if curr != expected, return false
{
compare expected, *curr-a
@ -549,18 +549,18 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
return 1 # true
}
fn grapheme-stack-is-decimal-integer? _self: (addr grapheme-stack) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/eax: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edx: (addr array grapheme) <- copy _data
fn code-point-utf8-stack-is-decimal-integer? _self: (addr code-point-utf8-stack) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/eax: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edx: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/ebx: int <- copy 0
var result/eax: boolean <- copy 1/true
$grapheme-stack-is-integer?:loop: {
$code-point-utf8-stack-is-integer?:loop: {
compare i, *top-addr
break-if->=
var g/edx: (addr grapheme) <- index data, i
var g/edx: (addr code-point-utf8) <- index data, i
result <- decimal-digit? *g
compare result, 0/false
break-if-=

File diff suppressed because it is too large Load Diff

View File

@ -32,13 +32,13 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 0/x 0/y, 3/fg 0/bg
var dummy/eax: int <- overlay-code-point-on-real-screen 0x0300/combining-grave-accent, 0/x 0/y, 3/fg 0/bg
# below a grapheme with a descender, the accent uglily overlaps
# below a code-point-utf8 with a descender, the accent uglily overlaps
# https://en.wikipedia.org/wiki/Descender
var dummy/eax: int <- draw-code-point-on-real-screen 0x67/g, 4/x 3/y, 3/fg 0/bg
var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 4/x 4/y, 3/fg 0/bg
var dummy/eax: int <- overlay-code-point-on-real-screen 0x0300/combining-grave-accent, 4/x 4/y, 3/fg 0/bg
# beside a grapheme with a descender, it becomes more obvious that monowidth fonts can't make baselines line up
# beside a code-point-utf8 with a descender, it becomes more obvious that monowidth fonts can't make baselines line up
# https://en.wikipedia.org/wiki/Baseline_(typography)
var dummy/eax: int <- draw-code-point-on-real-screen 0x67/g, 8/x 3/y, 3/fg 0/bg
var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 9/x 3/y, 3/fg 0/bg
@ -82,62 +82,62 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
var dummy/eax: int <- draw-code-point-on-real-screen 0x0915/devanagari-letter-ka, 0x13/x 9/y, 3/fg 0/bg
var dummy/eax: int <- overlay-code-point-on-real-screen 0x0903/devanagari-visarga, 0x13/x 9/y, 3/fg 0/bg
# render the same devanagari letters as a single stream of utf-8 graphemes rather than individual code-points.
# render the same devanagari letters as a single stream of utf-8 code-point-utf8s rather than individual code-points.
var text-storage: (stream byte 0x200)
var text/esi: (addr stream byte) <- address text-storage
var g/eax: grapheme <- to-grapheme 0x0915/devanagari-letter-ka
var ka/ecx: grapheme <- copy g
var g/eax: code-point-utf8 <- to-utf8 0x0915/devanagari-letter-ka
var ka/ecx: code-point-utf8 <- copy g
# ka
write-grapheme text, ka
write-code-point-utf8 text, ka
# kaa
write-grapheme text, ka
g <- to-grapheme 0x093e/devanagari-vowel-aa
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x093e/devanagari-vowel-aa
write-code-point-utf8 text, g
# ki
write-grapheme text, ka
g <- to-grapheme 0x093f/devanagari-vowel-i
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x093f/devanagari-vowel-i
write-code-point-utf8 text, g
# kee
write-grapheme text, ka
g <- to-grapheme 0x0940/devanagari-vowel-ii
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x0940/devanagari-vowel-ii
write-code-point-utf8 text, g
# ku
write-grapheme text, ka
g <- to-grapheme 0x0941/devanagari-vowel-u
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x0941/devanagari-vowel-u
write-code-point-utf8 text, g
# koo
write-grapheme text, ka
g <- to-grapheme 0x0942/devanagari-vowel-oo
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x0942/devanagari-vowel-oo
write-code-point-utf8 text, g
# kay
write-grapheme text, ka
g <- to-grapheme 0x0947/devanagari-vowel-E
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x0947/devanagari-vowel-E
write-code-point-utf8 text, g
# kai
write-grapheme text, ka
g <- to-grapheme 0x0948/devanagari-vowel-ai
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x0948/devanagari-vowel-ai
write-code-point-utf8 text, g
# ko
write-grapheme text, ka
g <- to-grapheme 0x094b/devanagari-vowel-o
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x094b/devanagari-vowel-o
write-code-point-utf8 text, g
# kow
write-grapheme text, ka
g <- to-grapheme 0x094f/devanagari-vowel-aw
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x094f/devanagari-vowel-aw
write-code-point-utf8 text, g
# kan
write-grapheme text, ka
g <- to-grapheme 0x0902/devanagari-anusvara
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x0902/devanagari-anusvara
write-code-point-utf8 text, g
# kaha
write-grapheme text, ka
g <- to-grapheme 0x0903/devanagari-visarga
write-grapheme text, g
write-code-point-utf8 text, ka
g <- to-utf8 0x0903/devanagari-visarga
write-code-point-utf8 text, g
# render everything
set-cursor-position screen, 4/x 0xe/y
draw-stream-wrapping-right-then-down-from-cursor-over-full-screen screen, text, 3/fg 0/bg
# a stream of tamil graphemes (with interspersed spaces for clarity) that don't look the same in Mu
# a stream of tamil code-point-utf8s (with interspersed spaces for clarity) that don't look the same in Mu
set-cursor-position 0, 4/x 0x12/y
draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0, "எ கு ", 3/fg 0/bg
set-cursor-position 0, 4/x 0x13/y

View File

@ -33,7 +33,7 @@ fn word-count in: (addr stream byte) -> _/eax: int {
var done?/eax: boolean <- stream-empty? in
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme in
var g/eax: code-point-utf8 <- read-code-point-utf8 in
{
compare g, 0x20/space
break-if-!=

View File

@ -28,7 +28,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
var second-screen/edi: (addr screen) <- address second-buffer
initialize-screen second-screen, 0x80, 0x30, 1/include-pixels
render second-screen, env
convert-graphemes-to-pixels second-screen
convert-code-point-utf8s-to-pixels second-screen
copy-pixels second-screen, screen
{
edit keyboard, env
@ -39,7 +39,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
step env
clear-screen second-screen
render second-screen, env
convert-graphemes-to-pixels second-screen
convert-code-point-utf8s-to-pixels second-screen
copy-pixels second-screen, screen
}
linger

View File

@ -615,12 +615,12 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add
}
compare c, 0xffffffff/end-of-file
break-if-=
$draw-json-stream-wrapping-right-then-down:render-grapheme: {
$draw-json-stream-wrapping-right-then-down:render-code-point-utf8: {
compare c, 0x5c/backslash
{
break-if-!=
xcurr, ycurr <- render-json-escaped-code-point screen, stream, xmin, ymin, xmax, ymax, xcurr, ycurr, color, background-color
break $draw-json-stream-wrapping-right-then-down:render-grapheme
break $draw-json-stream-wrapping-right-then-down:render-code-point-utf8
}
compare c, 0xa/newline
{
@ -629,7 +629,7 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add
var dummy/eax: int <- draw-code-point screen, 0x20/space, xcurr, ycurr, color, background-color
xcurr <- copy xmin
ycurr <- increment
break $draw-json-stream-wrapping-right-then-down:render-grapheme
break $draw-json-stream-wrapping-right-then-down:render-code-point-utf8
}
var offset/eax: int <- draw-code-point screen, c, xcurr, ycurr, color, background-color
# overlay a combining character if necessary
@ -639,7 +639,7 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add
break-if-!=
# read a character
# no combining character allowed here
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
var c/eax: code-point <- to-code-point g
# if not a combining character, save for next iteration and loop
{
@ -672,7 +672,7 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add
# Read the next utf-8 sequence from 'stream' and return it converted to a
# code point (via to-code-point).
# The only thing this wrapper adds is that it returns the result in a
# different register: ebx rather than the eax used by the helpers it calls.
# (The grapheme/code-point-utf8 pair of lines below is the before/after of the
# rename; both read one unit from the stream into 'g'.)
fn read-json-code-point stream: (addr stream byte) -> _/ebx: code-point {
var g/eax: grapheme <- read-grapheme stream
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
var result/eax: code-point <- to-code-point g
return result
}
@ -1012,7 +1012,7 @@ fn update-search _env: (addr environment), key: byte, users: (addr array user),
# otherwise delegate
var search-terms-ah/eax: (addr handle gap-buffer) <- get env, search-terms
var search-terms/eax: (addr gap-buffer) <- lookup *search-terms-ah
var g/ecx: grapheme <- copy key
var g/ecx: code-point-utf8 <- copy key
edit-gap-buffer search-terms, g
}

View File

@ -48,7 +48,7 @@
</dict>
<dict>
<key>match</key>
<string>\b(addr|array|boolean|byte|code-point|grapheme|handle|int|float|stream|type)\b</string>
<string>\b(addr|array|boolean|byte|code-point-utf8|code-point|handle|int|float|stream|type)\b</string>
<key>name</key>
<string>storage.type.mu</string>
</dict>

View File

@ -48,7 +48,7 @@
</dict>
<dict>
<key>match</key>
<string>\b(addr|array|boolean|byte|code-point|grapheme|handle|int|float|stream|type)\b</string>
<string>\b(addr|array|boolean|byte|code-point-utf8|code-point|handle|int|float|stream|type)\b</string>
<key>name</key>
<string>storage.type.mu</string>
</dict>

View File

@ -303,7 +303,7 @@ test-write-int32-decimal-negative-multiple-digits:
# . end
c3/return
decimal-digit?: # c: grapheme -> result/eax: boolean
decimal-digit?: # c: code-point-utf8 -> result/eax: boolean
# . prologue
55/push-ebp
89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
@ -402,7 +402,7 @@ test-decimal-digit-above-9:
81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp
c3/return
to-decimal-digit: # in: grapheme -> out/eax: int
to-decimal-digit: # in: code-point-utf8 -> out/eax: int
# . prologue
55/push-ebp
89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp

View File

@ -157,8 +157,8 @@ $print-stream-to-real-screen:end:
5d/pop-to-ebp
c3/return
# print a grapheme in utf-8 (only up to 4 bytes so far)
print-grapheme-to-real-screen: # c: grapheme
# print a code-point-utf8 in utf-8 (only up to 4 bytes so far)
print-code-point-utf8-to-real-screen: # c: code-point-utf8
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
@ -170,31 +170,31 @@ print-grapheme-to-real-screen: # c: grapheme
8a/byte-> *(ebp+8) 0/r32/al
# if (curr == 0) return
3d/compare-eax-and 0/imm32
74/jump-if-= $print-grapheme-to-real-screen:end/disp8
74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8
#
(print-byte-to-real-screen %eax)
# curr = *(ebp+9)
8a/byte-> *(ebp+9) 0/r32/al
# if (curr == 0) return
3d/compare-eax-and 0/imm32
74/jump-if-= $print-grapheme-to-real-screen:end/disp8
74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8
#
(print-byte-to-real-screen %eax)
# curr = *(ebp+10)
8a/byte-> *(ebp+0xa) 0/r32/al
# if (curr == 0) return
3d/compare-eax-and 0/imm32
74/jump-if-= $print-grapheme-to-real-screen:end/disp8
74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8
#
(print-byte-to-real-screen %eax)
# curr = *(ebp+11)
8a/byte-> *(ebp+0xb) 0/r32/al
# if (curr == 0) return
3d/compare-eax-and 0/imm32
74/jump-if-= $print-grapheme-to-real-screen:end/disp8
74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8
#
(print-byte-to-real-screen %eax)
$print-grapheme-to-real-screen:end:
$print-code-point-utf8-to-real-screen:end:
# . restore registers
58/pop-to-eax
# . epilogue

View File

@ -121,15 +121,15 @@ $enable-keyboard-type-mode:end:
# read keys or escapes up to 4 bytes
#
# fun fact: terminal escapes and graphemes in utf-8 don't conflict!
# - in graphemes all but the first/lowest byte will have a 1 in the MSB (be
# fun fact: terminal escapes and code-point-utf8s in utf-8 don't conflict!
# - in code-point-utf8s all but the first/lowest byte will have a 1 in the MSB (be
# greater than 0x7f)
# - in escapes every byte will have a 0 in the MSB
# the two categories overlap only when the first/lowest byte is 0x1b or 'esc'
#
# Only use this in immediate mode; in type (typewriter) mode 4 bytes may get
# parts of multiple keys.
read-key-from-real-keyboard: # -> result/eax: grapheme
read-key-from-real-keyboard: # -> result/eax: code-point-utf8
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp

View File

@ -113,8 +113,8 @@ sig skip-until-close-paren line: (addr stream byte)
#sig skip-until-close-paren-in-slice curr: (addr byte), end: (addr byte) -> _/eax: (addr byte)
sig write-stream-data f: (addr buffered-file), s: (addr stream byte)
sig write-int32-decimal out: (addr stream byte), n: int
sig decimal-digit? c: grapheme -> _/eax: boolean
sig to-decimal-digit in: grapheme -> _/eax: int
sig decimal-digit? c: code-point-utf8 -> _/eax: boolean
sig to-decimal-digit in: code-point-utf8 -> _/eax: int
# bad name alert
# next-word really tokenizes
# next-raw-word really reads whitespace-separated words
@ -159,7 +159,7 @@ sig move-cursor-on-real-screen row: int, column: int
sig print-string-to-real-screen s: (addr array byte)
sig print-slice-to-real-screen s: (addr slice)
sig print-stream-to-real-screen s: (addr stream byte)
sig print-grapheme-to-real-screen c: grapheme
sig print-code-point-utf8-to-real-screen c: code-point-utf8
sig print-int32-hex-to-real-screen n: int
sig print-int32-hex-bits-to-real-screen n: int, bits: int
sig print-int32-decimal-to-real-screen n: int
@ -174,7 +174,7 @@ sig hide-cursor-on-real-screen
sig show-cursor-on-real-screen
sig enable-keyboard-immediate-mode
sig enable-keyboard-type-mode
sig read-key-from-real-keyboard -> _/eax: grapheme
sig read-key-from-real-keyboard -> _/eax: code-point-utf8
sig read-line-from-real-keyboard out: (addr stream byte)
sig open filename: (addr array byte), write?: boolean, out: (addr handle buffered-file)
sig populate-buffered-file-containing contents: (addr array byte), out: (addr handle buffered-file)

View File

@ -1,31 +1,31 @@
# Helpers for Unicode.
#
# Mu has no characters, only code points and graphemes.
# Mu has no characters, only code points and code-point-utf8s.
# Code points are the indivisible atoms of text streams.
# https://en.wikipedia.org/wiki/Code_point
# Graphemes are the smallest self-contained unit of text.
# Graphemes may consist of multiple code points.
#
# Mu graphemes are always represented in utf-8, and they are required to fit
# A code-point-utf8 is a single code point encoded in utf-8; it is required to fit
# in 4 bytes.
#
# Mu doesn't currently support combining code points, or graphemes made of
# Mu doesn't currently support combining code points, or graphemes made of
# multiple code points. One day we will.
# On Linux, we also don't currently support code points that translate into
# multiple or wide graphemes. (In particular, Tab will never be supported.)
# multiple or wide glyphs on screen. (In particular, Tab will never be supported.)
# transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox
# https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm
fn to-grapheme in: code-point -> _/eax: grapheme {
fn to-utf8 in: code-point -> _/eax: code-point-utf8 {
var c/eax: int <- copy in
var num-trailers/ecx: int <- copy 0
var first/edx: int <- copy 0
$to-grapheme:compute-length: {
$to-utf8:compute-length: {
# single byte: just return it
compare c, 0x7f
{
break-if->
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
# 2 bytes
@ -34,7 +34,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
break-if->
num-trailers <- copy 1
first <- copy 0xc0
break $to-grapheme:compute-length
break $to-utf8:compute-length
}
# 3 bytes
compare c, 0xffff
@ -42,7 +42,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
break-if->
num-trailers <- copy 2
first <- copy 0xe0
break $to-grapheme:compute-length
break $to-utf8:compute-length
}
# 4 bytes
compare c, 0x1fffff
@ -50,7 +50,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
break-if->
num-trailers <- copy 3
first <- copy 0xf0
break $to-grapheme:compute-length
break $to-utf8:compute-length
}
# more than 4 bytes: unsupported
# TODO: print to stderr
@ -65,7 +65,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
}
}
# emit trailer bytes, 6 bits from 'in', first two bits '10'
var result/edi: grapheme <- copy 0
var result/edi: code-point-utf8 <- copy 0
{
compare num-trailers, 0
break-if-<=
@ -87,16 +87,16 @@ fn to-grapheme in: code-point -> _/eax: grapheme {
return result
}
# single-byte code point have identical graphemes
fn test-to-grapheme-single-byte {
# single-byte code points have identical code-point-utf8s
fn test-to-utf8-single-byte {
var in-int/ecx: int <- copy 0
{
compare in-int, 0x7f
break-if->
var in/eax: code-point <- copy in-int
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, in-int, "F - test-to-grapheme-single-byte"
check-ints-equal out-int, in-int, "F - test-to-utf8-single-byte"
in-int <- increment
loop
}
@ -104,55 +104,55 @@ fn test-to-grapheme-single-byte {
# byte | byte | byte | byte
# smallest 2-byte utf-8
# The expected int holds the encoded bytes with the lead byte (0xc2) in the
# lowest-order position and the trailer byte (0x80) above it.
fn test-to-grapheme-two-bytes-min {
fn test-to-utf8-two-bytes-min {
var in/eax: code-point <- copy 0x80 # 10 00-0000
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0x80c2, "F - to-grapheme/2a" # 110 0-0010 10 00-0000
check-ints-equal out-int, 0x80c2, "F - to-utf8/2a" # 110 0-0010 10 00-0000
}
# largest 2-byte utf-8
# 0x7ff has 11 significant bits: 5 go in the lead byte (0xdf, lowest-order
# byte of the expected int) and 6 in the single trailer (0xbf).
fn test-to-grapheme-two-bytes-max {
fn test-to-utf8-two-bytes-max {
var in/eax: code-point <- copy 0x7ff # 1-1111 11-1111
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0xbfdf, "F - to-grapheme/2b" # 110 1-1111 10 11-1111
check-ints-equal out-int, 0xbfdf, "F - to-utf8/2b" # 110 1-1111 10 11-1111
}
# smallest 3-byte utf-8
# 0x800 is the first value that no longer fits in 11 bits, so the expected
# result has a lead byte (0xe0, lowest-order) followed by two trailers.
fn test-to-grapheme-three-bytes-min {
fn test-to-utf8-three-bytes-min {
var in/eax: code-point <- copy 0x800 # 10-0000 00-0000
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0x80a0e0, "F - to-grapheme/3a" # 1110 0000 10 10-0000 10 00-0000
check-ints-equal out-int, 0x80a0e0, "F - to-utf8/3a" # 1110 0000 10 10-0000 10 00-0000
}
# largest 3-byte utf-8
# 0xffff has 16 significant bits: 4 in the lead byte (0xef, lowest-order byte
# of the expected int) and 6 in each of the two trailers (0xbf).
fn test-to-grapheme-three-bytes-max {
fn test-to-utf8-three-bytes-max {
var in/eax: code-point <- copy 0xffff # 1111 11-1111 11-1111
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0xbfbfef, "F - to-grapheme/3b" # 1110 1111 10 11-1111 10 11-1111
check-ints-equal out-int, 0xbfbfef, "F - to-utf8/3b" # 1110 1111 10 11-1111 10 11-1111
}
# smallest 4-byte utf-8
# 0x10000 is the first value needing 17 bits, so the expected result uses all
# four bytes of the int: lead byte 0xf0 lowest, then three trailers.
fn test-to-grapheme-four-bytes-min {
fn test-to-utf8-four-bytes-min {
var in/eax: code-point <- copy 0x10000 # 1-0000 00-0000 00-0000
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0x808090f0, "F - to-grapheme/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000
check-ints-equal out-int, 0x808090f0, "F - to-utf8/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000
}
# largest 4-byte utf-8
# 0x1fffff is all 21 payload bits set (3 in the lead byte, 6 in each of three
# trailers). This tests the limit of the 4-byte encoding itself; the value is
# above 0x10ffff, the highest code point Unicode actually assigns.
fn test-to-grapheme-four-bytes-max {
fn test-to-utf8-four-bytes-max {
var in/eax: code-point <- copy 0x1fffff # 111 11-1111 11-1111 11-1111
var out/eax: grapheme <- to-grapheme in
var out/eax: code-point-utf8 <- to-utf8 in
var out-int/eax: int <- copy out
check-ints-equal out-int, 0xbfbfbff7, "F - to-grapheme/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111
check-ints-equal out-int, 0xbfbfbff7, "F - to-utf8/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111
}
# read the next grapheme from a stream of bytes
fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
# read the next code-point-utf8 from a stream of bytes
fn read-code-point-utf8 in: (addr stream byte) -> _/eax: code-point-utf8 {
# if at eof, return EOF
{
var eof?/eax: boolean <- stream-empty? in
@ -162,18 +162,18 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
}
var c/eax: byte <- read-byte in
var num-trailers/ecx: int <- copy 0
$read-grapheme:compute-length: {
$read-code-point-utf8:compute-length: {
# single byte: just return it
compare c, 0xc0
{
break-if->=
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
compare c, 0xfe
{
break-if-<
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
# 2 bytes
@ -181,23 +181,23 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
{
break-if->=
num-trailers <- copy 1
break $read-grapheme:compute-length
break $read-code-point-utf8:compute-length
}
# 3 bytes
compare c, 0xf0
{
break-if->=
num-trailers <- copy 2
break $read-grapheme:compute-length
break $read-code-point-utf8:compute-length
}
# 4 bytes
compare c, 0xf8
{
break-if->=
num-trailers <- copy 3
break $read-grapheme:compute-length
break $read-code-point-utf8:compute-length
}
$read-grapheme:abort: {
$read-code-point-utf8:abort: {
# TODO: print to stderr
print-string-to-real-screen "utf-8 encodings larger than 4 bytes are not yet supported. First byte seen: "
var n/eax: int <- copy c
@ -208,7 +208,7 @@ $read-grapheme:abort: {
}
}
# prepend trailer bytes
var result/edi: grapheme <- copy c
var result/edi: code-point-utf8 <- copy c
var num-byte-shifts/edx: int <- copy 1
{
compare num-trailers, 0
@ -225,48 +225,48 @@ $read-grapheme:abort: {
return result
}
fn test-read-grapheme {
fn test-read-code-point-utf8 {
var s: (stream byte 0x30)
var s2/ecx: (addr stream byte) <- address s
write s2, "aΒcde"
var c/eax: grapheme <- read-grapheme s2
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x61, "F - test grapheme/0"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x61, "F - test code-point-utf8/0"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test grapheme/1"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test code-point-utf8/1"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x63, "F - test grapheme/2"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x63, "F - test code-point-utf8/2"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x96b8e4, "F - test grapheme/3"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x96b8e4, "F - test code-point-utf8/3"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x64, "F - test grapheme/4"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x64, "F - test code-point-utf8/4"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x8c95e7, "F - test grapheme/5"
var c/eax: grapheme <- read-grapheme s2
check-ints-equal n, 0x8c95e7, "F - test code-point-utf8/5"
var c/eax: code-point-utf8 <- read-code-point-utf8 s2
var n/eax: int <- copy c
check-ints-equal n, 0x65, "F - test grapheme/6"
check-ints-equal n, 0x65, "F - test code-point-utf8/6"
}
fn read-grapheme-buffered in: (addr buffered-file) -> _/eax: grapheme {
fn read-code-point-utf8-buffered in: (addr buffered-file) -> _/eax: code-point-utf8 {
var c/eax: byte <- read-byte-buffered in
var num-trailers/ecx: int <- copy 0
$read-grapheme-buffered:compute-length: {
$read-code-point-utf8-buffered:compute-length: {
# single byte: just return it
compare c, 0xc0
{
break-if->=
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
compare c, 0xfe
{
break-if-<
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
return g
}
# 2 bytes
@ -274,23 +274,23 @@ fn read-grapheme-buffered in: (addr buffered-file) -> _/eax: grapheme {
{
break-if->=
num-trailers <- copy 1
break $read-grapheme-buffered:compute-length
break $read-code-point-utf8-buffered:compute-length
}
# 3 bytes
compare c, 0xf0
{
break-if->=
num-trailers <- copy 2
break $read-grapheme-buffered:compute-length
break $read-code-point-utf8-buffered:compute-length
}
# 4 bytes
compare c, 0xf8
{
break-if->=
num-trailers <- copy 3
break $read-grapheme-buffered:compute-length
break $read-code-point-utf8-buffered:compute-length
}
$read-grapheme-buffered:abort: {
$read-code-point-utf8-buffered:abort: {
# TODO: print to stderr
print-string-to-real-screen "utf-8 encodings larger than 4 bytes are not supported. First byte seen: "
var n/eax: int <- copy c
@ -301,7 +301,7 @@ $read-grapheme-buffered:abort: {
}
}
# prepend trailer bytes
var result/edi: grapheme <- copy c
var result/edi: code-point-utf8 <- copy c
var num-byte-shifts/edx: int <- copy 1
{
compare num-trailers, 0
@ -364,23 +364,23 @@ fn test-shift-left-bytes-5 {
check-ints-equal result, 0, "F - shift-left-bytes >4"
}
# write a grapheme to a stream of bytes
# write a code-point-utf8 to a stream of bytes
# this is like write-to-stream, except we skip leading 0 bytes
#
# 'g' is copied into an int and emitted one byte at a time, lowest-order byte
# first (assuming append-byte only uses the low byte of its argument -- TODO
# confirm). After each byte the value is shifted right by 8 bits; as soon as
# what remains is 0 we stop. So between 1 and 4 bytes are appended to 'out'.
fn write-grapheme out: (addr stream byte), g: grapheme {
$write-grapheme:body: {
fn write-code-point-utf8 out: (addr stream byte), g: code-point-utf8 {
$write-code-point-utf8:body: {
var c/eax: int <- copy g
append-byte out, c # first byte is always written
# second byte, if anything is left
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
break-if-= $write-code-point-utf8:body
append-byte out, c
# third byte, if anything is left
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
break-if-= $write-code-point-utf8:body
append-byte out, c
# fourth and final byte, if anything is left
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
break-if-= $write-code-point-utf8:body
append-byte out, c
}
}

View File

@ -18,7 +18,7 @@ type screen {
}
type screen-cell {
data: grapheme
data: code-point-utf8
color: int
background-color: int
bold?: boolean
@ -83,7 +83,7 @@ fn clear-screen screen: (addr screen) {
return
}
# fake screen
var space/edi: grapheme <- copy 0x20
var space/edi: code-point-utf8 <- copy 0x20
move-cursor screen, 1, 1
var screen-addr/esi: (addr screen) <- copy screen
var i/eax: int <- copy 1
@ -96,7 +96,7 @@ fn clear-screen screen: (addr screen) {
{
compare j, *ncols
break-if->
print-grapheme screen, space
print-code-point-utf8 screen, space
j <- increment
loop
}
@ -186,8 +186,8 @@ fn print-stream _screen: (addr screen), s: (addr stream byte) {
var done?/eax: boolean <- stream-empty? s
compare done?, 0
break-if-!=
var g/eax: grapheme <- read-grapheme s
print-grapheme screen, g
var g/eax: code-point-utf8 <- read-code-point-utf8 s
print-code-point-utf8 screen, g
loop
}
}
@ -211,11 +211,11 @@ fn print-array-of-ints-in-decimal screen: (addr screen), _a: (addr array int) {
}
}
fn print-grapheme screen: (addr screen), c: grapheme {
fn print-code-point-utf8 screen: (addr screen), c: code-point-utf8 {
compare screen, 0
{
break-if-!=
print-grapheme-to-real-screen c
print-code-point-utf8-to-real-screen c
return
}
# fake screen
@ -239,7 +239,7 @@ fn print-grapheme screen: (addr screen), c: grapheme {
break-if-<=
copy-to *cursor-row-addr, num-rows
# if (top-index > data size) top-index = 0, otherwise top-index += num-cols
$print-grapheme:perform-scroll: {
$print-code-point-utf8:perform-scroll: {
var top-index-addr/ebx: (addr int) <- get screen-addr, top-index
var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data
var data/eax: (addr array screen-cell) <- lookup *data-ah
@ -248,7 +248,7 @@ fn print-grapheme screen: (addr screen), c: grapheme {
{
break-if->=
add-to *top-index-addr, num-cols
break $print-grapheme:perform-scroll
break $print-code-point-utf8:perform-scroll
}
{
break-if-<
@ -257,7 +257,7 @@ fn print-grapheme screen: (addr screen), c: grapheme {
}
}
var idx/ecx: int <- current-screen-cell-index screen-addr
#? print-string-to-real-screen "printing grapheme at screen index "
#? print-string-to-real-screen "printing code-point-utf8 at screen index "
#? print-int32-hex-to-real-screen idx
#? print-string-to-real-screen ": "
var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data
@ -266,9 +266,9 @@ fn print-grapheme screen: (addr screen), c: grapheme {
var dest-cell/ecx: (addr screen-cell) <- index data, offset
var src-cell/eax: (addr screen-cell) <- get screen-addr, curr-attributes
copy-object src-cell, dest-cell
var dest/eax: (addr grapheme) <- get dest-cell, data
var c2/ecx: grapheme <- copy c
#? print-grapheme-to-real-screen c2
var dest/eax: (addr code-point-utf8) <- get dest-cell, data
var c2/ecx: code-point-utf8 <- copy c
#? print-code-point-utf8-to-real-screen c2
#? print-string-to-real-screen "\n"
copy-to *dest, c2
increment *cursor-col-addr
@ -305,21 +305,21 @@ fn screen-cell-index screen-on-stack: (addr screen), row: int, col: int -> _/ecx
return result
}
# Return the contents of the fake-screen cell at (row, col).
# Translates the coordinates to a cell index with screen-cell-index, then
# delegates to the -at-idx variant below.
fn screen-grapheme-at screen-on-stack: (addr screen), row: int, col: int -> _/eax: grapheme {
fn screen-code-point-utf8-at screen-on-stack: (addr screen), row: int, col: int -> _/eax: code-point-utf8 {
var screen-addr/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen-addr, row, col
var result/eax: grapheme <- screen-grapheme-at-idx screen-addr, idx
var result/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen-addr, idx
return result
}
# Return the 'data' field of the screen-cell at index 'idx-on-stack' in the
# screen's cell array.
# No bounds check is visible here; the index is used as given.
fn screen-grapheme-at-idx screen-on-stack: (addr screen), idx-on-stack: int -> _/eax: grapheme {
fn screen-code-point-utf8-at-idx screen-on-stack: (addr screen), idx-on-stack: int -> _/eax: code-point-utf8 {
var screen-addr/esi: (addr screen) <- copy screen-on-stack
# the cell array lives behind a handle; look it up to get an address
var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data
var data/eax: (addr array screen-cell) <- lookup *data-ah
# locate the requested cell
var idx/ecx: int <- copy idx-on-stack
var offset/ecx: (offset screen-cell) <- compute-offset data, idx
var cell/eax: (addr screen-cell) <- index data, offset
# return what is stored in the cell's 'data' field
var src/eax: (addr grapheme) <- get cell, data
var src/eax: (addr code-point-utf8) <- get cell, data
return *src
}
@ -433,8 +433,8 @@ fn screen-blink-at-idx? screen-on-stack: (addr screen), idx-on-stack: int -> _/e
}
# Print a single code point to 'screen': convert it to its utf-8 form and hand
# that to the code-point-utf8 printer.
# (The two pairs of body lines are the before/after of the rename and perform
# the same two steps.)
fn print-code-point screen: (addr screen), c: code-point {
var g/eax: grapheme <- to-grapheme c
print-grapheme screen, g
var g/eax: code-point-utf8 <- to-utf8 c
print-code-point-utf8 screen, g
}
fn print-int32-hex screen: (addr screen), n: int {
@ -453,8 +453,8 @@ fn print-int32-hex screen: (addr screen), n: int {
var done?/eax: boolean <- stream-empty? s2-addr
compare done?, 0
break-if-!=
var g/eax: grapheme <- read-grapheme s2-addr
print-grapheme screen, g
var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr
print-code-point-utf8 screen, g
loop
}
}
@ -475,8 +475,8 @@ fn print-int32-hex-bits screen: (addr screen), n: int, bits: int {
var done?/eax: boolean <- stream-empty? s2-addr
compare done?, 0
break-if-!=
var g/eax: grapheme <- read-grapheme s2-addr
print-grapheme screen, g
var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr
print-code-point-utf8 screen, g
loop
}
}
@ -497,8 +497,8 @@ fn print-int32-decimal screen: (addr screen), n: int {
var done?/eax: boolean <- stream-empty? s2-addr
compare done?, 0
break-if-!=
var g/eax: grapheme <- read-grapheme s2-addr
print-grapheme screen, g
var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr
print-code-point-utf8 screen, g
loop
}
}
@ -631,7 +631,7 @@ fn check-screen-row screen: (addr screen), row-idx: int, expected: (addr array b
fn check-screen-row-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx
# compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -639,35 +639,35 @@ fn check-screen-row-from screen-on-stack: (addr screen), row-idx: int, col-idx:
var done?/eax: boolean <- stream-empty? e-addr
compare done?, 0
break-if-!=
var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx
var g/ebx: grapheme <- copy _g
var expected-grapheme/eax: grapheme <- read-grapheme e-addr
# compare graphemes
$check-screen-row-from:compare-graphemes: {
# if expected-grapheme is space, null grapheme is also ok
var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx
var g/ebx: code-point-utf8 <- copy _g
var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
# compare code-point-utf8s
$check-screen-row-from:compare-code-point-utf8s: {
# if expected-code-point-utf8 is space, null code-point-utf8 is also ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
compare g, 0
break-if-= $check-screen-row-from:compare-graphemes
break-if-= $check-screen-row-from:compare-code-point-utf8s
}
# if (g == expected-grapheme) print "."
compare g, expected-grapheme
# if (g == expected-code-point-utf8) print "."
compare g, expected-code-point-utf8
{
break-if-!=
print-string-to-real-screen "."
break $check-screen-row-from:compare-graphemes
break $check-screen-row-from:compare-code-point-utf8s
}
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
print-int32-hex-to-real-screen col-idx
print-string-to-real-screen ") but observed '"
print-grapheme-to-real-screen g
print-code-point-utf8-to-real-screen g
print-string-to-real-screen "'\n"
}
idx <- increment
@ -685,7 +685,7 @@ fn check-screen-row-in-color screen: (addr screen), fg: int, row-idx: int, expec
fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx
# compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -693,45 +693,45 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-i
var done?/eax: boolean <- stream-empty? e-addr
compare done?, 0
break-if-!=
var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx
var g/ebx: grapheme <- copy _g
var _expected-grapheme/eax: grapheme <- read-grapheme e-addr
var expected-grapheme/edi: grapheme <- copy _expected-grapheme
var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx
var g/ebx: code-point-utf8 <- copy _g
var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-code-point-utf8/edi: code-point-utf8 <- copy _expected-code-point-utf8
$check-screen-row-in-color-from:compare-cells: {
# if expected-grapheme is space, null grapheme is also ok
# if expected-code-point-utf8 is space, null code-point-utf8 is also ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
compare g, 0
break-if-= $check-screen-row-in-color-from:compare-cells
}
# if expected-grapheme is space, a different color is ok
# if expected-code-point-utf8 is space, a different color is ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
var color/eax: int <- screen-color-at-idx screen, idx
compare color, fg
break-if-!= $check-screen-row-in-color-from:compare-cells
}
# compare graphemes
$check-screen-row-in-color-from:compare-graphemes: {
# if (g == expected-grapheme) print "."
compare g, expected-grapheme
# compare code-point-utf8s
$check-screen-row-in-color-from:compare-code-point-utf8s: {
# if (g == expected-code-point-utf8) print "."
compare g, expected-code-point-utf8
{
break-if-!=
print-string-to-real-screen "."
break $check-screen-row-in-color-from:compare-graphemes
break $check-screen-row-in-color-from:compare-code-point-utf8s
}
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
print-int32-hex-to-real-screen col-idx
print-string-to-real-screen ") but observed '"
print-grapheme-to-real-screen g
print-code-point-utf8-to-real-screen g
print-string-to-real-screen "'\n"
}
$check-screen-row-in-color-from:compare-colors: {
@ -745,7 +745,7 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-i
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
@ -772,7 +772,7 @@ fn check-screen-row-in-background-color screen: (addr screen), bg: int, row-idx:
fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg: int, row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx
# compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -780,45 +780,45 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg:
var done?/eax: boolean <- stream-empty? e-addr
compare done?, 0
break-if-!=
var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx
var g/ebx: grapheme <- copy _g
var _expected-grapheme/eax: grapheme <- read-grapheme e-addr
var expected-grapheme/edx: grapheme <- copy _expected-grapheme
var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx
var g/ebx: code-point-utf8 <- copy _g
var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8
$check-screen-row-in-background-color-from:compare-cells: {
# if expected-grapheme is space, null grapheme is also ok
# if expected-code-point-utf8 is space, null code-point-utf8 is also ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
compare g, 0
break-if-= $check-screen-row-in-background-color-from:compare-cells
}
# if expected-grapheme is space, a different color is ok
# if expected-code-point-utf8 is space, a different color is ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
var color/eax: int <- screen-background-color-at-idx screen, idx
compare color, bg
break-if-!= $check-screen-row-in-background-color-from:compare-cells
}
# compare graphemes
$check-screen-row-in-background-color-from:compare-graphemes: {
# if (g == expected-grapheme) print "."
compare g, expected-grapheme
# compare code-point-utf8s
$check-screen-row-in-background-color-from:compare-code-point-utf8s: {
# if (g == expected-code-point-utf8) print "."
compare g, expected-code-point-utf8
{
break-if-!=
print-string-to-real-screen "."
break $check-screen-row-in-background-color-from:compare-graphemes
break $check-screen-row-in-background-color-from:compare-code-point-utf8s
}
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
print-int32-hex-to-real-screen col-idx
print-string-to-real-screen ") but observed '"
print-grapheme-to-real-screen g
print-code-point-utf8-to-real-screen g
print-string-to-real-screen "'\n"
}
$check-screen-row-in-background-color-from:compare-colors: {
@ -832,7 +832,7 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg:
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
@ -857,7 +857,7 @@ fn check-screen-row-in-bold screen: (addr screen), row-idx: int, expected: (addr
fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx
# compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -865,45 +865,45 @@ fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, c
var done?/eax: boolean <- stream-empty? e-addr
compare done?, 0
break-if-!=
var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx
var g/ebx: grapheme <- copy _g
var _expected-grapheme/eax: grapheme <- read-grapheme e-addr
var expected-grapheme/edx: grapheme <- copy _expected-grapheme
var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx
var g/ebx: code-point-utf8 <- copy _g
var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8
$check-screen-row-in-bold-from:compare-cells: {
# if expected-grapheme is space, null grapheme is also ok
# if expected-code-point-utf8 is space, null code-point-utf8 is also ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
compare g, 0
break-if-= $check-screen-row-in-bold-from:compare-cells
}
# if expected-grapheme is space, non-bold is ok
# if expected-code-point-utf8 is space, non-bold is ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
var bold?/eax: boolean <- screen-bold-at-idx? screen, idx
compare bold?, 1
break-if-!= $check-screen-row-in-bold-from:compare-cells
}
# compare graphemes
$check-screen-row-in-bold-from:compare-graphemes: {
# if (g == expected-grapheme) print "."
compare g, expected-grapheme
# compare code-point-utf8s
$check-screen-row-in-bold-from:compare-code-point-utf8s: {
# if (g == expected-code-point-utf8) print "."
compare g, expected-code-point-utf8
{
break-if-!=
print-string-to-real-screen "."
break $check-screen-row-in-bold-from:compare-graphemes
break $check-screen-row-in-bold-from:compare-code-point-utf8s
}
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
print-int32-hex-to-real-screen col-idx
print-string-to-real-screen ") but observed '"
print-grapheme-to-real-screen g
print-code-point-utf8-to-real-screen g
print-string-to-real-screen "'\n"
}
$check-screen-row-in-bold-from:compare-bold: {
@ -917,7 +917,7 @@ fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, c
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
@ -938,7 +938,7 @@ fn check-screen-row-in-underline screen: (addr screen), row-idx: int, expected:
fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx
# compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -946,45 +946,45 @@ fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: i
var done?/eax: boolean <- stream-empty? e-addr
compare done?, 0
break-if-!=
var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx
var g/ebx: grapheme <- copy _g
var _expected-grapheme/eax: grapheme <- read-grapheme e-addr
var expected-grapheme/edx: grapheme <- copy _expected-grapheme
var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx
var g/ebx: code-point-utf8 <- copy _g
var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8
$check-screen-row-in-underline-from:compare-cells: {
# if expected-grapheme is space, null grapheme is also ok
# if expected-code-point-utf8 is space, null code-point-utf8 is also ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
compare g, 0
break-if-= $check-screen-row-in-underline-from:compare-cells
}
# if expected-grapheme is space, non-underline is ok
# if expected-code-point-utf8 is space, non-underline is ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
var underline?/eax: boolean <- screen-underline-at-idx? screen, idx
compare underline?, 1
break-if-!= $check-screen-row-in-underline-from:compare-cells
}
# compare graphemes
$check-screen-row-in-underline-from:compare-graphemes: {
# if (g == expected-grapheme) print "."
compare g, expected-grapheme
# compare code-point-utf8s
$check-screen-row-in-underline-from:compare-code-point-utf8s: {
# if (g == expected-code-point-utf8) print "."
compare g, expected-code-point-utf8
{
break-if-!=
print-string-to-real-screen "."
break $check-screen-row-in-underline-from:compare-graphemes
break $check-screen-row-in-underline-from:compare-code-point-utf8s
}
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
print-int32-hex-to-real-screen col-idx
print-string-to-real-screen ") but observed '"
print-grapheme-to-real-screen g
print-code-point-utf8-to-real-screen g
print-string-to-real-screen "'\n"
}
$check-screen-row-in-underline-from:compare-underline: {
@ -998,7 +998,7 @@ fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: i
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
@ -1019,7 +1019,7 @@ fn check-screen-row-in-reverse screen: (addr screen), row-idx: int, expected: (a
fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx
# compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -1027,45 +1027,45 @@ fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int
var done?/eax: boolean <- stream-empty? e-addr
compare done?, 0
break-if-!=
var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx
var g/ebx: grapheme <- copy _g
var _expected-grapheme/eax: grapheme <- read-grapheme e-addr
var expected-grapheme/edx: grapheme <- copy _expected-grapheme
var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx
var g/ebx: code-point-utf8 <- copy _g
var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8
$check-screen-row-in-reverse-from:compare-cells: {
# if expected-grapheme is space, null grapheme is also ok
# if expected-code-point-utf8 is space, null code-point-utf8 is also ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
compare g, 0
break-if-= $check-screen-row-in-reverse-from:compare-cells
}
# if expected-grapheme is space, non-reverse is ok
# if expected-code-point-utf8 is space, non-reverse is ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
var reverse?/eax: boolean <- screen-reverse-at-idx? screen, idx
compare reverse?, 1
break-if-!= $check-screen-row-in-reverse-from:compare-cells
}
# compare graphemes
$check-screen-row-in-reverse-from:compare-graphemes: {
# if (g == expected-grapheme) print "."
compare g, expected-grapheme
# compare code-point-utf8s
$check-screen-row-in-reverse-from:compare-code-point-utf8s: {
# if (g == expected-code-point-utf8) print "."
compare g, expected-code-point-utf8
{
break-if-!=
print-string-to-real-screen "."
break $check-screen-row-in-reverse-from:compare-graphemes
break $check-screen-row-in-reverse-from:compare-code-point-utf8s
}
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
print-int32-hex-to-real-screen col-idx
print-string-to-real-screen ") but observed '"
print-grapheme-to-real-screen g
print-code-point-utf8-to-real-screen g
print-string-to-real-screen "'\n"
}
$check-screen-row-in-reverse-from:compare-reverse: {
@ -1079,7 +1079,7 @@ fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
@ -1100,7 +1100,7 @@ fn check-screen-row-in-blinking screen: (addr screen), row-idx: int, expected: (
fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) {
var screen/esi: (addr screen) <- copy screen-on-stack
var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx
# compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme
# compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8
var e: (stream byte 0x100)
var e-addr/edx: (addr stream byte) <- address e
write e-addr, expected
@ -1108,45 +1108,45 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in
var done?/eax: boolean <- stream-empty? e-addr
compare done?, 0
break-if-!=
var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx
var g/ebx: grapheme <- copy _g
var _expected-grapheme/eax: grapheme <- read-grapheme e-addr
var expected-grapheme/edx: grapheme <- copy _expected-grapheme
var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx
var g/ebx: code-point-utf8 <- copy _g
var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr
var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8
$check-screen-row-in-blinking-from:compare-cells: {
# if expected-grapheme is space, null grapheme is also ok
# if expected-code-point-utf8 is space, null code-point-utf8 is also ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
compare g, 0
break-if-= $check-screen-row-in-blinking-from:compare-cells
}
# if expected-grapheme is space, non-blinking is ok
# if expected-code-point-utf8 is space, non-blinking is ok
{
compare expected-grapheme, 0x20
compare expected-code-point-utf8, 0x20
break-if-!=
var blinking?/eax: boolean <- screen-blink-at-idx? screen, idx
compare blinking?, 1
break-if-!= $check-screen-row-in-blinking-from:compare-cells
}
# compare graphemes
$check-screen-row-in-blinking-from:compare-graphemes: {
# if (g == expected-grapheme) print "."
compare g, expected-grapheme
# compare code-point-utf8s
$check-screen-row-in-blinking-from:compare-code-point-utf8s: {
# if (g == expected-code-point-utf8) print "."
compare g, expected-code-point-utf8
{
break-if-!=
print-string-to-real-screen "."
break $check-screen-row-in-blinking-from:compare-graphemes
break $check-screen-row-in-blinking-from:compare-code-point-utf8s
}
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
print-int32-hex-to-real-screen col-idx
print-string-to-real-screen ") but observed '"
print-grapheme-to-real-screen g
print-code-point-utf8-to-real-screen g
print-string-to-real-screen "'\n"
}
$check-screen-row-in-blinking-from:compare-blinking: {
@ -1160,7 +1160,7 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in
# otherwise print an error
print-string-to-real-screen msg
print-string-to-real-screen ": expected '"
print-grapheme-to-real-screen expected-grapheme
print-code-point-utf8-to-real-screen expected-code-point-utf8
print-string-to-real-screen "' at ("
print-int32-hex-to-real-screen row-idx
print-string-to-real-screen ", "
@ -1175,21 +1175,21 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in
}
}
fn test-print-single-grapheme {
fn test-print-single-code-point-utf8 {
var screen-on-stack: screen
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
check-screen-row screen, 1/row, "a", "F - test-print-single-grapheme" # top-left corner of the screen
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
check-screen-row screen, 1/row, "a", "F - test-print-single-code-point-utf8" # top-left corner of the screen
}
fn test-print-multiple-graphemes {
fn test-print-multiple-code-point-utf8s {
var screen-on-stack: screen
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
print-string screen, "Hello, 世界"
check-screen-row screen, 1/row, "Hello, 世界", "F - test-print-multiple-graphemes"
check-screen-row screen, 1/row, "Hello, 世界", "F - test-print-multiple-code-point-utf8s"
}
fn test-move-cursor {
@ -1197,8 +1197,8 @@ fn test-move-cursor {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
move-cursor screen, 1, 4
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
check-screen-row screen, 1/row, " a", "F - test-move-cursor" # top row
}
@ -1207,8 +1207,8 @@ fn test-move-cursor-zeroes {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
move-cursor screen, 0, 0
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
check-screen-row screen, 1/row, "a", "F - test-move-cursor-zeroes" # top-left corner of the screen
}
@ -1217,8 +1217,8 @@ fn test-move-cursor-zero-row {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
move-cursor screen, 0, 2
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
check-screen-row screen, 1/row, " a", "F - test-move-cursor-zero-row" # top row
}
@ -1227,8 +1227,8 @@ fn test-move-cursor-zero-column {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
move-cursor screen, 4, 0
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
check-screen-row screen, 4/row, "a", "F - test-move-cursor-zero-column"
}
@ -1237,8 +1237,8 @@ fn test-move-cursor-negative-row {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5, 3
move-cursor screen, -1/row, 2/col
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
# no move
check-screen-row screen, 1/row, "a", "F - test-move-cursor-negative-row"
}
@ -1248,8 +1248,8 @@ fn test-move-cursor-negative-column {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5, 3
move-cursor screen, 2/row, -1/col
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
# no move
check-screen-row screen, 1/row, "a", "F - test-move-cursor-negative-column"
}
@ -1259,8 +1259,8 @@ fn test-move-cursor-column-too-large {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 3/cols
move-cursor screen, 1/row, 4/col
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
# top row is empty
check-screen-row screen, 1/row, " ", "F - test-move-cursor-column-too-large"
# character shows up on next row
@ -1272,8 +1272,8 @@ fn test-move-cursor-column-too-large-saturates {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 3/cols
move-cursor screen, 1/row, 6/col
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
# top row is empty
check-screen-row screen, 1/row, " ", "F - test-move-cursor-column-too-large-saturates" # top-left corner of the screen
# character shows up at the start of next row
@ -1285,8 +1285,8 @@ fn test-move-cursor-row-too-large {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 3/cols
move-cursor screen, 6/row, 2/col
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
# bottom row shows the character
check-screen-row screen, 5/row, " a", "F - test-move-cursor-row-too-large"
}
@ -1296,8 +1296,8 @@ fn test-move-cursor-row-too-large-saturates {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 3/cols
move-cursor screen, 9/row, 2/col
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
# bottom row shows the character
check-screen-row screen, 5/row, " a", "F - test-move-cursor-row-too-large-saturates"
}
@ -1307,8 +1307,8 @@ fn test-check-screen-row-from {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
move-cursor screen, 1, 4
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
check-screen-row screen, 1/row, " a", "F - test-check-screen-row-from/baseline"
check-screen-row-from screen, 1/row, 4/col, "a", "F - test-check-screen-row-from"
}
@ -1328,8 +1328,8 @@ fn test-check-screen-scrolls-on-overflow {
initialize-screen screen, 5/rows, 4/cols
# single character starting at bottom right
move-cursor screen, 5/rows, 4/cols
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
check-screen-row-from screen, 5/row, 4/col, "a", "F - test-check-screen-scrolls-on-overflow/baseline" # bottom-right corner of the screen
# multiple characters starting at bottom right
move-cursor screen, 5, 4
@ -1348,14 +1348,14 @@ fn test-check-screen-color {
var screen-on-stack: screen
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
start-color screen, 1/fg, 0/bg
c <- copy 0x62/b
print-grapheme screen, c
print-code-point-utf8 screen, c
start-color screen, 0/fg, 7/bg
c <- copy 0x63/c
print-grapheme screen, c
print-code-point-utf8 screen, c
check-screen-row-in-color screen, 0/fg, 1/row, "a c", "F - test-check-screen-color"
}
@ -1363,14 +1363,14 @@ fn test-check-screen-background-color {
var screen-on-stack: screen
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
start-color screen, 0/fg, 1/bg
c <- copy 0x62/b
print-grapheme screen, c
print-code-point-utf8 screen, c
start-color screen, 0/fg, 7/bg
c <- copy 0x63/c
print-grapheme screen, c
print-code-point-utf8 screen, c
check-screen-row-in-background-color screen, 7/bg, 1/row, "a c", "F - test-check-screen-background-color"
}
@ -1379,14 +1379,14 @@ fn test-check-screen-bold {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
start-bold screen
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
reset-formatting screen
c <- copy 0x62/b
print-grapheme screen, c
print-code-point-utf8 screen, c
start-bold screen
c <- copy 0x63/c
print-grapheme screen, c
print-code-point-utf8 screen, c
check-screen-row-in-bold screen, 1/row, "a c", "F - test-check-screen-bold"
}
@ -1395,14 +1395,14 @@ fn test-check-screen-underline {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
start-underline screen
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
reset-formatting screen
c <- copy 0x62/b
print-grapheme screen, c
print-code-point-utf8 screen, c
start-underline screen
c <- copy 0x63/c
print-grapheme screen, c
print-code-point-utf8 screen, c
check-screen-row-in-underline screen, 1/row, "a c", "F - test-check-screen-underline"
}
@ -1411,14 +1411,14 @@ fn test-check-screen-reverse {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
start-reverse-video screen
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
reset-formatting screen
c <- copy 0x62/b
print-grapheme screen, c
print-code-point-utf8 screen, c
start-reverse-video screen
c <- copy 0x63/c
print-grapheme screen, c
print-code-point-utf8 screen, c
check-screen-row-in-reverse screen, 1/row, "a c", "F - test-check-screen-reverse"
}
@ -1427,14 +1427,14 @@ fn test-check-screen-blinking {
var screen/esi: (addr screen) <- address screen-on-stack
initialize-screen screen, 5/rows, 4/cols
start-blinking screen
var c/eax: grapheme <- copy 0x61/a
print-grapheme screen, c
var c/eax: code-point-utf8 <- copy 0x61/a
print-code-point-utf8 screen, c
reset-formatting screen
c <- copy 0x62/b
print-grapheme screen, c
print-code-point-utf8 screen, c
start-blinking screen
c <- copy 0x63/c
print-grapheme screen, c
print-code-point-utf8 screen, c
check-screen-row-in-blinking screen, 1/row, "a c", "F - test-check-screen-blinking"
}

View File

@ -6,7 +6,7 @@ fn print-int32-decimal-right-justified screen: (addr screen), n: int, _width: in
{
compare n-width, width
break-if->=
print-grapheme screen, 0x20/space
print-code-point-utf8 screen, 0x20/space
width <- decrement
loop
}

View File

@ -1,4 +1,4 @@
# read up to 'len' graphemes after skipping the first 'start' ones
# read up to 'len' code-point-utf8s after skipping the first 'start' ones
fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) {
var in-stream: (stream byte 0x100)
var in-stream-addr/esi: (addr stream byte) <- address in-stream
@ -6,29 +6,29 @@ fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle a
var out-stream: (stream byte 0x100)
var out-stream-addr/edi: (addr stream byte) <- address out-stream
$substring:core: {
# skip 'start' graphemes
# skip 'start' code-point-utf8s
var i/eax: int <- copy 0
{
compare i, start
break-if->=
{
var dummy/eax: grapheme <- read-grapheme in-stream-addr
var dummy/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
compare dummy, 0xffffffff/end-of-file
break-if-= $substring:core
}
i <- increment
loop
}
# copy 'len' graphemes
# copy 'len' code-point-utf8s
i <- copy 0
{
compare i, len
break-if->=
{
var g/eax: grapheme <- read-grapheme in-stream-addr
var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
compare g, 0xffffffff/end-of-file
break-if-= $substring:core
write-grapheme out-stream-addr, g
write-code-point-utf8 out-stream-addr, g
}
i <- increment
loop
@ -85,7 +85,7 @@ fn test-substring {
check-strings-equal out, "bcde", "F - test-substring/middle-too-small"
}
fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte)) {
fn split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte)) {
var in-stream: (stream byte 0x100)
var in-stream-addr/esi: (addr stream byte) <- address in-stream
write in-stream-addr, in
@ -94,10 +94,10 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array
var curr-stream: (stream byte 0x100)
var curr-stream-addr/ecx: (addr stream byte) <- address curr-stream
$split-string:core: {
var g/eax: grapheme <- read-grapheme in-stream-addr
var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
compare g, 0xffffffff
break-if-=
#? print-grapheme-to-real-screen g
#? print-code-point-utf8-to-real-screen g
#? print-string-to-real-screen "\n"
compare g, delim
{
@ -110,7 +110,7 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array
clear-stream curr-stream-addr
loop $split-string:core
}
write-grapheme curr-stream-addr, g
write-code-point-utf8 curr-stream-addr, g
loop
}
stream-to-array tokens-stream-addr, out

View File

@ -33,7 +33,7 @@
fn main -> _/ebx: int {
enable-keyboard-immediate-mode
var look/esi: grapheme <- copy 0 # lookahead
var look/esi: code-point-utf8 <- copy 0 # lookahead
var n/eax: int <- copy 0 # result of each expression
print-string 0/screen, "press ctrl-c or ctrl-d to exit\n"
# read-eval-print loop
@ -55,17 +55,17 @@ fn main -> _/ebx: int {
return 0
}
fn simplify -> _/eax: int, _/esi: grapheme {
fn simplify -> _/eax: int, _/esi: code-point-utf8 {
# prime the pump
var look/esi: grapheme <- get-char
var look/esi: code-point-utf8 <- get-char
# do it
var result/eax: int <- copy 0
result, look <- expression look
return result, look
}
fn expression _look: grapheme -> _/eax: int, _/esi: grapheme {
var look/esi: grapheme <- copy _look
fn expression _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 {
var look/esi: code-point-utf8 <- copy _look
# read arg
var result/eax: int <- copy 0
result, look <- term look
@ -78,7 +78,7 @@ fn expression _look: grapheme -> _/eax: int, _/esi: grapheme {
break-if-= $expression:loop
}
# read operator
var op/ecx: grapheme <- copy 0
var op/ecx: code-point-utf8 <- copy 0
op, look <- operator look
# read next arg
var second/edx: int <- copy 0
@ -109,8 +109,8 @@ fn expression _look: grapheme -> _/eax: int, _/esi: grapheme {
return result, look
}
fn term _look: grapheme -> _/eax: int, _/esi: grapheme {
var look/esi: grapheme <- copy _look
fn term _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 {
var look/esi: code-point-utf8 <- copy _look
# read arg
look <- skip-spaces look
var result/eax: int <- copy 0
@ -124,7 +124,7 @@ fn term _look: grapheme -> _/eax: int, _/esi: grapheme {
break-if-= $term:loop
}
# read operator
var op/ecx: grapheme <- copy 0
var op/ecx: code-point-utf8 <- copy 0
op, look <- operator look
# read next arg
var second/edx: int <- copy 0
@ -154,8 +154,8 @@ fn term _look: grapheme -> _/eax: int, _/esi: grapheme {
return result, look
}
fn factor _look: grapheme -> _/eax: int, _/esi: grapheme {
var look/esi: grapheme <- copy _look # should be a no-op
fn factor _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 {
var look/esi: code-point-utf8 <- copy _look # should be a no-op
look <- skip-spaces look
# if next char is not '(', parse a number
compare look, 0x28/open-paren
@ -174,7 +174,7 @@ fn factor _look: grapheme -> _/eax: int, _/esi: grapheme {
return result, look
}
fn mul-or-div? c: grapheme -> _/eax: boolean {
fn mul-or-div? c: code-point-utf8 -> _/eax: boolean {
compare c, 0x2a/*
{
break-if-!=
@ -188,7 +188,7 @@ fn mul-or-div? c: grapheme -> _/eax: boolean {
return 0/false
}
fn add-or-sub? c: grapheme -> _/eax: boolean {
fn add-or-sub? c: code-point-utf8 -> _/eax: boolean {
compare c, 0x2b/+
{
break-if-!=
@ -202,14 +202,14 @@ fn add-or-sub? c: grapheme -> _/eax: boolean {
return 0/false
}
fn operator _look: grapheme -> _/ecx: grapheme, _/esi: grapheme {
var op/ecx: grapheme <- copy _look
var look/esi: grapheme <- get-char
fn operator _look: code-point-utf8 -> _/ecx: code-point-utf8, _/esi: code-point-utf8 {
var op/ecx: code-point-utf8 <- copy _look
var look/esi: code-point-utf8 <- get-char
return op, look
}
fn num _look: grapheme -> _/eax: int, _/esi: grapheme {
var look/esi: grapheme <- copy _look
fn num _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 {
var look/esi: code-point-utf8 <- copy _look
var result/edi: int <- copy 0
{
var first-digit/eax: int <- to-decimal-digit look
@ -234,8 +234,8 @@ fn num _look: grapheme -> _/eax: int, _/esi: grapheme {
return result, look
}
fn skip-spaces _look: grapheme -> _/esi: grapheme {
var look/esi: grapheme <- copy _look # should be a no-op
fn skip-spaces _look: code-point-utf8 -> _/esi: code-point-utf8 {
var look/esi: code-point-utf8 <- copy _look # should be a no-op
{
compare look, 0x20
break-if-!=
@ -245,9 +245,9 @@ fn skip-spaces _look: grapheme -> _/esi: grapheme {
return look
}
fn get-char -> _/esi: grapheme {
var look/eax: grapheme <- read-key-from-real-keyboard
print-grapheme-to-real-screen look
fn get-char -> _/esi: code-point-utf8 {
var look/eax: code-point-utf8 <- read-key-from-real-keyboard
print-code-point-utf8-to-real-screen look
compare look, 4
{
break-if-!=

View File

@ -37,7 +37,7 @@ fn parse-int _in: (addr array byte) -> _/eax: int {
var tmp/ebx: (addr byte) <- index in, i
var c/eax: byte <- copy-byte *tmp
#
var g/eax: grapheme <- copy c
var g/eax: code-point-utf8 <- copy c
var digit/eax: int <- to-decimal-digit g
result <- add digit
i <- increment

View File

@ -30,8 +30,8 @@ fn main _args: (addr array addr array byte) -> _/ebx: int {
var c/eax: byte <- read-byte-buffered in-addr
compare c, 0xffffffff/end-of-file
break-if-=
var g/eax: grapheme <- copy c
print-grapheme 0/screen, g
var g/eax: code-point-utf8 <- copy c
print-code-point-utf8 0/screen, g
loop
}
}

View File

@ -23,7 +23,7 @@ fn main -> _/ebx: int {
print-string 0/screen, "press a key to see its code: "
enable-keyboard-immediate-mode
var x/eax: grapheme <- read-key-from-real-keyboard
var x/eax: code-point-utf8 <- read-key-from-real-keyboard
enable-keyboard-type-mode
enable-screen-type-mode
print-string 0/screen, "You pressed "

View File

@ -49,7 +49,7 @@ fn interactive fs: (addr buffered-file) {
#
{
render paginated-screen, fs
var key/eax: grapheme <- read-key-from-real-keyboard
var key/eax: code-point-utf8 <- read-key-from-real-keyboard
compare key, 0x71/'q'
loop-if-!=
}
@ -160,13 +160,13 @@ fn test-render-asterisk-in-text {
fn render-normal screen: (addr paginated-screen), fs: (addr buffered-file) {
var newline-seen?/esi: boolean <- copy 0/false
var start-of-paragraph?/edi: boolean <- copy 1/true
var previous-grapheme/ebx: grapheme <- copy 0
var previous-code-point-utf8/ebx: code-point-utf8 <- copy 0
$render-normal:loop: {
# if done-drawing?(screen) break
var done?/eax: boolean <- done-drawing? screen
compare done?, 0/false
break-if-!=
var c/eax: grapheme <- read-grapheme-buffered fs
var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs
$render-normal:loop-body: {
# if (c == EOF) break
compare c, 0xffffffff/end-of-file
@ -186,8 +186,8 @@ $render-normal:loop-body: {
# otherwise render two newlines
{
break-if-=
add-grapheme screen, 0xa/newline
add-grapheme screen, 0xa/newline
add-code-point-utf8 screen, 0xa/newline
add-code-point-utf8 screen, 0xa/newline
newline-seen? <- copy 0/false
start-of-paragraph? <- copy 1/true
break $render-normal:loop-body
@ -221,20 +221,20 @@ $render-normal:flush-buffered-newline: {
{
compare c, 0x20
break-if-!=
add-grapheme screen, 0xa/newline
add-code-point-utf8 screen, 0xa/newline
break $render-normal:flush-buffered-newline
}
add-grapheme screen, 0x20/space
add-code-point-utf8 screen, 0x20/space
# fall through to print c
}
## end soft newline support
$render-normal:whitespace-separated-regions: {
# if previous-grapheme wasn't whitespace, skip this block
# if previous-code-point-utf8 wasn't whitespace, skip this block
{
compare previous-grapheme, 0x20/space
compare previous-code-point-utf8, 0x20/space
break-if-=
compare previous-grapheme, 0xa/newline
compare previous-code-point-utf8, 0xa/newline
break-if-=
break $render-normal:whitespace-separated-regions
}
@ -260,9 +260,9 @@ $render-normal:whitespace-separated-regions: {
}
}
#
add-grapheme screen, c
add-code-point-utf8 screen, c
} # $render-normal:loop-body
previous-grapheme <- copy c
previous-code-point-utf8 <- copy c
loop
} # $render-normal:loop
}
@ -271,7 +271,7 @@ fn render-header-line screen: (addr paginated-screen), fs: (addr buffered-file)
$render-header-line:body: {
# compute color based on number of '#'s
var header-level/esi: int <- copy 1 # caller already grabbed one
var c/eax: grapheme <- copy 0
var c/eax: code-point-utf8 <- copy 0
{
# if done-drawing?(screen) return
{
@ -280,7 +280,7 @@ $render-header-line:body: {
break-if-!= $render-header-line:body
}
#
c <- read-grapheme-buffered fs
c <- read-code-point-utf8-buffered fs
# if (c != '#') break
compare c, 0x23/'#'
break-if-!=
@ -298,7 +298,7 @@ $render-header-line:body: {
break-if-!=
}
#
c <- read-grapheme-buffered fs
c <- read-code-point-utf8-buffered fs
# if (c == EOF) break
compare c, 0xffffffff/end-of-file
break-if-=
@ -306,7 +306,7 @@ $render-header-line:body: {
compare c, 0xa/newline
break-if-=
#
add-grapheme screen, c
add-code-point-utf8 screen, c
#
loop
}
@ -353,7 +353,7 @@ fn render-until-asterisk screen: (addr paginated-screen), fs: (addr buffered-fil
compare done?, 0/false
break-if-!=
#
var c/eax: grapheme <- read-grapheme-buffered fs
var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs
# if (c == EOF) break
compare c, 0xffffffff/end-of-file
break-if-=
@ -361,7 +361,7 @@ fn render-until-asterisk screen: (addr paginated-screen), fs: (addr buffered-fil
compare c, 0x2a/'*'
break-if-=
#
add-grapheme screen, c
add-code-point-utf8 screen, c
#
loop
}
@ -374,7 +374,7 @@ fn render-until-underscore screen: (addr paginated-screen), fs: (addr buffered-f
compare done?, 0/false
break-if-!=
#
var c/eax: grapheme <- read-grapheme-buffered fs
var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs
# if (c == EOF) break
compare c, 0xffffffff/end-of-file
break-if-=
@ -382,7 +382,7 @@ fn render-until-underscore screen: (addr paginated-screen), fs: (addr buffered-f
compare c, 0x5f/'_'
break-if-=
#
add-grapheme screen, c
add-code-point-utf8 screen, c
#
loop
}

View File

@ -7,7 +7,7 @@
# on each frame
# start-drawing
# while !done-drawing
# add-grapheme ...
# add-code-point-utf8 ...
type paginated-screen {
screen: (handle screen)
@ -152,23 +152,23 @@ fn done-drawing? _self: (addr paginated-screen) -> _/eax: boolean {
return 1/true
}
fn add-grapheme _self: (addr paginated-screen), c: grapheme {
#? print-string-to-real-screen "add-grapheme: "
#? print-grapheme-to-real-screen c
fn add-code-point-utf8 _self: (addr paginated-screen), c: code-point-utf8 {
#? print-string-to-real-screen "add-code-point-utf8: "
#? print-code-point-utf8-to-real-screen c
#? print-string-to-real-screen "\n"
$add-grapheme:body: {
$add-code-point-utf8:body: {
var self/esi: (addr paginated-screen) <- copy _self
{
compare c, 0xa/newline
break-if-!=
next-line self
reposition-cursor self
break $add-grapheme:body
break $add-code-point-utf8:body
}
# print c
var screen-ah/eax: (addr handle screen) <- get self, screen
var screen-addr/eax: (addr screen) <- lookup *screen-ah
print-grapheme screen-addr, c
print-code-point-utf8 screen-addr, c
# self->col++
var tmp/eax: (addr int) <- get self, col
increment *tmp
@ -186,21 +186,21 @@ $add-grapheme:body: {
## tests
fn test-print-grapheme-on-paginated-screen {
fn test-print-code-point-utf8-on-paginated-screen {
var pg-on-stack: paginated-screen
var pg/eax: (addr paginated-screen) <- address pg-on-stack
initialize-fake-paginated-screen pg, 3/rows, 0xa/cols, 0xa/page-width, 0, 0
start-drawing pg
{
var c/ecx: grapheme <- copy 0x61/a
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x61/a
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-grapheme-on-paginated-screen/done"
check-ints-equal done, 0, "F - test-print-code-point-utf8-on-paginated-screen/done"
}
var screen-ah/eax: (addr handle screen) <- get pg, screen
var screen-addr/eax: (addr screen) <- lookup *screen-ah
check-screen-row screen-addr, 1, "a", "F - test-print-grapheme-on-paginated-screen"
check-screen-row screen-addr, 1, "a", "F - test-print-code-point-utf8-on-paginated-screen"
}
fn test-print-single-page {
@ -210,29 +210,29 @@ fn test-print-single-page {
start-drawing pg
# pages at columns [1, 3), [3, 5)
{
var c/ecx: grapheme <- copy 0x61/a
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x61/a
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page/done-1"
}
{
var c/ecx: grapheme <- copy 0x62/b
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x62/b
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page/done-2"
}
{
var c/ecx: grapheme <- copy 0x63/c
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x63/c
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page/done-3"
}
{
var c/ecx: grapheme <- copy 0x64/d
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x64/d
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page/done-4"
@ -250,36 +250,36 @@ fn test-print-single-page-narrower-than-page-width {
initialize-fake-paginated-screen pg, 2/rows, 4/cols, 5/page-width, 0, 0
start-drawing pg
{
var c/ecx: grapheme <- copy 0x61/a
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x61/a
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-1"
}
{
var c/ecx: grapheme <- copy 0x62/b
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x62/b
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-2"
}
{
var c/ecx: grapheme <- copy 0x63/c
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x63/c
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-3"
}
{
var c/ecx: grapheme <- copy 0x64/d
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x64/d
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-4"
}
{
var c/ecx: grapheme <- copy 0x65/e
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x65/e
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-5"
@ -297,36 +297,36 @@ fn test-print-single-page-narrower-than-page-width-with-margin {
initialize-fake-paginated-screen pg, 2/rows, 4/cols, 5/page-width, 0/top-margin, 1/left-margin
start-drawing pg
{
var c/ecx: grapheme <- copy 0x61/a
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x61/a
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-1"
}
{
var c/ecx: grapheme <- copy 0x62/b
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x62/b
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-2"
}
{
var c/ecx: grapheme <- copy 0x63/c
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x63/c
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-3"
}
{
var c/ecx: grapheme <- copy 0x64/d
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x64/d
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-4"
}
{
var c/ecx: grapheme <- copy 0x65/e
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x65/e
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-5"
@ -344,29 +344,29 @@ fn test-print-multiple-pages {
initialize-fake-paginated-screen pg, 2/rows, 2/cols, 1/page-width, 0, 0
start-drawing pg
{
var c/ecx: grapheme <- copy 0x61/a
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x61/a
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages/done-1"
}
{
var c/ecx: grapheme <- copy 0x62/b
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x62/b
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages/done-2"
}
{
var c/ecx: grapheme <- copy 0x63/c
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x63/c
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages/done-3"
}
{
var c/ecx: grapheme <- copy 0x64/d
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x64/d
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 1, "F - test-print-multiple-pages/done-4"
@ -384,57 +384,57 @@ fn test-print-multiple-pages-2 {
initialize-fake-paginated-screen pg, 2/rows, 4/cols, 2/page-width, 0, 0
start-drawing pg
{
var c/ecx: grapheme <- copy 0x61/a
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x61/a
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-1"
}
{
var c/ecx: grapheme <- copy 0x62/b
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x62/b
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-2"
}
{
var c/ecx: grapheme <- copy 0x63/c
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x63/c
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-3"
}
{
var c/ecx: grapheme <- copy 0x64/d
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x64/d
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-4"
}
{
var c/ecx: grapheme <- copy 0x65/e
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x65/e
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-5"
}
{
var c/ecx: grapheme <- copy 0x66/f
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x66/f
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-6"
}
{
var c/ecx: grapheme <- copy 0x67/g
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x67/g
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-7"
}
{
var c/ecx: grapheme <- copy 0x68/h
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x68/h
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 1, "F - test-print-multiple-pages-2/done-8"
@ -452,60 +452,60 @@ fn test-print-multiple-pages-with-margins {
initialize-fake-paginated-screen pg, 3/rows, 6/cols, 2/page-width, 1/top-margin, 1/left-margin
start-drawing pg
{
var c/ecx: grapheme <- copy 0x61/a
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x61/a
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-1"
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-1"
}
{
var c/ecx: grapheme <- copy 0x62/b
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x62/b
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-2"
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-2"
}
{
var c/ecx: grapheme <- copy 0x63/c
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x63/c
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-3"
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-3"
}
{
var c/ecx: grapheme <- copy 0x64/d
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x64/d
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-4"
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-4"
}
{
var c/ecx: grapheme <- copy 0x65/e
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x65/e
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-5"
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-5"
}
{
var c/ecx: grapheme <- copy 0x66/f
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x66/f
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-6"
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-6"
}
{
var c/ecx: grapheme <- copy 0x67/g
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x67/g
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-7"
check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-7"
}
{
var c/ecx: grapheme <- copy 0x68/h
add-grapheme pg, c
var c/ecx: code-point-utf8 <- copy 0x68/h
add-code-point-utf8 pg, c
var done?/eax: boolean <- done-drawing? pg
var done/eax: int <- copy done?
check-ints-equal done, 1, "F - test-print-multiple-pages-with-margins/grapheme-8"
check-ints-equal done, 1, "F - test-print-multiple-pages-with-margins/code-point-utf8-8"
}
var screen-ah/eax: (addr handle screen) <- get pg, screen
var screen-addr/eax: (addr screen) <- lookup *screen-ah

BIN
linux/mu

Binary file not shown.

View File

@ -416,8 +416,8 @@ Type-id: # (stream (addr array byte))
"stream"/imm32 # 11
"slice"/imm32 # 12
"code-point"/imm32 # 13; smallest scannable unit from a text stream
"grapheme"/imm32 # 14; smallest printable unit; will eventually be composed of multiple code-points, but currently corresponds 1:1
# only 4-byte graphemes in utf-8 are currently supported;
"code-point-utf8"/imm32 # 14; smallest printable unit; will eventually be composed of multiple code-points, but currently corresponds 1:1
# only 4-byte code-point-utf8s in utf-8 are currently supported;
# unclear how we should deal with larger clusters.
"float"/imm32 # 15
# 0x40
@ -22183,9 +22183,9 @@ $mu-numberlike-output?:check-code-point:
(simple-mu-type? %esi 0xd) # code-point => eax
3d/compare-eax-and 0/imm32/false
75/jump-if-!= $mu-numberlike-output?:return-true/disp8
$mu-numberlike-output?:check-grapheme:
# if t is a grapheme, return
(simple-mu-type? %esi 0xe) # grapheme => eax
$mu-numberlike-output?:check-code-point-utf8:
# if t is a code-point-utf8, return
(simple-mu-type? %esi 0xe) # code-point-utf8 => eax
3d/compare-eax-and 0/imm32/false
75/jump-if-!= $mu-numberlike-output?:return-true/disp8
$mu-numberlike-output?:return-false:

View File

@ -78,7 +78,7 @@ fn clear-rect screen: (addr screen), row1: int, col1: int, row2: int, col2: int
{
compare j, col2
break-if->
print-grapheme screen 0x20/space
print-code-point-utf8 screen 0x20/space
j <- increment
loop
}
@ -98,7 +98,7 @@ fn clear-rect2 screen: (addr screen), row1: int, col1: int, w: int, h: int {
{
compare j, h
break-if->=
print-grapheme screen 0x20/space
print-code-point-utf8 screen 0x20/space
j <- increment
loop
}

View File

@ -70,7 +70,7 @@ fn initialize-environment-with-fake-screen _self: (addr environment), nrows: int
# Iterate
#############
fn process _self: (addr environment), key: grapheme {
fn process _self: (addr environment), key: code-point-utf8 {
var self/esi: (addr environment) <- copy _self
var fn-name-ah/eax: (addr handle word) <- get self, partial-function-name
var fn-name/eax: (addr word) <- lookup *fn-name-ah
@ -102,7 +102,7 @@ fn process _self: (addr environment), key: grapheme {
}
# collect new name in partial-function-name, and move the cursor to function with that name
fn process-goto-dialog _self: (addr environment), key: grapheme {
fn process-goto-dialog _self: (addr environment), key: code-point-utf8 {
var self/esi: (addr environment) <- copy _self
var fn-name-ah/edi: (addr handle word) <- get self, partial-function-name
# if 'esc' pressed, cancel goto
@ -130,7 +130,7 @@ fn process-goto-dialog _self: (addr environment), key: grapheme {
compare key, 0x7f/del # backspace on Macs
$process-goto-dialog:backspace: {
break-if-!=
# if not at start, delete grapheme before cursor
# if not at start, delete code-point-utf8 before cursor
var fn-name/eax: (addr word) <- lookup *fn-name-ah
var at-start?/eax: boolean <- cursor-at-start? fn-name
compare at-start?, 0/false
@ -142,24 +142,24 @@ fn process-goto-dialog _self: (addr environment), key: grapheme {
return
}
# otherwise insert key within current word
var print?/eax: boolean <- real-grapheme? key
$process-goto-dialog:real-grapheme: {
var print?/eax: boolean <- real-code-point-utf8? key
$process-goto-dialog:real-code-point-utf8: {
compare print?, 0/false
break-if-=
var fn-name/eax: (addr word) <- lookup *fn-name-ah
add-grapheme-to-word fn-name, key
add-code-point-utf8-to-word fn-name, key
return
}
# silently ignore other hotkeys
}
fn process-function _self: (addr environment), _function: (addr function), key: grapheme {
fn process-function _self: (addr environment), _function: (addr function), key: code-point-utf8 {
var self/esi: (addr environment) <- copy _self
var function/edi: (addr function) <- copy _function
process-function-edit self, function, key
}
fn process-function-edit _self: (addr environment), _function: (addr function), key: grapheme {
fn process-function-edit _self: (addr environment), _function: (addr function), key: code-point-utf8 {
var self/esi: (addr environment) <- copy _self
var function/edi: (addr function) <- copy _function
var cursor-word-ah/ebx: (addr handle word) <- get function, cursor-word
@ -290,7 +290,7 @@ fn process-function-edit _self: (addr environment), _function: (addr function),
compare key, 0x7f/del # backspace on Macs
$process-function-edit:backspace: {
break-if-!=
# if not at start of some word, delete grapheme before cursor within current word
# if not at start of some word, delete code-point-utf8 before cursor within current word
var at-start?/eax: boolean <- cursor-at-start? cursor-word
compare at-start?, 0/false
{
@ -325,25 +325,25 @@ fn process-function-edit _self: (addr environment), _function: (addr function),
copy-object new-prev-word-ah, cursor-word-ah
return
}
# if start of word is quote and grapheme before cursor is not, just insert it as usual
# if start of word is quote and code-point-utf8 before cursor is not, just insert it as usual
# TODO: support string escaping
{
var first-grapheme/eax: grapheme <- first-grapheme cursor-word
compare first-grapheme, 0x22/double-quote
var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word
compare first-code-point-utf8, 0x22/double-quote
break-if-!=
var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word
compare final-grapheme, 0x22/double-quote
var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word
compare final-code-point-utf8, 0x22/double-quote
break-if-=
break $process-function-edit:space
}
# if start of word is '[' and grapheme before cursor is not ']', just insert it as usual
# if start of word is '[' and code-point-utf8 before cursor is not ']', just insert it as usual
# TODO: support nested arrays
{
var first-grapheme/eax: grapheme <- first-grapheme cursor-word
compare first-grapheme, 0x5b/[
var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word
compare first-code-point-utf8, 0x5b/[
break-if-!=
var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word
compare final-grapheme, 0x5d/]
var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word
compare final-code-point-utf8, 0x5d/]
break-if-=
break $process-function-edit:space
}
@ -368,26 +368,26 @@ fn process-function-edit _self: (addr environment), _function: (addr function),
var at-end?/eax: boolean <- cursor-at-end? cursor-word
compare at-end?, 0/false
break-if-!=
var g/eax: grapheme <- pop-after-cursor cursor-word
add-grapheme-to-word next-word, g
var g/eax: code-point-utf8 <- pop-after-cursor cursor-word
add-code-point-utf8-to-word next-word, g
loop
}
cursor-to-start next-word
return
}
# otherwise insert key within current word
var g/edx: grapheme <- copy key
var print?/eax: boolean <- real-grapheme? key
$process-function-edit:real-grapheme: {
var g/edx: code-point-utf8 <- copy key
var print?/eax: boolean <- real-code-point-utf8? key
$process-function-edit:real-code-point-utf8: {
compare print?, 0/false
break-if-=
add-grapheme-to-word cursor-word, g
add-code-point-utf8-to-word cursor-word, g
return
}
# silently ignore other hotkeys
}
fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: grapheme {
fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: code-point-utf8 {
var self/esi: (addr environment) <- copy _self
var sandbox/edi: (addr sandbox) <- copy _sandbox
var rename-word-mode-ah?/ecx: (addr handle word) <- get sandbox, partial-name-for-cursor-word
@ -413,7 +413,7 @@ fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: gra
process-sandbox-edit self, sandbox, key
}
fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key: grapheme {
fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key: code-point-utf8 {
var self/esi: (addr environment) <- copy _self
var sandbox/edi: (addr sandbox) <- copy _sandbox
var cursor-call-path-ah/eax: (addr handle call-path-element) <- get sandbox, cursor-call-path
@ -730,7 +730,7 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key
compare key, 0x7f/del # backspace on Macs
$process-sandbox-edit:backspace: {
break-if-!=
# if not at start of some word, delete grapheme before cursor within current word
# if not at start of some word, delete code-point-utf8 before cursor within current word
var at-start?/eax: boolean <- cursor-at-start? cursor-word
compare at-start?, 0/false
{
@ -766,25 +766,25 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key
decrement-final-element cursor-call-path
return
}
# if start of word is quote and grapheme before cursor is not, just insert it as usual
# if start of word is quote and code-point-utf8 before cursor is not, just insert it as usual
# TODO: support string escaping
{
var first-grapheme/eax: grapheme <- first-grapheme cursor-word
compare first-grapheme, 0x22/double-quote
var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word
compare first-code-point-utf8, 0x22/double-quote
break-if-!=
var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word
compare final-grapheme, 0x22/double-quote
var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word
compare final-code-point-utf8, 0x22/double-quote
break-if-=
break $process-sandbox-edit:space
}
# if start of word is '[' and grapheme before cursor is not ']', just insert it as usual
# if start of word is '[' and code-point-utf8 before cursor is not ']', just insert it as usual
# TODO: support nested arrays
{
var first-grapheme/eax: grapheme <- first-grapheme cursor-word
compare first-grapheme, 0x5b/[
var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word
compare first-code-point-utf8, 0x5b/[
break-if-!=
var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word
compare final-grapheme, 0x5d/]
var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word
compare final-code-point-utf8, 0x5d/]
break-if-=
break $process-sandbox-edit:space
}
@ -809,8 +809,8 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key
var at-end?/eax: boolean <- cursor-at-end? cursor-word
compare at-end?, 0/false
break-if-!=
var g/eax: grapheme <- pop-after-cursor cursor-word
add-grapheme-to-word next-word, g
var g/eax: code-point-utf8 <- pop-after-cursor cursor-word
add-code-point-utf8-to-word next-word, g
loop
}
cursor-to-start next-word
@ -838,12 +838,12 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key
return
}
# otherwise insert key within current word
var g/edx: grapheme <- copy key
var print?/eax: boolean <- real-grapheme? key
$process-sandbox-edit:real-grapheme: {
var g/edx: code-point-utf8 <- copy key
var print?/eax: boolean <- real-code-point-utf8? key
$process-sandbox-edit:real-code-point-utf8: {
compare print?, 0/false
break-if-=
add-grapheme-to-word cursor-word, g
add-code-point-utf8-to-word cursor-word, g
return
}
# silently ignore other hotkeys
@ -852,7 +852,7 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key
# collect new name in partial-name-for-cursor-word, and then rename the word
# at cursor to it
# Precondition: cursor-call-path is a singleton (not within a call)
fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme {
fn process-sandbox-rename _sandbox: (addr sandbox), key: code-point-utf8 {
var sandbox/esi: (addr sandbox) <- copy _sandbox
var new-name-ah/edi: (addr handle word) <- get sandbox, partial-name-for-cursor-word
# if 'esc' pressed, cancel rename
@ -911,7 +911,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme {
{
var new-name/eax: (addr word) <- lookup *new-name-ah
cursor-to-start new-name
add-grapheme-to-word new-name, 0x3d/=
add-code-point-utf8-to-word new-name, 0x3d/=
}
# append name to new line
chain-words new-line-word-ah, new-name-ah
@ -941,7 +941,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme {
compare key, 0x7f/del # backspace on Macs
$process-sandbox-rename:backspace: {
break-if-!=
# if not at start, delete grapheme before cursor
# if not at start, delete code-point-utf8 before cursor
var new-name/eax: (addr word) <- lookup *new-name-ah
var at-start?/eax: boolean <- cursor-at-start? new-name
compare at-start?, 0/false
@ -953,12 +953,12 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme {
return
}
# otherwise insert key within current word
var print?/eax: boolean <- real-grapheme? key
$process-sandbox-rename:real-grapheme: {
var print?/eax: boolean <- real-code-point-utf8? key
$process-sandbox-rename:real-code-point-utf8: {
compare print?, 0/false
break-if-=
var new-name/eax: (addr word) <- lookup *new-name-ah
add-grapheme-to-word new-name, key
add-code-point-utf8-to-word new-name, key
return
}
# silently ignore other hotkeys
@ -968,7 +968,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme {
# of the sandbox to be a new function with that name. Replace the last line
# with a call to the appropriate function.
# Precondition: cursor-call-path is a singleton (not within a call)
fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle function), key: grapheme {
fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle function), key: code-point-utf8 {
var sandbox/esi: (addr sandbox) <- copy _sandbox
var new-name-ah/edi: (addr handle word) <- get sandbox, partial-name-for-function
# if 'esc' pressed, cancel define
@ -1033,7 +1033,7 @@ fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle func
compare key, 0x7f/del # backspace on Macs
$process-sandbox-define:backspace: {
break-if-!=
# if not at start, delete grapheme before cursor
# if not at start, delete code-point-utf8 before cursor
var new-name/eax: (addr word) <- lookup *new-name-ah
var at-start?/eax: boolean <- cursor-at-start? new-name
compare at-start?, 0/false
@ -1045,12 +1045,12 @@ fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle func
return
}
# otherwise insert key within current word
var print?/eax: boolean <- real-grapheme? key
$process-sandbox-define:real-grapheme: {
var print?/eax: boolean <- real-code-point-utf8? key
$process-sandbox-define:real-code-point-utf8: {
compare print?, 0/false
break-if-=
var new-name/eax: (addr word) <- lookup *new-name-ah
add-grapheme-to-word new-name, key
add-code-point-utf8-to-word new-name, key
return
}
# silently ignore other hotkeys
@ -2107,7 +2107,7 @@ fn render-function-right-aligned screen: (addr screen), row: int, right-col: int
start-color screen, 0, 0xf7
clear-rect screen, row, col, new-row, col2
col <- add 1
#? var dummy/eax: grapheme <- read-key-from-real-keyboard
#? var dummy/eax: code-point-utf8 <- read-key-from-real-keyboard
render-function screen, row, col, f
new-row <- add 1/function-bottom-margin
col <- subtract 1/function-left-padding
@ -2144,7 +2144,7 @@ fn render-function screen: (addr screen), row: int, col: int, _f: (addr function
render-line-without-stack screen, body, row, col, cursor-word, cursor-row, cursor-col
}
fn real-grapheme? g: grapheme -> _/eax: boolean {
fn real-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
# if g == newline return true
compare g, 0xa
{

View File

@ -1,14 +1,14 @@
# A gap buffer is represented as two stacks, 'left' and 'right'.
# Going by emit-gap-buffer, the buffer's contents read as 'left' from bottom
# to top followed by 'right' from top to bottom; the gap sits between them.
# (Both the grapheme-stack and the renamed code-point-utf8-stack field
# declarations are listed here.)
type gap-buffer {
left: grapheme-stack
right: grapheme-stack
left: code-point-utf8-stack
right: code-point-utf8-stack
}
# Set up both stacks of the gap buffer at '_self'.
# Each stack is initialized with the size 0x10/max-word-size.
fn initialize-gap-buffer _self: (addr gap-buffer) {
var self/esi: (addr gap-buffer) <- copy _self
var left/eax: (addr grapheme-stack) <- get self, left
initialize-grapheme-stack left, 0x10/max-word-size
var right/eax: (addr grapheme-stack) <- get self, right
initialize-grapheme-stack right, 0x10/max-word-size
var left/eax: (addr code-point-utf8-stack) <- get self, left
initialize-code-point-utf8-stack left, 0x10/max-word-size
var right/eax: (addr code-point-utf8-stack) <- get self, right
initialize-code-point-utf8-stack right, 0x10/max-word-size
}
# just for tests
@ -21,8 +21,8 @@ fn initialize-gap-buffer-with self: (addr gap-buffer), s: (addr array byte) {
var done?/eax: boolean <- stream-empty? stream
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme stream
add-grapheme-at-gap self, g
var g/eax: code-point-utf8 <- read-code-point-utf8 stream
add-code-point-utf8-at-gap self, g
loop
}
}
@ -37,44 +37,44 @@ fn gap-buffer-to-string self: (addr gap-buffer), out: (addr handle array byte) {
# Write the full contents of the gap buffer to 'out'.
# 'out' is cleared first, so any previous contents are discarded.
fn emit-gap-buffer _self: (addr gap-buffer), out: (addr stream byte) {
var self/esi: (addr gap-buffer) <- copy _self
clear-stream out
var left/eax: (addr grapheme-stack) <- get self, left
var left/eax: (addr code-point-utf8-stack) <- get self, left
# text before the gap: oldest element first
emit-stack-from-bottom left, out
var right/eax: (addr grapheme-stack) <- get self, right
var right/eax: (addr code-point-utf8-stack) <- get self, right
# text after the gap: most recently pushed element first
emit-stack-from-top right, out
}
# dump stack from bottom to top
# Writes each element at index 0 up to (but not including) 'top' to 'out'.
# The stack itself is not modified.
fn emit-stack-from-bottom _self: (addr grapheme-stack), out: (addr stream byte) {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn emit-stack-from-bottom _self: (addr code-point-utf8-stack), out: (addr stream byte) {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/eax: int <- copy 0
{
# stop once i reaches the number of elements on the stack
compare i, *top-addr
break-if->=
var g/edx: (addr grapheme) <- index data, i
write-grapheme out, *g
var g/edx: (addr code-point-utf8) <- index data, i
write-code-point-utf8 out, *g
i <- increment
loop
}
}
# dump stack from top to bottom
fn emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn emit-stack-from-top _self: (addr code-point-utf8-stack), out: (addr stream byte) {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/eax: int <- copy *top-addr
i <- decrement
{
compare i, 0
break-if-<
var g/edx: (addr grapheme) <- index data, i
write-grapheme out, *g
var g/edx: (addr code-point-utf8) <- index data, i
write-code-point-utf8 out, *g
i <- decrement
loop
}
@ -82,33 +82,33 @@ fn emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) {
# Print the contents of the gap buffer to 'screen':
# the left stack from bottom to top, then the right stack from top to bottom.
fn render-gap-buffer screen: (addr screen), _gap: (addr gap-buffer) {
var gap/esi: (addr gap-buffer) <- copy _gap
var left/eax: (addr grapheme-stack) <- get gap, left
var left/eax: (addr code-point-utf8-stack) <- get gap, left
render-stack-from-bottom left, screen
var right/eax: (addr grapheme-stack) <- get gap, right
var right/eax: (addr code-point-utf8-stack) <- get gap, right
render-stack-from-top right, screen
}
# Return the number of elements in the gap buffer:
# the 'top' of the left stack plus the 'top' of the right stack.
fn gap-buffer-length _gap: (addr gap-buffer) -> _/eax: int {
var gap/esi: (addr gap-buffer) <- copy _gap
var left/eax: (addr grapheme-stack) <- get gap, left
var left/eax: (addr code-point-utf8-stack) <- get gap, left
var tmp/eax: (addr int) <- get left, top
var left-length/ecx: int <- copy *tmp
# 'right' takes over esi; 'gap' is not used after this point
var right/esi: (addr grapheme-stack) <- get gap, right
var right/esi: (addr code-point-utf8-stack) <- get gap, right
tmp <- get right, top
var result/eax: int <- copy *tmp
result <- add left-length
return result
}
# Insert 'g' at the gap by pushing it onto the left stack.
# The right stack is left untouched.
fn add-grapheme-at-gap _self: (addr gap-buffer), g: grapheme {
fn add-code-point-utf8-at-gap _self: (addr gap-buffer), g: code-point-utf8 {
var self/esi: (addr gap-buffer) <- copy _self
var left/eax: (addr grapheme-stack) <- get self, left
push-grapheme-stack left, g
var left/eax: (addr code-point-utf8-stack) <- get self, left
push-code-point-utf8-stack left, g
}
fn gap-to-start self: (addr gap-buffer) {
{
var curr/eax: grapheme <- gap-left self
var curr/eax: code-point-utf8 <- gap-left self
compare curr, -1
loop-if-!=
}
@ -116,7 +116,7 @@ fn gap-to-start self: (addr gap-buffer) {
fn gap-to-end self: (addr gap-buffer) {
{
var curr/eax: grapheme <- gap-right self
var curr/eax: code-point-utf8 <- gap-right self
compare curr, -1
loop-if-!=
}
@ -124,96 +124,96 @@ fn gap-to-end self: (addr gap-buffer) {
# Return whether the left stack is empty, i.e. nothing lies before the gap.
fn gap-at-start? _self: (addr gap-buffer) -> _/eax: boolean {
var self/esi: (addr gap-buffer) <- copy _self
var left/eax: (addr grapheme-stack) <- get self, left
var result/eax: boolean <- grapheme-stack-empty? left
var left/eax: (addr code-point-utf8-stack) <- get self, left
var result/eax: boolean <- code-point-utf8-stack-empty? left
return result
}
# Return whether the right stack is empty, i.e. nothing lies after the gap.
fn gap-at-end? _self: (addr gap-buffer) -> _/eax: boolean {
var self/esi: (addr gap-buffer) <- copy _self
var right/eax: (addr grapheme-stack) <- get self, right
var result/eax: boolean <- grapheme-stack-empty? right
var right/eax: (addr code-point-utf8-stack) <- get self, right
var result/eax: boolean <- code-point-utf8-stack-empty? right
return result
}
# Move the gap one element to the right: pop from the right stack and push
# the popped value onto the left stack.
# Returns the value that was moved. If the pop yields -1, nothing is pushed
# and -1 is returned.
# NOTE(review): -1 looks like the "stack was empty" sentinel of the pop
# helper -- confirm against its definition.
fn gap-right _self: (addr gap-buffer) -> _/eax: grapheme {
fn gap-right _self: (addr gap-buffer) -> _/eax: code-point-utf8 {
var self/esi: (addr gap-buffer) <- copy _self
var g/eax: grapheme <- copy 0
var right/ecx: (addr grapheme-stack) <- get self, right
g <- pop-grapheme-stack right
var g/eax: code-point-utf8 <- copy 0
var right/ecx: (addr code-point-utf8-stack) <- get self, right
g <- pop-code-point-utf8-stack right
compare g, -1
{
# skip the push when nothing was popped
break-if-=
var left/ecx: (addr grapheme-stack) <- get self, left
push-grapheme-stack left, g
var left/ecx: (addr code-point-utf8-stack) <- get self, left
push-code-point-utf8-stack left, g
}
return g
}
# Move the gap one element to the left: pop from the left stack and push
# the popped value onto the right stack.
# Returns the value that was moved. If the pop yields -1, nothing is pushed
# and -1 is returned.
# NOTE(review): -1 looks like the "stack was empty" sentinel of the pop
# helper -- confirm against its definition.
fn gap-left _self: (addr gap-buffer) -> _/eax: grapheme {
fn gap-left _self: (addr gap-buffer) -> _/eax: code-point-utf8 {
var self/esi: (addr gap-buffer) <- copy _self
var g/eax: grapheme <- copy 0
var g/eax: code-point-utf8 <- copy 0
{
var left/ecx: (addr grapheme-stack) <- get self, left
g <- pop-grapheme-stack left
var left/ecx: (addr code-point-utf8-stack) <- get self, left
g <- pop-code-point-utf8-stack left
}
compare g, -1
{
# skip the push when nothing was popped
break-if-=
var right/ecx: (addr grapheme-stack) <- get self, right
push-grapheme-stack right, g
var right/ecx: (addr code-point-utf8-stack) <- get self, right
push-code-point-utf8-stack right, g
}
return g
}
# Return the position of the gap: the number of elements on the left stack
# (its 'top' field).
fn gap-index _self: (addr gap-buffer) -> _/eax: int {
var self/eax: (addr gap-buffer) <- copy _self
var left/eax: (addr grapheme-stack) <- get self, left
var left/eax: (addr code-point-utf8-stack) <- get self, left
var top-addr/eax: (addr int) <- get left, top
var result/eax: int <- copy *top-addr
return result
}
# Return the first element of the gap buffer without modifying it.
#   - if the left stack is non-empty: its bottom element (index 0)
#   - otherwise, if the right stack is non-empty: its top element (index top-1)
#   - otherwise: -1
fn first-grapheme-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme {
fn first-code-point-utf8-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 {
var self/esi: (addr gap-buffer) <- copy _self
# try to read from left
var left/eax: (addr grapheme-stack) <- get self, left
var left/eax: (addr code-point-utf8-stack) <- get self, left
var top-addr/ecx: (addr int) <- get left, top
compare *top-addr, 0
{
# left stack is empty; fall through to the right stack
break-if-<=
var data-ah/eax: (addr handle array grapheme) <- get left, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var result-addr/eax: (addr grapheme) <- index data, 0
var data-ah/eax: (addr handle array code-point-utf8) <- get left, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
var result-addr/eax: (addr code-point-utf8) <- index data, 0
return *result-addr
}
# try to read from right
var right/eax: (addr grapheme-stack) <- get self, right
var right/eax: (addr code-point-utf8-stack) <- get self, right
top-addr <- get right, top
compare *top-addr, 0
{
# right stack is empty as well; fall through to 'give up'
break-if-<=
var data-ah/eax: (addr handle array grapheme) <- get right, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var data-ah/eax: (addr handle array code-point-utf8) <- get right, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
# the top element lives at index top-1
var top/ecx: int <- copy *top-addr
top <- decrement
var result-addr/eax: (addr grapheme) <- index data, top
var result-addr/eax: (addr code-point-utf8) <- index data, top
return *result-addr
}
# give up
return -1
}
fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme {
fn code-point-utf8-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 {
var self/esi: (addr gap-buffer) <- copy _self
# try to read from left
var left/ecx: (addr grapheme-stack) <- get self, left
var left/ecx: (addr code-point-utf8-stack) <- get self, left
var top-addr/edx: (addr int) <- get left, top
compare *top-addr, 0
{
break-if-<=
var result/eax: grapheme <- pop-grapheme-stack left
push-grapheme-stack left, result
var result/eax: code-point-utf8 <- pop-code-point-utf8-stack left
push-code-point-utf8-stack left, result
return result
}
# give up
@ -222,27 +222,27 @@ fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: graph
# Delete the element just before the gap by popping the left stack and
# discarding the popped value.
fn delete-before-gap _self: (addr gap-buffer) {
var self/eax: (addr gap-buffer) <- copy _self
var left/eax: (addr grapheme-stack) <- get self, left
var dummy/eax: grapheme <- pop-grapheme-stack left
var left/eax: (addr code-point-utf8-stack) <- get self, left
var dummy/eax: code-point-utf8 <- pop-code-point-utf8-stack left
}
# Remove the element just after the gap by popping the right stack.
# Returns whatever the pop helper returns.
fn pop-after-gap _self: (addr gap-buffer) -> _/eax: grapheme {
fn pop-after-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8 {
var self/eax: (addr gap-buffer) <- copy _self
var right/eax: (addr grapheme-stack) <- get self, right
var result/eax: grapheme <- pop-grapheme-stack right
var right/eax: (addr code-point-utf8-stack) <- get self, right
var result/eax: code-point-utf8 <- pop-code-point-utf8-stack right
return result
}
fn gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: boolean {
var self/esi: (addr gap-buffer) <- copy _self
# complication: graphemes may be multiple bytes
# complication: code-point-utf8s may be multiple bytes
# so don't rely on length
# instead turn the expected result into a stream and arrange to read from it in order
var stream-storage: (stream byte 0x10/max-word-size)
var expected-stream/ecx: (addr stream byte) <- address stream-storage
write expected-stream, s
# compare left
var left/edx: (addr grapheme-stack) <- get self, left
var left/edx: (addr code-point-utf8-stack) <- get self, left
var result/eax: boolean <- prefix-match? left, expected-stream
compare result, 0/false
{
@ -250,7 +250,7 @@ fn gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: bo
return result
}
# compare right
var right/edx: (addr grapheme-stack) <- get self, right
var right/edx: (addr code-point-utf8-stack) <- get self, right
result <- suffix-match? right, expected-stream
compare result, 0/false
{
@ -267,10 +267,10 @@ fn test-gap-buffer-equal-from-end? {
var g/esi: (addr gap-buffer) <- address _g
initialize-gap-buffer g
#
var c/eax: grapheme <- copy 0x61/a
add-grapheme-at-gap g, c
add-grapheme-at-gap g, c
add-grapheme-at-gap g, c
var c/eax: code-point-utf8 <- copy 0x61/a
add-code-point-utf8-at-gap g, c
add-code-point-utf8-at-gap g, c
add-code-point-utf8-at-gap g, c
# gap is at end (right is empty)
var _result/eax: boolean <- gap-buffer-equal? g, "aaa"
var result/eax: int <- copy _result
@ -282,11 +282,11 @@ fn test-gap-buffer-equal-from-middle? {
var g/esi: (addr gap-buffer) <- address _g
initialize-gap-buffer g
#
var c/eax: grapheme <- copy 0x61/a
add-grapheme-at-gap g, c
add-grapheme-at-gap g, c
add-grapheme-at-gap g, c
var dummy/eax: grapheme <- gap-left g
var c/eax: code-point-utf8 <- copy 0x61/a
add-code-point-utf8-at-gap g, c
add-code-point-utf8-at-gap g, c
add-code-point-utf8-at-gap g, c
var dummy/eax: code-point-utf8 <- gap-left g
# gap is in the middle
var _result/eax: boolean <- gap-buffer-equal? g, "aaa"
var result/eax: int <- copy _result
@ -298,11 +298,11 @@ fn test-gap-buffer-equal-from-start? {
var g/esi: (addr gap-buffer) <- address _g
initialize-gap-buffer g
#
var c/eax: grapheme <- copy 0x61/a
add-grapheme-at-gap g, c
add-grapheme-at-gap g, c
add-grapheme-at-gap g, c
var dummy/eax: grapheme <- gap-left g
var c/eax: code-point-utf8 <- copy 0x61/a
add-code-point-utf8-at-gap g, c
add-code-point-utf8-at-gap g, c
add-code-point-utf8-at-gap g, c
var dummy/eax: code-point-utf8 <- gap-left g
dummy <- gap-left g
dummy <- gap-left g
# gap is at the start
@ -319,25 +319,25 @@ fn copy-gap-buffer _src-ah: (addr handle gap-buffer), _dest-ah: (addr handle gap
var dest-ah/eax: (addr handle gap-buffer) <- copy _dest-ah
var _dest-a/eax: (addr gap-buffer) <- lookup *dest-ah
var dest-a/edi: (addr gap-buffer) <- copy _dest-a
# copy left grapheme-stack
var src/ecx: (addr grapheme-stack) <- get src-a, left
var dest/edx: (addr grapheme-stack) <- get dest-a, left
copy-grapheme-stack src, dest
# copy right grapheme-stack
# copy left code-point-utf8-stack
var src/ecx: (addr code-point-utf8-stack) <- get src-a, left
var dest/edx: (addr code-point-utf8-stack) <- get dest-a, left
copy-code-point-utf8-stack src, dest
# copy right code-point-utf8-stack
src <- get src-a, right
dest <- get dest-a, right
copy-grapheme-stack src, dest
copy-code-point-utf8-stack src, dest
}
# Return true only if both stacks of the gap buffer pass the per-stack
# decimal-integer check.
# The right stack is only checked when the left stack's check returned true.
fn gap-buffer-is-decimal-integer? _self: (addr gap-buffer) -> _/eax: boolean {
var self/esi: (addr gap-buffer) <- copy _self
var curr/ecx: (addr grapheme-stack) <- get self, left
var result/eax: boolean <- grapheme-stack-is-decimal-integer? curr
var curr/ecx: (addr code-point-utf8-stack) <- get self, left
var result/eax: boolean <- code-point-utf8-stack-is-decimal-integer? curr
{
# left stack already failed; return false without looking at the right
compare result, 0/false
break-if-=
curr <- get self, right
result <- grapheme-stack-is-decimal-integer? curr
result <- code-point-utf8-stack-is-decimal-integer? curr
}
return result
}

View File

@ -1,24 +1,24 @@
# A stack of elements stored in an array.
#   data: handle to the backing array
#   top: number of elements currently on the stack; also the index at which
#        the next push stores its value
# (Both the old 'grapheme-stack' and the renamed 'code-point-utf8-stack'
# headers are listed here.)
type grapheme-stack {
data: (handle array grapheme)
type code-point-utf8-stack {
data: (handle array code-point-utf8)
top: int
}
# Initialize the stack at '_self': populate its 'data' handle using 'n' and
# reset 'top' to 0.
# NOTE(review): 'populate d, n' presumably allocates an array of n elements --
# confirm against the definition of 'populate'.
fn initialize-grapheme-stack _self: (addr grapheme-stack), n: int {
var self/esi: (addr grapheme-stack) <- copy _self
var d/edi: (addr handle array grapheme) <- get self, data
fn initialize-code-point-utf8-stack _self: (addr code-point-utf8-stack), n: int {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var d/edi: (addr handle array code-point-utf8) <- get self, data
populate d, n
var top/eax: (addr int) <- get self, top
copy-to *top, 0
}
# Empty the stack by resetting 'top' to 0.
# The 'data' array is not touched here.
fn clear-grapheme-stack _self: (addr grapheme-stack) {
var self/esi: (addr grapheme-stack) <- copy _self
fn clear-code-point-utf8-stack _self: (addr code-point-utf8-stack) {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top/eax: (addr int) <- get self, top
copy-to *top, 0
}
fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
fn code-point-utf8-stack-empty? _self: (addr code-point-utf8-stack) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top/eax: (addr int) <- get self, top
compare *top, 0
{
@ -28,20 +28,20 @@ fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean {
return 0/false
}
# Push '_val' onto the stack: store it at index 'top' of the data array, then
# increment 'top'.
# NOTE(review): there is no explicit capacity check in this function;
# presumably 'index' guards against going past the end of the array -- confirm.
fn push-grapheme-stack _self: (addr grapheme-stack), _val: grapheme {
var self/esi: (addr grapheme-stack) <- copy _self
fn push-code-point-utf8-stack _self: (addr code-point-utf8-stack), _val: code-point-utf8 {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top-addr/ecx: (addr int) <- get self, top
var data-ah/edx: (addr handle array grapheme) <- get self, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var data-ah/edx: (addr handle array code-point-utf8) <- get self, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
var top/edx: int <- copy *top-addr
var dest-addr/edx: (addr grapheme) <- index data, top
var val/eax: grapheme <- copy _val
var dest-addr/edx: (addr code-point-utf8) <- index data, top
var val/eax: code-point-utf8 <- copy _val
copy-to *dest-addr, val
add-to *top-addr, 1
}
fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme {
var self/esi: (addr grapheme-stack) <- copy _self
fn pop-code-point-utf8-stack _self: (addr code-point-utf8-stack) -> _/eax: code-point-utf8 {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var top-addr/ecx: (addr int) <- get self, top
{
compare *top-addr, 0
@ -49,25 +49,25 @@ fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme {
return -1
}
subtract-from *top-addr, 1
var data-ah/edx: (addr handle array grapheme) <- get self, data
var data/eax: (addr array grapheme) <- lookup *data-ah
var data-ah/edx: (addr handle array code-point-utf8) <- get self, data
var data/eax: (addr array code-point-utf8) <- lookup *data-ah
var top/edx: int <- copy *top-addr
var result-addr/eax: (addr grapheme) <- index data, top
var result-addr/eax: (addr code-point-utf8) <- index data, top
return *result-addr
}
fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) {
var src/esi: (addr grapheme-stack) <- copy _src
var data-ah/edi: (addr handle array grapheme) <- get src, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn copy-code-point-utf8-stack _src: (addr code-point-utf8-stack), dest: (addr code-point-utf8-stack) {
var src/esi: (addr code-point-utf8-stack) <- copy _src
var data-ah/edi: (addr handle array code-point-utf8) <- get src, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get src, top
var i/eax: int <- copy 0
{
compare i, *top-addr
break-if->=
var g/edx: (addr grapheme) <- index data, i
push-grapheme-stack dest, *g
var g/edx: (addr code-point-utf8) <- index data, i
push-code-point-utf8-stack dest, *g
i <- increment
loop
}
@ -75,18 +75,18 @@ fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack)
# dump stack to screen from bottom to top
# don't move the cursor or anything
fn render-stack-from-bottom _self: (addr grapheme-stack), screen: (addr screen) {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn render-stack-from-bottom _self: (addr code-point-utf8-stack), screen: (addr screen) {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/eax: int <- copy 0
{
compare i, *top-addr
break-if->=
var g/edx: (addr grapheme) <- index data, i
print-grapheme screen, *g
var g/edx: (addr code-point-utf8) <- index data, i
print-code-point-utf8 screen, *g
i <- increment
loop
}
@ -94,19 +94,19 @@ fn render-stack-from-bottom _self: (addr grapheme-stack), screen: (addr screen)
# dump stack to screen from top to bottom
# don't move the cursor or anything
fn render-stack-from-top _self: (addr grapheme-stack), screen: (addr screen) {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn render-stack-from-top _self: (addr code-point-utf8-stack), screen: (addr screen) {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/eax: int <- copy *top-addr
i <- decrement
{
compare i, 0
break-if-<
var g/edx: (addr grapheme) <- index data, i
print-grapheme screen, *g
var g/edx: (addr code-point-utf8) <- index data, i
print-code-point-utf8 screen, *g
i <- decrement
loop
}
@ -114,11 +114,11 @@ fn render-stack-from-top _self: (addr grapheme-stack), screen: (addr screen) {
# compare from bottom
# beware: modifies 'stream', which must be disposed of after a false result
fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn prefix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/ebx: int <- copy 0
{
@ -126,8 +126,8 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
break-if->=
# if curr != expected, return false
{
var curr-a/edx: (addr grapheme) <- index data, i
var expected/eax: grapheme <- read-grapheme s
var curr-a/edx: (addr code-point-utf8) <- index data, i
var expected/eax: code-point-utf8 <- read-code-point-utf8 s
{
compare expected, *curr-a
break-if-=
@ -142,11 +142,11 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
# compare from top
# beware: modifies 'stream', which must be disposed of after a false result
fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/edi: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edi: (addr array grapheme) <- copy _data
fn suffix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/edi: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edi: (addr array code-point-utf8) <- copy _data
var top-addr/eax: (addr int) <- get self, top
var i/ebx: int <- copy *top-addr
i <- decrement
@ -154,8 +154,8 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
compare i, 0
break-if-<
{
var curr-a/edx: (addr grapheme) <- index data, i
var expected/eax: grapheme <- read-grapheme s
var curr-a/edx: (addr code-point-utf8) <- index data, i
var expected/eax: code-point-utf8 <- read-code-point-utf8 s
# if curr != expected, return false
{
compare expected, *curr-a
@ -169,18 +169,18 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b
return 1 # true
}
fn grapheme-stack-is-decimal-integer? _self: (addr grapheme-stack) -> _/eax: boolean {
var self/esi: (addr grapheme-stack) <- copy _self
var data-ah/eax: (addr handle array grapheme) <- get self, data
var _data/eax: (addr array grapheme) <- lookup *data-ah
var data/edx: (addr array grapheme) <- copy _data
fn code-point-utf8-stack-is-decimal-integer? _self: (addr code-point-utf8-stack) -> _/eax: boolean {
var self/esi: (addr code-point-utf8-stack) <- copy _self
var data-ah/eax: (addr handle array code-point-utf8) <- get self, data
var _data/eax: (addr array code-point-utf8) <- lookup *data-ah
var data/edx: (addr array code-point-utf8) <- copy _data
var top-addr/ecx: (addr int) <- get self, top
var i/ebx: int <- copy 0
var result/eax: boolean <- copy 1/true
$grapheme-stack-is-integer?:loop: {
$code-point-utf8-stack-is-integer?:loop: {
compare i, *top-addr
break-if->=
var g/edx: (addr grapheme) <- index data, i
var g/edx: (addr code-point-utf8) <- index data, i
result <- decimal-digit? *g
compare result, 0/false
break-if-=

View File

@ -54,7 +54,7 @@ fn interactive {
initialize-environment env
{
render env
var key/eax: grapheme <- read-key-from-real-keyboard
var key/eax: code-point-utf8 <- read-key-from-real-keyboard
compare key, 0x11/ctrl-q
break-if-=
process env, key
@ -79,7 +79,7 @@ fn process-all env: (addr environment), cmds: (addr array byte) {
var done?/eax: boolean <- stream-empty? cmds-stream-a
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme cmds-stream-a
var g/eax: code-point-utf8 <- read-code-point-utf8 cmds-stream-a
process env, g
loop
}
@ -105,7 +105,7 @@ fn repl {
var done?/eax: boolean <- stream-empty? line
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme line
var g/eax: code-point-utf8 <- read-code-point-utf8 line
process env, g
loop
}

View File

@ -111,10 +111,10 @@ fn print-surface-cell-at _self: (addr surface), screen-row: int, screen-col: int
compare idx, 0
{
break-if->=
var space/ecx: grapheme <- copy 0x20
var space/ecx: code-point-utf8 <- copy 0x20
var screen-ah/edi: (addr handle screen) <- get self, screen
var screen/eax: (addr screen) <- lookup *screen-ah
print-grapheme screen, space
print-code-point-utf8 screen, space
return
}
# otherwise print the appropriate screen-cell
@ -156,9 +156,9 @@ fn print-screen-cell screen: (addr screen), _cell: (addr screen-cell) {
break-if-=
start-blinking screen
}
var g/eax: (addr grapheme) <- get cell, data
print-grapheme screen, *g
#? var g2/eax: grapheme <- copy *g
var g/eax: (addr code-point-utf8) <- get cell, data
print-code-point-utf8 screen, *g
#? var g2/eax: code-point-utf8 <- copy *g
#? var g3/eax: int <- copy g2
#? print-int32-hex-to-real-screen g3
#? print-string-to-real-screen "\n"
@ -264,7 +264,7 @@ fn num-lines in: (addr array byte) -> _/ecx: int {
var done?/eax: boolean <- stream-empty? s-addr
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme s-addr
var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr
compare g, 0xa/newline
loop-if-!=
result <- increment
@ -282,7 +282,7 @@ fn first-line-length in: (addr array byte) -> _/edx: int {
var done?/eax: boolean <- stream-empty? s-addr
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme s-addr
var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr
compare g, 0xa/newline
break-if-=
result <- increment
@ -301,12 +301,12 @@ fn fill-in _out: (addr array screen-cell), in: (addr array byte) {
var done?/eax: boolean <- stream-empty? s-addr
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme s-addr
var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr
compare g, 0xa/newline
loop-if-=
var offset/edx: (offset screen-cell) <- compute-offset out, idx
var dest/edx: (addr screen-cell) <- index out, offset
var dest2/edx: (addr grapheme) <- get dest, data
var dest2/edx: (addr code-point-utf8) <- get dest, data
copy-to *dest2, g
idx <- increment
loop

View File

@ -90,15 +90,15 @@ fn render-number screen: (addr screen), val: float, top-level?: boolean {
fg <- copy 0
}
start-color screen, fg, bg
print-grapheme screen, 0x20/space
print-code-point-utf8 screen, 0x20/space
print-float-decimal-approximate screen, val, 3
print-grapheme screen, 0x20/space
print-code-point-utf8 screen, 0x20/space
}
fn render-array-at screen: (addr screen), row: int, col: int, _a: (addr array value) {
start-color screen, 0xf2, 7
# don't surround in spaces
print-grapheme screen, 0x5b/[
print-code-point-utf8 screen, 0x5b/[
increment col
var a/esi: (addr array value) <- copy _a
var max/ecx: int <- length a
@ -122,7 +122,7 @@ fn render-array-at screen: (addr screen), row: int, col: int, _a: (addr array va
i <- increment
loop
}
print-grapheme screen, 0x5d/]
print-code-point-utf8 screen, 0x5d/]
}
fn render-screen screen: (addr screen), row: int, col: int, _target-screen: (addr screen) {
@ -179,13 +179,13 @@ fn print-screen-cell-of-fake-screen screen: (addr screen), _target: (addr screen
start-blinking screen
start-color screen, 0, 1
}
var g/eax: grapheme <- screen-grapheme-at target, row, col
var g/eax: code-point-utf8 <- screen-code-point-utf8-at target, row, col
{
compare g, 0
break-if-!=
g <- copy 0x20/space
}
print-grapheme screen, g
print-code-point-utf8 screen, g
reset-formatting screen
}

View File

@ -58,15 +58,15 @@ fn move-word-contents _src-ah: (addr handle word), _dest-ah: (addr handle word)
cursor-to-start src
var src-data-ah/eax: (addr handle gap-buffer) <- get src, scalar-data
var src-data/eax: (addr gap-buffer) <- lookup *src-data-ah
var src-stack/ecx: (addr grapheme-stack) <- get src-data, right
var src-stack/ecx: (addr code-point-utf8-stack) <- get src-data, right
{
var done?/eax: boolean <- grapheme-stack-empty? src-stack
var done?/eax: boolean <- code-point-utf8-stack-empty? src-stack
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- pop-grapheme-stack src-stack
#? print-grapheme 0, g
var g/eax: code-point-utf8 <- pop-code-point-utf8-stack src-stack
#? print-code-point-utf8 0, g
#? print-string 0, "\n"
add-grapheme-to-word dest, g
add-code-point-utf8-to-word dest, g
loop
}
}
@ -79,17 +79,17 @@ fn copy-word-contents-before-cursor _src-ah: (addr handle word), _dest-ah: (addr
var src/eax: (addr word) <- lookup *src-ah
var src-data-ah/eax: (addr handle gap-buffer) <- get src, scalar-data
var src-data/eax: (addr gap-buffer) <- lookup *src-data-ah
var src-stack/ecx: (addr grapheme-stack) <- get src-data, left
var src-stack-data-ah/eax: (addr handle array grapheme) <- get src-stack, data
var _src-stack-data/eax: (addr array grapheme) <- lookup *src-stack-data-ah
var src-stack-data/edx: (addr array grapheme) <- copy _src-stack-data
var src-stack/ecx: (addr code-point-utf8-stack) <- get src-data, left
var src-stack-data-ah/eax: (addr handle array code-point-utf8) <- get src-stack, data
var _src-stack-data/eax: (addr array code-point-utf8) <- lookup *src-stack-data-ah
var src-stack-data/edx: (addr array code-point-utf8) <- copy _src-stack-data
var top-addr/ecx: (addr int) <- get src-stack, top
var i/eax: int <- copy 0
{
compare i, *top-addr
break-if->=
var g/edx: (addr grapheme) <- index src-stack-data, i
add-grapheme-to-word dest, *g
var g/edx: (addr code-point-utf8) <- index src-stack-data, i
add-code-point-utf8-to-word dest, *g
i <- increment
loop
}
@ -129,27 +129,27 @@ fn final-word _in: (addr handle word), out: (addr handle word) {
copy-object curr-ah, out # modify 'out' right at the end, just in case it's same as 'in'
}
# Return whatever first-code-point-utf8-in-gap-buffer yields for the gap buffer
# stored in this word's 'scalar-data' field.
# (Renamed lines in this listing appear as old/new pairs.)
fn first-grapheme _self: (addr word) -> _/eax: grapheme {
fn first-code-point-utf8 _self: (addr word) -> _/eax: code-point-utf8 {
var self/esi: (addr word) <- copy _self
# 'scalar-data' is a handle; 'lookup' converts it to an address we can pass on
var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data
var data/eax: (addr gap-buffer) <- lookup *data-ah
var result/eax: grapheme <- first-grapheme-in-gap-buffer data
var result/eax: code-point-utf8 <- first-code-point-utf8-in-gap-buffer data
return result
}
# Return whatever code-point-utf8-before-cursor-in-gap-buffer yields for the
# gap buffer stored in this word's 'scalar-data' field.
# (Renamed lines in this listing appear as old/new pairs.)
fn grapheme-before-cursor _self: (addr word) -> _/eax: grapheme {
fn code-point-utf8-before-cursor _self: (addr word) -> _/eax: code-point-utf8 {
var self/esi: (addr word) <- copy _self
# 'scalar-data' is a handle; 'lookup' converts it to an address we can pass on
var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data
var data/eax: (addr gap-buffer) <- lookup *data-ah
var result/eax: grapheme <- grapheme-before-cursor-in-gap-buffer data
var result/eax: code-point-utf8 <- code-point-utf8-before-cursor-in-gap-buffer data
return result
}
# Forward 'c' to add-code-point-utf8-at-gap on the gap buffer stored in this
# word's 'scalar-data' field. No result is returned.
# (Renamed lines in this listing appear as old/new pairs.)
fn add-grapheme-to-word _self: (addr word), c: grapheme {
fn add-code-point-utf8-to-word _self: (addr word), c: code-point-utf8 {
var self/esi: (addr word) <- copy _self
# 'scalar-data' is a handle; 'lookup' converts it to an address we can pass on
var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data
var data/eax: (addr gap-buffer) <- lookup *data-ah
add-grapheme-at-gap data, c
add-code-point-utf8-at-gap data, c
}
fn cursor-at-start? _self: (addr word) -> _/eax: boolean {
@ -172,14 +172,14 @@ fn cursor-left _self: (addr word) {
var self/esi: (addr word) <- copy _self
var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data
var data/eax: (addr gap-buffer) <- lookup *data-ah
var dummy/eax: grapheme <- gap-left data
var dummy/eax: code-point-utf8 <- gap-left data
}
# Call gap-right on the gap buffer stored in this word's 'scalar-data' field.
# (Renamed lines in this listing appear as old/new pairs.)
fn cursor-right _self: (addr word) {
var self/esi: (addr word) <- copy _self
# 'scalar-data' is a handle; 'lookup' converts it to an address we can pass on
var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data
var data/eax: (addr gap-buffer) <- lookup *data-ah
# gap-right returns a value; it is bound to 'dummy' and never read
var dummy/eax: grapheme <- gap-right data
var dummy/eax: code-point-utf8 <- gap-right data
}
fn cursor-to-start _self: (addr word) {
@ -211,11 +211,11 @@ fn delete-before-cursor _self: (addr word) {
delete-before-gap data
}
# Return whatever pop-after-gap yields for the gap buffer stored in this
# word's 'scalar-data' field.
# (Renamed lines in this listing appear as old/new pairs.)
fn pop-after-cursor _self: (addr word) -> _/eax: grapheme {
fn pop-after-cursor _self: (addr word) -> _/eax: code-point-utf8 {
var self/esi: (addr word) <- copy _self
# 'scalar-data' is a handle; 'lookup' converts it to an address we can pass on
var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data
var data/eax: (addr gap-buffer) <- lookup *data-ah
var result/eax: grapheme <- pop-after-gap data
var result/eax: code-point-utf8 <- pop-after-gap data
return result
}
@ -553,14 +553,14 @@ fn parse-words in: (addr array byte), out-ah: (addr handle word) {
var done?/eax: boolean <- stream-empty? in-stream-a
compare done?, 0/false
break-if-!=
var _g/eax: grapheme <- read-grapheme in-stream-a
var g/ecx: grapheme <- copy _g
var _g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-a
var g/ecx: code-point-utf8 <- copy _g
# if not space, insert
compare g, 0x20/space
{
break-if-=
var cursor-word/eax: (addr word) <- lookup *cursor-word-ah
add-grapheme-to-word cursor-word, g
add-code-point-utf8-to-word cursor-word, g
loop $parse-words:loop
}
# otherwise insert word after and move cursor to it

View File

@ -206,8 +206,8 @@ doesn't yet parse floating-point literals:
- `print-int32-buffered`: int -> buffered-file
- textual representation in hex, including '0x' prefix
- `write-grapheme`: grapheme -> stream
- `to-grapheme`: code-point -> grapheme
- `write-code-point-utf8`: code-point-utf8 -> stream
- `to-utf8`: code-point -> code-point-utf8
- `write-float-decimal-approximate`: float, precision: int -> stream
@ -226,8 +226,8 @@ there isn't enough room in the destination stream.
- `read-line-buffered`: buffered-file -> stream
- Will abort the entire program if there isn't enough room.
- `read-grapheme`: stream -> grapheme
- `read-grapheme-buffered`: buffered-file -> grapheme
- `read-code-point-utf8`: stream -> code-point-utf8
- `read-code-point-utf8-buffered`: buffered-file -> code-point-utf8
- `read-lines`: buffered-file -> array of strings
@ -268,7 +268,7 @@ Unix terminal properties supported by almost all modern terminal emulators.
- `print-string`: string -> screen
- `print-stream`
- `print-grapheme`
- `print-code-point-utf8`
- `print-code-point`
- `print-int32-hex`
- `print-int32-decimal`
@ -290,7 +290,7 @@ manipulated.
Assertions for tests:
- `screen-grapheme-at`
- `screen-code-point-utf8-at`
- `screen-color-at`
- `screen-background-color-at`
- `screen-bold-at?`

8
mu.md
View File

@ -80,7 +80,7 @@ You can store several types in these registers:
- (addr T) (address into memory)
- byte (uses only 8 bits)
- code-point (Unicode)
- grapheme (code-point encoded in UTF-8)
- code-point-utf8 (code-point encoded in UTF-8)
There's one 32-bit type you _cannot_ store in these registers:
- float
@ -579,9 +579,9 @@ are a few functions to help with them:
# bytes
append-byte s: (addr stream byte), var: int # write lower byte of var
var/eax: byte <- read-byte s: (addr stream byte)
# 32-bit graphemes encoded in UTF-8
write-grapheme out: (addr stream byte), g: grapheme
g/eax: grapheme <- read-grapheme in: (addr stream byte)
# 32-bit code-point-utf8s (each one a single code point encoded in UTF-8)
write-code-point-utf8 out: (addr stream byte), g: code-point-utf8
g/eax: code-point-utf8 <- read-code-point-utf8 in: (addr stream byte)
```
You can check if a stream is empty or full:

View File

@ -150,7 +150,7 @@ def (a <> b)
```
To permit arbitrary infix operators, the Mu shell partitions the space of
graphemes between operators and regular symbols. As a result, you can't define
code-point-utf8s between operators and regular symbols. As a result, you can't define
symbols mixing the two.
```
'*global*

View File

@ -2,7 +2,7 @@
#
# vim:textwidth&
# It would be nice for tests to use a narrower screen than the standard 0x80 of
# 1024 pixels with 8px-wide graphemes. But it complicates rendering logic to
# 1024 pixels with 8px-wide code-point-utf8s. But it complicates rendering logic to
# make width configurable, so we just use longer lines than usual.
type environment {
@ -93,7 +93,7 @@ fn type-in self: (addr environment), screen: (addr screen), keys: (addr array by
var done?/eax: boolean <- stream-empty? input-stream
compare done?, 0/false
break-if-!=
var key/eax: grapheme <- read-grapheme input-stream
var key/eax: code-point-utf8 <- read-code-point-utf8 input-stream
edit-environment self, key, 0/no-disk
render-environment screen, self
loop
@ -145,7 +145,7 @@ fn render-environment screen: (addr screen), _self: (addr environment) {
render-sandbox-menu screen, sandbox
}
fn edit-environment _self: (addr environment), key: grapheme, data-disk: (addr disk) {
fn edit-environment _self: (addr environment), key: code-point-utf8, data-disk: (addr disk) {
var self/esi: (addr environment) <- copy _self
var globals/edi: (addr global-table) <- get self, globals
var sandbox/ecx: (addr sandbox) <- get self, sandbox

View File

@ -230,7 +230,7 @@ fn render-globals-menu screen: (addr screen), _self: (addr global-table) {
draw-text-rightward-from-cursor screen, " >> ", width, 7/fg, 0xc5/bg=blue-bg
}
fn edit-globals _self: (addr global-table), key: grapheme {
fn edit-globals _self: (addr global-table), key: code-point-utf8 {
var self/esi: (addr global-table) <- copy _self
# ctrl-s
{

View File

@ -312,8 +312,8 @@ fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) {
var buffer/edi: (addr gap-buffer) <- address buffer-storage
initialize-gap-buffer buffer, 0x40/max-symbol-size
# scan for first non-$
var g/eax: grapheme <- read-grapheme sym-data
add-grapheme-at-gap buffer, g
var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data
add-code-point-utf8-at-gap buffer, g
{
compare g, 0x24/dollar
break-if-!=
@ -323,28 +323,28 @@ fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) {
break-if-=
return # symbol is all '$'s; do nothing
}
g <- read-grapheme sym-data
add-grapheme-at-gap buffer, g
g <- read-code-point-utf8 sym-data
add-code-point-utf8-at-gap buffer, g
loop
}
var tokenization-needed?: boolean
var _operator-so-far?/eax: boolean <- operator-grapheme? g
var _operator-so-far?/eax: boolean <- operator-code-point-utf8? g
var operator-so-far?/ecx: boolean <- copy _operator-so-far?
{
var done?/eax: boolean <- stream-empty? sym-data
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme sym-data
var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data
{
var curr-operator?/eax: boolean <- operator-grapheme? g
var curr-operator?/eax: boolean <- operator-code-point-utf8? g
compare curr-operator?, operator-so-far?
break-if-=
# state change; insert a space
add-grapheme-at-gap buffer, 0x20/space
add-code-point-utf8-at-gap buffer, 0x20/space
operator-so-far? <- copy curr-operator?
copy-to tokenization-needed?, 1/true
}
add-grapheme-at-gap buffer, g
add-code-point-utf8-at-gap buffer, g
loop
}
compare tokenization-needed?, 0/false
@ -406,7 +406,7 @@ fn test-infix {
# helpers
# return true if x is composed entirely of operator graphemes, optionally prefixed with some '$'s
# return true if x is composed entirely of operator code-point-utf8s, optionally prefixed with some '$'s
# some operator, some non-operator => pre-tokenized symbol; return false
# all '$'s => return false
fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
@ -421,7 +421,7 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
var _x-data/eax: (addr stream byte) <- lookup *x-data-ah
var x-data/esi: (addr stream byte) <- copy _x-data
rewind-stream x-data
var g/eax: grapheme <- read-grapheme x-data
var g/eax: code-point-utf8 <- read-code-point-utf8 x-data
# special case: '$' is reserved for gensyms, and can work with either
# operator or non-operator symbols.
{
@ -434,12 +434,12 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
# '$', '$$', '$$$', etc. are regular symbols
return 0/false
}
g <- read-grapheme x-data
g <- read-code-point-utf8 x-data
loop
}
{
{
var result/eax: boolean <- operator-grapheme? g
var result/eax: boolean <- operator-code-point-utf8? g
compare result, 0/false
break-if-!=
return 0/false
@ -449,13 +449,13 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
compare done?, 0/false
}
break-if-!=
g <- read-grapheme x-data
g <- read-code-point-utf8 x-data
loop
}
return 1/true
}
fn operator-grapheme? g: grapheme -> _/eax: boolean {
fn operator-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
# '$' is special and can be in either a symbol or operator; here we treat it as a symbol
compare g, 0x25/percent
{

View File

@ -13,7 +13,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
var key/eax: byte <- read-key keyboard
compare key, 0
loop-if-=
var key/eax: grapheme <- copy key
var key/eax: code-point-utf8 <- copy key
edit-environment env, key, data-disk
}
loop

View File

@ -172,7 +172,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int {
y <- increment
var tmpx/eax: int <- copy xmin
tmpx <- draw-text-rightward screen, " key", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
tmpx <- draw-text-rightward screen, ": keyboard -> grapheme?", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
tmpx <- draw-text-rightward screen, ": keyboard -> code-point-utf8?", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
y <- increment
var tmpx/eax: int <- copy xmin
tmpx <- draw-text-rightward screen, "streams", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
@ -183,7 +183,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int {
y <- increment
var tmpx/eax: int <- copy xmin
tmpx <- draw-text-rightward screen, " write", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
tmpx <- draw-text-rightward screen, ": stream grapheme -> stream", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
tmpx <- draw-text-rightward screen, ": stream code-point-utf8 -> stream", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
y <- increment
var tmpx/eax: int <- copy xmin
tmpx <- draw-text-rightward screen, " rewind clear", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
@ -191,7 +191,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int {
y <- increment
var tmpx/eax: int <- copy xmin
tmpx <- draw-text-rightward screen, " read", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
tmpx <- draw-text-rightward screen, ": stream -> grapheme", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
tmpx <- draw-text-rightward screen, ": stream -> code-point-utf8", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
}
fn primitive-global? _x: (addr global) -> _/eax: boolean {
@ -3056,7 +3056,7 @@ fn wait-for-key keyboard: (addr gap-buffer) -> _/eax: int {
return result
}
# otherwise read from fake keyboard
var g/eax: grapheme <- read-from-gap-buffer keyboard
var g/eax: code-point-utf8 <- read-from-gap-buffer keyboard
var result/eax: int <- copy g
return result
}
@ -3121,14 +3121,14 @@ fn apply-write _args-ah: (addr handle cell), out: (addr handle cell), trace: (ad
var second-type/eax: (addr int) <- get second, type
compare *second-type, 1/number
break-if-=
error trace, "second arg for 'write' is not a number/grapheme"
error trace, "second arg for 'write' is not a number/code-point-utf8"
return
}
var second-value/eax: (addr float) <- get second, number-data
var x-float/xmm0: float <- copy *second-value
var x/eax: int <- convert x-float
var x-grapheme/eax: grapheme <- copy x
write-grapheme stream-data, x-grapheme
var x-code-point-utf8/eax: code-point-utf8 <- copy x
write-code-point-utf8 stream-data, x-code-point-utf8
# return the stream
copy-object first-ah, out
}
@ -3202,8 +3202,8 @@ fn apply-read _args-ah: (addr handle cell), out: (addr handle cell), trace: (add
var _stream-data/eax: (addr stream byte) <- lookup *stream-data-ah
var stream-data/ebx: (addr stream byte) <- copy _stream-data
#? rewind-stream stream-data
var result-grapheme/eax: grapheme <- read-grapheme stream-data
var result/eax: int <- copy result-grapheme
var result-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 stream-data
var result/eax: int <- copy result-code-point-utf8
new-integer out, result
}

View File

@ -449,7 +449,7 @@ fn render-keyboard-menu screen: (addr screen) {
draw-text-rightward-from-cursor screen, " to sandbox ", width, 7/fg, 0xc5/bg=blue-bg
}
fn edit-sandbox _self: (addr sandbox), key: grapheme, globals: (addr global-table), data-disk: (addr disk) {
fn edit-sandbox _self: (addr sandbox), key: code-point-utf8, globals: (addr global-table), data-disk: (addr disk) {
var self/esi: (addr sandbox) <- copy _self
# ctrl-s
{

View File

@ -429,13 +429,13 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
next-indent-token in, out, trace # might not be returned
}
skip-spaces-from-gap-buffer in
var g/eax: grapheme <- peek-from-gap-buffer in
var g/eax: code-point-utf8 <- peek-from-gap-buffer in
{
compare g, 0x23/comment
break-if-!=
skip-rest-of-line in
}
var g/eax: grapheme <- peek-from-gap-buffer in
var g/eax: code-point-utf8 <- peek-from-gap-buffer in
{
compare g, 0xa/newline
break-if-!=
@ -461,8 +461,8 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
trace-higher trace
return 1/at-start-of-line
}
var _g/eax: grapheme <- peek-from-gap-buffer in
var g/ecx: grapheme <- copy _g
var _g/eax: code-point-utf8 <- peek-from-gap-buffer in
var g/ecx: code-point-utf8 <- copy _g
{
var should-trace?/eax: boolean <- should-trace? trace
compare should-trace?, 0/false
@ -479,7 +479,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
{
compare g, 0x22/double-quote
break-if-!=
var dummy/eax: grapheme <- read-from-gap-buffer in # skip
var dummy/eax: code-point-utf8 <- read-from-gap-buffer in # skip
next-stream-token in, out, trace
break $next-token:case
}
@ -487,13 +487,13 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
{
compare g, 0x5b/open-square-bracket
break-if-!=
var dummy/eax: grapheme <- read-from-gap-buffer in # skip open bracket
var dummy/eax: code-point-utf8 <- read-from-gap-buffer in # skip open bracket
next-balanced-stream-token in, out, trace
break $next-token:case
}
# other symbol char
{
var symbol?/eax: boolean <- symbol-grapheme? g
var symbol?/eax: boolean <- symbol-code-point-utf8? g
compare symbol?, 0/false
break-if-=
next-symbol-token in, out, trace
@ -508,10 +508,10 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
}
# other brackets are always single-char tokens
{
var bracket?/eax: boolean <- bracket-grapheme? g
var bracket?/eax: boolean <- bracket-code-point-utf8? g
compare bracket?, 0/false
break-if-=
var g/eax: grapheme <- read-from-gap-buffer in
var g/eax: code-point-utf8 <- read-from-gap-buffer in
next-bracket-token g, out, trace
break $next-token:case
}
@ -519,7 +519,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
{
compare g, 0x27/single-quote
break-if-!=
var g/eax: grapheme <- read-from-gap-buffer in # consume
var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume
initialize-token out, "'"
break $next-token:case
}
@ -527,7 +527,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
{
compare g, 0x60/backquote
break-if-!=
var g/eax: grapheme <- read-from-gap-buffer in # consume
var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume
initialize-token out, "`"
break $next-token:case
}
@ -535,7 +535,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
{
compare g, 0x2c/comma
break-if-!=
var g/eax: grapheme <- read-from-gap-buffer in # consume
var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume
# check for unquote-splice
{
g <- peek-from-gap-buffer in
@ -581,7 +581,7 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
var done?/eax: boolean <- gap-buffer-scan-done? in
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- peek-from-gap-buffer in
var g/eax: code-point-utf8 <- peek-from-gap-buffer in
{
{
var should-trace?/eax: boolean <- should-trace? trace
@ -597,14 +597,14 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
}
# if non-symbol, return
{
var symbol-grapheme?/eax: boolean <- symbol-grapheme? g
compare symbol-grapheme?, 0/false
var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
compare symbol-code-point-utf8?, 0/false
break-if-!=
trace-text trace, "tokenize", "stop"
break $next-symbol-token:loop
}
var g/eax: grapheme <- read-from-gap-buffer in
write-grapheme out-data, g
var g/eax: code-point-utf8 <- read-from-gap-buffer in
write-code-point-utf8 out-data, g
loop
}
trace-higher trace
@ -630,16 +630,16 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
var out-data/edi: (addr stream byte) <- copy _out-data
# A number token may begin with a '-' sign. If the next code point is '-',
# consume it and copy it into the token before the digit loop runs.
# (Renamed lines in this listing appear as old/new pairs.)
$next-number-token:check-minus: {
var g/eax: grapheme <- peek-from-gap-buffer in
var g/eax: code-point-utf8 <- peek-from-gap-buffer in
compare g, 0x2d/minus
# Anything other than '-' stays unread so the loop below can run its
# symbol/digit checks on it. Without this branch the comparison above had no
# effect and the first code point was always consumed unchecked.
break-if-!=
g <- read-from-gap-buffer in # consume
write-grapheme out-data, g
write-code-point-utf8 out-data, g
}
$next-number-token:loop: {
var done?/eax: boolean <- gap-buffer-scan-done? in
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- peek-from-gap-buffer in
var g/eax: code-point-utf8 <- peek-from-gap-buffer in
{
{
var should-trace?/eax: boolean <- should-trace? trace
@ -653,15 +653,15 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
write-int32-hex stream, gval
trace trace, "tokenize", stream
}
# if not symbol grapheme, return
# if not symbol code-point-utf8, return
{
var symbol-grapheme?/eax: boolean <- symbol-grapheme? g
compare symbol-grapheme?, 0/false
var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
compare symbol-code-point-utf8?, 0/false
break-if-!=
trace-text trace, "tokenize", "stop"
break $next-number-token:loop
}
# if not digit grapheme, abort
# if not digit code-point-utf8, abort
{
var digit?/eax: boolean <- decimal-digit? g
compare digit?, 0/false
@ -670,8 +670,8 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
return
}
trace-text trace, "tokenize", "append"
var g/eax: grapheme <- read-from-gap-buffer in
write-grapheme out-data, g
var g/eax: code-point-utf8 <- read-from-gap-buffer in
write-code-point-utf8 out-data, g
loop
}
trace-higher trace
@ -696,10 +696,10 @@ fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
error trace, "unbalanced '\"'"
return
}
var g/eax: grapheme <- read-from-gap-buffer in
var g/eax: code-point-utf8 <- read-from-gap-buffer in
compare g, 0x22/double-quote
break-if-=
write-grapheme out-data, g
write-code-point-utf8 out-data, g
loop
}
{
@ -735,7 +735,7 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace:
error trace, "unbalanced '['"
return
}
var g/eax: grapheme <- read-from-gap-buffer in
var g/eax: code-point-utf8 <- read-from-gap-buffer in
{
compare g, 0x5b/open-square-bracket
break-if-!=
@ -748,7 +748,7 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace:
break-if-= $next-balanced-stream-token:loop
decrement bracket-count
}
write-grapheme out-data, g
write-code-point-utf8 out-data, g
loop
}
{
@ -764,14 +764,14 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace:
}
}
fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) {
fn next-bracket-token g: code-point-utf8, _out: (addr token), trace: (addr trace) {
trace-text trace, "tokenize", "bracket"
var out/eax: (addr token) <- copy _out
var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
populate-stream out-data-ah, 0x40
var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
var out-data/edi: (addr stream byte) <- copy _out-data
write-grapheme out-data, g
write-code-point-utf8 out-data, g
{
var should-trace?/eax: boolean <- should-trace? trace
compare should-trace?, 0/false
@ -790,7 +790,7 @@ fn skip-rest-of-line in: (addr gap-buffer) {
var done?/eax: boolean <- gap-buffer-scan-done? in
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- peek-from-gap-buffer in
var g/eax: code-point-utf8 <- peek-from-gap-buffer in
compare g, 0xa/newline
break-if-=
g <- read-from-gap-buffer in # consume
@ -810,7 +810,7 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
var done?/eax: boolean <- gap-buffer-scan-done? in
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- peek-from-gap-buffer in
var g/eax: code-point-utf8 <- peek-from-gap-buffer in
{
{
var should-trace?/eax: boolean <- should-trace? trace
@ -844,7 +844,7 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
}
}
# Mu carves up the space of graphemes into 4 categories:
# Mu carves up the space of code-point-utf8s into 4 categories:
# whitespace
# quotes and unquotes (from a Lisp perspective; doesn't include double
# quotes or other Unicode quotes)
@ -856,20 +856,20 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
# During tokenization operators and symbols are treated identically.
# A later phase digs into that nuance.
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
var whitespace?/eax: boolean <- whitespace-grapheme? g
fn symbol-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
var whitespace?/eax: boolean <- whitespace-code-point-utf8? g
compare whitespace?, 0/false
{
break-if-=
return 0/false
}
var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g
var quote-or-unquote?/eax: boolean <- quote-or-unquote-code-point-utf8? g
compare quote-or-unquote?, 0/false
{
break-if-=
return 0/false
}
var bracket?/eax: boolean <- bracket-grapheme? g
var bracket?/eax: boolean <- bracket-code-point-utf8? g
compare bracket?, 0/false
{
break-if-=
@ -888,7 +888,7 @@ fn symbol-grapheme? g: grapheme -> _/eax: boolean {
return 1/true
}
fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
fn whitespace-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
compare g, 9/tab
{
break-if-!=
@ -907,7 +907,7 @@ fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
return 0/false
}
fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
fn quote-or-unquote-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
compare g, 0x27/single-quote
{
break-if-!=
@ -931,7 +931,7 @@ fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
return 0/false
}
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
fn bracket-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
compare g, 0x28/open-paren
{
break-if-!=
@ -971,12 +971,12 @@ fn number-token? _self: (addr token) -> _/eax: boolean {
var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
var in-data/ecx: (addr stream byte) <- copy _in-data
rewind-stream in-data
var g/eax: grapheme <- read-grapheme in-data
var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
# if '-', read another
{
compare g, 0x2d/minus
break-if-!=
g <- read-grapheme in-data
g <- read-code-point-utf8 in-data
}
{
{
@ -990,7 +990,7 @@ fn number-token? _self: (addr token) -> _/eax: boolean {
compare done?, 0/false
}
break-if-!=
g <- read-grapheme in-data
g <- read-code-point-utf8 in-data
loop
}
return 1/true
@ -1008,8 +1008,8 @@ fn bracket-token? _self: (addr token) -> _/eax: boolean {
var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
var in-data/eax: (addr stream byte) <- lookup *in-data-ah
rewind-stream in-data
var g/eax: grapheme <- read-grapheme in-data
var result/eax: boolean <- bracket-grapheme? g
var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
var result/eax: boolean <- bracket-code-point-utf8? g
return result
}
@ -1055,7 +1055,7 @@ fn open-paren-token? _self: (addr token) -> _/eax: boolean {
var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
var in-data/ecx: (addr stream byte) <- copy _in-data
rewind-stream in-data
var g/eax: grapheme <- read-grapheme in-data
var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
compare g, 0x28/open-paren
{
break-if-!=
@ -1071,7 +1071,7 @@ fn close-paren-token? _self: (addr token) -> _/eax: boolean {
var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
var in-data/ecx: (addr stream byte) <- copy _in-data
rewind-stream in-data
var g/eax: grapheme <- read-grapheme in-data
var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
compare g, 0x29/close-paren
{
break-if-!=
@ -1087,7 +1087,7 @@ fn dot-token? _self: (addr token) -> _/eax: boolean {
var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
var in-data/ecx: (addr stream byte) <- copy _in-data
rewind-stream in-data
var g/eax: grapheme <- read-grapheme in-data
var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
compare g, 0x2e/dot
{
break-if-!=

View File

@ -904,7 +904,7 @@ fn render-trace-menu screen: (addr screen) {
draw-text-rightward-from-cursor screen, " show whole line ", width, 7/fg, 0xc5/bg=blue-bg
}
fn edit-trace _self: (addr trace), key: grapheme {
fn edit-trace _self: (addr trace), key: code-point-utf8 {
var self/esi: (addr trace) <- copy _self
# cursor down
{

View File

@ -69,8 +69,8 @@ sig slice-starts-with? s: (addr slice), head: (addr array byte) -> _/eax: boolea
sig write-slice out: (addr stream byte), s: (addr slice)
sig slice-to-string ad: (addr allocation-descriptor), in: (addr slice), out: (addr handle array byte)
sig write-int32-decimal out: (addr stream byte), n: int
sig decimal-digit? c: grapheme -> _/eax: boolean
sig to-decimal-digit in: grapheme -> _/eax: int
sig decimal-digit? c: code-point-utf8 -> _/eax: boolean
sig to-decimal-digit in: code-point-utf8 -> _/eax: int
sig next-word line: (addr stream byte), out: (addr slice) # merges '#' comments into a single word
sig next-raw-word line: (addr stream byte), out: (addr slice) # does not merge '#' comments
sig skip-chars-matching in: (addr stream byte), delimiter: byte
@ -89,18 +89,18 @@ sig parse-array-of-ints s: (addr array byte), out: (addr handle array int)
sig parse-array-of-decimal-ints s: (addr array byte), out: (addr handle array int)
sig check-array-equal a: (addr array int), expected: (addr array byte), msg: (addr array byte)
sig integer-divide a: int, b: int -> _/eax: int, _/edx: int
sig to-code-point in: grapheme -> _/eax: code-point
sig to-grapheme in: code-point -> _/eax: grapheme
sig read-grapheme in: (addr stream byte) -> _/eax: grapheme
sig grapheme-length g: grapheme -> _/edx: int
sig to-code-point in: code-point-utf8 -> _/eax: code-point
sig to-utf8 in: code-point -> _/eax: code-point-utf8
sig read-code-point-utf8 in: (addr stream byte) -> _/eax: code-point-utf8
sig utf8-length g: code-point-utf8 -> _/edx: int
sig shift-left-bytes n: int, k: int -> _/eax: int
sig write-grapheme out: (addr stream byte), g: grapheme
sig write-code-point-utf8 out: (addr stream byte), g: code-point-utf8
sig fill-in-rational _out: (addr float), nr: int, dr: int
sig fill-in-sqrt _out: (addr float), n: int
sig rational nr: int, dr: int -> _/xmm0: float
sig scale-down-and-round-up n: int, m: int -> _/ecx: int
sig substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte)
sig split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte))
sig split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte))
sig render-float-decimal screen: (addr screen), in: float, precision: int, x: int, y: int, color: int, background-color: int -> _/eax: int
sig write-float-decimal-approximate out: (addr stream byte), in: float, precision: int
sig decimal-digits n: int, _buf: (addr array byte) -> _/eax: int
@ -242,22 +242,22 @@ sig slide-down _a: (addr array int), start: int, end: int, target: int
sig find-slide-down-slot-in-array _a: (addr array int), _val: int -> _/ecx: int
sig check-slide-up before: (addr array byte), start: int, end: int, target: int, after: (addr array byte), msg: (addr array byte)
sig check-slide-down before: (addr array byte), start: int, end: int, target: int, after: (addr array byte), msg: (addr array byte)
sig initialize-grapheme-stack _self: (addr grapheme-stack), n: int
sig clear-grapheme-stack _self: (addr grapheme-stack)
sig grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean
sig grapheme-stack-length _self: (addr grapheme-stack) -> _/eax: int
sig push-grapheme-stack _self: (addr grapheme-stack), _val: grapheme
sig pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme
sig copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack)
sig render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int
sig render-stack-from-bottom screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int
sig render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int
sig render-stack-from-top screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int
sig get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: boolean -> _/edx: int
sig get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean, depth: int -> _/edx: int
sig prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean
sig suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean
sig grapheme-stack-is-decimal-integer? _self: (addr grapheme-stack) -> _/eax: boolean
sig initialize-code-point-utf8-stack _self: (addr code-point-utf8-stack), n: int
sig clear-code-point-utf8-stack _self: (addr code-point-utf8-stack)
sig code-point-utf8-stack-empty? _self: (addr code-point-utf8-stack) -> _/eax: boolean
sig code-point-utf8-stack-length _self: (addr code-point-utf8-stack) -> _/eax: int
sig push-code-point-utf8-stack _self: (addr code-point-utf8-stack), _val: code-point-utf8
sig pop-code-point-utf8-stack _self: (addr code-point-utf8-stack) -> _/eax: code-point-utf8
sig copy-code-point-utf8-stack _src: (addr code-point-utf8-stack), dest: (addr code-point-utf8-stack)
sig render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int
sig render-stack-from-bottom screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int
sig render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int
sig render-stack-from-top screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int
sig get-matching-close-paren-index _self: (addr code-point-utf8-stack), render-cursor?: boolean -> _/edx: int
sig get-matching-open-paren-index _self: (addr code-point-utf8-stack), control: boolean, depth: int -> _/edx: int
sig prefix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean
sig suffix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean
sig code-point-utf8-stack-is-decimal-integer? _self: (addr code-point-utf8-stack) -> _/eax: boolean
sig initialize-gap-buffer _self: (addr gap-buffer), capacity: int
sig clear-gap-buffer _self: (addr gap-buffer)
sig gap-buffer-empty? _self: (addr gap-buffer) -> _/eax: boolean
@ -266,43 +266,43 @@ sig initialize-gap-buffer-with self: (addr gap-buffer), keys: (addr array byte)
sig load-gap-buffer-from-stream self: (addr gap-buffer), in: (addr stream byte)
sig emit-gap-buffer self: (addr gap-buffer), out: (addr stream byte)
sig append-gap-buffer _self: (addr gap-buffer), out: (addr stream byte)
sig emit-stack-from-bottom _self: (addr grapheme-stack), out: (addr stream byte)
sig emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte)
sig emit-stack-from-bottom _self: (addr code-point-utf8-stack), out: (addr stream byte)
sig emit-stack-from-top _self: (addr code-point-utf8-stack), out: (addr stream byte)
sig word-at-gap _self: (addr gap-buffer), out: (addr stream byte)
sig grapheme-at-gap _self: (addr gap-buffer) -> _/eax: grapheme
sig top-most-word _self: (addr grapheme-stack) -> _/eax: int
sig emit-stack-from-index _self: (addr grapheme-stack), start: int, out: (addr stream byte)
sig emit-stack-to-index _self: (addr grapheme-stack), end: int, out: (addr stream byte)
sig is-ascii-word-grapheme? g: grapheme -> _/eax: boolean
sig code-point-utf8-at-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig top-most-word _self: (addr code-point-utf8-stack) -> _/eax: int
sig emit-stack-from-index _self: (addr code-point-utf8-stack), start: int, out: (addr stream byte)
sig emit-stack-to-index _self: (addr code-point-utf8-stack), end: int, out: (addr stream byte)
sig is-ascii-word-code-point-utf8? g: code-point-utf8 -> _/eax: boolean
sig render-gap-buffer-wrapping-right-then-down screen: (addr screen), _gap: (addr gap-buffer), xmin: int, ymin: int, xmax: int, ymax: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int
sig render-gap-buffer screen: (addr screen), gap: (addr gap-buffer), x: int, y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int
sig gap-buffer-length _gap: (addr gap-buffer) -> _/eax: int
sig add-grapheme-at-gap _self: (addr gap-buffer), g: grapheme
sig add-code-point-utf8-at-gap _self: (addr gap-buffer), g: code-point-utf8
sig add-code-point-at-gap self: (addr gap-buffer), c: code-point
sig gap-to-start self: (addr gap-buffer)
sig gap-to-end self: (addr gap-buffer)
sig gap-at-start? _self: (addr gap-buffer) -> _/eax: boolean
sig gap-at-end? _self: (addr gap-buffer) -> _/eax: boolean
sig gap-right _self: (addr gap-buffer) -> _/eax: grapheme
sig gap-left _self: (addr gap-buffer) -> _/eax: grapheme
sig gap-right _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig gap-left _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig index-of-gap _self: (addr gap-buffer) -> _/eax: int
sig first-grapheme-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme
sig grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme
sig first-code-point-utf8-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig code-point-utf8-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig delete-before-gap _self: (addr gap-buffer)
sig pop-after-gap _self: (addr gap-buffer) -> _/eax: grapheme
sig pop-after-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: boolean
sig gap-buffers-equal? self: (addr gap-buffer), g: (addr gap-buffer) -> _/eax: boolean
sig gap-index _self: (addr gap-buffer), _n: int -> _/eax: grapheme
sig gap-index _self: (addr gap-buffer), _n: int -> _/eax: code-point-utf8
sig copy-gap-buffer _src-ah: (addr handle gap-buffer), _dest-ah: (addr handle gap-buffer)
sig gap-buffer-is-decimal-integer? _self: (addr gap-buffer) -> _/eax: boolean
sig highlight-matching-open-paren? _gap: (addr gap-buffer), render-cursor?: boolean -> _/ebx: boolean, _/edi: int
sig rewind-gap-buffer _self: (addr gap-buffer)
sig gap-buffer-scan-done? _self: (addr gap-buffer) -> _/eax: boolean
sig peek-from-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme
sig read-from-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme
sig peek-from-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig read-from-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8
sig put-back-from-gap-buffer _self: (addr gap-buffer)
sig skip-spaces-from-gap-buffer self: (addr gap-buffer)
sig edit-gap-buffer self: (addr gap-buffer), key: grapheme
sig edit-gap-buffer self: (addr gap-buffer), key: code-point-utf8
sig gap-to-start-of-next-word self: (addr gap-buffer)
sig gap-to-end-of-previous-word self: (addr gap-buffer)
sig gap-to-previous-start-of-line self: (addr gap-buffer)

View File

@ -55,7 +55,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
# process a single keystroke
$main:input: {
var key/eax: byte <- read-key keyboard
var key/eax: grapheme <- copy key
var key/eax: code-point-utf8 <- copy key
compare key, 0
loop-if-=
# tab = switch cursor between input areas

View File

@ -37,7 +37,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
# process a single keystroke
$main:input: {
var key/eax: byte <- read-key keyboard
var key/eax: grapheme <- copy key
var key/eax: code-point-utf8 <- copy key
compare key, 0
loop-if-=
# tab = switch cursor between input areas

View File

@ -541,7 +541,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard) {
var done?/eax: boolean <- stream-empty? in
compare done?, 0/false
break-if-!=
var g/eax: grapheme <- read-grapheme in
var g/eax: code-point-utf8 <- read-code-point-utf8 in
# do stuff with g here
loop
}
@ -550,8 +550,8 @@ fn main screen: (addr screen), keyboard: (addr keyboard) {
`read-line-from-keyboard` reads keystrokes from the keyboard until you press
the `Enter` (also called `newline`) key, and accumulates them into a _stream_
of bytes. The loop then repeatedly reads _graphemes_ from the stream. A
grapheme can consist of multiple bytes, particularly outside of the Latin
of bytes. The loop then repeatedly reads _code-point-utf8s_ from the stream. A
code-point-utf8 can consist of multiple bytes, particularly outside of the Latin
alphabet and Arabic digits most prevalent in the West. Mu doesn't yet support
non-Qwerty keyboards, but support for other keyboards should be easy to add.
@ -561,12 +561,12 @@ give yourself a sense of what you can do with them. Does the above program
make sense now? Feel free to experiment to make sense of it.
Can you modify it to print out the line a second time, after you've typed it
out until the `Enter` key? Can you print a space after every grapheme when you
out and pressed the `Enter` key? Can you print a space after every code-point-utf8 when you
print the line out a second time? You'll need to skim the section on
[printing to screen](https://github.com/akkartik/mu/blob/main/vocabulary.md#printing-to-screen)
from Mu's vocabulary. Pay particular attention to the difference between a
grapheme and a _code-point_. Mu programs often read characters in units of
graphemes, but they must draw in units of code-points that the font manages.
code-point-utf8 and a _code-point_. Mu programs often read characters in units of
code-point-utf8s, but they must draw in units of code-points that the font manages.
(This adds some complexity but helps combine multiple code-points into a
single glyph as needed for some languages.)

View File

@ -43,7 +43,7 @@ how they work under the hood.
- Code-points: integer representing a Unicode character. Must be representable
in 32 bits as utf-8; largest supported value is 0x10000.
Mu will let you convert between bytes, graphemes and code-points using `copy`,
Mu will let you convert between bytes, code-point-utf8s and code-points using `copy`,
and trust that you know what you're doing. Be aware that doing so is only
correct for English/Latin characters, digits and symbols.
@ -120,7 +120,7 @@ signatures.mu for their full type signatures.
- `append-byte-hex`: writes textual representation of lowest byte in hex to
a stream of bytes. Does not write a '0x' prefix.
- `read-byte`: reads a single byte from a stream of bytes.
- `read-grapheme`: reads a single unicode grapheme (up to 4 bytes) from a
- `read-code-point-utf8`: reads a single Unicode code-point-utf8 (up to 4 bytes) from a
stream of bytes.
#### reading/writing hex representations of integers
@ -137,7 +137,7 @@ signatures.mu for their full type signatures.
- `parse-decimal-int-from-slice`
- `parse-decimal-int-from-stream`
- `parse-array-of-decimal-ints`
- `decimal-digit?`: checks if a grapheme is in [0, 9]
- `decimal-digit?`: checks if a code-point-utf8 is a decimal digit character ('0' through '9')
#### printing to screen
@ -197,7 +197,7 @@ automatically read and update the cursor position in various ways.
These primitives always silently fail if the desired movement would go out
of screen bounds.
- `move-cursor-to-left-margin-of-next-line`
- `move-cursor-rightward-and-downward`: move cursor one grapheme to the right
- `move-cursor-rightward-and-downward`: move cursor one code-point-utf8 to the right
- `draw-text-rightward-from-cursor`: truncate at some right margin.
- `draw-text-rightward-from-cursor-over-full-screen`: truncate at right edge
@ -255,7 +255,7 @@ Assertions for tests:
`read-key` reads a single key from the keyboard and returns it if it exists.
Returns 0 if no key has been pressed. Currently only supports single-byte keys,
which are identical to their code-point and grapheme representations.
which are identical to their code-point and code-point-utf8 representations.
`read-line-from-keyboard` reads keys from keyboard, echoes them to screen
(with given fg/bg colors) and accumulates them in a stream until it encounters