diff --git a/126write-int-decimal.subx b/126write-int-decimal.subx index d30cb762..f61538b8 100644 --- a/126write-int-decimal.subx +++ b/126write-int-decimal.subx @@ -324,7 +324,7 @@ test-write-int32-decimal-negative-multiple-digits: # . end c3/return -decimal-digit?: # c: grapheme -> result/eax: boolean +decimal-digit?: # c: code-point-utf8 -> result/eax: boolean # . prologue 55/push-ebp 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp @@ -423,7 +423,7 @@ test-decimal-digit-above-9: 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp c3/return -to-decimal-digit: # in: grapheme -> out/eax: int +to-decimal-digit: # in: code-point-utf8 -> out/eax: int # . prologue 55/push-ebp 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp diff --git a/400.mu b/400.mu index 9baad977..495e57ce 100644 --- a/400.mu +++ b/400.mu @@ -97,8 +97,8 @@ sig write-slice out: (addr stream byte), s: (addr slice) # bad name alert sig slice-to-string ad: (addr allocation-descriptor), in: (addr slice), out: (addr handle array byte) sig write-int32-decimal out: (addr stream byte), n: int -sig decimal-digit? c: grapheme -> _/eax: boolean -sig to-decimal-digit in: grapheme -> _/eax: int +sig decimal-digit? c: code-point-utf8 -> _/eax: boolean +sig to-decimal-digit in: code-point-utf8 -> _/eax: int # bad name alert # next-word really tokenizes # next-raw-word really reads whitespace-separated words diff --git a/403unicode.mu b/403unicode.mu index e1bfba3f..5d465d71 100644 --- a/403unicode.mu +++ b/403unicode.mu @@ -1,16 +1,11 @@ # Helpers for Unicode. # -# Mu has no characters, only code points and graphemes. -# Code points are the indivisible atoms of text streams. +# The basic unit for rendering Unicode is the code point. # https://en.wikipedia.org/wiki/Code_point -# Graphemes are the smallest self-contained unit of text. -# Graphemes may consist of multiple code points. 
+# The glyph a non-cursive font displays may represent multiple code points. # -# Mu graphemes are always represented in utf-8, and they are required to fit -# in 4 bytes. (This can be confusing if you focus just on ASCII, where Mu's -# graphemes and code-points are identical.) -# -# Mu doesn't yet support graphemes consisting of multiple code points. +# In addition to raw code points (just integers assigned special meaning), Mu +# provides a common encoding as a convenience: code-point-utf8. fn test-unicode-serialization-and-deserialization { var i/ebx: int <- copy 0 @@ -20,8 +15,8 @@ fn test-unicode-serialization-and-deserialization { # but not emoji break-if->= var c/eax: code-point <- copy i - var _g/eax: grapheme <- to-grapheme c - var g/ecx: grapheme <- copy _g + var _g/eax: code-point-utf8 <- to-utf8 c + var g/ecx: code-point-utf8 <- copy _g var c2/eax: code-point <- to-code-point g compare i, c2 { @@ -51,7 +46,7 @@ fn test-unicode-serialization-and-deserialization { } # transliterated from tb_utf8_char_to_unicode in https://github.com/nsf/termbox -fn to-code-point in: grapheme -> _/eax: code-point { +fn to-code-point in: code-point-utf8 -> _/eax: code-point { var g/ebx: int <- copy in # if single byte, just return it { @@ -61,7 +56,7 @@ fn to-code-point in: grapheme -> _/eax: code-point { return result } # - var len/edx: int <- grapheme-length in + var len/edx: int <- utf8-length in # extract bits from first byte var b/eax: byte <- copy-byte g var result/edi: code-point <- copy b @@ -99,16 +94,16 @@ fn to-code-point in: grapheme -> _/eax: code-point { # transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox # https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm -fn to-grapheme in: code-point -> _/eax: grapheme { +fn to-utf8 in: code-point -> _/eax: code-point-utf8 { var c/eax: int <- copy in var num-trailers/ecx: int <- copy 0 var first/edx: int <- copy 0 - $to-grapheme:compute-length: { + $to-utf8:compute-length: 
{ # single byte: just return it compare c, 0x7f { break-if-> - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -117,7 +112,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 1 first <- copy 0xc0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # 3 bytes compare c, 0xffff @@ -125,7 +120,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 2 first <- copy 0xe0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # 4 bytes compare c, 0x1fffff @@ -133,7 +128,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 3 first <- copy 0xf0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # more than 4 bytes: unsupported compare c, 0x1fffff @@ -144,7 +139,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { } } # emit trailer bytes, 6 bits from 'in', first two bits '10' - var result/edi: grapheme <- copy 0 + var result/edi: code-point-utf8 <- copy 0 { compare num-trailers, 0 break-if-<= @@ -166,16 +161,16 @@ fn to-grapheme in: code-point -> _/eax: grapheme { return result } -# single-byte code point have identical graphemes -fn test-to-grapheme-single-byte { +# single-byte code points have identical code-point-utf8s +fn test-to-utf8-single-byte { var in-int/ecx: int <- copy 0 { compare in-int, 0x7f break-if-> var in/eax: code-point <- copy in-int - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, in-int, "F - test-to-grapheme-single-byte" + check-ints-equal out-int, in-int, "F - test-to-utf8-single-byte" in-int <- increment loop } @@ -183,55 +178,55 @@ fn test-to-grapheme-single-byte { # byte | byte | byte | byte # smallest 2-byte utf-8 -fn test-to-grapheme-two-bytes-min { +fn test-to-utf8-two-bytes-min { var in/eax: code-point <- copy 0x80 # 10 00-0000 - var out/eax: 
grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x80c2, "F - to-grapheme/2a" # 110 0-0010 10 00-0000 + check-ints-equal out-int, 0x80c2, "F - to-utf8/2a" # 110 0-0010 10 00-0000 } # largest 2-byte utf-8 -fn test-to-grapheme-two-bytes-max { +fn test-to-utf8-two-bytes-max { var in/eax: code-point <- copy 0x7ff # 1-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfdf, "F - to-grapheme/2b" # 110 1-1111 10 11-1111 + check-ints-equal out-int, 0xbfdf, "F - to-utf8/2b" # 110 1-1111 10 11-1111 } # smallest 3-byte utf-8 -fn test-to-grapheme-three-bytes-min { +fn test-to-utf8-three-bytes-min { var in/eax: code-point <- copy 0x800 # 10-0000 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x80a0e0, "F - to-grapheme/3a" # 1110 0000 10 10-0000 10 00-0000 + check-ints-equal out-int, 0x80a0e0, "F - to-utf8/3a" # 1110 0000 10 10-0000 10 00-0000 } # largest 3-byte utf-8 -fn test-to-grapheme-three-bytes-max { +fn test-to-utf8-three-bytes-max { var in/eax: code-point <- copy 0xffff # 1111 11-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfbfef, "F - to-grapheme/3b" # 1110 1111 10 11-1111 10 11-1111 + check-ints-equal out-int, 0xbfbfef, "F - to-utf8/3b" # 1110 1111 10 11-1111 10 11-1111 } # smallest 4-byte utf-8 -fn test-to-grapheme-four-bytes-min { +fn test-to-utf8-four-bytes-min { var in/eax: code-point <- copy 0x10000 # 1-0000 00-0000 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x808090f0, "F - to-grapheme/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000 + 
check-ints-equal out-int, 0x808090f0, "F - to-utf8/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000 } # largest 4-byte utf-8 -fn test-to-grapheme-four-bytes-max { +fn test-to-utf8-four-bytes-max { var in/eax: code-point <- copy 0x1fffff # 111 11-1111 11-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfbfbff7, "F - to-grapheme/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111 + check-ints-equal out-int, 0xbfbfbff7, "F - to-utf8/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111 } -# read the next grapheme from a stream of bytes -fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { +# read the next code-point-utf8 from a stream of bytes +fn read-code-point-utf8 in: (addr stream byte) -> _/eax: code-point-utf8 { # if at eof, return EOF { var eof?/eax: boolean <- stream-empty? in @@ -241,18 +236,18 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { } var c/eax: byte <- read-byte in var num-trailers/ecx: int <- copy 0 - $read-grapheme:compute-length: { + $read-code-point-utf8:compute-length: { # single byte: just return it compare c, 0xc0 { break-if->= - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } compare c, 0xfe { break-if-< - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -260,27 +255,27 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { { break-if->= num-trailers <- copy 1 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } # 3 bytes compare c, 0xf0 { break-if->= num-trailers <- copy 2 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } # 4 bytes compare c, 0xf8 { break-if->= num-trailers <- copy 3 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } abort "utf-8 encodings larger than 4 bytes are not yet supported" return 0 } # prepend trailer 
bytes - var result/edi: grapheme <- copy c + var result/edi: code-point-utf8 <- copy c var num-byte-shifts/edx: int <- copy 1 { compare num-trailers, 0 @@ -297,34 +292,34 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { return result } -fn test-read-grapheme { +fn test-read-code-point-utf8 { var s: (stream byte 0x30) var s2/ecx: (addr stream byte) <- address s write s2, "aΒc世d界e" - var c/eax: grapheme <- read-grapheme s2 + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x61, "F - test grapheme/0" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x61, "F - test code-point-utf8/0" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test grapheme/1" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test code-point-utf8/1" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x63, "F - test grapheme/2" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x63, "F - test code-point-utf8/2" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x96b8e4, "F - test grapheme/3" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x96b8e4, "F - test code-point-utf8/3" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x64, "F - test grapheme/4" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x64, "F - test code-point-utf8/4" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x8c95e7, "F - test grapheme/5" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x8c95e7, "F - test code-point-utf8/5" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 
0x65, "F - test grapheme/6" + check-ints-equal n, 0x65, "F - test code-point-utf8/6" } -fn grapheme-length g: grapheme -> _/edx: int { +fn utf8-length g: code-point-utf8 -> _/edx: int { { compare g, 0xff break-if-> @@ -389,23 +384,23 @@ fn test-shift-left-bytes-5 { check-ints-equal result, 0, "F - shift-left-bytes >4" } -# write a grapheme to a stream of bytes +# write a code-point-utf8 to a stream of bytes # this is like write-to-stream, except we skip leading 0 bytes -fn write-grapheme out: (addr stream byte), g: grapheme { -$write-grapheme:body: { +fn write-code-point-utf8 out: (addr stream byte), g: code-point-utf8 { +$write-code-point-utf8:body: { var c/eax: int <- copy g append-byte out, c # first byte is always written c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c } } diff --git a/411string.mu b/411string.mu index cf0471ac..493c9b56 100644 --- a/411string.mu +++ b/411string.mu @@ -1,4 +1,4 @@ -# read up to 'len' graphemes after skipping the first 'start' ones +# read up to 'len' code-point-utf8s after skipping the first 'start' ones fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) { var in-stream: (stream byte 0x100) var in-stream-addr/esi: (addr stream byte) <- address in-stream @@ -6,29 +6,29 @@ fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle a var out-stream: (stream byte 0x100) var out-stream-addr/edi: (addr stream byte) <- address out-stream $substring:core: { - # skip 'start' graphemes + # skip 'start' code-point-utf8s var i/eax: int <- copy 0 { compare i, start break-if->= { - var dummy/eax: grapheme <- read-grapheme in-stream-addr + var dummy/eax: 
code-point-utf8 <- read-code-point-utf8 in-stream-addr compare dummy, 0xffffffff/end-of-file break-if-= $substring:core } i <- increment loop } - # copy 'len' graphemes + # copy 'len' code-point-utf8s i <- copy 0 { compare i, len break-if->= { - var g/eax: grapheme <- read-grapheme in-stream-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare g, 0xffffffff/end-of-file break-if-= $substring:core - write-grapheme out-stream-addr, g + write-code-point-utf8 out-stream-addr, g } i <- increment loop @@ -85,7 +85,7 @@ fn test-substring { check-strings-equal out, "bcde", "F - test-substring/middle-too-small" } -fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte)) { +fn split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte)) { var in-stream: (stream byte 0x100) var in-stream-addr/esi: (addr stream byte) <- address in-stream write in-stream-addr, in @@ -94,10 +94,10 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array var curr-stream: (stream byte 0x100) var curr-stream-addr/ecx: (addr stream byte) <- address curr-stream $split-string:core: { - var g/eax: grapheme <- read-grapheme in-stream-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare g, 0xffffffff break-if-= -#? print-grapheme-to-real-screen g +#? print-code-point-utf8-to-real-screen g #? 
print-string-to-real-screen "\n" compare g, delim { @@ -110,7 +110,7 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array clear-stream curr-stream-addr loop $split-string:core } - write-grapheme curr-stream-addr, g + write-code-point-utf8 curr-stream-addr, g loop } stream-to-array tokens-stream-addr, out diff --git a/500fake-screen.mu b/500fake-screen.mu index c34beeaa..f86ce47c 100644 --- a/500fake-screen.mu +++ b/500fake-screen.mu @@ -71,7 +71,7 @@ fn initialize-screen _screen: (addr screen), width: int, height: int, pixel-grap copy-to *dest, 0 } -# in graphemes +# in code-point-utf8s fn screen-size _screen: (addr screen) -> _/eax: int, _/ecx: int { var screen/esi: (addr screen) <- copy _screen var width/eax: int <- copy 0 @@ -459,7 +459,7 @@ fn clear-rect _screen: (addr screen), xmin: int, ymin: int, xmax: int, ymax: int set-cursor-position screen, 0, 0 } -# there's no grapheme that guarantees to cover every pixel, so we'll bump down +# there's no code-point-utf8 that guarantees to cover every pixel, so we'll bump down # to pixels for a real screen fn clear-real-screen { var y/eax: int <- copy 0 diff --git a/501draw-text.mu b/501draw-text.mu index 5d2917a9..66260b56 100644 --- a/501draw-text.mu +++ b/501draw-text.mu @@ -117,7 +117,7 @@ fn draw-text-rightward screen: (addr screen), text: (addr array byte), x: int, x fn draw-stream-rightward screen: (addr screen), stream: (addr stream byte), x: int, xmax: int, y: int, color: int, background-color: int -> _/eax: int { var xcurr/ecx: int <- copy x { - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream compare g, 0xffffffff/end-of-file break-if-= var c/eax: code-point <- to-code-point g @@ -218,7 +218,7 @@ fn draw-stream-wrapping-right-then-down screen: (addr screen), stream: (addr str next-c <- copy 0 break $draw-stream-wrapping-right-then-down:read-base } - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- 
read-code-point-utf8 stream var _c/eax: code-point <- to-code-point g c <- copy _c } @@ -240,7 +240,7 @@ fn draw-stream-wrapping-right-then-down screen: (addr screen), stream: (addr str compare done?, 0/false break-if-!= # read a character - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream var c/eax: code-point <- to-code-point g # if not a combining character, save for next iteration and loop { @@ -343,7 +343,7 @@ fn draw-int32-hex-wrapping-right-then-down screen: (addr screen), n: int, xmin: var xcurr/edx: int <- copy x var ycurr/ecx: int <- copy y { - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream compare g, 0xffffffff/end-of-file break-if-= var c/eax: code-point <- to-code-point g @@ -398,7 +398,7 @@ fn draw-int32-decimal-wrapping-right-then-down screen: (addr screen), n: int, xm var xcurr/edx: int <- copy x var ycurr/ecx: int <- copy y { - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream compare g, 0xffffffff/end-of-file break-if-= var c/eax: code-point <- to-code-point g @@ -466,7 +466,7 @@ fn draw-text-downward screen: (addr screen), text: (addr array byte), x: int, y: fn draw-stream-downward screen: (addr screen), stream: (addr stream byte), x: int, y: int, ymax: int, color: int, background-color: int -> _/eax: int { var ycurr/ecx: int <- copy y { - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream compare g, 0xffffffff/end-of-file break-if-= var c/eax: code-point <- to-code-point g @@ -508,7 +508,7 @@ fn draw-stream-wrapping-down-then-right screen: (addr screen), stream: (addr str var xcurr/edx: int <- copy x var ycurr/ecx: int <- copy y { - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream compare g, 0xffffffff/end-of-file break-if-= var c/eax: code-point <- to-code-point g diff --git 
a/504test-screen.mu b/504test-screen.mu index 64b378dd..f6bdae3b 100644 --- a/504test-screen.mu +++ b/504test-screen.mu @@ -12,7 +12,7 @@ fn check-screen-row-from _screen: (addr screen), x: int, y: int, expected: (addr var screen/esi: (addr screen) <- copy _screen var failure-count/edi: int <- copy 0 var index/ecx: int <- screen-cell-index screen, x, y - # compare 'expected' with the screen contents starting at 'index', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'index', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -26,16 +26,16 @@ fn check-screen-row-from _screen: (addr screen), x: int, y: int, expected: (addr break-if-!= var _c/eax: code-point <- screen-code-point-at-index screen, index var c/ebx: code-point <- copy _c - var expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-code-point/eax: code-point <- to-code-point expected-grapheme - # compare graphemes - $check-screen-row-from:compare-graphemes: { - # if expected-code-point is space, null grapheme is also ok + var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point/eax: code-point <- to-code-point expected-code-point-utf8 + # compare code-point-utf8s + $check-screen-row-from:compare-code-point-utf8s: { + # if expected-code-point is space, null code-point-utf8 is also ok { compare expected-code-point, 0x20 break-if-!= compare c, 0 - break-if-= $check-screen-row-from:compare-graphemes + break-if-= $check-screen-row-from:compare-code-point-utf8s } # if (c == expected-code-point) print "." 
compare c, expected-code-point @@ -79,7 +79,7 @@ fn check-screen-row-in-color screen: (addr screen), fg: int, y: int, expected: ( fn check-screen-row-in-color-from _screen: (addr screen), fg: int, y: int, x: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy _screen var index/ecx: int <- screen-cell-index screen, x, y - # compare 'expected' with the screen contents starting at 'index', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'index', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -93,11 +93,11 @@ fn check-screen-row-in-color-from _screen: (addr screen), fg: int, y: int, x: in break-if-!= var _c/eax: code-point <- screen-code-point-at-index screen, index var c/ebx: code-point <- copy _c - var expected-grapheme/eax: grapheme <- read-grapheme e-addr - var _expected-code-point/eax: code-point <- to-code-point expected-grapheme + var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var _expected-code-point/eax: code-point <- to-code-point expected-code-point-utf8 var expected-code-point/edi: code-point <- copy _expected-code-point $check-screen-row-in-color-from:compare-cells: { - # if expected-code-point is space, null grapheme is also ok + # if expected-code-point is space, null code-point-utf8 is also ok { compare expected-code-point, 0x20 break-if-!= @@ -112,14 +112,14 @@ fn check-screen-row-in-color-from _screen: (addr screen), fg: int, y: int, x: in compare color, fg break-if-!= $check-screen-row-in-color-from:compare-cells } - # compare graphemes - $check-screen-row-in-color-from:compare-graphemes: { + # compare code-point-utf8s + $check-screen-row-in-color-from:compare-code-point-utf8s: { # if (c == expected-code-point) print "." 
compare c, expected-code-point { break-if-!= draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg=cyan, 0/bg - break $check-screen-row-in-color-from:compare-graphemes + break $check-screen-row-in-color-from:compare-code-point-utf8s } # otherwise print an error count-test-failure @@ -173,7 +173,7 @@ fn check-screen-row-in-background-color screen: (addr screen), bg: int, y: int, fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y: int, x: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy _screen var index/ecx: int <- screen-cell-index screen, x, y - # compare 'expected' with the screen contents starting at 'index', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'index', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -187,11 +187,11 @@ fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y: break-if-!= var _g/eax: code-point <- screen-code-point-at-index screen, index var g/ebx: code-point <- copy _g - var expected-grapheme/eax: grapheme <- read-grapheme e-addr - var _expected-code-point/eax: code-point <- to-code-point expected-grapheme + var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var _expected-code-point/eax: code-point <- to-code-point expected-code-point-utf8 var expected-code-point/edi: code-point <- copy _expected-code-point $check-screen-row-in-background-color-from:compare-cells: { - # if expected-code-point is space, null grapheme is also ok + # if expected-code-point is space, null code-point-utf8 is also ok { compare expected-code-point, 0x20 break-if-!= @@ -206,14 +206,14 @@ fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y: compare background-color, bg break-if-!= $check-screen-row-in-background-color-from:compare-cells } - # compare 
graphemes - $check-screen-row-in-background-color-from:compare-graphemes: { + # compare code-point-utf8s + $check-screen-row-in-background-color-from:compare-code-point-utf8s: { # if (g == expected-code-point) print "." compare g, expected-code-point { break-if-!= draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg=cyan, 0/bg - break $check-screen-row-in-background-color-from:compare-graphemes + break $check-screen-row-in-background-color-from:compare-code-point-utf8s } # otherwise print an error count-test-failure @@ -228,7 +228,7 @@ fn check-screen-row-in-background-color-from _screen: (addr screen), bg: int, y: draw-code-point-at-cursor-over-full-screen 0/screen, g, 3/cyan, 0/bg draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "'", 3/fg=cyan, 0/bg move-cursor-to-left-margin-of-next-line 0/screen - break $check-screen-row-in-background-color-from:compare-graphemes + break $check-screen-row-in-background-color-from:compare-code-point-utf8s } $check-screen-row-in-background-color-from:compare-background-colors: { var background-color/eax: int <- screen-background-color-at-index screen, index @@ -284,8 +284,8 @@ fn check-background-color-in-screen-row-from _screen: (addr screen), bg: int, y: var unused?/eax: boolean <- screen-cell-unused-at-index? 
screen, index compare unused?, 0/false break-if-!= - var _expected-bit/eax: grapheme <- read-grapheme e-addr - var expected-bit/edi: grapheme <- copy _expected-bit + var _expected-bit/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-bit/edi: code-point-utf8 <- copy _expected-bit $check-background-color-in-screen-row-from:compare-cells: { var background-color/eax: int <- screen-background-color-at-index screen, index # if expected-bit is space, assert that background is NOT bg @@ -336,23 +336,23 @@ fn check-background-color-in-screen-row-from _screen: (addr screen), bg: int, y: draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg=cyan, 0/bg } -fn test-draw-single-grapheme { +fn test-draw-single-code-point-utf8 { var _screen: screen var screen/esi: (addr screen) <- address _screen initialize-screen screen, 5, 4, 0/no-pixel-graphics var dummy/eax: int <- draw-code-point screen, 0x61/a, 0/x, 0/y, 1/fg, 2/bg - check-screen-row screen, 0/y, "a", "F - test-draw-single-grapheme" # top-left corner of the screen - check-screen-row-in-color screen, 1/fg, 0/y, "a", "F - test-draw-single-grapheme-fg" - check-screen-row-in-background-color screen, 2/bg, 0/y, "a", "F - test-draw-single-grapheme-bg" - check-background-color-in-screen-row screen, 2/bg, 0/y, "x ", "F - test-draw-single-grapheme-bg2" + check-screen-row screen, 0/y, "a", "F - test-draw-single-code-point-utf8" # top-left corner of the screen + check-screen-row-in-color screen, 1/fg, 0/y, "a", "F - test-draw-single-code-point-utf8-fg" + check-screen-row-in-background-color screen, 2/bg, 0/y, "a", "F - test-draw-single-code-point-utf8-bg" + check-background-color-in-screen-row screen, 2/bg, 0/y, "x ", "F - test-draw-single-code-point-utf8-bg2" } -fn test-draw-multiple-graphemes { +fn test-draw-multiple-code-point-utf8s { var _screen: screen var screen/esi: (addr screen) <- address _screen initialize-screen screen, 0x10/rows, 4/cols, 0/no-pixel-graphics 
draw-text-wrapping-right-then-down-from-cursor-over-full-screen screen, "Hello, 世界", 1/fg, 2/bg - check-screen-row screen, 0/y, "Hello, 世界", "F - test-draw-multiple-graphemes" - check-screen-row-in-color screen, 1/fg, 0/y, "Hello, 世界", "F - test-draw-multiple-graphemes-fg" - check-background-color-in-screen-row screen, 2/bg, 0/y, "xxxxxxxxx ", "F - test-draw-multiple-graphemes-bg2" + check-screen-row screen, 0/y, "Hello, 世界", "F - test-draw-multiple-code-point-utf8s" + check-screen-row-in-color screen, 1/fg, 0/y, "Hello, 世界", "F - test-draw-multiple-code-point-utf8s-fg" + check-background-color-in-screen-row screen, 2/bg, 0/y, "xxxxxxxxx ", "F - test-draw-multiple-code-point-utf8s-bg2" } diff --git a/513grapheme-stack.mu b/513grapheme-stack.mu index 3228cfb3..3aa0fd4e 100644 --- a/513grapheme-stack.mu +++ b/513grapheme-stack.mu @@ -1,26 +1,26 @@ -# grapheme stacks are the smallest unit of editable text +# code-point-utf8 stacks are the smallest unit of editable text -type grapheme-stack { - data: (handle array grapheme) +type code-point-utf8-stack { + data: (handle array code-point-utf8) top: int } -fn initialize-grapheme-stack _self: (addr grapheme-stack), n: int { - var self/esi: (addr grapheme-stack) <- copy _self - var d/edi: (addr handle array grapheme) <- get self, data +fn initialize-code-point-utf8-stack _self: (addr code-point-utf8-stack), n: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var d/edi: (addr handle array code-point-utf8) <- get self, data populate d, n var top/eax: (addr int) <- get self, top copy-to *top, 0 } -fn clear-grapheme-stack _self: (addr grapheme-stack) { - var self/esi: (addr grapheme-stack) <- copy _self +fn clear-code-point-utf8-stack _self: (addr code-point-utf8-stack) { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top/eax: (addr int) <- get self, top copy-to *top, 0 } -fn grapheme-stack-empty? 
_self: (addr grapheme-stack) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self +fn code-point-utf8-stack-empty? _self: (addr code-point-utf8-stack) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top/eax: (addr int) <- get self, top compare *top, 0 { @@ -30,26 +30,26 @@ fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean { return 0/false } -fn grapheme-stack-length _self: (addr grapheme-stack) -> _/eax: int { - var self/esi: (addr grapheme-stack) <- copy _self +fn code-point-utf8-stack-length _self: (addr code-point-utf8-stack) -> _/eax: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top/eax: (addr int) <- get self, top return *top } -fn push-grapheme-stack _self: (addr grapheme-stack), _val: grapheme { - var self/esi: (addr grapheme-stack) <- copy _self +fn push-code-point-utf8-stack _self: (addr code-point-utf8-stack), _val: code-point-utf8 { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/ecx: (addr int) <- get self, top - var data-ah/edx: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/edx: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/edx: int <- copy *top-addr - var dest-addr/edx: (addr grapheme) <- index data, top - var val/eax: grapheme <- copy _val + var dest-addr/edx: (addr code-point-utf8) <- index data, top + var val/eax: code-point-utf8 <- copy _val copy-to *dest-addr, val add-to *top-addr, 1 } -fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme { - var self/esi: (addr grapheme-stack) <- copy _self +fn pop-code-point-utf8-stack _self: (addr code-point-utf8-stack) -> _/eax: code-point-utf8 { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/ecx: (addr int) <- get self, top { compare *top-addr, 0 @@ -57,25 +57,25 @@ fn pop-grapheme-stack _self: 
(addr grapheme-stack) -> _/eax: grapheme { return -1 } subtract-from *top-addr, 1 - var data-ah/edx: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/edx: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/edx: int <- copy *top-addr - var result-addr/eax: (addr grapheme) <- index data, top + var result-addr/eax: (addr code-point-utf8) <- index data, top return *result-addr } -fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) { - var src/esi: (addr grapheme-stack) <- copy _src - var data-ah/edi: (addr handle array grapheme) <- get src, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn copy-code-point-utf8-stack _src: (addr code-point-utf8-stack), dest: (addr code-point-utf8-stack) { + var src/esi: (addr code-point-utf8-stack) <- copy _src + var data-ah/edi: (addr handle array code-point-utf8) <- get src, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get src, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - push-grapheme-stack dest, *g + var g/edx: (addr code-point-utf8) <- index data, i + push-code-point-utf8-stack dest, *g i <- increment loop } @@ -84,12 +84,12 @@ fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) # dump stack to screen from bottom to top # hardcoded colors: # matching paren -fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int { - var self/esi: (addr grapheme-stack) <- 
copy _self +fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self var matching-open-paren-index/edx: int <- get-matching-open-paren-index self, highlight-matching-open-paren?, open-paren-depth - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var x/eax: int <- copy _x var y/ecx: int <- copy _y var top-addr/esi: (addr int) <- get self, top @@ -100,7 +100,7 @@ fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _sel { var c: code-point { - var g/eax: (addr grapheme) <- index data, i + var g/eax: (addr code-point-utf8) <- index data, i var tmp/eax: code-point <- to-code-point *g copy-to c, tmp } @@ -123,7 +123,7 @@ fn render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _sel } # helper for small words -fn render-stack-from-bottom screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int { +fn render-stack-from-bottom screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int { var _width/eax: int <- copy 0 var _height/ecx: int <- copy 0 _width, _height <- screen-size screen @@ -136,16 +136,16 @@ fn render-stack-from-bottom screen: (addr screen), self: (addr grapheme-stack), } # dump stack to screen from top to bottom -# optionally 
render a 'cursor' with the top grapheme +# optionally render a 'cursor' with the top code-point-utf8 # hard-coded colors: # matching paren # cursor -fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int { - var self/esi: (addr grapheme-stack) <- copy _self +fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self var matching-close-paren-index/edx: int <- get-matching-close-paren-index self, render-cursor? - var data-ah/eax: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data + var data-ah/eax: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var x/eax: int <- copy _x var y/ecx: int <- copy _y var top-addr/ebx: (addr int) <- get self, top @@ -159,7 +159,7 @@ fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: break-if-< var c: code-point { - var g/eax: (addr grapheme) <- index data, i + var g/eax: (addr code-point-utf8) <- index data, i var tmp/eax: code-point <- to-code-point *g copy-to c, tmp } @@ -184,7 +184,7 @@ fn render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: # var c: code-point { - var g/eax: (addr grapheme) <- index data, i + var g/eax: (addr code-point-utf8) <- index data, i var tmp/eax: code-point <- to-code-point *g copy-to c, tmp } @@ -196,7 +196,7 @@ fn render-stack-from-top-wrapping-right-then-down screen: (addr 
screen), _self: } # helper for small words -fn render-stack-from-top screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int { +fn render-stack-from-top screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int { var _width/eax: int <- copy 0 var _height/ecx: int <- copy 0 _width, _height <- screen-size screen @@ -208,190 +208,190 @@ fn render-stack-from-top screen: (addr screen), self: (addr grapheme-stack), x: return x2 # y2? yolo } -fn test-render-grapheme-stack { +fn test-render-code-point-utf8-stack { # setup: gs = "abc" - var gs-storage: grapheme-stack - var gs/edi: (addr grapheme-stack) <- address gs-storage - initialize-grapheme-stack gs, 5 - var g/eax: grapheme <- copy 0x61/a - push-grapheme-stack gs, g + var gs-storage: code-point-utf8-stack + var gs/edi: (addr code-point-utf8-stack) <- address gs-storage + initialize-code-point-utf8-stack gs, 5 + var g/eax: code-point-utf8 <- copy 0x61/a + push-code-point-utf8-stack gs, g g <- copy 0x62/b - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x63/c - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g # setup: screen var screen-storage: screen var screen/esi: (addr screen) <- address screen-storage initialize-screen screen, 5, 4, 0/no-pixel-graphics # var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 0/y, 0/no-highlight-matching-open-paren, 0/open-paren-depth - check-screen-row screen, 0/y, "abc ", "F - test-render-grapheme-stack from bottom" - check-ints-equal x, 3, "F - test-render-grapheme-stack from bottom: result" - check-background-color-in-screen-row screen, 3/bg=reverse, 0/y, " ", "F - test-render-grapheme-stack from bottom: bg" + check-screen-row screen, 0/y, "abc ", "F - test-render-code-point-utf8-stack from bottom" + check-ints-equal x, 3, "F - test-render-code-point-utf8-stack from bottom: result" + check-background-color-in-screen-row screen, 
3/bg=reverse, 0/y, " ", "F - test-render-code-point-utf8-stack from bottom: bg" # var x/eax: int <- render-stack-from-top screen, gs, 0/x, 1/y, 0/cursor=false - check-screen-row screen, 1/y, "cba ", "F - test-render-grapheme-stack from top without cursor" - check-ints-equal x, 3, "F - test-render-grapheme-stack from top without cursor: result" - check-background-color-in-screen-row screen, 3/bg=reverse, 1/y, " ", "F - test-render-grapheme-stack from top without cursor: bg" + check-screen-row screen, 1/y, "cba ", "F - test-render-code-point-utf8-stack from top without cursor" + check-ints-equal x, 3, "F - test-render-code-point-utf8-stack from top without cursor: result" + check-background-color-in-screen-row screen, 3/bg=reverse, 1/y, " ", "F - test-render-code-point-utf8-stack from top without cursor: bg" # var x/eax: int <- render-stack-from-top screen, gs, 0/x, 2/y, 1/cursor=true - check-screen-row screen, 2/y, "cba ", "F - test-render-grapheme-stack from top with cursor" - check-ints-equal x, 3, "F - test-render-grapheme-stack from top with cursor: result" - check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-grapheme-stack from top with cursor: bg" + check-screen-row screen, 2/y, "cba ", "F - test-render-code-point-utf8-stack from top with cursor" + check-ints-equal x, 3, "F - test-render-code-point-utf8-stack from top with cursor: result" + check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-code-point-utf8-stack from top with cursor: bg" } -fn test-render-grapheme-stack-while-highlighting-matching-close-paren { +fn test-render-code-point-utf8-stack-while-highlighting-matching-close-paren { # setup: gs = "(b)" - var gs-storage: grapheme-stack - var gs/edi: (addr grapheme-stack) <- address gs-storage - initialize-grapheme-stack gs, 5 - var g/eax: grapheme <- copy 0x29/close-paren - push-grapheme-stack gs, g + var gs-storage: code-point-utf8-stack + var gs/edi: (addr code-point-utf8-stack) <- 
address gs-storage + initialize-code-point-utf8-stack gs, 5 + var g/eax: code-point-utf8 <- copy 0x29/close-paren + push-code-point-utf8-stack gs, g g <- copy 0x62/b - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x28/open-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g # setup: screen var screen-storage: screen var screen/esi: (addr screen) <- address screen-storage initialize-screen screen, 5, 4, 0/no-pixel-graphics # var x/eax: int <- render-stack-from-top screen, gs, 0/x, 2/y, 1/cursor=true - check-screen-row screen, 2/y, "(b) ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren" - check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren: cursor" - check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren: matching paren" + check-screen-row screen, 2/y, "(b) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren" + check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren: cursor" + check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren: matching paren" } -fn test-render-grapheme-stack-while-highlighting-matching-close-paren-2 { +fn test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2 { # setup: gs = "(a (b)) c" - var gs-storage: grapheme-stack - var gs/edi: (addr grapheme-stack) <- address gs-storage - initialize-grapheme-stack gs, 0x10 - var g/eax: grapheme <- copy 0x63/c - push-grapheme-stack gs, g + var gs-storage: code-point-utf8-stack + var gs/edi: (addr code-point-utf8-stack) <- address gs-storage + initialize-code-point-utf8-stack gs, 0x10 + var g/eax: code-point-utf8 <- copy 0x63/c + 
push-code-point-utf8-stack gs, g g <- copy 0x20/space - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x29/close-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x29/close-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x62/b - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x28/open-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x20/space - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x61/a - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x28/open-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g # setup: screen var screen-storage: screen var screen/esi: (addr screen) <- address screen-storage initialize-screen screen, 5, 4, 0/no-pixel-graphics # var x/eax: int <- render-stack-from-top screen, gs, 0/x, 2/y, 1/cursor=true - check-screen-row screen, 2/y, "(a (b)) c ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren-2" - check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren-2: cursor" - check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-grapheme-stack-while-highlighting-matching-close-paren-2: matching paren" + check-screen-row screen, 2/y, "(a (b)) c ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2" + check-background-color-in-screen-row screen, 3/bg=reverse, 2/y, "| ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2: cursor" + check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-close-paren-2: matching paren" } -fn test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end { +fn 
test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end { # setup: gs = "(b)" - var gs-storage: grapheme-stack - var gs/edi: (addr grapheme-stack) <- address gs-storage - initialize-grapheme-stack gs, 5 - var g/eax: grapheme <- copy 0x28/open-paren - push-grapheme-stack gs, g + var gs-storage: code-point-utf8-stack + var gs/edi: (addr code-point-utf8-stack) <- address gs-storage + initialize-code-point-utf8-stack gs, 5 + var g/eax: code-point-utf8 <- copy 0x28/open-paren + push-code-point-utf8-stack gs, g g <- copy 0x62/b - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x29/close-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g # setup: screen var screen-storage: screen var screen/esi: (addr screen) <- address screen-storage initialize-screen screen, 5, 4, 0/no-pixel-graphics # var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 1/open-paren-depth - check-screen-row screen, 2/y, "(b) ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end" - check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end: matching paren" + check-screen-row screen, 2/y, "(b) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end" + check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end: matching paren" } -fn test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2 { +fn test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2 { # setup: gs = "a((b))" - var gs-storage: grapheme-stack - var gs/edi: (addr grapheme-stack) <- address gs-storage - initialize-grapheme-stack gs, 0x10 - var g/eax: 
grapheme <- copy 0x61/a - push-grapheme-stack gs, g + var gs-storage: code-point-utf8-stack + var gs/edi: (addr code-point-utf8-stack) <- address gs-storage + initialize-code-point-utf8-stack gs, 0x10 + var g/eax: code-point-utf8 <- copy 0x61/a + push-code-point-utf8-stack gs, g g <- copy 0x28/open-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x28/open-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x62/b - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x29/close-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x29/close-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g # setup: screen var screen-storage: screen var screen/esi: (addr screen) <- address screen-storage initialize-screen screen, 5, 4, 0/no-pixel-graphics # var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 1/open-paren-depth - check-screen-row screen, 2/y, "a((b)) ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2" - check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2: matching paren" + check-screen-row screen, 2/y, "a((b)) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2" + check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-with-close-paren-at-end-2: matching paren" } -fn test-render-grapheme-stack-while-highlighting-matching-open-paren { +fn test-render-code-point-utf8-stack-while-highlighting-matching-open-paren { # setup: gs = "(b" - var gs-storage: grapheme-stack - var gs/edi: (addr grapheme-stack) <- address gs-storage - initialize-grapheme-stack gs, 5 - var g/eax: grapheme <- copy 0x28/open-paren - 
push-grapheme-stack gs, g + var gs-storage: code-point-utf8-stack + var gs/edi: (addr code-point-utf8-stack) <- address gs-storage + initialize-code-point-utf8-stack gs, 5 + var g/eax: code-point-utf8 <- copy 0x28/open-paren + push-code-point-utf8-stack gs, g g <- copy 0x62/b - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g # setup: screen var screen-storage: screen var screen/esi: (addr screen) <- address screen-storage initialize-screen screen, 5, 4, 0/no-pixel-graphics # var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 0/open-paren-depth - check-screen-row screen, 2/y, "(b ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren" - check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren: matching paren" + check-screen-row screen, 2/y, "(b ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren" + check-screen-row-in-color screen, 0xf/fg=white, 2/y, "( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren: matching paren" } -fn test-render-grapheme-stack-while-highlighting-matching-open-paren-2 { +fn test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-2 { # setup: gs = "a((b)" - var gs-storage: grapheme-stack - var gs/edi: (addr grapheme-stack) <- address gs-storage - initialize-grapheme-stack gs, 0x10 - var g/eax: grapheme <- copy 0x61/a - push-grapheme-stack gs, g + var gs-storage: code-point-utf8-stack + var gs/edi: (addr code-point-utf8-stack) <- address gs-storage + initialize-code-point-utf8-stack gs, 0x10 + var g/eax: code-point-utf8 <- copy 0x61/a + push-code-point-utf8-stack gs, g g <- copy 0x28/open-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x28/open-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g g <- copy 0x62/b - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, 
g g <- copy 0x29/close-paren - push-grapheme-stack gs, g + push-code-point-utf8-stack gs, g # setup: screen var screen-storage: screen var screen/esi: (addr screen) <- address screen-storage initialize-screen screen, 5, 4, 0/no-pixel-graphics # var x/eax: int <- render-stack-from-bottom screen, gs, 0/x, 2/y, 1/highlight-matching-open-paren, 0/open-paren-depth - check-screen-row screen, 2/y, "a((b) ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-2" - check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-grapheme-stack-while-highlighting-matching-open-paren-2: matching paren" + check-screen-row screen, 2/y, "a((b) ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-2" + check-screen-row-in-color screen, 0xf/fg=white, 2/y, " ( ", "F - test-render-code-point-utf8-stack-while-highlighting-matching-open-paren-2: matching paren" } -# return the index of the matching close-paren of the grapheme at cursor (top of stack) +# return the index of the matching close-paren of the code-point-utf8 at cursor (top of stack) # or top index if there's no matching close-paren -fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: boolean -> _/edx: int { - var self/esi: (addr grapheme-stack) <- copy _self +fn get-matching-close-paren-index _self: (addr code-point-utf8-stack), render-cursor?: boolean -> _/edx: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/edx: (addr int) <- get self, top # if not rendering cursor, return compare render-cursor?, 0/false @@ -399,8 +399,8 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: break-if-!= return *top-addr } - var data-ah/eax: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/eax: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var i/ecx: int <- 
copy *top-addr # if stack is empty, return compare i, 0 @@ -410,7 +410,7 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: } # if cursor is not '(' return i <- decrement - var g/esi: (addr grapheme) <- index data, i + var g/esi: (addr code-point-utf8) <- index data, i compare *g, 0x28/open-paren { break-if-= @@ -422,7 +422,7 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: { compare i, 0 break-if-< - var g/esi: (addr grapheme) <- index data, i + var g/esi: (addr code-point-utf8) <- index data, i compare *g, 0x28/open-paren { break-if-!= @@ -446,8 +446,8 @@ fn get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: # return the index of the first open-paren at the given depth # or top index if there's no matching close-paren -fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean, depth: int -> _/edx: int { - var self/esi: (addr grapheme-stack) <- copy _self +fn get-matching-open-paren-index _self: (addr code-point-utf8-stack), control: boolean, depth: int -> _/edx: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/edx: (addr int) <- get self, top # if not rendering cursor, return compare control, 0/false @@ -455,8 +455,8 @@ fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean, break-if-!= return *top-addr } - var data-ah/eax: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/eax: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var i/ecx: int <- copy *top-addr # if stack is empty, return compare i, 0 @@ -470,7 +470,7 @@ fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean, { compare i, 0 break-if-< - var g/esi: (addr grapheme) <- index data, i + var g/esi: (addr code-point-utf8) <- index data, i compare *g, 0x29/close-paren { break-if-!= 
@@ -494,11 +494,11 @@ fn get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean, # compare from bottom # beware: modifies 'stream', which must be disposed of after a false result -fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn prefix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/ebx: int <- copy 0 { @@ -506,8 +506,8 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b break-if->= # if curr != expected, return false { - var curr-a/edx: (addr grapheme) <- index data, i - var expected/eax: grapheme <- read-grapheme s + var curr-a/edx: (addr code-point-utf8) <- index data, i + var expected/eax: code-point-utf8 <- read-code-point-utf8 s { compare expected, *curr-a break-if-= @@ -522,11 +522,11 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b # compare from bottom # beware: modifies 'stream', which must be disposed of after a false result -fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn suffix-match? 
_self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/eax: (addr int) <- get self, top var i/ebx: int <- copy *top-addr i <- decrement @@ -534,8 +534,8 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b compare i, 0 break-if-< { - var curr-a/edx: (addr grapheme) <- index data, i - var expected/eax: grapheme <- read-grapheme s + var curr-a/edx: (addr code-point-utf8) <- index data, i + var expected/eax: code-point-utf8 <- read-code-point-utf8 s # if curr != expected, return false { compare expected, *curr-a @@ -549,18 +549,18 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b return 1 # true } -fn grapheme-stack-is-decimal-integer? _self: (addr grapheme-stack) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/eax: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edx: (addr array grapheme) <- copy _data +fn code-point-utf8-stack-is-decimal-integer? _self: (addr code-point-utf8-stack) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/eax: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edx: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/ebx: int <- copy 0 var result/eax: boolean <- copy 1/true - $grapheme-stack-is-integer?:loop: { + $code-point-utf8-stack-is-integer?:loop: { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i + var g/edx: (addr code-point-utf8) <- index data, i result <- decimal-digit? 
*g compare result, 0/false break-if-= diff --git a/514gap-buffer.mu b/514gap-buffer.mu index a5a7273e..6513983c 100644 --- a/514gap-buffer.mu +++ b/514gap-buffer.mu @@ -1,8 +1,8 @@ # primitive for editing text type gap-buffer { - left: grapheme-stack - right: grapheme-stack + left: code-point-utf8-stack + right: code-point-utf8-stack # some fields for scanning incrementally through a gap-buffer left-read-index: int right-read-index: int @@ -10,41 +10,41 @@ type gap-buffer { fn initialize-gap-buffer _self: (addr gap-buffer), capacity: int { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - initialize-grapheme-stack left, capacity - var right/eax: (addr grapheme-stack) <- get self, right - initialize-grapheme-stack right, capacity + var left/eax: (addr code-point-utf8-stack) <- get self, left + initialize-code-point-utf8-stack left, capacity + var right/eax: (addr code-point-utf8-stack) <- get self, right + initialize-code-point-utf8-stack right, capacity } fn clear-gap-buffer _self: (addr gap-buffer) { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - clear-grapheme-stack left - var right/eax: (addr grapheme-stack) <- get self, right - clear-grapheme-stack right + var left/eax: (addr code-point-utf8-stack) <- get self, left + clear-code-point-utf8-stack left + var right/eax: (addr code-point-utf8-stack) <- get self, right + clear-code-point-utf8-stack right } fn gap-buffer-empty? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self # if !empty?(left) return false { - var left/eax: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-empty? left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-empty? 
left compare result, 0/false break-if-!= return 0/false } # return empty?(right) - var left/eax: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-empty? left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-empty? left return result } fn gap-buffer-capacity _gap: (addr gap-buffer) -> _/edx: int { var gap/esi: (addr gap-buffer) <- copy _gap - var left/eax: (addr grapheme-stack) <- get gap, left - var left-data-ah/eax: (addr handle array grapheme) <- get left, data - var left-data/eax: (addr array grapheme) <- lookup *left-data-ah + var left/eax: (addr code-point-utf8-stack) <- get gap, left + var left-data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var left-data/eax: (addr array code-point-utf8) <- lookup *left-data-ah var result/eax: int <- length left-data return result } @@ -58,8 +58,8 @@ fn initialize-gap-buffer-with self: (addr gap-buffer), keys: (addr array byte) { var done?/eax: boolean <- stream-empty? 
input-stream compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme input-stream - add-grapheme-at-gap self, g + var g/eax: code-point-utf8 <- read-code-point-utf8 input-stream + add-code-point-utf8-at-gap self, g loop } } @@ -73,7 +73,7 @@ fn load-gap-buffer-from-stream self: (addr gap-buffer), in: (addr stream byte) { var key/eax: byte <- read-byte in compare key, 0/null break-if-= - var g/eax: grapheme <- copy key + var g/eax: code-point-utf8 <- copy key edit-gap-buffer self, g loop } @@ -86,44 +86,44 @@ fn emit-gap-buffer self: (addr gap-buffer), out: (addr stream byte) { fn append-gap-buffer _self: (addr gap-buffer), out: (addr stream byte) { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left emit-stack-from-bottom left, out - var right/eax: (addr grapheme-stack) <- get self, right + var right/eax: (addr code-point-utf8-stack) <- get self, right emit-stack-from-top right, out } # dump stack from bottom to top -fn emit-stack-from-bottom _self: (addr grapheme-stack), out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-from-bottom _self: (addr code-point-utf8-stack), out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- increment loop 
} } # dump stack from top to bottom -fn emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-from-top _self: (addr code-point-utf8-stack), out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy *top-addr i <- decrement { compare i, 0 break-if-< - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- decrement loop } @@ -133,16 +133,16 @@ fn word-at-gap _self: (addr gap-buffer), out: (addr stream byte) { var self/esi: (addr gap-buffer) <- copy _self clear-stream out { - var g/eax: grapheme <- grapheme-at-gap self - var at-word?/eax: boolean <- is-ascii-word-grapheme? g + var g/eax: code-point-utf8 <- code-point-utf8-at-gap self + var at-word?/eax: boolean <- is-ascii-word-code-point-utf8? 
g compare at-word?, 0/false break-if-!= return } - var left/ecx: (addr grapheme-stack) <- get self, left + var left/ecx: (addr code-point-utf8-stack) <- get self, left var left-index/eax: int <- top-most-word left emit-stack-from-index left, left-index, out - var right/ecx: (addr grapheme-stack) <- get self, right + var right/ecx: (addr code-point-utf8-stack) <- get self, right var right-index/eax: int <- top-most-word right emit-stack-to-index right, right-index, out } @@ -170,7 +170,7 @@ fn test-word-at-gap-single-word-with-gap-at-start { check-stream-equal out, "abc", "F - test-word-at-gap-single-word-with-gap-at-start" } -fn test-word-at-gap-multiple-words-with-gap-at-non-word-grapheme-at-end { +fn test-word-at-gap-multiple-words-with-gap-at-non-word-code-point-utf8-at-end { var _g: gap-buffer var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer-with g, "abc " @@ -178,10 +178,10 @@ fn test-word-at-gap-multiple-words-with-gap-at-non-word-grapheme-at-end { var out-storage: (stream byte 0x10) var out/eax: (addr stream byte) <- address out-storage word-at-gap g, out - check-stream-equal out, "", "F - test-word-at-gap-multiple-words-with-gap-at-non-word-grapheme-at-end" + check-stream-equal out, "", "F - test-word-at-gap-multiple-words-with-gap-at-non-word-code-point-utf8-at-end" } -fn test-word-at-gap-multiple-words-with-gap-at-non-word-grapheme-at-start { +fn test-word-at-gap-multiple-words-with-gap-at-non-word-code-point-utf8-at-start { var _g: gap-buffer var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer-with g, " abc" @@ -190,7 +190,7 @@ fn test-word-at-gap-multiple-words-with-gap-at-non-word-grapheme-at-start { var out-storage: (stream byte 0x10) var out/eax: (addr stream byte) <- address out-storage word-at-gap g, out - check-stream-equal out, "", "F - test-word-at-gap-multiple-words-with-gap-at-non-word-grapheme-at-start" + check-stream-equal out, "", "F - 
test-word-at-gap-multiple-words-with-gap-at-non-word-code-point-utf8-at-start" } fn test-word-at-gap-multiple-words-with-gap-at-end { @@ -220,7 +220,7 @@ fn test-word-at-gap-multiple-words-with-gap-at-final-word { var _g: gap-buffer var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer-with g, "a bc d" - var dummy/eax: grapheme <- gap-left g + var dummy/eax: code-point-utf8 <- gap-left g # gap is at final word var out-storage: (stream byte 0x10) var out/eax: (addr stream byte) <- address out-storage @@ -232,7 +232,7 @@ fn test-word-at-gap-multiple-words-with-gap-at-final-non-word { var _g: gap-buffer var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer-with g, "abc " - var dummy/eax: grapheme <- gap-left g + var dummy/eax: code-point-utf8 <- gap-left g # gap is at final word var out-storage: (stream byte 0x10) var out/eax: (addr stream byte) <- address out-storage @@ -240,51 +240,51 @@ fn test-word-at-gap-multiple-words-with-gap-at-final-non-word { check-stream-equal out, "", "F - test-word-at-gap-multiple-words-with-gap-at-final-non-word" } -fn grapheme-at-gap _self: (addr gap-buffer) -> _/eax: grapheme { +fn code-point-utf8-at-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8 { # send top of right most of the time var self/esi: (addr gap-buffer) <- copy _self - var right/edi: (addr grapheme-stack) <- get self, right - var data-ah/eax: (addr handle array grapheme) <- get right, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var right/edi: (addr code-point-utf8-stack) <- get self, right + var data-ah/eax: (addr handle array code-point-utf8) <- get right, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top-addr/ecx: (addr int) <- get right, top { compare *top-addr, 0 break-if-<= var top/ecx: int <- copy *top-addr top <- decrement - var result/eax: (addr grapheme) <- index data, top + var result/eax: (addr code-point-utf8) <- index data, top return *result } # send top of left only if right is 
empty - var left/edi: (addr grapheme-stack) <- get self, left - var data-ah/eax: (addr handle array grapheme) <- get left, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var left/edi: (addr code-point-utf8-stack) <- get self, left + var data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top-addr/ecx: (addr int) <- get left, top { compare *top-addr, 0 break-if-<= var top/ecx: int <- copy *top-addr top <- decrement - var result/eax: (addr grapheme) <- index data, top + var result/eax: (addr code-point-utf8) <- index data, top return *result } # send null if everything is empty return 0 } -fn top-most-word _self: (addr grapheme-stack) -> _/eax: int { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn top-most-word _self: (addr code-point-utf8-stack) -> _/eax: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/ebx: int <- copy *top-addr i <- decrement { compare i, 0 break-if-< - var g/edx: (addr grapheme) <- index data, i - var is-word?/eax: boolean <- is-ascii-word-grapheme? *g + var g/edx: (addr code-point-utf8) <- index data, i + var is-word?/eax: boolean <- is-ascii-word-code-point-utf8? 
*g compare is-word?, 0/false break-if-= i <- decrement @@ -294,28 +294,28 @@ fn top-most-word _self: (addr grapheme-stack) -> _/eax: int { return i } -fn emit-stack-from-index _self: (addr grapheme-stack), start: int, out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-from-index _self: (addr code-point-utf8-stack), start: int, out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy start { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- increment loop } } -fn emit-stack-to-index _self: (addr grapheme-stack), end: int, out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-to-index _self: (addr code-point-utf8-stack), end: int, out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy *top-addr i <- decrement @@ -324,14 +324,14 @@ fn emit-stack-to-index _self: (addr grapheme-stack), end: int, out: (addr stream break-if-< 
compare i, end break-if-< - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- decrement loop } } -fn is-ascii-word-grapheme? g: grapheme -> _/eax: boolean { +fn is-ascii-word-code-point-utf8? g: code-point-utf8 -> _/eax: boolean { compare g, 0x21/! { break-if-!= @@ -384,14 +384,14 @@ fn is-ascii-word-grapheme? g: grapheme -> _/eax: boolean { # cursor is a single other color. fn render-gap-buffer-wrapping-right-then-down screen: (addr screen), _gap: (addr gap-buffer), xmin: int, ymin: int, xmax: int, ymax: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int { var gap/esi: (addr gap-buffer) <- copy _gap - var left/edx: (addr grapheme-stack) <- get gap, left + var left/edx: (addr code-point-utf8-stack) <- get gap, left var highlight-matching-open-paren?/ebx: boolean <- copy 0/false var matching-open-paren-depth/edi: int <- copy 0 highlight-matching-open-paren?, matching-open-paren-depth <- highlight-matching-open-paren? gap, render-cursor? var x2/eax: int <- copy 0 var y2/ecx: int <- copy 0 x2, y2 <- render-stack-from-bottom-wrapping-right-then-down screen, left, xmin, ymin, xmax, ymax, xmin, ymin, highlight-matching-open-paren?, matching-open-paren-depth, color, background-color - var right/edx: (addr grapheme-stack) <- get gap, right + var right/edx: (addr code-point-utf8-stack) <- get gap, right x2, y2 <- render-stack-from-top-wrapping-right-then-down screen, right, xmin, ymin, xmax, ymax, x2, y2, render-cursor?, color, background-color # decide whether we still need to print a cursor var fg/edi: int <- copy color @@ -399,15 +399,15 @@ fn render-gap-buffer-wrapping-right-then-down screen: (addr screen), _gap: (addr compare render-cursor?, 0/false { break-if-= - # if the right side is empty, grapheme stack didn't print the cursor - var empty?/eax: boolean <- grapheme-stack-empty? 
right + # if the right side is empty, code-point-utf8 stack didn't print the cursor + var empty?/eax: boolean <- code-point-utf8-stack-empty? right compare empty?, 0/false break-if-= # swap foreground and background fg <- copy background-color bg <- copy color } - # print a grapheme either way so that cursor position doesn't affect printed width + # print a code-point-utf8 either way so that cursor position doesn't affect printed width var space/edx: code-point <- copy 0x20 x2, y2 <- render-code-point screen, space, xmin, ymin, xmax, ymax, x2, y2, fg, bg return x2, y2 @@ -427,30 +427,30 @@ fn render-gap-buffer screen: (addr screen), gap: (addr gap-buffer), x: int, y: i fn gap-buffer-length _gap: (addr gap-buffer) -> _/eax: int { var gap/esi: (addr gap-buffer) <- copy _gap - var left/eax: (addr grapheme-stack) <- get gap, left + var left/eax: (addr code-point-utf8-stack) <- get gap, left var tmp/eax: (addr int) <- get left, top var left-length/ecx: int <- copy *tmp - var right/esi: (addr grapheme-stack) <- get gap, right + var right/esi: (addr code-point-utf8-stack) <- get gap, right tmp <- get right, top var result/eax: int <- copy *tmp result <- add left-length return result } -fn add-grapheme-at-gap _self: (addr gap-buffer), g: grapheme { +fn add-code-point-utf8-at-gap _self: (addr gap-buffer), g: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - push-grapheme-stack left, g + var left/eax: (addr code-point-utf8-stack) <- get self, left + push-code-point-utf8-stack left, g } fn add-code-point-at-gap self: (addr gap-buffer), c: code-point { - var g/eax: grapheme <- copy c - add-grapheme-at-gap self, g + var g/eax: code-point-utf8 <- copy c + add-code-point-utf8-at-gap self, g } fn gap-to-start self: (addr gap-buffer) { { - var curr/eax: grapheme <- gap-left self + var curr/eax: code-point-utf8 <- gap-left self compare curr, -1 loop-if-!= } @@ -458,7 +458,7 @@ fn gap-to-start self: (addr 
gap-buffer) { fn gap-to-end self: (addr gap-buffer) { { - var curr/eax: grapheme <- gap-right self + var curr/eax: code-point-utf8 <- gap-right self compare curr, -1 loop-if-!= } @@ -466,96 +466,96 @@ fn gap-to-end self: (addr gap-buffer) { fn gap-at-start? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-empty? left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-empty? left return result } fn gap-at-end? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var right/eax: (addr grapheme-stack) <- get self, right - var result/eax: boolean <- grapheme-stack-empty? right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var result/eax: boolean <- code-point-utf8-stack-empty? right return result } -fn gap-right _self: (addr gap-buffer) -> _/eax: grapheme { +fn gap-right _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var g/eax: grapheme <- copy 0 - var right/ecx: (addr grapheme-stack) <- get self, right - g <- pop-grapheme-stack right + var g/eax: code-point-utf8 <- copy 0 + var right/ecx: (addr code-point-utf8-stack) <- get self, right + g <- pop-code-point-utf8-stack right compare g, -1 { break-if-= - var left/ecx: (addr grapheme-stack) <- get self, left - push-grapheme-stack left, g + var left/ecx: (addr code-point-utf8-stack) <- get self, left + push-code-point-utf8-stack left, g } return g } -fn gap-left _self: (addr gap-buffer) -> _/eax: grapheme { +fn gap-left _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var g/eax: grapheme <- copy 0 + var g/eax: code-point-utf8 <- copy 0 { - var left/ecx: (addr grapheme-stack) <- get self, left - g <- pop-grapheme-stack left + var left/ecx: (addr 
code-point-utf8-stack) <- get self, left + g <- pop-code-point-utf8-stack left } compare g, -1 { break-if-= - var right/ecx: (addr grapheme-stack) <- get self, right - push-grapheme-stack right, g + var right/ecx: (addr code-point-utf8-stack) <- get self, right + push-code-point-utf8-stack right, g } return g } fn index-of-gap _self: (addr gap-buffer) -> _/eax: int { var self/eax: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left var top-addr/eax: (addr int) <- get left, top var result/eax: int <- copy *top-addr return result } -fn first-grapheme-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { +fn first-code-point-utf8-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # try to read from left - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left var top-addr/ecx: (addr int) <- get left, top compare *top-addr, 0 { break-if-<= - var data-ah/eax: (addr handle array grapheme) <- get left, data - var data/eax: (addr array grapheme) <- lookup *data-ah - var result-addr/eax: (addr grapheme) <- index data, 0 + var data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah + var result-addr/eax: (addr code-point-utf8) <- index data, 0 return *result-addr } # try to read from right - var right/eax: (addr grapheme-stack) <- get self, right + var right/eax: (addr code-point-utf8-stack) <- get self, right top-addr <- get right, top compare *top-addr, 0 { break-if-<= - var data-ah/eax: (addr handle array grapheme) <- get right, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/eax: (addr handle array code-point-utf8) <- get right, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/ecx: int <- copy *top-addr top <- 
decrement - var result-addr/eax: (addr grapheme) <- index data, top + var result-addr/eax: (addr code-point-utf8) <- index data, top return *result-addr } # give up return -1 } -fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { +fn code-point-utf8-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # try to read from left - var left/ecx: (addr grapheme-stack) <- get self, left + var left/ecx: (addr code-point-utf8-stack) <- get self, left var top-addr/edx: (addr int) <- get left, top compare *top-addr, 0 { break-if-<= - var result/eax: grapheme <- pop-grapheme-stack left - push-grapheme-stack left, result + var result/eax: code-point-utf8 <- pop-code-point-utf8-stack left + push-code-point-utf8-stack left, result return result } # give up @@ -564,27 +564,27 @@ fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: graph fn delete-before-gap _self: (addr gap-buffer) { var self/eax: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - var dummy/eax: grapheme <- pop-grapheme-stack left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var dummy/eax: code-point-utf8 <- pop-code-point-utf8-stack left } -fn pop-after-gap _self: (addr gap-buffer) -> _/eax: grapheme { +fn pop-after-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/eax: (addr gap-buffer) <- copy _self - var right/eax: (addr grapheme-stack) <- get self, right - var result/eax: grapheme <- pop-grapheme-stack right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var result/eax: code-point-utf8 <- pop-code-point-utf8-stack right return result } fn gap-buffer-equal? 
_self: (addr gap-buffer), s: (addr array byte) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - # complication: graphemes may be multiple bytes + # complication: code-point-utf8s may be multiple bytes # so don't rely on length # instead turn the expected result into a stream and arrange to read from it in order var stream-storage: (stream byte 0x10/capacity) var expected-stream/ecx: (addr stream byte) <- address stream-storage write expected-stream, s # compare left - var left/edx: (addr grapheme-stack) <- get self, left + var left/edx: (addr code-point-utf8-stack) <- get self, left var result/eax: boolean <- prefix-match? left, expected-stream compare result, 0/false { @@ -592,7 +592,7 @@ fn gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: bo return result } # compare right - var right/edx: (addr grapheme-stack) <- get self, right + var right/edx: (addr code-point-utf8-stack) <- get self, right result <- suffix-match? right, expected-stream compare result, 0/false { @@ -625,7 +625,7 @@ fn test-gap-buffer-equal-from-middle { add-code-point-at-gap g, 0x61/a add-code-point-at-gap g, 0x61/a add-code-point-at-gap g, 0x61/a - var dummy/eax: grapheme <- gap-left g + var dummy/eax: code-point-utf8 <- gap-left g # gap is in the middle var result/eax: boolean <- gap-buffer-equal? g, "aaa" check result, "F - test-gap-buffer-equal-from-middle" @@ -639,7 +639,7 @@ fn test-gap-buffer-equal-from-start { add-code-point-at-gap g, 0x61/a add-code-point-at-gap g, 0x61/a add-code-point-at-gap g, 0x61/a - var dummy/eax: grapheme <- gap-left g + var dummy/eax: code-point-utf8 <- gap-left g dummy <- gap-left g dummy <- gap-left g # gap is at the start @@ -674,9 +674,9 @@ fn gap-buffers-equal? 
self: (addr gap-buffer), g: (addr gap-buffer) -> _/eax: bo compare i, len break-if->= { - var tmp/eax: grapheme <- gap-index self, i - var curr/ecx: grapheme <- copy tmp - var currg/eax: grapheme <- gap-index g, i + var tmp/eax: code-point-utf8 <- gap-index self, i + var curr/ecx: code-point-utf8 <- copy tmp + var currg/eax: code-point-utf8 <- gap-index g, i compare curr, currg break-if-= return 0/false @@ -687,36 +687,36 @@ fn gap-buffers-equal? self: (addr gap-buffer), g: (addr gap-buffer) -> _/eax: bo return 1/true } -fn gap-index _self: (addr gap-buffer), _n: int -> _/eax: grapheme { +fn gap-index _self: (addr gap-buffer), _n: int -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self var n/ebx: int <- copy _n # if n < left->length, index into left - var left/edi: (addr grapheme-stack) <- get self, left + var left/edi: (addr code-point-utf8-stack) <- get self, left var left-len-a/edx: (addr int) <- get left, top compare n, *left-len-a { break-if->= - var data-ah/eax: (addr handle array grapheme) <- get left, data - var data/eax: (addr array grapheme) <- lookup *data-ah - var result/eax: (addr grapheme) <- index data, n + var data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah + var result/eax: (addr code-point-utf8) <- index data, n return *result } # shrink n n <- subtract *left-len-a # if n < right->length, index into right - var right/edi: (addr grapheme-stack) <- get self, right + var right/edi: (addr code-point-utf8-stack) <- get self, right var right-len-a/edx: (addr int) <- get right, top compare n, *right-len-a { break-if->= - var data-ah/eax: (addr handle array grapheme) <- get right, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/eax: (addr handle array code-point-utf8) <- get right, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah # idx = right->len - n - 1 var idx/ebx: int <- copy n idx <- subtract 
*right-len-a idx <- negate idx <- subtract 1 - var result/eax: (addr grapheme) <- index data, idx + var result/eax: (addr code-point-utf8) <- index data, idx return *result } # error @@ -757,25 +757,25 @@ fn test-gap-buffer-index { var gap/esi: (addr gap-buffer) <- address gap-storage initialize-gap-buffer-with gap, "abc" # gap is at end, all contents are in left - var g/eax: grapheme <- gap-index gap, 0 + var g/eax: code-point-utf8 <- gap-index gap, 0 var x/ecx: int <- copy g check-ints-equal x, 0x61/a, "F - test-gap-index/left-1" - var g/eax: grapheme <- gap-index gap, 1 + var g/eax: code-point-utf8 <- gap-index gap, 1 var x/ecx: int <- copy g check-ints-equal x, 0x62/b, "F - test-gap-index/left-2" - var g/eax: grapheme <- gap-index gap, 2 + var g/eax: code-point-utf8 <- gap-index gap, 2 var x/ecx: int <- copy g check-ints-equal x, 0x63/c, "F - test-gap-index/left-3" # now check when everything is to the right gap-to-start gap rewind-gap-buffer gap - var g/eax: grapheme <- gap-index gap, 0 + var g/eax: code-point-utf8 <- gap-index gap, 0 var x/ecx: int <- copy g check-ints-equal x, 0x61/a, "F - test-gap-index/right-1" - var g/eax: grapheme <- gap-index gap, 1 + var g/eax: code-point-utf8 <- gap-index gap, 1 var x/ecx: int <- copy g check-ints-equal x, 0x62/b, "F - test-gap-index/right-2" - var g/eax: grapheme <- gap-index gap, 2 + var g/eax: code-point-utf8 <- gap-index gap, 2 var x/ecx: int <- copy g check-ints-equal x, 0x63/c, "F - test-gap-index/right-3" } @@ -788,25 +788,25 @@ fn copy-gap-buffer _src-ah: (addr handle gap-buffer), _dest-ah: (addr handle gap var dest-ah/eax: (addr handle gap-buffer) <- copy _dest-ah var _dest-a/eax: (addr gap-buffer) <- lookup *dest-ah var dest-a/edi: (addr gap-buffer) <- copy _dest-a - # copy left grapheme-stack - var src/ecx: (addr grapheme-stack) <- get src-a, left - var dest/edx: (addr grapheme-stack) <- get dest-a, left - copy-grapheme-stack src, dest - # copy right grapheme-stack + # copy left code-point-utf8-stack + var 
src/ecx: (addr code-point-utf8-stack) <- get src-a, left + var dest/edx: (addr code-point-utf8-stack) <- get dest-a, left + copy-code-point-utf8-stack src, dest + # copy right code-point-utf8-stack src <- get src-a, right dest <- get dest-a, right - copy-grapheme-stack src, dest + copy-code-point-utf8-stack src, dest } fn gap-buffer-is-decimal-integer? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var curr/ecx: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-is-decimal-integer? curr + var curr/ecx: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-is-decimal-integer? curr { compare result, 0/false break-if-= curr <- get self, right - result <- grapheme-stack-is-decimal-integer? curr + result <- code-point-utf8-stack-is-decimal-integer? curr } return result } @@ -841,7 +841,7 @@ fn test-render-gap-buffer-with-cursor-at-end { # var x/eax: int <- render-gap-buffer screen, gap, 0/x, 0/y, 1/show-cursor, 3/fg, 0xc5/bg=blue-bg check-screen-row screen, 0/y, "abc ", "F - test-render-gap-buffer-with-cursor-at-end" - # we've drawn one extra grapheme for the cursor + # we've drawn one extra code-point-utf8 for the cursor check-ints-equal x, 4, "F - test-render-gap-buffer-with-cursor-at-end: result" # abc check-background-color-in-screen-row screen, 3/bg=reverse, 0/y, " |", "F - test-render-gap-buffer-with-cursor-at-end: bg" @@ -853,7 +853,7 @@ fn test-render-gap-buffer-with-cursor-in-middle { var gap/esi: (addr gap-buffer) <- address gap-storage initialize-gap-buffer-with gap, "abc" gap-to-end gap - var dummy/eax: grapheme <- gap-left gap + var dummy/eax: code-point-utf8 <- gap-left gap # setup: screen var screen-storage: screen var screen/edi: (addr screen) <- address screen-storage @@ -905,7 +905,7 @@ fn test-render-gap-buffer-highlight-matching-open-paren { var gap/esi: (addr gap-buffer) <- address gap-storage initialize-gap-buffer-with gap, 
"(a)" gap-to-end gap - var dummy/eax: grapheme <- gap-left gap + var dummy/eax: code-point-utf8 <- gap-left gap # setup: screen var screen-storage: screen var screen/edi: (addr screen) <- address screen-storage @@ -947,7 +947,7 @@ fn highlight-matching-open-paren? _gap: (addr gap-buffer), render-cursor?: boole return 0/false, 0 } var gap/esi: (addr gap-buffer) <- copy _gap - var stack/edi: (addr grapheme-stack) <- get gap, right + var stack/edi: (addr code-point-utf8-stack) <- get gap, right var top-addr/eax: (addr int) <- get stack, top var top-index/ecx: int <- copy *top-addr compare top-index, 0 @@ -963,9 +963,9 @@ fn highlight-matching-open-paren? _gap: (addr gap-buffer), render-cursor?: boole return 0/false, 0 } top-index <- decrement - var data-ah/eax: (addr handle array grapheme) <- get stack, data - var data/eax: (addr array grapheme) <- lookup *data-ah - var g/eax: (addr grapheme) <- index data, top-index + var data-ah/eax: (addr handle array code-point-utf8) <- get stack, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah + var g/eax: (addr code-point-utf8) <- index data, top-index compare *g, 0x29/close-paren { break-if-= @@ -975,9 +975,9 @@ fn highlight-matching-open-paren? _gap: (addr gap-buffer), render-cursor?: boole } # cursor is not at end; return (char at cursor == ')') top-index <- decrement - var data-ah/eax: (addr handle array grapheme) <- get stack, data - var data/eax: (addr array grapheme) <- lookup *data-ah - var g/eax: (addr grapheme) <- index data, top-index + var data-ah/eax: (addr handle array code-point-utf8) <- get stack, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah + var g/eax: (addr code-point-utf8) <- index data, top-index compare *g, 0x29/close-paren { break-if-= @@ -998,7 +998,7 @@ fn test-highlight-matching-open-paren { highlight-matching-open-paren?, open-paren-depth <- highlight-matching-open-paren? 
gap, 1/render-cursor check highlight-matching-open-paren?, "F - test-highlight-matching-open-paren: at end immediately after ')'" check-ints-equal open-paren-depth, 1, "F - test-highlight-matching-open-paren: depth at end immediately after ')'" - var dummy/eax: grapheme <- gap-left gap + var dummy/eax: code-point-utf8 <- gap-left gap highlight-matching-open-paren?, open-paren-depth <- highlight-matching-open-paren? gap, 1/render-cursor check highlight-matching-open-paren?, "F - test-highlight-matching-open-paren: on ')'" dummy <- gap-left gap @@ -1022,8 +1022,8 @@ fn rewind-gap-buffer _self: (addr gap-buffer) { fn gap-buffer-scan-done? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self # more in left? - var left/eax: (addr grapheme-stack) <- get self, left - var left-size/eax: int <- grapheme-stack-length left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var left-size/eax: int <- code-point-utf8-stack-length left var left-read-index/ecx: (addr int) <- get self, left-read-index compare *left-read-index, left-size { @@ -1031,8 +1031,8 @@ fn gap-buffer-scan-done? _self: (addr gap-buffer) -> _/eax: boolean { return 0/false } # more in right? - var right/eax: (addr grapheme-stack) <- get self, right - var right-size/eax: int <- grapheme-stack-length right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var right-size/eax: int <- code-point-utf8-stack-length right var right-read-index/ecx: (addr int) <- get self, right-read-index compare *right-read-index, right-size { @@ -1043,73 +1043,73 @@ fn gap-buffer-scan-done? _self: (addr gap-buffer) -> _/eax: boolean { return 1/true } -fn peek-from-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { +fn peek-from-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # more in left? 
- var left/ecx: (addr grapheme-stack) <- get self, left - var left-size/eax: int <- grapheme-stack-length left + var left/ecx: (addr code-point-utf8-stack) <- get self, left + var left-size/eax: int <- code-point-utf8-stack-length left var left-read-index-a/edx: (addr int) <- get self, left-read-index compare *left-read-index-a, left-size { break-if->= - var left-data-ah/eax: (addr handle array grapheme) <- get left, data - var left-data/eax: (addr array grapheme) <- lookup *left-data-ah + var left-data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var left-data/eax: (addr array code-point-utf8) <- lookup *left-data-ah var left-read-index/ecx: int <- copy *left-read-index-a - var result/eax: (addr grapheme) <- index left-data, left-read-index + var result/eax: (addr code-point-utf8) <- index left-data, left-read-index return *result } # more in right? - var right/ecx: (addr grapheme-stack) <- get self, right - var _right-size/eax: int <- grapheme-stack-length right + var right/ecx: (addr code-point-utf8-stack) <- get self, right + var _right-size/eax: int <- code-point-utf8-stack-length right var right-size/ebx: int <- copy _right-size var right-read-index-a/edx: (addr int) <- get self, right-read-index compare *right-read-index-a, right-size { break-if->= # read the right from reverse - var right-data-ah/eax: (addr handle array grapheme) <- get right, data - var right-data/eax: (addr array grapheme) <- lookup *right-data-ah + var right-data-ah/eax: (addr handle array code-point-utf8) <- get right, data + var right-data/eax: (addr array code-point-utf8) <- lookup *right-data-ah var right-read-index/ebx: int <- copy right-size right-read-index <- subtract *right-read-index-a right-read-index <- subtract 1 - var result/eax: (addr grapheme) <- index right-data, right-read-index + var result/eax: (addr code-point-utf8) <- index right-data, right-read-index return *result } # if we get here there's nothing left return 0/nul } -fn read-from-gap-buffer 
_self: (addr gap-buffer) -> _/eax: grapheme { +fn read-from-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # more in left? - var left/ecx: (addr grapheme-stack) <- get self, left - var left-size/eax: int <- grapheme-stack-length left + var left/ecx: (addr code-point-utf8-stack) <- get self, left + var left-size/eax: int <- code-point-utf8-stack-length left var left-read-index-a/edx: (addr int) <- get self, left-read-index compare *left-read-index-a, left-size { break-if->= - var left-data-ah/eax: (addr handle array grapheme) <- get left, data - var left-data/eax: (addr array grapheme) <- lookup *left-data-ah + var left-data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var left-data/eax: (addr array code-point-utf8) <- lookup *left-data-ah var left-read-index/ecx: int <- copy *left-read-index-a - var result/eax: (addr grapheme) <- index left-data, left-read-index + var result/eax: (addr code-point-utf8) <- index left-data, left-read-index increment *left-read-index-a return *result } # more in right? 
- var right/ecx: (addr grapheme-stack) <- get self, right - var _right-size/eax: int <- grapheme-stack-length right + var right/ecx: (addr code-point-utf8-stack) <- get self, right + var _right-size/eax: int <- code-point-utf8-stack-length right var right-size/ebx: int <- copy _right-size var right-read-index-a/edx: (addr int) <- get self, right-read-index compare *right-read-index-a, right-size { break-if->= # read the right from reverse - var right-data-ah/eax: (addr handle array grapheme) <- get right, data - var right-data/eax: (addr array grapheme) <- lookup *right-data-ah + var right-data-ah/eax: (addr handle array code-point-utf8) <- get right, data + var right-data/eax: (addr array code-point-utf8) <- lookup *right-data-ah var right-read-index/ebx: int <- copy right-size right-read-index <- subtract *right-read-index-a right-read-index <- subtract 1 - var result/eax: (addr grapheme) <- index right-data, right-read-index + var result/eax: (addr code-point-utf8) <- index right-data, right-read-index increment *right-read-index-a return *result } @@ -1120,8 +1120,8 @@ fn read-from-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { fn put-back-from-gap-buffer _self: (addr gap-buffer) { var self/esi: (addr gap-buffer) <- copy _self # more in right? - var right/eax: (addr grapheme-stack) <- get self, right - var right-size/eax: int <- grapheme-stack-length right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var right-size/eax: int <- code-point-utf8-stack-length right var right-read-index-a/eax: (addr int) <- get self, right-read-index compare *right-read-index-a, 0 { @@ -1130,8 +1130,8 @@ fn put-back-from-gap-buffer _self: (addr gap-buffer) { return } # more in left? 
- var left/eax: (addr grapheme-stack) <- get self, left - var left-size/eax: int <- grapheme-stack-length left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var left-size/eax: int <- code-point-utf8-stack-length left var left-read-index-a/eax: (addr int) <- get self, left-read-index decrement *left-read-index-a } @@ -1143,22 +1143,22 @@ fn test-read-from-gap-buffer { # gap is at end, all contents are in left var done?/eax: boolean <- gap-buffer-scan-done? gap check-not done?, "F - test-read-from-gap-buffer/left-1/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0x61/a, "F - test-read-from-gap-buffer/left-1" var done?/eax: boolean <- gap-buffer-scan-done? gap check-not done?, "F - test-read-from-gap-buffer/left-2/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0x62/b, "F - test-read-from-gap-buffer/left-2" var done?/eax: boolean <- gap-buffer-scan-done? gap check-not done?, "F - test-read-from-gap-buffer/left-3/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0x63/c, "F - test-read-from-gap-buffer/left-3" var done?/eax: boolean <- gap-buffer-scan-done? gap check done?, "F - test-read-from-gap-buffer/left-4/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0/nul, "F - test-read-from-gap-buffer/left-4" # now check when everything is to the right @@ -1166,22 +1166,22 @@ fn test-read-from-gap-buffer { rewind-gap-buffer gap var done?/eax: boolean <- gap-buffer-scan-done? 
gap check-not done?, "F - test-read-from-gap-buffer/right-1/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0x61/a, "F - test-read-from-gap-buffer/right-1" var done?/eax: boolean <- gap-buffer-scan-done? gap check-not done?, "F - test-read-from-gap-buffer/right-2/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0x62/b, "F - test-read-from-gap-buffer/right-2" var done?/eax: boolean <- gap-buffer-scan-done? gap check-not done?, "F - test-read-from-gap-buffer/right-3/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0x63/c, "F - test-read-from-gap-buffer/right-3" var done?/eax: boolean <- gap-buffer-scan-done? gap check done?, "F - test-read-from-gap-buffer/right-4/done" - var g/eax: grapheme <- read-from-gap-buffer gap + var g/eax: code-point-utf8 <- read-from-gap-buffer gap var x/ecx: int <- copy g check-ints-equal x, 0/nul, "F - test-read-from-gap-buffer/right-4" } @@ -1190,7 +1190,7 @@ fn skip-spaces-from-gap-buffer self: (addr gap-buffer) { var done?/eax: boolean <- gap-buffer-scan-done? 
self compare done?, 0/false break-if-!= - var g/eax: grapheme <- peek-from-gap-buffer self + var g/eax: code-point-utf8 <- peek-from-gap-buffer self { compare g, 0x20/space break-if-= @@ -1200,8 +1200,8 @@ fn skip-spaces-from-gap-buffer self: (addr gap-buffer) { loop } -fn edit-gap-buffer self: (addr gap-buffer), key: grapheme { - var g/edx: grapheme <- copy key +fn edit-gap-buffer self: (addr gap-buffer), key: code-point-utf8 { + var g/edx: code-point-utf8 <- copy key { compare g, 8/backspace break-if-!= @@ -1211,13 +1211,13 @@ fn edit-gap-buffer self: (addr gap-buffer), key: grapheme { { compare g, 0x80/left-arrow break-if-!= - var dummy/eax: grapheme <- gap-left self + var dummy/eax: code-point-utf8 <- gap-left self return } { compare g, 0x83/right-arrow break-if-!= - var dummy/eax: grapheme <- gap-right self + var dummy/eax: code-point-utf8 <- gap-right self return } { @@ -1271,11 +1271,11 @@ fn edit-gap-buffer self: (addr gap-buffer), key: grapheme { return } # default: insert character - add-grapheme-at-gap self, g + add-code-point-utf8-at-gap self, g } fn gap-to-start-of-next-word self: (addr gap-buffer) { - var curr/eax: grapheme <- copy 0 + var curr/eax: code-point-utf8 <- copy 0 # skip to next space { curr <- gap-right self @@ -1302,7 +1302,7 @@ fn gap-to-start-of-next-word self: (addr gap-buffer) { } fn gap-to-end-of-previous-word self: (addr gap-buffer) { - var curr/eax: grapheme <- copy 0 + var curr/eax: code-point-utf8 <- copy 0 # skip to previous space { curr <- gap-left self @@ -1330,7 +1330,7 @@ fn gap-to-end-of-previous-word self: (addr gap-buffer) { fn gap-to-previous-start-of-line self: (addr gap-buffer) { # skip past immediate newline - var dummy/eax: grapheme <- gap-left self + var dummy/eax: code-point-utf8 <- gap-left self # skip to previous newline { dummy <- gap-left self @@ -1351,7 +1351,7 @@ fn gap-to-previous-start-of-line self: (addr gap-buffer) { fn gap-to-next-end-of-line self: (addr gap-buffer) { # skip past immediate newline - var 
dummy/eax: grapheme <- gap-right self + var dummy/eax: code-point-utf8 <- gap-right self # skip to next newline { dummy <- gap-right self @@ -1380,7 +1380,7 @@ fn gap-up self: (addr gap-buffer) { { compare i, col break-if->= - var curr/eax: grapheme <- gap-right self + var curr/eax: code-point-utf8 <- gap-right self { compare curr, -1 break-if-!= @@ -1402,13 +1402,13 @@ fn gap-down self: (addr gap-buffer) { var col/edx: int <- count-columns-to-start-of-line self # skip to start of next line gap-to-end-of-line self - var dummy/eax: grapheme <- gap-right self + var dummy/eax: code-point-utf8 <- gap-right self # skip ahead by up to col on previous line var i/ecx: int <- copy 0 { compare i, col break-if->= - var curr/eax: grapheme <- gap-right self + var curr/eax: code-point-utf8 <- gap-right self { compare curr, -1 break-if-!= @@ -1427,7 +1427,7 @@ fn gap-down self: (addr gap-buffer) { fn count-columns-to-start-of-line self: (addr gap-buffer) -> _/edx: int { var count/edx: int <- copy 0 - var dummy/eax: grapheme <- copy 0 + var dummy/eax: code-point-utf8 <- copy 0 # skip to previous newline { dummy <- gap-left self @@ -1449,7 +1449,7 @@ fn count-columns-to-start-of-line self: (addr gap-buffer) -> _/edx: int { } fn gap-to-end-of-line self: (addr gap-buffer) { - var dummy/eax: grapheme <- copy 0 + var dummy/eax: code-point-utf8 <- copy 0 # skip to next newline { dummy <- gap-right self diff --git a/apps/ex15.mu b/apps/ex15.mu index 51bfc1db..570704d6 100644 --- a/apps/ex15.mu +++ b/apps/ex15.mu @@ -32,13 +32,13 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 0/x 0/y, 3/fg 0/bg var dummy/eax: int <- overlay-code-point-on-real-screen 0x0300/combining-grave-accent, 0/x 0/y, 3/fg 0/bg - # below a grapheme with a descender, the accent uglily overlaps + # below a code-point-utf8 with a descender, the accent uglily overlaps # https://en.wikipedia.org/wiki/Descender var dummy/eax: 
int <- draw-code-point-on-real-screen 0x67/g, 4/x 3/y, 3/fg 0/bg var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 4/x 4/y, 3/fg 0/bg var dummy/eax: int <- overlay-code-point-on-real-screen 0x0300/combining-grave-accent, 4/x 4/y, 3/fg 0/bg - # beside a grapheme with a descender, it becomes more obvious that monowidth fonts can't make baselines line up + # beside a code-point-utf8 with a descender, it becomes more obvious that monowidth fonts can't make baselines line up # https://en.wikipedia.org/wiki/Baseline_(typography) var dummy/eax: int <- draw-code-point-on-real-screen 0x67/g, 8/x 3/y, 3/fg 0/bg var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 9/x 3/y, 3/fg 0/bg @@ -82,62 +82,62 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) var dummy/eax: int <- draw-code-point-on-real-screen 0x0915/devanagari-letter-ka, 0x13/x 9/y, 3/fg 0/bg var dummy/eax: int <- overlay-code-point-on-real-screen 0x0903/devanagari-visarga, 0x13/x 9/y, 3/fg 0/bg - # render the same devanagari letters as a single stream of utf-8 graphemes rather than individual code-points. + # render the same devanagari letters as a single stream of utf-8 code-point-utf8s rather than individual code-points. 
var text-storage: (stream byte 0x200) var text/esi: (addr stream byte) <- address text-storage - var g/eax: grapheme <- to-grapheme 0x0915/devanagari-letter-ka - var ka/ecx: grapheme <- copy g + var g/eax: code-point-utf8 <- to-utf8 0x0915/devanagari-letter-ka + var ka/ecx: code-point-utf8 <- copy g # ka - write-grapheme text, ka + write-code-point-utf8 text, ka # kaa - write-grapheme text, ka - g <- to-grapheme 0x093e/devanagari-vowel-aa - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x093e/devanagari-vowel-aa + write-code-point-utf8 text, g # ki - write-grapheme text, ka - g <- to-grapheme 0x093f/devanagari-vowel-i - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x093f/devanagari-vowel-i + write-code-point-utf8 text, g # kee - write-grapheme text, ka - g <- to-grapheme 0x0940/devanagari-vowel-ii - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x0940/devanagari-vowel-ii + write-code-point-utf8 text, g # ku - write-grapheme text, ka - g <- to-grapheme 0x0941/devanagari-vowel-u - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x0941/devanagari-vowel-u + write-code-point-utf8 text, g # koo - write-grapheme text, ka - g <- to-grapheme 0x0942/devanagari-vowel-oo - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x0942/devanagari-vowel-oo + write-code-point-utf8 text, g # kay - write-grapheme text, ka - g <- to-grapheme 0x0947/devanagari-vowel-E - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x0947/devanagari-vowel-E + write-code-point-utf8 text, g # kai - write-grapheme text, ka - g <- to-grapheme 0x0948/devanagari-vowel-ai - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x0948/devanagari-vowel-ai + write-code-point-utf8 text, g # ko - write-grapheme text, ka - g <- to-grapheme 0x094b/devanagari-vowel-o - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 
0x094b/devanagari-vowel-o + write-code-point-utf8 text, g # kow - write-grapheme text, ka - g <- to-grapheme 0x094f/devanagari-vowel-aw - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x094f/devanagari-vowel-aw + write-code-point-utf8 text, g # kan - write-grapheme text, ka - g <- to-grapheme 0x0902/devanagari-anusvara - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x0902/devanagari-anusvara + write-code-point-utf8 text, g # kaha - write-grapheme text, ka - g <- to-grapheme 0x0903/devanagari-visarga - write-grapheme text, g + write-code-point-utf8 text, ka + g <- to-utf8 0x0903/devanagari-visarga + write-code-point-utf8 text, g # render everything set-cursor-position screen, 4/x 0xe/y draw-stream-wrapping-right-then-down-from-cursor-over-full-screen screen, text, 3/fg 0/bg - # a stream of tamil graphemes (with interspersed spaces for clarity) that don't look the same in Mu + # a stream of tamil code-point-utf8s (with interspersed spaces for clarity) that don't look the same in Mu set-cursor-position 0, 4/x 0x12/y draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0, "எ ஃ கு ", 3/fg 0/bg set-cursor-position 0, 4/x 0x13/y diff --git a/apps/ex9.mu b/apps/ex9.mu index deb3f2f7..c192bde8 100644 --- a/apps/ex9.mu +++ b/apps/ex9.mu @@ -33,7 +33,7 @@ fn word-count in: (addr stream byte) -> _/eax: int { var done?/eax: boolean <- stream-empty? 
in compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme in + var g/eax: code-point-utf8 <- read-code-point-utf8 in { compare g, 0x20/space break-if-!= diff --git a/apps/hest-life.mu b/apps/hest-life.mu index 721db5c8..340f7214 100644 --- a/apps/hest-life.mu +++ b/apps/hest-life.mu @@ -28,7 +28,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) var second-screen/edi: (addr screen) <- address second-buffer initialize-screen second-screen, 0x80, 0x30, 1/include-pixels render second-screen, env - convert-graphemes-to-pixels second-screen + convert-code-point-utf8s-to-pixels second-screen copy-pixels second-screen, screen { edit keyboard, env @@ -39,7 +39,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) step env clear-screen second-screen render second-screen, env - convert-graphemes-to-pixels second-screen + convert-code-point-utf8s-to-pixels second-screen copy-pixels second-screen, screen } linger diff --git a/browse-slack/environment.mu b/browse-slack/environment.mu index 8f53a7f8..e4e24ff6 100644 --- a/browse-slack/environment.mu +++ b/browse-slack/environment.mu @@ -615,12 +615,12 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add } compare c, 0xffffffff/end-of-file break-if-= - $draw-json-stream-wrapping-right-then-down:render-grapheme: { + $draw-json-stream-wrapping-right-then-down:render-code-point-utf8: { compare c, 0x5c/backslash { break-if-!= xcurr, ycurr <- render-json-escaped-code-point screen, stream, xmin, ymin, xmax, ymax, xcurr, ycurr, color, background-color - break $draw-json-stream-wrapping-right-then-down:render-grapheme + break $draw-json-stream-wrapping-right-then-down:render-code-point-utf8 } compare c, 0xa/newline { @@ -629,7 +629,7 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add var dummy/eax: int <- draw-code-point screen, 0x20/space, xcurr, ycurr, color, background-color xcurr <- 
copy xmin ycurr <- increment - break $draw-json-stream-wrapping-right-then-down:render-grapheme + break $draw-json-stream-wrapping-right-then-down:render-code-point-utf8 } var offset/eax: int <- draw-code-point screen, c, xcurr, ycurr, color, background-color # overlay a combining character if necessary @@ -639,7 +639,7 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add break-if-!= # read a character # no combining character allowed here - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream var c/eax: code-point <- to-code-point g # if not a combining character, save for next iteration and loop { @@ -672,7 +672,7 @@ fn draw-json-stream-wrapping-right-then-down screen: (addr screen), stream: (add # just return a different register fn read-json-code-point stream: (addr stream byte) -> _/ebx: code-point { - var g/eax: grapheme <- read-grapheme stream + var g/eax: code-point-utf8 <- read-code-point-utf8 stream var result/eax: code-point <- to-code-point g return result } @@ -1012,7 +1012,7 @@ fn update-search _env: (addr environment), key: byte, users: (addr array user), # otherwise delegate var search-terms-ah/eax: (addr handle gap-buffer) <- get env, search-terms var search-terms/eax: (addr gap-buffer) <- lookup *search-terms-ah - var g/ecx: grapheme <- copy key + var g/ecx: code-point-utf8 <- copy key edit-gap-buffer search-terms, g } diff --git a/editor/Mu.tmbundle/Syntaxes/Mu.tmLanguage b/editor/Mu.tmbundle/Syntaxes/Mu.tmLanguage index 64f71320..b10d6d17 100644 --- a/editor/Mu.tmbundle/Syntaxes/Mu.tmLanguage +++ b/editor/Mu.tmbundle/Syntaxes/Mu.tmLanguage @@ -48,7 +48,7 @@ match - \b(addr|array|boolean|byte|code-point|grapheme|handle|int|float|stream|type)\b + \b(addr|array|boolean|byte|code-point|code-point-utf8|handle|int|float|stream|type)\b name storage.type.mu diff --git a/editor/VSCode/syntaxes/Mu.tmLanguage b/editor/VSCode/syntaxes/Mu.tmLanguage index 64f71320..b10d6d17 
100644 --- a/editor/VSCode/syntaxes/Mu.tmLanguage +++ b/editor/VSCode/syntaxes/Mu.tmLanguage @@ -48,7 +48,7 @@ match - \b(addr|array|boolean|byte|code-point|grapheme|handle|int|float|stream|type)\b + \b(addr|array|boolean|byte|code-point|code-point-utf8|handle|int|float|stream|type)\b name storage.type.mu diff --git a/linux/126write-int-decimal.subx b/linux/126write-int-decimal.subx index 04f8b021..c48e17c8 100644 --- a/linux/126write-int-decimal.subx +++ b/linux/126write-int-decimal.subx @@ -303,7 +303,7 @@ test-write-int32-decimal-negative-multiple-digits: # . end c3/return -decimal-digit?: # c: grapheme -> result/eax: boolean +decimal-digit?: # c: code-point-utf8 -> result/eax: boolean # . prologue 55/push-ebp 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp @@ -402,7 +402,7 @@ test-decimal-digit-above-9: 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp c3/return -to-decimal-digit: # in: grapheme -> out/eax: int +to-decimal-digit: # in: code-point-utf8 -> out/eax: int # . prologue 55/push-ebp 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp diff --git a/linux/304screen.subx b/linux/304screen.subx index 0b24fdbf..fb8ac0ea 100644 --- a/linux/304screen.subx +++ b/linux/304screen.subx @@ -157,8 +157,8 @@ $print-stream-to-real-screen:end: 5d/pop-to-ebp c3/return -# print a grapheme in utf-8 (only up to 4 bytes so far) -print-grapheme-to-real-screen: # c: grapheme +# print a code-point-utf8 in utf-8 (only up to 4 bytes so far) +print-code-point-utf8-to-real-screen: # c: code-point-utf8 # . 
prologue 55/push-ebp 89/<- %ebp 4/r32/esp @@ -170,31 +170,31 @@ print-grapheme-to-real-screen: # c: grapheme 8a/byte-> *(ebp+8) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) # curr = *(ebp+9) 8a/byte-> *(ebp+9) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) # curr = *(ebp+10) 8a/byte-> *(ebp+0xa) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) # curr = *(ebp+11) 8a/byte-> *(ebp+0xb) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) -$print-grapheme-to-real-screen:end: +$print-code-point-utf8-to-real-screen:end: # . restore registers 58/pop-to-eax # . epilogue diff --git a/linux/305keyboard.subx b/linux/305keyboard.subx index 32159e49..21be1081 100644 --- a/linux/305keyboard.subx +++ b/linux/305keyboard.subx @@ -121,15 +121,15 @@ $enable-keyboard-type-mode:end: # read keys or escapes up to 4 bytes # -# fun fact: terminal escapes and graphemes in utf-8 don't conflict! -# - in graphemes all but the first/lowest byte will have a 1 in the MSB (be +# fun fact: terminal escapes and code-point-utf8s in utf-8 don't conflict! 
+# - in code-point-utf8s all but the first/lowest byte will have a 1 in the MSB (be # greater than 0x7f) # - in escapes every byte will have a 0 in the MSB # the two categories overlap only when the first/lowest byte is 0x1b or 'esc' # # Only use this in immediate mode; in type (typewriter) mode 4 bytes may get # parts of multiple keys. -read-key-from-real-keyboard: # -> result/eax: grapheme +read-key-from-real-keyboard: # -> result/eax: code-point-utf8 # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp diff --git a/linux/400.mu b/linux/400.mu index c757c970..a391a76e 100644 --- a/linux/400.mu +++ b/linux/400.mu @@ -113,8 +113,8 @@ sig skip-until-close-paren line: (addr stream byte) #sig skip-until-close-paren-in-slice curr: (addr byte), end: (addr byte) -> _/eax: (addr byte) sig write-stream-data f: (addr buffered-file), s: (addr stream byte) sig write-int32-decimal out: (addr stream byte), n: int -sig decimal-digit? c: grapheme -> _/eax: boolean -sig to-decimal-digit in: grapheme -> _/eax: int +sig decimal-digit? 
c: code-point-utf8 -> _/eax: boolean +sig to-decimal-digit in: code-point-utf8 -> _/eax: int # bad name alert # next-word really tokenizes # next-raw-word really reads whitespace-separated words @@ -159,7 +159,7 @@ sig move-cursor-on-real-screen row: int, column: int sig print-string-to-real-screen s: (addr array byte) sig print-slice-to-real-screen s: (addr slice) sig print-stream-to-real-screen s: (addr stream byte) -sig print-grapheme-to-real-screen c: grapheme +sig print-code-point-utf8-to-real-screen c: code-point-utf8 sig print-int32-hex-to-real-screen n: int sig print-int32-hex-bits-to-real-screen n: int, bits: int sig print-int32-decimal-to-real-screen n: int @@ -174,7 +174,7 @@ sig hide-cursor-on-real-screen sig show-cursor-on-real-screen sig enable-keyboard-immediate-mode sig enable-keyboard-type-mode -sig read-key-from-real-keyboard -> _/eax: grapheme +sig read-key-from-real-keyboard -> _/eax: code-point-utf8 sig read-line-from-real-keyboard out: (addr stream byte) sig open filename: (addr array byte), write?: boolean, out: (addr handle buffered-file) sig populate-buffered-file-containing contents: (addr array byte), out: (addr handle buffered-file) diff --git a/linux/403unicode.mu b/linux/403unicode.mu index 8594615a..655cae2b 100644 --- a/linux/403unicode.mu +++ b/linux/403unicode.mu @@ -1,31 +1,31 @@ # Helpers for Unicode. # -# Mu has no characters, only code points and graphemes. +# Mu has no characters, only code points and code-point-utf8s. # Code points are the indivisible atoms of text streams. # https://en.wikipedia.org/wiki/Code_point # Graphemes are the smallest self-contained unit of text. # Graphemes may consist of multiple code points. # -# Mu graphemes are always represented in utf-8, and they are required to fit +# Mu code-point-utf8s are always represented in utf-8, and they are required to fit # in 4 bytes. 
# -# Mu doesn't currently support combining code points, or graphemes made of +# Mu doesn't currently support combining code points, or code-point-utf8s made of # multiple code points. One day we will. # On Linux, we also don't currently support code points that translate into -# multiple or wide graphemes. (In particular, Tab will never be supported.) +# multiple or wide code-point-utf8s. (In particular, Tab will never be supported.) # transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox # https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm -fn to-grapheme in: code-point -> _/eax: grapheme { +fn to-utf8 in: code-point -> _/eax: code-point-utf8 { var c/eax: int <- copy in var num-trailers/ecx: int <- copy 0 var first/edx: int <- copy 0 - $to-grapheme:compute-length: { + $to-utf8:compute-length: { # single byte: just return it compare c, 0x7f { break-if-> - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -34,7 +34,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 1 first <- copy 0xc0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # 3 bytes compare c, 0xffff @@ -42,7 +42,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 2 first <- copy 0xe0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # 4 bytes compare c, 0x1fffff @@ -50,7 +50,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 3 first <- copy 0xf0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # more than 4 bytes: unsupported # TODO: print to stderr @@ -65,7 +65,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { } } # emit trailer bytes, 6 bits from 'in', first two bits '10' - var result/edi: grapheme <- copy 0 + var result/edi: code-point-utf8 <- copy 0 { compare num-trailers, 0 break-if-<= @@ -87,16 +87,16 @@ fn to-grapheme in: code-point 
-> _/eax: grapheme { return result } -# single-byte code point have identical graphemes -fn test-to-grapheme-single-byte { +# single-byte code point have identical code-point-utf8s +fn test-to-utf8-single-byte { var in-int/ecx: int <- copy 0 { compare in-int, 0x7f break-if-> var in/eax: code-point <- copy in-int - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, in-int, "F - test-to-grapheme-single-byte" + check-ints-equal out-int, in-int, "F - test-to-utf8-single-byte" in-int <- increment loop } @@ -104,55 +104,55 @@ fn test-to-grapheme-single-byte { # byte | byte | byte | byte # smallest 2-byte utf-8 -fn test-to-grapheme-two-bytes-min { +fn test-to-utf8-two-bytes-min { var in/eax: code-point <- copy 0x80 # 10 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x80c2, "F - to-grapheme/2a" # 110 0-0010 10 00-0000 + check-ints-equal out-int, 0x80c2, "F - to-utf8/2a" # 110 0-0010 10 00-0000 } # largest 2-byte utf-8 -fn test-to-grapheme-two-bytes-max { +fn test-to-utf8-two-bytes-max { var in/eax: code-point <- copy 0x7ff # 1-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfdf, "F - to-grapheme/2b" # 110 1-1111 10 11-1111 + check-ints-equal out-int, 0xbfdf, "F - to-utf8/2b" # 110 1-1111 10 11-1111 } # smallest 3-byte utf-8 -fn test-to-grapheme-three-bytes-min { +fn test-to-utf8-three-bytes-min { var in/eax: code-point <- copy 0x800 # 10-0000 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x80a0e0, "F - to-grapheme/3a" # 1110 0000 10 10-0000 10 00-0000 + check-ints-equal out-int, 0x80a0e0, "F - to-utf8/3a" # 1110 0000 10 10-0000 10 00-0000 } # largest 
3-byte utf-8 -fn test-to-grapheme-three-bytes-max { +fn test-to-utf8-three-bytes-max { var in/eax: code-point <- copy 0xffff # 1111 11-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfbfef, "F - to-grapheme/3b" # 1110 1111 10 11-1111 10 11-1111 + check-ints-equal out-int, 0xbfbfef, "F - to-utf8/3b" # 1110 1111 10 11-1111 10 11-1111 } # smallest 4-byte utf-8 -fn test-to-grapheme-four-bytes-min { +fn test-to-utf8-four-bytes-min { var in/eax: code-point <- copy 0x10000 # 1-0000 00-0000 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x808090f0, "F - to-grapheme/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000 + check-ints-equal out-int, 0x808090f0, "F - to-utf8/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000 } # largest 4-byte utf-8 -fn test-to-grapheme-four-bytes-max { +fn test-to-utf8-four-bytes-max { var in/eax: code-point <- copy 0x1fffff # 111 11-1111 11-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfbfbff7, "F - to-grapheme/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111 + check-ints-equal out-int, 0xbfbfbff7, "F - to-utf8/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111 } -# read the next grapheme from a stream of bytes -fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { +# read the next code-point-utf8 from a stream of bytes +fn read-code-point-utf8 in: (addr stream byte) -> _/eax: code-point-utf8 { # if at eof, return EOF { var eof?/eax: boolean <- stream-empty? 
in @@ -162,18 +162,18 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { } var c/eax: byte <- read-byte in var num-trailers/ecx: int <- copy 0 - $read-grapheme:compute-length: { + $read-code-point-utf8:compute-length: { # single byte: just return it compare c, 0xc0 { break-if->= - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } compare c, 0xfe { break-if-< - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -181,23 +181,23 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { { break-if->= num-trailers <- copy 1 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } # 3 bytes compare c, 0xf0 { break-if->= num-trailers <- copy 2 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } # 4 bytes compare c, 0xf8 { break-if->= num-trailers <- copy 3 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } -$read-grapheme:abort: { +$read-code-point-utf8:abort: { # TODO: print to stderr print-string-to-real-screen "utf-8 encodings larger than 4 bytes are not yet supported. 
First byte seen: " var n/eax: int <- copy c @@ -208,7 +208,7 @@ $read-grapheme:abort: { } } # prepend trailer bytes - var result/edi: grapheme <- copy c + var result/edi: code-point-utf8 <- copy c var num-byte-shifts/edx: int <- copy 1 { compare num-trailers, 0 @@ -225,48 +225,48 @@ $read-grapheme:abort: { return result } -fn test-read-grapheme { +fn test-read-code-point-utf8 { var s: (stream byte 0x30) var s2/ecx: (addr stream byte) <- address s write s2, "aΒc世d界e" - var c/eax: grapheme <- read-grapheme s2 + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x61, "F - test grapheme/0" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x61, "F - test code-point-utf8/0" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test grapheme/1" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test code-point-utf8/1" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x63, "F - test grapheme/2" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x63, "F - test code-point-utf8/2" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x96b8e4, "F - test grapheme/3" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x96b8e4, "F - test code-point-utf8/3" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x64, "F - test grapheme/4" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x64, "F - test code-point-utf8/4" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x8c95e7, "F - test grapheme/5" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x8c95e7, "F - test code-point-utf8/5" + var c/eax: code-point-utf8 <- 
read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x65, "F - test grapheme/6" + check-ints-equal n, 0x65, "F - test code-point-utf8/6" } -fn read-grapheme-buffered in: (addr buffered-file) -> _/eax: grapheme { +fn read-code-point-utf8-buffered in: (addr buffered-file) -> _/eax: code-point-utf8 { var c/eax: byte <- read-byte-buffered in var num-trailers/ecx: int <- copy 0 - $read-grapheme-buffered:compute-length: { + $read-code-point-utf8-buffered:compute-length: { # single byte: just return it compare c, 0xc0 { break-if->= - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } compare c, 0xfe { break-if-< - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -274,23 +274,23 @@ fn read-grapheme-buffered in: (addr buffered-file) -> _/eax: grapheme { { break-if->= num-trailers <- copy 1 - break $read-grapheme-buffered:compute-length + break $read-code-point-utf8-buffered:compute-length } # 3 bytes compare c, 0xf0 { break-if->= num-trailers <- copy 2 - break $read-grapheme-buffered:compute-length + break $read-code-point-utf8-buffered:compute-length } # 4 bytes compare c, 0xf8 { break-if->= num-trailers <- copy 3 - break $read-grapheme-buffered:compute-length + break $read-code-point-utf8-buffered:compute-length } -$read-grapheme-buffered:abort: { +$read-code-point-utf8-buffered:abort: { # TODO: print to stderr print-string-to-real-screen "utf-8 encodings larger than 4 bytes are not supported. 
First byte seen: " var n/eax: int <- copy c @@ -301,7 +301,7 @@ $read-grapheme-buffered:abort: { } } # prepend trailer bytes - var result/edi: grapheme <- copy c + var result/edi: code-point-utf8 <- copy c var num-byte-shifts/edx: int <- copy 1 { compare num-trailers, 0 @@ -364,23 +364,23 @@ fn test-shift-left-bytes-5 { check-ints-equal result, 0, "F - shift-left-bytes >4" } -# write a grapheme to a stream of bytes +# write a code-point-utf8 to a stream of bytes # this is like write-to-stream, except we skip leading 0 bytes -fn write-grapheme out: (addr stream byte), g: grapheme { -$write-grapheme:body: { +fn write-code-point-utf8 out: (addr stream byte), g: code-point-utf8 { +$write-code-point-utf8:body: { var c/eax: int <- copy g append-byte out, c # first byte is always written c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c } } diff --git a/linux/405screen.mu b/linux/405screen.mu index c850df2a..ffb0deb8 100644 --- a/linux/405screen.mu +++ b/linux/405screen.mu @@ -18,7 +18,7 @@ type screen { } type screen-cell { - data: grapheme + data: code-point-utf8 color: int background-color: int bold?: boolean @@ -83,7 +83,7 @@ fn clear-screen screen: (addr screen) { return } # fake screen - var space/edi: grapheme <- copy 0x20 + var space/edi: code-point-utf8 <- copy 0x20 move-cursor screen, 1, 1 var screen-addr/esi: (addr screen) <- copy screen var i/eax: int <- copy 1 @@ -96,7 +96,7 @@ fn clear-screen screen: (addr screen) { { compare j, *ncols break-if-> - print-grapheme screen, space + print-code-point-utf8 screen, space j <- increment loop } @@ -186,8 +186,8 @@ fn print-stream _screen: (addr screen), s: (addr stream byte) { var done?/eax: 
boolean <- stream-empty? s compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s + print-code-point-utf8 screen, g loop } } @@ -211,11 +211,11 @@ fn print-array-of-ints-in-decimal screen: (addr screen), _a: (addr array int) { } } -fn print-grapheme screen: (addr screen), c: grapheme { +fn print-code-point-utf8 screen: (addr screen), c: code-point-utf8 { compare screen, 0 { break-if-!= - print-grapheme-to-real-screen c + print-code-point-utf8-to-real-screen c return } # fake screen @@ -239,7 +239,7 @@ fn print-grapheme screen: (addr screen), c: grapheme { break-if-<= copy-to *cursor-row-addr, num-rows # if (top-index > data size) top-index = 0, otherwise top-index += num-cols - $print-grapheme:perform-scroll: { + $print-code-point-utf8:perform-scroll: { var top-index-addr/ebx: (addr int) <- get screen-addr, top-index var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data var data/eax: (addr array screen-cell) <- lookup *data-ah @@ -248,7 +248,7 @@ fn print-grapheme screen: (addr screen), c: grapheme { { break-if->= add-to *top-index-addr, num-cols - break $print-grapheme:perform-scroll + break $print-code-point-utf8:perform-scroll } { break-if-< @@ -257,7 +257,7 @@ fn print-grapheme screen: (addr screen), c: grapheme { } } var idx/ecx: int <- current-screen-cell-index screen-addr -#? print-string-to-real-screen "printing grapheme at screen index " +#? print-string-to-real-screen "printing code-point-utf8 at screen index " #? print-int32-hex-to-real-screen idx #? 
print-string-to-real-screen ": " var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data @@ -266,9 +266,9 @@ fn print-grapheme screen: (addr screen), c: grapheme { var dest-cell/ecx: (addr screen-cell) <- index data, offset var src-cell/eax: (addr screen-cell) <- get screen-addr, curr-attributes copy-object src-cell, dest-cell - var dest/eax: (addr grapheme) <- get dest-cell, data - var c2/ecx: grapheme <- copy c -#? print-grapheme-to-real-screen c2 + var dest/eax: (addr code-point-utf8) <- get dest-cell, data + var c2/ecx: code-point-utf8 <- copy c +#? print-code-point-utf8-to-real-screen c2 #? print-string-to-real-screen "\n" copy-to *dest, c2 increment *cursor-col-addr @@ -305,21 +305,21 @@ fn screen-cell-index screen-on-stack: (addr screen), row: int, col: int -> _/ecx return result } -fn screen-grapheme-at screen-on-stack: (addr screen), row: int, col: int -> _/eax: grapheme { +fn screen-code-point-utf8-at screen-on-stack: (addr screen), row: int, col: int -> _/eax: code-point-utf8 { var screen-addr/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen-addr, row, col - var result/eax: grapheme <- screen-grapheme-at-idx screen-addr, idx + var result/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen-addr, idx return result } -fn screen-grapheme-at-idx screen-on-stack: (addr screen), idx-on-stack: int -> _/eax: grapheme { +fn screen-code-point-utf8-at-idx screen-on-stack: (addr screen), idx-on-stack: int -> _/eax: code-point-utf8 { var screen-addr/esi: (addr screen) <- copy screen-on-stack var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data var data/eax: (addr array screen-cell) <- lookup *data-ah var idx/ecx: int <- copy idx-on-stack var offset/ecx: (offset screen-cell) <- compute-offset data, idx var cell/eax: (addr screen-cell) <- index data, offset - var src/eax: (addr grapheme) <- get cell, data + var src/eax: (addr code-point-utf8) <- get cell, data return *src } @@ -433,8 
+433,8 @@ fn screen-blink-at-idx? screen-on-stack: (addr screen), idx-on-stack: int -> _/e } fn print-code-point screen: (addr screen), c: code-point { - var g/eax: grapheme <- to-grapheme c - print-grapheme screen, g + var g/eax: code-point-utf8 <- to-utf8 c + print-code-point-utf8 screen, g } fn print-int32-hex screen: (addr screen), n: int { @@ -453,8 +453,8 @@ fn print-int32-hex screen: (addr screen), n: int { var done?/eax: boolean <- stream-empty? s2-addr compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s2-addr - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr + print-code-point-utf8 screen, g loop } } @@ -475,8 +475,8 @@ fn print-int32-hex-bits screen: (addr screen), n: int, bits: int { var done?/eax: boolean <- stream-empty? s2-addr compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s2-addr - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr + print-code-point-utf8 screen, g loop } } @@ -497,8 +497,8 @@ fn print-int32-decimal screen: (addr screen), n: int { var done?/eax: boolean <- stream-empty? 
s2-addr compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s2-addr - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr + print-code-point-utf8 screen, g loop } } @@ -631,7 +631,7 @@ fn check-screen-row screen: (addr screen), row-idx: int, expected: (addr array b fn check-screen-row-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -639,35 +639,35 @@ fn check-screen-row-from screen-on-stack: (addr screen), row-idx: int, col-idx: var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var expected-grapheme/eax: grapheme <- read-grapheme e-addr - # compare graphemes - $check-screen-row-from:compare-graphemes: { - # if expected-grapheme is space, null grapheme is also ok + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + # compare code-point-utf8s + $check-screen-row-from:compare-code-point-utf8s: { + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 - break-if-= $check-screen-row-from:compare-graphemes + break-if-= $check-screen-row-from:compare-code-point-utf8s } - # if (g == expected-grapheme) print "." 
- compare g, expected-grapheme + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-from:compare-graphemes + break $check-screen-row-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } idx <- increment @@ -685,7 +685,7 @@ fn check-screen-row-in-color screen: (addr screen), fg: int, row-idx: int, expec fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -693,45 +693,45 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-i var done?/eax: boolean <- stream-empty? 
e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edi: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edi: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-color-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-color-from:compare-cells } - # if expected-grapheme is space, a different color is ok + # if expected-code-point-utf8 is space, a different color is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var color/eax: int <- screen-color-at-idx screen, idx compare color, fg break-if-!= $check-screen-row-in-color-from:compare-cells } - # compare graphemes - $check-screen-row-in-color-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-color-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." 
- break $check-screen-row-in-color-from:compare-graphemes + break $check-screen-row-in-color-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-color-from:compare-colors: { @@ -745,7 +745,7 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-i # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -772,7 +772,7 @@ fn check-screen-row-in-background-color screen: (addr screen), bg: int, row-idx: fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg: int, row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -780,45 +780,45 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg: var done?/eax: boolean <- stream-empty? 
e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-background-color-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-background-color-from:compare-cells } - # if expected-grapheme is space, a different color is ok + # if expected-code-point-utf8 is space, a different color is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var color/eax: int <- screen-background-color-at-idx screen, idx compare color, bg break-if-!= $check-screen-row-in-background-color-from:compare-cells } - # compare graphemes - $check-screen-row-in-background-color-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-background-color-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." 
- break $check-screen-row-in-background-color-from:compare-graphemes + break $check-screen-row-in-background-color-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-background-color-from:compare-colors: { @@ -832,7 +832,7 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg: # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -857,7 +857,7 @@ fn check-screen-row-in-bold screen: (addr screen), row-idx: int, expected: (addr fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -865,45 +865,45 @@ fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, c var done?/eax: boolean <- stream-empty? 
e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-bold-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-bold-from:compare-cells } - # if expected-grapheme is space, non-bold is ok + # if expected-code-point-utf8 is space, non-bold is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var bold?/eax: boolean <- screen-bold-at-idx? screen, idx compare bold?, 1 break-if-!= $check-screen-row-in-bold-from:compare-cells } - # compare graphemes - $check-screen-row-in-bold-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-bold-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." 
- break $check-screen-row-in-bold-from:compare-graphemes + break $check-screen-row-in-bold-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-bold-from:compare-bold: { @@ -917,7 +917,7 @@ fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, c # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -938,7 +938,7 @@ fn check-screen-row-in-underline screen: (addr screen), row-idx: int, expected: fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -946,45 +946,45 @@ fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: i var done?/eax: boolean <- stream-empty? 
e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-underline-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-underline-from:compare-cells } - # if expected-grapheme is space, non-underline is ok + # if expected-code-point-utf8 is space, non-underline is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var underline?/eax: boolean <- screen-underline-at-idx? screen, idx compare underline?, 1 break-if-!= $check-screen-row-in-underline-from:compare-cells } - # compare graphemes - $check-screen-row-in-underline-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-underline-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." 
- break $check-screen-row-in-underline-from:compare-graphemes + break $check-screen-row-in-underline-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-underline-from:compare-underline: { @@ -998,7 +998,7 @@ fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: i # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -1019,7 +1019,7 @@ fn check-screen-row-in-reverse screen: (addr screen), row-idx: int, expected: (a fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -1027,45 +1027,45 @@ fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int var done?/eax: boolean <- stream-empty? 
e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-reverse-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-reverse-from:compare-cells } - # if expected-grapheme is space, non-reverse is ok + # if expected-code-point-utf8 is space, non-reverse is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var reverse?/eax: boolean <- screen-reverse-at-idx? screen, idx compare reverse?, 1 break-if-!= $check-screen-row-in-reverse-from:compare-cells } - # compare graphemes - $check-screen-row-in-reverse-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-reverse-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." 
- break $check-screen-row-in-reverse-from:compare-graphemes + break $check-screen-row-in-reverse-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-reverse-from:compare-reverse: { @@ -1079,7 +1079,7 @@ fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -1100,7 +1100,7 @@ fn check-screen-row-in-blinking screen: (addr screen), row-idx: int, expected: ( fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -1108,45 +1108,45 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in var done?/eax: boolean <- stream-empty? 
e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-blinking-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-blinking-from:compare-cells } - # if expected-grapheme is space, non-blinking is ok + # if expected-code-point-utf8 is space, non-blinking is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var blinking?/eax: boolean <- screen-blink-at-idx? screen, idx compare blinking?, 1 break-if-!= $check-screen-row-in-blinking-from:compare-cells } - # compare graphemes - $check-screen-row-in-blinking-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-blinking-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." 
- break $check-screen-row-in-blinking-from:compare-graphemes + break $check-screen-row-in-blinking-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-blinking-from:compare-blinking: { @@ -1160,7 +1160,7 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -1175,21 +1175,21 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in } } -fn test-print-single-grapheme { +fn test-print-single-code-point-utf8 { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c - check-screen-row screen, 1/row, "a", "F - test-print-single-grapheme" # top-left corner of the screen + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c + check-screen-row screen, 1/row, "a", "F - test-print-single-code-point-utf8" # top-left corner of the screen } -fn test-print-multiple-graphemes { +fn test-print-multiple-code-point-utf8s { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 
5/rows, 4/cols print-string screen, "Hello, 世界" - check-screen-row screen, 1/row, "Hello, 世界", "F - test-print-multiple-graphemes" + check-screen-row screen, 1/row, "Hello, 世界", "F - test-print-multiple-code-point-utf8s" } fn test-move-cursor { @@ -1197,8 +1197,8 @@ fn test-move-cursor { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 1, 4 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, " a", "F - test-move-cursor" # top row } @@ -1207,8 +1207,8 @@ fn test-move-cursor-zeroes { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 0, 0 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, "a", "F - test-move-cursor-zeroes" # top-left corner of the screen } @@ -1217,8 +1217,8 @@ fn test-move-cursor-zero-row { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 0, 2 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, " a", "F - test-move-cursor-zero-row" # top row } @@ -1227,8 +1227,8 @@ fn test-move-cursor-zero-column { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 4, 0 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 4/row, "a", "F - test-move-cursor-zero-column" } @@ -1237,8 +1237,8 @@ fn test-move-cursor-negative-row { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5, 3 move-cursor 
screen, -1/row, 2/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # no move check-screen-row screen, 1/row, "a", "F - test-move-cursor-negative-row" } @@ -1248,8 +1248,8 @@ fn test-move-cursor-negative-column { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5, 3 move-cursor screen, 2/row, -1/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # no move check-screen-row screen, 1/row, "a", "F - test-move-cursor-negative-column" } @@ -1259,8 +1259,8 @@ fn test-move-cursor-column-too-large { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 1/row, 4/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # top row is empty check-screen-row screen, 1/row, " ", "F - test-move-cursor-column-too-large" # character shows up on next row @@ -1272,8 +1272,8 @@ fn test-move-cursor-column-too-large-saturates { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 1/row, 6/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # top row is empty check-screen-row screen, 1/row, " ", "F - test-move-cursor-column-too-large-saturates" # top-left corner of the screen # character shows up at the start of next row @@ -1285,8 +1285,8 @@ fn test-move-cursor-row-too-large { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 6/row, 2/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # bottom row 
shows the character check-screen-row screen, 5/row, " a", "F - test-move-cursor-row-too-large" } @@ -1296,8 +1296,8 @@ fn test-move-cursor-row-too-large-saturates { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 9/row, 2/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # bottom row shows the character check-screen-row screen, 5/row, " a", "F - test-move-cursor-row-too-large-saturates" } @@ -1307,8 +1307,8 @@ fn test-check-screen-row-from { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 1, 4 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, " a", "F - test-check-screen-row-from/baseline" check-screen-row-from screen, 1/row, 4/col, "a", "F - test-check-screen-row-from" } @@ -1328,8 +1328,8 @@ fn test-check-screen-scrolls-on-overflow { initialize-screen screen, 5/rows, 4/cols # single character starting at bottom right move-cursor screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row-from screen, 5/row, 4/col, "a", "F - test-check-screen-scrolls-on-overflow/baseline" # bottom-right corner of the screen # multiple characters starting at bottom right move-cursor screen, 5, 4 @@ -1348,14 +1348,14 @@ fn test-check-screen-color { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c start-color screen, 1/fg, 0/bg c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 
screen, c start-color screen, 0/fg, 7/bg c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-color screen, 0/fg, 1/row, "a c", "F - test-check-screen-color" } @@ -1363,14 +1363,14 @@ fn test-check-screen-background-color { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c start-color screen, 0/fg, 1/bg c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-color screen, 0/fg, 7/bg c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-background-color screen, 7/bg, 1/row, "a c", "F - test-check-screen-background-color" } @@ -1379,14 +1379,14 @@ fn test-check-screen-bold { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-bold screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-bold screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-bold screen, 1/row, "a c", "F - test-check-screen-bold" } @@ -1395,14 +1395,14 @@ fn test-check-screen-underline { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-underline screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-underline screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c 
check-screen-row-in-underline screen, 1/row, "a c", "F - test-check-screen-underline" } @@ -1411,14 +1411,14 @@ fn test-check-screen-reverse { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-reverse-video screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-reverse-video screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-reverse screen, 1/row, "a c", "F - test-check-screen-reverse" } @@ -1427,14 +1427,14 @@ fn test-check-screen-blinking { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-blinking screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-blinking screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-blinking screen, 1/row, "a c", "F - test-check-screen-blinking" } diff --git a/linux/407right-justify.mu b/linux/407right-justify.mu index b7322ae5..aa767782 100644 --- a/linux/407right-justify.mu +++ b/linux/407right-justify.mu @@ -6,7 +6,7 @@ fn print-int32-decimal-right-justified screen: (addr screen), n: int, _width: in { compare n-width, width break-if->= - print-grapheme screen, 0x20/space + print-code-point-utf8 screen, 0x20/space width <- decrement loop } diff --git a/linux/411string.mu b/linux/411string.mu index cf0471ac..493c9b56 100644 --- a/linux/411string.mu +++ b/linux/411string.mu @@ -1,4 +1,4 @@ -# read up to 'len' graphemes after skipping the first 'start' ones +# read up to 'len' code-point-utf8s after skipping the first 'start' 
ones fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) { var in-stream: (stream byte 0x100) var in-stream-addr/esi: (addr stream byte) <- address in-stream @@ -6,29 +6,29 @@ fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle a var out-stream: (stream byte 0x100) var out-stream-addr/edi: (addr stream byte) <- address out-stream $substring:core: { - # skip 'start' graphemes + # skip 'start' code-point-utf8s var i/eax: int <- copy 0 { compare i, start break-if->= { - var dummy/eax: grapheme <- read-grapheme in-stream-addr + var dummy/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare dummy, 0xffffffff/end-of-file break-if-= $substring:core } i <- increment loop } - # copy 'len' graphemes + # copy 'len' code-point-utf8s i <- copy 0 { compare i, len break-if->= { - var g/eax: grapheme <- read-grapheme in-stream-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare g, 0xffffffff/end-of-file break-if-= $substring:core - write-grapheme out-stream-addr, g + write-code-point-utf8 out-stream-addr, g } i <- increment loop @@ -85,7 +85,7 @@ fn test-substring { check-strings-equal out, "bcde", "F - test-substring/middle-too-small" } -fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte)) { +fn split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte)) { var in-stream: (stream byte 0x100) var in-stream-addr/esi: (addr stream byte) <- address in-stream write in-stream-addr, in @@ -94,10 +94,10 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array var curr-stream: (stream byte 0x100) var curr-stream-addr/ecx: (addr stream byte) <- address curr-stream $split-string:core: { - var g/eax: grapheme <- read-grapheme in-stream-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare g, 0xffffffff break-if-= -#? 
print-grapheme-to-real-screen g +#? print-code-point-utf8-to-real-screen g #? print-string-to-real-screen "\n" compare g, delim { @@ -110,7 +110,7 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array clear-stream curr-stream-addr loop $split-string:core } - write-grapheme curr-stream-addr, g + write-code-point-utf8 curr-stream-addr, g loop } stream-to-array tokens-stream-addr, out diff --git a/linux/apps/arith.mu b/linux/apps/arith.mu index 4393a34c..08b2008d 100644 --- a/linux/apps/arith.mu +++ b/linux/apps/arith.mu @@ -33,7 +33,7 @@ fn main -> _/ebx: int { enable-keyboard-immediate-mode - var look/esi: grapheme <- copy 0 # lookahead + var look/esi: code-point-utf8 <- copy 0 # lookahead var n/eax: int <- copy 0 # result of each expression print-string 0/screen, "press ctrl-c or ctrl-d to exit\n" # read-eval-print loop @@ -55,17 +55,17 @@ fn main -> _/ebx: int { return 0 } -fn simplify -> _/eax: int, _/esi: grapheme { +fn simplify -> _/eax: int, _/esi: code-point-utf8 { # prime the pump - var look/esi: grapheme <- get-char + var look/esi: code-point-utf8 <- get-char # do it var result/eax: int <- copy 0 result, look <- expression look return result, look } -fn expression _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look +fn expression _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # read arg var result/eax: int <- copy 0 result, look <- term look @@ -78,7 +78,7 @@ fn expression _look: grapheme -> _/eax: int, _/esi: grapheme { break-if-= $expression:loop } # read operator - var op/ecx: grapheme <- copy 0 + var op/ecx: code-point-utf8 <- copy 0 op, look <- operator look # read next arg var second/edx: int <- copy 0 @@ -109,8 +109,8 @@ fn expression _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn term _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look +fn term _look: 
code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # read arg look <- skip-spaces look var result/eax: int <- copy 0 @@ -124,7 +124,7 @@ fn term _look: grapheme -> _/eax: int, _/esi: grapheme { break-if-= $term:loop } # read operator - var op/ecx: grapheme <- copy 0 + var op/ecx: code-point-utf8 <- copy 0 op, look <- operator look # read next arg var second/edx: int <- copy 0 @@ -154,8 +154,8 @@ fn term _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn factor _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look # should be a no-op +fn factor _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # should be a no-op look <- skip-spaces look # if next char is not '(', parse a number compare look, 0x28/open-paren @@ -174,7 +174,7 @@ fn factor _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn mul-or-div? c: grapheme -> _/eax: boolean { +fn mul-or-div? c: code-point-utf8 -> _/eax: boolean { compare c, 0x2a/* { break-if-!= @@ -188,7 +188,7 @@ fn mul-or-div? c: grapheme -> _/eax: boolean { return 0/false } -fn add-or-sub? c: grapheme -> _/eax: boolean { +fn add-or-sub? c: code-point-utf8 -> _/eax: boolean { compare c, 0x2b/+ { break-if-!= @@ -202,14 +202,14 @@ fn add-or-sub? 
c: grapheme -> _/eax: boolean { return 0/false } -fn operator _look: grapheme -> _/ecx: grapheme, _/esi: grapheme { - var op/ecx: grapheme <- copy _look - var look/esi: grapheme <- get-char +fn operator _look: code-point-utf8 -> _/ecx: code-point-utf8, _/esi: code-point-utf8 { + var op/ecx: code-point-utf8 <- copy _look + var look/esi: code-point-utf8 <- get-char return op, look } -fn num _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look +fn num _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look var result/edi: int <- copy 0 { var first-digit/eax: int <- to-decimal-digit look @@ -234,8 +234,8 @@ fn num _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn skip-spaces _look: grapheme -> _/esi: grapheme { - var look/esi: grapheme <- copy _look # should be a no-op +fn skip-spaces _look: code-point-utf8 -> _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # should be a no-op { compare look, 0x20 break-if-!= @@ -245,9 +245,9 @@ fn skip-spaces _look: grapheme -> _/esi: grapheme { return look } -fn get-char -> _/esi: grapheme { - var look/eax: grapheme <- read-key-from-real-keyboard - print-grapheme-to-real-screen look +fn get-char -> _/esi: code-point-utf8 { + var look/eax: code-point-utf8 <- read-key-from-real-keyboard + print-code-point-utf8-to-real-screen look compare look, 4 { break-if-!= diff --git a/linux/apps/parse-int.mu b/linux/apps/parse-int.mu index 0f8c71d1..ccff8d44 100644 --- a/linux/apps/parse-int.mu +++ b/linux/apps/parse-int.mu @@ -37,7 +37,7 @@ fn parse-int _in: (addr array byte) -> _/eax: int { var tmp/ebx: (addr byte) <- index in, i var c/eax: byte <- copy-byte *tmp # - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c var digit/eax: int <- to-decimal-digit g result <- add digit i <- increment diff --git a/linux/apps/print-file.mu b/linux/apps/print-file.mu index 75ce2e39..284b805e 100644 --- 
a/linux/apps/print-file.mu +++ b/linux/apps/print-file.mu @@ -30,8 +30,8 @@ fn main _args: (addr array addr array byte) -> _/ebx: int { var c/eax: byte <- read-byte-buffered in-addr compare c, 0xffffffff/end-of-file break-if-= - var g/eax: grapheme <- copy c - print-grapheme 0/screen, g + var g/eax: code-point-utf8 <- copy c + print-code-point-utf8 0/screen, g loop } } diff --git a/linux/apps/tui.mu b/linux/apps/tui.mu index 4e58b986..f4fc914c 100644 --- a/linux/apps/tui.mu +++ b/linux/apps/tui.mu @@ -23,7 +23,7 @@ fn main -> _/ebx: int { print-string 0/screen, "press a key to see its code: " enable-keyboard-immediate-mode - var x/eax: grapheme <- read-key-from-real-keyboard + var x/eax: code-point-utf8 <- read-key-from-real-keyboard enable-keyboard-type-mode enable-screen-type-mode print-string 0/screen, "You pressed " diff --git a/linux/browse/main.mu b/linux/browse/main.mu index 5b4f2e06..27504afe 100644 --- a/linux/browse/main.mu +++ b/linux/browse/main.mu @@ -49,7 +49,7 @@ fn interactive fs: (addr buffered-file) { # { render paginated-screen, fs - var key/eax: grapheme <- read-key-from-real-keyboard + var key/eax: code-point-utf8 <- read-key-from-real-keyboard compare key, 0x71/'q' loop-if-!= } @@ -160,13 +160,13 @@ fn test-render-asterisk-in-text { fn render-normal screen: (addr paginated-screen), fs: (addr buffered-file) { var newline-seen?/esi: boolean <- copy 0/false var start-of-paragraph?/edi: boolean <- copy 1/true - var previous-grapheme/ebx: grapheme <- copy 0 + var previous-code-point-utf8/ebx: code-point-utf8 <- copy 0 $render-normal:loop: { # if done-drawing?(screen) break var done?/eax: boolean <- done-drawing? 
screen compare done?, 0/false break-if-!= - var c/eax: grapheme <- read-grapheme-buffered fs + var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs $render-normal:loop-body: { # if (c == EOF) break compare c, 0xffffffff/end-of-file @@ -186,8 +186,8 @@ $render-normal:loop-body: { # otherwise render two newlines { break-if-= - add-grapheme screen, 0xa/newline - add-grapheme screen, 0xa/newline + add-code-point-utf8 screen, 0xa/newline + add-code-point-utf8 screen, 0xa/newline newline-seen? <- copy 0/false start-of-paragraph? <- copy 1/true break $render-normal:loop-body @@ -221,20 +221,20 @@ $render-normal:flush-buffered-newline: { { compare c, 0x20 break-if-!= - add-grapheme screen, 0xa/newline + add-code-point-utf8 screen, 0xa/newline break $render-normal:flush-buffered-newline } - add-grapheme screen, 0x20/space + add-code-point-utf8 screen, 0x20/space # fall through to print c } ## end soft newline support $render-normal:whitespace-separated-regions: { - # if previous-grapheme wasn't whitespace, skip this block + # if previous-code-point-utf8 wasn't whitespace, skip this block { - compare previous-grapheme, 0x20/space + compare previous-code-point-utf8, 0x20/space break-if-= - compare previous-grapheme, 0xa/newline + compare previous-code-point-utf8, 0xa/newline break-if-= break $render-normal:whitespace-separated-regions } @@ -260,9 +260,9 @@ $render-normal:whitespace-separated-regions: { } } # - add-grapheme screen, c + add-code-point-utf8 screen, c } # $render-normal:loop-body - previous-grapheme <- copy c + previous-code-point-utf8 <- copy c loop } # $render-normal:loop } @@ -271,7 +271,7 @@ fn render-header-line screen: (addr paginated-screen), fs: (addr buffered-file) $render-header-line:body: { # compute color based on number of '#'s var header-level/esi: int <- copy 1 # caller already grabbed one - var c/eax: grapheme <- copy 0 + var c/eax: code-point-utf8 <- copy 0 { # if done-drawing?(screen) return { @@ -280,7 +280,7 @@ 
$render-header-line:body: { break-if-!= $render-header-line:body } # - c <- read-grapheme-buffered fs + c <- read-code-point-utf8-buffered fs # if (c != '#') break compare c, 0x23/'#' break-if-!= @@ -298,7 +298,7 @@ $render-header-line:body: { break-if-!= } # - c <- read-grapheme-buffered fs + c <- read-code-point-utf8-buffered fs # if (c == EOF) break compare c, 0xffffffff/end-of-file break-if-= @@ -306,7 +306,7 @@ $render-header-line:body: { compare c, 0xa/newline break-if-= # - add-grapheme screen, c + add-code-point-utf8 screen, c # loop } @@ -353,7 +353,7 @@ fn render-until-asterisk screen: (addr paginated-screen), fs: (addr buffered-fil compare done?, 0/false break-if-!= # - var c/eax: grapheme <- read-grapheme-buffered fs + var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs # if (c == EOF) break compare c, 0xffffffff/end-of-file break-if-= @@ -361,7 +361,7 @@ fn render-until-asterisk screen: (addr paginated-screen), fs: (addr buffered-fil compare c, 0x2a/'*' break-if-= # - add-grapheme screen, c + add-code-point-utf8 screen, c # loop } @@ -374,7 +374,7 @@ fn render-until-underscore screen: (addr paginated-screen), fs: (addr buffered-f compare done?, 0/false break-if-!= # - var c/eax: grapheme <- read-grapheme-buffered fs + var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs # if (c == EOF) break compare c, 0xffffffff/end-of-file break-if-= @@ -382,7 +382,7 @@ fn render-until-underscore screen: (addr paginated-screen), fs: (addr buffered-f compare c, 0x5f/'_' break-if-= # - add-grapheme screen, c + add-code-point-utf8 screen, c # loop } diff --git a/linux/browse/paginated-screen.mu b/linux/browse/paginated-screen.mu index f4579d95..05f954a7 100644 --- a/linux/browse/paginated-screen.mu +++ b/linux/browse/paginated-screen.mu @@ -7,7 +7,7 @@ # on each frame # start-drawing # while !done-drawing -# add-grapheme ... +# add-code-point-utf8 ... type paginated-screen { screen: (handle screen) @@ -152,23 +152,23 @@ fn done-drawing? 
_self: (addr paginated-screen) -> _/eax: boolean { return 1/true } -fn add-grapheme _self: (addr paginated-screen), c: grapheme { -#? print-string-to-real-screen "add-grapheme: " -#? print-grapheme-to-real-screen c +fn add-code-point-utf8 _self: (addr paginated-screen), c: code-point-utf8 { +#? print-string-to-real-screen "add-code-point-utf8: " +#? print-code-point-utf8-to-real-screen c #? print-string-to-real-screen "\n" -$add-grapheme:body: { +$add-code-point-utf8:body: { var self/esi: (addr paginated-screen) <- copy _self { compare c, 0xa/newline break-if-!= next-line self reposition-cursor self - break $add-grapheme:body + break $add-code-point-utf8:body } # print c var screen-ah/eax: (addr handle screen) <- get self, screen var screen-addr/eax: (addr screen) <- lookup *screen-ah - print-grapheme screen-addr, c + print-code-point-utf8 screen-addr, c # self->col++ var tmp/eax: (addr int) <- get self, col increment *tmp @@ -186,21 +186,21 @@ $add-grapheme:body: { ## tests -fn test-print-grapheme-on-paginated-screen { +fn test-print-code-point-utf8-on-paginated-screen { var pg-on-stack: paginated-screen var pg/eax: (addr paginated-screen) <- address pg-on-stack initialize-fake-paginated-screen pg, 3/rows, 0xa/cols, 0xa/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
- check-ints-equal done, 0, "F - test-print-grapheme-on-paginated-screen/done" + check-ints-equal done, 0, "F - test-print-code-point-utf8-on-paginated-screen/done" } var screen-ah/eax: (addr handle screen) <- get pg, screen var screen-addr/eax: (addr screen) <- lookup *screen-ah - check-screen-row screen-addr, 1, "a", "F - test-print-grapheme-on-paginated-screen" + check-screen-row screen-addr, 1, "a", "F - test-print-code-point-utf8-on-paginated-screen" } fn test-print-single-page { @@ -210,29 +210,29 @@ fn test-print-single-page { start-drawing pg # pages at columns [1, 3), [3, 5) { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
check-ints-equal done, 0, "F - test-print-single-page/done-4" @@ -250,36 +250,36 @@ fn test-print-single-page-narrower-than-page-width { initialize-fake-paginated-screen pg, 2/rows, 4/cols, 5/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-5" @@ -297,36 +297,36 @@ fn test-print-single-page-narrower-than-page-width-with-margin { initialize-fake-paginated-screen pg, 2/rows, 4/cols, 5/page-width, 0/top-margin, 1/left-margin start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-5" @@ -344,29 +344,29 @@ fn test-print-multiple-pages { initialize-fake-paginated-screen pg, 2/rows, 2/cols, 1/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 1, "F - test-print-multiple-pages/done-4" @@ -384,57 +384,57 @@ fn test-print-multiple-pages-2 { initialize-fake-paginated-screen pg, 2/rows, 4/cols, 2/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-5" } { - var c/ecx: grapheme <- copy 0x66/f - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x66/f + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-6" } { - var c/ecx: grapheme <- copy 0x67/g - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x67/g + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-7" } { - var c/ecx: grapheme <- copy 0x68/h - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x68/h + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
check-ints-equal done, 1, "F - test-print-multiple-pages-2/done-8" @@ -452,60 +452,60 @@ fn test-print-multiple-pages-with-margins { initialize-fake-paginated-screen pg, 3/rows, 6/cols, 2/page-width, 1/top-margin, 1/left-margin start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-1" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-2" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-3" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-4" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
- check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-5" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-5" } { - var c/ecx: grapheme <- copy 0x66/f - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x66/f + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-6" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-6" } { - var c/ecx: grapheme <- copy 0x67/g - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x67/g + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-7" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-7" } { - var c/ecx: grapheme <- copy 0x68/h - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x68/h + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? 
- check-ints-equal done, 1, "F - test-print-multiple-pages-with-margins/grapheme-8" + check-ints-equal done, 1, "F - test-print-multiple-pages-with-margins/code-point-utf8-8" } var screen-ah/eax: (addr handle screen) <- get pg, screen var screen-addr/eax: (addr screen) <- lookup *screen-ah diff --git a/linux/mu b/linux/mu index fdeacea0..94cb0a6e 100755 Binary files a/linux/mu and b/linux/mu differ diff --git a/linux/mu.subx b/linux/mu.subx index 879b751e..b4c55006 100644 --- a/linux/mu.subx +++ b/linux/mu.subx @@ -416,8 +416,8 @@ Type-id: # (stream (addr array byte)) "stream"/imm32 # 11 "slice"/imm32 # 12 "code-point"/imm32 # 13; smallest scannable unit from a text stream - "grapheme"/imm32 # 14; smallest printable unit; will eventually be composed of multiple code-points, but currently corresponds 1:1 - # only 4-byte graphemes in utf-8 are currently supported; + "code-point-utf8"/imm32 # 14; smallest printable unit; will eventually be composed of multiple code-points, but currently corresponds 1:1 + # only 4-byte code-point-utf8s in utf-8 are currently supported; # unclear how we should deal with larger clusters. "float"/imm32 # 15 # 0x40 @@ -22183,9 +22183,9 @@ $mu-numberlike-output?:check-code-point: (simple-mu-type? %esi 0xd) # code-point => eax 3d/compare-eax-and 0/imm32/false 75/jump-if-!= $mu-numberlike-output?:return-true/disp8 -$mu-numberlike-output?:check-grapheme: - # if t is a grapheme, return - (simple-mu-type? %esi 0xe) # grapheme => eax +$mu-numberlike-output?:check-code-point-utf8: + # if t is a code-point-utf8, return + (simple-mu-type? 
%esi 0xe) # code-point-utf8 => eax 3d/compare-eax-and 0/imm32/false 75/jump-if-!= $mu-numberlike-output?:return-true/disp8 $mu-numberlike-output?:return-false: diff --git a/linux/tile/box.mu b/linux/tile/box.mu index 859d0b8e..f5155ea8 100644 --- a/linux/tile/box.mu +++ b/linux/tile/box.mu @@ -78,7 +78,7 @@ fn clear-rect screen: (addr screen), row1: int, col1: int, row2: int, col2: int { compare j, col2 break-if-> - print-grapheme screen 0x20/space + print-code-point-utf8 screen 0x20/space j <- increment loop } @@ -98,7 +98,7 @@ fn clear-rect2 screen: (addr screen), row1: int, col1: int, w: int, h: int { { compare j, h break-if->= - print-grapheme screen 0x20/space + print-code-point-utf8 screen 0x20/space j <- increment loop } diff --git a/linux/tile/environment.mu b/linux/tile/environment.mu index 3c869d3c..1512db6d 100644 --- a/linux/tile/environment.mu +++ b/linux/tile/environment.mu @@ -70,7 +70,7 @@ fn initialize-environment-with-fake-screen _self: (addr environment), nrows: int # Iterate ############# -fn process _self: (addr environment), key: grapheme { +fn process _self: (addr environment), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var fn-name-ah/eax: (addr handle word) <- get self, partial-function-name var fn-name/eax: (addr word) <- lookup *fn-name-ah @@ -102,7 +102,7 @@ fn process _self: (addr environment), key: grapheme { } # collect new name in partial-function-name, and move the cursor to function with that name -fn process-goto-dialog _self: (addr environment), key: grapheme { +fn process-goto-dialog _self: (addr environment), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var fn-name-ah/edi: (addr handle word) <- get self, partial-function-name # if 'esc' pressed, cancel goto @@ -130,7 +130,7 @@ fn process-goto-dialog _self: (addr environment), key: grapheme { compare key, 0x7f/del # backspace on Macs $process-goto-dialog:backspace: { break-if-!= - # if not at start, delete grapheme before cursor 
+ # if not at start, delete code-point-utf8 before cursor var fn-name/eax: (addr word) <- lookup *fn-name-ah var at-start?/eax: boolean <- cursor-at-start? fn-name compare at-start?, 0/false @@ -142,24 +142,24 @@ fn process-goto-dialog _self: (addr environment), key: grapheme { return } # otherwise insert key within current word - var print?/eax: boolean <- real-grapheme? key - $process-goto-dialog:real-grapheme: { + var print?/eax: boolean <- real-code-point-utf8? key + $process-goto-dialog:real-code-point-utf8: { compare print?, 0/false break-if-= var fn-name/eax: (addr word) <- lookup *fn-name-ah - add-grapheme-to-word fn-name, key + add-code-point-utf8-to-word fn-name, key return } # silently ignore other hotkeys } -fn process-function _self: (addr environment), _function: (addr function), key: grapheme { +fn process-function _self: (addr environment), _function: (addr function), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var function/edi: (addr function) <- copy _function process-function-edit self, function, key } -fn process-function-edit _self: (addr environment), _function: (addr function), key: grapheme { +fn process-function-edit _self: (addr environment), _function: (addr function), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var function/edi: (addr function) <- copy _function var cursor-word-ah/ebx: (addr handle word) <- get function, cursor-word @@ -290,7 +290,7 @@ fn process-function-edit _self: (addr environment), _function: (addr function), compare key, 0x7f/del # backspace on Macs $process-function-edit:backspace: { break-if-!= - # if not at start of some word, delete grapheme before cursor within current word + # if not at start of some word, delete code-point-utf8 before cursor within current word var at-start?/eax: boolean <- cursor-at-start? 
cursor-word compare at-start?, 0/false { @@ -325,25 +325,25 @@ fn process-function-edit _self: (addr environment), _function: (addr function), copy-object new-prev-word-ah, cursor-word-ah return } - # if start of word is quote and grapheme before cursor is not, just insert it as usual + # if start of word is quote and code-point-utf8 before cursor is not, just insert it as usual # TODO: support string escaping { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x22/double-quote + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x22/double-quote break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x22/double-quote + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x22/double-quote break-if-= break $process-function-edit:space } - # if start of word is '[' and grapheme before cursor is not ']', just insert it as usual + # if start of word is '[' and code-point-utf8 before cursor is not ']', just insert it as usual # TODO: support nested arrays { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x5b/[ + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x5b/[ break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x5d/] + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x5d/] break-if-= break $process-function-edit:space } @@ -368,26 +368,26 @@ fn process-function-edit _self: (addr environment), _function: (addr function), var at-end?/eax: boolean <- cursor-at-end? 
cursor-word compare at-end?, 0/false break-if-!= - var g/eax: grapheme <- pop-after-cursor cursor-word - add-grapheme-to-word next-word, g + var g/eax: code-point-utf8 <- pop-after-cursor cursor-word + add-code-point-utf8-to-word next-word, g loop } cursor-to-start next-word return } # otherwise insert key within current word - var g/edx: grapheme <- copy key - var print?/eax: boolean <- real-grapheme? key - $process-function-edit:real-grapheme: { + var g/edx: code-point-utf8 <- copy key + var print?/eax: boolean <- real-code-point-utf8? key + $process-function-edit:real-code-point-utf8: { compare print?, 0/false break-if-= - add-grapheme-to-word cursor-word, g + add-code-point-utf8-to-word cursor-word, g return } # silently ignore other hotkeys } -fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: grapheme { +fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var sandbox/edi: (addr sandbox) <- copy _sandbox var rename-word-mode-ah?/ecx: (addr handle word) <- get sandbox, partial-name-for-cursor-word @@ -413,7 +413,7 @@ fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: gra process-sandbox-edit self, sandbox, key } -fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key: grapheme { +fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var sandbox/edi: (addr sandbox) <- copy _sandbox var cursor-call-path-ah/eax: (addr handle call-path-element) <- get sandbox, cursor-call-path @@ -730,7 +730,7 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key compare key, 0x7f/del # backspace on Macs $process-sandbox-edit:backspace: { break-if-!= - # if not at start of some word, delete grapheme before cursor within current word + # if not at start of some word, delete code-point-utf8 before 
cursor within current word var at-start?/eax: boolean <- cursor-at-start? cursor-word compare at-start?, 0/false { @@ -766,25 +766,25 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key decrement-final-element cursor-call-path return } - # if start of word is quote and grapheme before cursor is not, just insert it as usual + # if start of word is quote and code-point-utf8 before cursor is not, just insert it as usual # TODO: support string escaping { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x22/double-quote + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x22/double-quote break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x22/double-quote + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x22/double-quote break-if-= break $process-sandbox-edit:space } - # if start of word is '[' and grapheme before cursor is not ']', just insert it as usual + # if start of word is '[' and code-point-utf8 before cursor is not ']', just insert it as usual # TODO: support nested arrays { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x5b/[ + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x5b/[ break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x5d/] + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x5d/] break-if-= break $process-sandbox-edit:space } @@ -809,8 +809,8 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key var at-end?/eax: boolean <- cursor-at-end? 
cursor-word compare at-end?, 0/false break-if-!= - var g/eax: grapheme <- pop-after-cursor cursor-word - add-grapheme-to-word next-word, g + var g/eax: code-point-utf8 <- pop-after-cursor cursor-word + add-code-point-utf8-to-word next-word, g loop } cursor-to-start next-word @@ -838,12 +838,12 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key return } # otherwise insert key within current word - var g/edx: grapheme <- copy key - var print?/eax: boolean <- real-grapheme? key - $process-sandbox-edit:real-grapheme: { + var g/edx: code-point-utf8 <- copy key + var print?/eax: boolean <- real-code-point-utf8? key + $process-sandbox-edit:real-code-point-utf8: { compare print?, 0/false break-if-= - add-grapheme-to-word cursor-word, g + add-code-point-utf8-to-word cursor-word, g return } # silently ignore other hotkeys @@ -852,7 +852,7 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key # collect new name in partial-name-for-cursor-word, and then rename the word # at cursor to it # Precondition: cursor-call-path is a singleton (not within a call) -fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { +fn process-sandbox-rename _sandbox: (addr sandbox), key: code-point-utf8 { var sandbox/esi: (addr sandbox) <- copy _sandbox var new-name-ah/edi: (addr handle word) <- get sandbox, partial-name-for-cursor-word # if 'esc' pressed, cancel rename @@ -911,7 +911,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { { var new-name/eax: (addr word) <- lookup *new-name-ah cursor-to-start new-name - add-grapheme-to-word new-name, 0x3d/= + add-code-point-utf8-to-word new-name, 0x3d/= } # append name to new line chain-words new-line-word-ah, new-name-ah @@ -941,7 +941,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { compare key, 0x7f/del # backspace on Macs $process-sandbox-rename:backspace: { break-if-!= - # if not at start, delete grapheme before cursor + # if not 
at start, delete code-point-utf8 before cursor var new-name/eax: (addr word) <- lookup *new-name-ah var at-start?/eax: boolean <- cursor-at-start? new-name compare at-start?, 0/false @@ -953,12 +953,12 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { return } # otherwise insert key within current word - var print?/eax: boolean <- real-grapheme? key - $process-sandbox-rename:real-grapheme: { + var print?/eax: boolean <- real-code-point-utf8? key + $process-sandbox-rename:real-code-point-utf8: { compare print?, 0/false break-if-= var new-name/eax: (addr word) <- lookup *new-name-ah - add-grapheme-to-word new-name, key + add-code-point-utf8-to-word new-name, key return } # silently ignore other hotkeys @@ -968,7 +968,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { # of the sandbox to be a new function with that name. Replace the last line # with a call to the appropriate function. # Precondition: cursor-call-path is a singleton (not within a call) -fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle function), key: grapheme { +fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle function), key: code-point-utf8 { var sandbox/esi: (addr sandbox) <- copy _sandbox var new-name-ah/edi: (addr handle word) <- get sandbox, partial-name-for-function # if 'esc' pressed, cancel define @@ -1033,7 +1033,7 @@ fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle func compare key, 0x7f/del # backspace on Macs $process-sandbox-define:backspace: { break-if-!= - # if not at start, delete grapheme before cursor + # if not at start, delete code-point-utf8 before cursor var new-name/eax: (addr word) <- lookup *new-name-ah var at-start?/eax: boolean <- cursor-at-start? 
new-name compare at-start?, 0/false @@ -1045,12 +1045,12 @@ fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle func return } # otherwise insert key within current word - var print?/eax: boolean <- real-grapheme? key - $process-sandbox-define:real-grapheme: { + var print?/eax: boolean <- real-code-point-utf8? key + $process-sandbox-define:real-code-point-utf8: { compare print?, 0/false break-if-= var new-name/eax: (addr word) <- lookup *new-name-ah - add-grapheme-to-word new-name, key + add-code-point-utf8-to-word new-name, key return } # silently ignore other hotkeys @@ -2107,7 +2107,7 @@ fn render-function-right-aligned screen: (addr screen), row: int, right-col: int start-color screen, 0, 0xf7 clear-rect screen, row, col, new-row, col2 col <- add 1 -#? var dummy/eax: grapheme <- read-key-from-real-keyboard +#? var dummy/eax: code-point-utf8 <- read-key-from-real-keyboard render-function screen, row, col, f new-row <- add 1/function-bottom-margin col <- subtract 1/function-left-padding @@ -2144,7 +2144,7 @@ fn render-function screen: (addr screen), row: int, col: int, _f: (addr function render-line-without-stack screen, body, row, col, cursor-word, cursor-row, cursor-col } -fn real-grapheme? g: grapheme -> _/eax: boolean { +fn real-code-point-utf8? 
g: code-point-utf8 -> _/eax: boolean { # if g == newline return true compare g, 0xa { diff --git a/linux/tile/gap-buffer.mu b/linux/tile/gap-buffer.mu index 0132daf0..1441684b 100644 --- a/linux/tile/gap-buffer.mu +++ b/linux/tile/gap-buffer.mu @@ -1,14 +1,14 @@ type gap-buffer { - left: grapheme-stack - right: grapheme-stack + left: code-point-utf8-stack + right: code-point-utf8-stack } fn initialize-gap-buffer _self: (addr gap-buffer) { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - initialize-grapheme-stack left, 0x10/max-word-size - var right/eax: (addr grapheme-stack) <- get self, right - initialize-grapheme-stack right, 0x10/max-word-size + var left/eax: (addr code-point-utf8-stack) <- get self, left + initialize-code-point-utf8-stack left, 0x10/max-word-size + var right/eax: (addr code-point-utf8-stack) <- get self, right + initialize-code-point-utf8-stack right, 0x10/max-word-size } # just for tests @@ -21,8 +21,8 @@ fn initialize-gap-buffer-with self: (addr gap-buffer), s: (addr array byte) { var done?/eax: boolean <- stream-empty? 
stream compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme stream - add-grapheme-at-gap self, g + var g/eax: code-point-utf8 <- read-code-point-utf8 stream + add-code-point-utf8-at-gap self, g loop } } @@ -37,44 +37,44 @@ fn gap-buffer-to-string self: (addr gap-buffer), out: (addr handle array byte) { fn emit-gap-buffer _self: (addr gap-buffer), out: (addr stream byte) { var self/esi: (addr gap-buffer) <- copy _self clear-stream out - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left emit-stack-from-bottom left, out - var right/eax: (addr grapheme-stack) <- get self, right + var right/eax: (addr code-point-utf8-stack) <- get self, right emit-stack-from-top right, out } # dump stack from bottom to top -fn emit-stack-from-bottom _self: (addr grapheme-stack), out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-from-bottom _self: (addr code-point-utf8-stack), out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- increment loop } } # dump stack from top to bottom -fn emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr 
array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-from-top _self: (addr code-point-utf8-stack), out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy *top-addr i <- decrement { compare i, 0 break-if-< - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- decrement loop } @@ -82,33 +82,33 @@ fn emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) { fn render-gap-buffer screen: (addr screen), _gap: (addr gap-buffer) { var gap/esi: (addr gap-buffer) <- copy _gap - var left/eax: (addr grapheme-stack) <- get gap, left + var left/eax: (addr code-point-utf8-stack) <- get gap, left render-stack-from-bottom left, screen - var right/eax: (addr grapheme-stack) <- get gap, right + var right/eax: (addr code-point-utf8-stack) <- get gap, right render-stack-from-top right, screen } fn gap-buffer-length _gap: (addr gap-buffer) -> _/eax: int { var gap/esi: (addr gap-buffer) <- copy _gap - var left/eax: (addr grapheme-stack) <- get gap, left + var left/eax: (addr code-point-utf8-stack) <- get gap, left var tmp/eax: (addr int) <- get left, top var left-length/ecx: int <- copy *tmp - var right/esi: (addr grapheme-stack) <- get gap, right + var right/esi: (addr code-point-utf8-stack) <- get gap, right tmp <- get right, top var result/eax: int <- copy *tmp result <- add left-length return result } -fn add-grapheme-at-gap _self: (addr gap-buffer), g: grapheme { +fn add-code-point-utf8-at-gap _self: (addr gap-buffer), g: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) 
<- get self, left - push-grapheme-stack left, g + var left/eax: (addr code-point-utf8-stack) <- get self, left + push-code-point-utf8-stack left, g } fn gap-to-start self: (addr gap-buffer) { { - var curr/eax: grapheme <- gap-left self + var curr/eax: code-point-utf8 <- gap-left self compare curr, -1 loop-if-!= } @@ -116,7 +116,7 @@ fn gap-to-start self: (addr gap-buffer) { fn gap-to-end self: (addr gap-buffer) { { - var curr/eax: grapheme <- gap-right self + var curr/eax: code-point-utf8 <- gap-right self compare curr, -1 loop-if-!= } @@ -124,96 +124,96 @@ fn gap-to-end self: (addr gap-buffer) { fn gap-at-start? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-empty? left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-empty? left return result } fn gap-at-end? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var right/eax: (addr grapheme-stack) <- get self, right - var result/eax: boolean <- grapheme-stack-empty? right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var result/eax: boolean <- code-point-utf8-stack-empty? 
right return result } -fn gap-right _self: (addr gap-buffer) -> _/eax: grapheme { +fn gap-right _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var g/eax: grapheme <- copy 0 - var right/ecx: (addr grapheme-stack) <- get self, right - g <- pop-grapheme-stack right + var g/eax: code-point-utf8 <- copy 0 + var right/ecx: (addr code-point-utf8-stack) <- get self, right + g <- pop-code-point-utf8-stack right compare g, -1 { break-if-= - var left/ecx: (addr grapheme-stack) <- get self, left - push-grapheme-stack left, g + var left/ecx: (addr code-point-utf8-stack) <- get self, left + push-code-point-utf8-stack left, g } return g } -fn gap-left _self: (addr gap-buffer) -> _/eax: grapheme { +fn gap-left _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var g/eax: grapheme <- copy 0 + var g/eax: code-point-utf8 <- copy 0 { - var left/ecx: (addr grapheme-stack) <- get self, left - g <- pop-grapheme-stack left + var left/ecx: (addr code-point-utf8-stack) <- get self, left + g <- pop-code-point-utf8-stack left } compare g, -1 { break-if-= - var right/ecx: (addr grapheme-stack) <- get self, right - push-grapheme-stack right, g + var right/ecx: (addr code-point-utf8-stack) <- get self, right + push-code-point-utf8-stack right, g } return g } fn gap-index _self: (addr gap-buffer) -> _/eax: int { var self/eax: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left var top-addr/eax: (addr int) <- get left, top var result/eax: int <- copy *top-addr return result } -fn first-grapheme-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { +fn first-code-point-utf8-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # try to read from left - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr 
code-point-utf8-stack) <- get self, left var top-addr/ecx: (addr int) <- get left, top compare *top-addr, 0 { break-if-<= - var data-ah/eax: (addr handle array grapheme) <- get left, data - var data/eax: (addr array grapheme) <- lookup *data-ah - var result-addr/eax: (addr grapheme) <- index data, 0 + var data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah + var result-addr/eax: (addr code-point-utf8) <- index data, 0 return *result-addr } # try to read from right - var right/eax: (addr grapheme-stack) <- get self, right + var right/eax: (addr code-point-utf8-stack) <- get self, right top-addr <- get right, top compare *top-addr, 0 { break-if-<= - var data-ah/eax: (addr handle array grapheme) <- get right, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/eax: (addr handle array code-point-utf8) <- get right, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/ecx: int <- copy *top-addr top <- decrement - var result-addr/eax: (addr grapheme) <- index data, top + var result-addr/eax: (addr code-point-utf8) <- index data, top return *result-addr } # give up return -1 } -fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { +fn code-point-utf8-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # try to read from left - var left/ecx: (addr grapheme-stack) <- get self, left + var left/ecx: (addr code-point-utf8-stack) <- get self, left var top-addr/edx: (addr int) <- get left, top compare *top-addr, 0 { break-if-<= - var result/eax: grapheme <- pop-grapheme-stack left - push-grapheme-stack left, result + var result/eax: code-point-utf8 <- pop-code-point-utf8-stack left + push-code-point-utf8-stack left, result return result } # give up @@ -222,27 +222,27 @@ fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: graph 
fn delete-before-gap _self: (addr gap-buffer) { var self/eax: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - var dummy/eax: grapheme <- pop-grapheme-stack left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var dummy/eax: code-point-utf8 <- pop-code-point-utf8-stack left } -fn pop-after-gap _self: (addr gap-buffer) -> _/eax: grapheme { +fn pop-after-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/eax: (addr gap-buffer) <- copy _self - var right/eax: (addr grapheme-stack) <- get self, right - var result/eax: grapheme <- pop-grapheme-stack right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var result/eax: code-point-utf8 <- pop-code-point-utf8-stack right return result } fn gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - # complication: graphemes may be multiple bytes + # complication: code-point-utf8s may be multiple bytes # so don't rely on length # instead turn the expected result into a stream and arrange to read from it in order var stream-storage: (stream byte 0x10/max-word-size) var expected-stream/ecx: (addr stream byte) <- address stream-storage write expected-stream, s # compare left - var left/edx: (addr grapheme-stack) <- get self, left + var left/edx: (addr code-point-utf8-stack) <- get self, left var result/eax: boolean <- prefix-match? left, expected-stream compare result, 0/false { @@ -250,7 +250,7 @@ fn gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: bo return result } # compare right - var right/edx: (addr grapheme-stack) <- get self, right + var right/edx: (addr code-point-utf8-stack) <- get self, right result <- suffix-match? right, expected-stream compare result, 0/false { @@ -267,10 +267,10 @@ fn test-gap-buffer-equal-from-end? 
{ var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer g # - var c/eax: grapheme <- copy 0x61/a - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c + var c/eax: code-point-utf8 <- copy 0x61/a + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c # gap is at end (right is empty) var _result/eax: boolean <- gap-buffer-equal? g, "aaa" var result/eax: int <- copy _result @@ -282,11 +282,11 @@ fn test-gap-buffer-equal-from-middle? { var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer g # - var c/eax: grapheme <- copy 0x61/a - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - var dummy/eax: grapheme <- gap-left g + var c/eax: code-point-utf8 <- copy 0x61/a + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + var dummy/eax: code-point-utf8 <- gap-left g # gap is in the middle var _result/eax: boolean <- gap-buffer-equal? g, "aaa" var result/eax: int <- copy _result @@ -298,11 +298,11 @@ fn test-gap-buffer-equal-from-start? 
{ var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer g # - var c/eax: grapheme <- copy 0x61/a - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - var dummy/eax: grapheme <- gap-left g + var c/eax: code-point-utf8 <- copy 0x61/a + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + var dummy/eax: code-point-utf8 <- gap-left g dummy <- gap-left g dummy <- gap-left g # gap is at the start @@ -319,25 +319,25 @@ fn copy-gap-buffer _src-ah: (addr handle gap-buffer), _dest-ah: (addr handle gap var dest-ah/eax: (addr handle gap-buffer) <- copy _dest-ah var _dest-a/eax: (addr gap-buffer) <- lookup *dest-ah var dest-a/edi: (addr gap-buffer) <- copy _dest-a - # copy left grapheme-stack - var src/ecx: (addr grapheme-stack) <- get src-a, left - var dest/edx: (addr grapheme-stack) <- get dest-a, left - copy-grapheme-stack src, dest - # copy right grapheme-stack + # copy left code-point-utf8-stack + var src/ecx: (addr code-point-utf8-stack) <- get src-a, left + var dest/edx: (addr code-point-utf8-stack) <- get dest-a, left + copy-code-point-utf8-stack src, dest + # copy right code-point-utf8-stack src <- get src-a, right dest <- get dest-a, right - copy-grapheme-stack src, dest + copy-code-point-utf8-stack src, dest } fn gap-buffer-is-decimal-integer? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var curr/ecx: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-is-decimal-integer? curr + var curr/ecx: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-is-decimal-integer? curr { compare result, 0/false break-if-= curr <- get self, right - result <- grapheme-stack-is-decimal-integer? curr + result <- code-point-utf8-stack-is-decimal-integer? 
curr } return result } diff --git a/linux/tile/grapheme-stack.mu b/linux/tile/grapheme-stack.mu index 0ea59ae5..c7565a1c 100644 --- a/linux/tile/grapheme-stack.mu +++ b/linux/tile/grapheme-stack.mu @@ -1,24 +1,24 @@ -type grapheme-stack { - data: (handle array grapheme) +type code-point-utf8-stack { + data: (handle array code-point-utf8) top: int } -fn initialize-grapheme-stack _self: (addr grapheme-stack), n: int { - var self/esi: (addr grapheme-stack) <- copy _self - var d/edi: (addr handle array grapheme) <- get self, data +fn initialize-code-point-utf8-stack _self: (addr code-point-utf8-stack), n: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var d/edi: (addr handle array code-point-utf8) <- get self, data populate d, n var top/eax: (addr int) <- get self, top copy-to *top, 0 } -fn clear-grapheme-stack _self: (addr grapheme-stack) { - var self/esi: (addr grapheme-stack) <- copy _self +fn clear-code-point-utf8-stack _self: (addr code-point-utf8-stack) { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top/eax: (addr int) <- get self, top copy-to *top, 0 } -fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self +fn code-point-utf8-stack-empty? _self: (addr code-point-utf8-stack) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top/eax: (addr int) <- get self, top compare *top, 0 { @@ -28,20 +28,20 @@ fn grapheme-stack-empty? 
_self: (addr grapheme-stack) -> _/eax: boolean { return 0/false } -fn push-grapheme-stack _self: (addr grapheme-stack), _val: grapheme { - var self/esi: (addr grapheme-stack) <- copy _self +fn push-code-point-utf8-stack _self: (addr code-point-utf8-stack), _val: code-point-utf8 { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/ecx: (addr int) <- get self, top - var data-ah/edx: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/edx: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/edx: int <- copy *top-addr - var dest-addr/edx: (addr grapheme) <- index data, top - var val/eax: grapheme <- copy _val + var dest-addr/edx: (addr code-point-utf8) <- index data, top + var val/eax: code-point-utf8 <- copy _val copy-to *dest-addr, val add-to *top-addr, 1 } -fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme { - var self/esi: (addr grapheme-stack) <- copy _self +fn pop-code-point-utf8-stack _self: (addr code-point-utf8-stack) -> _/eax: code-point-utf8 { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/ecx: (addr int) <- get self, top { compare *top-addr, 0 @@ -49,25 +49,25 @@ fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme { return -1 } subtract-from *top-addr, 1 - var data-ah/edx: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/edx: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/edx: int <- copy *top-addr - var result-addr/eax: (addr grapheme) <- index data, top + var result-addr/eax: (addr code-point-utf8) <- index data, top return *result-addr } -fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) { - var src/esi: (addr grapheme-stack) <- copy _src - var data-ah/edi: 
(addr handle array grapheme) <- get src, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn copy-code-point-utf8-stack _src: (addr code-point-utf8-stack), dest: (addr code-point-utf8-stack) { + var src/esi: (addr code-point-utf8-stack) <- copy _src + var data-ah/edi: (addr handle array code-point-utf8) <- get src, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get src, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - push-grapheme-stack dest, *g + var g/edx: (addr code-point-utf8) <- index data, i + push-code-point-utf8-stack dest, *g i <- increment loop } @@ -75,18 +75,18 @@ fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) # dump stack to screen from bottom to top # don't move the cursor or anything -fn render-stack-from-bottom _self: (addr grapheme-stack), screen: (addr screen) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn render-stack-from-bottom _self: (addr code-point-utf8-stack), screen: (addr screen) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - print-grapheme screen, *g + var g/edx: (addr code-point-utf8) <- index data, i + print-code-point-utf8 screen, *g i <- increment loop } @@ -94,19 +94,19 @@ fn render-stack-from-bottom _self: (addr 
grapheme-stack), screen: (addr screen) # dump stack to screen from top to bottom # don't move the cursor or anything -fn render-stack-from-top _self: (addr grapheme-stack), screen: (addr screen) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn render-stack-from-top _self: (addr code-point-utf8-stack), screen: (addr screen) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy *top-addr i <- decrement { compare i, 0 break-if-< - var g/edx: (addr grapheme) <- index data, i - print-grapheme screen, *g + var g/edx: (addr code-point-utf8) <- index data, i + print-code-point-utf8 screen, *g i <- decrement loop } @@ -114,11 +114,11 @@ fn render-stack-from-top _self: (addr grapheme-stack), screen: (addr screen) { # compare from bottom # beware: modifies 'stream', which must be disposed of after a false result -fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn prefix-match? 
_self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/ebx: int <- copy 0 { @@ -126,8 +126,8 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b break-if->= # if curr != expected, return false { - var curr-a/edx: (addr grapheme) <- index data, i - var expected/eax: grapheme <- read-grapheme s + var curr-a/edx: (addr code-point-utf8) <- index data, i + var expected/eax: code-point-utf8 <- read-code-point-utf8 s { compare expected, *curr-a break-if-= @@ -142,11 +142,11 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b # compare from bottom # beware: modifies 'stream', which must be disposed of after a false result -fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn suffix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/eax: (addr int) <- get self, top var i/ebx: int <- copy *top-addr i <- decrement @@ -154,8 +154,8 @@ fn suffix-match? 
_self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b compare i, 0 break-if-< { - var curr-a/edx: (addr grapheme) <- index data, i - var expected/eax: grapheme <- read-grapheme s + var curr-a/edx: (addr code-point-utf8) <- index data, i + var expected/eax: code-point-utf8 <- read-code-point-utf8 s # if curr != expected, return false { compare expected, *curr-a @@ -169,18 +169,18 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b return 1 # true } -fn grapheme-stack-is-decimal-integer? _self: (addr grapheme-stack) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/eax: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edx: (addr array grapheme) <- copy _data +fn code-point-utf8-stack-is-decimal-integer? _self: (addr code-point-utf8-stack) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/eax: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edx: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/ebx: int <- copy 0 var result/eax: boolean <- copy 1/true - $grapheme-stack-is-integer?:loop: { + $code-point-utf8-stack-is-integer?:loop: { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i + var g/edx: (addr code-point-utf8) <- index data, i result <- decimal-digit? 
*g compare result, 0/false break-if-= diff --git a/linux/tile/main.mu b/linux/tile/main.mu index e0daaf1b..f2561a28 100644 --- a/linux/tile/main.mu +++ b/linux/tile/main.mu @@ -54,7 +54,7 @@ fn interactive { initialize-environment env { render env - var key/eax: grapheme <- read-key-from-real-keyboard + var key/eax: code-point-utf8 <- read-key-from-real-keyboard compare key, 0x11/ctrl-q break-if-= process env, key @@ -79,7 +79,7 @@ fn process-all env: (addr environment), cmds: (addr array byte) { var done?/eax: boolean <- stream-empty? cmds-stream-a compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme cmds-stream-a + var g/eax: code-point-utf8 <- read-code-point-utf8 cmds-stream-a process env, g loop } @@ -105,7 +105,7 @@ fn repl { var done?/eax: boolean <- stream-empty? line compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme line + var g/eax: code-point-utf8 <- read-code-point-utf8 line process env, g loop } diff --git a/linux/tile/surface.mu b/linux/tile/surface.mu index 2e353022..e86e22b9 100644 --- a/linux/tile/surface.mu +++ b/linux/tile/surface.mu @@ -111,10 +111,10 @@ fn print-surface-cell-at _self: (addr surface), screen-row: int, screen-col: int compare idx, 0 { break-if->= - var space/ecx: grapheme <- copy 0x20 + var space/ecx: code-point-utf8 <- copy 0x20 var screen-ah/edi: (addr handle screen) <- get self, screen var screen/eax: (addr screen) <- lookup *screen-ah - print-grapheme screen, space + print-code-point-utf8 screen, space return } # otherwise print the appropriate screen-cell @@ -156,9 +156,9 @@ fn print-screen-cell screen: (addr screen), _cell: (addr screen-cell) { break-if-= start-blinking screen } - var g/eax: (addr grapheme) <- get cell, data - print-grapheme screen, *g -#? var g2/eax: grapheme <- copy *g + var g/eax: (addr code-point-utf8) <- get cell, data + print-code-point-utf8 screen, *g +#? var g2/eax: code-point-utf8 <- copy *g #? var g3/eax: int <- copy g2 #? 
print-int32-hex-to-real-screen g3 #? print-string-to-real-screen "\n" @@ -264,7 +264,7 @@ fn num-lines in: (addr array byte) -> _/ecx: int { var done?/eax: boolean <- stream-empty? s-addr compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme s-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr compare g, 0xa/newline loop-if-!= result <- increment @@ -282,7 +282,7 @@ fn first-line-length in: (addr array byte) -> _/edx: int { var done?/eax: boolean <- stream-empty? s-addr compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme s-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr compare g, 0xa/newline break-if-= result <- increment @@ -301,12 +301,12 @@ fn fill-in _out: (addr array screen-cell), in: (addr array byte) { var done?/eax: boolean <- stream-empty? s-addr compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme s-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr compare g, 0xa/newline loop-if-= var offset/edx: (offset screen-cell) <- compute-offset out, idx var dest/edx: (addr screen-cell) <- index out, offset - var dest2/edx: (addr grapheme) <- get dest, data + var dest2/edx: (addr code-point-utf8) <- get dest, data copy-to *dest2, g idx <- increment loop diff --git a/linux/tile/value.mu b/linux/tile/value.mu index 8bd01676..0eacd8be 100644 --- a/linux/tile/value.mu +++ b/linux/tile/value.mu @@ -90,15 +90,15 @@ fn render-number screen: (addr screen), val: float, top-level?: boolean { fg <- copy 0 } start-color screen, fg, bg - print-grapheme screen, 0x20/space + print-code-point-utf8 screen, 0x20/space print-float-decimal-approximate screen, val, 3 - print-grapheme screen, 0x20/space + print-code-point-utf8 screen, 0x20/space } fn render-array-at screen: (addr screen), row: int, col: int, _a: (addr array value) { start-color screen, 0xf2, 7 # don't surround in spaces - print-grapheme screen, 0x5b/[ + print-code-point-utf8 screen, 0x5b/[ increment col var 
a/esi: (addr array value) <- copy _a var max/ecx: int <- length a @@ -122,7 +122,7 @@ fn render-array-at screen: (addr screen), row: int, col: int, _a: (addr array va i <- increment loop } - print-grapheme screen, 0x5d/] + print-code-point-utf8 screen, 0x5d/] } fn render-screen screen: (addr screen), row: int, col: int, _target-screen: (addr screen) { @@ -179,13 +179,13 @@ fn print-screen-cell-of-fake-screen screen: (addr screen), _target: (addr screen start-blinking screen start-color screen, 0, 1 } - var g/eax: grapheme <- screen-grapheme-at target, row, col + var g/eax: code-point-utf8 <- screen-code-point-utf8-at target, row, col { compare g, 0 break-if-!= g <- copy 0x20/space } - print-grapheme screen, g + print-code-point-utf8 screen, g reset-formatting screen } diff --git a/linux/tile/word.mu b/linux/tile/word.mu index b4f5000b..3b4851f0 100644 --- a/linux/tile/word.mu +++ b/linux/tile/word.mu @@ -58,15 +58,15 @@ fn move-word-contents _src-ah: (addr handle word), _dest-ah: (addr handle word) cursor-to-start src var src-data-ah/eax: (addr handle gap-buffer) <- get src, scalar-data var src-data/eax: (addr gap-buffer) <- lookup *src-data-ah - var src-stack/ecx: (addr grapheme-stack) <- get src-data, right + var src-stack/ecx: (addr code-point-utf8-stack) <- get src-data, right { - var done?/eax: boolean <- grapheme-stack-empty? src-stack + var done?/eax: boolean <- code-point-utf8-stack-empty? src-stack compare done?, 0/false break-if-!= - var g/eax: grapheme <- pop-grapheme-stack src-stack -#? print-grapheme 0, g + var g/eax: code-point-utf8 <- pop-code-point-utf8-stack src-stack +#? print-code-point-utf8 0, g #? 
print-string 0, "\n" - add-grapheme-to-word dest, g + add-code-point-utf8-to-word dest, g loop } } @@ -79,17 +79,17 @@ fn copy-word-contents-before-cursor _src-ah: (addr handle word), _dest-ah: (addr var src/eax: (addr word) <- lookup *src-ah var src-data-ah/eax: (addr handle gap-buffer) <- get src, scalar-data var src-data/eax: (addr gap-buffer) <- lookup *src-data-ah - var src-stack/ecx: (addr grapheme-stack) <- get src-data, left - var src-stack-data-ah/eax: (addr handle array grapheme) <- get src-stack, data - var _src-stack-data/eax: (addr array grapheme) <- lookup *src-stack-data-ah - var src-stack-data/edx: (addr array grapheme) <- copy _src-stack-data + var src-stack/ecx: (addr code-point-utf8-stack) <- get src-data, left + var src-stack-data-ah/eax: (addr handle array code-point-utf8) <- get src-stack, data + var _src-stack-data/eax: (addr array code-point-utf8) <- lookup *src-stack-data-ah + var src-stack-data/edx: (addr array code-point-utf8) <- copy _src-stack-data var top-addr/ecx: (addr int) <- get src-stack, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index src-stack-data, i - add-grapheme-to-word dest, *g + var g/edx: (addr code-point-utf8) <- index src-stack-data, i + add-code-point-utf8-to-word dest, *g i <- increment loop } @@ -129,27 +129,27 @@ fn final-word _in: (addr handle word), out: (addr handle word) { copy-object curr-ah, out # modify 'out' right at the end, just in case it's same as 'in' } -fn first-grapheme _self: (addr word) -> _/eax: grapheme { +fn first-code-point-utf8 _self: (addr word) -> _/eax: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var result/eax: grapheme <- first-grapheme-in-gap-buffer data + var result/eax: code-point-utf8 <- first-code-point-utf8-in-gap-buffer data return result } -fn grapheme-before-cursor _self: (addr word) -> _/eax: 
grapheme { +fn code-point-utf8-before-cursor _self: (addr word) -> _/eax: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var result/eax: grapheme <- grapheme-before-cursor-in-gap-buffer data + var result/eax: code-point-utf8 <- code-point-utf8-before-cursor-in-gap-buffer data return result } -fn add-grapheme-to-word _self: (addr word), c: grapheme { +fn add-code-point-utf8-to-word _self: (addr word), c: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - add-grapheme-at-gap data, c + add-code-point-utf8-at-gap data, c } fn cursor-at-start? _self: (addr word) -> _/eax: boolean { @@ -172,14 +172,14 @@ fn cursor-left _self: (addr word) { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var dummy/eax: grapheme <- gap-left data + var dummy/eax: code-point-utf8 <- gap-left data } fn cursor-right _self: (addr word) { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var dummy/eax: grapheme <- gap-right data + var dummy/eax: code-point-utf8 <- gap-right data } fn cursor-to-start _self: (addr word) { @@ -211,11 +211,11 @@ fn delete-before-cursor _self: (addr word) { delete-before-gap data } -fn pop-after-cursor _self: (addr word) -> _/eax: grapheme { +fn pop-after-cursor _self: (addr word) -> _/eax: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var result/eax: grapheme <- pop-after-gap data + var result/eax: code-point-utf8 <- pop-after-gap data return result } @@ -553,14 
+553,14 @@ fn parse-words in: (addr array byte), out-ah: (addr handle word) { var done?/eax: boolean <- stream-empty? in-stream-a compare done?, 0/false break-if-!= - var _g/eax: grapheme <- read-grapheme in-stream-a - var g/ecx: grapheme <- copy _g + var _g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-a + var g/ecx: code-point-utf8 <- copy _g # if not space, insert compare g, 0x20/space { break-if-= var cursor-word/eax: (addr word) <- lookup *cursor-word-ah - add-grapheme-to-word cursor-word, g + add-code-point-utf8-to-word cursor-word, g loop $parse-words:loop } # otherwise insert word after and move cursor to it diff --git a/linux/vocabulary.md b/linux/vocabulary.md index f1c6e3a5..2eefae33 100644 --- a/linux/vocabulary.md +++ b/linux/vocabulary.md @@ -206,8 +206,8 @@ doesn't yet parse floating-point literals: - `print-int32-buffered`: int -> buffered-file - textual representation in hex, including '0x' prefix -- `write-grapheme`: grapheme -> stream -- `to-grapheme`: code-point -> grapheme +- `write-code-point-utf8`: code-point-utf8 -> stream +- `to-utf8`: code-point -> code-point-utf8 - `write-float-decimal-approximate`: float, precision: int -> stream @@ -226,8 +226,8 @@ there isn't enough room in the destination stream. - `read-line-buffered`: buffered-file -> stream - Will abort the entire program if there isn't enough room. -- `read-grapheme`: stream -> grapheme -- `read-grapheme-buffered`: buffered-file -> grapheme +- `read-code-point-utf8`: stream -> code-point-utf8 +- `read-code-point-utf8-buffered`: buffered-file -> code-point-utf8 - `read-lines`: buffered-file -> array of strings @@ -268,7 +268,7 @@ Unix terminal properties supported by almost all modern terminal emulators. - `print-string`: string -> screen - `print-stream` -- `print-grapheme` +- `print-code-point-utf8` - `print-code-point` - `print-int32-hex` - `print-int32-decimal` @@ -290,7 +290,7 @@ manipulated. 
Assertions for tests: -- `screen-grapheme-at` +- `screen-code-point-utf8-at` - `screen-color-at` - `screen-background-color-at` - `screen-bold-at?` diff --git a/mu.md b/mu.md index cfecadf0..428a28d0 100644 --- a/mu.md +++ b/mu.md @@ -80,7 +80,7 @@ You can store several types in these registers: - (addr T) (address into memory) - byte (uses only 8 bits) - code-point (Unicode) - - grapheme (code-point encoded in UTF-8) + - code-point-utf8 (code-point encoded in UTF-8) There's one 32-bit type you _cannot_ store in these registers: - float @@ -579,9 +579,9 @@ are a few functions to help with them: # bytes append-byte s: (addr stream byte), var: int # write lower byte of var var/eax: byte <- read-byte s: (addr stream byte) - # 32-bit graphemes encoded in UTF-8 - write-grapheme out: (addr stream byte), g: grapheme - g/eax: grapheme <- read-grapheme in: (addr stream byte) + # 32-bit code-point-utf8s encoded in UTF-8 + write-code-point-utf8 out: (addr stream byte), g: code-point-utf8 + g/eax: code-point-utf8 <- read-code-point-utf8 in: (addr stream byte) ``` You can check if a stream is empty or full: diff --git a/shell/README.md b/shell/README.md index 3a9cdf3c..180782eb 100644 --- a/shell/README.md +++ b/shell/README.md @@ -150,7 +150,7 @@ def (a <> b) ``` To permit arbitrary infix operators, the Mu shell partitions the space of -graphemes between operators and regular symbols. As a result, you can't define +code-point-utf8s between operators and regular symbols. As a result, you can't define symbols mixing the two. ``` '*global* diff --git a/shell/environment.mu b/shell/environment.mu index c3d78d86..439d5423 100644 --- a/shell/environment.mu +++ b/shell/environment.mu @@ -2,7 +2,7 @@ # # vim:textwidth& # It would be nice for tests to use a narrower screen than the standard 0x80 of -# 1024 pixels with 8px-wide graphemes. But it complicates rendering logic to +# 1024 pixels with 8px-wide code-point-utf8s. 
But it complicates rendering logic to # make width configurable, so we just use longer lines than usual. type environment { @@ -93,7 +93,7 @@ fn type-in self: (addr environment), screen: (addr screen), keys: (addr array by var done?/eax: boolean <- stream-empty? input-stream compare done?, 0/false break-if-!= - var key/eax: grapheme <- read-grapheme input-stream + var key/eax: code-point-utf8 <- read-code-point-utf8 input-stream edit-environment self, key, 0/no-disk render-environment screen, self loop @@ -145,7 +145,7 @@ fn render-environment screen: (addr screen), _self: (addr environment) { render-sandbox-menu screen, sandbox } -fn edit-environment _self: (addr environment), key: grapheme, data-disk: (addr disk) { +fn edit-environment _self: (addr environment), key: code-point-utf8, data-disk: (addr disk) { var self/esi: (addr environment) <- copy _self var globals/edi: (addr global-table) <- get self, globals var sandbox/ecx: (addr sandbox) <- get self, sandbox diff --git a/shell/global.mu b/shell/global.mu index f6a779f9..329556c1 100644 --- a/shell/global.mu +++ b/shell/global.mu @@ -230,7 +230,7 @@ fn render-globals-menu screen: (addr screen), _self: (addr global-table) { draw-text-rightward-from-cursor screen, " >> ", width, 7/fg, 0xc5/bg=blue-bg } -fn edit-globals _self: (addr global-table), key: grapheme { +fn edit-globals _self: (addr global-table), key: code-point-utf8 { var self/esi: (addr global-table) <- copy _self # ctrl-s { diff --git a/shell/infix.mu b/shell/infix.mu index 41e8fa5d..2ca1b875 100644 --- a/shell/infix.mu +++ b/shell/infix.mu @@ -312,8 +312,8 @@ fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) { var buffer/edi: (addr gap-buffer) <- address buffer-storage initialize-gap-buffer buffer, 0x40/max-symbol-size # scan for first non-$ - var g/eax: grapheme <- read-grapheme sym-data - add-grapheme-at-gap buffer, g + var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data + add-code-point-utf8-at-gap buffer, g { compare 
g, 0x24/dollar break-if-!= @@ -323,28 +323,28 @@ fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) { break-if-= return # symbol is all '$'s; do nothing } - g <- read-grapheme sym-data - add-grapheme-at-gap buffer, g + g <- read-code-point-utf8 sym-data + add-code-point-utf8-at-gap buffer, g loop } var tokenization-needed?: boolean - var _operator-so-far?/eax: boolean <- operator-grapheme? g + var _operator-so-far?/eax: boolean <- operator-code-point-utf8? g var operator-so-far?/ecx: boolean <- copy _operator-so-far? { var done?/eax: boolean <- stream-empty? sym-data compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme sym-data + var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data { - var curr-operator?/eax: boolean <- operator-grapheme? g + var curr-operator?/eax: boolean <- operator-code-point-utf8? g compare curr-operator?, operator-so-far? break-if-= # state change; insert a space - add-grapheme-at-gap buffer, 0x20/space + add-code-point-utf8-at-gap buffer, 0x20/space operator-so-far? <- copy curr-operator? copy-to tokenization-needed?, 1/true } - add-grapheme-at-gap buffer, g + add-code-point-utf8-at-gap buffer, g loop } compare tokenization-needed?, 0/false @@ -406,7 +406,7 @@ fn test-infix { # helpers -# return true if x is composed entirely of operator graphemes, optionally prefixed with some '$'s +# return true if x is composed entirely of operator code-point-utf8s, optionally prefixed with some '$'s # some operator, some non-operator => pre-tokenized symbol; return false # all '$'s => return false fn operator-symbol? _x: (addr cell) -> _/eax: boolean { @@ -421,7 +421,7 @@ fn operator-symbol? 
_x: (addr cell) -> _/eax: boolean { var _x-data/eax: (addr stream byte) <- lookup *x-data-ah var x-data/esi: (addr stream byte) <- copy _x-data rewind-stream x-data - var g/eax: grapheme <- read-grapheme x-data + var g/eax: code-point-utf8 <- read-code-point-utf8 x-data # special case: '$' is reserved for gensyms, and can work with either # operator or non-operator symbols. { @@ -434,12 +434,12 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean { # '$', '$$', '$$$', etc. are regular symbols return 0/false } - g <- read-grapheme x-data + g <- read-code-point-utf8 x-data loop } { { - var result/eax: boolean <- operator-grapheme? g + var result/eax: boolean <- operator-code-point-utf8? g compare result, 0/false break-if-!= return 0/false @@ -449,13 +449,13 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean { compare done?, 0/false } break-if-!= - g <- read-grapheme x-data + g <- read-code-point-utf8 x-data loop } return 1/true } -fn operator-grapheme? g: grapheme -> _/eax: boolean { +fn operator-code-point-utf8? 
g: code-point-utf8 -> _/eax: boolean { # '$' is special and can be in either a symbol or operator; here we treat it as a symbol compare g, 0x25/percent { diff --git a/shell/main.mu b/shell/main.mu index 1f0e2de9..a588324a 100644 --- a/shell/main.mu +++ b/shell/main.mu @@ -13,7 +13,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) var key/eax: byte <- read-key keyboard compare key, 0 loop-if-= - var key/eax: grapheme <- copy key + var key/eax: code-point-utf8 <- copy key edit-environment env, key, data-disk } loop diff --git a/shell/primitives.mu b/shell/primitives.mu index e955b531..a87009d3 100644 --- a/shell/primitives.mu +++ b/shell/primitives.mu @@ -172,7 +172,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int { y <- increment var tmpx/eax: int <- copy xmin tmpx <- draw-text-rightward screen, " key", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg - tmpx <- draw-text-rightward screen, ": keyboard -> grapheme?", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg + tmpx <- draw-text-rightward screen, ": keyboard -> code-point-utf8?", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg y <- increment var tmpx/eax: int <- copy xmin tmpx <- draw-text-rightward screen, "streams", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg @@ -183,7 +183,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int { y <- increment var tmpx/eax: int <- copy xmin tmpx <- draw-text-rightward screen, " write", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg - tmpx <- draw-text-rightward screen, ": stream grapheme -> stream", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg + tmpx <- draw-text-rightward screen, ": stream code-point-utf8 -> stream", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg y <- increment var tmpx/eax: int <- copy xmin tmpx <- draw-text-rightward screen, " rewind clear", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg @@ -191,7 +191,7 @@ fn render-primitives screen: (addr 
screen), xmin: int, xmax: int, ymax: int { y <- increment var tmpx/eax: int <- copy xmin tmpx <- draw-text-rightward screen, " read", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg - tmpx <- draw-text-rightward screen, ": stream -> grapheme", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg + tmpx <- draw-text-rightward screen, ": stream -> code-point-utf8", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg } fn primitive-global? _x: (addr global) -> _/eax: boolean { @@ -3056,7 +3056,7 @@ fn wait-for-key keyboard: (addr gap-buffer) -> _/eax: int { return result } # otherwise read from fake keyboard - var g/eax: grapheme <- read-from-gap-buffer keyboard + var g/eax: code-point-utf8 <- read-from-gap-buffer keyboard var result/eax: int <- copy g return result } @@ -3121,14 +3121,14 @@ fn apply-write _args-ah: (addr handle cell), out: (addr handle cell), trace: (ad var second-type/eax: (addr int) <- get second, type compare *second-type, 1/number break-if-= - error trace, "second arg for 'write' is not a number/grapheme" + error trace, "second arg for 'write' is not a number/code-point-utf8" return } var second-value/eax: (addr float) <- get second, number-data var x-float/xmm0: float <- copy *second-value var x/eax: int <- convert x-float - var x-grapheme/eax: grapheme <- copy x - write-grapheme stream-data, x-grapheme + var x-code-point-utf8/eax: code-point-utf8 <- copy x + write-code-point-utf8 stream-data, x-code-point-utf8 # return the stream copy-object first-ah, out } @@ -3202,8 +3202,8 @@ fn apply-read _args-ah: (addr handle cell), out: (addr handle cell), trace: (add var _stream-data/eax: (addr stream byte) <- lookup *stream-data-ah var stream-data/ebx: (addr stream byte) <- copy _stream-data #? 
rewind-stream stream-data - var result-grapheme/eax: grapheme <- read-grapheme stream-data - var result/eax: int <- copy result-grapheme + var result-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 stream-data + var result/eax: int <- copy result-code-point-utf8 new-integer out, result } diff --git a/shell/sandbox.mu b/shell/sandbox.mu index d50f47f0..20471115 100644 --- a/shell/sandbox.mu +++ b/shell/sandbox.mu @@ -449,7 +449,7 @@ fn render-keyboard-menu screen: (addr screen) { draw-text-rightward-from-cursor screen, " to sandbox ", width, 7/fg, 0xc5/bg=blue-bg } -fn edit-sandbox _self: (addr sandbox), key: grapheme, globals: (addr global-table), data-disk: (addr disk) { +fn edit-sandbox _self: (addr sandbox), key: code-point-utf8, globals: (addr global-table), data-disk: (addr disk) { var self/esi: (addr sandbox) <- copy _self # ctrl-s { diff --git a/shell/tokenize.mu b/shell/tokenize.mu index fba26b74..dc392a34 100644 --- a/shell/tokenize.mu +++ b/shell/tokenize.mu @@ -429,13 +429,13 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, next-indent-token in, out, trace # might not be returned } skip-spaces-from-gap-buffer in - var g/eax: grapheme <- peek-from-gap-buffer in + var g/eax: code-point-utf8 <- peek-from-gap-buffer in { compare g, 0x23/comment break-if-!= skip-rest-of-line in } - var g/eax: grapheme <- peek-from-gap-buffer in + var g/eax: code-point-utf8 <- peek-from-gap-buffer in { compare g, 0xa/newline break-if-!= @@ -461,8 +461,8 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace-higher trace return 1/at-start-of-line } - var _g/eax: grapheme <- peek-from-gap-buffer in - var g/ecx: grapheme <- copy _g + var _g/eax: code-point-utf8 <- peek-from-gap-buffer in + var g/ecx: code-point-utf8 <- copy _g { var should-trace?/eax: boolean <- should-trace? 
trace compare should-trace?, 0/false @@ -479,7 +479,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, { compare g, 0x22/double-quote break-if-!= - var dummy/eax: grapheme <- read-from-gap-buffer in # skip + var dummy/eax: code-point-utf8 <- read-from-gap-buffer in # skip next-stream-token in, out, trace break $next-token:case } @@ -487,13 +487,13 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, { compare g, 0x5b/open-square-bracket break-if-!= - var dummy/eax: grapheme <- read-from-gap-buffer in # skip open bracket + var dummy/eax: code-point-utf8 <- read-from-gap-buffer in # skip open bracket next-balanced-stream-token in, out, trace break $next-token:case } # other symbol char { - var symbol?/eax: boolean <- symbol-grapheme? g + var symbol?/eax: boolean <- symbol-code-point-utf8? g compare symbol?, 0/false break-if-= next-symbol-token in, out, trace @@ -508,10 +508,10 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, } # other brackets are always single-char tokens { - var bracket?/eax: boolean <- bracket-grapheme? g + var bracket?/eax: boolean <- bracket-code-point-utf8? 
g compare bracket?, 0/false break-if-= - var g/eax: grapheme <- read-from-gap-buffer in + var g/eax: code-point-utf8 <- read-from-gap-buffer in next-bracket-token g, out, trace break $next-token:case } @@ -519,7 +519,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, { compare g, 0x27/single-quote break-if-!= - var g/eax: grapheme <- read-from-gap-buffer in # consume + var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume initialize-token out, "'" break $next-token:case } @@ -527,7 +527,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, { compare g, 0x60/backquote break-if-!= - var g/eax: grapheme <- read-from-gap-buffer in # consume + var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume initialize-token out, "`" break $next-token:case } @@ -535,7 +535,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, { compare g, 0x2c/comma break-if-!= - var g/eax: grapheme <- read-from-gap-buffer in # consume + var g/eax: code-point-utf8 <- read-from-gap-buffer in # consume # check for unquote-splice { g <- peek-from-gap-buffer in @@ -581,7 +581,7 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false break-if-!= - var g/eax: grapheme <- peek-from-gap-buffer in + var g/eax: code-point-utf8 <- peek-from-gap-buffer in { { var should-trace?/eax: boolean <- should-trace? trace @@ -597,14 +597,14 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra } # if non-symbol, return { - var symbol-grapheme?/eax: boolean <- symbol-grapheme? g - compare symbol-grapheme?, 0/false + var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? 
g + compare symbol-code-point-utf8?, 0/false break-if-!= trace-text trace, "tokenize", "stop" break $next-symbol-token:loop } - var g/eax: grapheme <- read-from-gap-buffer in - write-grapheme out-data, g + var g/eax: code-point-utf8 <- read-from-gap-buffer in + write-code-point-utf8 out-data, g loop } trace-higher trace @@ -630,16 +630,16 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra var _out-data/eax: (addr stream byte) <- lookup *out-data-ah var out-data/edi: (addr stream byte) <- copy _out-data $next-number-token:check-minus: { - var g/eax: grapheme <- peek-from-gap-buffer in + var g/eax: code-point-utf8 <- peek-from-gap-buffer in compare g, 0x2d/minus g <- read-from-gap-buffer in # consume - write-grapheme out-data, g + write-code-point-utf8 out-data, g } $next-number-token:loop: { var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false break-if-!= - var g/eax: grapheme <- peek-from-gap-buffer in + var g/eax: code-point-utf8 <- peek-from-gap-buffer in { { var should-trace?/eax: boolean <- should-trace? trace @@ -653,15 +653,15 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra write-int32-hex stream, gval trace trace, "tokenize", stream } - # if not symbol grapheme, return + # if not symbol code-point-utf8, return { - var symbol-grapheme?/eax: boolean <- symbol-grapheme? g - compare symbol-grapheme?, 0/false + var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g + compare symbol-code-point-utf8?, 0/false break-if-!= trace-text trace, "tokenize", "stop" break $next-number-token:loop } - # if not digit grapheme, abort + # if not digit code-point-utf8, abort { var digit?/eax: boolean <- decimal-digit? 
g compare digit?, 0/false @@ -670,8 +670,8 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra return } trace-text trace, "tokenize", "append" - var g/eax: grapheme <- read-from-gap-buffer in - write-grapheme out-data, g + var g/eax: code-point-utf8 <- read-from-gap-buffer in + write-code-point-utf8 out-data, g loop } trace-higher trace @@ -696,10 +696,10 @@ fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra error trace, "unbalanced '\"'" return } - var g/eax: grapheme <- read-from-gap-buffer in + var g/eax: code-point-utf8 <- read-from-gap-buffer in compare g, 0x22/double-quote break-if-= - write-grapheme out-data, g + write-code-point-utf8 out-data, g loop } { @@ -735,7 +735,7 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace: error trace, "unbalanced '['" return } - var g/eax: grapheme <- read-from-gap-buffer in + var g/eax: code-point-utf8 <- read-from-gap-buffer in { compare g, 0x5b/open-square-bracket break-if-!= @@ -748,7 +748,7 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace: break-if-= $next-balanced-stream-token:loop decrement bracket-count } - write-grapheme out-data, g + write-code-point-utf8 out-data, g loop } { @@ -764,14 +764,14 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace: } } -fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) { +fn next-bracket-token g: code-point-utf8, _out: (addr token), trace: (addr trace) { trace-text trace, "tokenize", "bracket" var out/eax: (addr token) <- copy _out var out-data-ah/eax: (addr handle stream byte) <- get out, text-data populate-stream out-data-ah, 0x40 var _out-data/eax: (addr stream byte) <- lookup *out-data-ah var out-data/edi: (addr stream byte) <- copy _out-data - write-grapheme out-data, g + write-code-point-utf8 out-data, g { var should-trace?/eax: boolean <- should-trace? 
trace compare should-trace?, 0/false @@ -790,7 +790,7 @@ fn skip-rest-of-line in: (addr gap-buffer) { var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false break-if-!= - var g/eax: grapheme <- peek-from-gap-buffer in + var g/eax: code-point-utf8 <- peek-from-gap-buffer in compare g, 0xa/newline break-if-= g <- read-from-gap-buffer in # consume @@ -810,7 +810,7 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false break-if-!= - var g/eax: grapheme <- peek-from-gap-buffer in + var g/eax: code-point-utf8 <- peek-from-gap-buffer in { { var should-trace?/eax: boolean <- should-trace? trace @@ -844,7 +844,7 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra } } -# Mu carves up the space of graphemes into 4 categories: +# Mu carves up the space of code-point-utf8s into 4 categories: # whitespace # quotes and unquotes (from a Lisp perspective; doesn't include double # quotes or other Unicode quotes) @@ -856,20 +856,20 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra # During tokenization operators and symbols are treated identically. # A later phase digs into that nuance. -fn symbol-grapheme? g: grapheme -> _/eax: boolean { - var whitespace?/eax: boolean <- whitespace-grapheme? g +fn symbol-code-point-utf8? g: code-point-utf8 -> _/eax: boolean { + var whitespace?/eax: boolean <- whitespace-code-point-utf8? g compare whitespace?, 0/false { break-if-= return 0/false } - var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g + var quote-or-unquote?/eax: boolean <- quote-or-unquote-code-point-utf8? g compare quote-or-unquote?, 0/false { break-if-= return 0/false } - var bracket?/eax: boolean <- bracket-grapheme? g + var bracket?/eax: boolean <- bracket-code-point-utf8? g compare bracket?, 0/false { break-if-= @@ -888,7 +888,7 @@ fn symbol-grapheme? 
g: grapheme -> _/eax: boolean { return 1/true } -fn whitespace-grapheme? g: grapheme -> _/eax: boolean { +fn whitespace-code-point-utf8? g: code-point-utf8 -> _/eax: boolean { compare g, 9/tab { break-if-!= @@ -907,7 +907,7 @@ fn whitespace-grapheme? g: grapheme -> _/eax: boolean { return 0/false } -fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean { +fn quote-or-unquote-code-point-utf8? g: code-point-utf8 -> _/eax: boolean { compare g, 0x27/single-quote { break-if-!= @@ -931,7 +931,7 @@ fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean { return 0/false } -fn bracket-grapheme? g: grapheme -> _/eax: boolean { +fn bracket-code-point-utf8? g: code-point-utf8 -> _/eax: boolean { compare g, 0x28/open-paren { break-if-!= @@ -971,12 +971,12 @@ fn number-token? _self: (addr token) -> _/eax: boolean { var _in-data/eax: (addr stream byte) <- lookup *in-data-ah var in-data/ecx: (addr stream byte) <- copy _in-data rewind-stream in-data - var g/eax: grapheme <- read-grapheme in-data + var g/eax: code-point-utf8 <- read-code-point-utf8 in-data # if '-', read another { compare g, 0x2d/minus break-if-!= - g <- read-grapheme in-data + g <- read-code-point-utf8 in-data } { { @@ -990,7 +990,7 @@ fn number-token? _self: (addr token) -> _/eax: boolean { compare done?, 0/false } break-if-!= - g <- read-grapheme in-data + g <- read-code-point-utf8 in-data loop } return 1/true @@ -1008,8 +1008,8 @@ fn bracket-token? _self: (addr token) -> _/eax: boolean { var in-data-ah/eax: (addr handle stream byte) <- get self, text-data var in-data/eax: (addr stream byte) <- lookup *in-data-ah rewind-stream in-data - var g/eax: grapheme <- read-grapheme in-data - var result/eax: boolean <- bracket-grapheme? g + var g/eax: code-point-utf8 <- read-code-point-utf8 in-data + var result/eax: boolean <- bracket-code-point-utf8? g return result } @@ -1055,7 +1055,7 @@ fn open-paren-token? 
_self: (addr token) -> _/eax: boolean { var _in-data/eax: (addr stream byte) <- lookup *in-data-ah var in-data/ecx: (addr stream byte) <- copy _in-data rewind-stream in-data - var g/eax: grapheme <- read-grapheme in-data + var g/eax: code-point-utf8 <- read-code-point-utf8 in-data compare g, 0x28/open-paren { break-if-!= @@ -1071,7 +1071,7 @@ fn close-paren-token? _self: (addr token) -> _/eax: boolean { var _in-data/eax: (addr stream byte) <- lookup *in-data-ah var in-data/ecx: (addr stream byte) <- copy _in-data rewind-stream in-data - var g/eax: grapheme <- read-grapheme in-data + var g/eax: code-point-utf8 <- read-code-point-utf8 in-data compare g, 0x29/close-paren { break-if-!= @@ -1087,7 +1087,7 @@ fn dot-token? _self: (addr token) -> _/eax: boolean { var _in-data/eax: (addr stream byte) <- lookup *in-data-ah var in-data/ecx: (addr stream byte) <- copy _in-data rewind-stream in-data - var g/eax: grapheme <- read-grapheme in-data + var g/eax: code-point-utf8 <- read-code-point-utf8 in-data compare g, 0x2e/dot { break-if-!= diff --git a/shell/trace.mu b/shell/trace.mu index 298b7e23..e4c9ec5e 100644 --- a/shell/trace.mu +++ b/shell/trace.mu @@ -904,7 +904,7 @@ fn render-trace-menu screen: (addr screen) { draw-text-rightward-from-cursor screen, " show whole line ", width, 7/fg, 0xc5/bg=blue-bg } -fn edit-trace _self: (addr trace), key: grapheme { +fn edit-trace _self: (addr trace), key: code-point-utf8 { var self/esi: (addr trace) <- copy _self # cursor down { diff --git a/signatures.mu b/signatures.mu index c867ff32..ee8ea5dd 100644 --- a/signatures.mu +++ b/signatures.mu @@ -69,8 +69,8 @@ sig slice-starts-with? s: (addr slice), head: (addr array byte) -> _/eax: boolea sig write-slice out: (addr stream byte), s: (addr slice) sig slice-to-string ad: (addr allocation-descriptor), in: (addr slice), out: (addr handle array byte) sig write-int32-decimal out: (addr stream byte), n: int -sig decimal-digit? 
c: grapheme -> _/eax: boolean -sig to-decimal-digit in: grapheme -> _/eax: int +sig decimal-digit? c: code-point-utf8 -> _/eax: boolean +sig to-decimal-digit in: code-point-utf8 -> _/eax: int sig next-word line: (addr stream byte), out: (addr slice) # merges '#' comments into a single word sig next-raw-word line: (addr stream byte), out: (addr slice) # does not merge '#' comments sig skip-chars-matching in: (addr stream byte), delimiter: byte @@ -89,18 +89,18 @@ sig parse-array-of-ints s: (addr array byte), out: (addr handle array int) sig parse-array-of-decimal-ints s: (addr array byte), out: (addr handle array int) sig check-array-equal a: (addr array int), expected: (addr array byte), msg: (addr array byte) sig integer-divide a: int, b: int -> _/eax: int, _/edx: int -sig to-code-point in: grapheme -> _/eax: code-point -sig to-grapheme in: code-point -> _/eax: grapheme -sig read-grapheme in: (addr stream byte) -> _/eax: grapheme -sig grapheme-length g: grapheme -> _/edx: int +sig to-code-point in: code-point-utf8 -> _/eax: code-point +sig to-utf8 in: code-point -> _/eax: code-point-utf8 +sig read-code-point-utf8 in: (addr stream byte) -> _/eax: code-point-utf8 +sig utf8-length g: code-point-utf8 -> _/edx: int sig shift-left-bytes n: int, k: int -> _/eax: int -sig write-grapheme out: (addr stream byte), g: grapheme +sig write-code-point-utf8 out: (addr stream byte), g: code-point-utf8 sig fill-in-rational _out: (addr float), nr: int, dr: int sig fill-in-sqrt _out: (addr float), n: int sig rational nr: int, dr: int -> _/xmm0: float sig scale-down-and-round-up n: int, m: int -> _/ecx: int sig substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) -sig split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte)) +sig split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte)) sig render-float-decimal screen: (addr screen), in: float, precision: int, x: 
int, y: int, color: int, background-color: int -> _/eax: int sig write-float-decimal-approximate out: (addr stream byte), in: float, precision: int sig decimal-digits n: int, _buf: (addr array byte) -> _/eax: int @@ -242,22 +242,22 @@ sig slide-down _a: (addr array int), start: int, end: int, target: int sig find-slide-down-slot-in-array _a: (addr array int), _val: int -> _/ecx: int sig check-slide-up before: (addr array byte), start: int, end: int, target: int, after: (addr array byte), msg: (addr array byte) sig check-slide-down before: (addr array byte), start: int, end: int, target: int, after: (addr array byte), msg: (addr array byte) -sig initialize-grapheme-stack _self: (addr grapheme-stack), n: int -sig clear-grapheme-stack _self: (addr grapheme-stack) -sig grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean -sig grapheme-stack-length _self: (addr grapheme-stack) -> _/eax: int -sig push-grapheme-stack _self: (addr grapheme-stack), _val: grapheme -sig pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme -sig copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) -sig render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int -sig render-stack-from-bottom screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int -sig render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr grapheme-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int -sig render-stack-from-top screen: (addr screen), self: (addr grapheme-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int -sig 
get-matching-close-paren-index _self: (addr grapheme-stack), render-cursor?: boolean -> _/edx: int -sig get-matching-open-paren-index _self: (addr grapheme-stack), control: boolean, depth: int -> _/edx: int -sig prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean -sig suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean -sig grapheme-stack-is-decimal-integer? _self: (addr grapheme-stack) -> _/eax: boolean +sig initialize-code-point-utf8-stack _self: (addr code-point-utf8-stack), n: int +sig clear-code-point-utf8-stack _self: (addr code-point-utf8-stack) +sig code-point-utf8-stack-empty? _self: (addr code-point-utf8-stack) -> _/eax: boolean +sig code-point-utf8-stack-length _self: (addr code-point-utf8-stack) -> _/eax: int +sig push-code-point-utf8-stack _self: (addr code-point-utf8-stack), _val: code-point-utf8 +sig pop-code-point-utf8-stack _self: (addr code-point-utf8-stack) -> _/eax: code-point-utf8 +sig copy-code-point-utf8-stack _src: (addr code-point-utf8-stack), dest: (addr code-point-utf8-stack) +sig render-stack-from-bottom-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int, color: int, background-color: int -> _/eax: int, _/ecx: int +sig render-stack-from-bottom screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, highlight-matching-open-paren?: boolean, open-paren-depth: int -> _/eax: int +sig render-stack-from-top-wrapping-right-then-down screen: (addr screen), _self: (addr code-point-utf8-stack), xmin: int, ymin: int, xmax: int, ymax: int, _x: int, _y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int +sig render-stack-from-top screen: (addr screen), self: (addr code-point-utf8-stack), x: int, y: int, render-cursor?: boolean -> _/eax: int +sig get-matching-close-paren-index 
_self: (addr code-point-utf8-stack), render-cursor?: boolean -> _/edx: int +sig get-matching-open-paren-index _self: (addr code-point-utf8-stack), control: boolean, depth: int -> _/edx: int +sig prefix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean +sig suffix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean +sig code-point-utf8-stack-is-decimal-integer? _self: (addr code-point-utf8-stack) -> _/eax: boolean sig initialize-gap-buffer _self: (addr gap-buffer), capacity: int sig clear-gap-buffer _self: (addr gap-buffer) sig gap-buffer-empty? _self: (addr gap-buffer) -> _/eax: boolean @@ -266,43 +266,43 @@ sig initialize-gap-buffer-with self: (addr gap-buffer), keys: (addr array byte) sig load-gap-buffer-from-stream self: (addr gap-buffer), in: (addr stream byte) sig emit-gap-buffer self: (addr gap-buffer), out: (addr stream byte) sig append-gap-buffer _self: (addr gap-buffer), out: (addr stream byte) -sig emit-stack-from-bottom _self: (addr grapheme-stack), out: (addr stream byte) -sig emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) +sig emit-stack-from-bottom _self: (addr code-point-utf8-stack), out: (addr stream byte) +sig emit-stack-from-top _self: (addr code-point-utf8-stack), out: (addr stream byte) sig word-at-gap _self: (addr gap-buffer), out: (addr stream byte) -sig grapheme-at-gap _self: (addr gap-buffer) -> _/eax: grapheme -sig top-most-word _self: (addr grapheme-stack) -> _/eax: int -sig emit-stack-from-index _self: (addr grapheme-stack), start: int, out: (addr stream byte) -sig emit-stack-to-index _self: (addr grapheme-stack), end: int, out: (addr stream byte) -sig is-ascii-word-grapheme? 
g: grapheme -> _/eax: boolean +sig code-point-utf8-at-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8 +sig top-most-word _self: (addr code-point-utf8-stack) -> _/eax: int +sig emit-stack-from-index _self: (addr code-point-utf8-stack), start: int, out: (addr stream byte) +sig emit-stack-to-index _self: (addr code-point-utf8-stack), end: int, out: (addr stream byte) +sig is-ascii-word-code-point-utf8? g: code-point-utf8 -> _/eax: boolean sig render-gap-buffer-wrapping-right-then-down screen: (addr screen), _gap: (addr gap-buffer), xmin: int, ymin: int, xmax: int, ymax: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int, _/ecx: int sig render-gap-buffer screen: (addr screen), gap: (addr gap-buffer), x: int, y: int, render-cursor?: boolean, color: int, background-color: int -> _/eax: int sig gap-buffer-length _gap: (addr gap-buffer) -> _/eax: int -sig add-grapheme-at-gap _self: (addr gap-buffer), g: grapheme +sig add-code-point-utf8-at-gap _self: (addr gap-buffer), g: code-point-utf8 sig add-code-point-at-gap self: (addr gap-buffer), c: code-point sig gap-to-start self: (addr gap-buffer) sig gap-to-end self: (addr gap-buffer) sig gap-at-start? _self: (addr gap-buffer) -> _/eax: boolean sig gap-at-end? 
_self: (addr gap-buffer) -> _/eax: boolean -sig gap-right _self: (addr gap-buffer) -> _/eax: grapheme -sig gap-left _self: (addr gap-buffer) -> _/eax: grapheme +sig gap-right _self: (addr gap-buffer) -> _/eax: code-point-utf8 +sig gap-left _self: (addr gap-buffer) -> _/eax: code-point-utf8 sig index-of-gap _self: (addr gap-buffer) -> _/eax: int -sig first-grapheme-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme -sig grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme +sig first-code-point-utf8-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 +sig code-point-utf8-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 sig delete-before-gap _self: (addr gap-buffer) -sig pop-after-gap _self: (addr gap-buffer) -> _/eax: grapheme +sig pop-after-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8 sig gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: boolean sig gap-buffers-equal? self: (addr gap-buffer), g: (addr gap-buffer) -> _/eax: boolean -sig gap-index _self: (addr gap-buffer), _n: int -> _/eax: grapheme +sig gap-index _self: (addr gap-buffer), _n: int -> _/eax: code-point-utf8 sig copy-gap-buffer _src-ah: (addr handle gap-buffer), _dest-ah: (addr handle gap-buffer) sig gap-buffer-is-decimal-integer? _self: (addr gap-buffer) -> _/eax: boolean sig highlight-matching-open-paren? _gap: (addr gap-buffer), render-cursor?: boolean -> _/ebx: boolean, _/edi: int sig rewind-gap-buffer _self: (addr gap-buffer) sig gap-buffer-scan-done? 
_self: (addr gap-buffer) -> _/eax: boolean -sig peek-from-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme -sig read-from-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme +sig peek-from-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 +sig read-from-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 sig put-back-from-gap-buffer _self: (addr gap-buffer) sig skip-spaces-from-gap-buffer self: (addr gap-buffer) -sig edit-gap-buffer self: (addr gap-buffer), key: grapheme +sig edit-gap-buffer self: (addr gap-buffer), key: code-point-utf8 sig gap-to-start-of-next-word self: (addr gap-buffer) sig gap-to-end-of-previous-word self: (addr gap-buffer) sig gap-to-previous-start-of-line self: (addr gap-buffer) diff --git a/tutorial/converter.mu b/tutorial/converter.mu index a8aa26e3..b101fbdd 100644 --- a/tutorial/converter.mu +++ b/tutorial/converter.mu @@ -55,7 +55,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) # process a single keystroke $main:input: { var key/eax: byte <- read-key keyboard - var key/eax: grapheme <- copy key + var key/eax: code-point-utf8 <- copy key compare key, 0 loop-if-= # tab = switch cursor between input areas diff --git a/tutorial/converter2.mu b/tutorial/converter2.mu index ae445239..5e338647 100644 --- a/tutorial/converter2.mu +++ b/tutorial/converter2.mu @@ -37,7 +37,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) # process a single keystroke $main:input: { var key/eax: byte <- read-key keyboard - var key/eax: grapheme <- copy key + var key/eax: code-point-utf8 <- copy key compare key, 0 loop-if-= # tab = switch cursor between input areas diff --git a/tutorial/index.md b/tutorial/index.md index 48173fbb..3c7f781f 100644 --- a/tutorial/index.md +++ b/tutorial/index.md @@ -541,7 +541,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard) { var done?/eax: boolean <- stream-empty? 
in compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme in + var g/eax: code-point-utf8 <- read-code-point-utf8 in # do stuff with g here loop } @@ -550,8 +550,8 @@ fn main screen: (addr screen), keyboard: (addr keyboard) { `read-line-from-keyboard` reads keystrokes from the keyboard until you press the `Enter` (also called `newline`) key, and accumulates them into a _stream_ -of bytes. The loop then repeatedly reads _graphemes_ from the stream. A -grapheme can consist of multiple bytes, particularly outside of the Latin +of bytes. The loop then repeatedly reads _code-point-utf8s_ from the stream. A +code-point-utf8 can consist of multiple bytes, particularly outside of the Latin alphabet and Arabic digits most prevalent in the West. Mu doesn't yet support non-Qwerty keyboards, but support for other keyboards should be easy to add. @@ -561,12 +561,12 @@ give yourself a sense of what you can do with them. Does the above program make sense now? Feel free to experiment to make sense of it. Can you modify it to print out the line a second time, after you've typed it -out until the `Enter` key? Can you print a space after every grapheme when you +out until the `Enter` key? Can you print a space after every code-point-utf8 when you print the line out a second time? You'll need to skim the section on [printing to screen](https://github.com/akkartik/mu/blob/main/vocabulary.md#printing-to-screen) from Mu's vocabulary. Pay particular attention to the difference between a -grapheme and a _code-point_. Mu programs often read characters in units of -graphemes, but they must draw in units of code-points that the font manages. +code-point-utf8 and a _code-point_. Mu programs often read characters in units of +code-point-utf8s, but they must draw in units of code-points that the font manages. (This adds some complexity but helps combine multiple code-points into a single glyph as needed for some languages.) 
diff --git a/vocabulary.md b/vocabulary.md index 931df057..9047a2fd 100644 --- a/vocabulary.md +++ b/vocabulary.md @@ -43,7 +43,7 @@ how they work under the hood. - Code-points: integer representing a Unicode character. Must be representable in 32 bits as utf-8; largest supported value is 0x10000. -Mu will let you convert between bytes, graphemes and code-points using `copy`, +Mu will let you convert between bytes, code-point-utf8s and code-points using `copy`, and trust that you know what you're doing. Be aware that doing so is only correct for English/Latin characters, digits and symbols. @@ -120,7 +120,7 @@ signatures.mu for their full type signatures. - `append-byte-hex`: writes textual representation of lowest byte in hex to a stream of bytes. Does not write a '0x' prefix. - `read-byte`: reads a single byte from a stream of bytes. -- `read-grapheme`: reads a single unicode grapheme (up to 4 bytes) from a +- `read-code-point-utf8`: reads a single unicode code-point-utf8 (up to 4 bytes) from a stream of bytes. #### reading/writing hex representations of integers @@ -137,7 +137,7 @@ signatures.mu for their full type signatures. - `parse-decimal-int-from-slice` - `parse-decimal-int-from-stream` - `parse-array-of-decimal-ints` -- `decimal-digit?`: checks if a grapheme is in [0, 9] +- `decimal-digit?`: checks if a code-point-utf8 is in [0, 9] #### printing to screen @@ -197,7 +197,7 @@ automatically read and update the cursor position in various ways. These primitives always silently fail if the desired movement would go out of screen bounds. - `move-cursor-to-left-margin-of-next-line` -- `move-cursor-rightward-and-downward`: move cursor one grapheme to the right +- `move-cursor-rightward-and-downward`: move cursor one code-point-utf8 to the right - `draw-text-rightward-from-cursor`: truncate at some right margin. 
- `draw-text-rightward-from-cursor-over-full-screen`: truncate at right edge @@ -255,7 +255,7 @@ Assertions for tests: `read-key` reads a single key from the keyboard and returns it if it exists. Returns 0 if no key has been pressed. Currently only support single-byte keys, -which are identical to their code-point and grapheme representations. +which are identical to their code-point and code-point-utf8 representations. `read-line-from-keyboard` reads keys from keyboard, echoes them to screen (with given fg/bg colors) and accumulates them in a stream until it encounters