Port some support for unicode to baremetal.
This commit is contained in:
Kartik Agaram 2021-01-09 18:55:24 -08:00
parent 0518944e37
commit 57e4978eac
10 changed files with 729 additions and 11 deletions

87
baremetal/107trace.subx Normal file
View File

@ -0,0 +1,87 @@
# instruction effective address register displacement immediate
# . op subop mod rm32 base index scale r32
# . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
# 3-argument variant of _append
# Append the contents of array 's' to the byte range [out, outend) by
# delegating to _append-4 with the range [&s->data[0], &s->data[s->size]).
# As used here, an array keeps its size in its first 4 bytes and its data
# immediately after (offset 4).
# eax is not saved: it carries back whatever _append-4 returns.
_append-3: # out: (addr byte), outend: (addr byte), s: (addr array byte) -> num_bytes_appended/eax
# . prologue
55/push-ebp
89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
# . save registers
51/push-ecx
# eax = _append-4(out, outend, &s->data[0], &s->data[s->size])
# (arguments are pushed last-to-first, so 'inend' goes on the stack first)
# . . push &s->data[s->size]
8b/copy 1/mod/*+disp8 5/rm32/ebp . . 0/r32/eax 0x10/disp8 . # copy *(ebp+16) to eax (eax = s)
8b/copy 0/mod/indirect 0/rm32/eax . . . 1/r32/ecx . . # copy *eax to ecx (ecx = s->size)
8d/copy-address 1/mod/*+disp8 4/rm32/sib 0/base/eax 1/index/ecx . 1/r32/ecx 4/disp8 . # copy eax+ecx+4 to ecx
51/push-ecx
# . . push &s->data[0]
8d/copy-address 1/mod/*+disp8 0/rm32/eax . . . 1/r32/ecx 4/disp8 . # copy eax+4 to ecx
51/push-ecx
# . . push outend
ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12)
# . . push out
ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 8/disp8 . # push *(ebp+8)
# . . call
e8/call _append-4/disp32
# . . discard args
81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0x10/imm32 # add to esp (4 args * 4 bytes)
$_append-3:end:
# . restore registers
59/pop-to-ecx
# . epilogue
89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp
5d/pop-to-ebp
c3/return
# 4-argument variant of _append
# Copy bytes one at a time from [in, inend) to [out, outend), stopping as soon
# as either range is exhausted. Returns the number of bytes copied in eax.
# Running out of room in 'out' is currently silent: the copy is truncated and
# the caller only sees a smaller count (see the TODO below).
_append-4: # out: (addr byte), outend: (addr byte), in: (addr byte), inend: (addr byte) -> num_bytes_appended/eax: int
# . prologue
55/push-ebp
89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
# . save registers
# (ebx is saved because BL is the scratch byte for the copy)
51/push-ecx
52/push-edx
53/push-ebx
56/push-esi
57/push-edi
# num_bytes_appended = 0
b8/copy-to-eax 0/imm32
# edi = out
8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 7/r32/edi 8/disp8 . # copy *(ebp+8) to edi
# edx = outend
8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 2/r32/edx 0xc/disp8 . # copy *(ebp+12) to edx
# esi = in
8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 0x10/disp8 . # copy *(ebp+16) to esi
# ecx = inend
8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 1/r32/ecx 0x14/disp8 . # copy *(ebp+20) to ecx
$_append-4:loop:
# if (in >= inend) break
# (addresses are compared as unsigned values: jump-if-addr>=)
39/compare 3/mod/direct 6/rm32/esi . . . 1/r32/ecx . . # compare esi with ecx
73/jump-if-addr>= $_append-4:end/disp8
# if (out >= outend) abort # just to catch test failures fast
# (for now this just stops copying instead of aborting)
39/compare 3/mod/direct 7/rm32/edi . . . 2/r32/edx . . # compare edi with edx
73/jump-if-addr>= $_append-4:end/disp8 # TODO: abort
# *out = *in
8a/copy-byte 0/mod/indirect 6/rm32/esi . . . 3/r32/BL . . # copy byte at *esi to BL
88/copy-byte 0/mod/indirect 7/rm32/edi . . . 3/r32/BL . . # copy BL to byte at *edi
# ++num_bytes_appended
40/increment-eax
# ++in
46/increment-esi
# ++out
47/increment-edi
eb/jump $_append-4:loop/disp8
$_append-4:end:
# . restore registers (reverse order of the saves; eax holds the result)
5f/pop-to-edi
5e/pop-to-esi
5b/pop-to-ebx
5a/pop-to-edx
59/pop-to-ecx
# . epilogue
89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp
5d/pop-to-ebp
c3/return
# . . vim:nowrap:textwidth=0

53
baremetal/108write.subx Normal file
View File

@ -0,0 +1,53 @@
# instruction effective address register displacement immediate
# . op subop mod rm32 base index scale r32
# . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
# Append the contents of array 's' to stream 'f'.
# Does nothing when s is null (0).
# Stream fields as accessed here: f->write at offset 0, f->size at offset 8,
# f->data starting at offset 12.
# f->write advances by the number of bytes _append-3 reports having copied, so
# if 's' doesn't fit in the remaining space it is silently truncated.
write: # f: (addr stream byte), s: (addr array byte)
# . prologue
55/push-ebp
89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
# if (s == 0) return
# (no registers have been saved yet, which is why $write:end sits after the
# register restores below)
81 7/subop/compare 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 0/imm32 # compare *(ebp+12) with 0
74/jump-if-= $write:end/disp8
# TODO: write to file
# otherwise, treat 'f' as a stream to append to
# . save registers
50/push-eax
51/push-ecx
52/push-edx
53/push-ebx
# ecx = f
8b/copy 1/mod/*+disp8 5/rm32/ebp . . 1/r32/ecx 8/disp8 . # copy *(ebp+8) to ecx
# edx = f->write
8b/copy 0/mod/indirect 1/rm32/ecx . . . 2/r32/edx . . # copy *ecx to edx
# ebx = f->size
8b/copy 1/mod/*+disp8 1/rm32/ecx . . . 3/r32/ebx 8/disp8 . # copy *(ecx+8) to ebx
# eax = _append-3(&f->data[f->write], &f->data[f->size], s)
# . . push s
ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12)
# . . push &f->data[f->size]
8d/copy-address 1/mod/*+disp8 4/rm32/sib 1/base/ecx 3/index/ebx . 3/r32/ebx 0xc/disp8 . # copy ecx+ebx+12 to ebx
53/push-ebx
# . . push &f->data[f->write]
8d/copy-address 1/mod/*+disp8 4/rm32/sib 1/base/ecx 2/index/edx . 3/r32/ebx 0xc/disp8 . # copy ecx+edx+12 to ebx
53/push-ebx
# . . call
e8/call _append-3/disp32
# . . discard args
81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp (3 args * 4 bytes)
# f->write += eax
01/add 0/mod/indirect 1/rm32/ecx . . . 0/r32/eax . . # add eax to *ecx
# . restore registers
5b/pop-to-ebx
5a/pop-to-edx
59/pop-to-ecx
58/pop-to-eax
$write:end:
# . epilogue
89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp
5d/pop-to-ebp
c3/return
# TODO: bring in tests once we have check-ints-equal
# . . vim:nowrap:textwidth=0

View File

@ -0,0 +1,33 @@
# TODO: read-byte-buffered
# Return next byte value in eax, with top 3 bytes cleared.
# If the stream has no unread bytes (s->read >= s->write), return 0 and leave
# s->read untouched. (TODO: abort on reaching end of stream.)
# Stream fields as accessed here: s->write at offset 0, s->read at offset 4,
# s->data starting at offset 12.
read-byte: # s: (addr stream byte) -> result/eax: byte
    # . prologue
    55/push-ebp
    89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
    # . save registers
    51/push-ecx
    56/push-esi
    # result = 0
    # Cleared before the bounds check so that eax is well-defined (rather than
    # whatever the caller left there) when we bail out at end of stream.
    31/xor 3/mod/direct 0/rm32/eax . . . 0/r32/eax . . # clear eax
    # esi = s
    8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 8/disp8 . # copy *(ebp+8) to esi
    # ecx = s->read
    8b/copy 1/mod/*+disp8 6/rm32/esi . . . 1/r32/ecx 4/disp8 . # copy *(esi+4) to ecx
    # if (s->read >= s->write) return 0
    3b/compare 0/mod/indirect 6/rm32/esi . . . 1/r32/ecx . . # compare ecx with *esi
    0f 8d/jump-if->= $read-byte:end/disp32 # TODO: abort
    # result = s->data[s->read]
    8a/copy-byte 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/AL 0xc/disp8 . # copy byte at *(esi+ecx+12) to AL
    # ++s->read
    ff 0/subop/increment 1/mod/*+disp8 6/rm32/esi . . . . 4/disp8 . # increment *(esi+4)
$read-byte:end:
    # . restore registers
    5e/pop-to-esi
    59/pop-to-ecx
    # . epilogue
    89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp
    5d/pop-to-ebp
    c3/return
# . . vim:nowrap:textwidth=0

View File

@ -0,0 +1,37 @@
# instruction effective address register displacement immediate
# . op subop mod rm32 base index scale r32
# . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
# Write lower byte of 'n' to 'f'.
# If the stream is already full (f->write >= f->size) nothing is written.
# (TODO: abort instead.)
# Stream fields as accessed here: f->write at offset 0, f->size at offset 8,
# f->data starting at offset 12.
# No registers are modified: this function has no result, and callers (e.g.
# code that keeps its working value in eax across several calls) rely on eax
# surviving, so eax is saved along with the other scratch registers.
append-byte: # f: (addr stream byte), n: int
    # . prologue
    55/push-ebp
    89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
    # . save registers
    50/push-eax
    51/push-ecx
    57/push-edi
    # edi = f
    8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 7/r32/edi 8/disp8 . # copy *(ebp+8) to edi
    # ecx = f->write
    8b/copy 0/mod/indirect 7/rm32/edi . . . 1/r32/ecx . . # copy *edi to ecx
    # if (f->write >= f->size) abort
    3b/compare 1/mod/*+disp8 7/rm32/edi . . . 1/r32/ecx 8/disp8 . # compare ecx with *(edi+8)
    7d/jump-if->= $append-byte:end/disp8 # TODO: abort
$append-byte:to-stream:
    # write to stream
    # f->data[f->write] = LSB(n)
    31/xor 3/mod/direct 0/rm32/eax . . . 0/r32/eax . . # clear eax
    8a/copy-byte 1/mod/*+disp8 5/rm32/ebp . . . 0/r32/AL 0xc/disp8 . # copy byte at *(ebp+12) to AL
    88/copy-byte 1/mod/*+disp8 4/rm32/sib 7/base/edi 1/index/ecx . 0/r32/AL 0xc/disp8 . # copy AL to *(edi+ecx+12)
    # ++f->write
    ff 0/subop/increment 0/mod/indirect 7/rm32/edi . . . . . . # increment *edi
$append-byte:end:
    # . restore registers (reverse order of the saves)
    5f/pop-to-edi
    59/pop-to-ecx
    58/pop-to-eax
    # . epilogue
    89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp
    5d/pop-to-ebp
    c3/return
# . . vim:nowrap:textwidth=0

View File

@ -0,0 +1,52 @@
# instruction effective address register displacement immediate
# . op subop mod rm32 base index scale r32
# . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes
# Fill a region of memory with zeroes.
# Writes 'size' zero bytes starting at 'start', one byte at a time.
# The loop test is a signed comparison, so a size <= 0 writes nothing.
# All registers are preserved.
zero-out: # start: (addr byte), size: int
    # pseudocode:
    #   curr/esi = start
    #   i/ecx = 0
    #   while true
    #     if (i >= size) break
    #     *curr = 0
    #     ++curr
    #     ++i
    #
    # . prologue
    55/push-ebp
    89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp
    # . save registers
    50/push-eax
    51/push-ecx
    52/push-edx
    56/push-esi
    # curr/esi = start
    8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 8/disp8 . # copy *(ebp+8) to esi
    # var i/ecx: int = 0
    31/xor 3/mod/direct 1/rm32/ecx . . . 1/r32/ecx . . # clear ecx
    # edx = size
    8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 2/r32/edx 0xc/disp8 . # copy *(ebp+12) to edx
$zero-out:loop:
    # if (i >= size) break
    39/compare 3/mod/direct 1/rm32/ecx . . . 2/r32/edx . . # compare ecx with edx
    7d/jump-if->= $zero-out:end/disp8
    # *curr = 0
    # (mod 0 is the indirect addressing mode: the store goes to memory at esi)
    c6 0/subop/copy-byte 0/mod/indirect 6/rm32/esi . . . . . 0/imm8 # copy byte to *esi
    # ++curr
    46/increment-esi
    # ++i
    41/increment-ecx
    eb/jump $zero-out:loop/disp8
$zero-out:end:
    # . restore registers
    5e/pop-to-esi
    5a/pop-to-edx
    59/pop-to-ecx
    58/pop-to-eax
    # . epilogue
    89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp
    5d/pop-to-ebp
    c3/return
# . . vim:nowrap:textwidth=0

View File

@ -0,0 +1,61 @@
# A function which pushes n zeros on the stack.
# Really only intended to be called from code generated by mu.subx (for array
# vars on the stack).
== code
#? Entry:
#? # . prologue
#? 89/<- %ebp 4/r32/esp
#? #
#? 68/push 0xfcfdfeff/imm32
#? b8/copy-to-eax 0x34353637/imm32
#? $dump-stack0:
#? (push-n-zero-bytes 4)
#? 68/push 0x20/imm32
#? $dump-stack9:
#? b8/copy-to-eax 1/imm32/exit
#? cd/syscall 0x80/imm8
# This is not a regular function, so it won't be idiomatic.
# Registers must be properly restored.
# Registers can be spilled, but that modifies the stack and needs to be
# cleaned up.
# Overhead:
# 62 + n*6 instructions to push n bytes.
# If we just emitted code to push n zeroes, it would be:
# 5 bytes for 4 zero bytes, so 1.25 bytes per zero. And that's not even
# instructions.
# But on the other hand it would destroy the instruction cache, where this
# approach requires 15 instructions, fixed.
# n must be positive
# Grow the stack downward by n bytes underneath the caller and zero the new
# region, then return to the caller with esp left pointing into that region.
# ebp, esp and eax are spilled to global words rather than pushed, because any
# push here would land inside the very area being reshaped.
# On entry: *esp is the return address and *(esp+4) is n.
# On return: esp = (entry esp) - n + 4, and the n+4 bytes from there up through
# the caller's argument slot are all zero.
# NOTE(review): what the caller does with the extra 4 zero bytes (e.g. whether
# it still discards the argument slot) is not visible here -- confirm against
# the code generator.
push-n-zero-bytes: # n: int
$push-n-zero-bytes:prologue:
89/<- *Push-n-zero-bytes-ebp 5/r32/ebp # spill ebp without affecting stack
89/<- %ebp 4/r32/esp # ebp = esp on entry, so *ebp is the return address and *(ebp+4) is n
$push-n-zero-bytes:copy-ra:
# -- esp = ebp
89/<- *Push-n-zero-bytes-eax 0/r32/eax # spill eax; it's needed as scratch below
8b/-> *esp 0/r32/eax # eax = return address
2b/subtract *(ebp+4) 4/r32/esp # esp -= n
# -- esp+n = ebp
89/<- *esp 0/r32/eax # plant the return address at the new top of stack
8b/-> *Push-n-zero-bytes-eax 0/r32/eax # restore eax
$push-n-zero-bytes:bulk-cleaning:
89/<- *Push-n-zero-bytes-esp 4/r32/esp
81 0/subop/add *Push-n-zero-bytes-esp 4/imm32 # start of region to clear = esp+4, just above the relocated return address
81 0/subop/add *(ebp+4) 4/imm32 # n += 4 (in place, in the caller's argument slot)
# The region [esp+4, esp+4+n+4) ends at ebp+8, so it also wipes the original
# return-address slot and the argument slot itself.
(zero-out *Push-n-zero-bytes-esp *(ebp+4)) # n+4
$push-n-zero-bytes:epilogue:
8b/-> *Push-n-zero-bytes-ebp 5/r32/ebp # restore spill
c3/return # pops the relocated return address
== data
# Spill slots used by push-n-zero-bytes in place of the stack.
# saved ebp of the caller
Push-n-zero-bytes-ebp: # (addr int)
0/imm32
# start address of the region to zero (esp+4 after the stack has been grown)
Push-n-zero-bytes-esp: # (addr int)
0/imm32
# saved eax of the caller
Push-n-zero-bytes-eax:
0/imm32

200
baremetal/309stream.subx Normal file
View File

@ -0,0 +1,200 @@
# Some unsafe methods not intended to be used directly in SubX, only through
# Mu after proper type-checking.
== code
# Report whether stream 's' has no unread bytes left.
# Returns 1 in eax when s->read >= s->write (signed comparison), 0 otherwise.
# Stream fields as accessed here: s->write at offset 0, s->read at offset 4.
stream-empty?: # s: (addr stream _) -> result/eax: boolean
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
# . save registers
51/push-ecx
56/push-esi
# result = false
# (all of eax is cleared up front because set-if->= below only writes al)
b8/copy-to-eax 0/imm32/false
# esi = s
8b/-> *(ebp+8) 6/r32/esi
# return s->read >= s->write
8b/-> *esi 1/r32/ecx # ecx = s->write
39/compare-with *(esi+4) 1/r32/ecx # compare s->read with ecx
0f 9d/set-if->= %al
$stream-empty?:end:
# . restore registers
5e/pop-to-esi
59/pop-to-ecx
# . epilogue
89/<- %esp 5/r32/ebp
5d/pop-to-ebp
c3/return
# Report whether stream 's' has no room left for writing.
# Returns 1 in eax when s->write >= s->size (signed comparison), 0 otherwise.
# Stream fields as accessed here: s->write at offset 0, s->size at offset 8.
stream-full?: # s: (addr stream _) -> result/eax: boolean
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
# . save registers
51/push-ecx
56/push-esi
# result = false
# (all of eax is cleared up front because set-if->= below only writes al)
b8/copy-to-eax 0/imm32/false
# esi = s
8b/-> *(ebp+8) 6/r32/esi
# return s->write >= s->size
8b/-> *(esi+8) 1/r32/ecx # ecx = s->size
39/compare-with *esi 1/r32/ecx # compare s->write with ecx
0f 9d/set-if->= %al
$stream-full?:end:
# . restore registers
5e/pop-to-esi
59/pop-to-ecx
# . epilogue
89/<- %esp 5/r32/ebp
5d/pop-to-ebp
c3/return
# Append exactly 'n' bytes starting at 'in' to stream 's'.
# All-or-nothing: if s->write + n would exceed s->size, nothing is copied and
# the stream is left untouched (TODO: abort).
# The copy itself is done by _append-4; its result in eax is discarded when
# the caller's eax is restored.
write-to-stream: # s: (addr stream _), in: (addr byte), n: int
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
# . save registers
50/push-eax
51/push-ecx
52/push-edx
53/push-ebx
57/push-edi
# edi = s
8b/-> *(ebp+8) 7/r32/edi
# var swrite/edx: int = s->write
8b/-> *edi 2/r32/edx
# if (swrite + n > s->size) return
8b/-> *(ebp+0x10) 1/r32/ecx # ecx = n
01/add-to %ecx 2/r32/edx # ecx = n + swrite
3b/compare 1/r32/ecx *(edi+8) # compare ecx with s->size
0f 8f/jump-if-> $write-to-stream:end/disp32 # TODO: abort
# var out/edx: (addr byte) = s->data + s->write
8d/copy-address *(edi+edx+0xc) 2/r32/edx
# var outend/ebx: (addr byte) = out + n
8b/-> *(ebp+0x10) 3/r32/ebx
8d/copy-address *(edx+ebx) 3/r32/ebx
# eax = in
8b/-> *(ebp+0xc) 0/r32/eax
# var inend/ecx: (addr byte) = in + n
8b/-> *(ebp+0x10) 1/r32/ecx
8d/copy-address *(eax+ecx) 1/r32/ecx
#
(_append-4 %edx %ebx %eax %ecx) # => eax
# s->write += n
# (uses n rather than the count returned in eax; source and destination
# ranges were both sized to exactly n above)
8b/-> *(ebp+0x10) 1/r32/ecx
01/add-to *edi 1/r32/ecx
$write-to-stream:end:
# . restore registers
5f/pop-to-edi
5b/pop-to-ebx
5a/pop-to-edx
59/pop-to-ecx
58/pop-to-eax
# . epilogue
89/<- %esp 5/r32/ebp
5d/pop-to-ebp
c3/return
# Copy exactly 'n' unread bytes from stream 's' into memory at 'out' and
# advance s->read past them.
# All-or-nothing: if s->read + n would exceed s->write, nothing is copied and
# the stream is left untouched (TODO: abort).
# The copy itself is done by _append-4; its result in eax is discarded when
# the caller's eax is restored.
read-from-stream: # s: (addr stream _), out: (addr byte), n: int
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
# . save registers
50/push-eax
51/push-ecx
52/push-edx
53/push-ebx
56/push-esi
# esi = s
8b/-> *(ebp+8) 6/r32/esi
# var sread/edx: int = s->read
8b/-> *(esi+4) 2/r32/edx
# if (sread + n > s->write) return
8b/-> *(ebp+0x10) 1/r32/ecx # ecx = n
01/add-to %ecx 2/r32/edx # ecx = n + sread
3b/compare 1/r32/ecx *esi # compare ecx with s->write
0f 8f/jump-if-> $read-from-stream:end/disp32 # TODO: abort
# var in/edx: (addr byte) = s->data + s->read
8d/copy-address *(esi+edx+0xc) 2/r32/edx
# var inend/ebx: (addr byte) = in + n
8b/-> *(ebp+0x10) 3/r32/ebx
8d/copy-address *(edx+ebx) 3/r32/ebx
# eax = out
8b/-> *(ebp+0xc) 0/r32/eax
# var outend/ecx: (addr byte) = out + n
8b/-> *(ebp+0x10) 1/r32/ecx
8d/copy-address *(eax+ecx) 1/r32/ecx
#
(_append-4 %eax %ecx %edx %ebx) # => eax
# s->read += n
# (uses n rather than the count returned in eax; source and destination
# ranges were both sized to exactly n above)
8b/-> *(ebp+0x10) 1/r32/ecx
01/add-to *(esi+4) 1/r32/ecx
$read-from-stream:end:
# . restore registers
5e/pop-to-esi
5b/pop-to-ebx
5a/pop-to-edx
59/pop-to-ecx
58/pop-to-eax
# . epilogue
89/<- %esp 5/r32/ebp
5d/pop-to-ebp
c3/return
# Peek at the next unread byte of stream 's' without consuming it.
# Returns s->data[s->read] in eax (top 3 bytes cleared), or 0 if there are no
# unread bytes (s->read >= s->write). s->read is not modified.
stream-first: # s: (addr stream byte) -> result/eax: byte
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
# . save registers
51/push-ecx
56/push-esi
# result = 0
b8/copy-to-eax 0/imm32
# esi = s
8b/-> *(ebp+8) 6/r32/esi
# var idx/ecx: int = s->read
8b/-> *(esi+4) 1/r32/ecx
# if idx >= s->write return 0
3b/compare-with 1/r32/ecx *esi
7d/jump-if->= $stream-first:end/disp8
# result = s->data[idx]
8a/byte-> *(esi+ecx+0xc) 0/r32/AL
$stream-first:end:
# . restore registers
5e/pop-to-esi
59/pop-to-ecx
# . epilogue
89/<- %esp 5/r32/ebp
5d/pop-to-ebp
c3/return
# Peek at the most recently written byte of stream 's'.
# Returns s->data[s->write - 1] in eax (top 3 bytes cleared), or 0 if there
# are no unread bytes (s->read >= s->write). The stream is not modified.
stream-final: # s: (addr stream byte) -> result/eax: byte
# . prologue
55/push-ebp
89/<- %ebp 4/r32/esp
# . save registers
51/push-ecx
56/push-esi
# result = 0
b8/copy-to-eax 0/imm32
# esi = s
8b/-> *(ebp+8) 6/r32/esi
# var max/ecx: int = s->write
8b/-> *esi 1/r32/ecx
# if s->read >= max return 0
39/compare-with *(esi+4) 1/r32/ecx
7d/jump-if->= $stream-final:end/disp8
# var idx/ecx: int = max - 1
49/decrement-ecx
# result = s->data[idx]
8a/byte-> *(esi+ecx+0xc) 0/r32/AL
$stream-final:end:
# . restore registers
5e/pop-to-esi
59/pop-to-ecx
# . epilogue
89/<- %esp 5/r32/ebp
5d/pop-to-ebp
c3/return

View File

@ -1,3 +1,8 @@
sig pixel screen: (addr screen), x: int, y: int, color: int
sig read-key kbd: (addr keyboard) -> _/eax: byte
sig draw-grapheme screen: (addr screen), g: grapheme, x: int, y: int, color: int
sig write f: (addr stream byte), s: (addr array byte)
sig append-byte f: (addr stream byte), n: int
sig read-byte s: (addr stream byte) -> _/eax: byte
sig stream-empty? s: (addr stream _) -> _/eax: boolean

View File

@ -1,16 +1,13 @@
fn draw-text-rightward screen: (addr screen), _text: (addr array byte), x: int, y: int, color: int {
var text/esi: (addr array byte) <- copy _text
var len/ecx: int <- length text
var i/edx: int <- copy 0
fn draw-text-rightward screen: (addr screen), text: (addr array byte), x: int, y: int, color: int {
var stream-storage: (stream byte 0x100)
var stream/esi: (addr stream byte) <- address stream-storage
write stream, text
{
compare i, len
break-if->=
var g/eax: (addr byte) <- index text, i
var g2/eax: byte <- copy-byte *g
var g3/eax: grapheme <- copy g2
draw-grapheme screen, g3, x, y, color
var g/eax: grapheme <- read-grapheme stream
compare g, 0xffffffff # end-of-file
break-if-=
draw-grapheme screen, g, x, y, color
add-to x, 8 # font-width
i <- increment
loop
}
}

193
baremetal/403unicode.mu Normal file
View File

@ -0,0 +1,193 @@
# Helpers for Unicode.
#
# Mu has no characters, only code points and graphemes.
# Code points are the indivisible atoms of text streams.
# https://en.wikipedia.org/wiki/Code_point
# Graphemes are the smallest self-contained unit of text.
# Graphemes may consist of multiple code points.
#
# Mu graphemes are always represented in utf-8, and they are required to fit
# in 4 bytes.
#
# Mu doesn't currently support combining code points, or graphemes made of
# multiple code points. One day we will.
# We also don't currently support code points that translate into multiple
# or wide graphemes. (In particular, Tab will never be supported.)
# transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox
# https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm
#
# The day we want to support combining characters, this function will need to
# take multiple code points. Or something.
# Encode a single code point as a utf-8 grapheme.
# The utf-8 bytes are packed into the result starting from the lowest byte:
# the leading byte of the encoding is the low byte of the result, followed by
# the continuation bytes in order.
# Code points above 0x1fffff don't fit in 4 bytes of utf-8 and yield 0.
fn to-grapheme in: code-point -> _/eax: grapheme {
  var c/eax: int <- copy in
  var num-trailers/ecx: int <- copy 0
  var first/edx: int <- copy 0
  $to-grapheme:compute-length: {
    # single byte: just return it
    compare c, 0x7f
    {
      break-if->
      var g/eax: grapheme <- copy c
      return g
    }
    # 2 bytes
    compare c, 0x7ff
    {
      break-if->
      num-trailers <- copy 1
      first <- copy 0xc0
      break $to-grapheme:compute-length
    }
    # 3 bytes
    compare c, 0xffff
    {
      break-if->
      num-trailers <- copy 2
      first <- copy 0xe0
      break $to-grapheme:compute-length
    }
    # 4 bytes
    compare c, 0x1fffff
    {
      break-if->
      num-trailers <- copy 3
      first <- copy 0xf0
      break $to-grapheme:compute-length
    }
    # more than 4 bytes: unsupported
    # Every smaller range has already been handled above, so getting here
    # means c > 0x1fffff. Bail out unconditionally rather than falling through
    # with num-trailers = 0 and returning the raw code point.
    # TODO: print error message to stderr
    return 0
  }
  # emit trailer bytes, 6 bits from 'in', first two bits '10'
  # (the lowest 6 bits are emitted first and end up in the highest byte)
  var result/edi: grapheme <- copy 0
  {
    compare num-trailers, 0
    break-if-<=
    var tmp/esi: int <- copy c
    tmp <- and 0x3f
    tmp <- or 0x80
    result <- shift-left 8
    result <- or tmp
    # update loop state
    c <- shift-right 6
    num-trailers <- decrement
    loop
  }
  # emit engine: the leading byte, combining the length marker in 'first' with
  # the bits of c left over after the trailers
  result <- shift-left 8
  result <- or c
  result <- or first
  #
  return result
}
# TODO: bring in tests once we have check-ints-equal
# read the next grapheme from a stream of bytes
# Pull one utf-8 grapheme off the front of 'in'.
# Yields 0xffffffff when the stream is already exhausted, and 0 when the
# leading byte is in the range 0xf8-0xfd (no supported length).
# The leading byte lands in the lowest byte of the result; each continuation
# byte is shifted one byte further left than the one before it.
fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
  # nothing left to read? signal EOF
  {
    var done?/eax: boolean <- stream-empty? in
    compare done?, 0  # false
    break-if-=
    return 0xffffffff
  }
  var lead/eax: byte <- read-byte in
  var trailers-left/ecx: int <- copy 0
  $read-grapheme:compute-length: {
    # 0xfe and 0xff: hand back as is
    compare lead, 0xfe
    {
      break-if-<
      var g/eax: grapheme <- copy lead
      return g
    }
    # anything below 0xc0 is a complete grapheme on its own
    compare lead, 0xc0
    {
      break-if->=
      var g/eax: grapheme <- copy lead
      return g
    }
    # 0xc0-0xdf: one continuation byte follows
    compare lead, 0xe0
    {
      break-if->=
      trailers-left <- copy 1
      break $read-grapheme:compute-length
    }
    # 0xe0-0xef: two continuation bytes follow
    compare lead, 0xf0
    {
      break-if->=
      trailers-left <- copy 2
      break $read-grapheme:compute-length
    }
    # 0xf0-0xf7: three continuation bytes follow
    compare lead, 0xf8
    {
      break-if->=
      trailers-left <- copy 3
      break $read-grapheme:compute-length
    }
    # TODO: print error message
    return 0
  }
  # fold each continuation byte in above the bytes gathered so far
  var shift/edx: int <- copy 1
  var acc/edi: grapheme <- copy lead
  {
    compare trailers-left, 0
    break-if-<=
    var next/eax: byte <- read-byte in
    var next-shifted/eax: int <- copy next
    next-shifted <- shift-left-bytes next-shifted, shift
    acc <- or next-shifted
    # advance to the next byte position
    trailers-left <- decrement
    shift <- increment
    loop
  }
  return acc
}
# needed because available primitives only shift by a literal/constant number of bits
# Return 'n' shifted left by 'k' whole bytes.
# At most 4 byte-shifts are performed no matter how large 'k' is; a 'k' of 0
# or less returns 'n' unchanged.
fn shift-left-bytes n: int, k: int -> _/eax: int {
  var shifted/eax: int <- copy n
  var done/ecx: int <- copy 0
  {
    # a 32-bit value holds only 4 bytes, so never shift more than 4 times
    compare done, 4
    break-if->=
    # stop once we've shifted k times
    compare done, k
    break-if->=
    shifted <- shift-left 8
    done <- increment
    loop
  }
  return shifted
}
# write a grapheme to a stream of bytes
# Bytes are appended starting from the lowest byte of 'g' and moving upward.
# this is like write-to-stream, except we stop as soon as the remaining
# higher-order bytes are all 0, so between 1 and 4 bytes get written
#
# The working value 'c' stays in eax across every call to append-byte, so this
# relies on append-byte leaving eax intact.
fn write-grapheme out: (addr stream byte), g: grapheme {
$write-grapheme:body: {
var c/eax: int <- copy g
append-byte out, c # first byte is always written
# second byte, if anything is left above the first
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
append-byte out, c
# third byte
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
append-byte out, c
# fourth byte
c <- shift-right 8
compare c, 0
break-if-= $write-grapheme:body
append-byte out, c
}
}