From d2f96cb0b6c5f05f36122088d7daa546c283fd9a Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Wed, 1 Sep 2021 12:46:25 -0700 Subject: [PATCH] rendering code-points with combining characters There's a new example app showing this ability. Still to go: support for combining characters when rendering text and streams. --- 103glyph.subx | 72 +++++++++++++++++++++++++++++++++++++++------------ 400.mu | 2 ++ apps/ex14.mu | 9 ++----- apps/ex15.mu | 36 ++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 24 deletions(-) create mode 100644 apps/ex15.mu diff --git a/103glyph.subx b/103glyph.subx index 23b7ba4f..4a3f7267 100644 --- a/103glyph.subx +++ b/103glyph.subx @@ -1,9 +1,10 @@ # Use the built-in font to draw glyphs to screen. # https://en.wikipedia.org/wiki/Glyph#Typography -# The Mu computer can currently only render glyphs corresponding to single -# code points. No combining characters. +# Extremely hacky support for combining characters. # https://en.wikipedia.org/wiki/Code_point # https://en.wikipedia.org/wiki/Combining_character +# All we support is drawing combining characters atop the same screen cell as +# a single base code point. See the overlay? arguments below. # # We need to do this in machine code because Mu doesn't have global variables # yet (for the start of the font). @@ -20,13 +21,25 @@ draw-code-point-on-real-screen: # c: code-point, x: int, y: int, color: int, ba 55/push-ebp 89/<- %ebp 4/r32/esp # - (draw-code-point-on-screen-buffer *Video-memory-addr *(ebp+8) *(ebp+0xc) *(ebp+0x10) *(ebp+0x14) *(ebp+0x18) 0x80 0x30) # => eax + (draw-code-point-on-screen-buffer *Video-memory-addr *(ebp+8) *(ebp+0xc) *(ebp+0x10) *(ebp+0x14) *(ebp+0x18) 0 0x80 0x30) # 0/no-overlay => eax $draw-code-point-on-real-screen:end: # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return +overlay-code-point-on-real-screen: # c: code-point, x: int, y: int, color: int, background-color: int -> _/eax: int + # . 
prologue + 55/push-ebp + 89/<- %ebp 4/r32/esp + # draw c at cell (x, y) of the 0x80 x 0x30 real screen with overlay? set, so pixels the glyph leaves unset are not repainted with background-color + (draw-code-point-on-screen-buffer *Video-memory-addr *(ebp+8) *(ebp+0xc) *(ebp+0x10) *(ebp+0x14) *(ebp+0x18) 1 0x80 0x30) # 1/overlay => eax +$overlay-code-point-on-real-screen:end: + # . epilogue + 89/<- %esp 5/r32/ebp + 5d/pop-to-ebp + c3/return + draw-code-point-on-screen-array: # screen-data: (addr array byte), c: code-point, x: int, y: int, color: int, background-color: int, screen-width: int, screen-height: int -> _/eax: int # . prologue 55/push-ebp @@ -71,15 +84,15 @@ $draw-code-point-on-screen-array:abort: # 'buffer' here is not a valid Mu type: a naked address without a length. # returns number of 8x16 units printed to screen (1 or 2). -draw-code-point-on-screen-buffer: # buffer: (addr byte), c: code-point, x: int, y: int, color: int, background-color: int, screen-width: int, screen-height: int -> _/eax: int +draw-code-point-on-screen-buffer: # buffer: (addr byte), c: code-point, x: int, y: int, color: int, background-color: int, overlay?: boolean, screen-width: int, screen-height: int -> _/eax: int # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . 
save registers 56/push-esi # switch screen-width and screen-height from code-point to pixel units - c1 4/subop/shift-left *(ebp+20) 3/imm8/log2-font-width - c1 4/subop/shift-left *(ebp+24) 4/imm8/log2-font-height + c1 4/subop/shift-left *(ebp+24) 3/imm8/log2-font-width + c1 4/subop/shift-left *(ebp+28) 4/imm8/log2-font-height # esi = c 8b/-> *(ebp+0xc) 6/r32/esi # if (c >= 4352) return # unicode planes supported: latin, greek, cyrillic, armenian, hebrew, arabic, syriac, thaana, n'ko, indian (iscii), sinhala, thai, lao, tibetan, myanmar, georgian @@ -97,11 +110,11 @@ draw-code-point-on-screen-buffer: # buffer: (addr byte), c: code-point, x: int, 3d/compare-eax-and 8/imm32 { 75/jump-if-!= break/disp8 - (draw-narrow-code-point-on-screen-buffer *(ebp+8) %esi *(ebp+0x10) *(ebp+0x14) *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24)) + (draw-narrow-code-point-on-screen-buffer *(ebp+8) %esi *(ebp+0x10) *(ebp+0x14) *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24) *(ebp+0x28)) b8/copy-to-eax 1/imm32 eb/jump $draw-code-point-on-screen-buffer:end/disp8 } - (draw-wide-code-point-on-screen-buffer *(ebp+8) %esi *(ebp+0x10) *(ebp+0x14) *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24)) + (draw-wide-code-point-on-screen-buffer *(ebp+8) %esi *(ebp+0x10) *(ebp+0x14) *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24) *(ebp+0x28)) b8/copy-to-eax 2/imm32 $draw-code-point-on-screen-buffer:end: # . restore registers @@ -134,9 +147,30 @@ $wide-code-point?:end: 5d/pop-to-ebp c3/return +combining-code-point?: # c: code-point -> _/eax: boolean + # . prologue + 55/push-ebp + 89/<- %ebp 4/r32/esp + # eax = c + 8b/-> *(ebp+8) 0/r32/eax + # if (c >= 128) return with eax still holding c # NOTE(review): every code point >= 0x80 therefore yields a non-zero result, while draw-code-point-on-screen-buffer bounds c at 4352 -- confirm the intended bound and return value + 3d/compare-eax-and 0x80/imm32 + 0f 8d/jump-if->= $combining-code-point?:end/disp32 + # var letter-bitmap/eax = font[c] + 69/multiply %eax 0x22/imm32/glyph-size 0/r32/eax + 05/add-to-eax 0x0010000c/imm32/Font # see boot.subx + # result = letter-bitmap->is-combine?, i.e. the byte at offset 1 of c's 0x22-byte font entry, zero-extended into eax 
+ 8a/byte-> *(eax+1) 0/r32/AL + 25/and-eax-with 0xff/imm32 +$combining-code-point?:end: + # . epilogue + 89/<- %esp 5/r32/ebp + 5d/pop-to-ebp + c3/return + # buffer: naked address to raw screen RAM without a length # letter-bitmap: naked address to 8-pixel wide font glyph -draw-narrow-code-point-on-screen-buffer: # buffer: (addr byte), letter-bitmap: (addr byte), x: int, y: int, color: int, background-color: int, screen-width: int, screen-height: int +draw-narrow-code-point-on-screen-buffer: # buffer: (addr byte), letter-bitmap: (addr byte), x: int, y: int, color: int, background-color: int, overlay?: boolean, screen-width: int, screen-height: int # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp @@ -161,7 +195,7 @@ draw-narrow-code-point-on-screen-buffer: # buffer: (addr byte), letter-bitmap: # var row-bitmap/ebx: byte = *letter-bitmap bb/copy-to-ebx 0/imm32 8a/byte-> *esi 3/r32/BL - (draw-run-of-pixels-from-glyph *(ebp+8) %ebx *(ebp+0x10) %edx *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24)) + (draw-run-of-pixels-from-glyph *(ebp+8) %ebx *(ebp+0x10) %edx *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24) *(ebp+0x28)) # ++y 42/increment-edx # next bitmap row @@ -182,7 +216,7 @@ $draw-narrow-code-point-on-screen-buffer:end: # buffer: naked address to raw screen RAM without a length # letter-bitmap: naked address to 16-pixel wide font glyph -draw-wide-code-point-on-screen-buffer: # buffer: (addr byte), letter-bitmap: (addr byte), x: int, y: int, color: int, background-color: int, screen-width: int, screen-height: int +draw-wide-code-point-on-screen-buffer: # buffer: (addr byte), letter-bitmap: (addr byte), x: int, y: int, color: int, background-color: int, overlay?: boolean, screen-width: int, screen-height: int # . 
prologue 55/push-ebp 89/<- %ebp 4/r32/esp @@ -213,11 +247,11 @@ draw-wide-code-point-on-screen-buffer: # buffer: (addr byte), letter-bitmap: (a # ecx = x 8b/-> *(ebp+0x10) 1/r32/ecx # first half-row - (draw-run-of-pixels-from-glyph *(ebp+8) %ebx %ecx %edx *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24)) + (draw-run-of-pixels-from-glyph *(ebp+8) %ebx %ecx %edx *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24) *(ebp+0x28)) # second half-row 8a/byte-> *(esi+1) 3/r32/BL 41/increment-ecx - (draw-run-of-pixels-from-glyph *(ebp+8) %ebx %ecx %edx *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24)) + (draw-run-of-pixels-from-glyph *(ebp+8) %ebx %ecx %edx *(ebp+0x18) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24) *(ebp+0x28)) # ++y 42/increment-edx # next bitmap row @@ -239,7 +273,7 @@ $draw-wide-code-point-on-screen-buffer:end: c3/return # draw 8 pixels from a single glyph byte in a font bitmap -draw-run-of-pixels-from-glyph: # buffer: (addr byte), glyph-byte: byte, x: int, y: int, color: int, background-color: int, screen-width: int, screen-height: int +draw-run-of-pixels-from-glyph: # buffer: (addr byte), glyph-byte: byte, x: int, y: int, color: int, background-color: int, overlay?: boolean, screen-width: int, screen-height: int # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp @@ -265,11 +299,15 @@ draw-run-of-pixels-from-glyph: # buffer: (addr byte), glyph-byte: byte, x: int, # if LSB, draw a pixel in the given color { 73/jump-if-not-CF break/disp8 - (pixel-on-screen-buffer *(ebp+8) %eax *(ebp+0x14) *(ebp+0x18) *(ebp+0x20) *(ebp+0x24)) + (pixel-on-screen-buffer *(ebp+8) %eax *(ebp+0x14) *(ebp+0x18) *(ebp+0x24) *(ebp+0x28)) eb/jump $draw-code-point-on-screen-buffer:continue/disp8 } - # otherwise use the background color - (pixel-on-screen-buffer *(ebp+8) %eax *(ebp+0x14) *(ebp+0x1c) *(ebp+0x20) *(ebp+0x24)) + # otherwise use the background color (except when overlay?) 
+ { + 81 7/subop/compare *(ebp+0x20) 0/imm32/false + 75/jump-if-!= break/disp8 + (pixel-on-screen-buffer *(ebp+8) %eax *(ebp+0x14) *(ebp+0x1c) *(ebp+0x24) *(ebp+0x28)) + } $draw-code-point-on-screen-buffer:continue: # --x 48/decrement-eax diff --git a/400.mu b/400.mu index f0fe08f5..539093b8 100644 --- a/400.mu +++ b/400.mu @@ -1,8 +1,10 @@ # screen sig pixel-on-real-screen x: int, y: int, color: int sig draw-code-point-on-real-screen c: code-point, x: int, y: int, color: int, background-color: int -> _/eax: int +sig overlay-code-point-on-real-screen c: code-point, x: int, y: int, color: int, background-color: int -> _/eax: int sig draw-code-point-on-screen-array screen-data: (addr array byte), c: code-point, x: int, y: int, color: int, background-color: int, screen-width: int, screen-height: int -> _/eax: int sig wide-code-point? c: code-point -> _/eax: boolean +sig combining-code-point? c: code-point -> _/eax: boolean sig cursor-position-on-real-screen -> _/eax: int, _/ecx: int sig set-cursor-position-on-real-screen x: int, y: int sig draw-cursor-on-real-screen c: code-point diff --git a/apps/ex14.mu b/apps/ex14.mu index 4a2d5dd7..b940e4f6 100644 --- a/apps/ex14.mu +++ b/apps/ex14.mu @@ -1,7 +1,7 @@ # Unicode demo # -# Mu can't read Unicode from keyboard yet, so we'll read from disk and print -# to screen. +# Mu can't read Unicode from keyboard yet, so we'll read utf-8 from disk and +# print to screen. # # Steps for trying it out: # 1. Translate this example into a disk image code.img. @@ -13,11 +13,6 @@ # qemu-system-i386 -hda code.img -hdb data.img # # Expected output: 'நட' in green near the top-left corner of screen -# -# Limitations: -# - Utf-8 is the one true encoding. -# - No keyboard support yet. -# - Just single-code-point graphemes so far. No combiner characters, etc. 
fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) { var text-storage: (stream byte 0x200) diff --git a/apps/ex15.mu b/apps/ex15.mu new file mode 100644 index 00000000..2d1c6bcf --- /dev/null +++ b/apps/ex15.mu @@ -0,0 +1,36 @@ +# Demo of combining-character support in Mu, which can be summarized as, "the +# old typewriter-based approach of backing up one character and adding the +# accent or _matra_ in." +# https://en.wikipedia.org/wiki/Combining_character +# +# Mu uses this approach for both accents in Latin languages and vowel +# diacritics in Abugida scripts. +# https://en.wikipedia.org/wiki/Diacritic +# https://en.wikipedia.org/wiki/Abugida +# +# Steps for trying it out: +# 1. Translate this example into a disk image code.img. +# ./translate apps/ex15.mu +# 2. Run: (NOTE(review): main below never reads data-disk, so '-hdb data.img' is probably unnecessary here -- confirm) +# qemu-system-i386 -hda code.img -hdb data.img +# +# Expected output: 'à' in green in a few places near the top-left corner of +# screen, showing off what this approach can and cannot do. + +fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) { + # each 'à' is a base 'a' followed by a grave accent overlaid on the same cell; at the top of screen, the accent is almost cropped + var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 0/x 0/y, 3/fg 0/bg + var dummy/eax: int <- overlay-code-point-on-real-screen 0x0300/combining-grave-accent, 0/x 0/y, 3/fg 0/bg + + # directly below a grapheme with a descender ('g' one row up), the accent overlaps the descender in an ugly way + # https://en.wikipedia.org/wiki/Descender + var dummy/eax: int <- draw-code-point-on-real-screen 0x67/g, 4/x 3/y, 3/fg 0/bg + var dummy/eax: int <- draw-code-point-on-real-screen 0x61/a, 4/x 4/y, 3/fg 0/bg + var dummy/eax: int <- overlay-code-point-on-real-screen 0x0300/combining-grave-accent, 4/x 4/y, 3/fg 0/bg + + # beside a grapheme with a descender ('g' one column to the left), it becomes more obvious that fixed-width fonts can't make baselines line up + # https://en.wikipedia.org/wiki/Baseline_(typography) + var dummy/eax: int <- draw-code-point-on-real-screen 0x67/g, 8/x 3/y, 3/fg 0/bg + var dummy/eax: int <- 
draw-code-point-on-real-screen 0x61/a, 9/x 3/y, 3/fg 0/bg + var dummy/eax: int <- overlay-code-point-on-real-screen 0x0300/combining-grave-accent, 9/x 3/y, 3/fg 0/bg +}