From 60a50c92456fed87c4eaceee7a4649098238a2eb Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Thu, 2 Sep 2021 15:38:45 -0700 Subject: [PATCH] support combining characters in streams of text Fake screens can't handle them yet. --- 103glyph.subx | 26 ++++++++++++++++------ 500fake-screen.mu | 23 +++++++++++++++++++- 501draw-text.mu | 40 +++++++++++++++++++++++++++++++--- apps/ex15.mu | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 10 deletions(-) diff --git a/103glyph.subx b/103glyph.subx index 4a3f7267..3fef28b4 100644 --- a/103glyph.subx +++ b/103glyph.subx @@ -130,9 +130,9 @@ wide-code-point?: # c: code-point -> _/eax: boolean 89/<- %ebp 4/r32/esp # eax = c 8b/-> *(ebp+8) 0/r32/eax - # if (c >= 128) return # characters beyond ASCII currently not supported - 3d/compare-eax-and 0x80/imm32 - 0f 8d/jump-if->= $wide-code-point?:end/disp32 + # if (c >= 4352) return false + 3d/compare-eax-and 0x1100/imm32 + 0f 8d/jump-if->= $wide-code-point?:return-false/disp32 # var letter-bitmap/eax = font[c] 69/multiply %eax 0x22/imm32/glyph-size 0/r32/eax 05/add-to-eax 0x0010000c/imm32/Font # see boot.subx @@ -147,15 +147,22 @@ $wide-code-point?:end: 5d/pop-to-ebp c3/return +$wide-code-point?:return-false: + b8/copy-to-eax 0/imm32/false + # . epilogue + 89/<- %esp 5/r32/ebp + 5d/pop-to-ebp + c3/return + combining-code-point?: # c: code-point -> _/eax: boolean # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # eax = c 8b/-> *(ebp+8) 0/r32/eax - # if (c >= 128) return # characters beyond ASCII currently not supported - 3d/compare-eax-and 0x80/imm32 - 0f 8d/jump-if->= $combining-code-point?:end/disp32 + # if (c >= 4352) return false + 3d/compare-eax-and 0x1100/imm32 + 0f 8d/jump-if->= $combining-code-point?:return-false/disp32 # var letter-bitmap/eax = font[c] 69/multiply %eax 0x22/imm32/glyph-size 0/r32/eax 05/add-to-eax 0x0010000c/imm32/Font # see boot.subx @@ -168,6 +175,13 @@ $combining-code-point?:end: 5d/pop-to-ebp c3/return +$combining-code-point?:return-false: + b8/copy-to-eax 0/imm32/false + # . epilogue + 89/<- %esp 5/r32/ebp + 5d/pop-to-ebp + c3/return + # buffer: naked address to raw screen RAM without a length # letter-bitmap: naked address to 8-pixel wide font glyph draw-narrow-code-point-on-screen-buffer: # buffer: (addr byte), letter-bitmap: (addr byte), x: int, y: int, color: int, background-color: int, overlay?: boolean, screen-width: int, screen-height: int diff --git a/500fake-screen.mu b/500fake-screen.mu index be7d9e0f..bda3673f 100644 --- a/500fake-screen.mu +++ b/500fake-screen.mu @@ -23,7 +23,7 @@ type screen { } type screen-cell { - data: code-point + data: code-point # TODO: support combining characters overlaid on another character color: int background-color: int unused?: boolean @@ -109,6 +109,27 @@ fn draw-code-point _screen: (addr screen), c: code-point, x: int, y: int, color: return 1 } +fn overlay-code-point _screen: (addr screen), c: code-point, x: int, y: int, color: int, background-color: int -> _/eax: int { + var screen/esi: (addr screen) <- copy _screen + { + compare screen, 0 + break-if-!= + var result/eax: int <- overlay-code-point-on-real-screen c, x, y, color, background-color + return result + } + # fake screen + # TODO: support overlays in fake screen + var wide?/eax: boolean <- wide-code-point? c + compare wide?, 0/false + { + break-if-= + draw-wide-code-point-on-fake-screen screen, c, x, y, color, background-color + return 2 + } + draw-narrow-code-point-on-fake-screen screen, c, x, y, color, background-color + return 1 +} + fn draw-narrow-code-point-on-fake-screen _screen: (addr screen), c: code-point, x: int, y: int, color: int, background-color: int { var screen/esi: (addr screen) <- copy _screen # ignore if out of bounds diff --git a/501draw-text.mu b/501draw-text.mu index a43fc822..d598c08f 100644 --- a/501draw-text.mu +++ b/501draw-text.mu @@ -207,10 +207,21 @@ fn draw-stream-wrapping-right-then-down screen: (addr screen), stream: (addr str var xcurr/ecx: int <- copy x var ycurr/edx: int <- copy y var c/ebx: code-point <- copy 0 + var next-c/esi: code-point <- copy 0 $draw-stream-wrapping-right-then-down:loop: { - var g/eax: grapheme <- read-grapheme stream - var _c/eax: code-point <- to-code-point g - c <- copy _c + # read c from either next-c or stream + $draw-stream-wrapping-right-then-down:read-base: { + compare next-c, 0 + { + break-if-= + c <- copy next-c + next-c <- copy 0 + break $draw-stream-wrapping-right-then-down:read-base + } + var g/eax: grapheme <- read-grapheme stream + var _c/eax: code-point <- to-code-point g + c <- copy _c + } compare c, 0xffffffff/end-of-file break-if-= compare c, 0xa/newline @@ -223,6 +234,29 @@ fn draw-stream-wrapping-right-then-down screen: (addr screen), stream: (addr str break $draw-stream-wrapping-right-then-down:loop } var offset/eax: int <- draw-code-point screen, c, xcurr, ycurr, color, background-color + # overlay a combining character if necessary + $draw-stream-wrapping-right-then-down:read-combiner: { + var done?/eax: boolean <- stream-empty? stream + compare done?, 0/false + break-if-!= + # read a character + var g/eax: grapheme <- read-grapheme stream + var c/eax: code-point <- to-code-point g + # if not a combining character, save for next iteration and loop + { + var combining-code-point?/eax: boolean <- combining-code-point? c + compare combining-code-point?, 0/false + } + { + break-if-!= + next-c <- copy c + break $draw-stream-wrapping-right-then-down:read-combiner + } + # otherwise overlay it without saving its width + # This means strange results if a base and its combiner have different + # widths. We'll always follow the base width. + var dummy/eax: int <- overlay-code-point screen, c, xcurr, ycurr, color, background-color + } xcurr <- add offset compare xcurr, xmax { diff --git a/apps/ex15.mu b/apps/ex15.mu index 991b7f08..3f0d32bc 100644 --- a/apps/ex15.mu +++ b/apps/ex15.mu @@ -77,4 +77,59 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) # kaha var dummy/eax: int <- draw-code-point-on-real-screen 0x0915/devanagari-letter-ka, 0x13/x 9/y, 3/fg 0/bg var dummy/eax: int <- overlay-code-point-on-real-screen 0x0903/devanagari-visarga, 0x13/x 9/y, 3/fg 0/bg + + # render the same letters as a single stream of utf-8 graphemes rather than individual code-points. + var text-storage: (stream byte 0x200) + var text/esi: (addr stream byte) <- address text-storage + var g/eax: grapheme <- to-grapheme 0x0915/devanagari-letter-ka + var ka/ecx: grapheme <- copy g + # ka + write-grapheme text, ka + # kaa + write-grapheme text, ka + g <- to-grapheme 0x093e/devanagari-vowel-aa + write-grapheme text, g + # ki + write-grapheme text, ka + g <- to-grapheme 0x093f/devanagari-vowel-i + write-grapheme text, g + # kee + write-grapheme text, ka + g <- to-grapheme 0x0940/devanagari-vowel-ii + write-grapheme text, g + # ku + write-grapheme text, ka + g <- to-grapheme 0x0941/devanagari-vowel-u + write-grapheme text, g + # koo + write-grapheme text, ka + g <- to-grapheme 0x0942/devanagari-vowel-oo + write-grapheme text, g + # kay + write-grapheme text, ka + g <- to-grapheme 0x0947/devanagari-vowel-E + write-grapheme text, g + # kai + write-grapheme text, ka + g <- to-grapheme 0x0948/devanagari-vowel-ai + write-grapheme text, g + # ko + write-grapheme text, ka + g <- to-grapheme 0x094b/devanagari-vowel-o + write-grapheme text, g + # kow + write-grapheme text, ka + g <- to-grapheme 0x094f/devanagari-vowel-aw + write-grapheme text, g + # kan + write-grapheme text, ka + g <- to-grapheme 0x0902/devanagari-anusvara + write-grapheme text, g + # kaha + write-grapheme text, ka + g <- to-grapheme 0x0903/devanagari-visarga + write-grapheme text, g + # render everything + set-cursor-position screen, 4/x 0xe/y + draw-stream-wrapping-right-then-down-from-cursor-over-full-screen screen, text, 3/fg 0/bg }