Resolution of improper multibyte rendering.

This commit is contained in:
Paul Mosier 2023-03-02 20:00:49 -05:00
parent 8959f4ef97
commit 0a8acd40cc
2 changed files with 26 additions and 52 deletions

View File

@ -36,7 +36,8 @@ When the software is running, the '?' key will give you the list of commands ava
## Known issues
Menus and forms will not render correctly if your terminal window is too narrow. Resize the terminal and this should work.
* Menus and forms will not render correctly if your terminal window is too narrow. Widen the terminal window and they should render correctly.
* Extended ASCII characters (single-byte, non-UTF-8 characters, typically in the Latin-1 set) will not render and are shown as '?' instead, due to a quirk of how ncurses handles UTF-8.
## Feedback / Support / Gratuity

View File

@ -101,19 +101,7 @@ void pane::renderText() {
int nontextual = parseConf(config["markup"]["nontextual"]);
// int footnotes = 0; not implemented yet
// convert input to wide characters for proper rendering
//wchar_t* text = (wchar_t*) malloc(sizeof(wchar_t) * (length + 1));
//if (! text) wrapup(1, "Error allocating memory in renderText.\n");
//wmemset(text, L'\0', length + 1);
//
///* some typographical quotes won't go through mbstowcs (eg. Eph 4:8 BWE), so
//* alter those; this seems inefficient, but we don't know how many bytes are
//* in each multibyte char so I think it has to be done this way */
//for (int i = 0; i < length; i++) {
//int converted = mbstowcs(text, rawtext, i);
//if (converted == -1) strncpy(&(rawtext[i-1]), "'", 2);
//}
//mbstowcs(text, rawtext, length - 1);
// copy our unformatted text to local scope for window formatting
char* text = (char*) malloc(sizeof(char*) * length + 1);
if (! text) wrapup(1, "Error allocating memory in renderText.\n");
memset(text, '\0', strlen(rawtext) + 1);
@ -129,13 +117,14 @@ void pane::renderText() {
for (int p = 0; p < 2; p++) {
// loop through the text
// XXX -- we have to use mbrlen() to determine how large the next character is, and advance by that
for (int i = 0; i < length; i++) {
int i = 0;
while (i < length) {
/* check if we're in markup - it's not printed and it doesn't
* affect our line lengths, so spin through it */
if (inmarkup) {
if (text[i] == '>') inmarkup = 0;
i++;
continue;
}
@ -143,27 +132,21 @@ void pane::renderText() {
if ((text[i] == '<') && (! rawonly)) {
inmarkup = 1;
//if ((! strmatch(&text[i], L"</q>", 1)) && (p == 1)) {
if ((! strmatch(&text[i], "</q>", 1)) && (p == 1)) {
// end of redletter bracket
makered = 0;
//} else if ((! strmatch(&text[i], L"</transChange>", 1))
} else if ((! strmatch(&text[i], "</transChange>", 1))
&& (p == 1)) {
// end of interpretive text bracket
makeital = 0;
//} else if ((! strmatch(&text[i], L"<p>", 1)) && (p == 0)) {
} else if ((! strmatch(&text[i], "<p>", 1)) && (p == 0)) {
// paragraph break - replace </p> with </>'\n'
//int endindex = strmatch(&text[i], L"</p>", 0);
int endindex = strmatch(&text[i], "</p>", 0);
if (endindex != -1)
//wmemcpy(&text[i + endindex + 2], L">\n", 2);
memcpy(&text[i + endindex + 2], ">\n", 2);
//} else if ((! strmatch(&text[i], L"<w savlm=", 1))
} else if ((! strmatch(&text[i], "<w savlm=", 1))
&& (p == 0) && (strongs == 1)) {
/* Strong's number - the format is below, but note
@ -173,35 +156,27 @@ void pane::renderText() {
* <w savlm="strong:[G|H]NNNN(N)">word</w> */
// 1. get boundary of open tag
//int endbracket = strmatch(&text[i], L">", 0);
int endbracket = strmatch(&text[i], ">", 0);
// 2. get Strong's parameters
//wchar_t* num = (wchar_t*) malloc(sizeof(wchar_t*)
char* num = (char*) malloc(sizeof(char*) * 100);
if (! num) wrapup(1,
"Error declaring memory in renderText.\n");
int numidx = 0;
//int strnum = strmatch(&text[i], L"strong:", 0) + 7;
int strnum = strmatch(&text[i], "strong:", 0) + 7;
while ((strnum < endbracket) && (strnum != -1)) {
//int nextspace = strmatch(&text[i+strnum], L" ", 0);
int nextspace = strmatch(&text[i+strnum], " ", 0);
//int space1 = strmatch(&text[i+strnum], L"\"", 0);
int space1 = strmatch(&text[i+strnum], "\"", 0);
int len = ((nextspace == -1) || (space1 < nextspace)
? space1
: nextspace);
//if (numidx != 0) wmemcpy(&num[numidx++], L" ", 1);
if (numidx != 0) memcpy(&num[numidx++], " ", 1);
//wmemcpy(&num[numidx], &text[i+strnum], len);
memcpy(&num[numidx], &text[i+strnum], len);
numidx += len;
int nextnum =
//strmatch(&text[i+strnum], L"strong:", 0);
strmatch(&text[i+strnum], "strong:", 0);
strnum = (nextnum != -1
? strnum + nextnum + 7
@ -213,31 +188,21 @@ void pane::renderText() {
* below zero, otherwise get word boundaries */
int wordstart = endbracket + 1;
int endtag =
//(endbracket > strmatch(&text[i], L"/", 0)
(endbracket > strmatch(&text[i], "/", 0)
? endbracket + 1
//: strmatch(&text[i], L"</w>", 0));
: strmatch(&text[i], "</w>", 0));
// 4. determine word boundaries & rewrite
int wordlen = endtag - wordstart;
//wchar_t* word = (wchar_t*) malloc(sizeof(wchar_t*)
char* word = (char*) malloc(sizeof(char*)
* (wordlen == 0 ? 1 : wordlen));
if (! word) wrapup(1,
"Error rewriting markup in renderText.\n");
//wmemcpy(word, &text[i+wordstart], wordlen);
memcpy(word, &text[i+wordstart], wordlen);
// rewrite
int start = i + endtag - wordlen - numidx - 4;
//wmemcpy(&text[start], L"\"", 1);
//wmemcpy(&text[start + 1], L">", 1);
//wmemcpy(&text[start + 2], word, wordlen);
//wmemcpy(&text[start + 2 + wordlen], L"[", 1);
//wmemcpy(&text[start + 3 + wordlen], num, numidx);
//wmemcpy(&text[start + 3 + wordlen + numidx], L"]", 1);
memcpy(&text[start], "\"", 1);
memcpy(&text[start + 1], ">", 1);
memcpy(&text[start + 2], word, wordlen);
@ -252,13 +217,11 @@ void pane::renderText() {
// kept for debugging
//fwprintf(stderr, L"(I): %ls\n", text);
//} else if ((! strmatch(&text[i], L"<q marker", 1))
} else if ((! strmatch(&text[i], "<q marker", 1))
&& (p == 1) && (redletter == 1)) {
// start of redletter bracket
makered = 1;
//} else if ((! strmatch(&text[i], L"<transChange type=\"added\"", 1))
} else if ((! strmatch(&text[i], "<transChange type=\"added\"", 1))
&& (p == 1) && (nontextual == 1)) {
// start of interpretive text bracket
@ -269,8 +232,16 @@ void pane::renderText() {
continue;
} // markup check
// XXX -- add in here call to mbrlen() to determine number of bytes this character is --
// we will need it to determine our advance amount in the loop
/* determine how large this multibyte character is -- we will need it to
* determine our advance amount in the loop */
int offset = 0;
size_t charlen = mbrlen(&text[i], 5, NULL);
while (((int) mbrlen(&text[i+offset], 5, NULL) == -1) &&
(i + offset < length)) {
/* mbrlen() says the next char is not a valid multibyte char, so find
* the length by figuring out where the succeeding character is */
offset++;
}
if (p == 0) {
// handle word wrapping
@ -301,7 +272,6 @@ void pane::renderText() {
lastprintspace = printable;
}
// XXX -- this is probably okay; check for linelength issues when moving to mbrlen()
linelength++;
// various word wrapping debugging statements
@ -312,18 +282,21 @@ void pane::renderText() {
} else {
// printing -- pull out the single character we care about
//wchar_t single[] = L"\0\0";
// XXX -- need to use mbrlen() to determine how many bytes we really need to pull
char single[] = "\0\0";
//wcsncpy(&single[0], &text[i], 1);
strncpy(&single[0], &text[i], 1);
wattrset(pad, COLOR_PAIR(makered)
| (makeital ? A_ITALIC : 0));
waddstr(pad, single);
if (((int) charlen == -1) && (offset == 1)) {
/* This is an extended ascii character (probably Latin-1) and not
* UTF-8. In setting up ncurses for UTF-8 we set a locale and seem to
* make these characters unprintable. To avoid massive rendering
* errors we have to substitute them with something else. */
waddstr(pad, "?");
} else waddnstr(pad, &text[i], charlen);
}
printable++;
i += ((int) charlen == -1 ? offset : (int) charlen);
} // text loop
// text rewriting debugging