Resolution of improper multibyte rendering.
This commit is contained in:
parent
8959f4ef97
commit
0a8acd40cc
|
@ -36,7 +36,8 @@ When the software is running, the '?' key will give you the list of commands ava
|
|||
|
||||
|
||||
## Known issues
|
||||
Menus and forms will not render correctly if your terminal window is too narrow. Resize the terminal and this should work.
|
||||
* Menus and forms will not render correctly if your terminal window is too narrow. Resize the terminal and this should work.
|
||||
* Extended ASCII characters, typically in the Latin-1 set, will not render due to a quirk of how ncurses handles UTF-8; each such character is displayed as a '?' instead.
|
||||
|
||||
|
||||
## Feedback / Support / Gratuity
|
||||
|
|
75
pane.cpp
75
pane.cpp
|
@ -101,19 +101,7 @@ void pane::renderText() {
|
|||
int nontextual = parseConf(config["markup"]["nontextual"]);
|
||||
// int footnotes = 0; not implemented yet
|
||||
|
||||
// convert input to wide characters for proper rendering
|
||||
//wchar_t* text = (wchar_t*) malloc(sizeof(wchar_t) * (length + 1));
|
||||
//if (! text) wrapup(1, "Error allocating memory in renderText.\n");
|
||||
//wmemset(text, L'\0', length + 1);
|
||||
//
|
||||
///* some typographical quotes won't go through mbstowcs (eg. Eph 4:8 BWE), so
|
||||
//* alter those; this seems inefficient, but we don't know how many bytes are
|
||||
//* in each multibyte char so I think it has to be done this way */
|
||||
//for (int i = 0; i < length; i++) {
|
||||
//int converted = mbstowcs(text, rawtext, i);
|
||||
//if (converted == -1) strncpy(&(rawtext[i-1]), "'", 2);
|
||||
//}
|
||||
//mbstowcs(text, rawtext, length - 1);
|
||||
// copy our unformatted text to local scope for window formatting
|
||||
char* text = (char*) malloc(sizeof(char*) * length + 1);
|
||||
if (! text) wrapup(1, "Error allocating memory in renderText.\n");
|
||||
memset(text, '\0', strlen(rawtext) + 1);
|
||||
|
@ -129,13 +117,14 @@ void pane::renderText() {
|
|||
for (int p = 0; p < 2; p++) {
|
||||
|
||||
// loop through the text
|
||||
// XXX -- we have to use mbrlen() to determine how large the next character is, and advance by that
|
||||
for (int i = 0; i < length; i++) {
|
||||
int i = 0;
|
||||
while (i < length) {
|
||||
|
||||
/* check if we're in markup - it's not printed and it doesn't
|
||||
* affect our line lengths, so spin through it */
|
||||
if (inmarkup) {
|
||||
if (text[i] == '>') inmarkup = 0;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -143,27 +132,21 @@ void pane::renderText() {
|
|||
if ((text[i] == '<') && (! rawonly)) {
|
||||
inmarkup = 1;
|
||||
|
||||
//if ((! strmatch(&text[i], L"</q>", 1)) && (p == 1)) {
|
||||
if ((! strmatch(&text[i], "</q>", 1)) && (p == 1)) {
|
||||
// end of redletter bracket
|
||||
makered = 0;
|
||||
|
||||
//} else if ((! strmatch(&text[i], L"</transChange>", 1))
|
||||
} else if ((! strmatch(&text[i], "</transChange>", 1))
|
||||
&& (p == 1)) {
|
||||
// end of interpretive text bracket
|
||||
makeital = 0;
|
||||
|
||||
//} else if ((! strmatch(&text[i], L"<p>", 1)) && (p == 0)) {
|
||||
} else if ((! strmatch(&text[i], "<p>", 1)) && (p == 0)) {
|
||||
// paragraph break - replace </p> with </>'\n'
|
||||
//int endindex = strmatch(&text[i], L"</p>", 0);
|
||||
int endindex = strmatch(&text[i], "</p>", 0);
|
||||
if (endindex != -1)
|
||||
//wmemcpy(&text[i + endindex + 2], L">\n", 2);
|
||||
memcpy(&text[i + endindex + 2], ">\n", 2);
|
||||
|
||||
//} else if ((! strmatch(&text[i], L"<w savlm=", 1))
|
||||
} else if ((! strmatch(&text[i], "<w savlm=", 1))
|
||||
&& (p == 0) && (strongs == 1)) {
|
||||
/* Strong's number - the format is below, but note
|
||||
|
@ -173,35 +156,27 @@ void pane::renderText() {
|
|||
* <w savlm="strong:[G|H]NNNN(N)">word</w> */
|
||||
|
||||
// 1. get boundary of open tag
|
||||
//int endbracket = strmatch(&text[i], L">", 0);
|
||||
int endbracket = strmatch(&text[i], ">", 0);
|
||||
|
||||
// 2. get Strong's parameters
|
||||
//wchar_t* num = (wchar_t*) malloc(sizeof(wchar_t*)
|
||||
char* num = (char*) malloc(sizeof(char*) * 100);
|
||||
if (! num) wrapup(1,
|
||||
"Error declaring memory in renderText.\n");
|
||||
int numidx = 0;
|
||||
|
||||
//int strnum = strmatch(&text[i], L"strong:", 0) + 7;
|
||||
int strnum = strmatch(&text[i], "strong:", 0) + 7;
|
||||
while ((strnum < endbracket) && (strnum != -1)) {
|
||||
//int nextspace = strmatch(&text[i+strnum], L" ", 0);
|
||||
int nextspace = strmatch(&text[i+strnum], " ", 0);
|
||||
//int space1 = strmatch(&text[i+strnum], L"\"", 0);
|
||||
int space1 = strmatch(&text[i+strnum], "\"", 0);
|
||||
int len = ((nextspace == -1) || (space1 < nextspace)
|
||||
? space1
|
||||
: nextspace);
|
||||
|
||||
//if (numidx != 0) wmemcpy(&num[numidx++], L" ", 1);
|
||||
if (numidx != 0) memcpy(&num[numidx++], " ", 1);
|
||||
//wmemcpy(&num[numidx], &text[i+strnum], len);
|
||||
memcpy(&num[numidx], &text[i+strnum], len);
|
||||
numidx += len;
|
||||
|
||||
int nextnum =
|
||||
//strmatch(&text[i+strnum], L"strong:", 0);
|
||||
strmatch(&text[i+strnum], "strong:", 0);
|
||||
strnum = (nextnum != -1
|
||||
? strnum + nextnum + 7
|
||||
|
@ -213,31 +188,21 @@ void pane::renderText() {
|
|||
* below zero, otherwise get word boundaries */
|
||||
int wordstart = endbracket + 1;
|
||||
int endtag =
|
||||
//(endbracket > strmatch(&text[i], L"/", 0)
|
||||
(endbracket > strmatch(&text[i], "/", 0)
|
||||
? endbracket + 1
|
||||
//: strmatch(&text[i], L"</w>", 0));
|
||||
: strmatch(&text[i], "</w>", 0));
|
||||
|
||||
// 4. determine word boundaries & rewrite
|
||||
int wordlen = endtag - wordstart;
|
||||
//wchar_t* word = (wchar_t*) malloc(sizeof(wchar_t*)
|
||||
char* word = (char*) malloc(sizeof(char*)
|
||||
* (wordlen == 0 ? 1 : wordlen));
|
||||
|
||||
if (! word) wrapup(1,
|
||||
"Error rewriting markup in renderText.\n");
|
||||
//wmemcpy(word, &text[i+wordstart], wordlen);
|
||||
memcpy(word, &text[i+wordstart], wordlen);
|
||||
|
||||
// rewrite
|
||||
int start = i + endtag - wordlen - numidx - 4;
|
||||
//wmemcpy(&text[start], L"\"", 1);
|
||||
//wmemcpy(&text[start + 1], L">", 1);
|
||||
//wmemcpy(&text[start + 2], word, wordlen);
|
||||
//wmemcpy(&text[start + 2 + wordlen], L"[", 1);
|
||||
//wmemcpy(&text[start + 3 + wordlen], num, numidx);
|
||||
//wmemcpy(&text[start + 3 + wordlen + numidx], L"]", 1);
|
||||
memcpy(&text[start], "\"", 1);
|
||||
memcpy(&text[start + 1], ">", 1);
|
||||
memcpy(&text[start + 2], word, wordlen);
|
||||
|
@ -252,13 +217,11 @@ void pane::renderText() {
|
|||
// kept for debugging
|
||||
//fwprintf(stderr, L"(I): %ls\n", text);
|
||||
|
||||
//} else if ((! strmatch(&text[i], L"<q marker", 1))
|
||||
} else if ((! strmatch(&text[i], "<q marker", 1))
|
||||
&& (p == 1) && (redletter == 1)) {
|
||||
// start of redletter bracket
|
||||
makered = 1;
|
||||
|
||||
//} else if ((! strmatch(&text[i], L"<transChange type=\"added\"", 1))
|
||||
} else if ((! strmatch(&text[i], "<transChange type=\"added\"", 1))
|
||||
&& (p == 1) && (nontextual == 1)) {
|
||||
// start of interpretive text bracket
|
||||
|
@ -269,8 +232,16 @@ void pane::renderText() {
|
|||
continue;
|
||||
} // markup check
|
||||
|
||||
// XXX -- add in here call to mbrlen() to determine number of bytes this character is --
|
||||
// we will need it to determine our advance amount in the loop
|
||||
/* determine how large this multibyte character is -- we will need it to
|
||||
* determine our advance amount in the loop */
|
||||
int offset = 0;
|
||||
size_t charlen = mbrlen(&text[i], 5, NULL);
|
||||
while (((int) mbrlen(&text[i+offset], 5, NULL) == -1) &&
|
||||
(i + offset < length)) {
|
||||
/* mbrlen() says the next char is not a valid multibyte char, so find
|
||||
* the length by figuring out where the succeeding character is */
|
||||
offset++;
|
||||
}
|
||||
|
||||
if (p == 0) {
|
||||
// handle word wrapping
|
||||
|
@ -301,7 +272,6 @@ void pane::renderText() {
|
|||
lastprintspace = printable;
|
||||
}
|
||||
|
||||
// XXX -- this is probably okay; check for linelength issues when moving to mbrlen()
|
||||
linelength++;
|
||||
|
||||
// various word wrapping debugging statements
|
||||
|
@ -312,18 +282,21 @@ void pane::renderText() {
|
|||
|
||||
} else {
|
||||
// printing -- pull out the single character we care about
|
||||
//wchar_t single[] = L"\0\0";
|
||||
// XXX -- need to use mbrlen() to determine how many bytes we really need to pull
|
||||
char single[] = "\0\0";
|
||||
//wcsncpy(&single[0], &text[i], 1);
|
||||
strncpy(&single[0], &text[i], 1);
|
||||
|
||||
wattrset(pad, COLOR_PAIR(makered)
|
||||
| (makeital ? A_ITALIC : 0));
|
||||
waddstr(pad, single);
|
||||
|
||||
if (((int) charlen == -1) && (offset == 1)) {
|
||||
/* This is an extended ascii character (probably Latin-1) and not
|
||||
* UTF-8. In setting up ncurses for UTF-8 we set a locale and seem to
|
||||
* make these characters unprintable. To avoid massive rendering
|
||||
* errors we have to substitute them with something else. */
|
||||
waddstr(pad, "?");
|
||||
|
||||
} else waddnstr(pad, &text[i], charlen);
|
||||
}
|
||||
|
||||
printable++;
|
||||
i += ((int) charlen == -1 ? offset : (int) charlen);
|
||||
} // text loop
|
||||
|
||||
// text rewriting debugging
|
||||
|
|
Loading…
Reference in New Issue