compatibility: reduce the assumption that the builtin libc locale is ASCII-compatible

The C99 standard does not clearly define this.
Define ctype-like macros to force the character classification to be
ASCII / UTF-8-compatible (not extended ASCII or something similar, as was
noticed on OpenBSD 3.8).

(In practice, modern libc implementations are all ASCII- and UTF-8-compatible;
otherwise many programs would break.)
This commit is contained in:
Hiltjo Posthuma 2022-03-29 11:03:54 +02:00
parent 880256b8bf
commit d5ee385b4b
7 changed files with 48 additions and 46 deletions

50
sfeed.c
View File

@ -246,7 +246,7 @@ gettag(enum FeedType feedtype, const char *name, size_t namelen)
static char *
ltrim(const char *s)
{
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
return (char *)s;
}
@ -256,7 +256,7 @@ rtrim(const char *s)
{
const char *e;
for (e = s + strlen(s); e > s && isspace((unsigned char)*(e - 1)); e--)
for (e = s + strlen(s); e > s && ISSPACE((unsigned char)*(e - 1)); e--)
;
return (char *)e;
}
@ -341,7 +341,7 @@ printtrimmed(const char *s)
p = ltrim(s);
e = rtrim(p);
for (; *p && p != e; p++) {
if (isspace((unsigned char)*p))
if (ISSPACE((unsigned char)*p))
putchar(' '); /* any whitespace to space */
else if (!ISCNTRL((unsigned char)*p))
/* ignore other control chars */
@ -514,20 +514,20 @@ gettzoffset(const char *s)
long tzhour = 0, tzmin = 0;
size_t i;
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
switch (*s) {
case '-': /* offset */
case '+':
for (i = 0, p = s + 1; i < 2 && isdigit((unsigned char)*p); i++, p++)
for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
tzhour = (tzhour * 10) + (*p - '0');
if (*p == ':')
p++;
for (i = 0; i < 2 && isdigit((unsigned char)*p); i++, p++)
for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
tzmin = (tzmin * 10) + (*p - '0');
return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
default: /* timezone name */
for (i = 0; isalpha((unsigned char)s[i]); i++)
for (i = 0; ISALPHA((unsigned char)s[i]); i++)
;
if (i != 3)
return 0;
@ -565,35 +565,35 @@ parsetime(const char *s, long long *tp)
int va[6] = { 0 }, i, j, v, vi;
size_t m;
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
if (!isdigit((unsigned char)*s) && !isalpha((unsigned char)*s))
if (!ISDIGIT((unsigned char)*s) && !ISALPHA((unsigned char)*s))
return -1;
if (isdigit((unsigned char)s[0]) &&
isdigit((unsigned char)s[1]) &&
isdigit((unsigned char)s[2]) &&
isdigit((unsigned char)s[3])) {
if (ISDIGIT((unsigned char)s[0]) &&
ISDIGIT((unsigned char)s[1]) &&
ISDIGIT((unsigned char)s[2]) &&
ISDIGIT((unsigned char)s[3])) {
/* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
vi = 0;
} else {
/* format: "[%a, ]%d %b %Y %H:%M:%S" */
/* parse "[%a, ]%d %b %Y " part, then use time parsing as above */
for (; isalpha((unsigned char)*s); s++)
for (; ISALPHA((unsigned char)*s); s++)
;
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
if (*s == ',')
s++;
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
for (v = 0, i = 0; i < 2 && isdigit((unsigned char)*s); s++, i++)
for (v = 0, i = 0; i < 2 && ISDIGIT((unsigned char)*s); s++, i++)
v = (v * 10) + (*s - '0');
va[2] = v; /* day */
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
/* end of word month */
for (j = 0; isalpha((unsigned char)s[j]); j++)
for (j = 0; ISALPHA((unsigned char)s[j]); j++)
;
/* check month name */
if (j < 3 || j > 9)
@ -609,15 +609,15 @@ parsetime(const char *s, long long *tp)
}
if (m >= 12)
return -1; /* no month found */
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
for (v = 0, i = 0; i < 4 && isdigit((unsigned char)*s); s++, i++)
for (v = 0, i = 0; i < 4 && ISDIGIT((unsigned char)*s); s++, i++)
v = (v * 10) + (*s - '0');
/* obsolete short year: RFC2822 4.3 */
if (i <= 3)
v += (v >= 0 && v <= 49) ? 2000 : 1900;
va[0] = v; /* year */
for (; isspace((unsigned char)*s); s++)
for (; ISSPACE((unsigned char)*s); s++)
;
/* parse only regular time part, see below */
vi = 3;
@ -626,20 +626,20 @@ parsetime(const char *s, long long *tp)
/* parse time parts (and possibly remaining date parts) */
for (; *s && vi < 6; vi++) {
for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
isdigit((unsigned char)*s); s++, i++) {
ISDIGIT((unsigned char)*s); s++, i++) {
v = (v * 10) + (*s - '0');
}
va[vi] = v;
if ((vi < 2 && *s == '-') ||
(vi == 2 && (*s == 'T' || isspace((unsigned char)*s))) ||
(vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
(vi > 2 && *s == ':'))
s++;
}
/* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
if (*s == '.') {
for (s++; isdigit((unsigned char)*s); s++)
for (s++; ISDIGIT((unsigned char)*s); s++)
;
}

View File

@ -1,4 +1,3 @@
#include <ctype.h>
#include <stdio.h>
#include <strings.h>

View File

@ -1,4 +1,3 @@
#include <ctype.h>
#include <stdio.h>
#include <strings.h>

View File

@ -1,4 +1,3 @@
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
@ -26,10 +25,10 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
return;
for (; *v; v++) {
if (isalpha((unsigned char)*v) ||
isdigit((unsigned char)*v) ||
if (ISALPHA((unsigned char)*v) ||
ISDIGIT((unsigned char)*v) ||
*v == '.' || *v == ':' || *v == '-' || *v == '_')
putchar(tolower((unsigned char)*v));
putchar(TOLOWER((unsigned char)*v));
}
}

9
util.c
View File

@ -1,4 +1,3 @@
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
@ -66,8 +65,8 @@ strcasestr(const char *h, const char *n)
return (char *)h;
for (; *h; ++h) {
for (i = 0; n[i] && tolower((unsigned char)n[i]) ==
tolower((unsigned char)h[i]); ++i)
for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) ==
TOLOWER((unsigned char)h[i]); ++i)
;
if (n[i] == '\0')
return (char *)h;
@ -82,7 +81,7 @@ uri_hasscheme(const char *s)
{
const char *p = s;
for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
*p == '+' || *p == '-' || *p == '.'; p++)
;
/* scheme, except if empty and starts with ":" then it is a path */
@ -109,7 +108,7 @@ uri_parse(const char *s, struct uri *u)
}
/* scheme / protocol part */
for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
*p == '+' || *p == '-' || *p == '.'; p++)
;
/* scheme, except if empty and starts with ":" then it is a path */

6
util.h
View File

@ -8,8 +8,12 @@
#define unveil(p1,p2) 0
#endif
/* control-character in the ASCII range 0-127: compatible with UTF-8 */
/* ctype-like macros, but always compatible with ASCII / UTF-8 */
/*
 * The result depends only on the numeric (ASCII) value of c, never on the
 * current locale.
 *
 * The range checks rely on unsigned wrap-around: a value below the lower
 * bound wraps to a very large number and fails the "< N" comparison. For
 * that to hold the whole argument must be converted to unsigned, so the
 * argument is parenthesized inside the cast; this makes expressions such as
 * ISDIGIT(x ? a : b) behave correctly.
 *
 * NOTE: ISCNTRL, ISSPACE and TOLOWER evaluate c more than once: do not pass
 * an expression with side effects. ISCNTRL compares c as-is, so a negative
 * value is reported as a control character.
 */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26) /* A-Z, a-z: bit 5 folds the case */
#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f) /* below space, or DEL */
#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10) /* 0-9 */
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5)) /* space, \t \n \v \f \r */
#define TOLOWER(c) ((((unsigned)(c)) - 'A' < 26) ? ((c) | 32) : (c)) /* A-Z to a-z, rest unchanged */
#undef strcasestr
char *strcasestr(const char *, const char *);

20
xml.c
View File

@ -1,4 +1,3 @@
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
@ -6,6 +5,9 @@
#include "xml.h"
/*
 * Locale-independent classifiers: the result depends only on the numeric
 * (ASCII) value of c. The range checks rely on unsigned wrap-around, so the
 * argument is parenthesized inside the cast to convert the whole expression.
 * NOTE: ISSPACE evaluates c twice: do not pass an expression with side effects.
 */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26) /* A-Z, a-z: bit 5 folds the case */
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5)) /* space, \t \n \v \f \r */
static void
xml_parseattrs(XMLParser *x)
{
@ -13,7 +15,7 @@ xml_parseattrs(XMLParser *x)
int c, endsep, endname = 0, valuestart = 0;
while ((c = GETNEXT()) != EOF) {
if (isspace(c)) {
if (ISSPACE(c)) {
if (namelen)
endname = 1;
continue;
@ -23,7 +25,7 @@ xml_parseattrs(XMLParser *x)
x->name[namelen] = '\0';
valuestart = 1;
endname = 1;
} else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) {
} else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
/* attribute without value */
x->name[namelen] = '\0';
if (x->xmlattrstart)
@ -44,7 +46,7 @@ xml_parseattrs(XMLParser *x)
if (c == '\'' || c == '"') {
endsep = c;
} else {
endsep = ' '; /* isspace() */
endsep = ' '; /* ISSPACE() */
goto startvalue;
}
@ -58,7 +60,7 @@ startvalue:
x->data[0] = c;
valuelen = 1;
while ((c = GETNEXT()) != EOF) {
if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c))))
if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
break;
if (valuelen < sizeof(x->data) - 1)
x->data[valuelen++] = c;
@ -79,7 +81,7 @@ startvalue:
break;
}
}
} else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) {
} else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
if (valuelen < sizeof(x->data) - 1) {
x->data[valuelen++] = c;
} else {
@ -90,7 +92,7 @@ startvalue:
valuelen = 1;
}
}
if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) {
if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
x->data[valuelen] = '\0';
if (x->xmlattr)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
@ -328,7 +330,7 @@ xml_parse(XMLParser *x)
while ((c = GETNEXT()) != EOF) {
if (c == '/')
x->isshorttag = 1; /* short tag */
else if (c == '>' || isspace(c)) {
else if (c == '>' || ISSPACE(c)) {
x->tag[x->taglen] = '\0';
if (isend) { /* end tag, starts with </ */
if (x->xmltagend)
@ -339,7 +341,7 @@ xml_parse(XMLParser *x)
/* start tag */
if (x->xmltagstart)
x->xmltagstart(x, x->tag, x->taglen);
if (isspace(c))
if (ISSPACE(c))
xml_parseattrs(x);
if (x->xmltagstartparsed)
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);