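/* Shared utility routines: error reporting, URI parsing and formatting,
   TAB-separated field splitting, time parsing, XML escaping and padded
   UTF-8 output. */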
#include <errno.h>
|
|
#include <stdarg.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
|
|
#include "util.h"
|
|
|
|
/* print to stderr, print error message of errno and exit().
|
|
Unlike BSD err() it does not prefix __progname */
|
|
__dead void
|
|
err(int exitstatus, const char *fmt, ...)
|
|
{
|
|
va_list ap;
|
|
int saved_errno;
|
|
|
|
saved_errno = errno;
|
|
|
|
if (fmt) {
|
|
va_start(ap, fmt);
|
|
vfprintf(stderr, fmt, ap);
|
|
va_end(ap);
|
|
fputs(": ", stderr);
|
|
}
|
|
fprintf(stderr, "%s\n", strerror(saved_errno));
|
|
|
|
exit(exitstatus);
|
|
}
|
|
|
|
/* print to stderr and exit().
|
|
Unlike BSD errx() it does not prefix __progname */
|
|
__dead void
|
|
errx(int exitstatus, const char *fmt, ...)
|
|
{
|
|
va_list ap;
|
|
|
|
if (fmt) {
|
|
va_start(ap, fmt);
|
|
vfprintf(stderr, fmt, ap);
|
|
va_end(ap);
|
|
}
|
|
fputs("\n", stderr);
|
|
|
|
exit(exitstatus);
|
|
}
|
|
|
|
/* Check the stream `fp` for errors and exit through errx() when one is found.
   `mode` is 'r' to check the read error indicator, or 'w' to flush the stream
   and check for a write error. `name` is only used in the error message.
   Any other `mode` value does nothing. */
void
checkfileerror(FILE *fp, const char *name, int mode)
{
	switch (mode) {
	case 'r':
		if (ferror(fp))
			errx(1, "read error: %s", name);
		break;
	case 'w':
		/* flush first so an error on pending buffered data is caught */
		if (fflush(fp) || ferror(fp))
			errx(1, "write error: %s", name);
		break;
	default:
		break;
	}
}
|
|
|
|
/* strcasestr() included for portability.
   Find the first occurrence of `n` in `h`, comparing bytes through TOLOWER().
   Returns a pointer into `h`, `h` itself when `n` is empty, or NULL when
   there is no match. */
char *
strcasestr(const char *h, const char *n)
{
	const char *start;
	size_t k;

	if (n[0] == '\0')
		return (char *)h;

	for (start = h; *start != '\0'; start++) {
		/* count how many leading bytes of `n` match at this position;
		   the NUL of `h` never equals a non-NUL byte of `n`, so the
		   comparison stops at the end of `h` */
		k = 0;
		while (n[k] != '\0' &&
		    TOLOWER((unsigned char)n[k]) == TOLOWER((unsigned char)start[k]))
			k++;
		if (n[k] == '\0')
			return (char *)start;
	}

	return NULL;
}
|
|
|
|
/* Check if string has a non-empty scheme / protocol part: one or more
   characters out of letters, digits, '+', '-' and '.', directly followed
   by ':'. Returns 1 if so, otherwise 0. */
int
uri_hasscheme(const char *s)
{
	const char *end = s;

	while (ISALPHA((unsigned char)*end) || ISDIGIT((unsigned char)*end) ||
	    *end == '+' || *end == '-' || *end == '.')
		end++;

	/* an empty scheme (string starts with ":") is a path, not a scheme */
	if (end == s)
		return 0;

	return *end == ':';
}
|
|
|
|
/* Parse URI string `s` into an uri structure `u`.
|
|
Returns 0 on success or -1 on failure */
|
|
int
|
|
uri_parse(const char *s, struct uri *u)
|
|
{
|
|
const char *p = s;
|
|
char *endptr;
|
|
size_t i;
|
|
long l;
|
|
|
|
u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
|
|
u->path[0] = u->query[0] = u->fragment[0] = '\0';
|
|
|
|
/* protocol-relative */
|
|
if (*p == '/' && *(p + 1) == '/') {
|
|
p += 2; /* skip "//" */
|
|
goto parseauth;
|
|
}
|
|
|
|
/* scheme / protocol part */
|
|
for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
|
|
*p == '+' || *p == '-' || *p == '.'; p++)
|
|
;
|
|
/* scheme, except if empty and starts with ":" then it is a path */
|
|
if (*p == ':' && p != s) {
|
|
if (*(p + 1) == '/' && *(p + 2) == '/')
|
|
p += 3; /* skip "://" */
|
|
else
|
|
p++; /* skip ":" */
|
|
|
|
if ((size_t)(p - s) >= sizeof(u->proto))
|
|
return -1; /* protocol too long */
|
|
memcpy(u->proto, s, p - s);
|
|
u->proto[p - s] = '\0';
|
|
|
|
if (*(p - 1) != '/')
|
|
goto parsepath;
|
|
} else {
|
|
p = s; /* no scheme format, reset to start */
|
|
goto parsepath;
|
|
}
|
|
|
|
parseauth:
|
|
/* userinfo (username:password) */
|
|
i = strcspn(p, "@/?#");
|
|
if (p[i] == '@') {
|
|
if (i >= sizeof(u->userinfo))
|
|
return -1; /* userinfo too long */
|
|
memcpy(u->userinfo, p, i);
|
|
u->userinfo[i] = '\0';
|
|
p += i + 1;
|
|
}
|
|
|
|
/* IPv6 address */
|
|
if (*p == '[') {
|
|
/* bracket not found, host too short or too long */
|
|
i = strcspn(p, "]");
|
|
if (p[i] != ']' || i < 3)
|
|
return -1;
|
|
i++; /* including "]" */
|
|
} else {
|
|
/* domain / host part, skip until port, path or end. */
|
|
i = strcspn(p, ":/?#");
|
|
}
|
|
if (i >= sizeof(u->host))
|
|
return -1; /* host too long */
|
|
memcpy(u->host, p, i);
|
|
u->host[i] = '\0';
|
|
p += i;
|
|
|
|
/* port */
|
|
if (*p == ':') {
|
|
p++;
|
|
if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
|
|
return -1; /* port too long */
|
|
memcpy(u->port, p, i);
|
|
u->port[i] = '\0';
|
|
/* check for valid port: range 1 - 65535, may be empty */
|
|
errno = 0;
|
|
l = strtol(u->port, &endptr, 10);
|
|
if (i && (errno || *endptr || l <= 0 || l > 65535))
|
|
return -1;
|
|
p += i;
|
|
}
|
|
|
|
parsepath:
|
|
/* path */
|
|
if ((i = strcspn(p, "?#")) >= sizeof(u->path))
|
|
return -1; /* path too long */
|
|
memcpy(u->path, p, i);
|
|
u->path[i] = '\0';
|
|
p += i;
|
|
|
|
/* query */
|
|
if (*p == '?') {
|
|
p++;
|
|
if ((i = strcspn(p, "#")) >= sizeof(u->query))
|
|
return -1; /* query too long */
|
|
memcpy(u->query, p, i);
|
|
u->query[i] = '\0';
|
|
p += i;
|
|
}
|
|
|
|
/* fragment */
|
|
if (*p == '#') {
|
|
p++;
|
|
if ((i = strlen(p)) >= sizeof(u->fragment))
|
|
return -1; /* fragment too long */
|
|
memcpy(u->fragment, p, i);
|
|
u->fragment[i] = '\0';
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
|
|
Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
|
|
Returns 0 on success, -1 on error or truncation. */
|
|
int
|
|
uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
|
|
{
|
|
char *p;
|
|
int c;
|
|
|
|
strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
|
|
|
|
if (u->proto[0] || u->host[0]) {
|
|
strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
|
|
strlcpy(a->host, u->host, sizeof(a->host));
|
|
strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
|
|
strlcpy(a->host, u->host, sizeof(a->host));
|
|
strlcpy(a->port, u->port, sizeof(a->port));
|
|
strlcpy(a->path, u->path, sizeof(a->path));
|
|
strlcpy(a->query, u->query, sizeof(a->query));
|
|
return 0;
|
|
}
|
|
|
|
strlcpy(a->proto, b->proto, sizeof(a->proto));
|
|
strlcpy(a->host, b->host, sizeof(a->host));
|
|
strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
|
|
strlcpy(a->host, b->host, sizeof(a->host));
|
|
strlcpy(a->port, b->port, sizeof(a->port));
|
|
|
|
if (!u->path[0]) {
|
|
strlcpy(a->path, b->path, sizeof(a->path));
|
|
} else if (u->path[0] == '/') {
|
|
strlcpy(a->path, u->path, sizeof(a->path));
|
|
} else {
|
|
a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
|
|
a->path[1] = '\0';
|
|
|
|
if ((p = strrchr(b->path, '/'))) {
|
|
c = *(++p);
|
|
*p = '\0'; /* temporary NUL-terminate */
|
|
if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
|
|
return -1;
|
|
*p = c; /* restore */
|
|
}
|
|
if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
|
|
return -1;
|
|
}
|
|
|
|
if (u->path[0] || u->query[0])
|
|
strlcpy(a->query, u->query, sizeof(a->query));
|
|
else
|
|
strlcpy(a->query, b->query, sizeof(a->query));
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
uri_format(char *buf, size_t bufsiz, struct uri *u)
|
|
{
|
|
return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
|
|
u->proto,
|
|
u->userinfo[0] ? u->userinfo : "",
|
|
u->userinfo[0] ? "@" : "",
|
|
u->host,
|
|
u->port[0] ? ":" : "",
|
|
u->port,
|
|
u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
|
|
u->path,
|
|
u->query[0] ? "?" : "",
|
|
u->query,
|
|
u->fragment[0] ? "#" : "",
|
|
u->fragment);
|
|
}
|
|
|
|
/* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
|
|
* terminators and assign these fields as pointers. If there are less fields
|
|
* than expected then the field is an empty string constant. */
|
|
void
|
|
parseline(char *line, char *fields[FieldLast])
|
|
{
|
|
char *prev, *s;
|
|
size_t i;
|
|
|
|
for (prev = line, i = 0;
|
|
(s = strchr(prev, '\t')) && i < FieldLast - 1;
|
|
i++) {
|
|
*s = '\0';
|
|
fields[i] = prev;
|
|
prev = s + 1;
|
|
}
|
|
fields[i++] = prev;
|
|
/* make non-parsed fields empty. */
|
|
for (; i < FieldLast; i++)
|
|
fields[i] = "";
|
|
}
|
|
|
|
/* Parse the decimal number in `s` as a time_t, assumes time_t is signed.
   The whole string has to be consumed by strtoll(): an empty string, a value
   out of range or any trailing character (such as a fractional part) is an
   error. `t` may be NULL to only validate the string.
   Returns 0 on success or -1 on error; `*t` is only written on success. */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	errno = 0;
	value = strtoll(s, &end, 10);
	if (errno != 0)
		return -1;
	if (*s == '\0' || *end != '\0')
		return -1;

	/* NOTE: the type long long supports the 64-bit range. If time_t is
	   64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}
|
|
|
|
/* Write the string `s` to `fp`, escaping the characters below as
   HTML 2.0 / XML 1.0 entities: '<', '>', '\'', '&' and '"'.
   All other bytes are written unchanged. */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; ++s) {
		switch (*s) {
		case '<':  fputs("&lt;",   fp); break;
		case '>':  fputs("&gt;",   fp); break;
		/* numeric reference: "&apos;" is not defined in HTML 2.0 */
		case '\'': fputs("&#39;",  fp); break;
		case '&':  fputs("&amp;",  fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   putc(*s, fp); break;
		}
	}
}
|
|
|
|
/* print `len` columns of characters. If string is shorter pad the rest with
|
|
* characters `pad`. */
|
|
void
|
|
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
|
|
{
|
|
wchar_t wc;
|
|
size_t col = 0, i, slen;
|
|
int inc, rl, w;
|
|
|
|
if (!len)
|
|
return;
|
|
|
|
slen = strlen(s);
|
|
for (i = 0; i < slen; i += inc) {
|
|
inc = 1; /* next byte */
|
|
if ((unsigned char)s[i] < 32) {
|
|
continue; /* skip control characters */
|
|
} else if ((unsigned char)s[i] >= 127) {
|
|
rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
|
|
inc = rl;
|
|
if (rl < 0) {
|
|
mbtowc(NULL, NULL, 0); /* reset state */
|
|
inc = 1; /* invalid, seek next byte */
|
|
w = 1; /* replacement char is one width */
|
|
} else if ((w = wcwidth(wc)) == -1) {
|
|
continue;
|
|
}
|
|
|
|
if (col + w > len || (col + w == len && s[i + inc])) {
|
|
fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
|
|
col++;
|
|
break;
|
|
} else if (rl < 0) {
|
|
fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
|
|
col++;
|
|
continue;
|
|
}
|
|
fwrite(&s[i], 1, rl, fp);
|
|
col += w;
|
|
} else {
|
|
/* optimization: simple ASCII character */
|
|
if (col + 1 > len || (col + 1 == len && s[i + 1])) {
|
|
fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
|
|
col++;
|
|
break;
|
|
}
|
|
putc(s[i], fp);
|
|
col++;
|
|
}
|
|
|
|
}
|
|
for (; col < len; ++col)
|
|
putc(pad, fp);
|
|
}
|