sfeed/util.c

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include "util.h"

/* Write an optional printf-style message followed by ": " to stderr, then
   the strerror() text for the errno value that was current on entry and a
   newline, and finally exit() with `exitstatus`.
   If `fmt` is NULL only the strerror() text is printed.
   Unlike BSD err() it does not prefix __progname */
__dead void
err(int exitstatus, const char *fmt, ...)
{
	/* capture errno first: the stdio calls below may overwrite it */
	int errnum = errno;
	va_list args;

	if (fmt != NULL) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
		fputs(": ", stderr);
	}
	fprintf(stderr, "%s\n", strerror(errnum));

	exit(exitstatus);
}

/* Write an optional printf-style message and a newline to stderr, then
   exit() with `exitstatus`. If `fmt` is NULL only the newline is printed.
   Unlike BSD errx() it does not prefix __progname */
__dead void
errx(int exitstatus, const char *fmt, ...)
{
	va_list args;

	if (fmt != NULL) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
	fputs("\n", stderr);

	exit(exitstatus);
}

/* Handle read or write errors for a FILE * stream.
   mode 'r': exit with status 1 if the stream error indicator is set.
   mode 'w': flush the stream, exit with status 1 if flushing fails or the
             error indicator is set.
   Any other mode does nothing. `name` is only used in the error message. */
void
checkfileerror(FILE *fp, const char *name, int mode)
{
	switch (mode) {
	case 'r':
		if (ferror(fp))
			errx(1, "read error: %s", name);
		break;
	case 'w':
		/* flush first so buffered write failures are detected too */
		if (fflush(fp) || ferror(fp))
			errx(1, "write error: %s", name);
		break;
	default:
		break;
	}
}

/* strcasestr() included for portability.
   Find the first occurrence of `n` in `h`, comparing bytes through
   TOLOWER(). Returns a pointer into `h` at the start of the match, `h`
   itself when `n` is empty, or NULL when there is no match. */
char *
strcasestr(const char *h, const char *n)
{
	const char *hp, *np;

	if (*n == '\0')
		return (char *)h;

	while (*h != '\0') {
		/* compare the needle against the haystack starting at h; the
		   NUL terminator of h never equals a needle byte, so the
		   comparison stops before running past the end of h */
		hp = h;
		np = n;
		while (*np != '\0' && TOLOWER((unsigned char)*np) ==
		       TOLOWER((unsigned char)*hp)) {
			np++;
			hp++;
		}
		if (*np == '\0')
			return (char *)h;
		h++;
	}

	return NULL;
}

/* Check if string has a non-empty scheme / protocol part.
   A scheme is a run of letters, digits, '+', '-' or '.' that is directly
   followed by ':'. Returns 1 if `s` starts with one, otherwise 0. */
int
uri_hasscheme(const char *s)
{
	const char *end;
	unsigned char c;

	/* advance `end` over all characters allowed in a scheme */
	for (end = s; ; end++) {
		c = (unsigned char)*end;
		if (ISALPHA(c) || ISDIGIT(c))
			continue;
		if (c == '+' || c == '-' || c == '.')
			continue;
		break;
	}
	/* an empty scheme (string starts with ":") is a path, not a scheme */
	return (end != s && *end == ':');
}

/* Parse URI string `s` into an uri structure `u`.
   All fields of `u` are first reset to the empty string, then filled in
   this order: proto, userinfo, host, port, path, query, fragment.
   - proto is stored including its trailing ":" or "://".
   - an IPv6 host is stored including the surrounding "[" and "]".
   - query and fragment are stored without their leading "?" and "#".
   A component that does not fit in its field (including the NUL
   terminator) makes the parse fail; fields parsed before the failing
   component keep the values already written.
   Returns 0 on success or -1 on failure */
int
uri_parse(const char *s, struct uri *u)
{
	const char *p = s;
	char *endptr;
	size_t i;
	long l;

	/* start with every component empty */
	u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
	u->path[0] = u->query[0] = u->fragment[0] = '\0';

	/* protocol-relative: no scheme, continue with the authority part */
	if (*p == '/' && *(p + 1) == '/') {
		p += 2; /* skip "//" */
		goto parseauth;
	}

	/* scheme / protocol part */
	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
		       *p == '+' || *p == '-' || *p == '.'; p++)
		;
	/* scheme, except if empty and starts with ":" then it is a path */
	if (*p == ':' && p != s) {
		if (*(p + 1) == '/' && *(p + 2) == '/')
			p += 3; /* skip "://" */
		else
			p++; /* skip ":" */

		/* p now points past the separator, so it is copied as well */
		if ((size_t)(p - s) >= sizeof(u->proto))
			return -1; /* protocol too long */
		memcpy(u->proto, s, p - s);
		u->proto[p - s] = '\0';

		/* only "://" is followed by an authority part; after a plain
		   ":" the remainder is parsed as a path */
		if (*(p - 1) != '/')
			goto parsepath;
	} else {
		p = s; /* no scheme format, reset to start */
		goto parsepath;
	}

parseauth:
	/* userinfo (username:password): only present if an "@" occurs before
	   the start of the path, query or fragment */
	i = strcspn(p, "@/?#");
	if (p[i] == '@') {
		if (i >= sizeof(u->userinfo))
			return -1; /* userinfo too long */
		memcpy(u->userinfo, p, i);
		u->userinfo[i] = '\0';
		p += i + 1; /* skip userinfo and "@" */
	}

	/* IPv6 address */
	if (*p == '[') {
		/* bracket not found, host too short or too long */
		i = strcspn(p, "]");
		if (p[i] != ']' || i < 3)
			return -1;
		i++; /* including "]" */
	} else {
		/* domain / host part, skip until port, path or end. */
		i = strcspn(p, ":/?#");
	}
	if (i >= sizeof(u->host))
		return -1; /* host too long */
	memcpy(u->host, p, i);
	u->host[i] = '\0';
	p += i;

	/* port */
	if (*p == ':') {
		p++;
		if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
			return -1; /* port too long */
		memcpy(u->port, p, i);
		u->port[i] = '\0';
		/* check for valid port: range 1 - 65535, may be empty */
		errno = 0;
		l = strtol(u->port, &endptr, 10);
		/* the `i &&` skips validation for an empty port */
		if (i && (errno || *endptr || l <= 0 || l > 65535))
			return -1;
		p += i;
	}

parsepath:
	/* path: everything up to the query or fragment */
	if ((i = strcspn(p, "?#")) >= sizeof(u->path))
		return -1; /* path too long */
	memcpy(u->path, p, i);
	u->path[i] = '\0';
	p += i;

	/* query: stored without the leading "?" */
	if (*p == '?') {
		p++;
		if ((i = strcspn(p, "#")) >= sizeof(u->query))
			return -1; /* query too long */
		memcpy(u->query, p, i);
		u->query[i] = '\0';
		p += i;
	}

	/* fragment: the rest of the string, stored without the leading "#" */
	if (*p == '#') {
		p++;
		if ((i = strlen(p)) >= sizeof(u->fragment))
			return -1; /* fragment too long */
		memcpy(u->fragment, p, i);
		u->fragment[i] = '\0';
	}

	return 0;
}

/* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
   Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
   Returns 0 on success, -1 on error or truncation. */
int
uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
{
	char *p;
	int c;

	strlcpy(a->fragment, u->fragment, sizeof(a->fragment));

	if (u->proto[0] || u->host[0]) {
		strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
		strlcpy(a->host, u->host, sizeof(a->host));
		strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
		strlcpy(a->host, u->host, sizeof(a->host));
		strlcpy(a->port, u->port, sizeof(a->port));
		strlcpy(a->path, u->path, sizeof(a->path));
		strlcpy(a->query, u->query, sizeof(a->query));
		return 0;
	}

	strlcpy(a->proto, b->proto, sizeof(a->proto));
	strlcpy(a->host, b->host, sizeof(a->host));
	strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
	strlcpy(a->host, b->host, sizeof(a->host));
	strlcpy(a->port, b->port, sizeof(a->port));

	if (!u->path[0]) {
		strlcpy(a->path, b->path, sizeof(a->path));
	} else if (u->path[0] == '/') {
		strlcpy(a->path, u->path, sizeof(a->path));
	} else {
		a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
		a->path[1] = '\0';

		if ((p = strrchr(b->path, '/'))) {
			c = *(++p);
			*p = '\0'; /* temporary NUL-terminate */
			if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
				return -1;
			*p = c; /* restore */
		}
		if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
			return -1;
	}

	if (u->path[0] || u->query[0])
		strlcpy(a->query, u->query, sizeof(a->query));
	else
		strlcpy(a->query, b->query, sizeof(a->query));

	return 0;
}

/* Format the URI `u` as a string into `buf` of size `bufsiz`.
   Separators are only written when needed: "@" after a non-empty userinfo,
   ":" before a non-empty port, "/" between a non-empty host and a non-empty
   path that does not start with "/", "?" before a non-empty query and "#"
   before a non-empty fragment.
   Returns the return value of snprintf(). */
int
uri_format(char *buf, size_t bufsiz, struct uri *u)
{
	const char *userinfo = "", *at = "";
	const char *colon = "", *slash = "", *qmark = "", *hash = "";

	if (u->userinfo[0]) {
		userinfo = u->userinfo;
		at = "@";
	}
	if (u->port[0])
		colon = ":";
	if (u->host[0] && u->path[0] && u->path[0] != '/')
		slash = "/";
	if (u->query[0])
		qmark = "?";
	if (u->fragment[0])
		hash = "#";

	return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
		u->proto,
		userinfo, at,
		u->host,
		colon, u->port,
		slash, u->path,
		qmark, u->query,
		hash, u->fragment);
}

/* Split `line` in place into at most FieldLast TAB-separated fields.
 * Each TAB separator that is consumed is overwritten with NUL ('\0') and
 * the start of each field is stored in `fields`. The last field keeps any
 * remaining TABs. If the line has less fields than FieldLast the remaining
 * entries point to an empty string constant. */
void
parseline(char *line, char *fields[FieldLast])
{
	char *start = line, *tab;
	size_t n = 0;

	while ((tab = strchr(start, '\t')) != NULL && n < FieldLast - 1) {
		*tab = '\0';
		fields[n++] = start;
		start = tab + 1;
	}
	/* remainder of the line is the final parsed field */
	fields[n++] = start;

	/* make non-parsed fields empty. */
	while (n < FieldLast)
		fields[n++] = "";
}

/* Parse the base-10 integer string `s` to a time_t, assumes time_t is
   signed. The whole string must be consumed by strtoll(), so an empty
   string, trailing characters (such as a fraction) or an out of range value
   are errors.
   On success the value is stored in `*t` if `t` is not NULL.
   Returns 0 on success or -1 on failure. */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	errno = 0;
	value = strtoll(s, &end, 10);
	if (errno != 0)
		return -1;
	if (s[0] == '\0' || end[0] != '\0')
		return -1;

	/* NOTE: the type long long supports the 64-bit range. If time_t is
	   64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}

/* Write string `s` to `fp`, escaping the characters below as
   HTML 2.0 / XML 1.0 entities: < > ' & ". All other bytes are written
   unchanged. */
void
xmlencode(const char *s, FILE *fp)
{
	const char *entity;

	while (*s != '\0') {
		switch (*s) {
		case '<':  entity = "&lt;";   break;
		case '>':  entity = "&gt;";   break;
		case '\'': entity = "&#39;";  break;
		case '&':  entity = "&amp;";  break;
		case '"':  entity = "&quot;"; break;
		default:   entity = NULL;     break;
		}

		if (entity != NULL)
			fputs(entity, fp);
		else
			putc(*s, fp);
		s++;
	}
}

/* print `len` columns of characters. If string is shorter pad the rest with
 * characters `pad`.
 * - nothing is printed when `len` is 0.
 * - bytes below 32 (control characters) and characters for which wcwidth()
 *   returns -1 are skipped and take no column.
 * - a byte sequence that mbtowc() rejects is printed as UTF_INVALID_SYMBOL
 *   and counted as one column.
 * - when the string does not fit, output stops and PAD_TRUNCATE_SYMBOL is
 *   printed in place of the first character that would fill or exceed the
 *   last column.
 * NOTE(review): multibyte decoding is done by mbtowc(), so it presumably
 * depends on the caller having set an UTF-8 locale - confirm at call sites. */
void
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
{
	wchar_t wc;
	size_t col = 0, i, slen;
	int inc, rl, w;

	if (!len)
		return;

	slen = strlen(s);
	for (i = 0; i < slen; i += inc) {
		inc = 1; /* next byte */
		if ((unsigned char)s[i] < 32) {
			continue; /* skip control characters */
		} else if ((unsigned char)s[i] >= 127) {
			/* decode one character, looking at no more than 4 bytes and
			   never past the end of the string */
			rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
			inc = rl;
			if (rl < 0) {
				mbtowc(NULL, NULL, 0); /* reset state */
				inc = 1; /* invalid, seek next byte */
				w = 1; /* replacement char is one width */
			} else if ((w = wcwidth(wc)) == -1) {
				continue; /* no column width: skip, advance by rl bytes */
			}

			/* truncate if the character does not fit, or if it would
			   take the last column(s) while more bytes follow */
			if (col + w > len || (col + w == len && s[i + inc])) {
				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
				col++;
				break;
			} else if (rl < 0) {
				fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
				col++;
				continue;
			}
			/* valid character: write its rl bytes unchanged */
			fwrite(&s[i], 1, rl, fp);
			col += w;
		} else {
			/* optimization: simple ASCII character */
			if (col + 1 > len || (col + 1 == len && s[i + 1])) {
				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
				col++;
				break;
			}
			putc(s[i], fp);
			col++;
		}

	}
	/* fill the remaining columns with the pad character */
	for (; col < len; ++col)
		putc(pad, fp);
}