2015-07-28 19:24:06 +00:00
|
|
|
#include <sys/types.h>
|
|
|
|
|
2014-11-11 18:56:05 +00:00
|
|
|
#include <ctype.h>
|
2015-05-24 20:46:47 +00:00
|
|
|
#include <err.h>
|
2015-01-03 11:44:32 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <limits.h>
|
2015-07-28 19:24:06 +00:00
|
|
|
#include <stdarg.h>
|
2012-08-03 10:03:17 +00:00
|
|
|
#include <stdio.h>
|
2016-02-27 15:21:30 +00:00
|
|
|
#include <stdint.h>
|
2012-08-03 10:03:17 +00:00
|
|
|
#include <stdlib.h>
|
2014-11-11 18:56:05 +00:00
|
|
|
#include <string.h>
|
|
|
|
#include <time.h>
|
2016-02-27 15:21:30 +00:00
|
|
|
#include <wchar.h>
|
2013-01-20 18:14:34 +00:00
|
|
|
|
2014-03-31 20:46:58 +00:00
|
|
|
#include "util.h"
|
|
|
|
|
2015-07-28 19:24:06 +00:00
|
|
|
int
|
|
|
|
parseuri(const char *s, struct uri *u, int rel)
|
|
|
|
{
|
2015-08-22 12:58:28 +00:00
|
|
|
const char *p = s, *b;
|
|
|
|
char *endptr = NULL;
|
2015-06-20 22:20:12 +00:00
|
|
|
size_t i;
|
2015-08-22 12:58:28 +00:00
|
|
|
unsigned long l;
|
2015-06-20 22:20:12 +00:00
|
|
|
|
2015-08-22 12:58:28 +00:00
|
|
|
u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0';
|
2015-07-28 19:24:06 +00:00
|
|
|
if (!*s)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* prefix is "//", don't read protocol, skip to domain parsing */
|
|
|
|
if (!strncmp(p, "//", 2)) {
|
|
|
|
p += 2; /* skip "//" */
|
|
|
|
} else {
|
|
|
|
/* protocol part */
|
|
|
|
for (p = s; *p && (isalpha((int)*p) || isdigit((int)*p) ||
|
|
|
|
*p == '+' || *p == '-' || *p == '.'); p++)
|
|
|
|
;
|
|
|
|
if (!strncmp(p, "://", 3)) {
|
2015-08-22 12:58:28 +00:00
|
|
|
if (p - s >= (ssize_t)sizeof(u->proto))
|
2015-07-28 19:24:06 +00:00
|
|
|
return -1; /* protocol too long */
|
|
|
|
memcpy(u->proto, s, p - s);
|
2015-08-10 20:02:19 +00:00
|
|
|
u->proto[p - s] = '\0';
|
2015-07-28 19:24:06 +00:00
|
|
|
p += 3; /* skip "://" */
|
|
|
|
} else {
|
|
|
|
p = s; /* no protocol format, set to start */
|
|
|
|
/* relative url: read rest as path, else as domain */
|
|
|
|
if (rel)
|
|
|
|
goto readpath;
|
|
|
|
}
|
2015-06-20 22:20:12 +00:00
|
|
|
}
|
2015-08-22 12:58:28 +00:00
|
|
|
/* IPv6 address */
|
|
|
|
if (*p == '[') {
|
|
|
|
/* bracket not found or host too long */
|
|
|
|
if (!(b = strchr(p, ']')) || b - p >= (ssize_t)sizeof(u->host))
|
|
|
|
return -1;
|
|
|
|
memcpy(u->host, p + 1, b - p - 1);
|
|
|
|
u->host[b - p] = '\0';
|
|
|
|
p = b + 1;
|
|
|
|
} else {
|
|
|
|
/* domain / host part, skip until port, path or end. */
|
|
|
|
if ((i = strcspn(p, ":/")) >= sizeof(u->host))
|
|
|
|
return -1; /* host too long */
|
|
|
|
memcpy(u->host, p, i);
|
|
|
|
u->host[i] = '\0';
|
|
|
|
p = &p[i];
|
|
|
|
}
|
|
|
|
/* port */
|
|
|
|
if (*p == ':') {
|
|
|
|
if ((i = strcspn(++p, "/")) >= sizeof(u->port))
|
|
|
|
return -1; /* port too long */
|
|
|
|
memcpy(u->port, p, i);
|
|
|
|
u->port[i] = '\0';
|
2016-04-10 17:33:27 +00:00
|
|
|
/* check for valid port: range 1 - 65535 */
|
2015-08-22 12:58:28 +00:00
|
|
|
errno = 0;
|
|
|
|
l = strtoul(u->port, &endptr, 10);
|
2016-04-10 17:33:27 +00:00
|
|
|
if (errno || u->port[0] == '\0' || *endptr ||
|
|
|
|
!l || l > 65535)
|
2015-08-22 12:58:28 +00:00
|
|
|
return -1;
|
|
|
|
p = &p[i];
|
|
|
|
}
|
2015-07-28 19:24:06 +00:00
|
|
|
readpath:
|
|
|
|
if (u->host[0]) {
|
|
|
|
p = &p[strspn(p, "/")];
|
|
|
|
strlcpy(u->path, "/", sizeof(u->path));
|
|
|
|
} else {
|
2015-08-22 12:58:28 +00:00
|
|
|
/* absolute uri must have a host specified */
|
2015-07-28 19:24:06 +00:00
|
|
|
if (!rel)
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
/* treat truncation as an error */
|
2015-08-16 17:31:46 +00:00
|
|
|
if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path))
|
|
|
|
return -1;
|
|
|
|
return 0;
|
2015-06-20 22:20:12 +00:00
|
|
|
}
|
|
|
|
|
2016-04-10 18:19:29 +00:00
|
|
|
/*
 * Percent-encode the string `s` into `buf`, which holds `bufsiz` bytes.
 *
 * A byte is written as "%XX" (uppercase hexadecimal) when it is a space,
 * has a value above 127, or is a control character; every other byte is
 * copied unchanged.
 *
 * Returns 0 on success with `buf` NUL-terminated. Returns -1 when the
 * encoded string plus its terminator does not fit; `buf` is then set to
 * the empty string (if `bufsiz` is non-zero) so that a caller never sees
 * partial, unterminated output.
 */
static int
encodeuri(char *buf, size_t bufsiz, const char *s)
{
	static const char table[] = "0123456789ABCDEF";
	size_t i, b;
	unsigned char c;

	for (i = 0, b = 0; s[i]; i++) {
		c = (unsigned char)s[i];
		if (c == ' ' || c > 127 || iscntrl(c)) {
			/* need room for "%XX" and the terminator */
			if (b + 3 >= bufsiz)
				goto toolong;
			buf[b++] = '%';
			buf[b++] = table[(c >> 4) & 15];
			buf[b++] = table[c & 15];
		} else {
			/* need room for this byte and the terminator */
			if (b + 1 >= bufsiz)
				goto toolong;
			buf[b++] = (char)c;
		}
	}
	if (b >= bufsiz)
		goto toolong;
	buf[b] = '\0';

	return 0;

toolong:
	if (bufsiz)
		buf[0] = '\0';
	return -1;
}
|
|
|
|
|
2015-07-31 20:29:34 +00:00
|
|
|
/* Get absolute uri; if `link` is relative use `base` to make it absolute.
|
|
|
|
* the returned string in `buf` is uri encoded, see: encodeuri(). */
|
2015-07-28 19:24:06 +00:00
|
|
|
int
|
2016-04-10 18:19:29 +00:00
|
|
|
absuri(char *buf, size_t bufsiz, const char *link, const char *base)
|
2014-11-11 18:56:05 +00:00
|
|
|
{
|
2015-07-28 19:24:06 +00:00
|
|
|
struct uri ulink, ubase;
|
2015-08-22 14:18:10 +00:00
|
|
|
char tmp[4096], *host, *p, *port;
|
2015-07-28 19:24:06 +00:00
|
|
|
int r = -1, c;
|
2015-07-28 19:49:24 +00:00
|
|
|
size_t i;
|
2015-07-28 19:24:06 +00:00
|
|
|
|
|
|
|
buf[0] = '\0';
|
|
|
|
if (parseuri(base, &ubase, 0) == -1 ||
|
2015-08-22 14:18:10 +00:00
|
|
|
parseuri(link, &ulink, 1) == -1 ||
|
|
|
|
(!ulink.host[0] && !ubase.host[0]))
|
2015-07-28 19:24:06 +00:00
|
|
|
return -1;
|
|
|
|
|
2015-08-22 14:18:10 +00:00
|
|
|
if (!strncmp(link, "//", 2)) {
|
|
|
|
host = ulink.host;
|
|
|
|
port = ulink.port;
|
|
|
|
} else {
|
|
|
|
host = ulink.host[0] ? ulink.host : ubase.host;
|
|
|
|
port = ulink.port[0] ? ulink.port : ubase.port;
|
|
|
|
}
|
2015-08-22 12:58:44 +00:00
|
|
|
r = snprintf(tmp, sizeof(tmp), "%s://%s%s%s",
|
2015-07-28 19:24:06 +00:00
|
|
|
ulink.proto[0] ?
|
|
|
|
ulink.proto :
|
|
|
|
(ubase.proto[0] ? ubase.proto : "http"),
|
2015-08-22 14:18:10 +00:00
|
|
|
host,
|
2015-08-22 12:58:44 +00:00
|
|
|
port[0] ? ":" : "",
|
|
|
|
port);
|
2015-07-28 19:24:06 +00:00
|
|
|
if (r == -1 || (size_t)r >= sizeof(tmp))
|
2016-02-27 15:21:30 +00:00
|
|
|
return -1; /* error or truncation */
|
2015-07-28 19:24:06 +00:00
|
|
|
|
|
|
|
/* relative to root */
|
|
|
|
if (!ulink.host[0] && ulink.path[0] != '/') {
|
|
|
|
/* relative to base url path */
|
|
|
|
if (ulink.path[0]) {
|
|
|
|
if ((p = strrchr(ubase.path, '/'))) {
|
|
|
|
/* temporary null-terminate */
|
|
|
|
c = *(++p);
|
|
|
|
*p = '\0';
|
2015-07-28 19:49:24 +00:00
|
|
|
i = strlcat(tmp, ubase.path, sizeof(tmp));
|
2015-07-28 19:24:06 +00:00
|
|
|
*p = c; /* restore */
|
2015-07-28 19:49:24 +00:00
|
|
|
if (i >= sizeof(tmp))
|
|
|
|
return -1;
|
2015-07-28 19:24:06 +00:00
|
|
|
}
|
2015-08-16 17:31:46 +00:00
|
|
|
} else if (strlcat(tmp, ubase.path, sizeof(tmp)) >=
|
|
|
|
sizeof(tmp)) {
|
|
|
|
return -1;
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
2015-07-28 19:24:06 +00:00
|
|
|
}
|
|
|
|
if (strlcat(tmp, ulink.path, sizeof(tmp)) >= sizeof(tmp))
|
|
|
|
return -1;
|
|
|
|
|
2016-04-10 18:19:29 +00:00
|
|
|
return encodeuri(buf, bufsiz, tmp);
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
|
|
|
|
2015-07-31 20:29:34 +00:00
|
|
|
/* Read a field-separated line from 'fp',
|
2014-06-27 13:42:53 +00:00
|
|
|
* separated by a character 'separator',
|
2015-08-02 11:45:43 +00:00
|
|
|
* 'fields' is a list of pointers with a size of FieldLast (must be >0).
|
|
|
|
* 'line' buffer is allocated using malloc, 'size' will contain the allocated
|
|
|
|
* buffer size.
|
|
|
|
* returns: amount of fields read (>0) or -1 on error. */
|
2016-02-27 15:21:30 +00:00
|
|
|
size_t
|
|
|
|
parseline(char *line, char *fields[FieldLast])
|
2014-04-08 22:00:13 +00:00
|
|
|
{
|
2012-08-03 10:03:17 +00:00
|
|
|
char *prev, *s;
|
2015-08-02 11:45:43 +00:00
|
|
|
size_t i;
|
2012-08-03 10:03:17 +00:00
|
|
|
|
2016-02-27 15:21:30 +00:00
|
|
|
for (prev = line, i = 0;
|
2015-08-02 11:45:43 +00:00
|
|
|
(s = strchr(prev, '\t')) && i < FieldLast - 1;
|
2015-05-16 13:56:00 +00:00
|
|
|
i++) {
|
|
|
|
*s = '\0';
|
2012-08-03 10:03:17 +00:00
|
|
|
fields[i] = prev;
|
2015-05-16 13:56:00 +00:00
|
|
|
prev = s + 1;
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
2015-05-16 13:56:00 +00:00
|
|
|
fields[i++] = prev;
|
|
|
|
/* make non-parsed fields empty. */
|
2015-08-02 11:45:43 +00:00
|
|
|
for (; i < FieldLast; i++)
|
2015-05-16 13:56:00 +00:00
|
|
|
fields[i] = "";
|
|
|
|
|
2016-02-27 15:21:30 +00:00
|
|
|
return i;
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
|
|
|
|
2016-04-10 12:34:25 +00:00
|
|
|
/* Parse the decimal number in `s` as a time value; assumes time_t is
 * signed. The whole string has to be consumed by strtoll(): an empty
 * string, an out of range value or any trailing character (which includes
 * a fractional part such as ".5") is an error.
 * On success the value is stored in `*t` when `t` is not NULL.
 * returns: 0 on success, -1 on error (`*t` is left untouched). */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	errno = 0;
	value = strtoll(s, &end, 10);

	if (errno)
		return -1; /* conversion failed or out of range */
	if (*s == '\0')
		return -1; /* nothing to parse */
	if (*end != '\0')
		return -1; /* trailing characters */

	/* NOTE: assumes time_t is 64-bit on 64-bit platforms:
	         long long (at least 32-bit) to time_t. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}
|
2015-05-24 20:46:47 +00:00
|
|
|
|
2015-08-02 11:08:12 +00:00
|
|
|
/* Write the string `s` to `fp`, escaping the characters below as
 * HTML 2.0 / XML 1.0 entities: '<', '>', '\'', '&' and '"'.
 * All other bytes are written unchanged. */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; s++) {
		switch (*s) {
		case '<':  fputs("&lt;",   fp); break;
		case '>':  fputs("&gt;",   fp); break;
		case '\'': fputs("&#39;",  fp); break;
		case '&':  fputs("&amp;",  fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   fputc(*s, fp);
		}
	}
}
|
2015-06-20 22:19:22 +00:00
|
|
|
|
2016-02-27 15:21:30 +00:00
|
|
|
/* Print at most `len` columns of characters from `s` to `fp`. If the
 * string is shorter, pad the rest with the character `pad`.
 *
 * A byte for which ISUTF8() is true is treated as the start of a
 * character: it is decoded with mbtowc() and its column width is taken
 * from wcwidth() (a width of -1 counts as 1 column). Bytes for which
 * ISUTF8() is false are not counted and are written together with the
 * character before them, so a multi-byte character is never cut off in
 * the middle (presumably these are UTF-8 continuation bytes; ISUTF8() is
 * defined elsewhere). A character that would exceed `len` columns is not
 * printed; padding is used instead. Printing also stops at the first
 * character that cannot be decoded. */
void
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
{
	wchar_t wc;
	size_t col = 0;
	int width;

	for (; *s; s++) {
		if (ISUTF8(*s)) {
			if (col >= len)
				break; /* no column left for a new character */
			if (mbtowc(&wc, s, 4) == -1)
				break;
			if ((width = wcwidth(wc)) == -1)
				width = 1;
			if ((size_t)width > len - col)
				break; /* character is wider than what is left */
			col += (size_t)width;
		}
		putc(*s, fp);
	}
	for (; col < len; col++)
		putc(pad, fp);
}
|
|
|
|
|
|
|
|
/* Compute a 32-bit hash (MurmurHash3, going by the name and constants)
 * over the `len` bytes at `key`, starting from `seed`.
 *
 * The input is consumed in 4-byte blocks followed by a tail of 0-3 bytes.
 * Blocks are loaded in host byte order, so the result for inputs of 4 or
 * more bytes depends on the endianness of the machine. The blocks are
 * copied out with memcpy() because `key` need not be aligned for uint32_t
 * and may not be accessed through a uint32_t pointer.
 * ROT32() is defined elsewhere. */
uint32_t
murmur3_32(const char *key, uint32_t len, uint32_t seed)
{
	static const uint32_t c1 = 0xcc9e2d51;
	static const uint32_t c2 = 0x1b873593;
	static const uint32_t r1 = 15;
	static const uint32_t r2 = 13;
	static const uint32_t m = 5;
	static const uint32_t n = 0xe6546b64;
	uint32_t hash = seed;
	const uint32_t nblocks = len / 4;
	const uint8_t *tail;
	uint32_t i, k, k1;

	/* body: mix in every full 4-byte block */
	for (i = 0; i < nblocks; i++) {
		memcpy(&k, key + (size_t)i * 4, sizeof(k));
		k *= c1;
		k = ROT32(k, r1);
		k *= c2;

		hash ^= k;
		hash = ROT32(hash, r2) * m + n;
	}

	/* tail: the remaining 0-3 bytes */
	tail = (const uint8_t *)key + (size_t)nblocks * 4;

	k1 = 0;
	switch (len & 3) {
	case 3:
		k1 ^= (uint32_t)tail[2] << 16;
		/* fallthrough */
	case 2:
		k1 ^= (uint32_t)tail[1] << 8;
		/* fallthrough */
	case 1:
		k1 ^= tail[0];

		k1 *= c1;
		k1 = ROT32(k1, r1);
		k1 *= c2;
		hash ^= k1;
	}

	/* finalization */
	hash ^= len;
	hash ^= (hash >> 16);
	hash *= 0x85ebca6b;
	hash ^= (hash >> 13);
	hash *= 0xc2b2ae35;
	hash ^= (hash >> 16);

	return hash;
}
|