2014-11-11 18:56:05 +00:00
|
|
|
#include <ctype.h>
|
2015-01-03 11:44:32 +00:00
|
|
|
#include <errno.h>
|
2012-08-03 10:03:17 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2014-11-11 18:56:05 +00:00
|
|
|
#include <string.h>
|
2016-02-27 15:21:30 +00:00
|
|
|
#include <wchar.h>
|
2013-01-20 18:14:34 +00:00
|
|
|
|
2014-03-31 20:46:58 +00:00
|
|
|
#include "util.h"
|
|
|
|
|
2015-07-28 19:24:06 +00:00
|
|
|
int
|
|
|
|
parseuri(const char *s, struct uri *u, int rel)
|
|
|
|
{
|
2015-08-22 12:58:28 +00:00
|
|
|
const char *p = s, *b;
|
|
|
|
char *endptr = NULL;
|
2015-06-20 22:20:12 +00:00
|
|
|
size_t i;
|
2015-08-22 12:58:28 +00:00
|
|
|
unsigned long l;
|
2015-06-20 22:20:12 +00:00
|
|
|
|
2015-08-22 12:58:28 +00:00
|
|
|
u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0';
|
2015-07-28 19:24:06 +00:00
|
|
|
if (!*s)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* prefix is "//", don't read protocol, skip to domain parsing */
|
|
|
|
if (!strncmp(p, "//", 2)) {
|
|
|
|
p += 2; /* skip "//" */
|
|
|
|
} else {
|
|
|
|
/* protocol part */
|
2020-10-12 16:49:56 +00:00
|
|
|
for (p = s; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
|
|
|
|
*p == '+' || *p == '-' || *p == '.'; p++)
|
2015-07-28 19:24:06 +00:00
|
|
|
;
|
|
|
|
if (!strncmp(p, "://", 3)) {
|
2018-02-18 13:38:37 +00:00
|
|
|
if ((size_t)(p - s) >= sizeof(u->proto))
|
2015-07-28 19:24:06 +00:00
|
|
|
return -1; /* protocol too long */
|
|
|
|
memcpy(u->proto, s, p - s);
|
2015-08-10 20:02:19 +00:00
|
|
|
u->proto[p - s] = '\0';
|
2015-07-28 19:24:06 +00:00
|
|
|
p += 3; /* skip "://" */
|
|
|
|
} else {
|
|
|
|
p = s; /* no protocol format, set to start */
|
|
|
|
/* relative url: read rest as path, else as domain */
|
|
|
|
if (rel)
|
|
|
|
goto readpath;
|
|
|
|
}
|
2015-06-20 22:20:12 +00:00
|
|
|
}
|
2015-08-22 12:58:28 +00:00
|
|
|
/* IPv6 address */
|
|
|
|
if (*p == '[') {
|
|
|
|
/* bracket not found or host too long */
|
2019-04-21 09:54:44 +00:00
|
|
|
if (!(b = strchr(p, ']')) || (size_t)(b - p) < 3 ||
|
|
|
|
(size_t)(b - p) >= sizeof(u->host))
|
2015-08-22 12:58:28 +00:00
|
|
|
return -1;
|
2019-04-21 09:54:44 +00:00
|
|
|
memcpy(u->host, p, b - p + 1);
|
|
|
|
u->host[b - p + 1] = '\0';
|
2015-08-22 12:58:28 +00:00
|
|
|
p = b + 1;
|
|
|
|
} else {
|
|
|
|
/* domain / host part, skip until port, path or end. */
|
|
|
|
if ((i = strcspn(p, ":/")) >= sizeof(u->host))
|
|
|
|
return -1; /* host too long */
|
|
|
|
memcpy(u->host, p, i);
|
|
|
|
u->host[i] = '\0';
|
|
|
|
p = &p[i];
|
|
|
|
}
|
|
|
|
/* port */
|
|
|
|
if (*p == ':') {
|
|
|
|
if ((i = strcspn(++p, "/")) >= sizeof(u->port))
|
|
|
|
return -1; /* port too long */
|
|
|
|
memcpy(u->port, p, i);
|
|
|
|
u->port[i] = '\0';
|
2016-04-10 17:33:27 +00:00
|
|
|
/* check for valid port: range 1 - 65535 */
|
2015-08-22 12:58:28 +00:00
|
|
|
errno = 0;
|
|
|
|
l = strtoul(u->port, &endptr, 10);
|
2016-04-10 17:33:27 +00:00
|
|
|
if (errno || u->port[0] == '\0' || *endptr ||
|
|
|
|
!l || l > 65535)
|
2015-08-22 12:58:28 +00:00
|
|
|
return -1;
|
|
|
|
p = &p[i];
|
|
|
|
}
|
2015-07-28 19:24:06 +00:00
|
|
|
readpath:
|
|
|
|
if (u->host[0]) {
|
|
|
|
p = &p[strspn(p, "/")];
|
|
|
|
strlcpy(u->path, "/", sizeof(u->path));
|
|
|
|
} else {
|
2015-08-22 12:58:28 +00:00
|
|
|
/* absolute uri must have a host specified */
|
2015-07-28 19:24:06 +00:00
|
|
|
if (!rel)
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
/* treat truncation as an error */
|
2015-08-16 17:31:46 +00:00
|
|
|
if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path))
|
|
|
|
return -1;
|
|
|
|
return 0;
|
2015-06-20 22:20:12 +00:00
|
|
|
}
|
|
|
|
|
2016-04-10 18:19:29 +00:00
|
|
|
/* Percent-encode the string `s` into `buf`, which holds `bufsiz` bytes.
 * Bytes <= ' ' (space and control characters) and bytes >= 127 are written
 * as "%XX" with uppercase hex digits; all other bytes are copied unchanged.
 * Returns 0 on success with `buf` NUL-terminated, or -1 if the encoded
 * result including its terminator does not fit. On failure `buf` may hold
 * a partial, unterminated result. */
static int
encodeuri(char *buf, size_t bufsiz, const char *s)
{
	static const char hexdigits[] = "0123456789ABCDEF";
	const unsigned char *in;
	size_t out = 0;

	for (in = (const unsigned char *)s; *in != '\0'; in++) {
		if (*in > ' ' && *in < 127) {
			/* plain byte: copy as-is when there is room */
			if (out >= bufsiz)
				return -1;
			buf[out++] = (char)*in;
			continue;
		}
		/* needs escaping: three bytes, leaving room for the NUL */
		if (out + 3 >= bufsiz)
			return -1;
		buf[out++] = '%';
		buf[out++] = hexdigits[(*in >> 4) & 15];
		buf[out++] = hexdigits[*in & 15];
	}
	if (out >= bufsiz)
		return -1;
	buf[out] = '\0';

	return 0;
}
|
|
|
|
|
2015-07-31 20:29:34 +00:00
|
|
|
/* Get absolute uri; if `link` is relative use `base` to make it absolute.
|
|
|
|
* the returned string in `buf` is uri encoded, see: encodeuri(). */
|
2015-07-28 19:24:06 +00:00
|
|
|
int
|
2016-04-10 18:19:29 +00:00
|
|
|
absuri(char *buf, size_t bufsiz, const char *link, const char *base)
|
2014-11-11 18:56:05 +00:00
|
|
|
{
|
2015-07-28 19:24:06 +00:00
|
|
|
struct uri ulink, ubase;
|
2015-08-22 14:18:10 +00:00
|
|
|
char tmp[4096], *host, *p, *port;
|
2019-04-21 09:54:44 +00:00
|
|
|
int c, r;
|
2015-07-28 19:49:24 +00:00
|
|
|
size_t i;
|
2015-07-28 19:24:06 +00:00
|
|
|
|
|
|
|
buf[0] = '\0';
|
|
|
|
if (parseuri(base, &ubase, 0) == -1 ||
|
2015-08-22 14:18:10 +00:00
|
|
|
parseuri(link, &ulink, 1) == -1 ||
|
|
|
|
(!ulink.host[0] && !ubase.host[0]))
|
2015-07-28 19:24:06 +00:00
|
|
|
return -1;
|
|
|
|
|
2015-08-22 14:18:10 +00:00
|
|
|
if (!strncmp(link, "//", 2)) {
|
|
|
|
host = ulink.host;
|
|
|
|
port = ulink.port;
|
|
|
|
} else {
|
|
|
|
host = ulink.host[0] ? ulink.host : ubase.host;
|
|
|
|
port = ulink.port[0] ? ulink.port : ubase.port;
|
|
|
|
}
|
2015-08-22 12:58:44 +00:00
|
|
|
r = snprintf(tmp, sizeof(tmp), "%s://%s%s%s",
|
2015-07-28 19:24:06 +00:00
|
|
|
ulink.proto[0] ?
|
|
|
|
ulink.proto :
|
|
|
|
(ubase.proto[0] ? ubase.proto : "http"),
|
2015-08-22 14:18:10 +00:00
|
|
|
host,
|
2015-08-22 12:58:44 +00:00
|
|
|
port[0] ? ":" : "",
|
|
|
|
port);
|
2019-03-08 17:26:30 +00:00
|
|
|
if (r < 0 || (size_t)r >= sizeof(tmp))
|
2016-02-27 15:21:30 +00:00
|
|
|
return -1; /* error or truncation */
|
2015-07-28 19:24:06 +00:00
|
|
|
|
|
|
|
/* relative to root */
|
|
|
|
if (!ulink.host[0] && ulink.path[0] != '/') {
|
|
|
|
/* relative to base url path */
|
|
|
|
if (ulink.path[0]) {
|
|
|
|
if ((p = strrchr(ubase.path, '/'))) {
|
|
|
|
/* temporary null-terminate */
|
|
|
|
c = *(++p);
|
|
|
|
*p = '\0';
|
2015-07-28 19:49:24 +00:00
|
|
|
i = strlcat(tmp, ubase.path, sizeof(tmp));
|
2015-07-28 19:24:06 +00:00
|
|
|
*p = c; /* restore */
|
2015-07-28 19:49:24 +00:00
|
|
|
if (i >= sizeof(tmp))
|
|
|
|
return -1;
|
2015-07-28 19:24:06 +00:00
|
|
|
}
|
2015-08-16 17:31:46 +00:00
|
|
|
} else if (strlcat(tmp, ubase.path, sizeof(tmp)) >=
|
|
|
|
sizeof(tmp)) {
|
|
|
|
return -1;
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
2015-07-28 19:24:06 +00:00
|
|
|
}
|
|
|
|
if (strlcat(tmp, ulink.path, sizeof(tmp)) >= sizeof(tmp))
|
|
|
|
return -1;
|
|
|
|
|
2016-04-10 18:19:29 +00:00
|
|
|
return encodeuri(buf, bufsiz, tmp);
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
|
|
|
|
2020-04-01 16:40:02 +00:00
|
|
|
/* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
|
|
|
|
* terminators and assign these fields as pointers. If there are less fields
|
|
|
|
* than expected then the field is an empty string constant. */
|
|
|
|
void
|
2016-02-27 15:21:30 +00:00
|
|
|
parseline(char *line, char *fields[FieldLast])
|
2014-04-08 22:00:13 +00:00
|
|
|
{
|
2012-08-03 10:03:17 +00:00
|
|
|
char *prev, *s;
|
2015-08-02 11:45:43 +00:00
|
|
|
size_t i;
|
2012-08-03 10:03:17 +00:00
|
|
|
|
2016-02-27 15:21:30 +00:00
|
|
|
for (prev = line, i = 0;
|
2015-08-02 11:45:43 +00:00
|
|
|
(s = strchr(prev, '\t')) && i < FieldLast - 1;
|
2015-05-16 13:56:00 +00:00
|
|
|
i++) {
|
|
|
|
*s = '\0';
|
2012-08-03 10:03:17 +00:00
|
|
|
fields[i] = prev;
|
2015-05-16 13:56:00 +00:00
|
|
|
prev = s + 1;
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
2015-05-16 13:56:00 +00:00
|
|
|
fields[i++] = prev;
|
|
|
|
/* make non-parsed fields empty. */
|
2015-08-02 11:45:43 +00:00
|
|
|
for (; i < FieldLast; i++)
|
2015-05-16 13:56:00 +00:00
|
|
|
fields[i] = "";
|
2012-08-03 10:03:17 +00:00
|
|
|
}
|
|
|
|
|
2016-04-10 12:34:25 +00:00
|
|
|
/* Parse the base-10 integer in `s` as a time value, assumes time_t is signed.
 * The whole string must be consumed by the conversion: an empty string or
 * any trailing characters (this includes a fractional part such as ".5")
 * is an error, as is a value out of range for long long.
 * On success the value is stored in `*t` when `t` is not NULL.
 * Returns 0 on success, -1 on error. */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	errno = 0;
	value = strtoll(s, &end, 10);
	if (errno != 0)
		return -1;
	if (s[0] == '\0' || end[0] != '\0')
		return -1;

	/* NOTE: assumes time_t is 64-bit on 64-bit platforms:
	         long long (atleast 32-bit) to time_t. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}
|
2015-05-24 20:46:47 +00:00
|
|
|
|
2015-08-02 11:08:12 +00:00
|
|
|
/* Escape characters below as HTML 2.0 / XML 1.0.
 * Writes the string `s` to the stream `fp`, replacing the markup characters
 * '<', '>', '\'', '&' and '"' with their entity / character references so
 * the text cannot be interpreted as markup. All other bytes are written
 * unchanged. */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; ++s) {
		switch (*s) {
		case '<':  fputs("&lt;",   fp); break;
		case '>':  fputs("&gt;",   fp); break;
		case '\'': fputs("&#39;",  fp); break;
		case '&':  fputs("&amp;",  fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   fputc(*s, fp);
		}
	}
}
|
2015-06-20 22:19:22 +00:00
|
|
|
|
2018-06-24 11:10:14 +00:00
|
|
|
/* Print the string `s` to `fp` using exactly `len` columns (nothing is
 * printed when `len` is 0). Bytes below 32 and characters for which
 * wcwidth() returns -1 are skipped. If the string does not fit it is cut
 * off and an ellipsis (U+2026, counted as one column) is printed in its
 * place; if the string is shorter the remaining columns are filled with
 * the character `pad`. Output of the string stops at the first byte
 * sequence mbtowc() cannot decode. */
void
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
{
	wchar_t wc;
	size_t used = 0, pos = 0, total, remain;
	int nbytes, width;
	unsigned char ch;

	if (len == 0)
		return;

	total = strlen(s);
	while (pos < total) {
		ch = (unsigned char)s[pos];
		nbytes = 1;
		width = 1;

		/* control characters take no part in the output */
		if (ch < 32) {
			pos++;
			continue;
		}
		/* possibly multi-byte: decode to find byte length and width */
		if (ch >= 127) {
			remain = total - pos;
			nbytes = mbtowc(&wc, s + pos, remain < 4 ? remain : 4);
			if (nbytes <= 0)
				break;
			width = wcwidth(wc);
			if (width == -1) {
				pos += nbytes;
				continue;
			}
		}

		/* no room for this character, or it would fill the last
		 * column while more input follows: truncate with ellipsis */
		if (used + width > len ||
		    (used + width == len && s[pos + nbytes] != '\0')) {
			fputs("\xe2\x80\xa6", fp);
			used++;
			break;
		}

		fwrite(s + pos, 1, nbytes, fp);
		used += width;
		pos += nbytes;
	}

	while (used < len) {
		putc(pad, fp);
		used++;
	}
}
|