2022-03-27 12:57:05 +00:00
|
|
|
#include <sys/types.h>
|
|
|
|
|
2018-02-16 11:40:05 +00:00
|
|
|
#include <limits.h>
|
2017-11-19 13:05:00 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <time.h>
|
|
|
|
|
|
|
|
#include "util.h"
|
|
|
|
|
2019-07-04 19:56:31 +00:00
|
|
|
/* Name and item counters of the feed currently being written. */
static struct feed f;
/* Gopher selector prefix, host and port used in generated menu lines;
 * main() overrides them from the SFEED_GOPHER_PATH, SFEED_GOPHER_HOST and
 * SFEED_GOPHER_PORT environment variables when those are set. */
static char *prefixpath = "/", *host = "127.0.0.1", *port = "70"; /* default */
/* Line buffer and its capacity, reused by every getline() call. */
static char *line;
static size_t linesize;
/* Items with a timestamp at or after this moment are counted as new. */
static time_t comparetime;
|
2017-11-19 13:05:00 +00:00
|
|
|
|
2020-01-18 18:19:41 +00:00
|
|
|
/*
 * Write the string s to fp as text that is safe inside a gopher menu line:
 * CR and LF are dropped and every TAB is replaced, so the value cannot
 * terminate the line or start a new tab-separated field.
 * All other bytes are written unchanged.
 *
 * NOTE(review): the TAB replacement literal is reproduced as found; confirm
 * against version control that its whitespace was not collapsed.
 */
void
gophertext(FILE *fp, const char *s)
{
	for (; *s; s++) {
		switch (*s) {
		case '\r': /* ignore CR */
		case '\n': /* ignore LF */
			break;
		case '\t':
			fputs(" ", fp);
			break;
		default:
			putc(*s, fp);
			break;
		}
	}
}
|
|
|
|
|
|
|
|
static void
|
2018-02-16 11:40:05 +00:00
|
|
|
printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
|
2017-11-19 13:05:00 +00:00
|
|
|
{
|
2020-05-13 16:01:03 +00:00
|
|
|
struct uri u;
|
util: improve/refactor URI parsing and formatting
Removed/rewritten the functions:
absuri, parseuri, and encodeuri() for percent-encoding.
The functions are now split separately with the following purpose:
- uri_format: format struct uri into a string.
- uri_hasscheme: quick check if a string is absolute or not.
- uri_makeabs: make a URI absolute using a base uri and the original URI.
- uri_parse: parse a string into a struct uri.
The following URLs are better parsed:
- URLs with extra "/"'s in the path prepended are kept as is, no "/" is added
either for empty paths.
- URLs like "http://codemadness.org" are not changed to
"http://codemadness.org/" anymore (paths are kept as is, unless they are
non-empty and not start with "/").
- Paths are not percent-encoded anymore.
- URLs with userinfo field (username, password) are parsed.
like: ftp://user:password@[2001:db8::7]:2121/rfc/rfc1808.txt
- Non-authoritive URLs like mailto:some@email.org, magnet URIs, ISBN URIs/urn,
like: urn:isbn:0-395-36341-1 are allowed and parsed correctly.
- Both local (file:///) and non-local (file://) are supported.
- Specifying a base URL with a port will now only use it when the relative URL
has no host and port set and follows RFC3986 5.2.2 more closely.
- Parsing numeric port: parse as signed long and check <= 0, empty port is
allowed.
- Parsing URIs containing query, fragment, but no path separator (/) will now
parse the component properly.
For sfeed:
- Parse the baseURI only once (no need to do it every time for making absolute
URIs).
- If a link/enclosure is absolute already or if there is no base URL specified
then just print the link directly. There have also been other small performance
improvements related to handling URIs.
References:
- https://tools.ietf.org/html/rfc3986
- Section "5.2.2. Transform References" have also been helpful.
2021-02-16 17:38:56 +00:00
|
|
|
char *fields[FieldLast];
|
|
|
|
char *itemhost, *itemport, *itempath, *itemquery, *itemfragment;
|
2018-02-16 11:40:05 +00:00
|
|
|
ssize_t linelen;
|
|
|
|
unsigned int isnew;
|
2021-01-10 15:57:53 +00:00
|
|
|
struct tm rtm, *tm;
|
2017-11-19 13:05:00 +00:00
|
|
|
time_t parsedtime;
|
2020-05-13 16:01:03 +00:00
|
|
|
int itemtype;
|
2017-11-19 13:05:00 +00:00
|
|
|
|
2020-01-18 18:19:41 +00:00
|
|
|
if (f->name[0]) {
|
|
|
|
fprintf(fpitems, "i%s\t\t%s\t%s\r\n", f->name, host, port);
|
|
|
|
fprintf(fpitems, "i\t\t%s\t%s\r\n", host, port);
|
|
|
|
}
|
2018-02-16 11:40:05 +00:00
|
|
|
|
2022-03-14 18:22:42 +00:00
|
|
|
while ((linelen = getline(&line, &linesize, fpin)) > 0 &&
|
|
|
|
!ferror(fpitems)) {
|
2017-11-19 13:05:00 +00:00
|
|
|
if (line[linelen - 1] == '\n')
|
|
|
|
line[--linelen] = '\0';
|
2020-04-01 16:40:02 +00:00
|
|
|
parseline(line, fields);
|
2017-11-19 13:05:00 +00:00
|
|
|
|
2020-05-13 16:01:03 +00:00
|
|
|
itemhost = host;
|
|
|
|
itemport = port;
|
|
|
|
itemtype = 'i';
|
|
|
|
itempath = fields[FieldLink];
|
util: improve/refactor URI parsing and formatting
Removed/rewritten the functions:
absuri, parseuri, and encodeuri() for percent-encoding.
The functions are now split separately with the following purpose:
- uri_format: format struct uri into a string.
- uri_hasscheme: quick check if a string is absolute or not.
- uri_makeabs: make a URI absolute using a base uri and the original URI.
- uri_parse: parse a string into a struct uri.
The following URLs are better parsed:
- URLs with extra "/"'s in the path prepended are kept as is, no "/" is added
either for empty paths.
- URLs like "http://codemadness.org" are not changed to
"http://codemadness.org/" anymore (paths are kept as is, unless they are
non-empty and not start with "/").
- Paths are not percent-encoded anymore.
- URLs with userinfo field (username, password) are parsed.
like: ftp://user:password@[2001:db8::7]:2121/rfc/rfc1808.txt
- Non-authoritive URLs like mailto:some@email.org, magnet URIs, ISBN URIs/urn,
like: urn:isbn:0-395-36341-1 are allowed and parsed correctly.
- Both local (file:///) and non-local (file://) are supported.
- Specifying a base URL with a port will now only use it when the relative URL
has no host and port set and follows RFC3986 5.2.2 more closely.
- Parsing numeric port: parse as signed long and check <= 0, empty port is
allowed.
- Parsing URIs containing query, fragment, but no path separator (/) will now
parse the component properly.
For sfeed:
- Parse the baseURI only once (no need to do it every time for making absolute
URIs).
- If a link/enclosure is absolute already or if there is no base URL specified
then just print the link directly. There have also been other small performance
improvements related to handling URIs.
References:
- https://tools.ietf.org/html/rfc3986
- Section "5.2.2. Transform References" have also been helpful.
2021-02-16 17:38:56 +00:00
|
|
|
itemquery = "";
|
|
|
|
itemfragment = "";
|
2020-05-13 16:01:03 +00:00
|
|
|
|
2019-04-29 22:30:33 +00:00
|
|
|
if (fields[FieldLink][0]) {
|
2020-05-13 16:01:03 +00:00
|
|
|
itemtype = 'h';
|
2022-07-20 18:37:14 +00:00
|
|
|
/* if it's a gopher URL then change it into a DirEntity */
|
2020-05-13 17:27:14 +00:00
|
|
|
if (!strncmp(fields[FieldLink], "gopher://", 9) &&
|
util: improve/refactor URI parsing and formatting
Removed/rewritten the functions:
absuri, parseuri, and encodeuri() for percent-encoding.
The functions are now split separately with the following purpose:
- uri_format: format struct uri into a string.
- uri_hasscheme: quick check if a string is absolute or not.
- uri_makeabs: make a URI absolute using a base uri and the original URI.
- uri_parse: parse a string into a struct uri.
The following URLs are better parsed:
- URLs with extra "/"'s in the path prepended are kept as is, no "/" is added
either for empty paths.
- URLs like "http://codemadness.org" are not changed to
"http://codemadness.org/" anymore (paths are kept as is, unless they are
non-empty and not start with "/").
- Paths are not percent-encoded anymore.
- URLs with userinfo field (username, password) are parsed.
like: ftp://user:password@[2001:db8::7]:2121/rfc/rfc1808.txt
- Non-authoritive URLs like mailto:some@email.org, magnet URIs, ISBN URIs/urn,
like: urn:isbn:0-395-36341-1 are allowed and parsed correctly.
- Both local (file:///) and non-local (file://) are supported.
- Specifying a base URL with a port will now only use it when the relative URL
has no host and port set and follows RFC3986 5.2.2 more closely.
- Parsing numeric port: parse as signed long and check <= 0, empty port is
allowed.
- Parsing URIs containing query, fragment, but no path separator (/) will now
parse the component properly.
For sfeed:
- Parse the baseURI only once (no need to do it every time for making absolute
URIs).
- If a link/enclosure is absolute already or if there is no base URL specified
then just print the link directly. There have also been other small performance
improvements related to handling URIs.
References:
- https://tools.ietf.org/html/rfc3986
- Section "5.2.2. Transform References" have also been helpful.
2021-02-16 17:38:56 +00:00
|
|
|
uri_parse(fields[FieldLink], &u) != -1) {
|
2020-05-13 16:01:03 +00:00
|
|
|
itemhost = u.host;
|
|
|
|
itemport = u.port[0] ? u.port : "70";
|
|
|
|
itemtype = '1';
|
|
|
|
itempath = u.path;
|
util: improve/refactor URI parsing and formatting
Removed/rewritten the functions:
absuri, parseuri, and encodeuri() for percent-encoding.
The functions are now split separately with the following purpose:
- uri_format: format struct uri into a string.
- uri_hasscheme: quick check if a string is absolute or not.
- uri_makeabs: make a URI absolute using a base uri and the original URI.
- uri_parse: parse a string into a struct uri.
The following URLs are better parsed:
- URLs with extra "/"'s in the path prepended are kept as is, no "/" is added
either for empty paths.
- URLs like "http://codemadness.org" are not changed to
"http://codemadness.org/" anymore (paths are kept as is, unless they are
non-empty and not start with "/").
- Paths are not percent-encoded anymore.
- URLs with userinfo field (username, password) are parsed.
like: ftp://user:password@[2001:db8::7]:2121/rfc/rfc1808.txt
- Non-authoritive URLs like mailto:some@email.org, magnet URIs, ISBN URIs/urn,
like: urn:isbn:0-395-36341-1 are allowed and parsed correctly.
- Both local (file:///) and non-local (file://) are supported.
- Specifying a base URL with a port will now only use it when the relative URL
has no host and port set and follows RFC3986 5.2.2 more closely.
- Parsing numeric port: parse as signed long and check <= 0, empty port is
allowed.
- Parsing URIs containing query, fragment, but no path separator (/) will now
parse the component properly.
For sfeed:
- Parse the baseURI only once (no need to do it every time for making absolute
URIs).
- If a link/enclosure is absolute already or if there is no base URL specified
then just print the link directly. There have also been other small performance
improvements related to handling URIs.
References:
- https://tools.ietf.org/html/rfc3986
- Section "5.2.2. Transform References" have also been helpful.
2021-02-16 17:38:56 +00:00
|
|
|
itemquery = u.query;
|
|
|
|
itemfragment = u.fragment;
|
2020-05-13 16:01:03 +00:00
|
|
|
|
|
|
|
if (itempath[0] == '/') {
|
|
|
|
itempath++;
|
|
|
|
if (*itempath) {
|
|
|
|
itemtype = *itempath;
|
|
|
|
itempath++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-04-29 22:30:33 +00:00
|
|
|
}
|
2020-05-13 16:01:03 +00:00
|
|
|
|
2020-07-05 13:57:02 +00:00
|
|
|
parsedtime = 0;
|
|
|
|
if (!strtotime(fields[FieldUnixTimestamp], &parsedtime) &&
|
2021-01-10 15:57:53 +00:00
|
|
|
(tm = localtime_r(&parsedtime, &rtm))) {
|
2020-07-05 13:57:02 +00:00
|
|
|
isnew = (parsedtime >= comparetime) ? 1 : 0;
|
|
|
|
f->totalnew += isnew;
|
|
|
|
|
|
|
|
fprintf(fpitems, "%c%c %04d-%02d-%02d %02d:%02d ",
|
|
|
|
itemtype,
|
|
|
|
isnew ? 'N' : ' ',
|
|
|
|
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
|
|
|
|
tm->tm_hour, tm->tm_min);
|
|
|
|
} else {
|
|
|
|
fprintf(fpitems, "%c ", itemtype);
|
|
|
|
}
|
|
|
|
f->total++;
|
|
|
|
|
2020-05-13 16:01:03 +00:00
|
|
|
gophertext(fpitems, fields[FieldTitle]);
|
|
|
|
fputs("\t", fpitems);
|
|
|
|
if (itemtype == 'h' && fields[FieldLink] == itempath)
|
|
|
|
fputs("URL:", fpitems);
|
|
|
|
gophertext(fpitems, itempath);
|
util: improve/refactor URI parsing and formatting
Removed/rewritten the functions:
absuri, parseuri, and encodeuri() for percent-encoding.
The functions are now split separately with the following purpose:
- uri_format: format struct uri into a string.
- uri_hasscheme: quick check if a string is absolute or not.
- uri_makeabs: make a URI absolute using a base uri and the original URI.
- uri_parse: parse a string into a struct uri.
The following URLs are better parsed:
- URLs with extra "/"'s in the path prepended are kept as is, no "/" is added
either for empty paths.
- URLs like "http://codemadness.org" are not changed to
"http://codemadness.org/" anymore (paths are kept as is, unless they are
non-empty and not start with "/").
- Paths are not percent-encoded anymore.
- URLs with userinfo field (username, password) are parsed.
like: ftp://user:password@[2001:db8::7]:2121/rfc/rfc1808.txt
- Non-authoritive URLs like mailto:some@email.org, magnet URIs, ISBN URIs/urn,
like: urn:isbn:0-395-36341-1 are allowed and parsed correctly.
- Both local (file:///) and non-local (file://) are supported.
- Specifying a base URL with a port will now only use it when the relative URL
has no host and port set and follows RFC3986 5.2.2 more closely.
- Parsing numeric port: parse as signed long and check <= 0, empty port is
allowed.
- Parsing URIs containing query, fragment, but no path separator (/) will now
parse the component properly.
For sfeed:
- Parse the baseURI only once (no need to do it every time for making absolute
URIs).
- If a link/enclosure is absolute already or if there is no base URL specified
then just print the link directly. There have also been other small performance
improvements related to handling URIs.
References:
- https://tools.ietf.org/html/rfc3986
- Section "5.2.2. Transform References" have also been helpful.
2021-02-16 17:38:56 +00:00
|
|
|
if (itemquery[0]) {
|
|
|
|
fputs("?", fpitems);
|
|
|
|
gophertext(fpitems, itemquery);
|
|
|
|
}
|
|
|
|
if (itemfragment[0]) {
|
|
|
|
fputs("#", fpitems);
|
|
|
|
gophertext(fpitems, itemfragment);
|
|
|
|
}
|
2020-05-13 16:01:03 +00:00
|
|
|
fprintf(fpitems, "\t%s\t%s\r\n", itemhost, itemport);
|
2017-11-19 13:05:00 +00:00
|
|
|
}
|
2020-01-18 18:19:41 +00:00
|
|
|
fputs(".\r\n", fpitems);
|
2017-11-19 13:05:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2018-02-16 11:40:05 +00:00
|
|
|
FILE *fpitems, *fpindex, *fp;
|
2020-01-18 18:19:41 +00:00
|
|
|
char *name, *p, path[PATH_MAX + 1];
|
2019-07-04 19:56:31 +00:00
|
|
|
int i, r;
|
2017-11-19 13:05:00 +00:00
|
|
|
|
2021-01-01 21:38:10 +00:00
|
|
|
if (argc == 1) {
|
|
|
|
if (pledge("stdio", NULL) == -1)
|
|
|
|
err(1, "pledge");
|
|
|
|
} else {
|
|
|
|
if (unveil("/", "r") == -1)
|
2021-06-01 16:33:29 +00:00
|
|
|
err(1, "unveil: /");
|
2021-01-01 21:38:10 +00:00
|
|
|
if (unveil(".", "rwc") == -1)
|
2021-06-01 16:33:29 +00:00
|
|
|
err(1, "unveil: .");
|
2021-01-01 21:38:10 +00:00
|
|
|
if (pledge("stdio rpath wpath cpath", NULL) == -1)
|
|
|
|
err(1, "pledge");
|
|
|
|
}
|
2017-11-19 13:05:00 +00:00
|
|
|
|
2022-03-14 12:25:52 +00:00
|
|
|
if ((comparetime = time(NULL)) == (time_t)-1)
|
|
|
|
errx(1, "time");
|
2017-11-19 13:05:00 +00:00
|
|
|
/* 1 day is old news */
|
|
|
|
comparetime -= 86400;
|
|
|
|
|
2020-01-18 18:19:41 +00:00
|
|
|
if ((p = getenv("SFEED_GOPHER_HOST")))
|
|
|
|
host = p;
|
|
|
|
if ((p = getenv("SFEED_GOPHER_PORT")))
|
|
|
|
port = p;
|
|
|
|
|
2017-11-19 13:05:00 +00:00
|
|
|
if (argc == 1) {
|
2019-07-04 19:56:31 +00:00
|
|
|
f.name = "";
|
|
|
|
printfeed(stdout, stdin, &f);
|
2022-03-14 18:22:42 +00:00
|
|
|
checkfileerror(stdin, "<stdin>", 'r');
|
|
|
|
checkfileerror(stdout, "<stdout>", 'w');
|
2017-11-19 13:05:00 +00:00
|
|
|
} else {
|
2020-01-18 18:19:41 +00:00
|
|
|
if ((p = getenv("SFEED_GOPHER_PATH")))
|
|
|
|
prefixpath = p;
|
2019-07-04 19:56:31 +00:00
|
|
|
|
2018-02-16 11:40:05 +00:00
|
|
|
/* write main index page */
|
2020-01-18 18:19:41 +00:00
|
|
|
if (!(fpindex = fopen("index", "wb")))
|
|
|
|
err(1, "fopen: index");
|
2018-02-16 11:40:05 +00:00
|
|
|
|
2017-11-19 13:05:00 +00:00
|
|
|
for (i = 1; i < argc; i++) {
|
2019-07-04 19:56:31 +00:00
|
|
|
memset(&f, 0, sizeof(f));
|
2018-02-16 11:40:05 +00:00
|
|
|
name = ((name = strrchr(argv[i], '/'))) ? name + 1 : argv[i];
|
2019-07-04 19:56:31 +00:00
|
|
|
f.name = name;
|
2018-02-16 11:40:05 +00:00
|
|
|
|
2017-11-19 13:05:00 +00:00
|
|
|
if (!(fp = fopen(argv[i], "r")))
|
|
|
|
err(1, "fopen: %s", argv[i]);
|
2018-02-16 11:40:05 +00:00
|
|
|
|
2020-01-18 18:19:41 +00:00
|
|
|
r = snprintf(path, sizeof(path), "%s", name);
|
2019-07-04 19:56:31 +00:00
|
|
|
if (r < 0 || (size_t)r >= sizeof(path))
|
|
|
|
errx(1, "path truncation: %s", path);
|
2018-02-16 11:40:05 +00:00
|
|
|
if (!(fpitems = fopen(path, "wb")))
|
|
|
|
err(1, "fopen");
|
2019-07-04 19:56:31 +00:00
|
|
|
printfeed(fpitems, fp, &f);
|
2022-03-14 18:22:42 +00:00
|
|
|
checkfileerror(fp, argv[i], 'r');
|
|
|
|
checkfileerror(fpitems, path, 'w');
|
2017-11-19 13:05:00 +00:00
|
|
|
fclose(fp);
|
2018-02-16 11:40:05 +00:00
|
|
|
fclose(fpitems);
|
|
|
|
|
|
|
|
/* append directory item to index */
|
2020-01-18 18:19:41 +00:00
|
|
|
fputs("1", fpindex);
|
|
|
|
gophertext(fpindex, name);
|
|
|
|
fprintf(fpindex, " (%lu/%lu)\t", f.totalnew, f.total);
|
|
|
|
gophertext(fpindex, prefixpath);
|
|
|
|
gophertext(fpindex, path);
|
|
|
|
fprintf(fpindex, "\t%s\t%s\r\n", host, port);
|
2017-11-19 13:05:00 +00:00
|
|
|
}
|
2020-01-18 18:19:41 +00:00
|
|
|
fputs(".\r\n", fpindex);
|
2022-03-14 18:22:42 +00:00
|
|
|
checkfileerror(fpindex, "index", 'w');
|
2018-02-16 11:40:05 +00:00
|
|
|
fclose(fpindex);
|
2017-11-19 13:05:00 +00:00
|
|
|
}
|
2018-02-16 11:40:05 +00:00
|
|
|
|
2017-11-19 13:05:00 +00:00
|
|
|
return 0;
|
|
|
|
}
|