sfeed_frames: overhaul

sfeed_frames used to write HTML pages for each entry for each feed. This can
be useful but had security issues, because the context of the content changes.

sfeed_frames is now an HTML version that works better with browsers that don't
support CSS or tables well, such as w3m and lynx. It is now an alternative to
sfeed_html.

- Don't reference and embed HTML content for security reasons. This was
  documented under "SECURITY CONSIDERATIONS" in the man page.
- Tighten pledge(2).
- Simplify
This commit is contained in:
Hiltjo Posthuma 2018-08-16 14:16:58 +02:00
parent a3b6627ae9
commit b7e288a964
2 changed files with 36 additions and 203 deletions

View File

@ -1,4 +1,4 @@
.Dd August 5, 2015
.Dd August 16, 2018
.Dt SFEED_FRAMES 1
.Os
.Sh NAME
@ -14,39 +14,24 @@ formats feed data (TSV) from
to HTML.
It reads TSV data from stdin or
.Ar file
and writes HTML files to the current directory.
and writes HTML files for the frameset to the current directory.
If no
.Ar file
parameters are specified and therefore the data is read from stdin then the
feed name is named "unnamed".
menu.html file is not written.
.Sh FILES WRITTEN
.Bl -tag -width 13n
.It index.html
The main HTML file referencing to the frames items.html and
menu.html.
The main HTML file referencing to the frames items.html and menu.html.
.It items.html
Contains all the items as HTML links to the local content.
The items frame contains all the item HTML links to the remote content.
.It menu.html
Menu frame which contains navigation "anchor" links to the feed names
in items.html.
The menu frame which contains navigation "anchor" links to the feed names in
items.html.
.El
.Sh FILE STRUCTURE
Items for each feed category are in the format: feedname/itemname.html.
The feedname and item names are normalized, whitespace characters are replaced
with a - character, multiple whitespaces are replaced by a single - character
and trailing whitespace will be removed.
The itemname is based on the title of the items.
The feedname and title is truncated to a maximum of 128 bytes.
The maximum length of the path is PATH_MAX or filesystem-specific (truncated).
.Sh SEE ALSO
.Xr sfeed 1 ,
.Xr sfeed_html 1 ,
.Xr sfeed_plain 1
.Sh AUTHORS
.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
.Sh SECURITY CONSIDERATIONS
Each item content file contains the content formatted as HTML, if the feed data
contains HTML like Javascripts, tracking cookies, custom styles and such
these will also be displayed.
Due to the crazy nature of "the web" these things are complex to filter.
Some security and privacy can be gained by using an adblocker, script blocker
and to set your browser settings more strictly.

View File

@ -12,120 +12,23 @@
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <utime.h>
#include "util.h"
static struct feed **feeds;
static char *line;
static size_t linesize;
static struct timespec times[2];
static time_t comparetime;
static unsigned long totalnew;
/* Write `s` to `fp`, decoding the backslash escapes produced by
 * string_print_encoded(): "\\" becomes "\", "\t" becomes TAB and "\n" becomes
 * a newline. Any other escape sequence ("\z" etc.) is dropped, and a lone
 * backslash at the very end of the string ends the output. */
static void
printcontent(const char *s, FILE *fp)
{
	const char *p = s;

	while (*p) {
		if (*p != '\\') {
			/* ordinary byte: copy through unchanged */
			fputc((int)*p, fp);
			p++;
			continue;
		}

		/* step over the backslash and inspect the escaped character */
		p++;
		if (*p == '\0')
			return; /* dangling backslash: nothing left to decode */

		if (*p == '\\')
			fputc('\\', fp);
		else if (*p == 't')
			fputc('\t', fp);
		else if (*p == 'n')
			fputc('\n', fp);
		/* anything else is an unrecognised escape and is dropped */

		p++;
	}
}
/* Write `s` to `fp`, decoding the backslash escapes produced by
 * string_print_encoded() ("\\" to "\", "\t" to TAB, "\n" to newline;
 * unrecognised sequences such as "\z" are dropped) and encoding the
 * HTML 2.0 / XML 1.0 special characters <, >, ', & and " as entities.
 * A lone backslash at the very end of the string ends the output. */
static void
printcontentxml(const char *s, FILE *fp)
{
	const char *p, *entity;

	for (p = s; *p; p++) {
		if (*p == '\\') {
			/* step over the backslash and decode what follows */
			p++;
			if (*p == '\0')
				return; /* dangling backslash: nothing left to decode */
			if (*p == '\\')
				fputc('\\', fp);
			else if (*p == 't')
				fputc('\t', fp);
			else if (*p == 'n')
				fputc('\n', fp);
			/* anything else is an unrecognised escape and is dropped */
			continue;
		}

		/* pick the entity for markup-significant characters, if any */
		entity = NULL;
		switch (*p) {
		case '<':  entity = "&lt;";   break;
		case '>':  entity = "&gt;";   break;
		case '\'': entity = "&#39;";  break;
		case '&':  entity = "&amp;";  break;
		case '"':  entity = "&quot;"; break;
		default:   break;
		}

		if (entity)
			fputs(entity, fp);
		else
			fputc((int)*p, fp);
	}
}
/* Normalize a path name into `buf`: ASCII letters are lower-cased, digits are
 * kept, and every run of other characters is collapsed into a single '-'.
 * The result never starts or ends with '-'.
 *
 * At most bufsiz - 1 bytes are stored so that the terminating NUL always fits
 * inside the buffer; longer input is truncated. Returns the length of the
 * normalized name (excluding the NUL), which is 0 when nothing usable remains
 * or when bufsiz is 0 (in which case `buf` is not touched). */
static size_t
normalizepath(const char *path, char *buf, size_t bufsiz)
{
	size_t i = 0;
	int insep = 0; /* non-zero while inside a run of separator characters */
	unsigned char c;

	if (bufsiz == 0)
		return 0;

	/* "i + 1 < bufsiz" reserves the last byte for the NUL terminator */
	for (; *path && i + 1 < bufsiz; path++) {
		/* <ctype.h> functions require a value representable as
		 * unsigned char; plain char may be negative */
		c = (unsigned char)*path;
		if (isalnum(c)) {
			buf[i++] = (char)tolower(c);
			insep = 0;
		} else {
			/* don't repeat '-', don't start with '-' */
			if (!insep && i)
				buf[i++] = '-';
			insep = 1;
		}
	}

	/* remove trailing '-' */
	while (i > 0 && buf[i - 1] == '-')
		i--;

	buf[i] = '\0';

	return i;
}
static void
printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
{
char dirpath[PATH_MAX], filepath[PATH_MAX];
char *fields[FieldLast], *feedname, name[128];
char *fields[FieldLast];
ssize_t linelen;
FILE *fpcontent = NULL;
unsigned int isnew;
struct tm *tm;
time_t parsedtime;
int fd, r;
if (f->name[0])
feedname = f->name;
else
feedname = "unnamed";
/* make directory for feedname */
if (!normalizepath(feedname, name, sizeof(name)))
return;
strlcpy(dirpath, name, sizeof(dirpath));
/* error creating directory and it doesn't exist. */
if (mkdir(dirpath, S_IRWXU | S_IRWXG | S_IRWXO) == -1 && errno != EEXIST)
err(1, "mkdir: %s", dirpath);
/* menu if not unnamed */
if (f->name[0]) {
@ -150,68 +53,6 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
if (!(tm = localtime(&parsedtime)))
err(1, "localtime");
if (!normalizepath(fields[FieldTitle], name, sizeof(name)))
continue;
r = snprintf(filepath, sizeof(filepath), "%s/%s-%lld.html",
dirpath, name, (long long)parsedtime);
if (r == -1 || (size_t)r >= sizeof(filepath))
errx(1, "snprintf: path truncation: '%s/%s-%lld.html'",
dirpath, name, (long long)parsedtime);
/* content file doesn't exist yet and has error? */
if ((fd = open(filepath, O_CREAT | O_EXCL | O_WRONLY,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) == -1) {
if (errno != EEXIST)
err(1, "open: %s", filepath);
} else {
if (!(fpcontent = fdopen(fd, "wb")))
err(1, "fdopen: %s", filepath);
fputs("<html><head>"
"<link rel=\"stylesheet\" type=\"text/css\" href=\"../../style.css\" />"
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />"
"</head>\n<body class=\"frame\">"
"<div class=\"content\"><h2>", fpcontent);
if (fields[FieldLink][0]) {
fputs("<a href=\"", fpcontent);
xmlencode(fields[FieldLink], fpcontent);
fputs("\">", fpcontent);
}
xmlencode(fields[FieldTitle], fpcontent);
if (fields[FieldLink][0])
fputs("</a>", fpcontent);
fputs("</h2>", fpcontent);
/* NOTE: this prints the raw HTML of the feed, which is
* potentially dangerous; it is left up to the
* user / browser to trust a feed's HTML content. */
if (!strcmp(fields[FieldContentType], "html")) {
printcontent(fields[FieldContent], fpcontent);
} else {
/* plain-text, wrap with <pre> */
fputs("<pre>", fpcontent);
printcontentxml(fields[FieldContent], fpcontent);
fputs("</pre>", fpcontent);
}
fputs("</div></body></html>\n", fpcontent);
/* set modified and access time of file to time of item. */
if (parsedtime) {
/* flush writes before setting atime and mtime
else the remaining (buffered) write can occur at
fclose() and overwrite our time again. */
fflush(fpcontent);
times[0].tv_sec = parsedtime;
times[1].tv_sec = parsedtime;
if (futimens(fd, times) == -1)
err(1, "futimens");
}
fclose(fpcontent);
}
isnew = (parsedtime >= comparetime) ? 1 : 0;
totalnew += isnew;
f->totalnew += isnew;
@ -223,11 +64,15 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
if (isnew)
fputs("<b><u>", fpitems);
fputs("<a href=\"", fpitems);
fputs(filepath, fpitems);
fputs("\" target=\"content\">", fpitems);
xmlencode(fields[FieldTitle], fpitems);
fputs("</a>", fpitems);
if (fields[FieldLink][0]) {
fputs("<a href=\"", fpitems);
xmlencode(fields[FieldLink], fpitems);
fputs("\">", fpitems);
xmlencode(fields[FieldTitle], fpitems);
fputs("</a>", fpitems);
} else {
xmlencode(fields[FieldTitle], fpitems);
}
if (isnew)
fputs("</u></b>", fpitems);
fputs("\n", fpitems);
@ -237,12 +82,12 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
int
main(int argc, char *argv[])
{
FILE *fpindex, *fpitems, *fpmenu, *fp;
FILE *fpindex, *fpitems, *fpmenu = NULL, *fp;
char *name;
int i, showsidebar = (argc > 1);
struct feed *f;
if (pledge("stdio rpath wpath cpath fattr", NULL) == -1)
if (pledge("stdio rpath wpath cpath", NULL) == -1)
err(1, "pledge");
if (!(feeds = calloc(argc, sizeof(struct feed *))))
@ -256,11 +101,15 @@ main(int argc, char *argv[])
/* write main index page */
if (!(fpindex = fopen("index.html", "wb")))
err(1, "fopen: index.html");
if (!(fpmenu = fopen("menu.html", "wb")))
err(1, "fopen: menu.html");
if (!(fpitems = fopen("items.html", "wb")))
err(1, "fopen: items.html");
fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />"
if (showsidebar && !(fpmenu = fopen("menu.html", "wb")))
err(1, "fopen: menu.html");
if (pledge("stdio rpath", NULL) == -1)
err(1, "pledge");
fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />"
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /></head>"
"<body class=\"frame\"><div id=\"items\"><pre>", fpitems);
@ -288,7 +137,7 @@ main(int argc, char *argv[])
if (showsidebar) {
fputs("<html><head>"
"<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n"
"<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n"
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
"</head><body class=\"frame\"><div id=\"sidebar\">", fpmenu);
@ -312,25 +161,24 @@ main(int argc, char *argv[])
}
fputs("<!DOCTYPE html><html><head>\n\t<title>Newsfeed (", fpindex);
fprintf(fpindex, "%lu", totalnew);
fputs(")</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n"
fputs(")</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n"
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
"</head>\n", fpindex);
if (showsidebar) {
fputs("<frameset framespacing=\"0\" cols=\"200,*\" frameborder=\"1\">\n"
" <frame name=\"menu\" src=\"menu.html\" target=\"menu\">\n", fpindex);
fputs("<frameset framespacing=\"0\" cols=\"250,*\" frameborder=\"1\">\n"
"\t<frame name=\"menu\" src=\"menu.html\" target=\"menu\">\n", fpindex);
} else {
fputs("<frameset framespacing=\"0\" cols=\"*\" frameborder=\"1\">\n", fpindex);
}
fputs("\t<frameset id=\"frameset\" framespacing=\"0\" cols=\"50%,50%\" frameborder=\"1\">\n"
"\t\t<frame name=\"items\" src=\"items.html\" target=\"items\">\n"
"\t\t<frame name=\"content\" target=\"content\">\n"
"\t</frameset>\n"
fputs(
"\t<frame name=\"items\" src=\"items.html\" target=\"items\">\n"
"</frameset>\n"
"</html>\n", fpindex);
fclose(fpitems);
fclose(fpmenu);
fclose(fpindex);
fclose(fpitems);
if (fpmenu)
fclose(fpmenu);
return 0;
}