2018-04-27 13:20:52 +00:00
|
|
|
// This file is part of Snownews - A lightweight console RSS newsreader
|
|
|
|
//
|
|
|
|
// Copyright (c) 2003-2009 Oliver Feiler <kiza@kcore.de>
|
|
|
|
// Copyright (c) 2003-2009 Rene Puls <rpuls@gmx.net>
|
2021-04-11 16:52:08 +00:00
|
|
|
// Copyright (c) 2021 Mike Sharov <msharov@users.sourceforge.net>
|
2018-04-27 13:20:52 +00:00
|
|
|
//
|
|
|
|
// Snownews is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License version 3
|
|
|
|
// as published by the Free Software Foundation.
|
|
|
|
//
|
|
|
|
// Snownews is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty
|
|
|
|
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
|
|
// along with Snownews. If not, see http://www.gnu.org/licenses/.
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2018-04-30 15:15:42 +00:00
|
|
|
#include "main.h"
#include "conv.h"
#include "uiutil.h"
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <libxml/HTMLparser.h>
#include <langinfo.h>
#include <openssl/evp.h>
#include <openssl/md5.h>
|
2015-07-02 16:27:59 +00:00
|
|
|
|
2018-04-30 15:15:42 +00:00
|
|
|
//----------------------------------------------------------------------
|
2015-07-02 16:27:59 +00:00
|
|
|
|
2018-04-25 15:17:06 +00:00
|
|
|
static int calcAgeInDays (const struct tm* t);
|
2015-07-02 16:27:59 +00:00
|
|
|
|
2018-04-30 15:15:42 +00:00
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
2021-04-11 17:12:53 +00:00
|
|
|
// This is a replacement for strsep which is not portable (missing on Solaris).
|
|
|
|
//
|
|
|
|
// http://www.winehq.com/hypermail/wine-patches/2001/11/0024.html
|
|
|
|
//
|
|
|
|
// The following function was written by Francois Gouget.
|
|
|
|
#ifdef SUN
|
|
|
|
char* strsep (char** str, const char* delims)
{
    // *str is the unread remainder of the string; NULL means the previous
    // call already handed out the final token.
    char* const token = *str;
    if (token == NULL)
        return NULL;

    // Scan for the first delimiter. When one is found, terminate the token
    // there and leave *str pointing just past it.
    for (char* p = token; *p != '\0'; ++p) {
        if (strchr (delims, *p) != NULL) {
            *p = '\0';
            *str = p + 1;
            return token;
        }
    }

    // No delimiter left: this is the last token.
    *str = NULL;
    return token;
}
|
|
|
|
|
|
|
|
// timegm() is not available on Solaris
|
|
|
|
// Calls mktime on t. If that fails, retries with the hour decremented and
// adds the hour back onto the result. Stores the time in *out and returns
// nonzero on success; returns zero when both attempts fail.
static int sun_mktime_retry (struct tm* t, time_t* out)
{
    time_t r = mktime (t);
    if (r == -1) {
        --t->tm_hour;
        r = mktime (t);
        if (r == -1)
            return 0;
        r += 3600;
    }
    *out = r;
    return 1;
}

static time_t timegm (struct tm* t)
{
    // Treat the broken-down time as local time first.
    time_t local;
    if (!sun_mktime_retry (t, &local))
        return -1; // can't deal with output from strptime

    // Push the UTC breakdown of that instant through mktime again; the
    // distance between the two results is the local offset to undo.
    struct tm* utc = gmtime (&local);
    utc->tm_isdst = 0;
    time_t shifted;
    if (!sun_mktime_retry (utc, &shifted))
        return -1; // can't deal with output from gmtime

    return local - (shifted - local);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// strcasestr stolen from: http://www.unixpapa.com/incnote/string.html
|
|
|
|
// Case-insensitive substring search.
// Returns a pointer to the first occurrence of b in a, ignoring case, or
// NULL when there is none. An empty b never matches (returns NULL).
const char* s_strcasestr (const char* a, const char* b)
{
    const size_t lena = strlen (a), lenb = strlen (b);

    // The <ctype.h> functions are only defined for unsigned char values and
    // EOF, so go through unsigned char to stay safe on high-bit bytes.
    const unsigned char first = (unsigned char) *b;
    const char f[3] = { (char) tolower (first), (char) toupper (first), '\0' };

    // Jump from one candidate position (a byte matching either case of b's
    // first character) to the next, comparing the full needle at each.
    for (size_t l = strcspn (a, f); l != lena; l += strcspn (a + l + 1, f) + 1)
        if (strncasecmp (a + l, b, lenb) == 0)
            return a + l;
    return NULL;
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
char* iconvert (const char* inbuf)
|
|
|
|
{
|
|
|
|
iconv_t cd; // Iconvs conversion descriptor.
|
|
|
|
if (_settings.global_charset)
|
|
|
|
cd = iconv_open (_settings.global_charset, "UTF-8");
|
|
|
|
else {
|
|
|
|
const char* langcset = nl_langinfo (CODESET);
|
|
|
|
if (strcasecmp (langcset, "UTF-8") == 0)
|
|
|
|
return strdup (inbuf); // Already in UTF-8
|
|
|
|
char target_charset[64];
|
|
|
|
snprintf (target_charset, sizeof (target_charset), "%s//TRANSLIT", langcset);
|
|
|
|
cd = iconv_open (target_charset, "UTF-8");
|
|
|
|
}
|
|
|
|
if (cd == (iconv_t) - 1)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
size_t inbytesleft = strlen (inbuf), outbytesleft = inbytesleft;
|
|
|
|
// We need two pointers so we do not lose the string starting position.
|
|
|
|
char* outbuf = malloc (outbytesleft + 1), *outbuf_first = outbuf;
|
|
|
|
|
|
|
|
if (iconv (cd, (char **) &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
|
|
|
|
free (outbuf_first);
|
2015-07-02 16:27:59 +00:00
|
|
|
iconv_close (cd);
|
2021-04-09 21:56:41 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
*outbuf = 0;
|
|
|
|
iconv_close (cd);
|
|
|
|
return outbuf_first;
|
2015-07-02 16:27:59 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 14:47:43 +00:00
|
|
|
// UIDejunk: remove html tags from feed description and convert
|
|
|
|
// html entities to something useful if we hit them.
|
|
|
|
// This function took almost forever to get right, but at least I learned
|
|
|
|
// that html entity &hellip; has nothing to do with Lucifer's ISP, but
|
|
|
|
// instead means "..." (3 dots, "and so on...").
|
2021-04-09 21:56:41 +00:00
|
|
|
char* UIDejunk (const char* feed_description)
|
|
|
|
{
|
|
|
|
// Gracefully handle passed NULL ptr.
|
|
|
|
if (feed_description == NULL)
|
|
|
|
return strdup ("(null)");
|
|
|
|
|
|
|
|
// Make a copy and point *start to it so we can free the stuff again!
|
|
|
|
char* text = strdup (feed_description); // feed_description copy
|
|
|
|
char* start = text; // Points to first char everytime. Need to free this.
|
|
|
|
|
|
|
|
// If text begins with a tag, discard all of them.
|
|
|
|
while (1) {
|
|
|
|
if (text[0] == '<') {
|
|
|
|
strsep (&text, "<");
|
|
|
|
strsep (&text, ">");
|
|
|
|
} else
|
|
|
|
break;
|
|
|
|
if (text == NULL) {
|
|
|
|
free (start);
|
|
|
|
return strdup (_("No description available."));
|
2015-07-02 16:27:59 +00:00
|
|
|
}
|
2021-04-09 21:56:41 +00:00
|
|
|
}
|
|
|
|
char* newtext = malloc (1); // Detag'ed *text.
|
|
|
|
newtext[0] = '\0';
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
// Strip tags... tagsoup mode.
|
|
|
|
// strsep puts everything before "<" into detagged.
|
|
|
|
const char* detagged = strsep (&text, "<");
|
|
|
|
if (detagged == NULL)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Replace <p> and <br> (in all incarnations) with newlines, but only
|
|
|
|
// if there isn't already a following newline.
|
|
|
|
if (text != NULL) {
|
|
|
|
if ((strncasecmp (text, "p", 1) == 0) || (strncasecmp (text, "br", 2) == 0)) {
|
|
|
|
if ((strncasecmp (text, "br>\n", 4) != 0) && (strncasecmp (text, "br/>\n", 5) != 0) && (strncasecmp (text, "br />\n", 6) != 0) && (strncasecmp (text, "p>\n", 3) != 0)) {
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + 2);
|
|
|
|
strcat (newtext, "\n");
|
2015-07-02 16:27:59 +00:00
|
|
|
}
|
2021-04-09 21:56:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + strlen (detagged) + 1);
|
2015-07-02 16:27:59 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
// Now append detagged to newtext.
|
|
|
|
strcat (newtext, detagged);
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
// Advance *text to next position after the closed tag.
|
|
|
|
const char* htmltag = strsep (&text, ">");
|
|
|
|
if (htmltag == NULL)
|
|
|
|
break;
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
if (s_strcasestr (htmltag, "img src") != NULL) {
|
2018-04-27 14:47:43 +00:00
|
|
|
#if 0
|
2021-04-09 21:56:41 +00:00
|
|
|
attribute = s_strcasestr (htmltag, "alt=");
|
|
|
|
if (attribute == NULL)
|
|
|
|
continue;
|
|
|
|
size_t len = strlen (attribute);
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + 6 + len + 2);
|
|
|
|
strcat (newtext, "[img: ");
|
|
|
|
strncat (newtext, attribute + 5, len - 6);
|
|
|
|
strcat (newtext, "]");
|
2018-04-27 14:47:43 +00:00
|
|
|
#endif
|
2021-04-09 21:56:41 +00:00
|
|
|
newtext = realloc (newtext, strlen (newtext) + 7);
|
|
|
|
strcat (newtext, "[img] ");
|
|
|
|
}
|
2018-04-27 14:47:43 +00:00
|
|
|
#if 0
|
2021-04-09 21:56:41 +00:00
|
|
|
else if (s_strcasestr (htmltag, "a href") != NULL) {
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + 8);
|
|
|
|
strcat (newtext, "[link: ");
|
|
|
|
} else if (strcasecmp (htmltag, "/a") == 0) {
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + 2);
|
|
|
|
strcat (newtext, "]");
|
2015-07-02 16:27:59 +00:00
|
|
|
}
|
2021-04-09 21:56:41 +00:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
free (start);
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-19 16:53:10 +00:00
|
|
|
CleanupString (newtext, false);
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
// See if there are any entities in the string at all.
|
|
|
|
if (strchr (newtext, '&') != NULL) {
|
|
|
|
text = strdup (newtext);
|
|
|
|
start = text;
|
|
|
|
free (newtext);
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
newtext = malloc (1);
|
|
|
|
newtext[0] = '\0';
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
while (1) {
|
|
|
|
// Strip HTML entities.
|
|
|
|
const char* detagged = strsep (&text, "&");
|
|
|
|
if (detagged == NULL)
|
|
|
|
break;
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
if (*detagged) {
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + strlen (detagged) + 1);
|
|
|
|
strcat (newtext, detagged);
|
|
|
|
}
|
|
|
|
// Expand newtext by one char.
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + 2);
|
|
|
|
// This might break if there is an & sign in the text.
|
|
|
|
const char* entity = strsep (&text, ";");
|
|
|
|
if (entity != NULL) {
|
|
|
|
// XML defined entities.
|
|
|
|
if (strcmp (entity, "amp") == 0) {
|
|
|
|
strcat (newtext, "&");
|
|
|
|
continue;
|
|
|
|
} else if (strcmp (entity, "lt") == 0) {
|
|
|
|
strcat (newtext, "<");
|
|
|
|
continue;
|
|
|
|
} else if (strcmp (entity, "gt") == 0) {
|
|
|
|
strcat (newtext, ">");
|
|
|
|
continue;
|
|
|
|
} else if (strcmp (entity, "quot") == 0) {
|
|
|
|
strcat (newtext, "\"");
|
|
|
|
continue;
|
|
|
|
} else if (strcmp (entity, "apos") == 0) {
|
|
|
|
strcat (newtext, "'");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Decode user defined entities.
|
|
|
|
bool found = false;
|
|
|
|
for (const struct entity * cur_entity = _settings.html_entities; cur_entity; cur_entity = cur_entity->next) {
|
|
|
|
if (strcmp (entity, cur_entity->entity) == 0) {
|
|
|
|
// We have found a matching entity.
|
|
|
|
|
|
|
|
// If entity_length is more than 1 char we need to realloc
|
|
|
|
// more space in newtext.
|
|
|
|
if (cur_entity->entity_length > 1)
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + cur_entity->entity_length + 1);
|
|
|
|
|
|
|
|
// Append new entity.
|
|
|
|
strcat (newtext, cur_entity->converted_entity);
|
|
|
|
|
|
|
|
// Set found flag.
|
|
|
|
found = true;
|
|
|
|
|
|
|
|
// We can now leave the for loop.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to parse some standard entities.
|
|
|
|
if (!found) {
|
|
|
|
wchar_t ch = 0;
|
|
|
|
// See if it was a numeric entity.
|
|
|
|
if (entity[0] == '#') {
|
|
|
|
if (entity[1] == 'x')
|
|
|
|
ch = strtoul (entity + 2, NULL, 16);
|
|
|
|
else
|
|
|
|
ch = atol (entity + 1);
|
|
|
|
} else {
|
2021-04-10 18:59:19 +00:00
|
|
|
const htmlEntityDesc* ep = htmlEntityLookup ((xmlChar*) entity);
|
2021-04-09 21:56:41 +00:00
|
|
|
if (ep)
|
|
|
|
ch = ep->value;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ch > 0) {
|
2015-07-02 16:27:59 +00:00
|
|
|
#ifdef __STDC_ISO_10646__
|
2021-04-09 21:56:41 +00:00
|
|
|
// Convert to locale encoding and append.
|
|
|
|
size_t len = strlen (newtext);
|
|
|
|
newtext = realloc (newtext, len + MB_CUR_MAX + 1);
|
|
|
|
int mblen = wctomb (newtext + len, ch);
|
|
|
|
// Only set found flag if the conversion worked.
|
|
|
|
if (mblen > 0) {
|
|
|
|
newtext[len + mblen] = '\0';
|
|
|
|
found = true;
|
|
|
|
}
|
2015-07-02 16:27:59 +00:00
|
|
|
#else
|
2021-04-09 21:56:41 +00:00
|
|
|
// Since we can't use wctomb(), just convert ASCII.
|
|
|
|
if (ch <= CHAR_MAX) {
|
|
|
|
sprintf (newtext + strlen (newtext), "%c", (char) ch);
|
|
|
|
found = true;
|
|
|
|
}
|
2015-07-02 16:27:59 +00:00
|
|
|
#endif
|
2021-04-09 21:56:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// If nothing matched so far, put text back in.
|
|
|
|
if (!found) {
|
|
|
|
// Changed into &+entity to avoid stray semicolons
|
|
|
|
// at the end of wrapped text if no entity matches.
|
|
|
|
newtext = realloc (newtext, strlen (newtext) + strlen (entity) + 2);
|
|
|
|
strcat (newtext, "&");
|
|
|
|
strcat (newtext, entity);
|
2015-07-02 16:27:59 +00:00
|
|
|
}
|
2021-04-09 21:56:41 +00:00
|
|
|
} else
|
|
|
|
break;
|
2018-04-25 15:17:06 +00:00
|
|
|
}
|
2021-04-09 21:56:41 +00:00
|
|
|
free (start);
|
|
|
|
}
|
|
|
|
return newtext;
|
2015-07-02 16:27:59 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 14:47:43 +00:00
|
|
|
// 5th try at a wrap text function.
|
|
|
|
// My first version was broken, my second one sucked, my third try was
|
|
|
|
// so overcomplicated I didn't understand it anymore... Kianga tried
|
|
|
|
// the 4th version which corrupted some random memory unfortunately...
|
|
|
|
// but this one works. Heureka!
|
2021-04-09 21:56:41 +00:00
|
|
|
// Word-wraps text to lines of at most width characters, as measured by
// utf8_length. Existing newlines are kept as line breaks, words are split
// at single spaces, and each output line ends with "\n". A word that is
// longer than width is put on a line of its own, cut short and followed by
// "...". Returns a newly allocated string the caller must free.
// Allocation failure is not handled.
char* WrapText (const char* text, unsigned width)
{
    char* textblob = strdup (text); // Working copy of text, strsep writes into it.
    char* start = textblob;

    char* line = malloc (1); // One line of text with max width.
    line[0] = '\0';

    char* newtext = malloc (1);
    newtext[0] = '\0';

    for (;;) {
        // First, cut at \n.
        char* chapter = strsep (&textblob, "\n");
        if (chapter == NULL)
            break;

        for (;;) {
            char* savepos = chapter; // Saved position so we can go back in the string.
            const char* chunk = strsep (&chapter, " ");

            // No more words in this chapter: flush the pending line.
            if (chunk == NULL) {
                newtext = realloc (newtext, strlen (newtext) + strlen (line) + 2);
                strcat (newtext, line);
                strcat (newtext, "\n");
                line[0] = '\0';
                break;
            }

            if (utf8_length (chunk) > width) {
                // The word can never fit. Keep at most width - 5 bytes of it;
                // for very narrow widths keep nothing (the subtraction must
                // not wrap around).
                size_t keep = width > 5 ? width - 5 : 0;
                const size_t chunklen = strlen (chunk);
                if (keep > chunklen)
                    keep = chunklen;
                // Don't cut in the middle of a UTF-8 sequence: back off while
                // the first dropped byte is a continuation byte.
                while (keep > 0 && ((unsigned char) chunk[keep] & 0xC0) == 0x80)
                    --keep;

                // Flush the pending line, then emit the shortened word on a
                // line of its own: line + "\n" + word + "...\n" + terminator.
                newtext = realloc (newtext, strlen (newtext) + strlen (line) + keep + 6);
                strcat (newtext, line);
                strcat (newtext, "\n");
                strncat (newtext, chunk, keep);
                strcat (newtext, "...\n");
                line[0] = '\0';
                continue;
            }

            if (utf8_length (line) + utf8_length (chunk) <= width) {
                // The word still fits on the current line.
                line = realloc (line, strlen (line) + strlen (chunk) + 2);
                strcat (line, chunk);
                strcat (line, " ");
            } else {
                // The line is full. Undo the split (strsep replaced the space
                // with a terminator; chapter is NULL if this was the last
                // word) and rewind so the word is read again for the next line.
                if (chapter != NULL) {
                    --chapter;
                    chapter[0] = ' ';
                }
                chapter = savepos;

                newtext = realloc (newtext, strlen (newtext) + strlen (line) + 2);
                strcat (newtext, line);
                strcat (newtext, "\n");
                line[0] = '\0';
            }
        }
    }
    free (line);
    free (start);
    return newtext;
}
|
|
|
|
|
2018-04-27 14:47:43 +00:00
|
|
|
// Remove leading whitespaces, newlines, tabs.
|
|
|
|
// This function should be safe for working on UTF-8 strings.
|
2021-04-19 16:53:10 +00:00
|
|
|
// fullclean: false = only suck chars from beginning of string
|
|
|
|
// true = remove newlines inside the string
|
|
|
|
// Cleans up the string s in place:
//  - removes leading whitespace,
//  - turns tabs into spaces, and with fullclean also newlines,
//  - removes embedded CDATA markers ("<![CDATA[", "]]>" and a broken "]]"),
//  - removes trailing whitespace.
// A NULL pointer is left alone.
void CleanupString (char* s, bool fullclean)
{
    if (!s)
        return;

    // Remove leading spaces. The <ctype.h> functions are only defined for
    // unsigned char values, and UTF-8 text contains bytes >= 0x80.
    size_t len = strlen (s), leadspace = 0;
    while (leadspace < len && isspace ((unsigned char) s[leadspace]))
        ++leadspace;
    if (leadspace) {
        memmove (s, s + leadspace, (len + 1) - leadspace);
        len -= leadspace;
    }

    // Eat tabs (and newlines, if asked to) along the whole string.
    for (size_t i = 0; i < len; ++i)
        if (s[i] == '\t' || (fullclean && s[i] == '\n'))
            s[i] = ' ';

    // Remove embedded CDATA tags. The complete end marker is removed before
    // the broken one, which is a prefix of it.
    static const char* const cdata_markers[] = { "<![CDATA[", "]]>", "]]" };
    for (size_t m = 0; m < sizeof (cdata_markers) / sizeof (cdata_markers[0]); ++m) {
        const size_t mlen = strlen (cdata_markers[m]);
        for (char* si = s; (si = strstr (si, cdata_markers[m]));) {
            len -= mlen;
            // Close the gap, moving the tail including its terminator.
            memmove (si, si + mlen, len - (size_t) (si - s) + 1);
        }
    }

    // Remove trailing spaces, including a lone remaining one.
    while (len > 0 && isspace ((unsigned char) s[len - 1]))
        s[--len] = '\0';
}
|
|
|
|
|
2018-04-27 14:47:43 +00:00
|
|
|
// http://foo.bar/address.rdf -> http:__foo.bar_address.rdf
|
2021-04-09 21:56:41 +00:00
|
|
|
// Turns url into a string usable as a file name: a copy of url cut down to
// at most 128 characters, with every character outside the accepted ranges
// replaced by an underscore. The caller frees the result.
char* Hashify (const char* url)
{
    char* name = strdup (url);
    size_t n = strlen (name);

    // Don't allow filenames > 128 chars for teeny weeny operating systems.
    if (n > 128) {
        name[128] = '\0';
        n = 128;
    }

    for (char* p = name; p < name + n; ++p) {
        // Accepted: codes 32-38, 43-46, 48-90, 97-122, and NUL.
        const int c = *p;
        const int keep = (c >= 32 && c <= 38)
                      || (c >= 43 && c <= 46)
                      || (c >= 48 && c <= 90)
                      || (c >= 97 && c <= 122)
                      || c == 0;
        if (!keep)
            *p = '_';

        // Cygwin doesn't seem to like anything besides a-z0-9 in filenames. Zap'em!
#ifdef __CYGWIN__
        const int d = *p;
        const int alnum = (d >= 48 && d <= 57)
                       || (d >= 65 && d <= 90)
                       || (d >= 97 && d <= 122)
                       || d == 0;
        if (!alnum)
            *p = '_';
#endif
    }
    return name;
}
|
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
char* genItemHash (const char* const* hashitems, unsigned items)
|
|
|
|
{
|
2021-04-11 16:52:08 +00:00
|
|
|
EVP_MD_CTX* mdctx = EVP_MD_CTX_new();
|
|
|
|
EVP_DigestInit (mdctx, EVP_md5());
|
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
for (unsigned i = 0; i < items; ++i)
|
|
|
|
if (hashitems[i])
|
2021-04-11 16:52:08 +00:00
|
|
|
EVP_DigestUpdate (mdctx, hashitems[i], strlen (hashitems[i]));
|
|
|
|
|
|
|
|
unsigned char md_value[EVP_MAX_MD_SIZE];
|
|
|
|
unsigned md_len = 0;
|
|
|
|
EVP_DigestFinal_ex (mdctx, md_value, &md_len);
|
|
|
|
EVP_MD_CTX_free (mdctx);
|
|
|
|
|
|
|
|
char hashtext [MD5_DIGEST_LENGTH*2 + 1];
|
|
|
|
for (unsigned i = 0; i < md_len; ++i)
|
|
|
|
sprintf (&hashtext[2*i], "%02hhx", md_value[i]);
|
2021-04-09 21:56:41 +00:00
|
|
|
return strdup (hashtext);
|
2015-07-02 16:27:59 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 14:47:43 +00:00
|
|
|
// Date conversion
|
|
|
|
// 2004-11-20T19:45:00+00:00
|
2021-04-09 21:56:41 +00:00
|
|
|
// Converts an ISO 8601 date such as "2004-11-20T19:45:00+00:00", or a plain
// "2004-11-20", to Unix time. A UTC offset after the time ("+02:00",
// "-0530", "+02"; "Z" counts as zero) is applied, so the result is in UTC.
// Returns 0 for a NULL or unparseable date.
// On Cygwin the fields are passed to mktime and the offset is not applied.
time_t ISODateToUnix (const char* ISODate)
{
    // Do not crash with an empty tag
    if (!ISODate)
        return 0;
    struct tm t = {0};
    // OpenBSD does not know %F == %Y-%m-%d
    const char* rest = strptime (ISODate, "%Y-%m-%dT%T", &t);
    if (!rest && !strptime (ISODate, "%Y-%m-%d", &t))
        return 0;
#ifdef __CYGWIN__
    return mktime (&t);
#else
    // rest points behind the seconds if date and time were both parsed.
    long offset = 0;
    if (rest) {
        // Skip fractional seconds.
        if (*rest == '.' || *rest == ',') {
            ++rest;
            while (isdigit ((unsigned char) *rest))
                ++rest;
        }
        // [+-]HH, optionally followed by [:]MM
        if ((*rest == '+' || *rest == '-')
                && isdigit ((unsigned char) rest[1]) && isdigit ((unsigned char) rest[2])) {
            offset = ((rest[1] - '0') * 10 + (rest[2] - '0')) * 3600L;
            const char* m = rest + 3;
            if (*m == ':')
                ++m;
            if (isdigit ((unsigned char) m[0]) && isdigit ((unsigned char) m[1]))
                offset += ((m[0] - '0') * 10 + (m[1] - '0')) * 60L;
            if (*rest == '-')
                offset = -offset;
        }
    }
    // The fields are local to the given offset: UTC = local - offset.
    return timegm (&t) - offset;
#endif
}
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2018-04-30 01:08:06 +00:00
|
|
|
// Sat, 20 Nov 2004 21:45:40 +0000
|
2021-04-09 21:56:41 +00:00
|
|
|
// Converts an RSS pubDate such as "Sat, 20 Nov 2004 21:45:40 +0000" to Unix
// time. The day name is optional. A numeric zone ("+0200") is applied so
// the result is in UTC; zone names are ignored.
// Returns 0 for a NULL or unparseable date.
// On Cygwin the fields are passed to mktime and the zone is not applied.
time_t pubDateToUnix (const char* pubDate)
{
    // Do not crash with an empty Tag
    if (!pubDate)
        return 0;

    // Skip the optional day name and its comma. Never step over the end of
    // the string, however short it is.
    const char* datepart = pubDate;
    while (isspace ((unsigned char) *datepart))
        ++datepart;
    while (isalpha ((unsigned char) *datepart))
        ++datepart;
    while (*datepart == ',' || isspace ((unsigned char) *datepart))
        ++datepart;

#ifdef LOCALEPATH
    // Cruft!
    // Switch LC_TIME to "C" so strptime matches the English month names,
    // whatever the current locale is. The string returned by setlocale may
    // be overwritten by the next setlocale call, so keep a private copy for
    // restoring the locale afterwards.
    //
    // This is also not thread safe!
    const char* curlocale = setlocale (LC_TIME, NULL);
    char* oldlocale = curlocale ? strdup (curlocale) : NULL;
    if (oldlocale)
        setlocale (LC_TIME, "C");
#endif
    struct tm t = {0};
    const char* r = strptime (datepart, "%d %b %Y %T", &t);
#ifdef LOCALEPATH
    if (oldlocale) {
        setlocale (LC_TIME, oldlocale);
        free (oldlocale);
    }
#endif
    if (!r)
        return 0;
#ifdef __CYGWIN__
    return mktime (&t);
#else
    // Numeric zone: [+-]HHMM
    long offset = 0;
    while (isspace ((unsigned char) *r))
        ++r;
    if ((*r == '+' || *r == '-')
            && isdigit ((unsigned char) r[1]) && isdigit ((unsigned char) r[2])
            && isdigit ((unsigned char) r[3]) && isdigit ((unsigned char) r[4])) {
        offset = ((r[1] - '0') * 10 + (r[2] - '0')) * 3600L
               + ((r[3] - '0') * 10 + (r[4] - '0')) * 60L;
        if (*r == '-')
            offset = -offset;
    }
    // The fields are local to the given zone: UTC = local - offset.
    return timegm (&t) - offset;
#endif
}
|
2018-04-25 15:17:06 +00:00
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
// Formats unixDate (taken as UTC) as a human readable posting date:
//   "Posted today, 19:45", "Posted yesterday", "Posted 3 days ago, 08:00",
//   "Posted a week ago", "Posted <date>, 19:45" for anything older, and
//   "Not yet posted: <date>, 19:45" for dates in the future.
// The time of day is left out when it is exactly 00:00:00.
// Returns a newly allocated string of at most 63 characters that the
// caller frees, or NULL if memory runs out.
char* unixToPostDateString (time_t unixDate)
{
    enum { POSTDATE_SIZE = 64, PART_SIZE = 32 };

    struct tm t;
    gmtime_r (&unixDate, &t);
    const bool midnight = !t.tm_hour && !t.tm_min && !t.tm_sec;
    const int age = calcAgeInDays (&t);

    const char* prefix = _("Posted ");
    char agebuf[PART_SIZE] = "";    // "%d days ago" with the number filled in
    const char* phrase = "";        // relative age, if the date is recent
    char when[PART_SIZE] = "";      // time of day, or absolute date and time

    if (age >= 0 && age <= 7) {
        if (age == 0)
            phrase = _("today");
        else if (age == 1)
            phrase = _("yesterday");
        else if (age == 7)
            phrase = _("a week ago");
        else {
            snprintf (agebuf, sizeof (agebuf), _("%d days ago"), age);
            phrase = agebuf;
        }
        // strftime leaves the buffer undefined when the result doesn't fit.
        if (!midnight && !strftime (when, sizeof (when), _(", %H:%M"), &t))
            when[0] = '\0';
    } else {
        if (age < 0)
            prefix = _("Not yet posted: ");
        if (!strftime (when, sizeof (when), midnight ? _("%x") : _("%x, %H:%M"), &t))
            when[0] = '\0';
    }

    char* time_str = malloc (POSTDATE_SIZE);
    if (!time_str)
        return NULL;
    // snprintf cuts the result to the buffer size and always terminates it.
    snprintf (time_str, POSTDATE_SIZE, "%s%s%s", prefix, phrase, when);
    return time_str;
}
|
|
|
|
|
2021-04-09 21:56:41 +00:00
|
|
|
// Number of days from 0001-01-01 to January 1st of the given year in the
// proleptic Gregorian calendar. year must be at least 1.
static long daysBeforeYear (int year)
{
    const long y = year - 1;
    return 365L * y + y / 4 - y / 100 + y / 400;
}

// Returns the number of calendar days between the date in t and the
// current date in UTC: 0 for today, 1 for yesterday, negative for dates in
// the future. Only tm_year and tm_yday of t are used.
static int calcAgeInDays (const struct tm* t)
{
    time_t unix_t = time (NULL);
    struct tm current_t;
    gmtime_r (&unix_t, &current_t);

    // Compare absolute day numbers, so that leap years between the two
    // dates are counted with their 366 days.
    const long today = daysBeforeYear (current_t.tm_year + 1900) + current_t.tm_yday;
    const long then = daysBeforeYear (t->tm_year + 1900) + t->tm_yday;
    return (int) (today - then);
}
|