Use existing feed as cache when present

This commit is contained in:
Alexis Marie Wright 2022-04-08 00:24:44 -04:00
parent b4ec4c5f73
commit 8bc0a50e8f
5 changed files with 41 additions and 17 deletions

View File

@@ -6,9 +6,10 @@ class ComicPage {
public string $pageNo = '';
public string $title = '';
public string $imageUrl = '';
public int $pubDate = 0;
function __construct() {}
function __toString() {
return "[{$this->href}] [{$this->imageUrl}] {$this->pageNo} // {$this->title}";
return "[{$this->href}] [{$this->imageUrl}] {$this->pubDate} {$this->pageNo} // {$this->title}";
}
};

View File

@@ -7,9 +7,14 @@ include_once('lib/log.php');
include_once('lib/fetch-url.php');
include_once('lib/comic-page.php');
function fetchSiteContent() {
function fetchSiteContent($knownContent = array()) {
$comicPages = array();
$knownPageUrls = array();
foreach ($knownContent as $knownItem) {
$knownPageUrls[$knownItem->href] = $knownItem;
};
$archivePage = fetchUrl(Config::archiveUrl);
foreach ($archivePage->find('div.textcontent ul') as $arc) {
@@ -17,9 +22,17 @@ function fetchSiteContent() {
$comicLinks = $arc->find('a');
foreach ($comicLinks as $el) {
$pageUrl = Config::baseUrl . "/" . $el->href;
if (@$knownPageUrls[$pageUrl]) {
Log::debug("Using cached version of " . $pageUrl);
array_push($comicPages, $knownPageUrls[$pageUrl]);
continue;
};
$link = new ComicPage;
$link->href = Config::baseUrl . "/" . $el->href;
$link->href = $pageUrl;
$link->pageNo = $el->innerText;
// we'll get the image URL from this later, but fetch it early

View File

@@ -3,36 +3,45 @@ declare(strict_types = 1);
include_once('config/default.php');
function generateFeed($content) {
function generateFeed($unsortedContent) {
$loader = new \Twig\Loader\FilesystemLoader('templates/');
$twig = new \Twig\Environment($loader, [
'autoescape' => false,
'strict_variables' => true
]);
$ts = time();
$fakeNow = time();
$realNow = time();
$items = [];
foreach ($content as $page) {
// artificially enforce ordering on undated historical items
$ts -= Config::feed['itemDelaySeconds'];
$now = date('c', $ts);
$content = $unsortedContent;
// Sort by date descending (newest first)
usort($content, fn($a, $b) => $a->pubDate < $b->pubDate ? 1 : -1);
foreach ($content as $page) {
$content = $twig->render('item-content.html', [
'url' => $page->href,
'imageUrl' => $page->imageUrl
]);
$items[] = $twig->render('item.xml', [
$item = $twig->render('item.xml', [
'title' => htmlspecialchars($page->title),
'pageNo' => $page->pageNo,
'fullTitle' => htmlspecialchars($page->pageNo . " // " . $page->title),
'date' => $now,
'date' => $page->pubDate ? date('c', $page->pubDate) : date('c', $fakeNow),
'url' => $page->href,
'imageUrl' => htmlspecialchars($page->imageUrl),
'content' => htmlspecialchars($content)
]);
$items[] = $item;
// we use this to artificially enforce ordering on undated
// historical items (but note that it starts with the real
// time, so that when we see a newly published item it's
// accurately dated)
$fakeNow -= Config::feed['itemDelaySeconds'];
};
$feed = $twig->render('feed.xml', [
@@ -40,7 +49,7 @@ function generateFeed($content) {
'description' => Config::feed['description'],
'baseUrl' => Config::baseUrl,
'feedUrl' => Config::feed['feedUrl'],
'date' => $now,
'date' => $realNow,
'items' => implode("\n", $items)
]);

View File

@@ -25,10 +25,11 @@ function readFeed($path) {
$item->pageNo = (string) $itemXml['data-pageno'];
$item->title = (string) $itemXml['data-title'];
$item->imageUrl = (string) $itemXml['data-imageurl'];
$item->pubDate = strtotime((string) $itemXml->pubDate);
print $item;
exit();
array_push($item);
array_push($items, $item);
};
Log::info("Read " . count($items) . " items from " . $path);
return $items;
};

View File

@@ -15,7 +15,7 @@ include_once('lib/fetch-site-content.php');
include_once('lib/generate-feed.php');
$existingFeed = readFeed(Config::feedPath);
$content = fetchSiteContent();
$content = fetchSiteContent($existingFeed);
$feedXml = generateFeed(array_reverse($content));
Log::info("writing " . strlen($feedXml) . " bytes to " . Config::feedPath);