Use existing feed as cache when present

This commit is contained in:
Alexis Marie Wright 2022-04-08 00:24:44 -04:00
parent b4ec4c5f73
commit 8bc0a50e8f
5 changed files with 41 additions and 17 deletions

View File

@@ -6,9 +6,10 @@ class ComicPage {
public string $pageNo = ''; public string $pageNo = '';
public string $title = ''; public string $title = '';
public string $imageUrl = ''; public string $imageUrl = '';
public int $pubDate = 0;
function __construct() {} function __construct() {}
function __toString() { function __toString() {
return "[{$this->href}] [{$this->imageUrl}] {$this->pageNo} // {$this->title}"; return "[{$this->href}] [{$this->imageUrl}] {$this->pubDate} {$this->pageNo} // {$this->title}";
} }
}; };

View File

@@ -7,9 +7,14 @@ include_once('lib/log.php');
include_once('lib/fetch-url.php'); include_once('lib/fetch-url.php');
include_once('lib/comic-page.php'); include_once('lib/comic-page.php');
function fetchSiteContent() { function fetchSiteContent($knownContent = array()) {
$comicPages = array(); $comicPages = array();
$knownPageUrls = array();
foreach ($knownContent as $knownItem) {
$knownPageUrls[$knownItem->href] = $knownItem;
};
$archivePage = fetchUrl(Config::archiveUrl); $archivePage = fetchUrl(Config::archiveUrl);
foreach ($archivePage->find('div.textcontent ul') as $arc) { foreach ($archivePage->find('div.textcontent ul') as $arc) {
@@ -17,9 +22,17 @@ function fetchSiteContent() {
$comicLinks = $arc->find('a'); $comicLinks = $arc->find('a');
foreach ($comicLinks as $el) { foreach ($comicLinks as $el) {
$pageUrl = Config::baseUrl . "/" . $el->href;
if (@$knownPageUrls[$pageUrl]) {
Log::debug("Using cached version of " . $pageUrl);
array_push($comicPages, $knownPageUrls[$pageUrl]);
continue;
};
$link = new ComicPage; $link = new ComicPage;
$link->href = Config::baseUrl . "/" . $el->href; $link->href = $pageUrl;
$link->pageNo = $el->innerText; $link->pageNo = $el->innerText;
// we'll get the image URL from this later, but fetch it early // we'll get the image URL from this later, but fetch it early

View File

@@ -3,36 +3,45 @@ declare(strict_types = 1);
include_once('config/default.php'); include_once('config/default.php');
function generateFeed($content) { function generateFeed($unsortedContent) {
$loader = new \Twig\Loader\FilesystemLoader('templates/'); $loader = new \Twig\Loader\FilesystemLoader('templates/');
$twig = new \Twig\Environment($loader, [ $twig = new \Twig\Environment($loader, [
'autoescape' => false, 'autoescape' => false,
'strict_variables' => true 'strict_variables' => true
]); ]);
$ts = time(); $fakeNow = time();
$realNow = time();
$items = []; $items = [];
foreach ($content as $page) { $content = $unsortedContent;
// artificially enforce ordering on undated historical items // Sort by date descending (newest first)
$ts -= Config::feed['itemDelaySeconds']; usort($content, fn($a, $b) => $a->pubDate < $b->pubDate ? 1 : -1);
$now = date('c', $ts);
foreach ($content as $page) {
$content = $twig->render('item-content.html', [ $content = $twig->render('item-content.html', [
'url' => $page->href, 'url' => $page->href,
'imageUrl' => $page->imageUrl 'imageUrl' => $page->imageUrl
]); ]);
$items[] = $twig->render('item.xml', [ $item = $twig->render('item.xml', [
'title' => htmlspecialchars($page->title), 'title' => htmlspecialchars($page->title),
'pageNo' => $page->pageNo, 'pageNo' => $page->pageNo,
'fullTitle' => htmlspecialchars($page->pageNo . " // " . $page->title), 'fullTitle' => htmlspecialchars($page->pageNo . " // " . $page->title),
'date' => $now, 'date' => $page->pubDate ? date('c', $page->pubDate) : date('c', $fakeNow),
'url' => $page->href, 'url' => $page->href,
'imageUrl' => htmlspecialchars($page->imageUrl), 'imageUrl' => htmlspecialchars($page->imageUrl),
'content' => htmlspecialchars($content) 'content' => htmlspecialchars($content)
]); ]);
$items[] = $item;
// we use this to artificially enforce ordering on undated
// historical items (but note that it starts with the real
// time, so that when we see a newly published item it's
// accurately dated)
$fakeNow -= Config::feed['itemDelaySeconds'];
}; };
$feed = $twig->render('feed.xml', [ $feed = $twig->render('feed.xml', [
@@ -40,7 +49,7 @@ function generateFeed($content) {
'description' => Config::feed['description'], 'description' => Config::feed['description'],
'baseUrl' => Config::baseUrl, 'baseUrl' => Config::baseUrl,
'feedUrl' => Config::feed['feedUrl'], 'feedUrl' => Config::feed['feedUrl'],
'date' => $now, 'date' => $realNow,
'items' => implode("\n", $items) 'items' => implode("\n", $items)
]); ]);

View File

@@ -25,10 +25,11 @@ function readFeed($path) {
$item->pageNo = (string) $itemXml['data-pageno']; $item->pageNo = (string) $itemXml['data-pageno'];
$item->title = (string) $itemXml['data-title']; $item->title = (string) $itemXml['data-title'];
$item->imageUrl = (string) $itemXml['data-imageurl']; $item->imageUrl = (string) $itemXml['data-imageurl'];
$item->pubDate = strtotime((string) $itemXml->pubDate);
print $item; array_push($items, $item);
exit();
array_push($item);
}; };
Log::info("Read " . count($items) . " items from " . $path);
return $items;
}; };

View File

@@ -15,7 +15,7 @@ include_once('lib/fetch-site-content.php');
include_once('lib/generate-feed.php'); include_once('lib/generate-feed.php');
$existingFeed = readFeed(Config::feedPath); $existingFeed = readFeed(Config::feedPath);
$content = fetchSiteContent(); $content = fetchSiteContent($existingFeed);
$feedXml = generateFeed(array_reverse($content)); $feedXml = generateFeed(array_reverse($content));
Log::info("writing " . strlen($feedXml) . " bytes to " . Config::feedPath); Log::info("writing " . strlen($feedXml) . " bytes to " . Config::feedPath);