Use existing feed as cache when present
This commit is contained in:
parent
b4ec4c5f73
commit
8bc0a50e8f
|
@ -6,9 +6,10 @@ class ComicPage {
|
|||
public string $pageNo = '';
|
||||
public string $title = '';
|
||||
public string $imageUrl = '';
|
||||
public int $pubDate = 0;
|
||||
|
||||
function __construct() {}
|
||||
function __toString() {
|
||||
return "[{$this->href}] [{$this->imageUrl}] {$this->pageNo} // {$this->title}";
|
||||
return "[{$this->href}] [{$this->imageUrl}] {$this->pubDate} {$this->pageNo} // {$this->title}";
|
||||
}
|
||||
};
|
||||
|
|
|
@ -7,9 +7,14 @@ include_once('lib/log.php');
|
|||
include_once('lib/fetch-url.php');
|
||||
include_once('lib/comic-page.php');
|
||||
|
||||
function fetchSiteContent() {
|
||||
function fetchSiteContent($knownContent = array()) {
|
||||
$comicPages = array();
|
||||
|
||||
$knownPageUrls = array();
|
||||
foreach ($knownContent as $knownItem) {
|
||||
$knownPageUrls[$knownItem->href] = $knownItem;
|
||||
};
|
||||
|
||||
$archivePage = fetchUrl(Config::archiveUrl);
|
||||
|
||||
foreach ($archivePage->find('div.textcontent ul') as $arc) {
|
||||
|
@ -17,9 +22,17 @@ function fetchSiteContent() {
|
|||
|
||||
$comicLinks = $arc->find('a');
|
||||
foreach ($comicLinks as $el) {
|
||||
$pageUrl = Config::baseUrl . "/" . $el->href;
|
||||
|
||||
if (@$knownPageUrls[$pageUrl]) {
|
||||
Log::debug("Using cached version of " . $pageUrl);
|
||||
array_push($comicPages, $knownPageUrls[$pageUrl]);
|
||||
continue;
|
||||
};
|
||||
|
||||
$link = new ComicPage;
|
||||
|
||||
$link->href = Config::baseUrl . "/" . $el->href;
|
||||
$link->href = $pageUrl;
|
||||
$link->pageNo = $el->innerText;
|
||||
|
||||
// we'll get the image URL from this later, but fetch it early
|
||||
|
|
|
@ -3,36 +3,45 @@ declare(strict_types = 1);
|
|||
|
||||
include_once('config/default.php');
|
||||
|
||||
function generateFeed($content) {
|
||||
function generateFeed($unsortedContent) {
|
||||
$loader = new \Twig\Loader\FilesystemLoader('templates/');
|
||||
$twig = new \Twig\Environment($loader, [
|
||||
'autoescape' => false,
|
||||
'strict_variables' => true
|
||||
]);
|
||||
|
||||
$ts = time();
|
||||
$fakeNow = time();
|
||||
$realNow = time();
|
||||
|
||||
$items = [];
|
||||
|
||||
foreach ($content as $page) {
|
||||
// artificially enforce ordering on undated historical items
|
||||
$ts -= Config::feed['itemDelaySeconds'];
|
||||
$now = date('c', $ts);
|
||||
$content = $unsortedContent;
|
||||
// Sort by date descending (newest first)
|
||||
usort($content, fn($a, $b) => $a->pubDate < $b->pubDate ? 1 : -1);
|
||||
|
||||
foreach ($content as $page) {
|
||||
$content = $twig->render('item-content.html', [
|
||||
'url' => $page->href,
|
||||
'imageUrl' => $page->imageUrl
|
||||
]);
|
||||
|
||||
$items[] = $twig->render('item.xml', [
|
||||
$item = $twig->render('item.xml', [
|
||||
'title' => htmlspecialchars($page->title),
|
||||
'pageNo' => $page->pageNo,
|
||||
'fullTitle' => htmlspecialchars($page->pageNo . " // " . $page->title),
|
||||
'date' => $now,
|
||||
'date' => $page->pubDate ? date('c', $page->pubDate) : date('c', $fakeNow),
|
||||
'url' => $page->href,
|
||||
'imageUrl' => htmlspecialchars($page->imageUrl),
|
||||
'content' => htmlspecialchars($content)
|
||||
]);
|
||||
|
||||
$items[] = $item;
|
||||
|
||||
// we use this to artificially enforce ordering on undated
|
||||
// historical items (but note that it starts with the real
|
||||
// time, so that when we see a newly published item it's
|
||||
// accurately dated)
|
||||
$fakeNow -= Config::feed['itemDelaySeconds'];
|
||||
};
|
||||
|
||||
$feed = $twig->render('feed.xml', [
|
||||
|
@ -40,7 +49,7 @@ function generateFeed($content) {
|
|||
'description' => Config::feed['description'],
|
||||
'baseUrl' => Config::baseUrl,
|
||||
'feedUrl' => Config::feed['feedUrl'],
|
||||
'date' => $now,
|
||||
'date' => $realNow,
|
||||
'items' => implode("\n", $items)
|
||||
]);
|
||||
|
||||
|
|
|
@ -25,10 +25,11 @@ function readFeed($path) {
|
|||
$item->pageNo = (string) $itemXml['data-pageno'];
|
||||
$item->title = (string) $itemXml['data-title'];
|
||||
$item->imageUrl = (string) $itemXml['data-imageurl'];
|
||||
$item->pubDate = strtotime((string) $itemXml->pubDate);
|
||||
|
||||
print $item;
|
||||
exit();
|
||||
|
||||
array_push($item);
|
||||
array_push($items, $item);
|
||||
};
|
||||
|
||||
Log::info("Read " . count($items) . " items from " . $path);
|
||||
return $items;
|
||||
};
|
||||
|
|
|
@ -15,7 +15,7 @@ include_once('lib/fetch-site-content.php');
|
|||
include_once('lib/generate-feed.php');
|
||||
|
||||
$existingFeed = readFeed(Config::feedPath);
|
||||
$content = fetchSiteContent();
|
||||
$content = fetchSiteContent($existingFeed);
|
||||
$feedXml = generateFeed(array_reverse($content));
|
||||
|
||||
Log::info("writing " . strlen($feedXml) . " bytes to " . Config::feedPath);
|
||||
|
|
Loading…
Reference in New Issue