Use existing feed as cache when present
This commit is contained in:
parent
b4ec4c5f73
commit
8bc0a50e8f
|
@ -6,9 +6,10 @@ class ComicPage {
|
||||||
public string $pageNo = '';
|
public string $pageNo = '';
|
||||||
public string $title = '';
|
public string $title = '';
|
||||||
public string $imageUrl = '';
|
public string $imageUrl = '';
|
||||||
|
public int $pubDate = 0;
|
||||||
|
|
||||||
function __construct() {}
|
function __construct() {}
|
||||||
function __toString() {
|
function __toString() {
|
||||||
return "[{$this->href}] [{$this->imageUrl}] {$this->pageNo} // {$this->title}";
|
return "[{$this->href}] [{$this->imageUrl}] {$this->pubDate} {$this->pageNo} // {$this->title}";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -7,9 +7,14 @@ include_once('lib/log.php');
|
||||||
include_once('lib/fetch-url.php');
|
include_once('lib/fetch-url.php');
|
||||||
include_once('lib/comic-page.php');
|
include_once('lib/comic-page.php');
|
||||||
|
|
||||||
function fetchSiteContent() {
|
function fetchSiteContent($knownContent = array()) {
|
||||||
$comicPages = array();
|
$comicPages = array();
|
||||||
|
|
||||||
|
$knownPageUrls = array();
|
||||||
|
foreach ($knownContent as $knownItem) {
|
||||||
|
$knownPageUrls[$knownItem->href] = $knownItem;
|
||||||
|
};
|
||||||
|
|
||||||
$archivePage = fetchUrl(Config::archiveUrl);
|
$archivePage = fetchUrl(Config::archiveUrl);
|
||||||
|
|
||||||
foreach ($archivePage->find('div.textcontent ul') as $arc) {
|
foreach ($archivePage->find('div.textcontent ul') as $arc) {
|
||||||
|
@ -17,9 +22,17 @@ function fetchSiteContent() {
|
||||||
|
|
||||||
$comicLinks = $arc->find('a');
|
$comicLinks = $arc->find('a');
|
||||||
foreach ($comicLinks as $el) {
|
foreach ($comicLinks as $el) {
|
||||||
|
$pageUrl = Config::baseUrl . "/" . $el->href;
|
||||||
|
|
||||||
|
if (@$knownPageUrls[$pageUrl]) {
|
||||||
|
Log::debug("Using cached version of " . $pageUrl);
|
||||||
|
array_push($comicPages, $knownPageUrls[$pageUrl]);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
$link = new ComicPage;
|
$link = new ComicPage;
|
||||||
|
|
||||||
$link->href = Config::baseUrl . "/" . $el->href;
|
$link->href = $pageUrl;
|
||||||
$link->pageNo = $el->innerText;
|
$link->pageNo = $el->innerText;
|
||||||
|
|
||||||
// we'll get the image URL from this later, but fetch it early
|
// we'll get the image URL from this later, but fetch it early
|
||||||
|
|
|
@ -3,36 +3,45 @@ declare(strict_types = 1);
|
||||||
|
|
||||||
include_once('config/default.php');
|
include_once('config/default.php');
|
||||||
|
|
||||||
function generateFeed($content) {
|
function generateFeed($unsortedContent) {
|
||||||
$loader = new \Twig\Loader\FilesystemLoader('templates/');
|
$loader = new \Twig\Loader\FilesystemLoader('templates/');
|
||||||
$twig = new \Twig\Environment($loader, [
|
$twig = new \Twig\Environment($loader, [
|
||||||
'autoescape' => false,
|
'autoescape' => false,
|
||||||
'strict_variables' => true
|
'strict_variables' => true
|
||||||
]);
|
]);
|
||||||
|
|
||||||
$ts = time();
|
$fakeNow = time();
|
||||||
|
$realNow = time();
|
||||||
|
|
||||||
$items = [];
|
$items = [];
|
||||||
|
|
||||||
foreach ($content as $page) {
|
$content = $unsortedContent;
|
||||||
// artificially enforce ordering on undated historical items
|
// Sort by date descending (newest first)
|
||||||
$ts -= Config::feed['itemDelaySeconds'];
|
usort($content, fn($a, $b) => $a->pubDate < $b->pubDate ? 1 : -1);
|
||||||
$now = date('c', $ts);
|
|
||||||
|
|
||||||
|
foreach ($content as $page) {
|
||||||
$content = $twig->render('item-content.html', [
|
$content = $twig->render('item-content.html', [
|
||||||
'url' => $page->href,
|
'url' => $page->href,
|
||||||
'imageUrl' => $page->imageUrl
|
'imageUrl' => $page->imageUrl
|
||||||
]);
|
]);
|
||||||
|
|
||||||
$items[] = $twig->render('item.xml', [
|
$item = $twig->render('item.xml', [
|
||||||
'title' => htmlspecialchars($page->title),
|
'title' => htmlspecialchars($page->title),
|
||||||
'pageNo' => $page->pageNo,
|
'pageNo' => $page->pageNo,
|
||||||
'fullTitle' => htmlspecialchars($page->pageNo . " // " . $page->title),
|
'fullTitle' => htmlspecialchars($page->pageNo . " // " . $page->title),
|
||||||
'date' => $now,
|
'date' => $page->pubDate ? date('c', $page->pubDate) : date('c', $fakeNow),
|
||||||
'url' => $page->href,
|
'url' => $page->href,
|
||||||
'imageUrl' => htmlspecialchars($page->imageUrl),
|
'imageUrl' => htmlspecialchars($page->imageUrl),
|
||||||
'content' => htmlspecialchars($content)
|
'content' => htmlspecialchars($content)
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
$items[] = $item;
|
||||||
|
|
||||||
|
// we use this to artificially enforce ordering on undated
|
||||||
|
// historical items (but note that it starts with the real
|
||||||
|
// time, so that when we see a newly published item it's
|
||||||
|
// accurately dated)
|
||||||
|
$fakeNow -= Config::feed['itemDelaySeconds'];
|
||||||
};
|
};
|
||||||
|
|
||||||
$feed = $twig->render('feed.xml', [
|
$feed = $twig->render('feed.xml', [
|
||||||
|
@ -40,7 +49,7 @@ function generateFeed($content) {
|
||||||
'description' => Config::feed['description'],
|
'description' => Config::feed['description'],
|
||||||
'baseUrl' => Config::baseUrl,
|
'baseUrl' => Config::baseUrl,
|
||||||
'feedUrl' => Config::feed['feedUrl'],
|
'feedUrl' => Config::feed['feedUrl'],
|
||||||
'date' => $now,
|
'date' => $realNow,
|
||||||
'items' => implode("\n", $items)
|
'items' => implode("\n", $items)
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|
|
@ -25,10 +25,11 @@ function readFeed($path) {
|
||||||
$item->pageNo = (string) $itemXml['data-pageno'];
|
$item->pageNo = (string) $itemXml['data-pageno'];
|
||||||
$item->title = (string) $itemXml['data-title'];
|
$item->title = (string) $itemXml['data-title'];
|
||||||
$item->imageUrl = (string) $itemXml['data-imageurl'];
|
$item->imageUrl = (string) $itemXml['data-imageurl'];
|
||||||
|
$item->pubDate = strtotime((string) $itemXml->pubDate);
|
||||||
|
|
||||||
print $item;
|
array_push($items, $item);
|
||||||
exit();
|
|
||||||
|
|
||||||
array_push($item);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Log::info("Read " . count($items) . " items from " . $path);
|
||||||
|
return $items;
|
||||||
};
|
};
|
||||||
|
|
|
@ -15,7 +15,7 @@ include_once('lib/fetch-site-content.php');
|
||||||
include_once('lib/generate-feed.php');
|
include_once('lib/generate-feed.php');
|
||||||
|
|
||||||
$existingFeed = readFeed(Config::feedPath);
|
$existingFeed = readFeed(Config::feedPath);
|
||||||
$content = fetchSiteContent();
|
$content = fetchSiteContent($existingFeed);
|
||||||
$feedXml = generateFeed(array_reverse($content));
|
$feedXml = generateFeed(array_reverse($content));
|
||||||
|
|
||||||
Log::info("writing " . strlen($feedXml) . " bytes to " . Config::feedPath);
|
Log::info("writing " . strlen($feedXml) . " bytes to " . Config::feedPath);
|
||||||
|
|
Loading…
Reference in New Issue