This commit is contained in:
Alexis Marie Wright 2022-03-04 21:50:40 -05:00
parent b0b9c28097
commit 2f86d3e559
5 changed files with 73 additions and 19 deletions

View File

@ -1,9 +1,17 @@
<?php
declare(strict_types = 1);
class Config {
const feed = [
'title' => '[INHUMAN] a sci-fi webcomic',
'description' => '...',
'feedUrl' => 'http://www.inhuman-comic.com/feed.xml'
];
// where to start spidering
const baseURL = "http://www.inhuman-comic.com";
// how the script identifies itself to the server while spidering
// (this will also be used as the "Generator" value in the feed XML)
const userAgent = 'Lexie\'s RSS Monster (for Cial) (lexie@alexis-marie-wright.me)';
// a CSS selector identifying the "next page" link to follow while spidering
const nextLinkSelector = 'div.body a.prev';
@ -15,5 +23,5 @@ class Config {
'silly' => false
];
// if set, fetch only this many pages while spidering (false to fetch all)
const TEST_stopAfter = 10;
const TEST_stopAfter = 2;
}

View File

@ -1,4 +1,7 @@
<?php
declare(strict_types = 1);
include_once('config/default.php');
use GuzzleHttp\Client as HTTPClient;
use PHPHtmlParser\Dom as DOM;
@ -49,6 +52,7 @@ function fetchPages() {
array_push($content, [
'url' => $url,
'imageUrl' => Config::baseURL . '/' . $dom->find('div.page img')[0]->src,
'body' => $dom,
'pageNumber' => $matches[1]
]);

View File

@ -12,24 +12,31 @@ include_once('config/default.php');
include_once('lib/log.php');
include_once('lib/fetch-pages.php');
// --- this'll be in a lib someday
/**
 * Build the feed-item fields for a single spidered comic page.
 *
 * @param string $url  Address of the page; used as both the item link and its GUID.
 * @param DOM    $page Parsed page; its first `div.page img` element supplies the image path.
 *
 * @return array{title: string, link: string, guid: string, description: string}
 */
function parsePageForFeedContent(string $url, DOM $page): array {
    // Class constants are case-sensitive; the config declares `baseURL`.
    // NOTE(review): assumes the page contains at least one `div.page img` match — confirm against the spider.
    $imageUrl = Config::baseURL . '/' . $page->find('div.page img')[0]->src;

    // Both URLs end up inside HTML attribute values, so escape them for that context.
    $escapedUrl = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $escapedImageUrl = htmlspecialchars($imageUrl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    // Keys must be quoted strings: bare words are constant lookups and throw on PHP 8.
    return [
        'title' => 'INHUMAN | Page ###',
        'link' => $url,
        'guid' => $url,
        'description' =>
            '<a style="text-decoration: none; border: none;" href="' . $escapedUrl . '">'
            . '<img src="' . $escapedImageUrl . '" />'
            . '</a>',
    ];
}
// --- ok back to the real shit
// Spider the site. The loop below reads 'url', 'imageUrl' and 'pageNumber' from each entry.
$content = fetchPages();

$loader = new \Twig\Loader\FilesystemLoader('templates/');
$twig = new \Twig\Environment($loader, [
    'autoescape' => false,
]);

// RSS 2.0 expects RFC 822 style dates in pubDate/lastBuildDate; DATE_RSS is PHP's format for that.
$now = date(DATE_RSS);

$items = [];
foreach ($content as $result) {
    // Progress goes to stderr: stdout carries the feed XML printed at the end of this script,
    // and anything written ahead of the XML declaration would make the feed malformed.
    file_put_contents('php://stderr', "{$result['url']} {$result['pageNumber']}\n");

    // Render one <item> per fetched page; this has to happen inside the loop so every
    // page contributes an entry.
    $items[] = $twig->render('item.xml', [
        'title' => $result['pageNumber'] . " | " . Config::feed['title'],
        'url' => $result['url'],
        'imageUrl' => $result['imageUrl'],
        'date' => $now,
    ]);
}

// Emit the complete feed document on stdout.
print $twig->render('feed.xml', [
    'title' => Config::feed['title'],
    'description' => Config::feed['description'],
    'baseUrl' => Config::baseURL,
    'feedUrl' => Config::feed['feedUrl'],
    'date' => $now,
    // The config documents userAgent as doubling as the feed's "Generator" value.
    'generator' => Config::userAgent,
    'items' => implode("\r\n", $items),
]);

24
templates/feed.xml Normal file
View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="utf-8"?>
{#
  RSS 2.0 feed wrapper.

  Variables:
    title, description, baseUrl, feedUrl, date - channel metadata, escaped below.
    generator - optional; when the caller does not pass one, the build date is
                emitted instead, as before.
    items     - already-rendered <item> markup, inserted verbatim.
#}
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
{% autoescape %}
<channel>
<title>{{ title }}</title>
<description>{{ description }}</description>
<link>{{ baseUrl }}</link>
<atom:link href="{{ feedUrl }}" rel="self" type="application/rss+xml" />
<pubDate>{{ date }}</pubDate>
<lastBuildDate>{{ date }}</lastBuildDate>
<generator>{{ generator|default(date) }}</generator>
{% endautoescape %}
{# RSS 2.0 has no <items> container: <item> elements are direct children of <channel>. #}
{# `raw` keeps the pre-rendered markup intact whatever the environment's autoescape setting is. #}
{{ items|raw }}
</channel>
</rss>

11
templates/item.xml Normal file
View File

@ -0,0 +1,11 @@
{#
  One RSS 2.0 <item>.

  Variables: title, url, imageUrl, date.
  Values are escaped here explicitly rather than relying on the environment's
  autoescape setting, so a URL containing `&` cannot break the XML.
#}
{% autoescape %}
<item>
<title>{{ title }}</title>
{# The description is HTML carried as text, so it is wrapped in CDATA instead of being emitted as XML child elements. #}
<description><![CDATA[
<a style="border: 0; text-decoration: none;" href="{{ url }}">
<img src="{{ imageUrl }}" />
</a>
]]></description>
<pubDate>{{ date }}</pubDate>
<link>{{ url }}</link>
<guid isPermaLink="true">{{ url }}</guid>
</item>
{% endautoescape %}