2022-03-04 03:45:18 +00:00
|
|
|
<?php
|
|
|
|
declare(strict_types = 1);
|
|
|
|
|
2022-03-04 04:14:45 +00:00
|
|
|
// Some dependencies haven't been updated for PHP 8
|
|
|
|
// The error_reporting() override below can probably be removed when running on PHP 7.3
|
|
|
|
error_reporting(E_ALL & ~E_DEPRECATED);
|
|
|
|
|
2022-03-04 03:45:18 +00:00
|
|
|
require('vendor/autoload.php');
|
|
|
|
|
|
|
|
use GuzzleHttp\Client as HTTPClient;
|
|
|
|
use PHPHtmlParser\Dom as DOM;
|
|
|
|
|
|
|
|
/**
 * Tiny leveled logger that prints timestamped lines to standard output.
 *
 * Every level has an on/off switch in the $enabled table; messages sent to a
 * switched-off level are dropped without producing any output.
 */
class Log
{
    /** Per-level switches, keyed by level name. */
    private static $enabled = [
        'silly' => false,
        'debug' => true,
        'info' => true
    ];

    /**
     * Print a single line shaped like "<ISO-8601 timestamp> [<level>] <message>".
     *
     * @param string $level   key into the $enabled table
     * @param mixed  $message value interpolated into the printed line
     */
    private static function emit(string $level, $message)
    {
        if (self::$enabled[$level]) {
            $timestamp = date('c');
            echo "{$timestamp} [{$level}] {$message}\n";
        }
    }

    /** Log at the 'silly' level (switched off in the table above). */
    public static function silly($message)
    {
        self::emit('silly', $message);
    }

    /** Log at the 'debug' level. */
    public static function debug($message)
    {
        self::emit('debug', $message);
    }

    /** Log at the 'info' level. */
    public static function info($message)
    {
        self::emit('info', $message);
    }
}
|
|
|
|
|
|
|
|
/**
 * Fetch a URL with an HTTP GET request and return the response body as a string.
 *
 * The client's 'http_errors' option is switched off so that the status code
 * can be checked here and the response body logged before failing.
 * Nothing is caught in this function, so any exception raised by the HTTP
 * client itself propagates to the caller.
 *
 * @param string $url URL to request
 * @return string body of a 200 response
 * @throws \RuntimeException when the response status is anything other than 200
 */
function fetchUrl(string $url): string
{
    $client = new HTTPClient();

    $res = $client->request('GET', $url, [
        'http_errors' => false,
        'headers' => [
            'User-Agent' => 'Lexie\'s RSS Monster (for Cial) (lexie@alexis-marie-wright.me)',
        ],
    ]);

    $status = $res->getStatusCode();
    if ($status !== 200) {
        // Keep the failing body in the debug log to help diagnose the error.
        Log::debug($res->getBody());

        // Only Throwable objects can be thrown; throwing a bare string makes
        // PHP raise "Can only throw objects" and discards this message.
        throw new \RuntimeException("Request for $url returned $status");
    }

    return strval($res->getBody());
}
|
|
|
|
|
2022-03-04 04:14:45 +00:00
|
|
|
// Crawl settings: the site root (fetched first, and used as the prefix for
// every followed link) and the CSS selector that picks out links to follow.
$baseUrl = 'http://inhuman-comic.com';
$nextLinkSelector = 'div.body a.prev';

// $urls is the FIFO queue of pages still to fetch; $fetchedUrls records every
// page already fetched so that no page is queued twice.
$urls = [$baseUrl];
$fetchedUrls = [];

while (count($urls) > 0) {
    $url = array_shift($urls);

    $res = fetchUrl($url);
    Log::info("$url: fetched " . strlen($res) . " bytes");
    Log::silly($res);

    $fetchedUrls[] = $url;

    $dom = new DOM;
    $dom->loadStr($res);

    // Iterate the matches themselves. Wrapping the find() result in array()
    // would produce a one-element array holding the whole collection, so the
    // body would run exactly once no matter how many links matched.
    foreach ($dom->find($nextLinkSelector) as $el) {
        // NOTE(review): assumes href is relative to the site root and has no
        // leading slash; absolute hrefs would produce a broken URL. Confirm
        // against the target site's markup.
        $nextUrl = $baseUrl . '/' . $el->href;

        // Strict in_array() rather than array_search(): array_search()
        // returns the matching index, and a match at index 0 is falsy.
        if (in_array($nextUrl, $fetchedUrls, true)) {
            Log::debug("$url: already fetched next link $nextUrl");
            continue;
        }

        if (in_array($nextUrl, $urls, true)) {
            Log::debug("$url: duplicate next link $nextUrl");
            continue;
        }

        Log::info("$url: next link $nextUrl");
        $urls[] = $nextUrl;
    }

    usleep(500000); // wait half a second, to be minimally polite
}

Log::info("Out of next links!");
|