Temporary commit before using the archive page
This commit is contained in:
parent
ef399a7c1c
commit
d7fd21f338
|
@ -20,10 +20,11 @@ class Config {
|
|||
// which log levels to emit (true) or suppress (false)
|
||||
// nothing below 'info' should be important in normal operation
|
||||
const logLevels = [
|
||||
'warn' => true,
|
||||
'info' => true,
|
||||
'debug' => true,
|
||||
'silly' => false
|
||||
];
|
||||
// if set, fetch only this many pages while spidering (false to fetch all)
|
||||
const TEST_stopAfter = 2;
|
||||
const TEST_stopAfter = false;
|
||||
}
|
||||
|
|
|
@ -44,17 +44,58 @@ function fetchPages() {
|
|||
$dom = new DOM;
|
||||
$dom->loadStr($res);
|
||||
|
||||
$pageNumber = null;
|
||||
|
||||
// extract page number (if any) from this page's URL
|
||||
$matches = array(null, null);
|
||||
if ($url !== Config::baseURL && 0 === preg_match('/comic(\d+)\.php$/', $url, $matches)) {
|
||||
throw new Exception("failed to find a page number in $url");
|
||||
if ($url === Config::baseURL) {
|
||||
Log::debug("$url is the root; will patch up page number later");
|
||||
} else {
|
||||
$matches = array();
|
||||
|
||||
if (preg_match('/comic(\d+)\.php$/', $url, $matches)) {
|
||||
$pageNumber = $matches[1];
|
||||
} else {
|
||||
Log::debug("failed to find a page number in $url, trying special cases");
|
||||
|
||||
if (preg_match('/arc(\d+)\-(front|back)/', $url, $matches)) {
|
||||
$pageNumber = "Arc " . $matches[1] . ": " . ucfirst($matches[2]) . " Cover";
|
||||
}
|
||||
elseif (preg_match('/cover13/', $url)) {
|
||||
$pageNumber = 'Arc 13: Front Cover';
|
||||
}
|
||||
elseif (preg_match('/back12/', $url)) {
|
||||
$pageNumber = 'Arc 12: Back Cover';
|
||||
}
|
||||
elseif (preg_match('/arc12splash/', $url)) {
|
||||
$pageNumber = 'Arc 12: Front Cover';
|
||||
}
|
||||
elseif (preg_match('/arc11/', $url)) {
|
||||
$pageNumber = 'Arc 11: Front Cover';
|
||||
}
|
||||
elseif (preg_match('/back10/', $url)) {
|
||||
$pageNumber = 'Arc 10: Back Cover';
|
||||
}
|
||||
elseif (preg_match('/splash10/', $url)) {
|
||||
$pageNumber = 'Arc 10: Cover';
|
||||
}
|
||||
elseif (preg_match('/arc9/', $url)) {
|
||||
$pageNumber = 'Arc 9: Cover';
|
||||
}
|
||||
elseif (preg_match('/arc8/', $url)) {
|
||||
$pageNumber = 'Arc 8: Cover';
|
||||
};
|
||||
};
|
||||
|
||||
if ($pageNumber === null) {
|
||||
throw new Exception("failed to find a page number in $url");
|
||||
};
|
||||
};
|
||||
|
||||
array_push($content, [
|
||||
'url' => $url,
|
||||
'imageUrl' => Config::baseURL . '/' . $dom->find('div.page img')[0]->src,
|
||||
'body' => $dom,
|
||||
'pageNumber' => $matches[1]
|
||||
'pageNumber' => $pageNumber
|
||||
]);
|
||||
|
||||
// extract next page URL from this page's content
|
||||
|
@ -83,8 +124,16 @@ function fetchPages() {
|
|||
usleep(500000); // wait half a second, to be minimally polite
|
||||
};
|
||||
|
||||
// patch up the page number on the title page
|
||||
$content[0]['pageNumber'] = $content[1]['pageNumber'] + 1;
|
||||
// patch up the page number on the title page by adding 1 to the
|
||||
// first numeric page number we find, starting from the newest
|
||||
// non-root page
|
||||
foreach (array_slice($content, 1) as $page) {
|
||||
if (preg_match('/^\d+$/', $page['pageNumber'])) {
|
||||
$content[0]['pageNumber'] = $page['pageNumber'] + 1;
|
||||
Log::info("patched up root page number to " . $content[0]['pageNumber']);
|
||||
break;
|
||||
};
|
||||
};
|
||||
|
||||
return $content;
|
||||
}
|
|
@ -24,5 +24,9 @@ class Log {
|
|||
/**
 * Log a message at the 'info' level.
 *
 * Thin convenience wrapper: forwards the message, tagged with the level
 * name 'info', to this class's emit() method.
 *
 * @param mixed $message the message to hand to emit()
 */
public static function info($message)
{
    self::emit('info', $message);
}
|
||||
|
||||
/**
 * Log a message at the 'warn' level.
 *
 * Thin convenience wrapper: forwards the message, tagged with the level
 * name 'warn', to this class's emit() method.
 *
 * @param mixed $message the message to hand to emit()
 */
public static function warn($message)
{
    self::emit('warn', $message);
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue