mirror of https://git.lolcat.ca/lolcat/4get.git
Compare commits
3 Commits
c4c008c192
...
faece63356
Author | SHA1 | Date |
---|---|---|
lolcat | faece63356 | |
lolcat | 697ef3733d | |
lolcat | 635b2386d4 |
|
@ -16,9 +16,9 @@ $frontend = new frontend();
|
||||||
/*
|
/*
|
||||||
Captcha
|
Captcha
|
||||||
*/
|
*/
|
||||||
include "lib/captcha_gen.php";
|
include "lib/bot_protection.php";
|
||||||
$null = null;
|
$null = null;
|
||||||
new captcha($null, $null, $null, "images", false);
|
new bot_protection($null, $null, $null, "images", false);
|
||||||
|
|
||||||
[$scraper, $filters] = $frontend->getscraperfilters(
|
[$scraper, $filters] = $frontend->getscraperfilters(
|
||||||
"images",
|
"images",
|
||||||
|
|
|
@ -16,9 +16,9 @@ $frontend = new frontend();
|
||||||
/*
|
/*
|
||||||
Captcha
|
Captcha
|
||||||
*/
|
*/
|
||||||
include "lib/captcha_gen.php";
|
include "lib/bot_protection.php";
|
||||||
$null = null;
|
$null = null;
|
||||||
new captcha($null, $null, $null, "music", false);
|
new bot_protection($null, $null, $null, "music", false);
|
||||||
|
|
||||||
[$scraper, $filters] = $frontend->getscraperfilters(
|
[$scraper, $filters] = $frontend->getscraperfilters(
|
||||||
"music",
|
"music",
|
||||||
|
|
|
@ -16,9 +16,9 @@ $frontend = new frontend();
|
||||||
/*
|
/*
|
||||||
Captcha
|
Captcha
|
||||||
*/
|
*/
|
||||||
include "lib/captcha_gen.php";
|
include "lib/bot_protection.php";
|
||||||
$null = null;
|
$null = null;
|
||||||
new captcha($null, $null, $null, "news", false);
|
new bot_protection($null, $null, $null, "news", false);
|
||||||
|
|
||||||
[$scraper, $filters] = $frontend->getscraperfilters(
|
[$scraper, $filters] = $frontend->getscraperfilters(
|
||||||
"news",
|
"news",
|
||||||
|
|
|
@ -16,9 +16,9 @@ $frontend = new frontend();
|
||||||
/*
|
/*
|
||||||
Captcha
|
Captcha
|
||||||
*/
|
*/
|
||||||
include "lib/captcha_gen.php";
|
include "lib/bot_protection.php";
|
||||||
$null = null;
|
$null = null;
|
||||||
new captcha($null, $null, $null, "videos", false);
|
new bot_protection($null, $null, $null, "videos", false);
|
||||||
|
|
||||||
[$scraper, $filters] = $frontend->getscraperfilters(
|
[$scraper, $filters] = $frontend->getscraperfilters(
|
||||||
"videos",
|
"videos",
|
||||||
|
|
|
@ -16,9 +16,9 @@ $frontend = new frontend();
|
||||||
/*
|
/*
|
||||||
Captcha
|
Captcha
|
||||||
*/
|
*/
|
||||||
include "lib/captcha_gen.php";
|
include "lib/bot_protection.php";
|
||||||
$null = null;
|
$null = null;
|
||||||
new captcha($null, $null, $null, "web", false);
|
new bot_protection($null, $null, $null, "web", false);
|
||||||
|
|
||||||
[$scraper, $filters] = $frontend->getscraperfilters(
|
[$scraper, $filters] = $frontend->getscraperfilters(
|
||||||
"web",
|
"web",
|
||||||
|
|
|
@ -83,7 +83,7 @@ class config{
|
||||||
|
|
||||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||||
// Changing this might break things.
|
// Changing this might break things.
|
||||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0";
|
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0";
|
||||||
|
|
||||||
// Proxy pool assignments for each scraper
|
// Proxy pool assignments for each scraper
|
||||||
// false = Use server's raw IP
|
// false = Use server's raw IP
|
||||||
|
@ -118,8 +118,8 @@ class config{
|
||||||
// SOUNDCLOUD
|
// SOUNDCLOUD
|
||||||
// Get these parameters by making a search on soundcloud with network
|
// Get these parameters by making a search on soundcloud with network
|
||||||
// tab open, then filter URLs using "search?q=". (No need to login)
|
// tab open, then filter URLs using "search?q=". (No need to login)
|
||||||
const SC_USER_ID = "59333-426459-717969-168008";
|
const SC_USER_ID = "447501-577662-794348-352629";
|
||||||
const SC_CLIENT_TOKEN = "8BBZpqUP1KSN4W6YB64xog2PX4Dw98b1";
|
const SC_CLIENT_TOKEN = "VNc62l3wxDWS0Ol62j5UYNc1gsZ3UXPv";
|
||||||
|
|
||||||
// MARGINALIA
|
// MARGINALIA
|
||||||
// Get an API key by contacting the Marginalia.nu maintainer. The "public" key
|
// Get an API key by contacting the Marginalia.nu maintainer. The "public" key
|
||||||
|
|
|
@ -522,6 +522,7 @@ class google{
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
"Accept-Encoding: gzip",
|
"Accept-Encoding: gzip",
|
||||||
"DNT: 1",
|
"DNT: 1",
|
||||||
|
"Cookie: SOCS=CAESNQgCEitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMzE3LjA4X3AwGgJlbiAEGgYIgM7orwY",
|
||||||
"Connection: keep-alive",
|
"Connection: keep-alive",
|
||||||
"Upgrade-Insecure-Requests: 1",
|
"Upgrade-Insecure-Requests: 1",
|
||||||
"Sec-Fetch-Dest: document",
|
"Sec-Fetch-Dest: document",
|
||||||
|
@ -977,9 +978,9 @@ class google{
|
||||||
"related" => []
|
"related" => []
|
||||||
];
|
];
|
||||||
|
|
||||||
if($this->detect_sorry($html)){
|
if($error = $this->detect_sorry($html)){
|
||||||
|
|
||||||
throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
|
throw new Exception($error);
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->parsejavascript($html);
|
$this->parsejavascript($html);
|
||||||
|
@ -2813,9 +2814,9 @@ class google{
|
||||||
throw new Exception("Failed to get search page");
|
throw new Exception("Failed to get search page");
|
||||||
}
|
}
|
||||||
|
|
||||||
if($this->detect_sorry($html)){
|
if($error = $this->detect_sorry($html)){
|
||||||
|
|
||||||
throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
|
throw new Exception($error);
|
||||||
}
|
}
|
||||||
|
|
||||||
$out = [
|
$out = [
|
||||||
|
@ -3649,7 +3650,30 @@ class google{
|
||||||
$detect_sorry[0]["innerHTML"] == "302 Moved"
|
$detect_sorry[0]["innerHTML"] == "302 Moved"
|
||||||
){
|
){
|
||||||
|
|
||||||
return true;
|
// may be consent.google.com in Europe or the /sorry captcha page
|
||||||
|
$url =
|
||||||
|
$this->fuckhtml
|
||||||
|
->getElementsByTagName("a");
|
||||||
|
|
||||||
|
if(
|
||||||
|
strpos(
|
||||||
|
parse_url(
|
||||||
|
$this->fuckhtml
|
||||||
|
->getTextContent(
|
||||||
|
$url[0]["attributes"]["href"]
|
||||||
|
),
|
||||||
|
PHP_URL_PATH
|
||||||
|
),
|
||||||
|
"/sorry"
|
||||||
|
) === 0
|
||||||
|
){
|
||||||
|
|
||||||
|
// found /sorry
|
||||||
|
return "Google blocked this 4get instance. Please setup a proxy!";
|
||||||
|
}
|
||||||
|
|
||||||
|
// found consent.google, should not happen anymore
|
||||||
|
return "Google served a GPDR consent form. This should not happen, please report if you encounter this message";
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in New Issue