Compare commits

...

4 Commits

Author SHA1 Message Date
Em 932abec381 logging for vpngate scripts 2021-06-23 17:56:53 -04:00
Em 81301c57c4 add sleep time to regex-scraper 2021-06-23 15:49:56 -04:00
Em ca8a0f2a06 add vpngate checker 2021-06-23 15:01:01 -04:00
Em 485ea881bc get first value in the args list 2021-06-23 15:00:53 -04:00
7 changed files with 64 additions and 8 deletions

View File

@@ -53,14 +53,12 @@ async def add(request):
 @app.route("/api/v1/getproxies")
 async def getproxies(request):
-    proxy_types = dict(request.args).get("proxy_type")
+    proxy_types = request.args["proxy_type"]
     if not proxy_types:
         return response.text("No proxy_type specified", status=400)
     if isinstance(proxy_types, str):
         proxy_types = [proxy_types]
-    amount = int(request.args.get("amount", config["proxy_amount"]))
-    tdelta = request.args.get("timedelta", config["timedelta"])
-    max_num_failures = request.args.get("max_failures", config["max_num_failures"])
+    amount = int(request.args.get("amount"))
+    tdelta = int(request.args.get("timedelta"))
+    max_num_failures = int(request.args.get("max_num_failures"))
     select_query = """
 select id, host(ip) as ip, port, host(exit_ip) as exit_ip, proxy_type, comment from proxies
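
Note: with the config-side defaults dropped (as this diff reads), every query parameter must now come from the caller, and the param name changes from max_failures to max_num_failures. A minimal sketch of a conforming request (host, port, and values mirror the new config.yaml below, not anything canonical):

    import requests

    params = {
        "proxy_type": "vpngate",
        "amount": 100,
        "timedelta": 86400,
        "max_num_failures": 24,
    }
    r = requests.get("http://127.0.0.1:8080/api/v1/getproxies", params=params)
    proxies = r.json()["proxies"]

The new vpngate checker below builds exactly this query string by hand.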

View File

@@ -0,0 +1,11 @@
+get_proxies_url: "http://127.0.0.1:8080/api/v1/getproxies"
+update_proxies_url: "http://127.0.0.1:8080/api/v1/updateproxies"
+proxy_timeout: 30
+proxy_amount: 100
+proxy_threads: 10
+max_num_failures: 24
+timedelta: 86400
+# Feel free to actually use this if you want
+check_proxies_url: "http://ophcheck.nodv.in/check.txt"
+proxy_string: "asdjflasjdfkasidjf"
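
A minimal sketch of consuming this file the way the checker below does (PyYAML's full_load; the unit readings in the comments are my interpretation, not documented anywhere):

    import yaml

    config = yaml.full_load(open("config.yaml").read())
    config["proxy_timeout"]   # 30: per-request timeout, presumably seconds
    config["timedelta"]       # 86400: look-back window, one day in seconds
    config["proxy_threads"]   # 10: ThreadPoolExecutor worker count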

View File

@@ -0,0 +1,38 @@
+import requests
+import yaml
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+config = yaml.full_load(open("config.yaml").read())
+
+
+def check_proxy(ip, port, exit_ip, proxy_type, comment, p_id):
+    base_results = {
+        "id": p_id,
+        "status": "inactive",
+        "entry_ip": ip,
+        "exit_ip": exit_ip,
+        "proxy_type": proxy_type,
+        "port": port,
+        "comment": comment,
+    }
+    try:
+        requests.get(f"https://{ip}:{port}", timeout=config["proxy_timeout"])
+    except requests.exceptions.SSLError as e:
+        if "opengw.net" in str(e):  # live vpngate endpoints fail TLS verification with an opengw.net cert
+            base_results["status"] = "active"
+    finally:  # return in finally swallows timeouts/connection errors, leaving status "inactive"
+        return base_results
+
+
+proxy_results = []
+amount = config["proxy_amount"]
+r = requests.get(f"{config['get_proxies_url']}?amount={amount}&proxy_type=vpngate&timedelta={config['timedelta']}&max_num_failures={config['max_num_failures']}")
+current_proxies = r.json()
+all_threads = {}
+with ThreadPoolExecutor(max_workers=config["proxy_threads"]) as executor:
+    for proxy in current_proxies["proxies"]:
+        all_threads[executor.submit(check_proxy, proxy["ip"], proxy["port"], proxy["exit_ip"], proxy["proxy_type"], proxy["comment"], proxy["id"])] = proxy["id"]
+    for future in as_completed(all_threads):
+        proxy_results.append(future.result(timeout=30))
+data = {"proxies": proxy_results}
+submit = requests.post(config["update_proxies_url"], json=data)
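
Worth noting: check_proxy never sees a successful response; a live vpngate endpoint is detected purely by the TLS handshake failing with an opengw.net certificate. The JSON body posted to update_proxies_url then looks like this (keys straight from base_results; values illustrative only):

    payload = {
        "proxies": [
            {
                "id": 1,
                "status": "active",
                "entry_ip": "1.2.3.4",
                "exit_ip": "5.6.7.8",
                "proxy_type": "vpngate",
                "port": 443,
                "comment": "example",
            },
        ]
    }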

View File

@@ -6,6 +6,9 @@ dbhost: "127.0.0.1"
 # Fake UA for sites that actually check it
 user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:90.0) Gecko/20100101 Firefox/90.0"
+# Seconds to pause between requests (avoid triggering some WAFs)
+pause_time: 30
 # for regex_scraper.py
 regex_sites:
 # this key will go into the comment fields

View File

@@ -1,6 +1,7 @@
 import requests
 import yaml
 import re
+import time
 from datetime import datetime
 import psycopg2
 import psycopg2.extras
@@ -41,6 +42,7 @@ with conn:
     with conn.cursor() as curs:
         for site, data in config["regex_sites"].items():
             check_site(site, data)
+            time.sleep(config["pause_time"])
         scraped_ips = []
         [scraped_ips.append(p[0]) for p in scraped_proxies]
         curs.execute("select ip, port from proxies where host(ip)=ANY(%s)", (scraped_ips,))
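
Side note: the append-inside-a-list-comprehension above works but throws away the list it builds; a plain comprehension expresses the same thing directly:

    scraped_ips = [p[0] for p in scraped_proxies]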

View File

@@ -36,3 +36,5 @@ with pgconn:
         insert into proxies (ip, port, exit_ip, proxy_type, submitted_at, comment)
         values %s"""
         psycopg2.extras.execute_values(curs, insert_query, scraped_servers)
+        print(f'added {len(scraped_servers)} to db')
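
These prints are fine for cron output; if the scripts keep growing, the stdlib logging module is the usual swap. A sketch of the equivalent, not what this commit does:

    import logging

    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    logging.info("added %d to db", len(scraped_servers))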

View File

@@ -16,7 +16,7 @@ with pgconn:
         current_servers = curs.fetchall()
         proxies = []
-        for i in range(1,3):
+        for i in range(1,10):  # pages 1 through 9
             r = requests.get(f"https://freevpn.gg/?p={i}", headers={"User-Agent": config["user_agent"]})
             data = BeautifulSoup(r.text, features="html5lib")
             for ip_data in data.find_all(class_="card-box"):
@@ -31,4 +31,6 @@ with pgconn:
         insert_query = """
         insert into proxies (ip, port, exit_ip, status, proxy_type, submitted_at, comment)
         values %s"""
-        psycopg2.extras.execute_values(curs, insert_query, proxies)
+        psycopg2.extras.execute_values(curs, insert_query, proxies)
+        print(f'added {len(proxies)} to db')