Compare commits

...

10 Commits

Author SHA1 Message Date
Em b4dc2c9196 add debug log for results and add new proxy types 2021-07-04 18:54:46 -04:00
Em c2ab57540f add dronebl ticket checker 2021-07-04 18:39:59 -04:00
Em 8c83925ff0 oops 2021-06-26 15:50:33 -04:00
Em 3cb8e2d114 add dronebl submitter 2021-06-26 12:31:53 -04:00
Em cd9a3ecd05 don't import unnecessary things 2021-06-26 12:31:32 -04:00
Em fae7232ed2 add todo list to readme 2021-06-26 09:08:38 -04:00
Em 15a3f82ba7 Forgot to change a variable 2021-06-26 09:04:14 -04:00
Em 5af158cb7d actually import sys 2021-06-26 09:03:50 -04:00
Em 968021cb19 Add first attempt at http/socks checking 2021-06-25 22:00:10 -04:00
Em f6f5ad8a5e Make sure we've got an actual internet connection before starting 2021-06-25 21:59:53 -04:00
10 changed files with 389 additions and 3 deletions

3
.gitignore vendored
View File

@ -1,3 +1,4 @@
.env/
env/
config.yaml
config.yaml
*.log

View File

@ -4,6 +4,8 @@ A Python webapp that "herds" a list of open proxies, with an API for external pr
`scripts/` contains some example scripts that might be useful.
There's a [todo list](https://todo.sr.ht/~emerson/openproxyherder) for issue and feature tracking.
## Setup openproxyherder
This is tested with Python 3.8 and 3.9, but should work with Python 3.5 and above.

View File

@ -1,6 +1,4 @@
import asyncio
import asyncpg
import uvloop
import yaml
from datetime import datetime, timedelta
from ipaddress import ip_address

View File

@ -6,6 +6,8 @@ proxy_threads: 10
max_num_failures: 24
timedelta: 86400
user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:90.0) Gecko/20100101 Firefox/90.0"
# Feel free to actually use this if you want
check_proxies_url: "http://ophcheck.nodv.in/check.txt"
proxy_string: "asdjflasjdfkasidjf"

View File

@ -0,0 +1,94 @@
import pycurl
import requests
import sys
import yaml
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO
# Load the checker settings once at import time. The context manager makes
# sure the file handle is closed instead of being left to the garbage collector.
with open("config.yaml") as config_file:
    config = yaml.full_load(config_file.read())
def check_connection(ip, port, proxy_type):
    """Probe ``ip:port`` as a proxy by fetching the check URL through it.

    The request goes to ``config["check_proxies_url"]``. The response is
    expected to be ``"<exit ip> <proxy_string>"``; the proxy counts as
    active only when the second part equals ``config["proxy_string"]``.

    When curl fails in a way that suggests the proxy speaks a different
    protocol, the probe is repeated with that protocol. Each protocol is
    tried at most once per call, so a proxy that keeps producing the same
    error cannot cause unbounded retries.

    Args:
        ip: Entry address of the proxy.
        port: Port of the proxy.
        proxy_type: Scheme to try first. ``"httpsocks"`` is probed as
            ``"http"``.

    Returns:
        A ``(status, exit_ip, proxy_type)`` tuple. On success this is
        ``("active", <exit ip from the response>, <scheme that worked>)``.
        Otherwise it is ``("inactive", "unknown", <last proxy_type tried>)``.
    """
    attempted = set()
    while True:
        current_proxy_type = "http" if proxy_type == "httpsocks" else proxy_type
        if current_proxy_type in attempted:
            # The fallback points at a scheme that was already probed.
            return ("inactive", "unknown", proxy_type)
        attempted.add(current_proxy_type)

        b = BytesIO()
        c = pycurl.Curl()
        try:
            c.setopt(c.URL, config['check_proxies_url'])
            c.setopt(c.PROXY, f"{current_proxy_type}://{ip}:{port}")
            c.setopt(c.WRITEDATA, b)
            c.setopt(c.CONNECTTIMEOUT, config["proxy_timeout"])
            c.setopt(c.TIMEOUT, config["proxy_timeout"] * 2)
            # Certificate checks are disabled for the probe request.
            c.setopt(c.SSL_VERIFYPEER, 0)
            c.setopt(c.SSL_VERIFYHOST, 0)
            c.setopt(c.USERAGENT, config["user_agent"])
            try:
                c.perform()
            except pycurl.error as e:
                code = e.args[0]
                message = e.args[1] if len(e.args) > 1 else ""
                if code == 28:
                    # Timed out.
                    return ("inactive", "unknown", proxy_type)
                if code == 7 and "Connection refused" in message:
                    return ("inactive", "unknown", proxy_type)
                if "Proxy CONNECT aborted" in message or (code == 7 and "SOCKS4" in message):
                    fallback = "socks5"
                elif (code == 56 and "Connection reset" in message) or code == 52:
                    fallback = "socks4"
                elif code == 35:
                    fallback = "http"
                else:
                    return ("inactive", "unknown", proxy_type)
                proxy_type = fallback
                continue
            except Exception as e:
                print(e)
                return ("inactive", "unknown", proxy_type)
        finally:
            # Release the curl handle on every path, including errors and retries.
            c.close()

        proxy_return = b.getvalue().decode(errors="ignore").split(" ", maxsplit=1)
        if len(proxy_return) == 2 and proxy_return[1].rstrip('\n') == config["proxy_string"]:
            print(f"{current_proxy_type}://{ip}:{port} {proxy_return}")
            return ("active", proxy_return[0], current_proxy_type)
        return ("inactive", "unknown", proxy_type)
def check_proxy(ip, port, exit_ip, proxy_type, comment, p_id):
    """Check one proxy record and return the result dict to report back.

    The stored ``exit_ip`` and ``proxy_type`` are kept unless the probe
    succeeds; then they are replaced by what the probe observed.
    """
    outcome, observed_exit, observed_type = check_connection(ip, port, proxy_type)
    is_active = outcome == "active"
    return {
        "id": p_id,
        "status": "active" if is_active else "inactive",
        "entry_ip": ip,
        "exit_ip": observed_exit if is_active else exit_ip,
        "proxy_type": observed_type if is_active else proxy_type,
        "port": port,
        "comment": comment,
    }
# Make sure we've got an internet connection first: fetch the check URL
# directly (no proxy) and verify it returns the expected sentinel string.
try:
    # The timeout stops a dead network from hanging the script forever.
    test = requests.get(config['check_proxies_url'], timeout=30)
except requests.exceptions.RequestException as e:
    print(e)
    print("Aborting, not connected to internet")
    sys.exit(1)
test_split = test.text.split(" ", maxsplit=1)
if len(test_split) != 2 or test_split[1].rstrip('\n') != config["proxy_string"]:
    print(test_split)
    print("Aborting, not connected to internet")
    sys.exit(1)

proxy_results = []
amount = config["proxy_amount"]
# Ask the herder API for the next batch of proxies to check.
r = requests.get(f"{config['get_proxies_url']}?amount={amount}&proxy_type=http&proxy_type=socks5&proxy_type=socks4&proxy_type=https&proxy_type=httpsocks&timedelta={config['timedelta']}&max_num_failures={config['max_num_failures']}")
current_proxies = r.json()

# Maps each submitted future to the id of the proxy it is checking.
all_threads = {}
with ThreadPoolExecutor(max_workers=config["proxy_threads"]) as executor:
    for proxy in current_proxies["proxies"]:
        all_threads[executor.submit(check_proxy, proxy["ip"], proxy["port"], proxy["exit_ip"], proxy["proxy_type"], proxy["comment"], proxy["id"])] = proxy["id"]
    for future in as_completed(all_threads):
        try:
            proxy_results.append(future.result(timeout=30))
        except Exception as e:
            # One failed check must not throw away the results gathered
            # from every other proxy in the batch.
            print(f"Check for proxy id {all_threads[future]} failed: {e}")

# Report the collected results back to the herder API.
data = {"proxies": proxy_results}
submit = requests.post(f"{config['update_proxies_url']}", json=data)

View File

@ -1,4 +1,5 @@
import requests
import sys
import yaml
from concurrent.futures import ThreadPoolExecutor, as_completed
@ -23,6 +24,14 @@ def check_proxy(ip, port, exit_ip, proxy_type, comment, p_id):
finally:
return base_results
# Make sure we've got an internet connection first
# The check URL is fetched directly; its body is split on the first space and
# the second part must equal config["proxy_string"].
test = requests.get(config['check_proxies_url'])
test_split = test.text.split(" ", maxsplit=1)
if len(test_split) != 2 or test_split[1].rstrip('\n') != config["proxy_string"]:
# NOTE(review): indentation is missing in this view; the two prints and the
# exit below presumably form the body of the `if` above - confirm.
print(test_split)
print("Aborting, not connected to internet")
sys.exit(1)
# Results are accumulated here; the code that fills it is outside this hunk.
proxy_results = []
amount = config["proxy_amount"]
# Request only proxies of type "vpngate" from the herder API.
r = requests.get(f"{config['get_proxies_url']}?amount={amount}&proxy_type=vpngate&timedelta={config['timedelta']}&max_num_failures={config['max_num_failures']}")

View File

@ -0,0 +1,7 @@
# DroneBL stuff
A script to submit IPs to DroneBL. Obviously, you'll need a DroneBL key.
You'll also need another table to store the previous lookups:
`create table dronebl (id serial primary key, ip inet);`

View File

@ -0,0 +1,42 @@
dbname: openproxyherder
dbuser: openproxyherder
dbpassword: openproxyherder
dbhost: "127.0.0.1"
dronebl_key: testing
dronebl_account_name: testing
dronebl_account_password: testing
user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:90.0) Gecko/20100101 Firefox/90.0"
email_host: mail.google.com
# Needs to be STARTTLS
email_port: 587
email_username: "hi@gmail.com"
email_password: "hi"
email_domain: "gmail.com"
email_from: "hi@gmail.com"
# Optional
#email_bcc: "example@example.com"
email_inactive: |
Hello,
The DroneBL entry for {} has been removed.
If there are no other listings for this IP, it will be removed from the
blacklist in the next 30 minutes.
Note that this is an automated email, if you have a follow-up question
please reply to this email.
For reference, here is the comment you submitted:
{}
email_active: |
Hello,
The DroneBL removal request for {} has been denied. Details below:
{}
Please fix this open proxy and then request removal again.
Note that this is an automated email, if you have a follow-up question
please reply to this email.
For reference, here is the comment you submitted:
{}

View File

@ -0,0 +1,134 @@
import email
import logging
import psycopg2
import requests
import smtplib
import time
import yaml
from bs4 import BeautifulSoup
from datetime import datetime
# Load settings; the context manager closes the file handle right after parsing.
with open("config.yaml") as config_file:
    config = yaml.full_load(config_file)

# Database connection used to look up proxy state for each ticket.
pgconn = psycopg2.connect(dbname=config["dbname"], user=config["dbuser"], password=config["dbpassword"], host=config["dbhost"])
key = config["dronebl_key"]
# <remove .../> tags collected while processing tickets, sent in one request at the end.
entries_to_remove = []

# One SMTP session (upgraded with STARTTLS) is shared by all outgoing emails.
smtp_connection = smtplib.SMTP(config["email_host"], config["email_port"])
smtp_connection.starttls()
smtp_connection.login(config["email_username"], config["email_password"])

# Everything is logged at DEBUG level to a file in the working directory.
debug_log = logging.getLogger("dronebl_check_tickets")
debug_log.setLevel(logging.DEBUG)
debug_log_file_handler = logging.FileHandler("dronebl_check_tickets_debug.log")
debug_log_file_handler.setLevel(logging.DEBUG)
debug_log.addHandler(debug_log_file_handler)
debug_log.debug(f"Current run at {datetime.now()}")

# Session shared by the DroneBL admin requests, so login cookies carry over.
s = requests.Session()
headers = {"User-Agent": config["user_agent"]}
def _send_ticket_email(ticket, subject, body):
    """Build a plain-text email to the ticket submitter and send it.

    Sender, optional Bcc and Message-ID domain come from ``config``; the
    recipient is ``ticket["email"]``. The message goes out over the shared
    ``smtp_connection``.
    """
    msg = email.message.EmailMessage()
    msg["From"] = config["email_from"]
    msg["Date"] = email.utils.localtime()
    # Bcc is optional in the config file.
    if config.get("email_bcc"):
        msg["Bcc"] = config["email_bcc"]
    msg["Message-ID"] = email.utils.make_msgid(domain=config["email_domain"])
    msg["To"] = ticket["email"]
    msg["Subject"] = subject
    msg.set_content(body)
    smtp_connection.send_message(msg)


def send_active_email(ticket, details, comment):
    """Tell the submitter that the removal request was denied.

    Args:
        ticket: Ticket dict; ``"ip"`` and ``"email"`` are read.
        details: Text describing the proxy that is still active.
        comment: The comment submitted with the ticket.
    """
    _send_ticket_email(
        ticket,
        f"DroneBL removal request for {ticket['ip']} denied",
        config["email_active"].format(ticket["ip"], details, comment),
    )


def send_inactive_email(ticket, comment):
    """Tell the submitter that the DroneBL entry was removed.

    Args:
        ticket: Ticket dict; ``"ip"`` and ``"email"`` are read.
        comment: The comment submitted with the ticket.
    """
    _send_ticket_email(
        ticket,
        f"DroneBL entry for {ticket['ip']} removed",
        config["email_inactive"].format(ticket["ip"], comment),
    )
def remove_dronebl_entries(entries):
    """Send the given XML tags to the DroneBL RPC endpoint in one request.

    ``entries`` is an iterable of ready-made tag strings. They are joined
    with newlines and wrapped in a ``<request>`` element carrying the
    module-level ``key``. Request and response text go to the debug log.
    """
    body = "\n".join(entries)
    request_string = f'<?xml version="1.0"?>\n<request key="{key}">\n{body}\n</request>'
    debug_log.debug(request_string)
    response = requests.post(
        "https://mirror1.dronebl.org/RPC2",
        data=request_string,
        headers={
            "Content-Type": "text/xml",
            "Content-Length": str(len(request_string)),
        },
    )
    debug_log.debug(response.text)
def get_ticket_data():
    """Log in to the DroneBL admin site and scrape the ticket table.

    The login form is posted on the shared session ``s``, and the returned
    HTML is parsed for rows matching ``.listing2 > tbody > tr``. The first
    eight ``<td>`` cells of each row become one ticket dict.

    Returns:
        A list of dicts with the keys ``ticket_id``, ``incident_id``,
        ``ip``, ``proxy_type``, ``port``, ``date_submitted``, ``name`` and
        ``email``. All values are the cells' text.
    """
    # Column order of the ticket table, left to right.
    fields = (
        "ticket_id",
        "incident_id",
        "ip",
        "proxy_type",
        "port",
        "date_submitted",
        "name",
        "email",
    )
    # NOTE(review): "ref" presumably makes the login redirect to the ticket
    # list, so the login response already holds the table - confirm.
    login_form = {"ref": "https://mirror1.dronebl.org/admin/tickets", "account": config["dronebl_account_name"], "password": config["dronebl_account_password"]}
    r = s.post("https://mirror1.dronebl.org/admin/login", headers=headers, data=login_form)
    html = BeautifulSoup(r.text, "html5lib")
    ticket_array = []
    for row in html.select(".listing2 > tbody > tr"):
        cells = row.find_all("td")
        if len(cells) < len(fields):
            # Rows without the full set of cells (e.g. a header row) would
            # otherwise raise IndexError and abort the whole run.
            debug_log.debug(f"Skipping ticket row with {len(cells)} cells")
            continue
        ticket_array.append({name: cell.text for name, cell in zip(fields, cells)})
    return ticket_array
# Main loop: for every open DroneBL removal ticket, look up the matching rows
# in the proxies table and either deny the request (proxy still active),
# schedule a recheck, or queue the DroneBL entry for removal.
# NOTE(review): indentation is missing in this view, so the nesting described
# in the comments below is the presumed structure - confirm against the file.
with pgconn:
with pgconn.cursor() as curs:
for ticket in get_ticket_data():
ip = ticket["ip"]
# Row layout used below: 0=id, 1=ip, 2=port, 3=exit_ip, 4=status,
# 5=proxy_type, 6=last_seen, 7=recheck.
curs.execute("select id,ip,port,exit_ip,status,proxy_type,last_seen,recheck from proxies where ip=%s or exit_ip=%s", (ip, ip))
# Stays True only if no matching row has a status other than "inactive"
# (which includes the case of no matching rows at all).
should_remove = True
results = curs.fetchall()
# Fetch the comment text the submitter attached to this ticket.
comment = s.post("https://mirror1.dronebl.org/admin/tickets", headers=headers, data={"ajax": "comments", "id": ticket["ticket_id"]})
debug_log.debug(comment.text)
# Ensures at most one denial email is sent per ticket.
already_sent = False
for entry in results:
debug_log.debug(results)
if entry[4] != "inactive":
should_remove = False
if entry[4] == "active":
# entry[7] is the recheck flag read from the proxies table.
if entry[7]:
# Choose the wording of the denial depending on whether the ticket IP is
# the entry IP and on the proxy type.
if entry[1] == entry[3] or entry[1] == ip:
if entry[5] in ["http", "socks5", "socks4", "httpsocks", "https"]:
details = f"{entry[1]} has an open {entry[5]} proxy on port {entry[2]}, confirmed active at {entry[6]}"
if entry[5] == "vpngate":
details = f"{entry[1]} is running an open VPNGate proxy on port {entry[2]}, confirmed active at {entry[6]}. These are heavily abused by spammers, therefore they are blocked."
# NOTE(review): presumably the else of the entry/exit IP comparison above,
# not of the vpngate check - confirm.
else:
details = f"Confirmed open {entry[5]} proxy with an entry IP of {entry[1]}, port {entry[2]} and exit IP of {entry[3]}, confirmed active at {entry[6]}"
debug_log.debug(details)
# Delete the ticket on the DroneBL admin site.
s.post("https://mirror1.dronebl.org/admin/tickets", headers=headers, data={"ajax": "delete", "id": ticket["ticket_id"]})
if not already_sent:
send_active_email(ticket, details, comment.text)
debug_log.debug(f"Sent email: {details}")
already_sent = True
curs.execute("update proxies set recheck=false where id=%s", (entry[0],))
# NOTE(review): presumably the else of `if entry[7]` - an active proxy that
# is not yet flagged is reset to 'unscanned' with recheck=true. Confirm.
else:
curs.execute("update proxies set status='unscanned', recheck=true where id=%s", (entry[0],))
debug_log.debug(f"IP: {ip} is active, rechecking")
if entry[4] in ["scanning", "unscanned"]:
debug_log.debug(f"IP: {ip} is {entry[4]}, skipping")
if should_remove:
if not results:
debug_log.debug(f"IP: {ip} is not in database")
else:
debug_log.debug(f"IP: {ip} has all inactive proxies")
incident_id = ticket["incident_id"]
# Queue the removal tag; the queued tags are sent after the loop.
entries_to_remove.append(f'<remove id="{incident_id}" />')
curs.execute("update proxies set recheck=false where ip=%s or exit_ip=%s", (ip, ip))
# Drop the IP from the local dronebl lookup table.
curs.execute("delete from dronebl where ip=%s", (ip,))
pgconn.commit()
send_inactive_email(ticket, comment.text)
# Pause between tickets.
time.sleep(5)
smtp_connection.quit()
# Send all queued removals to DroneBL in one request.
if entries_to_remove:
remove_dronebl_entries(entries_to_remove)

View File

@ -0,0 +1,97 @@
import yaml
import psycopg2
import psycopg2.extras
import requests
import sys
from xml.etree import ElementTree
class DroneBL(object):
    """Submit active proxies from the local database to DroneBL.

    Settings are read from ``config.yaml`` in the working directory.
    ``sync_db`` selects active proxies whose IPs are not yet in the local
    ``dronebl`` table, asks DroneBL which of them are already listed, and
    submits the remaining ones.
    """

    # RPC endpoint used for both lookup and add requests.
    RPC_URL = "https://mirror1.dronebl.org/RPC2"
    # Number of (ip, type) pairs handled per lookup/add round trip.
    BATCH_SIZE = 40
    # Local proxy type -> DroneBL listing type number sent in <add> tags.
    DBL_TYPES = {"http": 9, "vpngate": 19, "socks4": 8, "socks5": 8}

    def __init__(self):
        """Load the configuration and open the database connection."""
        with open("config.yaml") as config_file:
            self.config = yaml.full_load(config_file)
        self.conn = psycopg2.connect(dbname=self.config["dbname"], user=self.config["dbuser"], password=self.config["dbpassword"], host=self.config["dbhost"])

    @classmethod
    def _dronebl_type(cls, proxy_type):
        """Return the DroneBL type number for ``proxy_type``, or None if unmapped."""
        return cls.DBL_TYPES.get(proxy_type)

    @staticmethod
    def _listed_ips(response_text):
        """Return the set of IPs a lookup response reports as listed.

        Only direct ``<result>`` children with ``listed="1"`` are counted.
        A response that is not well-formed XML yields an empty set, so every
        looked-up IP is then treated as unlisted.
        """
        try:
            tree = ElementTree.fromstring(response_text)
        except ElementTree.ParseError:
            return set()
        return {
            child.attrib["ip"]
            for child in tree
            if child.tag == "result"
            and child.attrib.get("listed") == "1"
            and "ip" in child.attrib
        }

    def make_request(self, text):
        """POST ``text`` wrapped in a keyed ``<request>`` element to DroneBL.

        The response body is printed. An SSL error is printed and ends the
        process with exit status 1.

        Returns:
            The ``requests`` response object.
        """
        key = self.config["dronebl_key"]
        request_string = f'<?xml version="1.0"?>\n<request key="{key}">\n{text}\n</request>'
        # Encode once so Content-Length counts bytes, not characters.
        payload = request_string.encode("utf-8")
        headers = {
            "Content-Type": "text/xml",
            "Content-Length": str(len(payload)),
        }
        try:
            r = requests.post(self.RPC_URL, data=payload, headers=headers)
            print(r.text)
        except requests.exceptions.SSLError as e:
            print(str(e))
            sys.exit(1)
        return r

    def sync_db(self):
        """Look up new active proxies on DroneBL and submit the unlisted ones.

        Every IP is looked up and recorded in the ``dronebl`` table exactly
        once per run, even when it appears in several batches.
        """
        with self.conn:
            with self.conn.cursor() as curs:
                curs.execute("""select ip, exit_ip, proxy_type from proxies
                    where ip not in (select ip from dronebl) and
                    exit_ip not in (select ip from dronebl) and
                    status='active' limit 200""")
                proxies_to_test = []
                for ip, exit_ip, proxy_type in curs.fetchall():
                    dbl_type = self._dronebl_type(proxy_type)
                    if dbl_type is None:
                        # Proxy types without a DroneBL mapping are not submitted.
                        continue
                    proxies_to_test.append((ip, dbl_type))
                    if ip != exit_ip:
                        proxies_to_test.append((exit_ip, dbl_type))

                seen_ips = set()      # every IP already looked up in this run
                unlisted_ips = set()  # looked-up IPs DroneBL did not report as listed
                for start in range(0, len(proxies_to_test), self.BATCH_SIZE):
                    batch = proxies_to_test[start:start + self.BATCH_SIZE]

                    # IPs in this batch that have not been looked up yet.
                    new_ips = []
                    for ip, _ in batch:
                        if ip not in seen_ips:
                            seen_ips.add(ip)
                            new_ips.append(ip)

                    if new_ips:
                        lookup_tags = [f"<lookup ip='{ip}' listed='1' />" for ip in new_ips]
                        lookup_request = self.make_request("\n".join(lookup_tags))
                        # Record only the IPs new to this batch, so earlier
                        # batches are not inserted a second time.
                        psycopg2.extras.execute_values(
                            curs,
                            "insert into dronebl (ip) values %s",
                            [(ip,) for ip in new_ips],
                        )
                        self.conn.commit()
                        listed = self._listed_ips(lookup_request.text)
                        unlisted_ips.update(ip for ip in new_ips if ip not in listed)

                    add_tags = [
                        f"<add ip='{ip}' type='{dbl_type}' />"
                        for ip, dbl_type in batch
                        if ip in unlisted_ips
                    ]
                    if add_tags:
                        self.make_request("\n".join(add_tags))
# Run a single sync when executed as a script.
if __name__ == "__main__":
    submitter = DroneBL()
    submitter.sync_db()