79 lines
2.2 KiB
Python
79 lines
2.2 KiB
Python
import os
|
|
import re
|
|
import subprocess
|
|
from subprocess import PIPE
|
|
import urllib.parse
|
|
import lxml.html as html
|
|
from selenium import webdriver
|
|
from selenium.webdriver.firefox.options import Options
|
|
|
|
|
|
|
|
class Spider:
    """Base spider that fetches pages through a headless Firefox driven by Selenium.

    The browser is started lazily by the first call to ``get`` and the same
    driver is used for every later call.
    """

    # Selenium WebDriver instance; stays None until start_selenium() has run.
    sel = None

    def start_selenium(self):
        """Start a headless Firefox WebDriver, store it in ``self.sel`` and return it."""
        options = Options()
        options.add_argument("--headless")
        self.sel = webdriver.Firefox(options=options)
        return self.sel

    def get(self, url):
        """Load ``url`` in the browser and return the page source.

        The browser is started first if no driver exists yet.
        """
        if self.sel is None:
            self.start_selenium()
        self.sel.get(url)
        return self.sel.page_source

    def close(self):
        """Quit the browser if one was started; calling it again is a no-op."""
        if self.sel is not None:
            self.sel.quit()
            # Drop the reference so a later get() starts a fresh browser.
            self.sel = None
|
|
|
|
##################################################
|
|
|
|
class PListSpider(Spider):
    """This spider crawls https://proxy-list.download/SOCKS5"""

    url = "https://proxy-list.download/SOCKS5"

    # Raw strings so that ``\.`` reaches the regex engine as an escaped dot
    # instead of being treated as an (invalid) string escape sequence.
    _IP_RE = re.compile(r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}")
    _PORT_RE = re.compile(r"[0-9]{1,5}")

    def get_proxies(self):
        """Return a list of (ip, port) string tuples scraped from ``self.url``.

        Every ``<tr>`` of the page contributes at most one tuple: the last
        dotted-quad address seen in its cells, paired with the first run of
        digits found in a cell that follows it. Rows without an address are
        skipped.
        """
        page = self.get(self.url)
        tree = html.fromstring(page)
        proxies = []
        for row in tree.xpath('//tr'):
            # Reset per row so a port is never paired with an address that
            # belongs to an earlier row (or with no address at all).
            ip = None
            for cell in row.xpath('./td/text()'):
                ip_match = self._IP_RE.search(cell)
                if ip_match is not None:
                    ip = ip_match.group(0)
                    continue
                if ip is None:
                    # Digits seen before any address cannot be this row's port.
                    continue
                port_match = self._PORT_RE.search(cell)
                if port_match is not None:
                    proxies.append((ip, port_match.group(0)))
                    break
        return proxies
|
|
|
|
##################################################
|
|
|
|
def _proxy_works(conf, ip, port):
    """Return True when a request through the SOCKS5 proxy ``ip:port`` reports no timeout.

    Writes ``conf`` plus a ``socks5`` line for the proxy to ``tmp.conf``, runs
    curl against https://check.torproject.org through proxychains using that
    file, and checks the captured stderr for the marker ``timeout!``.
    ``tmp.conf`` is removed afterwards, even if launching the command fails.

    Raises:
        FileNotFoundError: if the ``proxychains`` executable cannot be found.
    """
    with open("tmp.conf", "w") as conf_file:
        conf_file.write(conf + "\nsocks5 {ip} {port}".format(ip=ip, port=port))

    # Argument list instead of a shell string: no shell is involved, and a
    # missing proxychains binary raises instead of looking like a working proxy.
    cmd = ["proxychains", "-f", "tmp.conf", "curl", "--url", "https://check.torproject.org"]
    print(" ".join(cmd))
    try:
        result = subprocess.run(cmd, stdout=PIPE, stderr=PIPE)
    finally:
        os.remove("tmp.conf")

    print(result.stderr)
    return b"timeout!" not in result.stderr


def main():
    """Scrape SOCKS5 proxies, test each one and append the working ones to pList.txt.

    Reads the base proxychains configuration from ``proxychains.conf`` in the
    current directory. Each proxy that passes the check is appended to
    ``pList.txt`` as an ``ip,port`` line.
    """
    spider = PListSpider()
    try:
        proxies = spider.get_proxies()
    finally:
        # Shut the headless browser down once scraping is over so no Firefox
        # process is left running while the proxies are tested.
        if spider.sel is not None:
            spider.sel.quit()

    with open("proxychains.conf", "r") as conf_file:
        conf = conf_file.read()

    for ip, port in proxies:
        if _proxy_works(conf, ip, port):
            with open("pList.txt", "a") as plist_file:
                plist_file.write(ip + "," + port + "\n")
|
|
|
|
# Run the proxy check only when this file is executed as a script.
if __name__ == "__main__":
    main()
|