forked from solderpunk/AV-98
netcache now works for gemini
This commit is contained in:
parent
a7c8ed33d5
commit
b745b04f7a
249
netcache.py
249
netcache.py
|
@ -1,8 +1,11 @@
|
||||||
#!/bin/python
|
#!/bin/python
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import argparse
|
import argparse
|
||||||
import requests
|
import requests
|
||||||
|
import codecs
|
||||||
|
import getpass
|
||||||
import socket
|
import socket
|
||||||
import ssl
|
import ssl
|
||||||
from ssl import CertificateError
|
from ssl import CertificateError
|
||||||
|
@ -34,6 +37,10 @@ standard_ports = {
|
||||||
}
|
}
|
||||||
default_protocol = "gemini"
|
default_protocol = "gemini"
|
||||||
|
|
||||||
|
CRLF = '\r\n'
|
||||||
|
DEFAULT_TIMEOUT = 10
|
||||||
|
_MAX_REDIRECTS = 5
|
||||||
|
|
||||||
def parse_mime(mime):
|
def parse_mime(mime):
|
||||||
options = {}
|
options = {}
|
||||||
if mime:
|
if mime:
|
||||||
|
@ -343,7 +350,7 @@ def _fetch_gopher(url,timeout=10):
|
||||||
else:
|
else:
|
||||||
# by default, we should consider Gopher
|
# by default, we should consider Gopher
|
||||||
mime = "text/gopher"
|
mime = "text/gopher"
|
||||||
cache = write_body(response,mime)
|
cache = write_body(url,response,mime)
|
||||||
return cache
|
return cache
|
||||||
|
|
||||||
def _fetch_finger(url,timeout=10):
|
def _fetch_finger(url,timeout=10):
|
||||||
|
@ -384,7 +391,7 @@ def _fetch_spartan(url):
|
||||||
body = fp.read()
|
body = fp.read()
|
||||||
if meta.startswith("text"):
|
if meta.startswith("text"):
|
||||||
body = body.decode("UTF-8")
|
body = body.decode("UTF-8")
|
||||||
cache = write_body(body,meta)
|
cache = write_body(url,body,meta)
|
||||||
elif code == 3:
|
elif code == 3:
|
||||||
redirect_url = url_parts._replace(path=meta).geturl()
|
redirect_url = url_parts._replace(path=meta).geturl()
|
||||||
else:
|
else:
|
||||||
|
@ -395,7 +402,7 @@ def _fetch_spartan(url):
|
||||||
cache = _fetch_spartan(redirect_url)
|
cache = _fetch_spartan(redirect_url)
|
||||||
return cache
|
return cache
|
||||||
|
|
||||||
def _fetch_gemini(url):
|
def _fetch_gemini(url,options={}):
|
||||||
cache = None
|
cache = None
|
||||||
url_parts = urllib.parse.urlparse(url)
|
url_parts = urllib.parse.urlparse(url)
|
||||||
host = url_parts.hostname
|
host = url_parts.hostname
|
||||||
|
@ -404,34 +411,35 @@ def _fetch_gemini(url):
|
||||||
query = url_parts.query
|
query = url_parts.query
|
||||||
# Be careful with client certificates!
|
# Be careful with client certificates!
|
||||||
# Are we crossing a domain boundary?
|
# Are we crossing a domain boundary?
|
||||||
if self.active_cert_domains and host not in self.active_cert_domains:
|
# TODO : code should be adapted to netcache
|
||||||
if self.active_is_transient:
|
# if self.active_cert_domains and host not in self.active_cert_domains:
|
||||||
print("Permanently delete currently active transient certificate?")
|
# if self.active_is_transient:
|
||||||
resp = input("Y/N? ")
|
# print("Permanently delete currently active transient certificate?")
|
||||||
if resp.strip().lower() in ("y", "yes"):
|
# resp = input("Y/N? ")
|
||||||
print("Destroying certificate.")
|
# if resp.strip().lower() in ("y", "yes"):
|
||||||
self._deactivate_client_cert()
|
# print("Destroying certificate.")
|
||||||
else:
|
# self._deactivate_client_cert()
|
||||||
print("Staying here.")
|
# else:
|
||||||
raise UserAbortException()
|
# print("Staying here.")
|
||||||
else:
|
# raise UserAbortException()
|
||||||
print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?")
|
# else:
|
||||||
resp = input("Y/N? ")
|
# print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?")
|
||||||
if resp.strip().lower() in ("n", "no"):
|
# resp = input("Y/N? ")
|
||||||
print("Keeping certificate active for {}".format(host))
|
# if resp.strip().lower() in ("n", "no"):
|
||||||
else:
|
# print("Keeping certificate active for {}".format(host))
|
||||||
print("Deactivating certificate.")
|
# else:
|
||||||
self._deactivate_client_cert()
|
# print("Deactivating certificate.")
|
||||||
|
# self._deactivate_client_cert()
|
||||||
# Suggest reactivating previous certs
|
#
|
||||||
if not self.client_certs["active"] and host in self.client_certs:
|
# # Suggest reactivating previous certs
|
||||||
print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(host))
|
# if not self.client_certs["active"] and host in self.client_certs:
|
||||||
resp = input("Y/N? ")
|
# print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(host))
|
||||||
if resp.strip().lower() in ("y", "yes"):
|
# resp = input("Y/N? ")
|
||||||
self._activate_client_cert(*self.client_certs[host])
|
# if resp.strip().lower() in ("y", "yes"):
|
||||||
else:
|
# self._activate_client_cert(*self.client_certs[host])
|
||||||
print("Remaining unidentified.")
|
# else:
|
||||||
self.client_certs.pop(host)
|
# print("Remaining unidentified.")
|
||||||
|
# self.client_certs.pop(host)
|
||||||
|
|
||||||
# In AV-98, this was the _send_request method
|
# In AV-98, this was the _send_request method
|
||||||
#Send a selector to a given host and port.
|
#Send a selector to a given host and port.
|
||||||
|
@ -457,16 +465,6 @@ def _fetch_gemini(url):
|
||||||
# Prepare TLS context
|
# Prepare TLS context
|
||||||
protocol = ssl.PROTOCOL_TLS_CLIENT if sys.version_info.minor >=6 else ssl.PROTOCOL_TLSv1_2
|
protocol = ssl.PROTOCOL_TLS_CLIENT if sys.version_info.minor >=6 else ssl.PROTOCOL_TLSv1_2
|
||||||
context = ssl.SSLContext(protocol)
|
context = ssl.SSLContext(protocol)
|
||||||
|
|
||||||
# Use CAs or TOFU
|
|
||||||
#TODO : should we care about this options?
|
|
||||||
#if self.options["tls_mode"] == "ca":
|
|
||||||
# context.verify_mode = ssl.CERT_REQUIRED
|
|
||||||
# context.check_hostname = True
|
|
||||||
# context.load_default_certs()
|
|
||||||
#else:
|
|
||||||
# context.check_hostname = False
|
|
||||||
# context.verify_mode = ssl.CERT_NONE
|
|
||||||
context.check_hostname=False
|
context.check_hostname=False
|
||||||
context.verify_mode = ssl.CERT_NONE
|
context.verify_mode = ssl.CERT_NONE
|
||||||
# Impose minimum TLS version
|
# Impose minimum TLS version
|
||||||
|
@ -487,57 +485,50 @@ def _fetch_gemini(url):
|
||||||
# Rely on the server to only support sensible things, I guess...
|
# Rely on the server to only support sensible things, I guess...
|
||||||
pass
|
pass
|
||||||
|
|
||||||
#TODO: I’m here in the refactor
|
#TODO: certificate handling to refactor
|
||||||
# Load client certificate if needed
|
# # Load client certificate if needed
|
||||||
if self.client_certs["active"]:
|
# if self.client_certs["active"]:
|
||||||
certfile, keyfile = self.client_certs["active"]
|
# certfile, keyfile = self.client_certs["active"]
|
||||||
context.load_cert_chain(certfile, keyfile)
|
# context.load_cert_chain(certfile, keyfile)
|
||||||
|
|
||||||
# Connect to remote host by any address possible
|
# Connect to remote host by any address possible
|
||||||
err = None
|
err = None
|
||||||
for address in addresses:
|
for address in addresses:
|
||||||
self._debug("Connecting to: " + str(address[4]))
|
s = socket.socket(address[0], address[1])
|
||||||
s = socket.socket(address[0], address[1])
|
if "timeout" in options:
|
||||||
if self.sync_only:
|
timeout = options["timeout"]
|
||||||
timeout = self.options["short_timeout"]
|
|
||||||
else:
|
|
||||||
timeout = self.options["timeout"]
|
|
||||||
s.settimeout(timeout)
|
|
||||||
s = context.wrap_socket(s, server_hostname = host)
|
|
||||||
try:
|
|
||||||
s.connect(address[4])
|
|
||||||
break
|
|
||||||
except OSError as e:
|
|
||||||
err = e
|
|
||||||
else:
|
else:
|
||||||
# If we couldn't connect to *any* of the addresses, just
|
timeout = DEFAULT_TIMEOUT
|
||||||
# bubble up the exception from the last attempt and deny
|
s.settimeout(timeout)
|
||||||
# knowledge of earlier failures.
|
s = context.wrap_socket(s, server_hostname = host)
|
||||||
raise err
|
try:
|
||||||
if sys.version_info.minor >=5:
|
s.connect(address[4])
|
||||||
self._debug("Established {} connection.".format(s.version()))
|
break
|
||||||
self._debug("Cipher is: {}.".format(s.cipher()))
|
except OSError as e:
|
||||||
# Do TOFU
|
err = e
|
||||||
if self.options["tls_mode"] != "ca":
|
else:
|
||||||
cert = s.getpeercert(binary_form=True)
|
# If we couldn't connect to *any* of the addresses, just
|
||||||
self._validate_cert(address[4][0], host, cert)
|
# bubble up the exception from the last attempt and deny
|
||||||
# Remember that we showed the current cert to this domain...
|
# knowledge of earlier failures.
|
||||||
if self.client_certs["active"]:
|
raise err
|
||||||
self.active_cert_domains.append(host)
|
|
||||||
self.client_certs[host] = self.client_certs["active"]
|
# Do TOFU
|
||||||
# Send request and wrap response in a file descriptor
|
cert = s.getpeercert(binary_form=True)
|
||||||
url = urllib.parse.urlparse(gi.url)
|
# TODO: another cert handling to refactor
|
||||||
new_netloc = host
|
# Remember that we showed the current cert to this domain...
|
||||||
if port != 1965:
|
# self._validate_cert(address[4][0], host, cert)
|
||||||
new_netloc += ":" + str(port)
|
# if self.client_certs["active"]:
|
||||||
url = urllib.parse.urlunparse(url._replace(netloc=new_netloc))
|
# self.active_cert_domains.append(host)
|
||||||
self._debug("Sending %s<CRLF>" % url)
|
# self.client_certs[host] = self.client_certs["active"]
|
||||||
s.sendall((url + CRLF).encode("UTF-8"))
|
# Send request and wrap response in a file descriptor
|
||||||
mf= s.makefile(mode = "rb")
|
url = urllib.parse.urlparse(url)
|
||||||
return address, mf
|
new_netloc = host
|
||||||
##
|
if port != standard_ports["gemini"]:
|
||||||
## end of send_request
|
new_netloc += ":" + str(port)
|
||||||
TODO :address, f = self._send_request(gi)
|
url = urllib.parse.urlunparse(url._replace(netloc=new_netloc))
|
||||||
|
s.sendall((url + CRLF).encode("UTF-8"))
|
||||||
|
f= s.makefile(mode = "rb")
|
||||||
|
## end of send_request in AV98
|
||||||
# Spec dictates <META> should not exceed 1024 bytes,
|
# Spec dictates <META> should not exceed 1024 bytes,
|
||||||
# so maximum valid header length is 1027 bytes.
|
# so maximum valid header length is 1027 bytes.
|
||||||
header = f.readline(1027)
|
header = f.readline(1027)
|
||||||
|
@ -545,7 +536,6 @@ def _fetch_gemini(url):
|
||||||
if not header or header[-1] != '\n':
|
if not header or header[-1] != '\n':
|
||||||
raise RuntimeError("Received invalid header from server!")
|
raise RuntimeError("Received invalid header from server!")
|
||||||
header = header.strip()
|
header = header.strip()
|
||||||
self._debug("Response header: %s." % header)
|
|
||||||
# Validate header
|
# Validate header
|
||||||
status, meta = header.split(maxsplit=1)
|
status, meta = header.split(maxsplit=1)
|
||||||
if len(meta) > 1024 or len(status) != 2 or not status.isnumeric():
|
if len(meta) > 1024 or len(status) != 2 or not status.isnumeric():
|
||||||
|
@ -553,62 +543,63 @@ def _fetch_gemini(url):
|
||||||
raise RuntimeError("Received invalid header from server!")
|
raise RuntimeError("Received invalid header from server!")
|
||||||
# Update redirect loop/maze escaping state
|
# Update redirect loop/maze escaping state
|
||||||
if not status.startswith("3"):
|
if not status.startswith("3"):
|
||||||
self.previous_redirectors = set()
|
previous_redirectors = set()
|
||||||
|
#TODO FIXME
|
||||||
|
else:
|
||||||
|
#we set a previous_redirectors anyway because refactoring in progress
|
||||||
|
previous_redirectors = set()
|
||||||
# Handle non-SUCCESS headers, which don't have a response body
|
# Handle non-SUCCESS headers, which don't have a response body
|
||||||
# Inputs
|
# Inputs
|
||||||
if status.startswith("1"):
|
if status.startswith("1"):
|
||||||
if self.sync_only:
|
print(meta)
|
||||||
return None
|
if status == "11":
|
||||||
|
user_input = getpass.getpass("> ")
|
||||||
else:
|
else:
|
||||||
print(meta)
|
user_input = input("> ")
|
||||||
if status == "11":
|
return _fetch_gemini(query(user_input))
|
||||||
user_input = getpass.getpass("> ")
|
|
||||||
else:
|
|
||||||
user_input = input("> ")
|
|
||||||
return self._fetch_over_network(query(user_input))
|
|
||||||
# Redirects
|
# Redirects
|
||||||
elif status.startswith("3"):
|
elif status.startswith("3"):
|
||||||
new_gi = GeminiItem(gi.absolutise_url(meta))
|
newurl = urllib.parse.urljoin(url,meta)
|
||||||
if new_gi.url == gi.url:
|
if newurl == url:
|
||||||
raise RuntimeError("URL redirects to itself!")
|
raise RuntimeError("URL redirects to itself!")
|
||||||
elif new_gi.url in self.previous_redirectors:
|
elif newurl in previous_redirectors:
|
||||||
raise RuntimeError("Caught in redirect loop!")
|
raise RuntimeError("Caught in redirect loop!")
|
||||||
elif len(self.previous_redirectors) == _MAX_REDIRECTS:
|
elif len(previous_redirectors) == _MAX_REDIRECTS:
|
||||||
raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
|
raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
|
||||||
elif self.sync_only:
|
# TODO: redirections handling should be refactored
|
||||||
follow = self.automatic_choice
|
# elif "interactive" in options and not options["interactive"]:
|
||||||
# Never follow cross-domain redirects without asking
|
# follow = self.automatic_choice
|
||||||
elif new_gi.host.encode("idna") != gi.host.encode("idna"):
|
# # Never follow cross-domain redirects without asking
|
||||||
follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
|
# elif new_gi.host.encode("idna") != gi.host.encode("idna"):
|
||||||
# Never follow cross-protocol redirects without asking
|
# follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
|
||||||
elif new_gi.scheme != gi.scheme:
|
# # Never follow cross-protocol redirects without asking
|
||||||
follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url)
|
# elif new_gi.scheme != gi.scheme:
|
||||||
# Don't follow *any* redirect without asking if auto-follow is off
|
# follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url)
|
||||||
elif not self.options["auto_follow_redirects"]:
|
# # Don't follow *any* redirect without asking if auto-follow is off
|
||||||
follow = input("Follow redirect to %s? (y/n) " % new_gi.url)
|
# elif not self.options["auto_follow_redirects"]:
|
||||||
# Otherwise, follow away
|
# follow = input("Follow redirect to %s? (y/n) " % new_gi.url)
|
||||||
|
# # Otherwise, follow away
|
||||||
else:
|
else:
|
||||||
follow = "yes"
|
follow = "yes"
|
||||||
if follow.strip().lower() not in ("y", "yes"):
|
if follow.strip().lower() not in ("y", "yes"):
|
||||||
raise UserAbortException()
|
raise UserAbortException()
|
||||||
self._debug("Following redirect to %s." % new_gi.url)
|
previous_redirectors.add(url)
|
||||||
self._debug("This is consecutive redirect number %d." % len(self.previous_redirectors))
|
# if status == "31":
|
||||||
self.previous_redirectors.add(gi.url)
|
# # Permanent redirect
|
||||||
if status == "31":
|
# self.permanent_redirects[gi.url] = new_gi.url
|
||||||
# Permanent redirect
|
return _fetch_gemini(newurl)
|
||||||
self.permanent_redirects[gi.url] = new_gi.url
|
|
||||||
return self._fetch_over_network(new_gi)
|
|
||||||
# Errors
|
# Errors
|
||||||
elif status.startswith("4") or status.startswith("5"):
|
elif status.startswith("4") or status.startswith("5"):
|
||||||
raise RuntimeError(meta)
|
raise RuntimeError(meta)
|
||||||
# Client cert
|
# Client cert
|
||||||
elif status.startswith("6"):
|
# elif status.startswith("6"):
|
||||||
self._handle_cert_request(meta)
|
# self._handle_cert_request(meta)
|
||||||
return self._fetch_over_network(gi)
|
# return self._fetch_over_network(gi)
|
||||||
# Invalid status
|
# Invalid status
|
||||||
elif not status.startswith("2"):
|
elif not status.startswith("2"):
|
||||||
raise RuntimeError("Server returned undefined status code %s!" % status)
|
raise RuntimeError("Server returned undefined status code %s!" % status)
|
||||||
# If we're here, this must be a success and there's a response body
|
# If we're here, this must be a success and there's a response body
|
||||||
|
print("status : %s"%status)
|
||||||
assert status.startswith("2")
|
assert status.startswith("2")
|
||||||
mime = meta
|
mime = meta
|
||||||
# Read the response body over the network
|
# Read the response body over the network
|
||||||
|
@ -634,14 +625,14 @@ def _fetch_gemini(url):
|
||||||
encoding declared in header!" % encoding)
|
encoding declared in header!" % encoding)
|
||||||
else:
|
else:
|
||||||
body = fbody
|
body = fbody
|
||||||
gi.write_body(body,mime)
|
cache = write_body(url,body,mime)
|
||||||
return gi
|
return cache
|
||||||
|
|
||||||
|
|
||||||
def fetch(url):
|
def fetch(url):
|
||||||
url = normalize_url(url)
|
url = normalize_url(url)
|
||||||
path=None
|
path=None
|
||||||
if "://" in url
|
if "://" in url:
|
||||||
scheme = url.split("://")[0]
|
scheme = url.split("://")[0]
|
||||||
if scheme not in standard_ports:
|
if scheme not in standard_ports:
|
||||||
print("%s is not a supported protocol"%scheme)
|
print("%s is not a supported protocol"%scheme)
|
||||||
|
@ -651,6 +642,8 @@ def fetch(url):
|
||||||
path=_fetch_gopher(url)
|
path=_fetch_gopher(url)
|
||||||
elif scheme == "finger":
|
elif scheme == "finger":
|
||||||
path=_fetch_finger(url)
|
path=_fetch_finger(url)
|
||||||
|
elif scheme == "gemini":
|
||||||
|
patch=_fetch_gemini(url)
|
||||||
else:
|
else:
|
||||||
print("scheme %s not implemented yet")
|
print("scheme %s not implemented yet")
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -21,10 +21,8 @@ __version__ = "1.9.2"
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import cmd
|
import cmd
|
||||||
import codecs
|
|
||||||
import datetime
|
import datetime
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import getpass
|
|
||||||
import glob
|
import glob
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
import io
|
||||||
|
@ -89,7 +87,6 @@ if os.path.exists(_old_config):
|
||||||
#if no XDG .local/share and not XDG .config, we use the old config
|
#if no XDG .local/share and not XDG .config, we use the old config
|
||||||
if not os.path.exists(data_home) and os.path.exists(_old_config):
|
if not os.path.exists(data_home) and os.path.exists(_old_config):
|
||||||
_DATA_DIR = _CONFIG_DIR
|
_DATA_DIR = _CONFIG_DIR
|
||||||
_MAX_REDIRECTS = 5
|
|
||||||
_MAX_CACHE_SIZE = 10
|
_MAX_CACHE_SIZE = 10
|
||||||
_MAX_CACHE_AGE_SECS = 180
|
_MAX_CACHE_AGE_SECS = 180
|
||||||
|
|
||||||
|
@ -548,7 +545,6 @@ class GeminiItem():
|
||||||
def to_map_line(self):
|
def to_map_line(self):
|
||||||
return "=> {} {}\n".format(self.url_mode(), self.get_page_title())
|
return "=> {} {}\n".format(self.url_mode(), self.get_page_title())
|
||||||
|
|
||||||
CRLF = '\r\n'
|
|
||||||
|
|
||||||
# Cheap and cheerful URL detector
|
# Cheap and cheerful URL detector
|
||||||
def looks_like_url(word):
|
def looks_like_url(word):
|
||||||
|
@ -611,7 +607,6 @@ class GeminiClient(cmd.Cmd):
|
||||||
self.marks = {}
|
self.marks = {}
|
||||||
self.page_index = 0
|
self.page_index = 0
|
||||||
self.permanent_redirects = {}
|
self.permanent_redirects = {}
|
||||||
self.previous_redirectors = set()
|
|
||||||
# Sync-only mode is restriced by design
|
# Sync-only mode is restriced by design
|
||||||
self.visited_hosts = set()
|
self.visited_hosts = set()
|
||||||
self.offline_only = False
|
self.offline_only = False
|
||||||
|
|
Loading…
Reference in New Issue