Refactor of networking logic.
1. Move the client-certificate handling into _send_request(). 2. Make _fetch_over_network() iterative rather than recursive: it now simply loops over calls to _send_request(). This allows the redirect-tracking state to live inside _fetch_over_network() instead of being kept on GeminiClient. 3. Allow _fetch_over_network() to save the response to a caller-provided filename, and use this to implement do_save() instead of going through _go_to_gi(). This avoids the need for awkward gymnastics with the client's internal state.
This commit is contained in:
parent
01da844141
commit
713616d556
301
av98.py
301
av98.py
|
@ -248,7 +248,6 @@ class GeminiClient(cmd.Cmd):
|
||||||
self.marks = {}
|
self.marks = {}
|
||||||
self.page_index = 0
|
self.page_index = 0
|
||||||
self.permanent_redirects = {}
|
self.permanent_redirects = {}
|
||||||
self.previous_redirectors = set()
|
|
||||||
self.restricted = restricted
|
self.restricted = restricted
|
||||||
self.tmp_filename = ""
|
self.tmp_filename = ""
|
||||||
self.visited_hosts = set()
|
self.visited_hosts = set()
|
||||||
|
@ -388,7 +387,125 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
body = None
|
body = None
|
||||||
return mime, body, gi.path
|
return mime, body, gi.path
|
||||||
|
|
||||||
def _fetch_over_network(self, gi):
|
def _fetch_over_network(self, gi, destination=None):
    """Request `gi` from the network and store the response body on disk.

    Requests are sent in a loop via self._send_request() until the server
    answers with a SUCCESS (2x) status.  Along the way the loop:

    * prompts the user for input on 1x statuses and re-requests with the
      answer attached as the query,
    * follows 3x redirects (asking the user first for cross-domain and
      cross-protocol redirects, or for all redirects when the
      "auto_follow_redirects" option is off),
    * hands 6x statuses to self._handle_cert_request() and retries.

    Parameters:
        gi: the item to fetch.
        destination: optional filename, passed through to
            self._write_response_to_file(); when falsy the body is
            written to a temporary file instead.

    Returns:
        A (gi, mime, body, filename) tuple, where `gi` is the item that
        finally produced the SUCCESS response (it differs from the
        argument after a redirect or an input prompt).

    Raises:
        RuntimeError: on 4x/5x statuses, undefined statuses, redirect
            loops or too many consecutive redirects, or an unknown
            charset in the response header.
        UserAbortException: if the user declines to follow a redirect.
        Any exception raised by self._send_request() is re-raised after
        the matching failure counter in self.log has been bumped.
    """
    # Redirect-tracking state lives only for the duration of this call.
    previous_redirectors = set()

    while True:
        # Send request to server
        try:
            status, meta, address, f = self._send_request(gi)
        except Exception as err:
            # Count the failure for the statistics, then let it propagate.
            if isinstance(err, socket.gaierror):
                self.log["dns_failures"] += 1
            elif isinstance(err, ConnectionRefusedError):
                self.log["refused_connections"] += 1
            elif isinstance(err, ConnectionResetError):
                self.log["reset_connections"] += 1
            elif isinstance(err, (TimeoutError, socket.timeout)):
                self.log["timeouts"] += 1
            # Bare raise keeps the original traceback intact.
            raise

        # Update redirect loop/maze escaping state: only *consecutive*
        # redirects count, so any other status starts afresh.
        if not status.startswith("3"):
            previous_redirectors = set()

        # Handle non-SUCCESS headers, which don't have a response body
        # Inputs
        if status.startswith("1"):
            # The meta line carries the server's prompt; show it so the
            # user knows what they are being asked for.
            print(meta)
            if status == "11":
                # Status 11: read the answer without echoing it.
                user_input = getpass.getpass("> ")
            else:
                user_input = input("> ")
            gi = gi.query(user_input)
            continue

        # Redirects
        elif status.startswith("3"):
            new_gi = GeminiItem(gi.absolutise_url(meta))
            if new_gi.url == gi.url:
                raise RuntimeError("URL redirects to itself!")
            elif new_gi.url in previous_redirectors:
                raise RuntimeError("Caught in redirect loop!")
            elif len(previous_redirectors) == _MAX_REDIRECTS:
                raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
            # Never follow cross-domain redirects without asking
            elif new_gi.host != gi.host:
                follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
            # Never follow cross-protocol redirects without asking
            elif new_gi.scheme != gi.scheme:
                follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url)
            # Don't follow *any* redirect without asking if auto-follow is off
            elif not self.options["auto_follow_redirects"]:
                follow = input("Follow redirect to %s? (y/n) " % new_gi.url)
            # Otherwise, follow away
            else:
                follow = "yes"
            if follow.strip().lower() not in ("y", "yes"):
                raise UserAbortException()
            ui_out.debug("Following redirect to %s." % new_gi.url)
            ui_out.debug("This is consecutive redirect number %d." % len(previous_redirectors))
            previous_redirectors.add(gi.url)
            if status == "31":
                # Permanent redirect
                self.permanent_redirects[gi.url] = new_gi.url
            gi = new_gi
            continue

        # Errors
        elif status.startswith("4") or status.startswith("5"):
            raise RuntimeError(meta)

        # Client cert
        elif status.startswith("6"):
            self._handle_cert_request(meta, status, gi.host)
            continue

        # Invalid status
        elif not status.startswith("2"):
            raise RuntimeError("Server returned undefined status code %s!" % status)

        # If we're here, this must be a success and there's a response body,
        # so break out of the request loop
        assert status.startswith("2")
        break

    # Fill in default MIME type or validate a provided one
    mime = meta
    if mime == "":
        mime = "text/gemini; charset=utf-8"
    mime, mime_options = cgi.parse_header(mime)
    if "charset" in mime_options:
        try:
            codecs.lookup(mime_options["charset"])
        except LookupError:
            # Report the charset the header actually declared.
            raise RuntimeError("Header declared unknown encoding %s" % mime_options["charset"])

    # Save response body to disk
    body, size, filename = self._write_response_to_file(mime, mime_options, f, destination)
    ui_out.debug("Wrote %d byte response to %s." % (size, filename))

    # Maintain cache and log
    if self.options["cache"]:
        self.cache.add(gi.url, mime, filename)
    self._log_visit(gi, address, size)

    return gi, mime, body, filename
|
||||||
|
|
||||||
|
def _send_request(self, gi):
|
||||||
|
"""Send a selector to a given host and port.
|
||||||
|
Returns the resolved address and binary file with the reply."""
|
||||||
|
|
||||||
|
# Figure out which host to connect to
|
||||||
|
if gi.scheme == "gemini":
|
||||||
|
# For Gemini requests, connect to the host and port specified in the URL
|
||||||
|
host, port = gi.host, gi.port
|
||||||
|
elif gi.scheme == "gopher":
|
||||||
|
# For Gopher requests, use the configured proxy
|
||||||
|
host, port = self.options["gopher_proxy"].rsplit(":", 1)
|
||||||
|
ui_out.debug("Using gopher proxy: " + self.options["gopher_proxy"])
|
||||||
|
elif gi.scheme in ("http", "https"):
|
||||||
|
host, port = self.options["http_proxy"].rsplit(":",1)
|
||||||
|
ui_out.debug("Using http proxy: " + self.options["http_proxy"])
|
||||||
|
|
||||||
# Be careful with client certificates!
|
# Be careful with client certificates!
|
||||||
# Are we crossing a domain boundary?
|
# Are we crossing a domain boundary?
|
||||||
|
@ -421,133 +538,6 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
print("Remaining unidentified.")
|
print("Remaining unidentified.")
|
||||||
self.client_certs.pop(gi.host)
|
self.client_certs.pop(gi.host)
|
||||||
|
|
||||||
# Send request to server
|
|
||||||
try:
|
|
||||||
status, meta, address, f = self._send_request(gi)
|
|
||||||
except Exception as err:
|
|
||||||
if isinstance(err, socket.gaierror):
|
|
||||||
self.log["dns_failures"] += 1
|
|
||||||
elif isinstance(err, ConnectionRefusedError):
|
|
||||||
self.log["refused_connections"] += 1
|
|
||||||
elif isinstance(err, ConnectionResetError):
|
|
||||||
self.log["reset_connections"] += 1
|
|
||||||
elif isinstance(err, (TimeoutError, socket.timeout)):
|
|
||||||
self.log["timeouts"] += 1
|
|
||||||
raise err
|
|
||||||
|
|
||||||
# Update redirect loop/maze escaping state
|
|
||||||
if not status.startswith("3"):
|
|
||||||
self.previous_redirectors = set()
|
|
||||||
|
|
||||||
# Handle non-SUCCESS headers, which don't have a response body
|
|
||||||
# Inputs
|
|
||||||
if status.startswith("1"):
|
|
||||||
print(meta)
|
|
||||||
if status == "11":
|
|
||||||
user_input = getpass.getpass("> ")
|
|
||||||
else:
|
|
||||||
user_input = input("> ")
|
|
||||||
return self._fetch_over_network(gi.query(user_input))
|
|
||||||
|
|
||||||
# Redirects
|
|
||||||
elif status.startswith("3"):
|
|
||||||
new_gi = GeminiItem(gi.absolutise_url(meta))
|
|
||||||
if new_gi.url == gi.url:
|
|
||||||
raise RuntimeError("URL redirects to itself!")
|
|
||||||
elif new_gi.url in self.previous_redirectors:
|
|
||||||
raise RuntimeError("Caught in redirect loop!")
|
|
||||||
elif len(self.previous_redirectors) == _MAX_REDIRECTS:
|
|
||||||
raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
|
|
||||||
# Never follow cross-domain redirects without asking
|
|
||||||
elif new_gi.host != gi.host:
|
|
||||||
follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
|
|
||||||
# Never follow cross-protocol redirects without asking
|
|
||||||
elif new_gi.scheme != gi.scheme:
|
|
||||||
follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url)
|
|
||||||
# Don't follow *any* redirect without asking if auto-follow is off
|
|
||||||
elif not self.options["auto_follow_redirects"]:
|
|
||||||
follow = input("Follow redirect to %s? (y/n) " % new_gi.url)
|
|
||||||
# Otherwise, follow away
|
|
||||||
else:
|
|
||||||
follow = "yes"
|
|
||||||
if follow.strip().lower() not in ("y", "yes"):
|
|
||||||
raise UserAbortException()
|
|
||||||
ui_out.debug("Following redirect to %s." % new_gi.url)
|
|
||||||
ui_out.debug("This is consecutive redirect number %d." % len(self.previous_redirectors))
|
|
||||||
self.previous_redirectors.add(gi.url)
|
|
||||||
if status == "31":
|
|
||||||
# Permanent redirect
|
|
||||||
self.permanent_redirects[gi.url] = new_gi.url
|
|
||||||
return self._fetch_over_network(new_gi)
|
|
||||||
|
|
||||||
# Errors
|
|
||||||
elif status.startswith("4") or status.startswith("5"):
|
|
||||||
raise RuntimeError(meta)
|
|
||||||
|
|
||||||
# Client cert
|
|
||||||
elif status.startswith("6"):
|
|
||||||
self._handle_cert_request(meta, status, gi.host)
|
|
||||||
return self._fetch_over_network(gi)
|
|
||||||
|
|
||||||
# Invalid status
|
|
||||||
elif not status.startswith("2"):
|
|
||||||
raise RuntimeError("Server returned undefined status code %s!" % status)
|
|
||||||
|
|
||||||
# If we're here, this must be a success and there's a response body
|
|
||||||
assert status.startswith("2")
|
|
||||||
|
|
||||||
mime = meta
|
|
||||||
if mime == "":
|
|
||||||
mime = "text/gemini; charset=utf-8"
|
|
||||||
mime, mime_options = cgi.parse_header(mime)
|
|
||||||
if "charset" in mime_options:
|
|
||||||
try:
|
|
||||||
codecs.lookup(mime_options["charset"])
|
|
||||||
except LookupError:
|
|
||||||
raise RuntimeError("Header declared unknown encoding %s" % value)
|
|
||||||
|
|
||||||
# Read the response body over the network
|
|
||||||
body = f.read()
|
|
||||||
|
|
||||||
# Save the result in a temporary file
|
|
||||||
## Set file mode
|
|
||||||
if mime.startswith("text/"):
|
|
||||||
mode = "w"
|
|
||||||
encoding = mime_options.get("charset", "UTF-8")
|
|
||||||
try:
|
|
||||||
body = body.decode(encoding)
|
|
||||||
except UnicodeError:
|
|
||||||
raise RuntimeError("Could not decode response body using %s encoding declared in header!" % encoding)
|
|
||||||
else:
|
|
||||||
mode = "wb"
|
|
||||||
encoding = None
|
|
||||||
## Write
|
|
||||||
tmpf = tempfile.NamedTemporaryFile(mode, encoding=encoding, delete=False)
|
|
||||||
size = tmpf.write(body)
|
|
||||||
tmpf.close()
|
|
||||||
self.tmp_filename = tmpf.name
|
|
||||||
ui_out.debug("Wrote %d byte response to %s." % (size, self.tmp_filename))
|
|
||||||
|
|
||||||
# Maintain cache and log
|
|
||||||
if self.options["cache"]:
|
|
||||||
self.cache.add(gi.url, mime, self.tmp_filename)
|
|
||||||
self._log_visit(gi, address, size)
|
|
||||||
|
|
||||||
return gi, mime, body, self.tmp_filename
|
|
||||||
|
|
||||||
def _send_request(self, gi):
|
|
||||||
"""Send a selector to a given host and port.
|
|
||||||
Returns the resolved address and binary file with the reply."""
|
|
||||||
if gi.scheme == "gemini":
|
|
||||||
# For Gemini requests, connect to the host and port specified in the URL
|
|
||||||
host, port = gi.host, gi.port
|
|
||||||
elif gi.scheme == "gopher":
|
|
||||||
# For Gopher requests, use the configured proxy
|
|
||||||
host, port = self.options["gopher_proxy"].rsplit(":", 1)
|
|
||||||
ui_out.debug("Using gopher proxy: " + self.options["gopher_proxy"])
|
|
||||||
elif gi.scheme in ("http", "https"):
|
|
||||||
host, port = self.options["http_proxy"].rsplit(":",1)
|
|
||||||
ui_out.debug("Using http proxy: " + self.options["http_proxy"])
|
|
||||||
# Do DNS resolution
|
# Do DNS resolution
|
||||||
addresses = self._get_addresses(host, port)
|
addresses = self._get_addresses(host, port)
|
||||||
|
|
||||||
|
@ -646,6 +636,37 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
|
|
||||||
return status, meta, address, f
|
return status, meta, address, f
|
||||||
|
|
||||||
|
def _write_response_to_file(self, mime, mime_options, f, destination):
|
||||||
|
# Read the response body over the network
|
||||||
|
body = f.read()
|
||||||
|
|
||||||
|
# Save the result to a temporary file
|
||||||
|
|
||||||
|
## Determine file mode
|
||||||
|
if mime.startswith("text/"):
|
||||||
|
mode = "w"
|
||||||
|
encoding = mime_options.get("charset", "UTF-8")
|
||||||
|
try:
|
||||||
|
body = body.decode(encoding)
|
||||||
|
except UnicodeError:
|
||||||
|
raise RuntimeError("Could not decode response body using %s encoding declared in header!" % encoding)
|
||||||
|
else:
|
||||||
|
mode = "wb"
|
||||||
|
encoding = None
|
||||||
|
|
||||||
|
## Use a temporary file if a filename was not provided
|
||||||
|
if destination:
|
||||||
|
fp = open(destination, mode, encoding=encoding)
|
||||||
|
else:
|
||||||
|
fp = tempfile.NamedTemporaryFile(mode, encoding=encoding, delete=False)
|
||||||
|
self.tmp_filename = fp.name
|
||||||
|
|
||||||
|
## Write
|
||||||
|
size = fp.write(body)
|
||||||
|
fp.close()
|
||||||
|
|
||||||
|
return body, size, destination or self.tmp_filename
|
||||||
|
|
||||||
def _get_addresses(self, host, port):
|
def _get_addresses(self, host, port):
|
||||||
# DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled
|
# DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled
|
||||||
if ":" in host:
|
if ":" in host:
|
||||||
|
@ -1296,37 +1317,39 @@ Use 'ls -l' to see URLs."""
|
||||||
print("You must provide an index, a filename, or both.")
|
print("You must provide an index, a filename, or both.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Next, fetch the item to save, if it's not the current one.
|
# Determine GI to save
|
||||||
if index:
|
if index:
|
||||||
last_gi = self.gi
|
|
||||||
try:
|
try:
|
||||||
gi = self.lookup[index-1]
|
gi = self.lookup[index-1]
|
||||||
self._go_to_gi(gi, update_hist = False, handle = False)
|
saving_current = False
|
||||||
except IndexError:
|
except IndexError:
|
||||||
print ("Index too high!")
|
print ("Index too high!")
|
||||||
self.gi = last_gi
|
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
gi = self.gi
|
gi = self.gi
|
||||||
|
saving_current = True
|
||||||
|
|
||||||
# Derive filename from current GI's path, if one hasn't been set
|
# Derive filename from current GI's path, if one hasn't been set
|
||||||
if not filename:
|
if not filename:
|
||||||
filename = os.path.basename(gi.path)
|
filename = os.path.basename(gi.path)
|
||||||
|
|
||||||
# Check for filename collisions and actually do the save if safe
|
# Check for filename collisions
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
print("File %s already exists!" % filename)
|
print("File %s already exists!" % filename)
|
||||||
else:
|
return
|
||||||
|
|
||||||
|
# Actually do the save operation
|
||||||
|
if saving_current:
|
||||||
# Don't use _get_active_tmpfile() here, because we want to save the
|
# Don't use _get_active_tmpfile() here, because we want to save the
|
||||||
# "source code" of menus, not the rendered view - this way AV-98
|
# "source code" of menus, not the rendered view - this way AV-98
|
||||||
# can navigate to it later.
|
# can navigate to it later.
|
||||||
src = gi.path if gi.scheme == "file" else self.tmp_filename
|
src = gi.path if gi.scheme == "file" else self.tmp_filename
|
||||||
shutil.copyfile(src, filename)
|
shutil.copyfile(src, filename)
|
||||||
print("Saved to %s" % filename)
|
else:
|
||||||
|
## Download an item that's not the current one
|
||||||
|
self._fetch_over_network(gi, filename)
|
||||||
|
|
||||||
# Restore gi if necessary
|
print("Saved to %s" % filename)
|
||||||
if index != None:
|
|
||||||
self._go_to_gi(last_gi, handle=False)
|
|
||||||
|
|
||||||
@needs_gi
|
@needs_gi
|
||||||
def do_url(self, *args):
|
def do_url(self, *args):
|
||||||
|
|
Loading…
Reference in New Issue