netcache now works for gemini

This commit is contained in:
Lionel Dricot 2023-07-18 00:39:06 +02:00
parent a7c8ed33d5
commit b745b04f7a
2 changed files with 121 additions and 133 deletions

View File

@ -1,8 +1,11 @@
#!/bin/python #!/bin/python
import os import os
import sys
import urllib.parse import urllib.parse
import argparse import argparse
import requests import requests
import codecs
import getpass
import socket import socket
import ssl import ssl
from ssl import CertificateError from ssl import CertificateError
@ -34,6 +37,10 @@ standard_ports = {
} }
default_protocol = "gemini" default_protocol = "gemini"
CRLF = '\r\n'
DEFAULT_TIMEOUT = 10
_MAX_REDIRECTS = 5
def parse_mime(mime): def parse_mime(mime):
options = {} options = {}
if mime: if mime:
@ -343,7 +350,7 @@ def _fetch_gopher(url,timeout=10):
else: else:
# by default, we should consider Gopher # by default, we should consider Gopher
mime = "text/gopher" mime = "text/gopher"
cache = write_body(response,mime) cache = write_body(url,response,mime)
return cache return cache
def _fetch_finger(url,timeout=10): def _fetch_finger(url,timeout=10):
@ -384,7 +391,7 @@ def _fetch_spartan(url):
body = fp.read() body = fp.read()
if meta.startswith("text"): if meta.startswith("text"):
body = body.decode("UTF-8") body = body.decode("UTF-8")
cache = write_body(body,meta) cache = write_body(url,body,meta)
elif code == 3: elif code == 3:
redirect_url = url_parts._replace(path=meta).geturl() redirect_url = url_parts._replace(path=meta).geturl()
else: else:
@ -395,7 +402,7 @@ def _fetch_spartan(url):
cache = _fetch_spartan(redirect_url) cache = _fetch_spartan(redirect_url)
return cache return cache
def _fetch_gemini(url): def _fetch_gemini(url,options={}):
cache = None cache = None
url_parts = urllib.parse.urlparse(url) url_parts = urllib.parse.urlparse(url)
host = url_parts.hostname host = url_parts.hostname
@ -404,34 +411,35 @@ def _fetch_gemini(url):
query = url_parts.query query = url_parts.query
# Be careful with client certificates! # Be careful with client certificates!
# Are we crossing a domain boundary? # Are we crossing a domain boundary?
if self.active_cert_domains and host not in self.active_cert_domains: # TODO :code should be adapted to netcache
if self.active_is_transient: # if self.active_cert_domains and host not in self.active_cert_domains:
print("Permanently delete currently active transient certificate?") # if self.active_is_transient:
resp = input("Y/N? ") # print("Permanently delete currently active transient certificate?")
if resp.strip().lower() in ("y", "yes"): # resp = input("Y/N? ")
print("Destroying certificate.") # if resp.strip().lower() in ("y", "yes"):
self._deactivate_client_cert() # print("Destroying certificate.")
else: # self._deactivate_client_cert()
print("Staying here.") # else:
raise UserAbortException() # print("Staying here.")
else: # raise UserAbortException()
print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?") # else:
resp = input("Y/N? ") # print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?")
if resp.strip().lower() in ("n", "no"): # resp = input("Y/N? ")
print("Keeping certificate active for {}".format(host)) # if resp.strip().lower() in ("n", "no"):
else: # print("Keeping certificate active for {}".format(host))
print("Deactivating certificate.") # else:
self._deactivate_client_cert() # print("Deactivating certificate.")
# self._deactivate_client_cert()
# Suggest reactivating previous certs #
if not self.client_certs["active"] and host in self.client_certs: # # Suggest reactivating previous certs
print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(host)) # if not self.client_certs["active"] and host in self.client_certs:
resp = input("Y/N? ") # print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(host))
if resp.strip().lower() in ("y", "yes"): # resp = input("Y/N? ")
self._activate_client_cert(*self.client_certs[host]) # if resp.strip().lower() in ("y", "yes"):
else: # self._activate_client_cert(*self.client_certs[host])
print("Remaining unidentified.") # else:
self.client_certs.pop(host) # print("Remaining unidentified.")
# self.client_certs.pop(host)
# In AV-98, this was the _send_request method # In AV-98, this was the _send_request method
#Send a selector to a given host and port. #Send a selector to a given host and port.
@ -457,16 +465,6 @@ def _fetch_gemini(url):
# Prepare TLS context # Prepare TLS context
protocol = ssl.PROTOCOL_TLS_CLIENT if sys.version_info.minor >=6 else ssl.PROTOCOL_TLSv1_2 protocol = ssl.PROTOCOL_TLS_CLIENT if sys.version_info.minor >=6 else ssl.PROTOCOL_TLSv1_2
context = ssl.SSLContext(protocol) context = ssl.SSLContext(protocol)
# Use CAs or TOFU
#TODO : should we care about this options?
#if self.options["tls_mode"] == "ca":
# context.verify_mode = ssl.CERT_REQUIRED
# context.check_hostname = True
# context.load_default_certs()
#else:
# context.check_hostname = False
# context.verify_mode = ssl.CERT_NONE
context.check_hostname=False context.check_hostname=False
context.verify_mode = ssl.CERT_NONE context.verify_mode = ssl.CERT_NONE
# Impose minimum TLS version # Impose minimum TLS version
@ -487,57 +485,50 @@ def _fetch_gemini(url):
# Rely on the server to only support sensible things, I guess... # Rely on the server to only support sensible things, I guess...
pass pass
#TODO: Im here in the refactor #TODO: certificate handling to refactor
# Load client certificate if needed # # Load client certificate if needed
if self.client_certs["active"]: # if self.client_certs["active"]:
certfile, keyfile = self.client_certs["active"] # certfile, keyfile = self.client_certs["active"]
context.load_cert_chain(certfile, keyfile) # context.load_cert_chain(certfile, keyfile)
# Connect to remote host by any address possible # Connect to remote host by any address possible
err = None err = None
for address in addresses: for address in addresses:
self._debug("Connecting to: " + str(address[4])) s = socket.socket(address[0], address[1])
s = socket.socket(address[0], address[1]) if "timeout" in options:
if self.sync_only: timeout = options["timeout"]
timeout = self.options["short_timeout"]
else:
timeout = self.options["timeout"]
s.settimeout(timeout)
s = context.wrap_socket(s, server_hostname = host)
try:
s.connect(address[4])
break
except OSError as e:
err = e
else: else:
# If we couldn't connect to *any* of the addresses, just timeout = DEFAULT_TIMEOUT
# bubble up the exception from the last attempt and deny s.settimeout(timeout)
# knowledge of earlier failures. s = context.wrap_socket(s, server_hostname = host)
raise err try:
if sys.version_info.minor >=5: s.connect(address[4])
self._debug("Established {} connection.".format(s.version())) break
self._debug("Cipher is: {}.".format(s.cipher())) except OSError as e:
# Do TOFU err = e
if self.options["tls_mode"] != "ca": else:
cert = s.getpeercert(binary_form=True) # If we couldn't connect to *any* of the addresses, just
self._validate_cert(address[4][0], host, cert) # bubble up the exception from the last attempt and deny
# Remember that we showed the current cert to this domain... # knowledge of earlier failures.
if self.client_certs["active"]: raise err
self.active_cert_domains.append(host)
self.client_certs[host] = self.client_certs["active"] # Do TOFU
# Send request and wrap response in a file descriptor cert = s.getpeercert(binary_form=True)
url = urllib.parse.urlparse(gi.url) # TODO: another cert handling to refactor
new_netloc = host # Remember that we showed the current cert to this domain...
if port != 1965: # self._validate_cert(address[4][0], host, cert)
new_netloc += ":" + str(port) # if self.client_certs["active"]:
url = urllib.parse.urlunparse(url._replace(netloc=new_netloc)) # self.active_cert_domains.append(host)
self._debug("Sending %s<CRLF>" % url) # self.client_certs[host] = self.client_certs["active"]
s.sendall((url + CRLF).encode("UTF-8")) # Send request and wrap response in a file descriptor
mf= s.makefile(mode = "rb") url = urllib.parse.urlparse(url)
return address, mf new_netloc = host
## if port != standard_ports["gemini"]:
## end of send_request new_netloc += ":" + str(port)
TODO :address, f = self._send_request(gi) url = urllib.parse.urlunparse(url._replace(netloc=new_netloc))
s.sendall((url + CRLF).encode("UTF-8"))
f= s.makefile(mode = "rb")
## end of send_request in AV98
# Spec dictates <META> should not exceed 1024 bytes, # Spec dictates <META> should not exceed 1024 bytes,
# so maximum valid header length is 1027 bytes. # so maximum valid header length is 1027 bytes.
header = f.readline(1027) header = f.readline(1027)
@ -545,7 +536,6 @@ def _fetch_gemini(url):
if not header or header[-1] != '\n': if not header or header[-1] != '\n':
raise RuntimeError("Received invalid header from server!") raise RuntimeError("Received invalid header from server!")
header = header.strip() header = header.strip()
self._debug("Response header: %s." % header)
# Validate header # Validate header
status, meta = header.split(maxsplit=1) status, meta = header.split(maxsplit=1)
if len(meta) > 1024 or len(status) != 2 or not status.isnumeric(): if len(meta) > 1024 or len(status) != 2 or not status.isnumeric():
@ -553,62 +543,63 @@ def _fetch_gemini(url):
raise RuntimeError("Received invalid header from server!") raise RuntimeError("Received invalid header from server!")
# Update redirect loop/maze escaping state # Update redirect loop/maze escaping state
if not status.startswith("3"): if not status.startswith("3"):
self.previous_redirectors = set() previous_redirectors = set()
#TODO FIXME
else:
#we set a previous_redirectors anyway because refactoring in progress
previous_redirectors = set()
# Handle non-SUCCESS headers, which don't have a response body # Handle non-SUCCESS headers, which don't have a response body
# Inputs # Inputs
if status.startswith("1"): if status.startswith("1"):
if self.sync_only: print(meta)
return None if status == "11":
user_input = getpass.getpass("> ")
else: else:
print(meta) user_input = input("> ")
if status == "11": return _fetch_gemini(query(user_input))
user_input = getpass.getpass("> ")
else:
user_input = input("> ")
return self._fetch_over_network(query(user_input))
# Redirects # Redirects
elif status.startswith("3"): elif status.startswith("3"):
new_gi = GeminiItem(gi.absolutise_url(meta)) newurl = urllib.parse.urljoin(url,meta)
if new_gi.url == gi.url: if newurl == url:
raise RuntimeError("URL redirects to itself!") raise RuntimeError("URL redirects to itself!")
elif new_gi.url in self.previous_redirectors: elif newurl in previous_redirectors:
raise RuntimeError("Caught in redirect loop!") raise RuntimeError("Caught in redirect loop!")
elif len(self.previous_redirectors) == _MAX_REDIRECTS: elif len(previous_redirectors) == _MAX_REDIRECTS:
raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS) raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
elif self.sync_only: # TODO: redirections handling should be refactored
follow = self.automatic_choice # elif "interactive" in options and not options["interactive"]:
# Never follow cross-domain redirects without asking # follow = self.automatic_choice
elif new_gi.host.encode("idna") != gi.host.encode("idna"): # # Never follow cross-domain redirects without asking
follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url) # elif new_gi.host.encode("idna") != gi.host.encode("idna"):
# Never follow cross-protocol redirects without asking # follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
elif new_gi.scheme != gi.scheme: # # Never follow cross-protocol redirects without asking
follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url) # elif new_gi.scheme != gi.scheme:
# Don't follow *any* redirect without asking if auto-follow is off # follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url)
elif not self.options["auto_follow_redirects"]: # # Don't follow *any* redirect without asking if auto-follow is off
follow = input("Follow redirect to %s? (y/n) " % new_gi.url) # elif not self.options["auto_follow_redirects"]:
# Otherwise, follow away # follow = input("Follow redirect to %s? (y/n) " % new_gi.url)
# # Otherwise, follow away
else: else:
follow = "yes" follow = "yes"
if follow.strip().lower() not in ("y", "yes"): if follow.strip().lower() not in ("y", "yes"):
raise UserAbortException() raise UserAbortException()
self._debug("Following redirect to %s." % new_gi.url) previous_redirectors.add(url)
self._debug("This is consecutive redirect number %d." % len(self.previous_redirectors)) # if status == "31":
self.previous_redirectors.add(gi.url) # # Permanent redirect
if status == "31": # self.permanent_redirects[gi.url] = new_gi.url
# Permanent redirect return _fetch_gemini(newurl)
self.permanent_redirects[gi.url] = new_gi.url
return self._fetch_over_network(new_gi)
# Errors # Errors
elif status.startswith("4") or status.startswith("5"): elif status.startswith("4") or status.startswith("5"):
raise RuntimeError(meta) raise RuntimeError(meta)
# Client cert # Client cert
elif status.startswith("6"): # elif status.startswith("6"):
self._handle_cert_request(meta) # self._handle_cert_request(meta)
return self._fetch_over_network(gi) # return self._fetch_over_network(gi)
# Invalid status # Invalid status
elif not status.startswith("2"): elif not status.startswith("2"):
raise RuntimeError("Server returned undefined status code %s!" % status) raise RuntimeError("Server returned undefined status code %s!" % status)
# If we're here, this must be a success and there's a response body # If we're here, this must be a success and there's a response body
print("status :%s"%status)
assert status.startswith("2") assert status.startswith("2")
mime = meta mime = meta
# Read the response body over the network # Read the response body over the network
@ -634,14 +625,14 @@ def _fetch_gemini(url):
encoding declared in header!" % encoding) encoding declared in header!" % encoding)
else: else:
body = fbody body = fbody
gi.write_body(body,mime) cache = write_body(url,body,mime)
return gi return cache
def fetch(url): def fetch(url):
url = normalize_url(url) url = normalize_url(url)
path=None path=None
if "://" in url if "://" in url:
scheme = url.split("://")[0] scheme = url.split("://")[0]
if scheme not in standard_ports: if scheme not in standard_ports:
print("%s is not a supported protocol"%scheme) print("%s is not a supported protocol"%scheme)
@ -651,6 +642,8 @@ def fetch(url):
path=_fetch_gopher(url) path=_fetch_gopher(url)
elif scheme == "finger": elif scheme == "finger":
path=_fetch_finger(url) path=_fetch_finger(url)
elif scheme == "gemini":
patch=_fetch_gemini(url)
else: else:
print("scheme %s not implemented yet") print("scheme %s not implemented yet")
else: else:

View File

@ -21,10 +21,8 @@ __version__ = "1.9.2"
import argparse import argparse
import cmd import cmd
import codecs
import datetime import datetime
import fnmatch import fnmatch
import getpass
import glob import glob
import hashlib import hashlib
import io import io
@ -89,7 +87,6 @@ if os.path.exists(_old_config):
#if no XDG .local/share and not XDG .config, we use the old config #if no XDG .local/share and not XDG .config, we use the old config
if not os.path.exists(data_home) and os.path.exists(_old_config): if not os.path.exists(data_home) and os.path.exists(_old_config):
_DATA_DIR = _CONFIG_DIR _DATA_DIR = _CONFIG_DIR
_MAX_REDIRECTS = 5
_MAX_CACHE_SIZE = 10 _MAX_CACHE_SIZE = 10
_MAX_CACHE_AGE_SECS = 180 _MAX_CACHE_AGE_SECS = 180
@ -548,7 +545,6 @@ class GeminiItem():
def to_map_line(self): def to_map_line(self):
return "=> {} {}\n".format(self.url_mode(), self.get_page_title()) return "=> {} {}\n".format(self.url_mode(), self.get_page_title())
CRLF = '\r\n'
# Cheap and cheerful URL detector # Cheap and cheerful URL detector
def looks_like_url(word): def looks_like_url(word):
@ -611,7 +607,6 @@ class GeminiClient(cmd.Cmd):
self.marks = {} self.marks = {}
self.page_index = 0 self.page_index = 0
self.permanent_redirects = {} self.permanent_redirects = {}
self.previous_redirectors = set()
# Sync-only mode is restricted by design # Sync-only mode is restricted by design
self.visited_hosts = set() self.visited_hosts = set()
self.offline_only = False self.offline_only = False