basic --synconly mode is now working

This commit is contained in:
Lionel Dricot 2021-12-10 11:27:48 +01:00
parent 6586f0834a
commit 9da4fd150b
2 changed files with 37 additions and 28 deletions

View File

@ -1,17 +1,20 @@
# AV-98 # AV-98
This fork is an experiment by Ploum to add offline capabilities to AV-98. This fork is an experiment by Ploum (gemini://rawtext.club/~ploum) to add offline capabilities to AV-98.
In AV-98, use the command "offline" to switch between offline/online. In AV-98, use the command "offline" to switch between offline/online.
While offline, only content cached in .cache/av-98/ is accessed. While offline, only content cached in .cache/av-98/ is accessed.
Use "av98.py --synconly" to build a cache containing your bookmarks and all links in your bookmarks. It is quite slow, be patient.
* FIXME: doesn't handle MIME types other than text/gemini * FIXME: doesn't handle MIME types other than text/gemini
* FIXME: consider root file is always index.gmi * FIXME: consider root file is always index.gmi
* FIXME: bug with "submit" form, I dont know why * FIXME: if a file exists in the cache where it should have been a folder, it fails (should instead remove the file)
* FIXME: if a file exist where it should have been a folder, it fails (should instead remove the file) * FIXME: xml files are sometimes opened when in --synconly
* FIXME: certificate errors are not handled in --synconly
* TODO: automatically populate the cache, not only while browsing.
* TODO: handle requests made offline and retrieve them later * TODO: handle requests made offline and retrieve them later
* TODO: know when to refresh the cache * TODO: know when to refresh the cache instead of always downloading everything.
This is a fork of the original [AV-98](https://tildegit.org/solderpunk/AV-98) This is a fork of the original [AV-98](https://tildegit.org/solderpunk/AV-98)
by Solderpunk. by Solderpunk.

52
av98.py
View File

@ -296,6 +296,7 @@ class GeminiClient(cmd.Cmd):
"debug" : False, "debug" : False,
"ipv6" : True, "ipv6" : True,
"timeout" : 600, "timeout" : 600,
"short_timeout" : 5,
"width" : 80, "width" : 80,
"auto_follow_redirects" : True, "auto_follow_redirects" : True,
"gopher_proxy" : None, "gopher_proxy" : None,
@ -342,7 +343,7 @@ class GeminiClient(cmd.Cmd):
and calling a handler program, and updating the history.""" and calling a handler program, and updating the history."""
# Don't try to speak to servers running other protocols # Don't try to speak to servers running other protocols
if gi.scheme in ("http", "https"): if gi.scheme in ("http", "https") and not self.sync_only:
if not self.options.get("http_proxy",None): if not self.options.get("http_proxy",None):
webbrowser.open_new_tab(gi.url) webbrowser.open_new_tab(gi.url)
return return
@ -352,7 +353,8 @@ class GeminiClient(cmd.Cmd):
if resp.strip().lower() in ("n","no"): if resp.strip().lower() in ("n","no"):
webbrowser.open_new_tab(gi.url) webbrowser.open_new_tab(gi.url)
return return
elif gi.scheme == "gopher" and not self.options.get("gopher_proxy", None): elif gi.scheme == "gopher" and not self.options.get("gopher_proxy", None)\
and not self.sync_only:
print("""AV-98 does not speak Gopher natively. print("""AV-98 does not speak Gopher natively.
However, you can use `set gopher_proxy hostname:port` to tell it about a However, you can use `set gopher_proxy hostname:port` to tell it about a
Gopher-to-Gemini proxy (such as a running Agena instance), in which case Gopher-to-Gemini proxy (such as a running Agena instance), in which case
@ -369,7 +371,7 @@ you'll be able to transparently follow links to Gopherspace!""")
else: else:
print("Sorry, that file does not exist.") print("Sorry, that file does not exist.")
return return
elif gi.scheme not in ("gemini", "gopher"): elif gi.scheme not in ("gemini", "gopher") and not self.sync_only:
print("Sorry, no support for {} links.".format(gi.scheme)) print("Sorry, no support for {} links.".format(gi.scheme))
return return
@ -413,16 +415,19 @@ you'll be able to transparently follow links to Gopherspace!""")
print("ERROR: DNS error!") print("ERROR: DNS error!")
elif isinstance(err, ConnectionRefusedError): elif isinstance(err, ConnectionRefusedError):
self.log["refused_connections"] += 1 self.log["refused_connections"] += 1
print("ERROR: Connection refused!") print("ERROR1: Connection refused!")
elif isinstance(err, ConnectionResetError): elif isinstance(err, ConnectionResetError):
self.log["reset_connections"] += 1 self.log["reset_connections"] += 1
print("ERROR: Connection reset!") print("ERROR2: Connection reset!")
elif isinstance(err, (TimeoutError, socket.timeout)): elif isinstance(err, (TimeoutError, socket.timeout)):
self.log["timeouts"] += 1 self.log["timeouts"] += 1
print("""ERROR: Connection timed out! if not self.sync_only:
Slow internet connection? Use 'set timeout' to be more patient.""") print("""ERROR3: Connection timed out!
Slow internet connection? Use 'set timeout' to be more patient.""")
else: else:
print("ERROR: " + str(err)) # we fail silently when sync_only
if not self.sync_only:
print("ERROR4: " + str(err))
return return
# Pass file to handler, unless we were asked not to # Pass file to handler, unless we were asked not to
@ -655,7 +660,11 @@ you'll be able to transparently follow links to Gopherspace!""")
for address in addresses: for address in addresses:
self._debug("Connecting to: " + str(address[4])) self._debug("Connecting to: " + str(address[4]))
s = socket.socket(address[0], address[1]) s = socket.socket(address[0], address[1])
s.settimeout(self.options["timeout"]) if self.sync_only:
timeout = self.options["short_timeout"]
else:
timeout = self.options["timeout"]
s.settimeout(timeout)
s = context.wrap_socket(s, server_hostname = gi.host) s = context.wrap_socket(s, server_hostname = gi.host)
try: try:
s.connect(address[4]) s.connect(address[4])
@ -1720,22 +1729,19 @@ def main():
# Endless interpret loop # Endless interpret loop
if args.synconly: if args.synconly:
gc.onecmd("sync_only") gc.onecmd("sync_only")
print("TODO : explore bms until depth N")
gc.onecmd("bm") gc.onecmd("bm")
#gc.onecmd("t *")
# only one URL fetched while debugging # only one URL fetched while debugging
gc.onecmd("t 1 2") #gc.onecmd("t 1")
# root urls to explore with depth=1 original_lookup = gc.lookup
nb_urls = len(gc.waypoints) to_visit = gc.lookup
i = 0 for j in original_lookup:
while len(gc.waypoints) > 0: print("Caching: ",j.url)
gc.onecmd("t") gc.onecmd("go %s" %j.url)
gc.onecmd("url") # Depth = 1
if i < nb_urls: temp_lookup = gc.lookup
# FIXME : find when it crashes for k in temp_lookup:
print(i, "catching from url:") print(" -> ",k.url)
#gc.onecmd("t *") gc.onecmd("go %s" %k.url)
i += 1
gc.onecmd("blackbox") gc.onecmd("blackbox")
else: else:
while True: while True: