HTTP downloads which are not to be put in the tour are now limited to 20 Mo, to avoid downloading large unwanted files

This commit is contained in:
Lionel Dricot 2022-03-03 16:16:32 +01:00
parent 484987e21f
commit d28a3423d6
2 changed files with 16 additions and 11 deletions

View File

@@ -1,7 +1,7 @@
# Offpunk History
## 0.5 - Unreleased
- When syncing, deep http links with content above 20Mo are not downloaded.
- HTTP links with content above 20 Mo are not downloaded during sync (except when explicitly requested); see the sketch after this list
- Improving subscriptions with more feedback and better detection
- Avoid deprecated SSL methods (thanks Phoebos for the report)
- Fixed multiple crashes
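
As a rough illustration of the behaviour described above: the option name max_size_download and the Mo-to-bytes conversion come from the diff below, but the helper function itself is hypothetical and not part of Offpunk's actual API.

    # Hypothetical helper illustrating the new size cap; not Offpunk's real code.
    def compute_max_download(options, limit_size):
        """Return the download cap in bytes, or None when no cap applies."""
        if limit_size:
            # "max_size_download" is expressed in Mo (megabytes); the diff defaults it to 20.
            return int(options.get("max_size_download", 20)) * 1000000
        return None

    # During sync, only fetches that will NOT be added to the tour are capped:
    print(compute_max_download({"max_size_download": 20}, limit_size=True))   # 20000000
    print(compute_max_download({"max_size_download": 20}, limit_size=False))  # None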

View File

@@ -1455,7 +1455,8 @@ class GeminiClient(cmd.Cmd):
"http_proxy": None,
"https_everywhere": False,
"archives_size" : 100,
"history_size" : 100
"history_size" : 100,
"max_size_download " : 20,
}
global TERM_WIDTH
TERM_WIDTH = self.options["width"]
@@ -1486,7 +1487,8 @@ class GeminiClient(cmd.Cmd):
(hostname text, address text, fingerprint text,
first_seen date, last_seen date, count integer)""")
def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True,readable=True):
def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True,
readable=True, limit_size=False):
"""This method might be considered "the heart of Offpunk".
Everything involved in fetching a gemini resource happens here:
sending the request over the network, parsing the response,
@@ -1545,9 +1547,9 @@
try:
if gi.scheme in ("http", "https"):
if self.support_http:
if self.sync_only:
if limit_size:
# Cap automatic downloads to max_size_download Mo (20 by default)
max_download = 20000000
max_download = int(self.options["max_size_download"])*1000000
else:
max_download = None
gi = self._fetch_http(gi,max_length=max_download)
@@ -1648,9 +1650,10 @@
else:
length = 0
if max_length and length > max_length:
print("TEST :%s has been cancelled because its size is above limit"%gi.url)
response.close()
err = "Size of %s is %s ko\n"%(gi.url,length/1000)
err += "Offpunk only download automatically content under %s\n" %max_length
err = "Size of %s is %s Mo\n"%(gi.url,length/1000000)
err += "Offpunk only download automatically content under %s Mo\n" %(max_length/1000000)
err += "To retrieve this content anyway, type 'reload'."
gi.set_error(err)
return gi
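
For context, here is a standalone sketch of the size check performed above, assuming a requests-based fetch (consistent with the response.close() call in the diff); fetch_with_cap and its signature are illustrative, not Offpunk's actual _fetch_http.

    import requests

    def fetch_with_cap(url, max_length=None):
        """Refuse the download when the advertised Content-Length exceeds max_length bytes."""
        response = requests.get(url, stream=True, timeout=10)
        length = int(response.headers.get("Content-Length", 0))
        if max_length and length > max_length:
            response.close()
            err = "Size of %s is %s Mo\n" % (url, length / 1000000)
            err += "Offpunk only automatically downloads content under %s Mo\n" % (max_length / 1000000)
            err += "To retrieve this content anyway, type 'reload'."
            return None, err
        return response.content, None

    # Example use (hypothetical URL):
    # body, err = fetch_with_cap("https://example.com/big.iso", max_length=20000000)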
@@ -3405,16 +3408,18 @@ Argument : duration of cache validity (in seconds)."""
#Did we already have a cache (even an old one)?
isnew = not gitem.is_cache_valid()
print("%s [%s/%s] Fetch "%(strin,count[0],count[1]),gitem.url,end=endline)
self._go_to_gi(gitem,update_hist=False)
#If not saving to tour, then we should limit download size
limit = not savetotour
self._go_to_gi(gitem,update_hist=False,limit_size=limit)
if savetotour and isnew and gitem.is_cache_valid():
#we add to the next tour only if we managed to cache
#the resource
add_to_tour(gitem)
#Now, recursive call, even if we didn't refresh the cache
if depth > 0:
#we only savetotour at the first level of recursion
if depth > 1:
savetotour=False
#we should only savetotour at the first level of recursion.
#The code for this was removed, so we currently savetotour
#at every level of recursion.
links = gitem.get_links()
subcount = [0,len(links)]
d = depth - 1
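
Finally, a condensed, hypothetical outline of the recursive sync fetch shown in this hunk (fetch and add_to_tour stand in for Offpunk's real methods), illustrating how limit_size is derived from savetotour.

    def fetch_recursively(gitem, depth, savetotour, fetch, add_to_tour):
        """Illustrative only: downloads are capped when the item will not be offered in the tour."""
        limit = not savetotour
        fetch(gitem, update_hist=False, limit_size=limit)
        if savetotour and gitem.is_cache_valid():
            # Only items that were actually cached end up in the next tour.
            add_to_tour(gitem)
        if depth > 0:
            # Note: savetotour is currently kept at every recursion level
            # (the "depth > 1" reset was removed by this commit).
            for link in gitem.get_links():
                fetch_recursively(link, depth - 1, savetotour, fetch, add_to_tour)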