Http downloads that are not to be put in the tour are now limited to 20Mo, to avoid downloading large unwanted files
This commit is contained in:
parent
484987e21f
commit
d28a3423d6
|
@@ -1,7 +1,7 @@
|
||||||
# Offpunk History
|
# Offpunk History
|
||||||
|
|
||||||
## 0.5 - Unreleased
|
## 0.5 - Unreleased
|
||||||
- When syncing, deep http links with content above 20Mo are not downloaded.
|
- Http links with content above 20Mo are not downloaded during sync (except when explicitly requested)
|
||||||
- Improving subscriptions with more feedback and better detection
|
- Improving subscriptions with more feedback and better detection
|
||||||
- Avoid deprecated SSL methods (thanks Phoebos for the report)
|
- Avoid deprecated SSL methods (thanks Phoebos for the report)
|
||||||
- Fixed multiple crashes
|
- Fixed multiple crashes
|
||||||
|
|
25
offpunk.py
25
offpunk.py
|
@@ -1455,7 +1455,8 @@ class GeminiClient(cmd.Cmd):
|
||||||
"http_proxy": None,
|
"http_proxy": None,
|
||||||
"https_everywhere": False,
|
"https_everywhere": False,
|
||||||
"archives_size" : 100,
|
"archives_size" : 100,
|
||||||
"history_size" : 100
|
"history_size" : 100,
|
||||||
|
"max_size_download" : 20,
|
||||||
}
|
}
|
||||||
global TERM_WIDTH
|
global TERM_WIDTH
|
||||||
TERM_WIDTH = self.options["width"]
|
TERM_WIDTH = self.options["width"]
|
||||||
|
@@ -1486,7 +1487,8 @@ class GeminiClient(cmd.Cmd):
|
||||||
(hostname text, address text, fingerprint text,
|
(hostname text, address text, fingerprint text,
|
||||||
first_seen date, last_seen date, count integer)""")
|
first_seen date, last_seen date, count integer)""")
|
||||||
|
|
||||||
def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True,readable=True):
|
def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True,\
|
||||||
|
readable=True,limit_size=False):
|
||||||
"""This method might be considered "the heart of Offpunk".
|
"""This method might be considered "the heart of Offpunk".
|
||||||
Everything involved in fetching a gemini resource happens here:
|
Everything involved in fetching a gemini resource happens here:
|
||||||
sending the request over the network, parsing the response,
|
sending the request over the network, parsing the response,
|
||||||
|
@@ -1545,9 +1547,9 @@ class GeminiClient(cmd.Cmd):
|
||||||
try:
|
try:
|
||||||
if gi.scheme in ("http", "https"):
|
if gi.scheme in ("http", "https"):
|
||||||
if self.support_http:
|
if self.support_http:
|
||||||
if self.sync_only:
|
if limit_size:
|
||||||
# Let’s cap automatic downloads to 20Mo
|
# Let’s cap automatic downloads to 20Mo
|
||||||
max_download = 20000000
|
max_download = int(self.options["max_size_download"])*1000000
|
||||||
else:
|
else:
|
||||||
max_download = None
|
max_download = None
|
||||||
gi = self._fetch_http(gi,max_length=max_download)
|
gi = self._fetch_http(gi,max_length=max_download)
|
||||||
|
@@ -1648,9 +1650,10 @@ class GeminiClient(cmd.Cmd):
|
||||||
else:
|
else:
|
||||||
length = 0
|
length = 0
|
||||||
if max_length and length > max_length:
|
if max_length and length > max_length:
|
||||||
|
print("TEST : %s has been cancelled because its size is above limit"%gi.url)
|
||||||
response.close()
|
response.close()
|
||||||
err = "Size of %s is %s ko\n"%(gi.url,length/1000)
|
err = "Size of %s is %s Mo\n"%(gi.url,length/1000000)
|
||||||
err += "Offpunk only download automatically content under %s\n" %max_length
|
err += "Offpunk only download automatically content under %s Mo\n" %(max_length/1000000)
|
||||||
err += "To retrieve this content anyway, type 'reload'."
|
err += "To retrieve this content anyway, type 'reload'."
|
||||||
gi.set_error(err)
|
gi.set_error(err)
|
||||||
return gi
|
return gi
|
||||||
|
@@ -3405,16 +3408,18 @@ Argument : duration of cache validity (in seconds)."""
|
||||||
#Did we already had a cache (even an old one) ?
|
#Did we already had a cache (even an old one) ?
|
||||||
isnew = not gitem.is_cache_valid()
|
isnew = not gitem.is_cache_valid()
|
||||||
print("%s [%s/%s] Fetch "%(strin,count[0],count[1]),gitem.url,end=endline)
|
print("%s [%s/%s] Fetch "%(strin,count[0],count[1]),gitem.url,end=endline)
|
||||||
self._go_to_gi(gitem,update_hist=False)
|
#If not saving to tour, then we should limit download size
|
||||||
|
limit = not savetotour
|
||||||
|
self._go_to_gi(gitem,update_hist=False,limit_size=limit)
|
||||||
if savetotour and isnew and gitem.is_cache_valid():
|
if savetotour and isnew and gitem.is_cache_valid():
|
||||||
#we add to the next tour only if we managed to cache
|
#we add to the next tour only if we managed to cache
|
||||||
#the ressource
|
#the ressource
|
||||||
add_to_tour(gitem)
|
add_to_tour(gitem)
|
||||||
#Now, recursive call, even if we didn’t refresh the cache
|
#Now, recursive call, even if we didn’t refresh the cache
|
||||||
if depth > 0:
|
if depth > 0:
|
||||||
#we only savetotour at the first level of recursion
|
#we should only savetotour at the first level of recursion
|
||||||
if depth > 1:
|
# The code for this was removed so, currently, we savetotour
|
||||||
savetotour=False
|
# at every level of recursion.
|
||||||
links = gitem.get_links()
|
links = gitem.get_links()
|
||||||
subcount = [0,len(links)]
|
subcount = [0,len(links)]
|
||||||
d = depth - 1
|
d = depth - 1
|
||||||
|
|
Loading…
Reference in New Issue