From 05da20801cf87fa12b2127ca626b154d79a3146a Mon Sep 17 00:00:00 2001 From: Lionel Dricot Date: Thu, 24 Mar 2022 22:00:04 +0100 Subject: [PATCH] Streaming servers without content-length are closed after 5Mo of download --- CHANGELOG | 1 + offpunk.py | 28 +++++++++++++++++++++------- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index ad0cd27..1fe1e0c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ # Offpunk History ## 1.3 - Unreleased +- Streaming URLs without a valid content-length are now closed after 5Mo of download (thanks to Eoin Carney for reporting the issue) - Fixed a crash when the cache is already a dir inside a dir. ## 1.2 - March 24th 2022 diff --git a/offpunk.py b/offpunk.py index 8560fe6..7fb4bac 100755 --- a/offpunk.py +++ b/offpunk.py @@ -1808,7 +1808,7 @@ class GeminiClient(cmd.Cmd): "tls_mode" : "tofu", "archives_size" : 200, "history_size" : 200, - "max_size_download" : 20, + "max_size_download" : 10, } global TERM_WIDTH TERM_WIDTH = self.options["width"] @@ -1969,9 +1969,17 @@ class GeminiClient(cmd.Cmd): print("You can use the ! command to specify another handler program or pipeline.") def _fetch_http(self,gi,max_length=None): + def set_error(item,length,max_length): + err = "Size of %s is %s Mo\n"%(item.url,length) + err += "Offpunk only download automatically content under %s Mo\n" %(max_length/1000000) + err += "To retrieve this content anyway, type 'reload'." 
+ item.set_error(err) + return item header = {} header["User-Agent"] = "Offpunk browser v%s"%_VERSION - with requests.get(gi.url,headers=header, stream=True) as response: + with requests.get(gi.url,headers=header, stream=True,timeout=5) as response: + #print("This is header for %s"%gi.url) + #print(response.headers) if "content-type" in response.headers: mime = response.headers['content-type'] else: @@ -1982,11 +1990,17 @@ class GeminiClient(cmd.Cmd): length = 0 if max_length and length > max_length: response.close() - err = "Size of %s is %s Mo\n"%(gi.url,length/1000000) - err += "Offpunk only download automatically content under %s Mo\n" %(max_length/1000000) - err += "To retrieve this content anyway, type 'reload'." - gi.set_error(err) - return gi + return set_error(gi,str(length/1000000),max_length) + elif max_length and length == 0: + body = b'' + for r in response.iter_content(): + body += r + #We divide max_size for streamed content + #in order to catch them faster + if sys.getsizeof(body) > max_length/2: + response.close() + return set_error(gi,"streaming",max_length) + response.close() else: body = response.content response.close()