diff --git a/CHANGELOG b/CHANGELOG index ac9d065..5503cc8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,8 +3,10 @@ ## 2.0-beta2 - unreleased Changes since beta1 - "--sync" can optionnaly take some lists as arguments, in order to make for specific sync +- improves handling of base64 images - fixes gophermap being considered as gemtext files - fixes opening mailto links +- attempt at hiding XMLParsedAsHTMLWarning from BS4 library ## 2.0-beta1 - September 05th 2023 This is an an experimental release. Bug reports and feedbacks are welcome on the offpunk-devel list. diff --git a/ansicat.py b/ansicat.py index 7a5e62f..535c923 100755 --- a/ansicat.py +++ b/ansicat.py @@ -13,6 +13,7 @@ import fnmatch import netcache import offthemes from offutils import run,term_width,is_local,looks_like_base64 +import base64 from offutils import _DATA_DIR try: from readability import Document @@ -982,6 +983,7 @@ class HtmlRenderer(AbstractRenderer): #4 followings line are there to translate the URL into cache path img = netcache.get_cache_path(imgurl) if imgdata: + os.makedirs(os.path.dirname(img), exist_ok=True) with open(img,"wb") as cached: cached.write(base64.b64decode(imgdata)) cached.close() @@ -1116,10 +1118,10 @@ class HtmlRenderer(AbstractRenderer): else: text += "[IMG]" if src: - links.append(src+" "+text) if not mode in self.images: self.images[mode] = [] - abs_url = urllib.parse.urljoin(self.url, src) + abs_url,data = looks_like_base64(src,self.url) + links.append(abs_url+" "+text) self.images[mode].append(abs_url) link_id = " [%s]"%(len(links)+startlinks) r.add_block(ansi_img) diff --git a/offutils.py b/offutils.py index 603901b..dd716cb 100644 --- a/offutils.py +++ b/offutils.py @@ -136,7 +136,11 @@ def looks_like_base64(src,baseurl): if src and src.startswith("data:image/"): if ";base64," in src: splitted = src.split(";base64,") - extension = splitted[0].strip("data:image/")[:3] + #splitted[0] is something like data:image/jpg + if "/" in splitted[0]: + extension = 
splitted[0].split("/")[1] + else: + extension = "data" imgdata = splitted[1] imgname = imgdata[:20] + "." + extension imgurl = urllib.parse.urljoin(baseurl, imgname)