Improve handling of base64 images

Also makes image links absolute in the rendering
This commit is contained in:
Lionel Dricot 2023-09-13 21:07:32 +02:00
parent 7a606b71cf
commit ea82e81d75
3 changed files with 11 additions and 3 deletions

View File

@ -3,8 +3,10 @@
## 2.0-beta2 - unreleased ## 2.0-beta2 - unreleased
Changes since beta1 Changes since beta1
- "--sync" can optionally take some lists as arguments, in order to perform a specific sync - "--sync" can optionally take some lists as arguments, in order to perform a specific sync
- improves handling of base64 images
- fixes gophermap being considered as gemtext files - fixes gophermap being considered as gemtext files
- fixes opening mailto links - fixes opening mailto links
- attempts to hide XMLParsedAsHTMLWarning from the BS4 library
## 2.0-beta1 - September 05th 2023 ## 2.0-beta1 - September 05th 2023
This is an experimental release. Bug reports and feedback are welcome on the offpunk-devel list. This is an experimental release. Bug reports and feedback are welcome on the offpunk-devel list.

View File

@ -13,6 +13,7 @@ import fnmatch
import netcache import netcache
import offthemes import offthemes
from offutils import run,term_width,is_local,looks_like_base64 from offutils import run,term_width,is_local,looks_like_base64
import base64
from offutils import _DATA_DIR from offutils import _DATA_DIR
try: try:
from readability import Document from readability import Document
@ -982,6 +983,7 @@ class HtmlRenderer(AbstractRenderer):
#4 followings line are there to translate the URL into cache path #4 followings line are there to translate the URL into cache path
img = netcache.get_cache_path(imgurl) img = netcache.get_cache_path(imgurl)
if imgdata: if imgdata:
os.makedirs(os.path.dirname(img), exist_ok=True)
with open(img,"wb") as cached: with open(img,"wb") as cached:
cached.write(base64.b64decode(imgdata)) cached.write(base64.b64decode(imgdata))
cached.close() cached.close()
@ -1116,10 +1118,10 @@ class HtmlRenderer(AbstractRenderer):
else: else:
text += "[IMG]" text += "[IMG]"
if src: if src:
links.append(src+" "+text)
if not mode in self.images: if not mode in self.images:
self.images[mode] = [] self.images[mode] = []
abs_url = urllib.parse.urljoin(self.url, src) abs_url,data = looks_like_base64(src,self.url)
links.append(abs_url+" "+text)
self.images[mode].append(abs_url) self.images[mode].append(abs_url)
link_id = " [%s]"%(len(links)+startlinks) link_id = " [%s]"%(len(links)+startlinks)
r.add_block(ansi_img) r.add_block(ansi_img)

View File

@ -136,7 +136,11 @@ def looks_like_base64(src,baseurl):
if src and src.startswith("data:image/"): if src and src.startswith("data:image/"):
if ";base64," in src: if ";base64," in src:
splitted = src.split(";base64,") splitted = src.split(";base64,")
extension = splitted[0].strip("data:image/")[:3] #splitted[0] is something like data:image/jpg
if "/" in splitted[0]:
extension = splitted[0].split("/")[1]
else:
extension = "data"
imgdata = splitted[1] imgdata = splitted[1]
imgname = imgdata[:20] + "." + extension imgname = imgdata[:20] + "." + extension
imgurl = urllib.parse.urljoin(baseurl, imgname) imgurl = urllib.parse.urljoin(baseurl, imgname)