Improve handling of base64 images

Also makes image links absolute in the rendering
This commit is contained in:
Lionel Dricot 2023-09-13 21:07:32 +02:00
parent 7a606b71cf
commit ea82e81d75
3 changed files with 11 additions and 3 deletions

View File

@ -3,8 +3,10 @@
## 2.0-beta2 - unreleased
Changes since beta1
- "--sync" can optionally take some lists as arguments, in order to sync only those specific lists
- improves handling of base64 images
- fixes gophermap being considered as gemtext files
- fixes opening mailto links
- attempt at hiding XMLparsedAsHTMLWarning from BS4 library
## 2.0-beta1 - September 05th 2023
This is an experimental release. Bug reports and feedback are welcome on the offpunk-devel list.

View File

@ -13,6 +13,7 @@ import fnmatch
import netcache
import offthemes
from offutils import run,term_width,is_local,looks_like_base64
import base64
from offutils import _DATA_DIR
try:
from readability import Document
@ -982,6 +983,7 @@ class HtmlRenderer(AbstractRenderer):
#The 4 following lines are there to translate the URL into a cache path
img = netcache.get_cache_path(imgurl)
if imgdata:
os.makedirs(os.path.dirname(img), exist_ok=True)
with open(img,"wb") as cached:
cached.write(base64.b64decode(imgdata))
cached.close()
@ -1116,10 +1118,10 @@ class HtmlRenderer(AbstractRenderer):
else:
text += "[IMG]"
if src:
links.append(src+" "+text)
if not mode in self.images:
self.images[mode] = []
abs_url = urllib.parse.urljoin(self.url, src)
abs_url,data = looks_like_base64(src,self.url)
links.append(abs_url+" "+text)
self.images[mode].append(abs_url)
link_id = " [%s]"%(len(links)+startlinks)
r.add_block(ansi_img)

View File

@ -136,7 +136,11 @@ def looks_like_base64(src,baseurl):
if src and src.startswith("data:image/"):
if ";base64," in src:
splitted = src.split(";base64,")
extension = splitted[0].strip("data:image/")[:3]
#splitted[0] is something like data:image/jpg
if "/" in splitted[0]:
extension = splitted[0].split("/")[1]
else:
extension = "data"
imgdata = splitted[1]
imgname = imgdata[:20] + "." + extension
imgurl = urllib.parse.urljoin(baseurl, imgname)