WHYY: support figures in articles

This commit is contained in:
epiii2 2022-09-10 03:18:43 +00:00
parent 15ad286860
commit 4d28abcfa7
1 changed file with 8 additions and 1 deletion

View File

@@ -686,7 +686,14 @@ class HandlerWHYYNews(Handler):
if self.matching_classes(child, rejects):
continue
try:
text = child.get_text().replace("\n", " ")
if child.name == "figure":
for img in child.find_all("img"):
imgsrc = self.href(img, "src")
for cap in child.find_all("figcaption"):
caption = cap.get_text()
text = f"=> {imgsrc} Figure: {caption}"
else:
text = child.get_text().replace("\n", " ")
except AttributeError:
continue
if child.name in ["h1", "h2", "h3", "h4", "h5", "h6"]: