tilde-projects/Code/irc/tumblr.py

51 lines
1.6 KiB
Python

#!/usr/bin/python3
import urllib
from bs4 import BeautifulSoup
import random
import re
def tumble(url):
    """Return a randomly chosen quote scraped from a paginated blog.

    The landing page at ``url`` is fetched first to read the total page
    count from the first ``<span class="page-numbers">`` element, whose text
    is expected to contain ``current/total``.  A random page
    (``url + "/page/<n>"``) is then fetched, a random ``<article>`` is picked
    from it, and its ``<blockquote>`` text is combined with the text of the
    ``<ul>`` lists found in the article's ``<footer>``.

    Args:
        url: Base URL of the blog, without a trailing ``/page/<n>`` part.

    Returns:
        The quote text followed by the footer details, or ``""`` when no
        quote could be extracted after the initial attempt plus three
        retries, or when the page count is not a positive integer.

    Raises:
        Exception: Anything raised while fetching or parsing the landing
            page (network errors from ``urlopen``, ``IndexError`` when the
            page-number span is missing or has no ``/``) propagates to the
            caller; only the per-page scraping is retried.
    """
    # ``import urllib`` alone does not load the ``request`` submodule, so
    # import it explicitly instead of relying on another module doing so.
    import urllib.request

    max_attempts = 4  # the first try plus three retries

    # Find the max pages
    with urllib.request.urlopen(url) as response:
        soup = BeautifulSoup(response.read(), "html.parser")
    # this could totally fail several ways
    pages = soup.findAll("span", "page-numbers")[0].text.split("/")[1]

    # A malformed or empty page count can never succeed, so give up right
    # away instead of burning every retry on the same conversion error.
    try:
        page_count = int(pages)
    except ValueError:
        return ""
    if page_count < 1:
        return ""

    for _ in range(max_attempts):
        try:
            page = random.randrange(1, page_count + 1)

            # Parse a page
            with urllib.request.urlopen(url + "/page/" + str(page)) as response:
                soup = BeautifulSoup(response.read(), "html.parser")
            article = random.choice(soup.findAll("article"))
            quote = article.find("blockquote").text.replace("\n", "")

            footer_lists = article.find("footer").findAll("ul")
            if len(footer_lists) > 1:
                # the hash tags come first, then the date and notes
                quote += re.sub(r"\n+", " ", footer_lists[0].text)
                trailer = footer_lists[1]
            else:
                # just the date and notes
                trailer = footer_lists[0]
            quote += "(" + re.sub(r"\n+", " ", trailer.text) + ")"
            return quote
        except Exception:
            # sometimes we fail (network hiccup, article without a
            # blockquote or footer, empty page). let's retry a few times
            continue

    return ""