tilde-projects/Code/irc/tumblr.py

24 lines
979 B
Python

import urllib
from bs4 import BeautifulSoup
import random
import re
def _fetch_soup(address):
    """Download *address* and return its markup parsed by BeautifulSoup."""
    # NOTE: urllib.urlopen is the Python 2 spelling this file already relies on.
    response = urllib.urlopen(address)
    try:
        markup = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()
    return BeautifulSoup(markup, 'html.parser')


def _page_count(soup):
    """Return the page total read from an 'x/total' page-numbers span.

    Falls back to 1 when the span is missing or its text is not in that
    form, so a blog without pagination can still be quoted.
    """
    try:
        total = int(soup.findAll('span', 'page-numbers')[0].text.split('/')[1])
    except (IndexError, ValueError):
        return 1
    # randrange(1, total + 1) needs at least one page to pick from.
    return max(total, 1)


def _flatten(text):
    """Collapse every run of newlines in *text* into a single space."""
    return re.sub(r'\n+', ' ', text)


def tumble(url):
    """Return a random quote scraped from the blog at *url*.

    The first request reads the number of pages from the pagination span;
    the second loads a randomly chosen '/page/N' and picks a random
    <article> that contains a <blockquote>.  The quote text is followed by
    the contents of the article footer's <ul> lists: with two or more
    lists the first is appended as-is (the hash tags) and the second in
    parentheses (the date and notes); with a single list it is appended in
    parentheses; with none, nothing is appended.

    Args:
        url: Base address of the blog; a trailing slash is tolerated.

    Returns:
        The assembled quote encoded as ASCII, with characters that cannot
        be encoded dropped.

    Raises:
        IndexError: If the chosen page has no article containing a
            blockquote.
        IOError: Whatever urllib raises when a page cannot be fetched.
    """
    # Avoid producing '//page/N' when the caller passes a trailing slash.
    base = url.rstrip('/')

    pages = _page_count(_fetch_soup(url))
    page = random.randrange(1, pages + 1)

    soup = _fetch_soup(base + '/page/' + str(page))
    # Only posts that actually hold a quote are eligible; other post kinds
    # would otherwise crash on the missing <blockquote>.
    candidates = [post for post in soup.findAll('article')
                  if post.find('blockquote') is not None]
    if not candidates:
        raise IndexError('no quote posts found on %s/page/%d' % (base, page))
    article = random.choice(candidates)

    quote = article.find('blockquote').text.replace('\n', '')

    footer = article.find('footer')
    lists = footer.findAll('ul') if footer is not None else []
    if len(lists) > 1:
        quote += _flatten(lists[0].text)  # the hash tags
        quote += '(' + _flatten(lists[1].text) + ')'  # the date and notes
    elif lists:
        quote += '(' + _flatten(lists[0].text) + ')'  # just the date and notes
    return quote.encode('ascii', 'ignore')