# tilde-projects/Code/irc/tumblr.py

import urllib
from bs4 import BeautifulSoup
import random
import re

def _fetch_soup(url):
    """Download *url* and return its HTML parsed with 'html.parser'."""
    # NOTE: urllib.urlopen is the Python 2 spelling; this module targets Python 2.
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        # Close explicitly so repeated calls do not leak sockets.
        response.close()
    return BeautifulSoup(html, 'html.parser')


def _page_count(soup):
    """Return the page total read from the first 'page-numbers' span.

    The span text is expected to look like "current/total". Falls back to 1
    when the span is missing, has no '/' part, is not numeric, or reports a
    total below 1, so the caller can always pick a valid page.
    """
    try:
        label = soup.findAll('span', 'page-numbers')[0].text
        return max(1, int(label.split('/')[1]))
    except (IndexError, ValueError):
        return 1


def _format_article(article):
    """Build the one-line quote text for an <article> that has a <blockquote>.

    The quote body has its newlines removed. Footer <ul> lists are appended
    with runs of newlines collapsed to a single space: when there are two or
    more lists the first (hash tags) is appended as-is and the second (date
    and notes) in parentheses; a single list is treated as the date and notes.
    A missing footer or a footer without lists adds nothing.
    """
    text = article.find('blockquote').text.replace('\n', '')

    footer = article.find('footer')
    lists = footer.findAll('ul') if footer is not None else []
    flattened = [re.sub('\n+', ' ', ul.text) for ul in lists[:2]]

    if len(flattened) > 1:
        text += flattened[0]  # the hash tags
        text += '(' + flattened[1] + ')'  # the date and notes
    elif flattened:
        text += '(' + flattened[0] + ')'  # just the date and notes
    return text


def tumble(url):
    """Return a random quote scraped from the blog at *url*.

    Fetches *url* to learn how many pages exist, fetches a randomly chosen
    ``url + '/page/<n>'``, picks a random <article> containing a <blockquote>
    and formats it together with its footer lists.

    Args:
        url: Base URL of the blog, without a trailing '/page/<n>' part.

    Returns:
        The formatted quote encoded as ASCII; characters that cannot be
        encoded are dropped.

    Raises:
        IndexError: If the chosen page has no <article> with a <blockquote>.
        Errors raised by ``urllib.urlopen`` propagate unchanged.
    """
    # Find the max pages
    pages = _page_count(_fetch_soup(url))
    page = random.randrange(1, pages + 1)

    # Parse a page
    soup = _fetch_soup(url + '/page/' + str(page))
    # Only quote posts are usable; other articles have no <blockquote>.
    candidates = [
        article for article in soup.findAll('article')
        if article.find('blockquote') is not None
    ]
    chosen = random.choice(candidates)

    return _format_article(chosen).encode('ascii', 'ignore')