#!/usr/bin/env python3
# web2text: a dead simple script to get the important stuff
# from a website.
# As usual the WTFPL applies
import click

from snarfbot.snarf3k import snarf, slugify
from snarfbot.linkview import textview, DEFAULT_USER_AGENT


@click.group()
def cli():
    """web2text: extract the important text from a web page."""
    pass


@cli.command()
@click.argument('uri')
@click.option('--save', is_flag=True)
def scrape(uri, save):
    """
    Attempt an elegant scraping of the page.
    Save results to a file if desired.
    """
    # snarf returns a pack whose first element is the page title and
    # second element is the extracted text (indexed, not unpacked, in
    # case it carries extra elements).
    pack = snarf(uri)
    if save:
        # Derive a filesystem-safe filename from the page title.
        svsname = slugify(pack[0]) + '.txt'
        # Context manager guarantees the file is closed even if the
        # write raises; builtin open() with encoding= replaces the
        # legacy codecs.open().
        with open(svsname, 'w', encoding='utf-8') as fp:
            fp.write(pack[1])
    else:
        click.echo_via_pager(pack[1])


@cli.command()
@click.argument('uri')
@click.option('--user-agent', default=DEFAULT_USER_AGENT, show_default=True)
def uglydump(uri, user_agent):
    """
    Less elegant but more functional page scrape dump to stdout.
    """
    # textview yields text fragments; join them in a single pass
    # instead of accumulating through a StringIO buffer.
    click.echo(''.join(textview(uri, user_agent=user_agent)))


if __name__ == '__main__':
    cli()