2020-12-29 05:06:37 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# web2text: a dead simple script to get the important stuff
|
|
|
|
# from a website.
|
|
|
|
|
|
|
|
# As usual the WTFPL applies
|
|
|
|
import codecs
|
|
|
|
import click
|
2020-12-31 02:46:12 +00:00
|
|
|
from snarfbot.snarf3k import snarf, slugify
|
2020-12-29 05:06:37 +00:00
|
|
|
|
|
|
|
# Root click group for the script. It does no work of its own; it exists so
# that subcommands can be attached to it with @cli.command().
@click.group()
def cli():
    pass
|
|
|
|
|
|
|
|
|
|
|
|
@cli.command()
@click.argument('uri')
@click.option('--save', is_flag=True)
def scrape(uri, save):
    """Run snarf on URI and page the text, or save it as UTF-8 with --save."""
    # Only the first two items of snarf()'s result are used below --
    # presumably (title, text); TODO confirm against snarfbot.snarf3k.
    pack = snarf(uri)
    if save:
        # Output file name: the slugified first item plus a '.txt' suffix,
        # created relative to the current working directory.
        svsname = slugify(pack[0]) + '.txt'
        # The context manager closes the file even if write() raises, so the
        # handle is never leaked on a failed or partial write.
        with codecs.open(svsname, "w", 'utf-8') as fp:
            fp.write(pack[1])
    else:
        click.echo_via_pager(pack[1])
|
|
|
|
|
|
|
|
# Dispatch to the click group only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    cli()
|