bookmobile/bookmobile

150 lines
3.9 KiB
Fish
Executable File

#!/usr/bin/env fish
# Lee2sman 2021
# Defaults. check_args overrides these from the command-line options.
# They are set with an explicit global scope: an unscoped `set` modifies a
# variable wherever it already exists, so a universal variable of the same
# name in the user's environment would otherwise be overwritten.
set -g theme themes/medium.css # stylesheet handed to pandoc for the "reader" format
set -g format html # one of: html, reader, markdown, epub
set -g output . # directory the result is written to
# Verify that the external programs this script invokes are installed.
# The script calls `readable` (to fetch the page and read its title) and
# `pandoc` (to convert the saved html), so both must be found on PATH.
# On the first missing tool a message is written to stderr and the script
# exits with status 1.
function check_prereq
    for tool in pandoc readable
        if not command -sq $tool
            echo "Bookmobile requires $tool but it's not installed. Exiting." >&2
            exit 1
        end
    end
end
# Print the help text to stdout: a summary, the invocation synopsis, and a
# description of every option. The list of theme names is built from the
# *.css files found in the themes/ directory (relative to the current
# working directory), with the file extension removed.
function usage
    printf '%s\n' \
        "Bookmobile: Simple command line script to download articles for offline reading as html, epub or markdown. Saves a simplified html document to current directory as default." \
        "" \
        "Optionally, you can specify a theme, format, filename and output directory" \
        "" \
        "Usage: ./bookmobile url [ --theme THEME ] [ --format FORMAT ] [ --name FILENAME ] [ --output OUTPUT ]" \
        " [-h | --help ]" \
        "" \
        "Options:" \
        "" \
        " --help" \
        " -h Display usage info" \
        "" \
        " --theme THEME" \
        " -t THEME Set theme for reader format. Default: medium" \
        " Theme options: "

    # One line per available stylesheet, shown without its extension.
    for stylesheet in themes/*.css
        set -l stylesheet_file (basename $stylesheet)
        echo " " (string replace -r '\.[^\.]+$' '' $stylesheet_file)
    end

    printf '%s\n' \
        "" \
        " --format FORMAT" \
        " -f FORMAT Set file format (html, reader, markdown, epub)" \
        " default: html" \
        "" \
        " --name NAME" \
        " -n NAME Set specified file name instead of defaulting to article title" \
        "" \
        " --output OUTPUT" \
        " -o OUTPUT Set output location" \
        " default: current directory" \
        ""
end
# Parse the command line and apply any options over the global defaults.
#
# Arguments: the script's full argument list (URL plus options).
# Globals written:
#   theme  - set to themes/NAME.css when --theme/-t NAME is given
#   format - set when --format/-f is given
#   name   - set when --name/-n is given (otherwise left untouched)
#   output - set when --output/-o is given
# Exits the script:
#   - with status 1 when argparse rejects the options,
#   - after printing the help text when --help/-h is given,
#   - with status 1 when no URL (positional argument) was supplied.
function check_args
    argparse h/help t/theme= f/format= n/name= o/output= -- $argv
    or exit 1

    if set -q _flag_help
        usage
        exit
    end

    # argparse leaves only the positional arguments in $argv. Without a URL
    # there is nothing to download, so stop here instead of carrying on with
    # an empty URL. This check comes after --help so that
    # `bookmobile --help` keeps working on its own.
    if test (count $argv) -eq 0
        echo "Error: no URL given. Run with --help for usage." >&2
        exit 1
    end

    # Explicit -g: these are the script-wide settings read by later steps.
    if set -q _flag_theme
        set -g theme themes/$_flag_theme.css
    end
    if set -q _flag_format
        set -g format $_flag_format
    end
    if set -q _flag_name
        set -g name $_flag_name
    end
    if set -q _flag_output
        set -g output $_flag_output
    end
end
# Determine the article URL and the base file name used for the output.
#
# Arguments: the script's full argument list (URL plus options).
# Globals read:    name  - user-supplied file name, if any
# Globals written: url   - first positional argument
#                  title - base name (no extension) for the output files
#
# The title is chosen as follows:
#   1. the user-supplied name, used verbatim;
#   2. otherwise the article title reported by `readable`, cut at the first
#      ":", " -", "|", "•" or "(", trimmed, with spaces and "/" turned into
#      "_", the characters ? , ' " removed, and limited to 35 characters;
#   3. a date-time stamp when no usable title remains.
function set_metadata
    # Strip the options so the URL is found even when options come before it.
    # The option spec mirrors the one in check_args.
    argparse h/help t/theme= f/format= n/name= o/output= -- $argv
    or return 1
    set -g url $argv[1]

    # A user-specified name wins; no need to query the page for its title.
    if test -n "$name"
        set -g title $name
        return 0
    end

    set -l cleaned (readable $url --properties "title")

    if test -n "$cleaned"
        # Stop the title at the first separator. " -" (with the leading
        # space) matches a dash used to break up a title, not a hyphen
        # between two words.
        for delimiter in ":" " -" "|" "•" "("
            set cleaned (string split -- $delimiter $cleaned)[1]
        end
        # Remove leading and trailing spaces.
        set cleaned (string trim -- $cleaned)
        # Convert remaining spaces to underscores.
        set cleaned (string replace -a -- ' ' '_' $cleaned)
        # A "/" would be read as a directory separator in the output path.
        set cleaned (string replace -a -- '/' '_' $cleaned)
        # Remove ? , ' " from the title.
        set cleaned (string replace -r -a -- '[?,\'\"]' '' $cleaned)
        # Limit length to 35 characters (arbitrary but prudent).
        set cleaned (string sub --length 35 -- $cleaned)
    end

    # No title reported, or nothing left after cleaning: use the date-time.
    if test -z "$cleaned"
        set cleaned (date "+%Y-%m-%d_%H:%M:%S")
    end

    set -g title $cleaned
end
# Download the article and convert it to the requested format.
#
# Globals read: url, output, title, format, theme.
# Steps:
#   1. reject an unknown format before anything is downloaded;
#   2. `readable` saves the simplified page as $output/$title.html;
#   3. for reader/markdown/epub, pandoc converts that file and the
#      intermediate html is removed once the conversion succeeded.
# Returns: 0 on success; 1 for an invalid format or a failed download;
#          pandoc's status when a conversion fails (the intermediate html is
#          kept in that case).
function process_article
    if not contains -- "$format" html reader markdown epub
        echo "Invalid output format" >&2
        return 1
    end

    set -l page $output/$title.html

    # Pull down the html page.
    # NOTE(review): assumes `readable` exits non-zero on failure - confirm.
    readable $url --output $page
    or begin
        echo "Error. Could not retrieve article from $url" >&2
        return 1
    end

    switch $format
        case html
            # Just a placeholder: the page has already been saved above.
            if string match -q -- 'http*' "$url"
                echo "Html saved to " $output
            else
                echo "Error. Html requires URLs beginning http/https:"
            end
        case reader
            pandoc -s $page -c $theme -o $output/$title-article.html
            or return $status
            rm -f $page
            echo "Reader file saved to " $output " with theme " $theme
        case markdown
            pandoc -f html -t markdown_github-raw_html -o $output/$title.md $page
            or return $status
            rm -f $page
            echo "Markdown file saved to " $output
        case epub
            pandoc -s $page -f html -t epub -o $output/$title.epub
            or return $status
            rm -f $page
            echo "Epub saved to " $output
    end
end
# Entry point: check the required tools, parse the options, work out the
# URL and output file name, then download and convert the article.
function main
    check_prereq
    check_args $argv
    set_metadata $argv
    process_article
end

main $argv