tamias/tamias.sh

116 lines
2.6 KiB
Bash
Executable File

#!/bin/sh -e
# TODO: try to get lang from the original document since we download it anyway
# TODO: switch for not downloading the original too
# TODO: support multiple URLs in one command
# TODO: optionally minify HTML
# TODO: download required resources too - images...
# TODO: do not create a new file if the name is the same and the content is the same too
# TODO: cache download date/time too
# TODO: extract txt-only/markdown
# TODO: add an option to use a local CSS file
# TODO: add an option to embed a CSS file inside the HTML
# TODO: check if dependencies are installed
# TODO: replace readability-cli with something better
# dependencies:
# - curl
# - readability-cli (https://git.tilde.institute/paper/readability-cli)
# - pup (https://github.com/ericchiang/pup), used to extract image URLs
# usage
# Print the one-line command synopsis on stdout.
usage() {
  printf '%s\n' 'usage: tamias [-c cachedir] [-l lang] <url>'
}
# Defaults; both can be overridden from the command line.
CACHEDIR="$HOME/tamias/" # -c: directory the downloaded pages are stored in
lang=en                  # -l: value written into <html lang="..."> of the result
url=

# Parse leading options. The first argument that is not an option is taken as
# the URL; anything after it is ignored.
while [ "$#" -gt 0 ]; do
  case "$1" in
    -l)
      if [ -z "$2" ]; then
        usage >&2
        exit 1
      fi
      lang="$2"
      shift 2
      ;;
    -c)
      if [ -z "$2" ]; then
        usage >&2
        exit 1
      fi
      CACHEDIR="$2"
      shift 2
      ;;
    -h)
      # Explicit request for help: synopsis on stdout, successful exit.
      usage
      exit 0
      ;;
    -*)
      # Reject unknown options instead of treating them as the URL: a "URL"
      # starting with "-" would later be parsed by curl as one of its options.
      printf 'tamias: unknown option: %s\n' "$1" >&2
      usage >&2
      exit 1
      ;;
    *)
      url="$1"
      break
      ;;
  esac
done

# A URL is mandatory.
if [ -z "$url" ]; then
  usage >&2
  exit 1
fi
# getname URL
# Print, on stdout, a file name for URL that is not yet taken in the current
# directory.
#
# The name is the last "/"-separated component of URL; a single trailing "/"
# is ignored, so ".../blog/" yields "blog". If NAME or NAME.orig already
# exists, a counter is appended (NAME1, NAME2, ...) until neither exists.
#
# The body is a subshell "( ... )" rather than "{ ... }" so that the working
# variables cannot overwrite the caller's globals (POSIX sh has no "local").
getname() (
  # Strip one trailing slash, then everything up to the last remaining slash.
  # Uses parameter expansion only, so no external tools are needed.
  trimmed="${1%/}"
  base="${trimmed##*/}"

  # URLs such as "/" or ".../x//" leave nothing to use as a name; fall back to
  # a fixed one so callers never end up redirecting into an empty file name.
  [ -n "$base" ] || base=index

  candidate="$base"
  counter=1
  while [ -e "$candidate" ] || [ -e "$candidate.orig" ]; do
    candidate="$base$counter"
    counter=$((counter + 1))
  done

  printf '%s\n' "$candidate"
)
# Main flow: download the page, extract the readable article and its images,
# and write a self-contained HTML file into the cache directory.

# Abort on the first failing command even when the script is started as
# "sh tamias.sh", in which case the "-e" on the shebang line is not applied.
set -e

# html_escape STRING
# Print STRING with & " < > replaced by HTML entities so that it can be placed
# inside a double-quoted attribute value.
html_escape() {
  printf '%s' "$1" |
    sed -e 's/&/\&amp;/g' -e 's/"/\&quot;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

mkdir -p "$CACHEDIR"
cd "$CACHEDIR" || exit 1

name=$(getname "$url")
printf '%s\n' "$name"

# download original
curl -s "$url" > "$name.orig"

# use readability on it
readable=$(readability-cli "$url")

# Distinct image URLs referenced by the readable article.
# printf is used instead of echo because some /bin/sh implementations let echo
# interpret backslash sequences, which would alter the article text.
images=$(printf '%s\n' "$readable" | pup img 'attr{src}' | sort -u)
printf '%s\n' "$images"

# Read the list line by line: an unquoted "for image in $images" would
# word-split and glob-expand the URLs ("?" and "*" are legal in URLs).
printf '%s\n' "$images" | while IFS= read -r image; do
  [ -n "$image" ] || continue
  imgname=$(getname "$image")
  printf '%s\n' "$imgname"
  # Images are auxiliary: a failed download is reported but must not prevent
  # the article itself from being written below.
  if ! curl -s "$image" > "$imgname"; then
    rm -f -- "$imgname"
    printf 'tamias: could not download image: %s\n' "$image" >&2
  fi
done

# Escape the values interpolated into attributes so that characters such as
# '"' or '&' in the URL cannot break the generated markup.
safe_lang=$(html_escape "$lang")
safe_url=$(html_escape "$url")

# Assemble the final document: fixed header, readable article, fixed footer.
{
  cat <<EOF
<!DOCTYPE html>
<html lang="$safe_lang">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="UTF-8">
<meta name="url" content="$safe_url">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/kognise/water.css@latest/dist/dark.min.css">
</head>
<body class="container">

EOF
  printf '%s\n' "$readable"
  cat <<EOF

</body>
</html>
EOF
} > "$name"