sfeed/sfeed_update

146 lines
3.5 KiB
Plaintext
Raw Normal View History

#!/bin/sh
# update feeds, merge with old feeds.
# NOTE: assumes "sfeed_*" executables are in $PATH.
# default directory of the feed files; the config file is loaded later and
# may override this value.
sfeedpath="${HOME}/.sfeed/feeds"
# load config (evaluate shellscript).
# loadconfig(configfile)
#   configfile: optional path of the config file; when empty the default
#               "$HOME/.sfeed/sfeedrc" is used.
# Sets the global $config to the path that is tried. The file is sourced in
# the current shell, so it is able to override $sfeedpath or functions.
# When the file is not readable an error is printed to stderr and the shell
# exits with status 1.
loadconfig() {
	# allow to specify config via argv[1].
	if [ "$1" != "" ]; then
		# get absolute path of config file.
		config=$(readlink -f "$1")
		# readlink prints nothing when it cannot resolve the path: keep the
		# name that was given so the error message below can mention it.
		# A name without a slash gets "./" prepended so that "." would never
		# search $PATH for it.
		if [ "${config}" = "" ]; then
			case "$1" in
			*/*) config="$1" ;;
			*) config="./$1" ;;
			esac
		fi
	else
		# default config location.
		config="$HOME/.sfeed/sfeedrc"
	fi

	# config is loaded here to be able to override $sfeedpath or functions.
	if [ -r "${config}" ]; then
		. "${config}"
	else
		echo "Configuration file \"${config}\" does not exist or is not readable." >&2
		echo "See sfeedrc.example for an example." >&2
		exit 1
	fi
}
# convert encoding from one encoding to another.
# convertencoding(from, to)
#   from: name of the encoding of the data on stdin.
#   to:   name of the encoding to write to stdout.
# When either name is empty, or both names are equal, stdin is copied to
# stdout unchanged.
convertencoding() {
	# if from != to
	if [ "$1" != "" ] && [ "$2" != "" ] && [ "$1" != "$2" ]; then
		# diagnostics of iconv are discarded on purpose.
		iconv -cs -f "$1" -t "$2" 2> /dev/null
	else
		# else no convert, just output
		cat
	fi
}
# merge raw files: unique sort by id, title, link.
# merge(name, oldfile, newfile)
#   name:    name of the feed (not used here).
#   oldfile: file with the existing items.
#   newfile: file with the newly fetched items.
# Writes the merged lines to stdout; lines whose fields 6, 2 and 3 are all
# equal are written only once. Errors of sort are discarded.
merge() {
	# the fields of a feed file are separated by a TAB (see sfeed(5)): a
	# space as separator would split titles in the middle and shift the key
	# columns.
	sort -t "$(printf '\t')" -u -k6,6 -k2,2 -k3,3 "$2" "$3" 2>/dev/null
}
# filter fields.
# filter(name)
#   name: name of the feed (not used by this default implementation).
# The default copies stdin to stdout unchanged.
filter() {
	cat -
}
# order by timestamp (descending).
# order(name)
#   name: name of the feed (not used here).
# Reads lines from stdin and writes them to stdout, sorted numerically on
# the first field, highest value first.
order() {
	# the fields are separated by a TAB (see sfeed(5)): with a space as
	# separator the first field would run on into the title.
	sort -t "$(printf '\t')" -k1rn,1
}
# fetch a feed via HTTP/HTTPS etc.
# fetchfeed(name, url, feedfile)
#   name:     name of the feed, only used in the status line.
#   url:      URL to fetch.
#   feedfile: passed to curl -z as the time condition for the request.
# The fetched data is written to stdout. A status line with the current time
# and the name is written to stderr: " OK" when curl succeeded, "FAIL"
# otherwise. Messages of curl itself are discarded.
fetchfeed() {
	if ! curl -L --max-redirs 0 -H "User-Agent:" -f -s -m 15 \
		-z "$3" "$2" 2>/dev/null; then
		printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
	else
		printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
	fi
}
# fetch and parse feed.
# feed(name, feedurl, [basesiteurl], [encoding])
#   name:        name of the feed; with each "/" replaced by "_" it is the
#                filename used in $sfeedpath and $sfeedtmpdir.
#   feedurl:     URL passed to fetchfeed.
#   basesiteurl: passed as argument to sfeed.
#   encoding:    encoding of the feed; when empty sfeed_xmlenc is used to
#                detect it.
# All the work is done in a subshell that is put in the background, so the
# caller has to "wait" for it.
feed() {
	(
		name="$1"
		feedurl="$2"
		basesiteurl="$3"
		encoding="$4"
		filename="$(printf '%s' "$1" | tr '/' '_')"
		sfeedfile="${sfeedpath}/${filename}"
		tmpfeedfile="${sfeedtmpdir}/${filename}"
		tmpencfile=""

		# fetch, convert to utf-8, parse and filter into the temporary file.
		if [ -z "${encoding}" ]; then
			# no encoding given: store the raw data to be able to read it
			# twice, once to detect the encoding and once to convert it.
			tmpencfile="${tmpfeedfile}.enc"
			fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpencfile}"
			detectenc=$(sfeed_xmlenc < "${tmpencfile}")
			convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
		else
			fetchfeed "${name}" "${feedurl}" "${sfeedfile}" | \
				convertencoding "${encoding}" "utf-8"
		fi | sfeed "${basesiteurl}" | filter "${name}" > "${tmpfeedfile}"

		# get new data and merge with old.
		sfeedfilenew="${sfeedpath}/${filename}.new"
		# only when the new feed data is non-empty.
		if [ -s "${tmpfeedfile}" ]; then
			if [ ! -e "${sfeedfile}" ]; then
				# no old file yet: nothing to merge with.
				merge "${name}" "/dev/null" "${tmpfeedfile}" | \
					order "${name}" > "${sfeedfile}"
			else
				# the result is written to a separate file first because
				# the old file is an input of merge.
				merge "${name}" "${sfeedfile}" "${tmpfeedfile}" | \
					order "${name}" > "${sfeedfilenew}"
				# overwrite old file with updated file
				mv "${sfeedfilenew}" "${sfeedfile}"
			fi
		fi
	) &
}
# remove temporary files.
# cleanup()
# Removes the directory named by the global $sfeedtmpdir with everything in
# it. Nothing is done when $sfeedtmpdir is empty or unset.
cleanup() {
	if [ -n "${sfeedtmpdir}" ]; then
		# "--" ends the options: a path that starts with "-" is then still
		# treated as a path.
		rm -rf -- "${sfeedtmpdir}"
	fi
}
# signal handler: remember that the script was interrupted.
# interrupted()
# Only sets the global flag $isinterrupted to "1".
interrupted() {
	isinterrupted=1
}
# default "feeds" function.
# feeds()
# The config file is expected to define its own feeds function; this default
# only reports on stderr that it did not.
feeds() {
	{
		echo "Configuration file \"${config}\" is invalid or does not contain a \"feeds\" function."
		echo "See sfeedrc.example for an example."
	} >&2
}
# main: set up signal handling, load the config, fetch all feeds
# concurrently and clean up afterwards.
#
# kill whole current process group on ^C (SIGINT).
isinterrupted="0"
# SIGTERM: signal to terminate parent. The handler only sets $isinterrupted.
trap -- "interrupted" "TERM"
# SIGINT: kill all running childs >:D
# "-$$" addresses the process group with the id of this shell, so this shell
# gets the SIGTERM as well and runs the handler above.
# NOTE(review): assumes this shell is the leader of its process group.
trap -- "kill -TERM -$$" "INT"
# load config file.
loadconfig "$1"
# fetch feeds and store in temporary file.
# stop when the directory cannot be created: with an empty $sfeedtmpdir the
# temporary files would be written to "/".
if ! sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')"; then
	echo "Could not create a temporary directory." >&2
	exit 1
fi
# make sure path exists.
mkdir -p "${sfeedpath}"
# fetch feeds specified in config file.
feeds
# wait till all feeds are fetched (concurrently).
wait
# cleanup temporary files etc.
cleanup
# on SIGINT exit with 128 + signal (SIGINT = 2).
[ "${isinterrupted}" = "1" ] && exit 130
exit 0