#!/bin/sh
# update feeds, merge with old feeds.
# NOTE: assumes "sfeed_*" executables are in $PATH.

# defaults
# directory in which the merged feed files are stored; the config file that is
# loaded later on may override this value.
sfeedpath="$HOME/.sfeed/feeds"

# load config (evaluate shellscript).
# loadconfig(configfile)
#   configfile: optional path of the config file; when empty the default
#               location "$HOME/.sfeed/sfeedrc" is used.
# Sets the global $config and sources that file in the current shell, so the
# config is able to override $sfeedpath or functions defined before this call.
# Exits the script with status 1 when the file is not a readable regular file.
loadconfig() {
	# allow to specify config via argv[1].
	if [ "$1" != "" ]; then
		# get absolute path of config file: the "." builtin searches $PATH
		# for names that do not contain a slash.
		config=$(readlink -f "$1" 2>/dev/null)
		if [ "${config}" = "" ]; then
			# path could not be resolved: keep what the user specified so
			# the error message below names the right file.
			case "$1" in
			*/*) config="$1" ;;
			*) config="./$1" ;;
			esac
		fi
	else
		# default config location.
		config="$HOME/.sfeed/sfeedrc"
	fi

	# config is loaded here to be able to override $sfeedpath or functions.
	# -f: a directory is "readable" too, but can not be sourced.
	if [ -f "${config}" ] && [ -r "${config}" ]; then
		. "${config}"
	else
		# printf: echo may interpret backslashes in the path on some shells.
		printf 'Configuration file "%s" does not exist or is not readable.\n' "${config}" >&2
		echo "See sfeedrc.example for an example." >&2
		exit 1
	fi
}

# convert encoding from one encoding to another.
# convertencoding(from, to)
#   from: name of the encoding of the data on stdin (may be empty).
#   to:   name of the wanted encoding (may be empty).
# Reads the data from stdin and writes the result to stdout.
convertencoding() {
	# an encoding is unknown or from == to: no convert, just output.
	if [ "$1" = "" ] || [ "$2" = "" ] || [ "$1" = "$2" ]; then
		cat
		return
	fi
	# -c: omit characters that can not be converted.
	# -s: suppress messages about such characters.
	# remaining diagnostics of iconv are discarded on purpose.
	iconv -cs -f "$1" -t "$2" 2> /dev/null
}

# merge raw files: unique sort by id, title, link.
# merge(name, oldfile, newfile)
#   name:    name of the feed (unused by this default implementation).
#   oldfile: file with the items that are already stored.
#   newfile: file with the freshly fetched items.
# Writes the merged items to stdout.
merge() {
	# the fields are TAB-separated: field 6 is the id, 2 the title and 3 the
	# link. The separator is generated with printf so it can not silently
	# degrade into a space (titles contain spaces, which would shift the keys).
	# errors of sort are discarded on purpose.
	sort -t "$(printf '\t')" -u -k6,6 -k2,2 -k3,3 "$2" "$3" 2>/dev/null
}

# filter fields.
# filter(name)
#   name: name of the feed (unused by this default implementation).
# Default implementation: copies stdin to stdout unchanged. As the config is
# loaded after this definition it can redefine this function.
filter() {
	cat
}

# order by timestamp (descending).
# order(name)
#   name: name of the feed (unused by this default implementation).
# Reads the items from stdin and writes them sorted to stdout.
order() {
	# field 1 (TAB-separated) is the timestamp: sort it numerically, reversed.
	# The separator is generated with printf so it can not silently degrade
	# into a space.
	sort -t "$(printf '\t')" -k1rn,1
}

# fetch a feed via HTTP/HTTPS etc.
# fetchfeed(name, url, feedfile)
#   name:     name of the feed, only used for the status line.
#   url:      URL to fetch.
#   feedfile: existing feed file, passed to curl -z as time condition.
# Writes the fetched data to stdout and a "OK" or "FAIL" status line with the
# current time and the feed name to stderr.
fetchfeed() {
	# -L --max-redirs 0: treat any redirect as an error.
	# -H "User-Agent:":  do not send a User-Agent header.
	# -f -s -m 15:       fail on HTTP errors, be silent, timeout of 15 seconds.
	# -z file:           only fetch when newer than the stored feed file.
	if curl -L --max-redirs 0 -H "User-Agent:" -f -s -m 15 \
		-z "$3" "$2" 2>/dev/null; then
		printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
	else
		printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
	fi
}

# fetch and parse feed.
# feed(name, feedurl, [basesiteurl], [encoding])
#   name:        name of the feed; "/" is replaced by "_" for the filename.
#   feedurl:     URL of the feed.
#   basesiteurl: optional, passed as argument to sfeed.
#   encoding:    optional encoding of the feed; when empty it is detected
#                with sfeed_xmlenc.
# Uses the globals $sfeedtmpdir (temporary files) and $sfeedpath (feed files).
# The work is done in a background subshell, the caller has to "wait".
feed() {
	# subshell: feeds are processed concurrently and the variables below do
	# not leak into the calling shell.
	(
	name="$1"
	filename="$(printf '%s' "$1" | tr '/' '_')"
	feedurl="$2"
	basesiteurl="$3"
	encoding="$4"
	tmpfeedfile="${sfeedtmpdir}/${filename}"
	tmpencfile=""
	sfeedfile="${sfeedpath}/${filename}"
	sfeedfilenew="${sfeedpath}/${filename}.new"

	# fetch, convert to utf-8, parse and filter into the temporary file.
	if [ "${encoding}" != "" ]; then
		fetchfeed "${name}" "${feedurl}" "${sfeedfile}" | \
			convertencoding "${encoding}" "utf-8"
	else # detect encoding.
		# the data is read twice (detect, convert): store it in a file.
		tmpencfile="${tmpfeedfile}.enc"
		fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpencfile}"
		detectenc=$(sfeed_xmlenc < "${tmpencfile}")
		convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
	fi | sfeed "${basesiteurl}" | filter "${name}" > "${tmpfeedfile}"

	# get new data and merge with old.
	# new feed data is non-empty.
	if [ -s "${tmpfeedfile}" ]; then
		# if file exists, merge
		if [ -e "${sfeedfile}" ]; then
			# write to a separate file first and only overwrite the old
			# file when that succeeded, so a failure (for example a full
			# disk) does not destroy the stored items.
			if merge "${name}" "${sfeedfile}" "${tmpfeedfile}" | \
				order "${name}" > "${sfeedfilenew}"; then
				# overwrite old file with updated file
				mv "${sfeedfilenew}" "${sfeedfile}"
			else
				rm -f "${sfeedfilenew}"
			fi
		else
			merge "${name}" "/dev/null" "${tmpfeedfile}" | \
				order "${name}" > "${sfeedfile}"
		fi
	fi
	) &
}

# remove the temporary directory $sfeedtmpdir with everything in it.
# cleanup()
cleanup() {
	# remove temporary files.
	# nothing to remove when the directory was never created.
	if [ "${sfeedtmpdir}" != "" ]; then
		rm -rf "${sfeedtmpdir}"
	fi
}

# signal handler: only record that the run was interrupted, the flag
# $isinterrupted is checked at the end of the script for the exit status.
# interrupted()
interrupted() {
	isinterrupted="1"
}

# placeholder that is expected to be redefined by the config file (the config
# is loaded after this definition). When it was not redefined this reports on
# stderr that the config $config is invalid.
# feeds()
feeds() {
	# printf: echo may interpret backslashes in the path on some shells.
	printf 'Configuration file "%s" is invalid or does not contain a "feeds" function.\n' "${config}" >&2
	echo "See sfeedrc.example for an example." >&2
}

# kill whole current process group on ^C (SIGINT).
# flag that is set to "1" by interrupted().
isinterrupted="0"
# SIGTERM: signal to terminate parent.
trap -- "interrupted" "TERM"
# SIGINT: kill all running children >:D
# ($$ is expanded right here, the signal is sent to the process group -$$).
trap -- "kill -TERM -$$" "INT"
# load config file.
loadconfig "$1"
# fetch feeds and store in temporary file.
# stop when the directory can not be created: with an empty $sfeedtmpdir the
# temporary files would be written to "/".
sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" || exit 1
[ "${sfeedtmpdir}" != "" ] || exit 1
# make sure path exists.
if ! mkdir -p "${sfeedpath}"; then
	# no feed can be stored without it.
	cleanup
	exit 1
fi
# fetch feeds specified in config file.
feeds
# wait till all feeds are fetched (concurrently).
wait
# cleanup temporary files etc.
cleanup
# on SIGINT exit with 128 + signal (SIGINT = 2).
[ "${isinterrupted}" = "1" ] && exit 130
exit 0