sfeed_update: add filter(), order() support per feed + improvements
Pass the name parameter to the functions and add these to the pipeline. They can be overridden in the config. - add the ability to change the merge logic per feed. - add the ability to filter lines and fields per feed. - add the ability to order lines differently per feed. - add filter example to README. - code-style: - fetchfeed consistency in parameter order. - change [ x"" = x"" ] to [ "" = "" ]. Simplify some if statements. - wrap long line in fetchfeed(). - use signal names for trap.
This commit is contained in:
parent
5aa78eb161
commit
cc9f0d5549
60
README
60
README
|
@ -127,12 +127,18 @@ Files read at runtime by sfeed_update(1)
|
|||
----------------------------------------
|
||||
|
||||
sfeedrc - Config file. This file is evaluated as a shellscript in
|
||||
sfeed_update(1). You can for example override the fetchfeed()
|
||||
function to use wget(1), OpenBSD ftp(1) an other download program or
|
||||
you can override the merge() function to change the merge logic. The
|
||||
function feeds() is called to fetch the feeds. The function feed()
|
||||
can safely be executed concurrently as a background job in your
|
||||
sfeedrc(5) config file to make updating faster.
|
||||
sfeed_update(1).
|
||||
|
||||
At least the following functions can be overridden per feed:
|
||||
|
||||
- fetchfeed: to use wget(1), OpenBSD ftp(1) or another download program.
|
||||
- merge: to change the merge logic.
|
||||
- filter: to filter on fields.
|
||||
- order: to change the sort order.
|
||||
|
||||
The function feeds() is called to fetch the feeds. The function feed() can
|
||||
safely be executed concurrently as a background job in your sfeedrc(5) config
|
||||
file to make updating faster.
|
||||
|
||||
|
||||
Files written at runtime by sfeed_update(1)
|
||||
|
@ -212,6 +218,48 @@ argument is optional):
|
|||
|
||||
- - -
|
||||
|
||||
# filter fields.
|
||||
# filter(name)
|
||||
filter() {
|
||||
case "$1" in
|
||||
"tweakers")
|
||||
LC_LOCALE=C awk -F ' ' 'BEGIN {
|
||||
OFS = " ";
|
||||
}
|
||||
# skip ads.
|
||||
$2 ~ /^ADV:/ {
|
||||
next;
|
||||
}
|
||||
# shorten link.
|
||||
{
|
||||
if (match($3, /^https:\/\/tweakers\.net\/(nieuws|downloads|reviews|geek)\/[0-9]+\//)) {
|
||||
$3 = substr($3, RSTART, RLENGTH);
|
||||
}
|
||||
print $0;
|
||||
}';;
|
||||
"yt BSDNow")
|
||||
# filter only BSD Now from channel.
|
||||
LC_LOCALE=C awk -F ' ' '$2 ~ / \| BSD Now/';;
|
||||
*)
|
||||
cat;;
|
||||
esac | \
|
||||
# replace youtube links with embed links.
|
||||
sed 's@www.youtube.com/watch?v=@www.youtube.com/embed/@g' | \
|
||||
# try to strip utm_ tracking parameters.
|
||||
LC_LOCALE=C awk -F ' ' 'BEGIN {
|
||||
OFS = " ";
|
||||
}
|
||||
{
|
||||
gsub(/\?utm_([^&]+)/, "?", $3);
|
||||
gsub(/&utm_([^&]+)/, "", $3);
|
||||
gsub(/\?&/, "?", $3);
|
||||
gsub(/[\?&]+$/, "", $3);
|
||||
print $0;
|
||||
}'
|
||||
}
|
||||
|
||||
- - -
|
||||
|
||||
Over time your feeds file might become quite big. You can archive items from a
|
||||
specific date by doing for example:
|
||||
|
||||
|
|
73
sfeed_update
73
sfeed_update
|
@ -9,7 +9,7 @@ sfeedpath="$HOME/.sfeed/feeds"
|
|||
# loadconfig(configfile)
|
||||
loadconfig() {
|
||||
# allow to specify config via argv[1].
|
||||
if [ ! x"$1" = x"" ]; then
|
||||
if [ "$1" != "" ]; then
|
||||
# get absolute path of config file.
|
||||
config=$(readlink -f "$1")
|
||||
else
|
||||
|
@ -17,8 +17,7 @@ loadconfig() {
|
|||
config="$HOME/.sfeed/sfeedrc"
|
||||
fi
|
||||
|
||||
# load config: config is loaded here to be able to override $sfeedpath
|
||||
# or functions.
|
||||
# config is loaded here to be able to override $sfeedpath or functions.
|
||||
if [ -r "${config}" ]; then
|
||||
. "${config}"
|
||||
else
|
||||
|
@ -28,30 +27,11 @@ loadconfig() {
|
|||
fi
|
||||
}
|
||||
|
||||
# merge raw files.
|
||||
# merge(oldfile, newfile)
|
||||
merge() {
|
||||
# unique sort by id, title, link.
|
||||
# order by timestamp (desc).
|
||||
(sort -t ' ' -u -k6,6 -k2,2 -k3,3 "$1" "$2" 2>/dev/null) |
|
||||
sort -t ' ' -k1rn,1
|
||||
}
|
||||
|
||||
# fetch a feed via HTTP/HTTPS etc.
|
||||
# fetchfeed(url, name, feedfile)
|
||||
fetchfeed() {
|
||||
if curl -L --max-redirs 0 -H 'User-Agent:' -f -s -S -m 15 -z "$3" "$1" 2>/dev/null; then
|
||||
printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
|
||||
else
|
||||
printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
# convert encoding from one encoding to another.
|
||||
# convertencoding(from, to)
|
||||
convertencoding() {
|
||||
# if from != to
|
||||
if [ ! "$1" = "" ] && [ ! "$2" = "" ] && [ ! "$1" = "$2" ]; then
|
||||
if [ "$1" != "" ] && [ "$2" != "" ] && [ "$1" != "$2" ]; then
|
||||
iconv -cs -f "$1" -t "$2" 2> /dev/null
|
||||
else
|
||||
# else no convert, just output
|
||||
|
@ -59,6 +39,35 @@ convertencoding() {
|
|||
fi
|
||||
}
|
||||
|
||||
# merge raw files: unique sort by id, title, link.
|
||||
# merge(name, oldfile, newfile)
|
||||
merge() {
|
||||
sort -t ' ' -u -k6,6 -k2,2 -k3,3 "$2" "$3" 2>/dev/null
|
||||
}
|
||||
|
||||
# filter fields.
|
||||
# filter(name)
|
||||
filter() {
|
||||
cat
|
||||
}
|
||||
|
||||
# order by timestamp (descending).
|
||||
# order(name)
|
||||
order() {
|
||||
sort -t ' ' -k1rn,1
|
||||
}
|
||||
|
||||
# fetch a feed via HTTP/HTTPS etc.
|
||||
# fetchfeed(name, url, feedfile)
|
||||
fetchfeed() {
|
||||
if curl -L --max-redirs 0 -H "User-Agent:" -f -s -S -m 15 \
|
||||
-z "$3" "$2" 2>/dev/null; then
|
||||
printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
|
||||
else
|
||||
printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
# fetch and parse feed.
|
||||
# feed(name, feedurl, [basesiteurl], [encoding])
|
||||
feed() {
|
||||
|
@ -72,14 +81,14 @@ feed() {
|
|||
sfeedfile="${sfeedpath}/${filename}"
|
||||
|
||||
if [ ! "${encoding}" = "" ]; then
|
||||
fetchfeed "${feedurl}" "${name}" "${sfeedfile}" | \
|
||||
fetchfeed "${name}" "${feedurl}" "${sfeedfile}" | \
|
||||
convertencoding "${encoding}" "utf-8"
|
||||
else # detect encoding.
|
||||
tmpencfile="${tmpfeedfile}.enc"
|
||||
fetchfeed "${feedurl}" "${name}" "${sfeedfile}" > "${tmpencfile}"
|
||||
fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpencfile}"
|
||||
detectenc=$(sfeed_xmlenc < "${tmpencfile}")
|
||||
convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
|
||||
fi | sfeed "${basesiteurl}" > "${tmpfeedfile}"
|
||||
fi | sfeed "${basesiteurl}" | filter "${name}" > "${tmpfeedfile}"
|
||||
|
||||
# get new data and merge with old.
|
||||
sfeedfilenew="${sfeedpath}/${filename}.new"
|
||||
|
@ -87,18 +96,20 @@ feed() {
|
|||
if [ -s "${tmpfeedfile}" ]; then
|
||||
# if file exists, merge
|
||||
if [ -e "${sfeedfile}" ]; then
|
||||
merge "${sfeedfile}" "${tmpfeedfile}" > "${sfeedfilenew}"
|
||||
merge "${name}" "${sfeedfile}" "${tmpfeedfile}" | \
|
||||
order "${name}" > "${sfeedfilenew}"
|
||||
|
||||
# overwrite old file with updated file
|
||||
mv "${sfeedfilenew}" "${sfeedfile}"
|
||||
else
|
||||
merge "/dev/null" "${tmpfeedfile}" > "${sfeedfile}"
|
||||
merge "${name}" "/dev/null" "${tmpfeedfile}" | \
|
||||
order "${name}" > "${sfeedfile}"
|
||||
fi
|
||||
fi) &
|
||||
}
|
||||
|
||||
cleanup() {
|
||||
# remove temporary files
|
||||
# remove temporary files.
|
||||
rm -rf "${sfeedtmpdir}"
|
||||
}
|
||||
|
||||
|
@ -114,9 +125,9 @@ feeds() {
|
|||
# kill whole current process group on ^C (SIGINT).
|
||||
isinterrupted="0"
|
||||
# SIGTERM: signal to terminate parent.
|
||||
trap -- "interrupted" "15"
|
||||
trap -- "interrupted" "TERM"
|
||||
# SIGINT: kill all running children >:D
|
||||
trap -- "kill -TERM -$$" "2"
|
||||
trap -- "kill -TERM -$$" "INT"
|
||||
# load config file.
|
||||
loadconfig "$1"
|
||||
# fetch feeds and store in temporary file.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.Dd August 5, 2015
|
||||
.Dd September 28, 2018
|
||||
.Dt SFEED_UPDATE 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
|
@ -29,15 +29,28 @@ section for more information.
|
|||
Config file, see the sfeedrc.example file for an example.
|
||||
This file is evaluated as a shellscript in
|
||||
.Nm .
|
||||
You can for example override the fetchfeed() function to
|
||||
use
|
||||
.Xr curl 1 ,
|
||||
.Pp
|
||||
At least the following functions can be overridden per feed:
|
||||
.Bl -tag -width 17n
|
||||
.It fetchfeed
|
||||
to use
|
||||
.Xr wget 1 ,
|
||||
or an other network downloader or you can override the merge() function to
|
||||
change the merge logic.
|
||||
OpenBSD
|
||||
.Xr ftp 1
|
||||
or another download program.
|
||||
.It merge
|
||||
to change the merge logic.
|
||||
.It filter
|
||||
to filter on fields.
|
||||
.It order
|
||||
to change the sort order.
|
||||
.El
|
||||
.Pp
|
||||
The function feeds() is called to fetch the feeds.
|
||||
By default the function feed() is executed concurrently as a background job to
|
||||
speedup updating.
|
||||
The function feed() can safely be executed concurrently as a background job in
|
||||
your
|
||||
.Xr sfeedrc 5
|
||||
config file to make updating faster.
|
||||
.El
|
||||
.Sh FILES WRITTEN
|
||||
.Bl -tag -width 17n
|
||||
|
|
Loading…
Reference in New Issue