commit cc9f0d5549b21bb6254aede2ff479698183ea5e3
parent 5aa78eb161a89f3803cc6efa35e214dd2e8f5386
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 28 Sep 2018 17:11:56 +0200
sfeed_update: add filter(), order() support per feed + improvements
Pass the name parameter to the functions and add these to the pipeline. They
can be overridden in the config.
- add the ability to change the merge logic per feed.
- add the ability to filter lines and fields per feed.
- add the ability to order lines differently per feed.
- add filter example to README.
- code-style:
- fetchfeed consistency in parameter order.
- change [ x"" = x"" ] to [ "" = "" ]. Simplify some if statements.
- wrap long line in fetchfeed().
- use signal names for trap.
Diffstat:
M | README | | | 60 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++------ |
M | sfeed_update | | | 73 | ++++++++++++++++++++++++++++++++++++++++++------------------------------- |
M | sfeed_update.1 | | | 29 | +++++++++++++++++++++-------- |
3 files changed, 117 insertions(+), 45 deletions(-)
diff --git a/README b/README
@@ -127,12 +127,18 @@ Files read at runtime by sfeed_update(1)
----------------------------------------
sfeedrc - Config file. This file is evaluated as a shellscript in
- sfeed_update(1). You can for example override the fetchfeed()
- function to use wget(1), OpenBSD ftp(1) an other download program or
- you can override the merge() function to change the merge logic. The
- function feeds() is called to fetch the feeds. The function feed()
- can safely be executed concurrently as a background job in your
- sfeedrc(5) config file to make updating faster.
+ sfeed_update(1).
+
+At least the following functions can be overridden per feed:
+
+- fetchfeed: to use wget(1), OpenBSD ftp(1) or another download program.
+- merge: to change the merge logic.
+- filter: to filter on fields.
+- order: to change the sort order.
+
+The function feeds() is called to fetch the feeds. The function feed() can
+safely be executed concurrently as a background job in your sfeedrc(5) config
+file to make updating faster.
Files written at runtime by sfeed_update(1)
@@ -212,6 +218,48 @@ argument is optional):
- - -
+# filter fields: reads TAB-separated sfeed lines on stdin, writes to stdout.
+# filter(name)
+filter() {
+	case "$1" in
+	"tweakers")
+		LC_ALL=C awk -F '\t' 'BEGIN {
+			OFS = "\t";
+		}
+		# skip ads.
+		$2 ~ /^ADV:/ {
+			next;
+		}
+		# shorten link.
+		{
+			if (match($3, /^https:\/\/tweakers\.net\/(nieuws|downloads|reviews|geek)\/[0-9]+\//)) {
+				$3 = substr($3, RSTART, RLENGTH);
+			}
+			print $0;
+		}';;
+	"yt BSDNow")
+		# filter only BSD Now from channel.
+		LC_ALL=C awk -F '\t' '$2 ~ / \| BSD Now/';;
+	*)
+		cat;;
+	esac | \
+		# replace youtube links with embed links.
+		sed 's@www\.youtube\.com/watch?v=@www.youtube.com/embed/@g' | \
+		# try to strip utm_ tracking parameters.
+		LC_ALL=C awk -F '\t' 'BEGIN {
+			OFS = "\t";
+		}
+		{
+			gsub(/\?utm_([^&]+)/, "?", $3);
+			gsub(/&utm_([^&]+)/, "", $3);
+			gsub(/\?&/, "?", $3);
+			gsub(/[\?&]+$/, "", $3);
+			print $0;
+		}'
+}
+
+- - -
+
Over time your feeds file might become quite big. You can archive items from a
specific date by doing for example:
diff --git a/sfeed_update b/sfeed_update
@@ -9,7 +9,7 @@ sfeedpath="$HOME/.sfeed/feeds"
# loadconfig(configfile)
loadconfig() {
# allow to specify config via argv[1].
- if [ ! x"$1" = x"" ]; then
+ if [ "$1" != "" ]; then
# get absolute path of config file.
config=$(readlink -f "$1")
else
@@ -17,8 +17,7 @@ loadconfig() {
config="$HOME/.sfeed/sfeedrc"
fi
- # load config: config is loaded here to be able to override $sfeedpath
- # or functions.
+ # config is loaded here to be able to override $sfeedpath or functions.
if [ -r "${config}" ]; then
. "${config}"
else
@@ -28,30 +27,11 @@ loadconfig() {
fi
}
-# merge raw files.
-# merge(oldfile, newfile)
-merge() {
- # unique sort by id, title, link.
- # order by timestamp (desc).
- (sort -t ' ' -u -k6,6 -k2,2 -k3,3 "$1" "$2" 2>/dev/null) |
- sort -t ' ' -k1rn,1
-}
-
-# fetch a feed via HTTP/HTTPS etc.
-# fetchfeed(url, name, feedfile)
-fetchfeed() {
- if curl -L --max-redirs 0 -H 'User-Agent:' -f -s -S -m 15 -z "$3" "$1" 2>/dev/null; then
- printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
- else
- printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
- fi
-}
-
# convert encoding from one encoding to another.
# convertencoding(from, to)
convertencoding() {
# if from != to
- if [ ! "$1" = "" ] && [ ! "$2" = "" ] && [ ! "$1" = "$2" ]; then
+ if [ "$1" != "" ] && [ "$2" != "" ] && [ "$1" != "$2" ]; then
iconv -cs -f "$1" -t "$2" 2> /dev/null
else
# else no convert, just output
@@ -59,6 +39,35 @@ convertencoding() {
fi
}
+# merge old ($2) and new ($3) raw files: unique sort on fields 6, 2, 3 (id, title, link); $1 is unused.
+# merge(name, oldfile, newfile)
+merge() {
+ sort -t ' ' -u -k6,6 -k2,2 -k3,3 "$2" "$3" 2>/dev/null
+}
+
+# filter fields: this default copies stdin to stdout unchanged; $1 is unused.
+# filter(name)
+filter() {
+ cat
+}
+
+# order stdin by the first field (timestamp), numeric and descending; $1 is unused.
+# order(name)
+order() {
+ sort -t ' ' -k1rn,1
+}
+
+# fetch a feed via HTTP/HTTPS etc to stdout; logs OK/FAIL with the time and feed name to stderr.
+# fetchfeed(name, url, feedfile)
+fetchfeed() {
+ if curl -L --max-redirs 0 -H "User-Agent:" -f -s -S -m 15 \
+ -z "$3" "$2" 2>/dev/null; then
+ printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
+ else
+ printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
+ fi
+}
+
# fetch and parse feed.
# feed(name, feedurl, [basesiteurl], [encoding])
feed() {
@@ -72,14 +81,14 @@ feed() {
sfeedfile="${sfeedpath}/${filename}"
if [ ! "${encoding}" = "" ]; then
- fetchfeed "${feedurl}" "${name}" "${sfeedfile}" | \
+ fetchfeed "${name}" "${feedurl}" "${sfeedfile}" | \
convertencoding "${encoding}" "utf-8"
else # detect encoding.
tmpencfile="${tmpfeedfile}.enc"
- fetchfeed "${feedurl}" "${name}" "${sfeedfile}" > "${tmpencfile}"
+ fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpencfile}"
detectenc=$(sfeed_xmlenc < "${tmpencfile}")
convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
- fi | sfeed "${basesiteurl}" > "${tmpfeedfile}"
+ fi | sfeed "${basesiteurl}" | filter "${name}" > "${tmpfeedfile}"
# get new data and merge with old.
sfeedfilenew="${sfeedpath}/${filename}.new"
@@ -87,18 +96,20 @@ feed() {
if [ -s "${tmpfeedfile}" ]; then
# if file exists, merge
if [ -e "${sfeedfile}" ]; then
- merge "${sfeedfile}" "${tmpfeedfile}" > "${sfeedfilenew}"
+ merge "${name}" "${sfeedfile}" "${tmpfeedfile}" | \
+ order "${name}" > "${sfeedfilenew}"
# overwrite old file with updated file
mv "${sfeedfilenew}" "${sfeedfile}"
else
- merge "/dev/null" "${tmpfeedfile}" > "${sfeedfile}"
+ merge "${name}" "/dev/null" "${tmpfeedfile}" | \
+ order "${name}" > "${sfeedfile}"
fi
fi) &
}
cleanup() {
- # remove temporary files
+ # remove the temporary directory ${sfeedtmpdir} and everything in it.
rm -rf "${sfeedtmpdir}"
}
@@ -114,9 +125,9 @@ feeds() {
# kill whole current process group on ^C (SIGINT).
isinterrupted="0"
# SIGTERM: signal to terminate parent.
-trap -- "interrupted" "15"
+trap -- "interrupted" "TERM"
# SIGINT: kill all running childs >:D
-trap -- "kill -TERM -$$" "2"
+trap -- "kill -TERM -$$" "INT"
# load config file.
loadconfig "$1"
# fetch feeds and store in temporary file.
diff --git a/sfeed_update.1 b/sfeed_update.1
@@ -1,4 +1,4 @@
-.Dd August 5, 2015
+.Dd September 28, 2018
.Dt SFEED_UPDATE 1
.Os
.Sh NAME
@@ -29,15 +29,28 @@ section for more information.
Config file, see the sfeedrc.example file for an example.
This file is evaluated as a shellscript in
.Nm .
-You can for example override the fetchfeed() function to
-use
-.Xr curl 1 ,
+.Pp
+At least the following functions can be overridden per feed:
+.Bl -tag -width 17n
+.It fetchfeed
+to use
.Xr wget 1 ,
-or an other network downloader or you can override the merge() function to
-change the merge logic.
+OpenBSD
+.Xr ftp 1
+or another download program.
+.It merge
+to change the merge logic.
+.It filter
+to filter on fields.
+.It order
+to change the sort order.
+.El
+.Pp
The function feeds() is called to fetch the feeds.
-By default the function feed() is executed concurrently as a background job to
-speedup updating.
+The function feed() can safely be executed concurrently as a background job in
+your
+.Xr sfeedrc 5
+config file to make updating faster.
.El
.Sh FILES WRITTEN
.Bl -tag -width 17n