commit cc9f0d5549b21bb6254aede2ff479698183ea5e3
parent 5aa78eb161a89f3803cc6efa35e214dd2e8f5386
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Fri, 28 Sep 2018 17:11:56 +0200
sfeed_update: add filter(), order() support per feed + improvements
Pass the name parameter to the functions and add these to the pipeline. They
can be overridden in the config.
- add the ability to change the merge logic per feed.
- add the ability to filter lines and fields per feed.
- add the ability to order lines differently per feed.
- add filter example to README.
- code-style:
  - fetchfeed consistency in parameter order.
  - change [ x"" = x"" ] to [ "" = "" ]. Simplify some if statements.
  - wrap long line in fetchfeed().
  - use signal names for trap.
Diffstat:
| M | README |  |  | 60 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++------ | 
| M | sfeed_update |  |  | 73 | ++++++++++++++++++++++++++++++++++++++++++------------------------------- | 
| M | sfeed_update.1 |  |  | 29 | +++++++++++++++++++++-------- | 
3 files changed, 117 insertions(+), 45 deletions(-)
diff --git a/README b/README
@@ -127,12 +127,18 @@ Files read at runtime by sfeed_update(1)
 ----------------------------------------
 
 sfeedrc - Config file. This file is evaluated as a shellscript in
-          sfeed_update(1). You can for example override the fetchfeed()
-          function to use wget(1), OpenBSD ftp(1) an other download program or
-          you can override the merge() function to change the merge logic. The
-          function feeds() is called to fetch the feeds. The function feed()
-          can safely be executed concurrently as a background job in your
-          sfeedrc(5) config file to make updating faster.
+          sfeed_update(1).
+
+At least the following functions can be overridden per feed:
+
+- fetchfeed: to use wget(1), OpenBSD ftp(1) or another download program.
+- merge: to change the merge logic.
+- filter: to filter on fields.
+- order: to change the sort order.
+
+The function feeds() is called to fetch the feeds. The function feed() can
+safely be executed concurrently as a background job in your sfeedrc(5) config
+file to make updating faster.
 
 
 Files written at runtime by sfeed_update(1)
@@ -212,6 +218,48 @@ argument is optional):
 
 - - -
 
+# filter fields: per-feed rules first, then link cleanups for every feed (awk is run in the C locale).
+# filter(name)
+filter() {
+	case "$1" in
+	"tweakers")
+		LC_ALL=C awk -F '	' 'BEGIN {
+			OFS = "	";
+		}
+		# skip ads.
+		$2 ~ /^ADV:/ {
+			next;
+		}
+		# shorten link.
+		{
+			if (match($3, /^https:\/\/tweakers\.net\/(nieuws|downloads|reviews|geek)\/[0-9]+\//)) {
+				$3 = substr($3, RSTART, RLENGTH);
+			}
+			print $0;
+		}';;
+	"yt BSDNow")
+		# filter only BSD Now from channel.
+		LC_ALL=C awk -F '	' '$2 ~ / \| BSD Now/';;
+	*)
+		cat;;
+	esac | \
+		# replace youtube links with embed links (dots escaped to match literally).
+		sed 's@www\.youtube\.com/watch?v=@www.youtube.com/embed/@g' | \
+		# try to strip utm_ tracking parameters.
+		LC_ALL=C awk -F '	' 'BEGIN {
+			OFS = "	";
+		}
+		{
+			gsub(/\?utm_([^&]+)/, "?", $3);
+			gsub(/&utm_([^&]+)/, "", $3);
+			gsub(/\?&/, "?", $3);
+			gsub(/[\?&]+$/, "", $3);
+			print $0;
+		}'
+}
+
+- - -
+
 Over time your feeds file might become quite big. You can archive items from a
 specific date by doing for example:
 
diff --git a/sfeed_update b/sfeed_update
@@ -9,7 +9,7 @@ sfeedpath="$HOME/.sfeed/feeds"
 # loadconfig(configfile)
 loadconfig() {
 	# allow to specify config via argv[1].
-	if [ ! x"$1" = x"" ]; then
+	if [ "$1" != "" ]; then
 		# get absolute path of config file.
 		config=$(readlink -f "$1")
 	else
@@ -17,8 +17,7 @@ loadconfig() {
 		config="$HOME/.sfeed/sfeedrc"
 	fi
 
-	# load config: config is loaded here to be able to override $sfeedpath
-	# or functions.
+	# config is loaded here to be able to override $sfeedpath or functions.
 	if [ -r "${config}" ]; then
 		. "${config}"
 	else
@@ -28,30 +27,11 @@ loadconfig() {
 	fi
 }
 
-# merge raw files.
-# merge(oldfile, newfile)
-merge() {
-	# unique sort by id, title, link.
-	# order by timestamp (desc).
-	(sort -t '	' -u -k6,6 -k2,2 -k3,3 "$1" "$2" 2>/dev/null) |
-	sort -t '	' -k1rn,1
-}
-
-# fetch a feed via HTTP/HTTPS etc.
-# fetchfeed(url, name, feedfile)
-fetchfeed() {
-	if curl -L --max-redirs 0 -H 'User-Agent:' -f -s -S -m 15 -z "$3" "$1" 2>/dev/null; then
-		printf "  OK %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
-	else
-		printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
-	fi
-}
-
 # convert encoding from one encoding to another.
 # convertencoding(from, to)
 convertencoding() {
 	# if from != to
-	if [ ! "$1" = "" ] && [ ! "$2" = "" ] && [ ! "$1" = "$2" ]; then
+	if [ "$1" != "" ] && [ "$2" != "" ] && [ "$1" != "$2" ]; then
 		iconv -cs -f "$1" -t "$2" 2> /dev/null
 	else
 		# else no convert, just output
@@ -59,6 +39,35 @@ convertencoding() {
 	fi
 }
 
+# merge raw files: keep one line per unique (id, title, link) sort key.
+# merge(name, oldfile, newfile)
+merge() {
+	sort -u -t '	' -k 6,6 -k 2,2 -k 3,3 "$2" "$3" 2> /dev/null
+}
+
+# filter fields: the default copies every line from stdin to stdout unchanged.
+# filter(name)
+filter() {
+	cat -
+}
+
+# order lines on the first TAB-separated field (timestamp), numerically, newest first.
+# order(name)
+order() {
+	sort -t '	' -k 1,1nr
+}
+
+# fetch a feed via HTTP/HTTPS etc; an OK/FAIL status line with the time and name goes to stderr.
+# fetchfeed(name, url, feedfile)
+fetchfeed() {
+	if ! curl --location --max-redirs 0 --header "User-Agent:" --fail \
+		--silent --show-error --max-time 15 --time-cond "$3" "$2" 2>/dev/null; then
+		printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
+	else
+		printf "  OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
+	fi
+}
+
 # fetch and parse feed.
 # feed(name, feedurl, [basesiteurl], [encoding])
 feed() {
@@ -72,14 +81,14 @@ feed() {
 	sfeedfile="${sfeedpath}/${filename}"
 
 	if [ ! "${encoding}" = "" ]; then
-		fetchfeed "${feedurl}" "${name}" "${sfeedfile}" | \
+		fetchfeed "${name}" "${feedurl}" "${sfeedfile}" | \
 			convertencoding "${encoding}" "utf-8"
 	else # detect encoding.
 		tmpencfile="${tmpfeedfile}.enc"
-		fetchfeed "${feedurl}" "${name}" "${sfeedfile}" > "${tmpencfile}"
+		fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpencfile}"
 		detectenc=$(sfeed_xmlenc < "${tmpencfile}")
 		convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
-	fi | sfeed "${basesiteurl}" > "${tmpfeedfile}"
+	fi | sfeed "${basesiteurl}" | filter "${name}" > "${tmpfeedfile}"
 
 	# get new data and merge with old.
 	sfeedfilenew="${sfeedpath}/${filename}.new"
@@ -87,18 +96,20 @@ feed() {
 	if [ -s "${tmpfeedfile}" ]; then
 		# if file exists, merge
 		if [ -e "${sfeedfile}" ]; then
-			merge "${sfeedfile}" "${tmpfeedfile}" > "${sfeedfilenew}"
+			merge "${name}" "${sfeedfile}" "${tmpfeedfile}" | \
+				order "${name}" > "${sfeedfilenew}"
 
 			# overwrite old file with updated file
 			mv "${sfeedfilenew}" "${sfeedfile}"
 		else
-			merge "/dev/null" "${tmpfeedfile}" > "${sfeedfile}"
+			merge "${name}" "/dev/null" "${tmpfeedfile}" | \
+				order "${name}" > "${sfeedfile}"
 		fi
 	fi) &
 }
 
 cleanup() {
-	# remove temporary files
+	# remove temporary files.
 	rm -rf "${sfeedtmpdir}"
 }
 
@@ -114,9 +125,9 @@ feeds() {
 # kill whole current process group on ^C (SIGINT).
 isinterrupted="0"
 # SIGTERM: signal to terminate parent.
-trap -- "interrupted" "15"
+trap -- "interrupted" "TERM"
 # SIGINT: kill all running childs >:D
-trap -- "kill -TERM -$$" "2"
+trap -- "kill -TERM -$$" "INT"
 # load config file.
 loadconfig "$1"
 # fetch feeds and store in temporary file.
diff --git a/sfeed_update.1 b/sfeed_update.1
@@ -1,4 +1,4 @@
-.Dd August 5, 2015
+.Dd September 28, 2018
 .Dt SFEED_UPDATE 1
 .Os
 .Sh NAME
@@ -29,15 +29,28 @@ section for more information.
 Config file, see the sfeedrc.example file for an example.
 This file is evaluated as a shellscript in
 .Nm .
-You can for example override the fetchfeed() function to
-use
-.Xr curl 1 ,
+.Pp
+At least the following functions can be overridden per feed:
+.Bl -tag -width 17n
+.It fetchfeed
+to use
 .Xr wget 1 ,
-or an other network downloader or you can override the merge() function to
-change the merge logic.
+OpenBSD
+.Xr ftp 1
+or another download program.
+.It merge
+to change the merge logic.
+.It filter
+to filter on fields.
+.It order
+to change the sort order.
+.El
+.Pp
 The function feeds() is called to fetch the feeds.
-By default the function feed() is executed concurrently as a background job to
-speedup updating.
+The function feed() can safely be executed concurrently as a background job in
+your
+.Xr sfeedrc 5
+config file to make updating faster.
 .El
 .Sh FILES WRITTEN
 .Bl -tag -width 17n