sfeed

simple feed reader - forked from git.codemadness.org/sfeed
git clone git://src.gearsix.net/sfeed | sfeed.zip
Log | Files | Refs | Atom | README | LICENSE

commit 72ce014a188fde5adba4e41bff92a7929340c13d
parent dc7f31a8a2f4a0adf60f6e228688fbe2ab31b42c
Author: gearsix <gearsix@tuta.io>
Date:   Sat, 14 Jun 2025 11:43:14 +0100

Merge branch 'master' into gearsix

Diffstat:
M LICENSE | 2 +-
M Makefile | 9 +++++----
M README | 160 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M sfeed.1 | 36 ++++++++++++++++++------------------
M sfeed.c | 124 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M sfeed_atom.c | 15 +++++++--------
M sfeed_content.1 | 9 +++++----
M sfeed_curses.1 | 21 +++++++++++++--------
M sfeed_curses.c | 218 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M sfeed_frames.1 | 7 +++++++
M sfeed_frames.c | 8 ++------
M sfeed_gopher.1 | 4 ++++
M sfeed_gopher.c | 14 +++++---------
M sfeed_html.1 | 7 +++++++
M sfeed_html.c | 8 ++------
A sfeed_json.1 | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
A sfeed_json.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M sfeed_markread | 2 +-
M sfeed_markread.1 | 11 +++++++----
M sfeed_mbox.1 | 9 +++++----
M sfeed_mbox.c | 4 +++-
M sfeed_opml_export | 10 +++++-----
M sfeed_opml_import.c | 4 ++--
M sfeed_plain.1 | 7 +++++++
M sfeed_plain.c | 8 ++------
M sfeed_twtxt.c | 2 --
M sfeed_update | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
M sfeed_update.1 | 10 ++++------
M sfeed_web.1 | 6 +++---
M sfeed_web.c | 4 ++--
M sfeed_xmlenc.c | 4 ++--
M sfeedrc.5 | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
M sfeedrc.example | 3 +++
M util.c | 30 ++++++++++++++++++++++++------
M util.h | 4 +++-
M xml.c | 6 ++++--
M xml.h | 8 ++++----
37 files changed, 808 insertions(+), 421 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -1,6 +1,6 @@ ISC License -Copyright (c) 2011-2023 Hiltjo Posthuma <hiltjo@codemadness.org> +Copyright (c) 2011-2025 Hiltjo Posthuma <hiltjo@codemadness.org> Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above diff --git a/Makefile b/Makefile @@ -1,7 +1,7 @@ .POSIX: NAME = sfeed -VERSION = 1.7 +VERSION = 2.2 # curses theme, see themes/ directory. SFEED_THEME = mono @@ -26,7 +26,7 @@ SFEED_CPPFLAGS = -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 -D_BSD_SOURCE SFEED_CURSES = sfeed_curses SFEED_CURSES_CFLAGS = ${CFLAGS} SFEED_CURSES_CPPFLAGS = -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 -D_BSD_SOURCE \ - -DSFEED_THEME=\"themes/${SFEED_THEME}.h\" ${SFEED_CPPFLAGS} + -DSFEED_THEME=\"themes/${SFEED_THEME}.h\" SFEED_CURSES_LDFLAGS = ${LDFLAGS} -lcurses # Linux: some distros use ncurses and require -lncurses. @@ -34,7 +34,7 @@ SFEED_CURSES_LDFLAGS = ${LDFLAGS} -lcurses # Gentoo Linux: some distros might also require -ltinfo and -D_DEFAULT_SOURCE # to prevent warnings about feature test macros. -#SFEED_CURSES_LDFLAGS = ${LDFLAGS} -lcurses -ltinfo +#SFEED_CURSES_LDFLAGS = ${LDFLAGS} -lncurses -ltinfo # FreeBSD: unset feature test macros for SIGWINCH etc. #SFEED_CURSES_CPPFLAGS = @@ -51,6 +51,7 @@ BIN = \ sfeed_frames\ sfeed_gopher\ sfeed_html\ + sfeed_json\ sfeed_mbox\ sfeed_opml_import\ sfeed_plain\ @@ -146,7 +147,7 @@ clean: install: all # installing executable files and scripts. mkdir -p "${DESTDIR}${PREFIX}/bin" - cp -f ${BIN} ${SCRIPTS} "${DESTDIR}${PREFIX}/bin" + cp -fRP ${BIN} ${SCRIPTS} "${DESTDIR}${PREFIX}/bin" for f in ${BIN} ${SCRIPTS}; do chmod 755 "${DESTDIR}${PREFIX}/bin/$$f"; done # installing example files. mkdir -p "${DESTDIR}${DOCPREFIX}" diff --git a/README b/README @@ -48,7 +48,7 @@ Initial setup: cp sfeedrc.example "$HOME/.sfeed/sfeedrc" Edit the sfeedrc(5) configuration file and change any RSS/Atom feeds. 
This file -is included and evaluated as a shellscript for sfeed_update, so it's functions +is included and evaluated as a shellscript for sfeed_update, so its functions and behaviour can be overridden: $EDITOR "$HOME/.sfeed/sfeedrc" @@ -118,7 +118,8 @@ Optional dependencies - POSIX sh(1), used by sfeed_update(1) and sfeed_opml_export(1). - POSIX utilities such as awk(1) and sort(1), - used by sfeed_content(1), sfeed_markread(1) and sfeed_update(1). + used by sfeed_content(1), sfeed_markread(1), sfeed_opml_export(1) and + sfeed_update(1). - curl(1) binary: https://curl.haxx.se/ , used by sfeed_update(1), but can be replaced with any tool like wget(1), OpenBSD ftp(1) or hurl(1): https://git.codemadness.org/hurl/ @@ -126,6 +127,8 @@ Optional dependencies used by sfeed_update(1). If the text in your RSS/Atom feeds are already UTF-8 encoded then you don't need this. For a minimal iconv implementation: https://git.etalabs.net/cgit/noxcuse/tree/src/iconv.c +- xargs with support for the -P and -0 option, + used by sfeed_update(1). - mandoc for documentation: https://mdocml.bsd.lv/ - curses (typically ncurses), otherwise see minicurses.h, used by sfeed_curses(1). @@ -151,13 +154,13 @@ sfeed supports a subset of XML 1.0 and a subset of: - Atom 1.0 (RFC 4287): https://datatracker.ietf.org/doc/html/rfc4287 - Atom 0.3 (draft, historic). -- RSS 0.91+. +- RSS 0.90+. - RDF (when used with RSS). - MediaRSS extensions (media:). - Dublin Core extensions (dc:). -Other formats like JSONfeed, twtxt or certain RSS/Atom extensions are supported -by converting them to RSS/Atom or to the sfeed(5) format directly. +Other formats like JSON Feed, twtxt or certain RSS/Atom extensions are +supported by converting them to RSS/Atom or to the sfeed(5) format directly. OS tested @@ -175,7 +178,7 @@ OS tested - Windows (cygwin gcc + mintty, mingw). - HaikuOS - SerenityOS -- FreeDOS (djgpp). +- FreeDOS (djgpp, Open Watcom). - FUZIX (sdcc -mz80, with the sfeed parser program). 
@@ -196,6 +199,7 @@ sfeed_curses - Format feed data (TSV) to a curses interface. sfeed_frames - Format feed data (TSV) to HTML file(s) with frames. sfeed_gopher - Format feed data (TSV) to Gopher files. sfeed_html - Format feed data (TSV) to HTML. +sfeed_json - Format feed data (TSV) to JSON Feed. sfeed_opml_export - Generate an OPML XML file from a sfeedrc config file. sfeed_opml_import - Generate a sfeedrc config file from an OPML XML file. sfeed_markread - Mark items as read/unread, for use with sfeed_curses. @@ -298,10 +302,12 @@ Just like the other format programs included in sfeed you can run it like this: sfeed_curses < ~/.sfeed/feeds/xkcd -By default sfeed_curses marks the items of the last day as new/bold. To manage -read/unread items in a different way a plain-text file with a list of the read -URLs can be used. To enable this behaviour the path to this file can be -specified by setting the environment variable $SFEED_URL_FILE to the URL file: +By default sfeed_curses marks the items of the last day as new/bold. This limit +might be overridden by setting the environment variable $SFEED_NEW_AGE to the +desired maximum in seconds. To manage read/unread items in a different way a +plain-text file with a list of the read URLs can be used. To enable this +behaviour the path to this file can be specified by setting the environment +variable $SFEED_URL_FILE to the URL file: export SFEED_URL_FILE="$HOME/.sfeed/urls" [ -f "$SFEED_URL_FILE" ] || touch "$SFEED_URL_FILE" @@ -343,7 +349,7 @@ filtering items per feed. It can be used to shorten URLs, filter away advertisements, strip tracking parameters and more. # filter fields. 
- # filter(name) + # filter(name, url) filter() { case "$1" in "tweakers") @@ -643,14 +649,14 @@ RSS/Atom data or change the default curl options: - - - -Caching, incremental data updates and bandwidth-saving +Caching, incremental data updates and bandwidth saving -For servers that support it some incremental updates and bandwidth-saving can +For servers that support it some incremental updates and bandwidth saving can be done by using the "ETag" HTTP header. -Create a directory for storing the ETags per feed: +Create a directory for storing the ETags and modification timestamps per feed: - mkdir -p ~/.sfeed/etags/ + mkdir -p ~/.sfeed/etags ~/.sfeed/lastmod The curl ETag options (--etag-save and --etag-compare) can be used to store and send the previous ETag header value. curl version 7.73+ is recommended for it @@ -669,13 +675,31 @@ file: # fetch(name, url, feedfile) fetch() { - etag="$HOME/.sfeed/etags/$(basename "$3")" + basename="$(basename "$3")" + etag="$HOME/.sfeed/etags/${basename}" + lastmod="$HOME/.sfeed/lastmod/${basename}" + output="${sfeedtmpdir}/feeds/${filename}.xml" + curl \ - -L --max-redirs 0 -H "User-Agent:" -f -s -m 15 \ + -f -s -m 15 \ + -L --max-redirs 0 \ + -H "User-Agent: sfeed" \ --compressed \ --etag-save "${etag}" --etag-compare "${etag}" \ - -z "${etag}" \ - "$2" 2>/dev/null + -R -o "${output}" \ + -z "${lastmod}" \ + "$2" 2>/dev/null || return 1 + + # succesful, but no file written: assume it is OK and Not Modified. + [ -e "${output}" ] || return 0 + + # use server timestamp from curl -R to set Last-Modified. + touch -r "${output}" "${lastmod}" 2>/dev/null + cat "${output}" 2>/dev/null + # use write output status, other errors are ignored here. + fetchstatus="$?" + rm -f "${output}" 2>/dev/null + return "${fetchstatus}" } These options can come at a cost of some privacy, because it exposes @@ -686,8 +710,8 @@ additional metadata from the previous request. 
CDNs blocking requests due to a missing HTTP User-Agent request header sfeed_update will not send the "User-Agent" header by default for privacy -reasons. Some CDNs like Cloudflare don't like this and will block such HTTP -requests. +reasons. Some CDNs like Cloudflare or websites like Reddit.com don't like this +and will block such HTTP requests. A custom User-Agent can be set by using the curl -H option, like so: @@ -712,62 +736,6 @@ sfeedrc file and change the curl options "-L --max-redirs 0". - - - -Shellscript to update feeds in parallel more efficiently using xargs -P. - -It creates a queue of the feeds with its settings, then uses xargs to process -them in parallel using the common, but non-POSIX -P option. This is more -efficient than the more portable solution in sfeed_update which can stall a -batch of $maxjobs in the queue if one item is slow. - -sfeed_update_xargs shellscript: - - #!/bin/sh - # update feeds, merge with old feeds using xargs in parallel mode (non-POSIX). - - # include script and reuse its functions, but do not start main(). - SFEED_UPDATE_INCLUDE="1" . sfeed_update - # load config file, sets $config. - loadconfig "$1" - - # process a single feed. - # args are: config, tmpdir, name, feedurl, basesiteurl, encoding - if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then - sfeedtmpdir="$2" - _feed "$3" "$4" "$5" "$6" - exit $? - fi - - # ...else parent mode: - - # feed(name, feedurl, basesiteurl, encoding) - feed() { - # workaround: *BSD xargs doesn't handle empty fields in the middle. - name="${1:-$$}" - feedurl="${2:-http://}" - basesiteurl="${3:-${feedurl}}" - encoding="$4" - - printf '%s\0%s\0%s\0%s\0%s\0%s\0' "${config}" "${sfeedtmpdir}" \ - "${name}" "${feedurl}" "${basesiteurl}" "${encoding}" - } - - # fetch feeds and store in temporary directory. - sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" - mkdir -p "${sfeedtmpdir}/feeds" - touch "${sfeedtmpdir}/ok" - # make sure path exists. 
- mkdir -p "${sfeedpath}" - # print feeds for parallel processing with xargs. - feeds | SFEED_UPDATE_CHILD="1" xargs -r -0 -P "${maxjobs}" -L 6 "$(readlink -f "$0")" - status=$? - # check error exit status indicator for parallel jobs. - test -f "${sfeedtmpdir}/ok" || status=1 - # cleanup temporary files etc. - cleanup - exit ${status} - -- - - - Shellscript to handle URLs and enclosures in parallel using xargs -P. This can be used to download and process URLs for downloading podcasts, @@ -781,7 +749,7 @@ arguments are specified then the data is read from stdin. #!/bin/sh # sfeed_download: downloader for URLs and enclosures in sfeed(5) files. - # Dependencies: awk, curl, flock, xargs (-P), youtube-dl. + # Dependencies: awk, curl, flock, xargs (-P), yt-dlp. cachefile="${SFEED_CACHEFILE:-$HOME/.sfeed/downloaded_urls}" jobs="${SFEED_JOBS:-4}" @@ -801,7 +769,7 @@ arguments are specified then the data is read from stdin. fetch() { case "$1" in *youtube.com*) - youtube-dl "$1";; + yt-dlp "$1";; *.flac|*.ogg|*.m3u|*.m3u8|*.m4a|*.mkv|*.mp3|*.mp4|*.wav|*.webm) # allow 2 redirects, hide User-Agent, connect timeout is 15 seconds. curl -O -L --max-redirs 2 -H "User-Agent:" -f -s --connect-timeout 15 "$1";; @@ -851,7 +819,7 @@ arguments are specified then the data is read from stdin. # ...else parent mode: - tmp=$(mktemp) + tmp="$(mktemp)" || exit 1 trap "rm -f ${tmp}" EXIT [ -f "${cachefile}" ] || touch "${cachefile}" @@ -1198,7 +1166,7 @@ Example of a `markallread.sh` shellscript to mark all URLs as read: #!/bin/sh # mark all items/URLs as read. - tmp=$(mktemp) + tmp="$(mktemp)" || exit 1 (cat ~/.sfeed/urls; cut -f 3 ~/.sfeed/feeds/*) | \ awk '!x[$0]++' > "$tmp" && mv "$tmp" ~/.sfeed/urls && @@ -1242,6 +1210,36 @@ This changes the yank command to set the tmux buffer, instead of X11 xclip: SFEED_YANKER="tmux set-buffer \`cat\`" +Alternative for xargs -P and -0 +------------------------------- + +Most xargs implementations support the options -P and -0. 
+GNU or *BSD has supported them for over 20+ years! + +These functions in sfeed_update can be overridden in sfeedrc, if you don't want +to use xargs: + + feed() { + # wait until ${maxjobs} are finished: will stall the queue if an item + # is slow, but it is portable. + [ ${signo} -ne 0 ] && return + [ $((curjobs % maxjobs)) -eq 0 ] && wait + [ ${signo} -ne 0 ] && return + curjobs=$((curjobs + 1)) + + _feed "$@" & + } + + runfeeds() { + # job counter. + curjobs=0 + # fetch feeds specified in config file. + feeds + # wait till all feeds are fetched (concurrently). + [ ${signo} -eq 0 ] && wait + } + + Known terminal issues --------------------- diff --git a/sfeed.1 b/sfeed.1 @@ -1,4 +1,4 @@ -.Dd January 7, 2023 +.Dd October 27, 2024 .Dt SFEED 1 .Os .Sh NAME @@ -52,51 +52,51 @@ Item, categories, multiple values are separated by the '|' character. .Sh EXIT STATUS .Ex -std .Sh EXAMPLES -.Bd -literal +.Bd -literal -offset 4n curl -s 'https://codemadness.org/atom.xml' | sfeed .Ed .Pp To convert the character set from a feed that is not UTF-8 encoded the .Xr iconv 1 tool can be used: -.Bd -literal +.Bd -literal -offset 4n curl -s 'https://codemadness.org/some_iso-8859-1_feed.xml' | \e iconv -f iso-8859-1 -t utf-8 | \e sfeed .Ed .Sh EXAMPLE SETUP 1. Create a directory for the sfeedrc configuration and the feeds: -.Bd -literal - mkdir -p ~/.sfeed/feeds +.Bd -literal -offset 4n +mkdir -p ~/.sfeed/feeds .Ed .Pp 2. Copy the example .Xr sfeedrc 5 configuration: -.Bd -literal - cp sfeedrc.example ~/.sfeed/sfeedrc - $EDITOR ~/.sfeed/sfeedrc +.Bd -literal -offset 4n +cp sfeedrc.example ~/.sfeed/sfeedrc +$EDITOR ~/.sfeed/sfeedrc .Ed .Pp Or import existing OPML subscriptions using .Xr sfeed_opml_import 1 : -.Bd -literal - sfeed_opml_import < file.opml > ~/.sfeed/sfeedrc +.Bd -literal -offset 4n +sfeed_opml_import < file.opml > ~/.sfeed/sfeedrc .Ed .Pp 3. 
To update feeds and merge the new items with existing items: -.Bd -literal - sfeed_update +.Bd -literal -offset 4n +sfeed_update .Ed .Pp 4. Format feeds to a plain-text list: -.Bd -literal - sfeed_plain ~/.sfeed/feeds/* +.Bd -literal -offset 4n +sfeed_plain ~/.sfeed/feeds/* .Ed .Pp Or format feeds to a curses interface: -.Bd -literal - sfeed_curses ~/.sfeed/feeds/* +.Bd -literal -offset 4n +sfeed_curses ~/.sfeed/feeds/* .Ed .Pp There are also other formatting programs included. @@ -111,5 +111,5 @@ The README file has more examples. .Sh AUTHORS .An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org .Sh CAVEATS -If a timezone for the timestamp field is not in the RFC822 or RFC3339 format it -is not supported and the timezone is interpreted as UTC+0. +If a timezone for the timestamp field is not in the RFC 822 or RFC 3339 format +it is not supported and the timezone is interpreted as UTC+0. diff --git a/sfeed.c b/sfeed.c @@ -125,7 +125,7 @@ static void xmltagstart(XMLParser *, const char *, size_t); static void xmltagstartparsed(XMLParser *, const char *, size_t, int); /* map tag name to TagId type */ -/* RSS, must be alphabetical order */ +/* RSS, keep this in alphabetical order */ static const FeedTag rsstags[] = { { STRP("author"), RSSTagAuthor }, { STRP("category"), RSSTagCategory }, @@ -142,7 +142,7 @@ static const FeedTag rsstags[] = { { STRP("title"), RSSTagTitle } }; -/* Atom, must be alphabetical order */ +/* Atom, keep this in alphabetical order */ static const FeedTag atomtags[] = { { STRP("author"), AtomTagAuthor }, { STRP("category"), AtomTagCategory }, @@ -212,34 +212,33 @@ static FeedContext ctx; static XMLParser parser; /* XML parser state */ static String attrispermalink, attrrel, attrtype, tmpstr; -static int -tagcmp(const void *v1, const void *v2) -{ - return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name); -} - -/* Unique tagid for parsed tag name. */ +/* Unique tag(id) for parsed tag name. 
*/ static FeedTag * gettag(enum FeedType feedtype, const char *name, size_t namelen) { - FeedTag f, *r = NULL; - - f.name = (char *)name; + FeedTag *r; + size_t i; switch (feedtype) { case FeedTypeRSS: - r = bsearch(&f, rsstags, sizeof(rsstags) / sizeof(rsstags[0]), - sizeof(rsstags[0]), tagcmp); + for (i = 0; i < sizeof(rsstags) / sizeof(rsstags[0]); i++) { + r = (FeedTag *)&rsstags[i]; + if (r->len == namelen && !strcasecmp(r->name, name)) + return r; + } break; case FeedTypeAtom: - r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]), - sizeof(atomtags[0]), tagcmp); + for (i = 0; i < sizeof(atomtags) / sizeof(atomtags[0]); i++) { + r = (FeedTag *)&atomtags[i]; + if (r->len == namelen && !strcasecmp(r->name, name)) + return r; + } break; default: break; } - return r; + return NULL; } static char * @@ -293,7 +292,7 @@ string_append(String *s, const char *data, size_t len) return; if (s->len >= SIZE_MAX - len) { - errno = EOVERFLOW; + errno = ENOMEM; err(1, "realloc"); } @@ -383,7 +382,7 @@ string_print_trimmed_multi(String *s) } } -/* Print URL, if it's a relative URL then it uses the global `baseurl`. */ +/* Print URL, if it is a relative URL then it uses the global `baseurl`. */ static void printuri(char *s) { @@ -409,7 +408,7 @@ printuri(char *s) *e = c; /* restore NUL byte to original character */ } -/* Print URL, if it's a relative URL then it uses the global `baseurl`. */ +/* Print URL, if it is a relative URL then it uses the global `baseurl`. */ static void string_print_uri(String *s) { @@ -432,18 +431,23 @@ string_print_timestamp(String *s) printf("%lld", t); } -/* Convert time fields. Returns a UNIX timestamp. */ +/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp. + * Parameters should be passed as they are in a struct tm: + * that is: year = year - 1900, month = month - 1. 
*/ static long long datetounix(long long year, int mon, int day, int hour, int min, int sec) { - static const int secs_through_month[] = { + /* seconds in a month in a regular (non-leap) year */ + static const long secs_through_month[] = { 0, 31 * 86400, 59 * 86400, 90 * 86400, 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400, 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 }; int is_leap = 0, cycles, centuries = 0, leaps = 0, rem; long long t; + /* optimization: handle common range year 1902 up to and including 2038 */ if (year - 2ULL <= 136) { + /* amount of leap days relative to 1970: every 4 years */ leaps = (year - 68) >> 2; if (!((year - 68) & 3)) { leaps--; @@ -451,8 +455,11 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec) } else { is_leap = 0; } - t = 31536000 * (year - 70) + 86400 * leaps; + t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */ } else { + /* general leap year calculation: + * leap years occur mostly every 4 years but every 100 years + * a leap year is skipped unless the year is divisible by 400 */ cycles = (year - 100) / 400; rem = (year - 100) % 400; if (rem < 0) { @@ -462,20 +469,27 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec) if (!rem) { is_leap = 1; } else { - if (rem >= 300) - centuries = 3, rem -= 300; - else if (rem >= 200) - centuries = 2, rem -= 200; - else if (rem >= 100) - centuries = 1, rem -= 100; + if (rem >= 300) { + centuries = 3; + rem -= 300; + } else if (rem >= 200) { + centuries = 2; + rem -= 200; + } else if (rem >= 100) { + centuries = 1; + rem -= 100; + } if (rem) { leaps = rem / 4U; rem %= 4U; is_leap = !rem; } } - leaps += 97 * cycles + 24 * centuries - is_leap; - t = (year - 100) * 31536000LL + leaps * 86400LL + 946684800 + 86400; + leaps += (97 * cycles) + (24 * centuries) - is_leap; + + /* adjust 8 leap days from 1970 up to and including 2000: + * ((30 * 365) + 8) * 86400 = 946771200 */ + t = ((year - 100) * 31536000LL) + (leaps 
* 86400LL) + 946771200LL; } t += secs_through_month[mon]; if (is_leap && mon >= 2) @@ -489,10 +503,10 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec) } /* Get timezone from string, return time offset in seconds from UTC. - * NOTE: only parses timezones in RFC-822, many other timezone names are + * NOTE: only parses timezones in RFC 822, many other timezone names are * ambiguous anyway. - * ANSI and military zones are defined wrong in RFC822 and are unsupported, - * see note on RFC2822 4.3 page 32. */ + * ANSI and military zones are defined wrong in RFC 822 and are unsupported, + * see note on RFC 2822 4.3 page 32. */ static long gettzoffset(const char *s) { @@ -540,7 +554,7 @@ gettzoffset(const char *s) } /* Parse time string `s` into the UNIX timestamp `tp`. - Returns 0 on success or -1 on failure. */ + * Returns 0 on success or -1 on failure. */ static int parsetime(const char *s, long long *tp) { @@ -612,9 +626,9 @@ parsetime(const char *s, long long *tp) ; for (v = 0, i = 0; i < 4 && ISDIGIT((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); - /* obsolete short year: RFC2822 4.3 */ - if (i <= 3) - v += (v >= 0 && v <= 49) ? 2000 : 1900; + /* obsolete short year: RFC 2822 4.3 */ + if (i == 2 || i == 3) + v += (i == 2 && v >= 0 && v <= 49) ? 2000 : 1900; va[0] = v; /* year */ for (; ISSPACE((unsigned char)*s); s++) ; @@ -630,14 +644,14 @@ parsetime(const char *s, long long *tp) } va[vi] = v; - if ((vi < 2 && *s == '-') || - (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) || + if ((vi < 2 && (*s == '-' || *s == '/')) || + (vi == 2 && (*s == 'T' || *s == 't' || ISSPACE((unsigned char)*s))) || (vi > 2 && *s == ':')) s++; } /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */ - if (*s == '.') { + if (*s == '.' 
|| *s == ',') { for (s++; ISDIGIT((unsigned char)*s); s++) ; } @@ -709,8 +723,8 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, if (!ctx.tag.id) return; - /* content-type may be: Atom: text, xhtml, html or mime-type. - MRSS (media:description): plain, html. */ + /* content-type may be for Atom: text, xhtml, html or a mime-type. + * for MRSS (media:description): plain, html. */ if (ISCONTENTTAG(ctx)) { if (isattr(n, nl, STRP("type"))) string_append(&attrtype, v, vl); @@ -743,7 +757,7 @@ static void xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, const char *data, size_t datalen) { - char buf[16]; + char buf[8]; int len; /* handles transforming inline XML to data */ @@ -820,7 +834,7 @@ xmldata(XMLParser *p, const char *s, size_t len) static void xmldataentity(XMLParser *p, const char *data, size_t datalen) { - char buf[16]; + char buf[8]; int len; if (!ctx.field) @@ -896,7 +910,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) return; } - /* set tag type based on it's attribute value */ + /* set tag type based on its attribute value */ if (ctx.tag.id == RSSTagGuid) { /* if empty the default is "true" */ if (!attrispermalink.len || @@ -906,7 +920,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) ctx.tag.id = RSSTagGuidPermalinkFalse; } else if (ctx.tag.id == AtomTagLink) { /* empty or "alternate": other types could be - "enclosure", "related", "self" or "via" */ + * "enclosure", "related", "self" or "via" */ if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate"))) ctx.tag.id = AtomTagLinkAlternate; else if (isattr(attrrel.data, attrrel.len, STRP("enclosure"))) @@ -918,7 +932,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) tagid = ctx.tag.id; /* map tag type to field: unknown or lesser priority is ignored, - when tags of the same type are repeated only the first is used. 
*/ + * when tags of the same type are repeated only the first is used. */ if (fieldmap[tagid] == -1 || (!ISFEEDFIELDMULTI(fieldmap[tagid]) && tagid <= ctx.fields[fieldmap[tagid]].tagid)) { @@ -952,7 +966,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) ctx.fields[fieldmap[tagid]].tagid = tagid; /* clear field if it is overwritten (with a priority order) for the new - value, if the field can have multiple values then do not clear it. */ + * value, if the field can have multiple values then do not clear it. */ if (!ISFEEDFIELDMULTI(fieldmap[ctx.tag.id])) string_clear(ctx.field); } @@ -976,9 +990,9 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) return; } } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) { - /* matched tag end: close it */ - /* copy also to the link field if the attribute isPermaLink="true" - and it is not set by a tag with higher priority. */ + /* matched tag end: close it. + * copy also to the link field if the attribute isPermaLink="true" + * and it is not set by a tag with higher priority. */ if (ctx.tag.id == RSSTagGuidPermalinkTrue && ctx.field && ctx.tag.id > ctx.fields[FeedFieldLink].tagid) { string_clear(&ctx.fields[FeedFieldLink].str); @@ -1007,8 +1021,8 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) } /* temporary string: for fields that cannot be processed - directly and need more context, for example by it's tag - attributes, like the Atom link rel="alternate|enclosure". */ + * directly and need more context, for example by its tag + * attributes, like the Atom link rel="alternate|enclosure". 
*/ if (tmpstr.len && ctx.field) { if (ISFEEDFIELDMULTI(fieldmap[ctx.tag.id])) { if (ctx.field->len) @@ -1058,7 +1072,7 @@ main(int argc, char *argv[]) parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "<stdin>", 'r'); diff --git a/sfeed_atom.c b/sfeed_atom.c @@ -1,5 +1,3 @@ -#include <sys/types.h> - #include <stdio.h> #include <string.h> #include <time.h> @@ -22,6 +20,8 @@ printcontent(const char *s) case '&': fputs("&amp;", stdout); break; case '"': fputs("&quot;", stdout); break; case '\\': + if (*(s + 1) == '\0') + break; s++; switch (*s) { case 'n': putchar('\n'); break; @@ -93,9 +93,9 @@ printfeed(FILE *fp, const char *feedname) fputs("\t<content type=\"html\">", stdout); } else { /* NOTE: an RSS/Atom viewer may or may not format - whitespace such as newlines. - Workaround: type="html" and <![CDATA[<pre></pre>]]> */ - fputs("\t<content type=\"text\">", stdout); + * whitespace such as newlines. 
+ * Workaround: type="html" and <![CDATA[<pre></pre>]]> */ + fputs("\t<content>", stdout); } printcontent(fields[FieldContent]); fputs("</content>\n", stdout); @@ -128,12 +128,11 @@ main(int argc, char *argv[]) if ((now = time(NULL)) == (time_t)-1) errx(1, "time"); if (!(tm = gmtime_r(&now, &tmnow))) - err(1, "gmtime_r"); + err(1, "gmtime_r: can't get current time"); fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n" - "\t<title type=\"text\">Newsfeed</title>\n" - "\t<author><name>sfeed</name></author>\n", stdout); + "\t<title>Newsfeed</title>\n", stdout); printf("\t<id>urn:newsfeed:%lld</id>\n" "\t<updated>%04d-%02d-%02dT%02d:%02d:%02dZ</updated>\n", (long long)now, diff --git a/sfeed_content.1 b/sfeed_content.1 @@ -1,4 +1,4 @@ -.Dd December 22, 2021 +.Dd October 27, 2024 .Dt SFEED_CONTENT 1 .Os .Sh NAME @@ -35,12 +35,13 @@ If it is not set it will use lynx by default. .Sh EXIT STATUS .Ex -std .Sh EXAMPLES -.Bd -literal +Example: +.Bd -literal -offset 4n curl -s 'https://codemadness.org/atom_content.xml' | sfeed | sfeed_content .Ed .Pp -The output format looks like this: -.Bd -literal +The output format will look like this: +.Bd -literal -offset 4n Title: The title. Author: The line with the author if it is set. Category: The line with the categories if it is set. diff --git a/sfeed_curses.1 b/sfeed_curses.1 @@ -1,4 +1,4 @@ -.Dd December 20, 2022 +.Dd October 27, 2024 .Dt SFEED_CURSES 1 .Os .Sh NAME @@ -30,6 +30,7 @@ arguments are specified then the data is read from stdin and the feed name is .Pp Items with a timestamp from the last day compared to the system time at the time of loading the feed are marked as new and bold. +This value might be overridden through environment variables. There is also an alternative mode available to mark items as read by matching it against a list of URLs from a plain-text file. Items with an enclosure are marked with a @ symbol. 
@@ -140,12 +141,12 @@ This will only work when .Ev SFEED_URL_FILE is set. .It f -Mark all items of the current loaded feed as read. +Mark all items of the currently loaded feed as read. This will only work when .Ev SFEED_URL_FILE is set. .It F -Mark all items of the current loaded feed as unread. +Mark all items of the currently loaded feed as unread. This will only work when .Ev SFEED_URL_FILE is set. @@ -207,6 +208,9 @@ SIGWINCH. Read and process a sequence of keys as input commands from this environment variable first, afterwards it reads from the tty as usual. This can be useful to automate certain actions at the start. +.It Ev SFEED_NEW_AGE +Overwrite the maximum age in seconds to mark feeds as new. +By default this is 86400, which equals one day. .It Ev SFEED_PIPER A program where the whole TAB-Separated Value line is piped to. By default this is "sfeed_content". @@ -295,14 +299,15 @@ When plumbing an URL then stdin is closed also. .Ex -std The exit status is 130 on SIGINT and 143 on SIGTERM. .Sh EXAMPLES -.Bd -literal +Example: +.Bd -literal -offset 4n sfeed_curses ~/.sfeed/feeds/* .Ed .Pp Another example which shows some of the features .Nm has: -.Bd -literal +.Bd -literal -offset 4n export SFEED_AUTOCMD="2tgo" export SFEED_URL_FILE="$HOME/.sfeed/urls" [ -f "$SFEED_URL_FILE" ] || touch "$SFEED_URL_FILE" @@ -312,16 +317,16 @@ sfeed_curses ~/.sfeed/feeds/* Which does the following: .Bl -enum .It -Set commands to execute automatically: +Set commands to execute automatically on startup: .Pp -Set the current layout to a horizontal mode ('2' keybind'). +Set the current layout to a horizontal mode ('2' keybind). Showing a feeds sidebar on the top and the feed items on the bottom. .Pp Toggle showing only feeds with new items in the sidebar ('t' keybind). .Pp Go to the first row in the current panel ('g' keybind). .Pp -Load the current selected feed ('o' keybind'). +Load the currently selected feed ('o' keybind). 
.It Set a file to use for managing read and unread items. This is a plain-text file containing a list of read URLs, one URL per line. diff --git a/sfeed_curses.c b/sfeed_curses.c @@ -1,7 +1,5 @@ #include <sys/ioctl.h> #include <sys/select.h> -#include <sys/time.h> -#include <sys/types.h> #include <sys/wait.h> #include <errno.h> @@ -141,18 +139,18 @@ struct items { size_t cap; /* available capacity */ }; -void alldirty(void); -void cleanup(void); -void draw(void); -int getsidebarsize(void); -void markread(struct pane *, off_t, off_t, int); -void pane_draw(struct pane *); -void sighandler(int); -void updategeom(void); -void updatesidebar(void); -void urls_free(struct urls *); -int urls_hasmatch(struct urls *, const char *); -void urls_read(struct urls *, const char *); +static void alldirty(void); +static void cleanup(void); +static void draw(void); +static int getsidebarsize(void); +static void markread(struct pane *, off_t, off_t, int); +static void pane_draw(struct pane *); +static void sighandler(int); +static void updategeom(void); +static void updatesidebar(void); +static void urls_free(struct urls *); +static int urls_hasmatch(struct urls *, const char *); +static void urls_read(struct urls *, const char *); static struct linebar linebar; static struct statusbar statusbar; @@ -175,7 +173,7 @@ static struct feed *feeds; static struct feed *curfeed; static size_t nfeeds; /* amount of feeds */ static time_t comparetime; -struct urls urls; +static struct urls urls; static char *urlfile; volatile sig_atomic_t state_sigchld = 0, state_sighup = 0, state_sigint = 0; @@ -192,7 +190,7 @@ static int piperia = 1; /* env variable: $SFEED_PIPER_INTERACTIVE */ static int yankeria = 0; /* env variable: $SFEED_YANKER_INTERACTIVE */ static int lazyload = 0; /* env variable: $SFEED_LAZYLOAD */ -int +static int ttywritef(const char *fmt, ...) { va_list ap; @@ -206,7 +204,7 @@ ttywritef(const char *fmt, ...) 
return n; } -int +static int ttywrite(const char *s) { if (!s) @@ -215,7 +213,7 @@ ttywrite(const char *s) } /* Print to stderr, call cleanup() and _exit(). */ -__dead void +__dead static void die(const char *fmt, ...) { va_list ap; @@ -236,7 +234,7 @@ die(const char *fmt, ...) _exit(1); } -void * +static void * erealloc(void *ptr, size_t size) { void *p; @@ -246,7 +244,7 @@ erealloc(void *ptr, size_t size) return p; } -void * +static void * ecalloc(size_t nmemb, size_t size) { void *p; @@ -256,7 +254,7 @@ ecalloc(size_t nmemb, size_t size) return p; } -char * +static char * estrdup(const char *s) { char *p; @@ -267,7 +265,7 @@ estrdup(const char *s) } /* Wrapper for tparm() which allows NULL parameter for str. */ -char * +static char * tparmnull(const char *str, long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { @@ -278,7 +276,7 @@ tparmnull(const char *str, long p1, long p2, long p3, long p4, long p5, long p6, } /* Counts column width of character string. */ -size_t +static size_t colw(const char *s) { wchar_t wc; @@ -309,8 +307,8 @@ colw(const char *s) } /* Format `len` columns of characters. If string is shorter pad the rest - with characters `pad`. */ -int + * with characters `pad`. */ +static int utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad) { wchar_t wc; @@ -375,13 +373,13 @@ utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad) return 0; } -void +static void resetstate(void) { ttywrite("\x1b""c"); /* rs1: reset title and state */ } -void +static void updatetitle(void) { unsigned long totalnew = 0, total = 0; @@ -394,32 +392,32 @@ updatetitle(void) ttywritef("\x1b]2;(%lu/%lu) - sfeed_curses\x1b\\", totalnew, total); } -void +static void appmode(int on) { ttywrite(tparmnull(on ? enter_ca_mode : exit_ca_mode, 0, 0, 0, 0, 0, 0, 0, 0, 0)); } -void +static void mousemode(int on) { ttywrite(on ? "\x1b[?1000h" : "\x1b[?1000l"); /* xterm X10 mouse mode */ ttywrite(on ? 
"\x1b[?1006h" : "\x1b[?1006l"); /* extended SGR mouse mode */ } -void +static void cursormode(int on) { ttywrite(tparmnull(on ? cursor_normal : cursor_invisible, 0, 0, 0, 0, 0, 0, 0, 0, 0)); } -void +static void cursormove(int x, int y) { ttywrite(tparmnull(cursor_address, y, x, 0, 0, 0, 0, 0, 0, 0)); } -void +static void cursorsave(void) { /* do not save the cursor if it won't be restored anyway */ @@ -427,7 +425,7 @@ cursorsave(void) ttywrite(tparmnull(save_cursor, 0, 0, 0, 0, 0, 0, 0, 0, 0)); } -void +static void cursorrestore(void) { /* if the cursor cannot be hidden then move to a consistent position */ @@ -437,7 +435,7 @@ cursorrestore(void) cursormove(0, 0); } -void +static void attrmode(int mode) { switch (mode) { @@ -458,19 +456,19 @@ attrmode(int mode) } } -void +static void cleareol(void) { ttywrite(tparmnull(clr_eol, 0, 0, 0, 0, 0, 0, 0, 0, 0)); } -void +static void clearscreen(void) { ttywrite(tparmnull(clear_screen, 0, 0, 0, 0, 0, 0, 0, 0, 0)); } -void +static void cleanup(void) { struct sigaction sa; @@ -499,7 +497,7 @@ cleanup(void) sigaction(SIGWINCH, &sa, NULL); } -void +static void win_update(struct win *w, int width, int height) { if (width != w->width || height != w->height) @@ -508,7 +506,7 @@ win_update(struct win *w, int width, int height) w->height = height; } -void +static void resizewin(void) { struct winsize winsz; @@ -523,7 +521,7 @@ resizewin(void) alldirty(); } -void +static void init(void) { struct sigaction sa; @@ -563,7 +561,7 @@ init(void) sigaction(SIGWINCH, &sa, NULL); } -void +static void processexit(pid_t pid, int interactive) { struct sigaction sa; @@ -592,11 +590,11 @@ processexit(pid_t pid, int interactive) } /* Pipe item line or item field to a program. - If `field` is -1 then pipe the TSV line, else a specified field. - if `interactive` is 1 then cleanup and restore the tty and wait on the - process. - if 0 then don't do that and also write stdout and stderr to /dev/null. 
*/ -void + * If `field` is -1 then pipe the TSV line, else a specified field. + * if `interactive` is 1 then cleanup and restore the tty and wait on the + * process. + * if 0 then don't do that and also write stdout and stderr to /dev/null. */ +static void pipeitem(const char *cmd, struct item *item, int field, int interactive) { FILE *fp; @@ -636,7 +634,7 @@ pipeitem(const char *cmd, struct item *item, int field, int interactive) } } -void +static void forkexec(char *argv[], int interactive) { pid_t pid; @@ -660,7 +658,7 @@ forkexec(char *argv[], int interactive) } } -struct row * +static struct row * pane_row_get(struct pane *p, off_t pos) { if (pos < 0 || pos >= p->nrows) @@ -671,7 +669,7 @@ pane_row_get(struct pane *p, off_t pos) return p->rows + pos; } -char * +static char * pane_row_text(struct pane *p, struct row *row) { /* custom formatter */ @@ -680,7 +678,7 @@ pane_row_text(struct pane *p, struct row *row) return row->text; } -int +static int pane_row_match(struct pane *p, struct row *row, const char *s) { if (p->row_match) @@ -688,7 +686,7 @@ pane_row_match(struct pane *p, struct row *row, const char *s) return (strcasestr(pane_row_text(p, row), s) != NULL); } -void +static void pane_row_draw(struct pane *p, off_t pos, int selected) { struct row *row; @@ -721,7 +719,7 @@ pane_row_draw(struct pane *p, off_t pos, int selected) cursorrestore(); } -void +static void pane_setpos(struct pane *p, off_t pos) { if (pos < 0) @@ -745,7 +743,7 @@ pane_setpos(struct pane *p, off_t pos) p->pos = pos; } -void +static void pane_scrollpage(struct pane *p, int pages) { off_t pos; @@ -763,13 +761,13 @@ pane_scrollpage(struct pane *p, int pages) } } -void +static void pane_scrolln(struct pane *p, int n) { pane_setpos(p, p->pos + n); } -void +static void pane_setfocus(struct pane *p, int on) { if (p->focused != on) { @@ -778,7 +776,7 @@ pane_setfocus(struct pane *p, int on) } } -void +static void pane_draw(struct pane *p) { off_t pos, y; @@ -795,7 +793,7 @@ pane_draw(struct 
pane *p) pane_row_draw(p, y + pos, (y + pos) == p->pos); } -void +static void setlayout(int n) { if (layout != LayoutMonocle) @@ -803,7 +801,7 @@ setlayout(int n) layout = n; } -void +static void updategeom(void) { int h, w, x = 0, y = 0; @@ -877,7 +875,7 @@ updategeom(void) alldirty(); } -void +static void scrollbar_setfocus(struct scrollbar *s, int on) { if (s->focused != on) { @@ -886,7 +884,7 @@ scrollbar_setfocus(struct scrollbar *s, int on) } } -void +static void scrollbar_update(struct scrollbar *s, off_t pos, off_t nrows, int pageheight) { int tickpos = 0, ticksize = 0; @@ -911,7 +909,7 @@ scrollbar_update(struct scrollbar *s, off_t pos, off_t nrows, int pageheight) s->ticksize = ticksize; } -void +static void scrollbar_draw(struct scrollbar *s) { off_t y; @@ -950,7 +948,7 @@ scrollbar_draw(struct scrollbar *s) cursorrestore(); } -int +static int readch(void) { unsigned char b; @@ -984,7 +982,7 @@ readch(void) } } -char * +static char * lineeditor(void) { char *input = NULL; @@ -1037,7 +1035,7 @@ lineeditor(void) return input; } -char * +static char * uiprompt(int x, int y, char *fmt, ...) { va_list ap; @@ -1067,7 +1065,7 @@ uiprompt(int x, int y, char *fmt, ...) return input; } -void +static void linebar_draw(struct linebar *b) { int i; @@ -1088,7 +1086,7 @@ linebar_draw(struct linebar *b) cursorrestore(); } -void +static void statusbar_draw(struct statusbar *s) { if (!s->dirty) @@ -1101,14 +1099,14 @@ statusbar_draw(struct statusbar *s) cursormove(s->x, s->y); THEME_STATUSBAR(); /* terminals without xenl (eat newline glitch) mess up scrolling when - using the last cell on the last line on the screen. */ + * using the last cell on the last line on the screen. 
*/ printutf8pad(stdout, s->text, s->width - (!eat_newline_glitch), ' '); fflush(stdout); attrmode(ATTR_RESET); cursorrestore(); } -void +static void statusbar_update(struct statusbar *s, const char *text) { if (s->text && !strcmp(s->text, text)) @@ -1120,7 +1118,7 @@ statusbar_update(struct statusbar *s, const char *text) } /* Line to item, modifies and splits line in-place. */ -int +static int linetoitem(char *line, struct item *item) { char *fields[FieldLast]; @@ -1146,7 +1144,7 @@ linetoitem(char *line, struct item *item) return 0; } -void +static void feed_items_free(struct items *items) { size_t i; @@ -1161,7 +1159,7 @@ feed_items_free(struct items *items) items->cap = 0; } -void +static void feed_items_get(struct feed *f, FILE *fp, struct items *itemsret) { struct item *item, *items = NULL; @@ -1209,7 +1207,7 @@ feed_items_get(struct feed *f, FILE *fp, struct items *itemsret) free(line); } -void +static void updatenewitems(struct feed *f) { struct pane *p; @@ -1233,7 +1231,7 @@ updatenewitems(struct feed *f) f->total = p->nrows; } -void +static void feed_load(struct feed *f, FILE *fp) { /* static, reuse local buffers */ @@ -1254,7 +1252,7 @@ feed_load(struct feed *f, FILE *fp) updatenewitems(f); } -void +static void feed_count(struct feed *f, FILE *fp) { char *fields[FieldLast]; @@ -1283,7 +1281,7 @@ feed_count(struct feed *f, FILE *fp) free(line); } -void +static void feed_setenv(struct feed *f) { if (f && f->path) @@ -1293,7 +1291,7 @@ feed_setenv(struct feed *f) } /* Change feed, have one file open, reopen file if needed. 
*/ -void +static void feeds_set(struct feed *f) { if (curfeed) { @@ -1313,17 +1311,15 @@ feeds_set(struct feed *f) curfeed = f; } -void +static void feeds_load(struct feed *feeds, size_t nfeeds) { struct feed *f; size_t i; errno = 0; - if ((comparetime = time(NULL)) == (time_t)-1) - die("time"); - /* 1 day is old news */ - comparetime -= 86400; + if ((comparetime = getcomparetime()) == (time_t)-1) + die("getcomparetime"); for (i = 0; i < nfeeds; i++) { f = &feeds[i]; @@ -1358,7 +1354,7 @@ feeds_load(struct feed *feeds, size_t nfeeds) } /* find row position of the feed if visible, else return -1 */ -off_t +static off_t feeds_row_get(struct pane *p, struct feed *f) { struct row *row; @@ -1375,7 +1371,7 @@ feeds_row_get(struct pane *p, struct feed *f) return -1; } -void +static void feeds_reloadall(void) { struct pane *p; @@ -1404,7 +1400,7 @@ feeds_reloadall(void) pane_setpos(p, 0); } -void +static void feed_open_selected(struct pane *p) { struct feed *f; @@ -1428,7 +1424,7 @@ feed_open_selected(struct pane *p) } } -void +static void feed_plumb_selected_item(struct pane *p, int field) { struct row *row; @@ -1445,7 +1441,7 @@ feed_plumb_selected_item(struct pane *p, int field) forkexec(cmd, plumberia); } -void +static void feed_pipe_selected_item(struct pane *p) { struct row *row; @@ -1458,7 +1454,7 @@ feed_pipe_selected_item(struct pane *p) pipeitem(pipercmd, item, -1, piperia); } -void +static void feed_yank_selected_item(struct pane *p, int field) { struct row *row; @@ -1471,7 +1467,7 @@ feed_yank_selected_item(struct pane *p, int field) } /* calculate optimal (default) size */ -int +static int getsidebarsizedefault(void) { struct feed *feed; @@ -1504,7 +1500,7 @@ getsidebarsizedefault(void) return 0; } -int +static int getsidebarsize(void) { int size; @@ -1514,7 +1510,7 @@ getsidebarsize(void) return size; } -void +static void adjustsidebarsize(int n) { int size; @@ -1537,7 +1533,7 @@ adjustsidebarsize(int n) } } -void +static void updatesidebar(void) { struct 
pane *p; @@ -1589,7 +1585,7 @@ updatesidebar(void) p->pos = p->nrows - 1; } -void +static void sighandler(int signo) { switch (signo) { @@ -1601,7 +1597,7 @@ sighandler(int signo) } } -void +static void alldirty(void) { win.dirty = 1; @@ -1613,7 +1609,7 @@ alldirty(void) statusbar.dirty = 1; } -void +static void draw(void) { struct row *row; @@ -1647,7 +1643,7 @@ draw(void) statusbar_draw(&statusbar); } -void +static void mousereport(int button, int release, int keymask, int x, int y) { struct pane *p; @@ -1719,7 +1715,7 @@ mousereport(int button, int release, int keymask, int x, int y) } /* Custom formatter for feed row. */ -char * +static char * feed_row_format(struct pane *p, struct row *row) { /* static, reuse local buffers */ @@ -1760,7 +1756,7 @@ feed_row_format(struct pane *p, struct row *row) return text; } -int +static int feed_row_match(struct pane *p, struct row *row, const char *s) { struct feed *feed; @@ -1770,7 +1766,7 @@ feed_row_match(struct pane *p, struct row *row, const char *s) return (strcasestr(feed->name, s) != NULL); } -struct row * +static struct row * item_row_get(struct pane *p, off_t pos) { struct row *itemrow; @@ -1791,6 +1787,7 @@ item_row_get(struct pane *p, off_t pos) if ((linelen = getline(&line, &linesize, f->fp)) <= 0) { if (ferror(f->fp)) die("getline: %s", f->path); + free(line); return NULL; } @@ -1806,7 +1803,7 @@ item_row_get(struct pane *p, off_t pos) } /* Custom formatter for item row. 
*/ -char * +static char * item_row_format(struct pane *p, struct row *row) { /* static, reuse local buffers */ @@ -1838,7 +1835,7 @@ item_row_format(struct pane *p, struct row *row) return text; } -void +static void markread(struct pane *p, off_t from, off_t to, int isread) { struct row *row; @@ -1878,7 +1875,7 @@ markread(struct pane *p, off_t from, off_t to, int isread) _exit(status); default: /* waitpid() and block on process status change, - fail if exit statuscode was unavailable or non-zero */ + * fail if the exit status code was unavailable or non-zero */ if (waitpid(pid, &status, 0) <= 0 || status) break; @@ -1901,13 +1898,13 @@ markread(struct pane *p, off_t from, off_t to, int isread) } } -int +static int urls_cmp(const void *v1, const void *v2) { return strcmp(*((char **)v1), *((char **)v2)); } -void +static void urls_free(struct urls *urls) { while (urls->len > 0) { @@ -1920,14 +1917,14 @@ urls_free(struct urls *urls) urls->cap = 0; } -int +static int urls_hasmatch(struct urls *urls, const char *url) { return (urls->len && bsearch(&url, urls->items, urls->len, sizeof(char *), urls_cmp)); } -void +static void urls_read(struct urls *urls, const char *urlfile) { FILE *fp; @@ -2075,9 +2072,9 @@ main(int argc, char *argv[]) button &= ~keymask; /* unset key mask */ /* button numbers (0 - 2) encoded in lowest 2 bits - release does not indicate which button (so set to 0). - Handle extended buttons like scrollwheels - and side-buttons by each range. */ + * release does not indicate which button (so set to 0). + * Handle extended buttons like scrollwheels + * and side-buttons by each range. 
*/ release = 0; if (button == 3) { button = -1; @@ -2340,6 +2337,7 @@ nextpage: if (selpane == PaneItems && panes[selpane].nrows) { p = &panes[selpane]; markread(p, p->pos, p->pos, ch == 'r'); + pane_scrolln(&panes[selpane], +1); } break; case 's': /* toggle layout between monocle or non-monocle */ @@ -2366,8 +2364,8 @@ event: if (state_sigchld) { state_sigchld = 0; /* wait on child processes so they don't become a zombie, - do not block the parent process if there is no status, - ignore errors */ + * do not block the parent process if there is no status, + * ignore errors */ while (waitpid((pid_t)-1, NULL, WNOHANG) > 0) ; } diff --git a/sfeed_frames.1 b/sfeed_frames.1 @@ -23,6 +23,7 @@ file is not written. Items with a timestamp from the last day compared to the system time at the time of formatting are counted and marked as new. Items are marked as new using a bold markup. +This value might be overridden through environment variables. .Pp There is an example style.css stylesheet file included in the distribution. .Sh FILES WRITTEN @@ -37,6 +38,12 @@ feeds. The HTML file of the menu frame which contains navigation "anchor" links (like "#feedname") to the feed names in items.html. .El +.Sh ENVIRONMENT VARIABLES +.Bl -tag -width Ds +.It Ev SFEED_NEW_AGE +Overwrite the maximum age in seconds to mark feeds as new. +By default this is 86400, which equals one day. 
+.El .Sh EXIT STATUS .Ex -std .Sh EXAMPLES diff --git a/sfeed_frames.c b/sfeed_frames.c @@ -1,5 +1,3 @@ -#include <sys/types.h> - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -87,10 +85,8 @@ main(int argc, char *argv[]) if (!(feeds = calloc(argc, sizeof(struct feed)))) err(1, "calloc"); - if ((comparetime = time(NULL)) == (time_t)-1) - errx(1, "time"); - /* 1 day is old news */ - comparetime -= 86400; + if ((comparetime = getcomparetime()) == (time_t)-1) + errx(1, "getcomparetime"); /* write main index page */ if (!(fpindex = fopen("index.html", "wb"))) diff --git a/sfeed_gopher.1 b/sfeed_gopher.1 @@ -32,6 +32,7 @@ written to stdout and no files are written. .Pp Items with a timestamp from the last day compared to the system time at the time of formatting are counted and marked as new. +This value might be overridden through environment variables. Items are marked as new with the prefix "N" at the start of the line. .Sh ENVIRONMENT .Bl -tag -width Ds @@ -45,6 +46,9 @@ The default is "127.0.0.1". .It Ev SFEED_GOPHER_PORT This environment variable can be used as the Gopher Port field. The default is "70". +.It Ev SFEED_NEW_AGE +Overwrite the maximum age in seconds to mark feeds as new. +By default this is 86400, which equals one day. 
.El .Sh EXIT STATUS .Ex -std diff --git a/sfeed_gopher.c b/sfeed_gopher.c @@ -1,5 +1,3 @@ -#include <sys/types.h> - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -7,14 +5,13 @@ #include "util.h" -static struct feed f; static char *prefixpath = "/", *host = "127.0.0.1", *port = "70"; /* default */ static char *line; static size_t linesize; static time_t comparetime; /* Escape characters in gopher, CR and LF are ignored */ -void +static void gophertext(FILE *fp, const char *s) { for (; *s; s++) { @@ -64,7 +61,7 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) if (fields[FieldLink][0]) { itemtype = 'h'; - /* if it's a gopher URL then change it into a DirEntity */ + /* if it is a gopher URL then change it into a DirEntity */ if (!strncmp(fields[FieldLink], "gopher://", 9) && uri_parse(fields[FieldLink], &u) != -1) { itemhost = u.host; @@ -121,6 +118,7 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) int main(int argc, char *argv[]) { + struct feed f = { 0 }; FILE *fpitems, *fpindex, *fp; char *name, *p; int i; @@ -137,10 +135,8 @@ main(int argc, char *argv[]) err(1, "pledge"); } - if ((comparetime = time(NULL)) == (time_t)-1) - errx(1, "time"); - /* 1 day is old news */ - comparetime -= 86400; + if ((comparetime = getcomparetime()) == (time_t)-1) + errx(1, "getcomparetime"); if ((p = getenv("SFEED_GOPHER_HOST"))) host = p; diff --git a/sfeed_html.1 b/sfeed_html.1 @@ -26,9 +26,16 @@ is empty. .Pp Items with a timestamp from the last day compared to the system time at the time of formatting are counted and marked as new. +This value might be overridden through environment variables. Items are marked as new using a bold markup. .Pp There is an example style.css stylesheet file included in the distribution. +.Sh ENVIRONMENT VARIABLES +.Bl -tag -width Ds +.It Ev SFEED_NEW_AGE +Overwrite the maximum age in seconds to mark feeds as new. +By default this is 86400, which equals one day. 
+.El .Sh EXIT STATUS .Ex -std .Sh EXAMPLES diff --git a/sfeed_html.c b/sfeed_html.c @@ -1,5 +1,3 @@ -#include <sys/types.h> - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -87,10 +85,8 @@ main(int argc, char *argv[]) if (!(feeds = calloc(argc, sizeof(struct feed)))) err(1, "calloc"); - if ((comparetime = time(NULL)) == (time_t)-1) - errx(1, "time"); - /* 1 day is old news */ - comparetime -= 86400; + if ((comparetime = getcomparetime()) == (time_t)-1) + errx(1, "getcomparetime"); fputs("<!DOCTYPE HTML>\n" "<html>\n" diff --git a/sfeed_json.1 b/sfeed_json.1 @@ -0,0 +1,49 @@ +.Dd August 1, 2023 +.Dt SFEED_JSON 1 +.Os +.Sh NAME +.Nm sfeed_json +.Nd format feed data to JSON Feed +.Sh SYNOPSIS +.Nm +.Op Ar +.Sh DESCRIPTION +.Nm +formats feed data (TSV) from +.Xr sfeed 1 +from stdin or for each +.Ar file +to stdout as JSON Feed data. +If one or more +.Ar file +arguments are specified then the basename of the +.Ar file +is used as the feed name in the output. +If no +.Ar file +arguments are specified and so the data is read from stdin then the feed name +is empty. +.Pp +If +.Nm +is reading from one or more +.Ar file +arguments it will prefix the entry title with "[feed name] ". 
+.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +.Bd -literal +curl -s 'https://codemadness.org/atom.xml' | sfeed | sfeed_json +.Ed +.Sh SEE ALSO +.Xr sfeed 1 , +.Xr sfeed_atom 1 , +.Xr sfeed 5 +.Sh STANDARDS +.Rs +.%T JSON Feed Version 1.1 +.%U https://www.jsonfeed.org/version/1.1/ +.%D Nov, 2022 +.Re +.Sh AUTHORS +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org diff --git a/sfeed_json.c b/sfeed_json.c @@ -0,0 +1,172 @@ +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "util.h" + +static char *line; +static size_t linesize; +static int firstitem = 1; + +/* Unescape / decode fields printed by string_print_encoded() */ +static void +printcontent(const char *s) +{ + for (; *s; s++) { + switch (*s) { + case '\\': + if (*(s + 1) == '\0') + break; + s++; + switch (*s) { + case 'n': fputs("\\n", stdout); break; + case '\\': fputs("\\\\", stdout); break; + case 't': fputs("\\t", stdout); break; + } + break; /* ignore invalid escape sequence */ + case '"': fputs("\\\"", stdout); break; + default: + putchar(*s); + break; + } + } +} + +static void +printfield(const char *s) +{ + for (; *s; s++) { + if (*s == '\\') + fputs("\\\\", stdout); + else if (*s == '"') + fputs("\\\"", stdout); + else + putchar(*s); + } +} + +static void +printfeed(FILE *fp, const char *feedname) +{ + char *fields[FieldLast], timebuf[32]; + struct tm parsedtm, *tm; + time_t parsedtime; + ssize_t linelen; + int ch; + char *p, *s; + + while ((linelen = getline(&line, &linesize, fp)) > 0 && + !ferror(stdout)) { + if (line[linelen - 1] == '\n') + line[--linelen] = '\0'; + parseline(line, fields); + + if (!firstitem) + fputs(",\n", stdout); + firstitem = 0; + + fputs("{\n\t\"id\": \"", stdout); + printfield(fields[FieldId]); + fputs("\"", stdout); + + parsedtime = 0; + if (!strtotime(fields[FieldUnixTimestamp], &parsedtime) && + (tm = gmtime_r(&parsedtime, &parsedtm)) && + strftime(timebuf, sizeof(timebuf), "%Y-%m-%dT%H:%M:%SZ", tm)) { + fputs(",\n\t\"date_published\": \"", stdout); + 
fputs(timebuf, stdout); + fputs("\"", stdout); + } + + fputs(",\n\t\"title\": \"", stdout); + if (feedname[0]) { + fputs("[", stdout); + printfield(feedname); + fputs("] ", stdout); + } + printfield(fields[FieldTitle]); + fputs("\"", stdout); + + if (fields[FieldLink][0]) { + fputs(",\n\t\"url\": \"", stdout); + printfield(fields[FieldLink]); + fputs("\"", stdout); + } + + if (fields[FieldAuthor][0]) { + fputs(",\n\t\"authors\": [{\"name\": \"", stdout); + printfield(fields[FieldAuthor]); + fputs("\"}]", stdout); + } + + if (fields[FieldCategory][0]) { + fputs(",\n\t\"tags\": [", stdout); + + for (p = s = fields[FieldCategory]; ; s++) { + if (*s == '|' || *s == '\0') { + if (p != fields[FieldCategory]) + fputs(", ", stdout); + ch = *s; + *s = '\0'; /* temporary NUL terminate */ + fputs("\"", stdout); + printfield(p); + fputs("\"", stdout); + *s = ch; /* restore */ + p = s + 1; + } + if (*s == '\0') + break; + } + fputs("]", stdout); + } + + if (fields[FieldEnclosure][0]) { + fputs(",\n\t\"attachments\": [{\"url\": \"", stdout); + printfield(fields[FieldEnclosure]); + fputs("\"}]", stdout); + } + + if (!strcmp(fields[FieldContentType], "html")) + fputs(",\n\t\"content_html\": \"", stdout); + else + fputs(",\n\t\"content_text\": \"", stdout); + printcontent(fields[FieldContent]); + fputs("\"\n}", stdout); + } +} + +int +main(int argc, char *argv[]) +{ + FILE *fp; + char *name; + int i; + + if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1) + err(1, "pledge"); + + fputs("{\n" + "\"version\": \"https://jsonfeed.org/version/1.1\",\n" + "\"title\": \"Newsfeed\",\n" + "\"items\": [\n", stdout); + + if (argc == 1) { + printfeed(stdin, ""); + checkfileerror(stdin, "<stdin>", 'r'); + } else { + for (i = 1; i < argc; i++) { + if (!(fp = fopen(argv[i], "r"))) + err(1, "fopen: %s", argv[i]); + name = ((name = strrchr(argv[i], '/'))) ? 
name + 1 : argv[i]; + printfeed(fp, name); + checkfileerror(fp, argv[i], 'r'); + checkfileerror(stdout, "<stdout>", 'w'); + fclose(fp); + } + } + fputs("]\n}\n", stdout); + + checkfileerror(stdout, "<stdout>", 'w'); + + return 0; +} diff --git a/sfeed_markread b/sfeed_markread @@ -18,7 +18,7 @@ read) cat >> "${urlfile}" ;; unread) - tmp=$(mktemp) + tmp="$(mktemp)" || exit 1 trap "rm -f ${tmp}" EXIT [ -f "${urlfile}" ] || touch "${urlfile}" 2>/dev/null LC_ALL=C awk -F '\t' ' diff --git a/sfeed_markread.1 b/sfeed_markread.1 @@ -1,4 +1,4 @@ -.Dd July 25, 2021 +.Dd October 27, 2024 .Dt SFEED_MARKREAD 1 .Os .Sh NAME @@ -31,13 +31,16 @@ plain-text list of read URLs. .Sh EXIT STATUS .Ex -std .Sh EXAMPLES -.Bd -literal +Example: +.Bd -literal -offset 4n export SFEED_URL_FILE="$HOME/.sfeed/urls" echo 'https://codemadness.org/sfeed.html' | sfeed_markread read .Ed .Pp -or -.Bd -literal +or pass the +.Ar urlfile +as a parameter: +.Bd -literal -offset 4n echo 'https://codemadness.org/sfeed.html' | sfeed_markread read ~/.sfeed/urls .Ed .Sh SEE ALSO diff --git a/sfeed_mbox.1 b/sfeed_mbox.1 @@ -1,4 +1,4 @@ -.Dd August 4, 2021 +.Dd October 27, 2024 .Dt SFEED_MBOX 1 .Os .Sh NAME @@ -47,14 +47,15 @@ By default this is set to "0". .Sh EXIT STATUS .Ex -std .Sh EXAMPLES -.Bd -literal +Example: +.Bd -literal -offset 4n sfeed_mbox ~/.sfeed/feeds/* .Ed .Pp -To include the content. +Below is an example to include the content. This can be insecure for some of the mail clients that interpret HTML code in an unsafe way: -.Bd -literal +.Bd -literal -offset 4n SFEED_MBOX_CONTENT=1 sfeed_mbox ~/.sfeed/feeds/* .Ed .Sh SEE ALSO diff --git a/sfeed_mbox.c b/sfeed_mbox.c @@ -37,6 +37,8 @@ escapefrom: for (; *s; s++) { switch (*s) { case '\\': + if (*(s + 1) == '\0') + break; s++; switch (*s) { case 'n': @@ -82,7 +84,7 @@ printfeed(FILE *fp, const char *feedname) printf("Date: %s\n", dtimebuf); /* invalid/missing: use current time */ } - printf("From: %s <sfeed@>\n", fields[FieldAuthor][0] ? 
fields[FieldAuthor] : feedname); + printf("From: %s <anonymous@>\n", fields[FieldAuthor][0] ? fields[FieldAuthor] : feedname); printf("To: %s <%s@%s>\n", user, user, host); printf("Subject: %s\n", fields[FieldTitle]); printf("Message-ID: <%s%s%llu@%s>\n", diff --git a/sfeed_opml_export b/sfeed_opml_export @@ -7,16 +7,16 @@ loadconfig() { if [ "$1" != "" ]; then # get absolute path of config file required for including. config="$1" - path=$(readlink -f "${config}" 2>/dev/null) + configpath=$(readlink -f "${config}" 2>/dev/null) else # default config location. config="$HOME/.sfeed/sfeedrc" - path="${config}" + configpath="${config}" fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${path}" ]; then - . "${path}" + if [ -r "${configpath}" ] && [ -f "${configpath}" ]; then + . "${configpath}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 echo "See the sfeedrc.example file or the sfeedrc(5) man page for an example." >&2 @@ -38,7 +38,7 @@ cat <<! <?xml version="1.0" encoding="UTF-8"?> <opml version="1.0"> <head> - <title>OPML export from sfeed</title> + <title>OPML export</title> </head> <body> ! diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c @@ -70,7 +70,7 @@ static void xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, const char *v, size_t vl) { - char buf[16]; + char buf[8]; int len; if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0) @@ -96,7 +96,7 @@ main(void) "# list of feeds to fetch:\n" "feeds() {\n" " # feed <name> <feedurl> [basesiteurl] [encoding]\n", stdout); - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); fputs("}\n", stdout); diff --git a/sfeed_plain.1 b/sfeed_plain.1 @@ -26,6 +26,7 @@ is empty. .Pp Items with a timestamp from the last day compared to the system time at the time of formatting are marked as new. 
+This value might be overridden through environment variables. Items are marked as new with the prefix "N" at the start of the line. .Pp .Nm @@ -39,6 +40,12 @@ per rune, using .Xr mbtowc 3 and .Xr wcwidth 3 . +.Sh ENVIRONMENT VARIABLES +.Bl -tag -width Ds +.It Ev SFEED_NEW_AGE +Overwrite the maximum age in seconds to mark feeds as new. +By default this is 86400, which equals one day. +.El .Sh EXIT STATUS .Ex -std .Sh EXAMPLES diff --git a/sfeed_plain.c b/sfeed_plain.c @@ -1,5 +1,3 @@ -#include <sys/types.h> - #include <locale.h> #include <stdio.h> #include <string.h> @@ -63,10 +61,8 @@ main(int argc, char *argv[]) if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1) err(1, "pledge"); - if ((comparetime = time(NULL)) == (time_t)-1) - errx(1, "time"); - /* 1 day is old news */ - comparetime -= 86400; + if ((comparetime = getcomparetime()) == (time_t)-1) + errx(1, "getcomparetime"); if (argc == 1) { printfeed(stdin, ""); diff --git a/sfeed_twtxt.c b/sfeed_twtxt.c @@ -1,5 +1,3 @@ -#include <sys/types.h> - #include <stdio.h> #include <string.h> #include <time.h> diff --git a/sfeed_update b/sfeed_update @@ -7,7 +7,7 @@ sfeedpath="$HOME/.sfeed/feeds" # used for processing feeds concurrently: wait until ${maxjobs} amount of # feeds are finished at a time. -maxjobs=8 +maxjobs=16 # load config (evaluate shellscript). # loadconfig(configfile) @@ -16,20 +16,20 @@ loadconfig() { if [ "$1" != "" ]; then # get absolute path of config file required for including. config="$1" - path=$(readlink -f "${config}" 2>/dev/null) + configpath=$(readlink -f "${config}" 2>/dev/null) else # default config location. config="$HOME/.sfeed/sfeedrc" - path="${config}" + configpath="${config}" fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${path}" ]; then - . "${path}" + if [ -r "${configpath}" ] && [ -f "${configpath}" ]; then + . 
"${configpath}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 echo "See the sfeedrc.example file or the sfeedrc(5) man page for an example." >&2 - exit 1 + die fi } @@ -71,7 +71,7 @@ parse() { } # filter fields. -# filter(name) +# filter(name, url) filter() { cat } @@ -83,9 +83,9 @@ merge() { } # order by timestamp (descending). -# order(name) +# order(name, url) order() { - sort -t ' ' -k1rn,1 + sort -t ' ' -k1rn,1 2>/dev/null } # internal handler to fetch and process a feed. @@ -124,7 +124,7 @@ _feed() { fi rm -f "${tmpfeedfile}.utf8" - if ! filter "${name}" < "${tmpfeedfile}.tsv" > "${tmpfeedfile}.filter"; then + if ! filter "${name}" "${feedurl}" < "${tmpfeedfile}.tsv" > "${tmpfeedfile}.filter"; then log_error "${name}" "FAIL (FILTER)" return 1 fi @@ -142,7 +142,7 @@ _feed() { fi rm -f "${tmpfeedfile}.filter" - if ! order "${name}" < "${tmpfeedfile}.merge" > "${tmpfeedfile}.order"; then + if ! order "${name}" "${feedurl}" < "${tmpfeedfile}.merge" > "${tmpfeedfile}.order"; then log_error "${name}" "FAIL (ORDER)" return 1 fi @@ -163,21 +163,29 @@ _feed() { # fetch and process a feed in parallel. # feed(name, feedurl, [basesiteurl], [encoding]) feed() { - # wait until ${maxjobs} are finished: will stall the queue if an item - # is slow, but it is portable. - [ ${signo} -ne 0 ] && return - [ $((curjobs % maxjobs)) -eq 0 ] && wait - [ ${signo} -ne 0 ] && return - curjobs=$((curjobs + 1)) - - _feed "$@" & + # Output job parameters for xargs. + # Specify fields as a single parameter separated by a NUL byte. + # The parameter is split into fields later by the child process, this + # allows using xargs with empty fields across many implementations. + printf '%s\037%s\037%s\037%s\037%s\037%s\0' \ + "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4" } +# cleanup() cleanup() { # remove temporary directory with feed files. rm -rf "${sfeedtmpdir}" } +# die(statuscode) +die() { + statuscode="${1:-1}" # default: exit 1 + # cleanup temporary files etc. 
+ cleanup + exit "${statuscode}" +} + +# sighandler(signo) sighandler() { signo="$1" # ignore TERM signal for myself. @@ -186,14 +194,23 @@ sighandler() { kill -TERM -$$ } +# feeds() feeds() { printf "Configuration file \"%s\" is invalid or does not contain a \"feeds\" function.\n" "${config}" >&2 echo "See sfeedrc.example for an example." >&2 + die } +# runfeeds() +runfeeds() { + # print feeds for parallel processing with xargs. + feeds > "${sfeedtmpdir}/jobs" || die + SFEED_UPDATE_CHILD="1" xargs -x -0 -P "${maxjobs}" -n 1 \ + "$(readlink -f "${argv0}")" < "${sfeedtmpdir}/jobs" +} + +# main(args...) main() { - # job counter. - curjobs=0 # signal number received for parent. signo=0 # SIGINT: signal to interrupt parent. @@ -203,23 +220,37 @@ main() { # load config file. loadconfig "$1" # fetch feeds and store in temporary directory. - sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" + sfeedtmpdir="$(mktemp -d "${TMPDIR:-/tmp}/sfeed_XXXXXX")" || die mkdir -p "${sfeedtmpdir}/feeds" - touch "${sfeedtmpdir}/ok" + touch "${sfeedtmpdir}/ok" || die # make sure path exists. mkdir -p "${sfeedpath}" - # fetch feeds specified in config file. - feeds - # wait till all feeds are fetched (concurrently). - [ ${signo} -eq 0 ] && wait + # run and process the feeds. + runfeeds + statuscode=$? + # check error exit status indicator for parallel jobs. - [ -f "${sfeedtmpdir}/ok" ] - status=$? - # cleanup temporary files etc. - cleanup + [ -f "${sfeedtmpdir}/ok" ] || statuscode=1 # on signal SIGINT and SIGTERM exit with signal number + 128. - [ ${signo} -ne 0 ] && exit $((signo+128)) - exit ${status} + [ ${signo} -ne 0 ] && die $((signo+128)) + die ${statuscode} } +# process a single feed. 
+# parameters are: config, tmpdir, name, feedurl, basesiteurl, encoding +if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then + [ "$1" = "" ] && exit 0 # must have an argument set + # IFS is "\037" + printf '%s\n' "$1" | \ + while IFS="" read -r _config _tmpdir _name _feedurl _basesiteurl _encoding; do + loadconfig "${_config}" + sfeedtmpdir="${_tmpdir}" + _feed "${_name}" "${_feedurl}" "${_basesiteurl}" "${_encoding}" + exit "$?" + done + exit 0 +fi + +# ...else parent mode: +argv0="$0" # store $0, in the zsh shell $0 is the name of the function. [ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" diff --git a/sfeed_update.1 b/sfeed_update.1 @@ -1,4 +1,4 @@ -.Dd December 15, 2022 +.Dd October 27, 2024 .Dt SFEED_UPDATE 1 .Os .Sh NAME @@ -35,15 +35,13 @@ man page for a detailed description of the format and an example file. .El .Sh FILES WRITTEN .Bl -tag -width Ds -.It feedname +.It Pa feedname TAB-separated .Xr sfeed 5 format containing all items per feed. The .Nm script merges new items with this file. -The feed name cannot contain the '/' character because it is a path separator, -they will be replaced with '_'. .El .Sh ENVIRONMENT VARIABLES .Bl -tag -width Ds @@ -57,7 +55,7 @@ entry-point. .Sh LOGGING When processing a feed it will log failures to stderr and non-failures to stdout in the format: -.Bd -literal +.Bd -literal -offset 4n [HH:MM:SS] feedname message .Ed .Sh EXIT STATUS @@ -65,7 +63,7 @@ stdout in the format: If any of the feeds failed to update then the exit status is non-zero. .Sh EXAMPLES To update your feeds and format them in various formats: -.Bd -literal +.Bd -literal -offset 4n # Update feeds sfeed_update "configfile" # Format to a plain-text list diff --git a/sfeed_web.1 b/sfeed_web.1 @@ -13,8 +13,8 @@ reads the HTML data of the webpage from stdin and writes the found URLs to stdout. 
.Pp Such a link reference in HTML code looks like: -.Bd -literal - <link rel="alternate" href="atom.xml" type="application/atom+xml" /> +.Bd -literal -offset 4n +<link rel="alternate" href="atom.xml" type="application/atom+xml" /> .Ed .Sh OPTIONS .Bl -tag -width 8n @@ -39,7 +39,7 @@ Usually application/atom+xml or application/rss+xml. .Ex -std .Sh EXAMPLES Get URLs from a website: -.Bd -literal +.Bd -literal -offset 4n curl -s -L 'https://codemadness.org/' | sfeed_web 'https://codemadness.org/' .Ed .Sh SEE ALSO diff --git a/sfeed_web.c b/sfeed_web.c @@ -103,7 +103,7 @@ static void xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al, const char *v, size_t vl) { - char buf[16]; + char buf[8]; int len; if (!ishrefattr && !istypeattr) @@ -132,7 +132,7 @@ main(int argc, char *argv[]) parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "<stdin>", 'r'); diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c @@ -12,7 +12,7 @@ static void xmltagstart(XMLParser *p, const char *t, size_t tl) { /* optimization: try to find a processing instruction only at the - start of the data at the first few starting tags. */ + * start of the data at the first few starting tags. */ if (tags++ > 3) exit(0); } @@ -52,7 +52,7 @@ main(void) parser.xmlattrend = xmlattrend; parser.xmltagstart = xmltagstart; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "<stdin>", 'r'); diff --git a/sfeedrc.5 b/sfeedrc.5 @@ -1,4 +1,4 @@ -.Dd January 18, 2023 +.Dd February 9, 2025 .Dt SFEEDRC 5 .Os .Sh NAME @@ -7,7 +7,8 @@ .Sh DESCRIPTION .Nm is the configuration file for -.Xr sfeed_update 1 . +.Xr sfeed_update 1 +and is evaluated as a shellscript. 
.Sh VARIABLES .Bl -tag -width Ds .It Va sfeedpath @@ -18,7 +19,7 @@ The default is can be used to change the amount of concurrent .Fn feed jobs. -The default is 8. +The default is 16. .El .Sh FUNCTIONS .Bl -tag -width Ds @@ -30,24 +31,34 @@ Inside the .Fn feeds function feeds can be defined by calling the .Fn feed -function, its arguments are: +function. +Its arguments are: .Bl -tag -width Ds .It Fa name Name of the feed, this is also used as the filename for the TAB-separated feed file. The feed name cannot contain the '/' character because it is a path separator, they will be replaced with '_'. +Each +.Fa name +should be unique. .It Fa feedurl -URL to fetch the RSS/Atom data from, usually a HTTP or HTTPS URL. +URL to fetch the RSS/Atom data from. +This is usually a HTTP or HTTPS URL. .It Op Fa basesiteurl Base URL of the feed links. -This argument allows to fix relative item links. +This argument allows fixing relative item links. .Pp -According to the RSS and Atom specification feeds should always have absolute -URLs, however this is not always the case in practise. +According to the RSS and Atom specification, feeds should always have absolute +URLs, but this is not always the case in practice. .It Op Fa encoding -Feeds are decoded from this name to UTF-8, the name should be a usable -character-set for the +Feeds are converted from this +.Ar encoding +to UTF-8. +The +.Ar encoding +should be a usable +character-set name for the .Xr iconv 1 tool. .El @@ -55,14 +66,15 @@ tool. .Sh OVERRIDE FUNCTIONS Because .Xr sfeed_update 1 -is a shellscript each function can be overridden to change its behaviour, -notable functions are: +is a shellscript each function can be overridden to change its behaviour. +Notable functions are: .Bl -tag -width Ds .It Fn fetch "name" "url" "feedfile" -Fetch feed from URL and write the data to stdout, its arguments are: +Fetch feed from URL and write the data to stdout. 
+Its arguments are: .Bl -tag -width Ds .It Fa name -Specified name in configuration file (useful for logging). +Feed name. .It Fa url URL to fetch. .It Fa feedfile @@ -74,8 +86,8 @@ By default the tool is used. .It Fn convertencoding "name" "from" "to" Convert data from stdin from one text-encoding to another and write it to -stdout, -its arguments are: +stdout. +Its arguments are: .Bl -tag -width Ds .It Fa name Feed name. @@ -92,27 +104,32 @@ is used. Read RSS/Atom XML data from stdin, convert and write it as .Xr sfeed 5 data to stdout. +Its arguments are: .Bl -tag -width Ds .It Fa name -Name of the feed. +Feed name. .It Fa feedurl URL of the feed. .It Fa basesiteurl Base URL of the feed links. This argument allows to fix relative item links. .El -.It Fn filter "name" +.It Fn filter "name" "url" Filter .Xr sfeed 5 -data from stdin and write it to stdout, its arguments are: +data from stdin and write it to stdout. +Its arguments are: .Bl -tag -width Ds .It Fa name Feed name. +.It Fa url +URL of the feed. .El .It Fn merge "name" "oldfile" "newfile" Merge .Xr sfeed 5 -data of oldfile with newfile and write it to stdout, its arguments are: +data of oldfile with newfile and write it to stdout. +Its arguments are: .Bl -tag -width Ds .It Fa name Feed name. @@ -121,19 +138,22 @@ Old file. .It Fa newfile New file. .El -.It Fn order "name" +.It Fn order "name" "url" Sort .Xr sfeed 5 -data from stdin and write it to stdout, its arguments are: +data from stdin and write it to stdout. +Its arguments are: .Bl -tag -width Ds .It Fa name Feed name. +.It Fa url +URL of the feed. 
.El .El .Sh EXAMPLES An example configuration file is included named sfeedrc.example and also shown below: -.Bd -literal +.Bd -literal -offset 4n #sfeedpath="$HOME/.sfeed/feeds" # list of feeds to fetch: @@ -158,15 +178,78 @@ options for fetching the data, the .Fn fetch function can be overridden and added at the top of the .Nm -file: -.Bd -literal +file, for example: +.Bd -literal -offset 4n # fetch(name, url, feedfile) fetch() { - # allow for 1 redirect, hide User-Agent, timeout is 15 seconds. - curl -L --max-redirs 1 -H "User-Agent:" -f -s -m 15 \e + # allow for 1 redirect, set User-Agent, timeout is 15 seconds. + curl -L --max-redirs 1 -H "User-Agent: 007" -f -s -m 15 \e "$2" 2>/dev/null } .Ed +.Pp +Caching, incremental data updates and bandwidth saving +.Pp +For HTTP servers that support it some bandwidth saving can be done by changing +some of the default curl options. +These options can come at a cost of some privacy, because it exposes additional +metadata from the previous request. +.Pp +.Bl -bullet -compact +.It +The curl ETag options (--etag-save and --etag-compare) can be used to store and +send the previous ETag header value. +curl version 7.73+ is recommended for it to work properly. +.It +The curl -z option can be used to send the modification date of a local file as +a HTTP If-Modified-Since request header. +The server can then respond if the data is modified or not or respond with only +the incremental data. +.It +The curl --compressed option can be used to indicate the client supports +decompression. +Because RSS/Atom feeds are textual XML data this generally compresses very +well. +.It +The example below also sets the User-Agent to sfeed, because some CDNs block +HTTP clients based on the User-Agent request header. 
+.El +.Pp +Example: +.Bd -literal -offset 4n +mkdir -p "$HOME/.sfeed/etags" "$HOME/.sfeed/lastmod" + +# fetch(name, url, feedfile) +fetch() { + basename="$(basename "$3")" + etag="$HOME/.sfeed/etags/${basename}" + lastmod="$HOME/.sfeed/lastmod/${basename}" + output="${sfeedtmpdir}/feeds/${filename}.xml" + + curl \e + -f -s -m 15 \e + -L --max-redirs 0 \e + -H "User-Agent: sfeed" \e + --compressed \e + --etag-save "${etag}" --etag-compare "${etag}" \e + -R -o "${output}" \e + -z "${lastmod}" \e + "$2" 2>/dev/null || return 1 + + # successful, but no file written: assume it is OK and Not Modified. + [ -e "${output}" ] || return 0 + + # use server timestamp from curl -R to set Last-Modified. + touch -r "${output}" "${lastmod}" 2>/dev/null + cat "${output}" 2>/dev/null + # use write output status, other errors are ignored here. + fetchstatus="$?" + rm -f "${output}" 2>/dev/null + return "${fetchstatus}" +} +.Ed +.Pp +The README file has more examples. .Sh SEE ALSO .Xr curl 1 , .Xr iconv 1 , diff --git a/sfeedrc.example b/sfeedrc.example @@ -1,3 +1,6 @@ +# for more details see the sfeedrc(5) and sfeed_update(1) man pages +# and the README file. + #sfeedpath="$HOME/.sfeed/feeds" # list of feeds to fetch: diff --git a/util.c b/util.c @@ -8,7 +8,7 @@ #include "util.h" /* print to stderr, print error message of errno and exit(). - Unlike BSD err() it does not prefix __progname */ + * Unlike BSD err() it does not prefix __progname */ __dead void err(int exitstatus, const char *fmt, ...) { @@ -29,7 +29,7 @@ err(int exitstatus, const char *fmt, ...) } /* print to stderr and exit(). - Unlike BSD errx() it does not prefix __progname */ + * Unlike BSD errx() it does not prefix __progname */ __dead void errx(int exitstatus, const char *fmt, ...) { @@ -89,7 +89,7 @@ uri_hasscheme(const char *s) } /* Parse URI string `s` into an uri structure `u`. 
- Returns 0 on success or -1 on failure */ + * Returns 0 on success or -1 on failure */ int uri_parse(const char *s, struct uri *u) { @@ -204,8 +204,8 @@ parsepath: } /* Transform and try to make the URI `u` absolute using base URI `b` into `a`. - Follows some of the logic from "RFC 3986 - 5.2.2. Transform References". - Returns 0 on success, -1 on error or truncation. */ + * Follows some of the logic from "RFC 3986 - 5.2.2. Transform References". + * Returns 0 on success, -1 on error or truncation. */ int uri_makeabs(struct uri *a, struct uri *u, struct uri *b) { @@ -311,13 +311,31 @@ strtotime(const char *s, time_t *t) return -1; /* NOTE: the type long long supports the 64-bit range. If time_t is - 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */ + * 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */ if (t) *t = (time_t)l; return 0; } +time_t +getcomparetime(void) +{ + time_t now, t; + char *p; + + if ((now = time(NULL)) == (time_t)-1) + return (time_t)-1; + + if ((p = getenv("SFEED_NEW_AGE"))) { + if (strtotime(p, &t) == -1) + return (time_t)-1; + return now - t; + } + + return now - 86400; /* 1 day is old news */ +} + /* Escape characters below as HTML 2.0 / XML 1.0. */ void xmlencode(const char *s, FILE *fp) diff --git a/util.h b/util.h @@ -57,7 +57,8 @@ enum { FieldLast }; -/* hint for compilers and static analyzers that a function exits */ +/* hint for compilers and static analyzers that a function does not return. 
+ * some compilers use: __attribute__((noreturn)), _Noreturn, noreturn */ #ifndef __dead #define __dead #endif @@ -71,6 +72,7 @@ int uri_makeabs(struct uri *, struct uri *, struct uri *); int uri_parse(const char *, struct uri *); void checkfileerror(FILE *, const char *, int); +time_t getcomparetime(void); void parseline(char *, char *[FieldLast]); void printutf8pad(FILE *, const char *, size_t, int); int strtotime(const char *, time_t *); diff --git a/xml.c b/xml.c @@ -317,7 +317,7 @@ xml_parse(XMLParser *x) x->taglen = 1; x->isshorttag = isend = 0; - /* treat processing instruction as shorttag, don't strip "?" prefix. */ + /* treat processing instruction as short tag, don't strip "?" prefix. */ if (c == '?') { x->isshorttag = 1; } else if (c == '/') { @@ -333,6 +333,8 @@ xml_parse(XMLParser *x) else if (c == '>' || ISSPACE(c)) { x->tag[x->taglen] = '\0'; if (isend) { /* end tag, starts with </ */ + while (c != '>' && c != EOF) /* skip until > */ + c = GETNEXT(); if (x->xmltagend) x->xmltagend(x, x->tag, x->taglen, x->isshorttag); x->tag[0] = '\0'; @@ -346,7 +348,7 @@ xml_parse(XMLParser *x) if (x->xmltagstartparsed) x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); } - /* call tagend for shortform or processing instruction */ + /* call tagend for short tag or processing instruction */ if (x->isshorttag) { if (x->xmltagend) x->xmltagend(x, x->tag, x->taglen, x->isshorttag); diff --git a/xml.h b/xml.h @@ -1,5 +1,5 @@ -#ifndef _XML_H_ -#define _XML_H_ +#ifndef XML_H +#define XML_H #include <stdio.h> @@ -23,14 +23,14 @@ typedef struct xmlparser { #ifndef GETNEXT /* GETNEXT overridden to reduce function call overhead and further - context optimizations. */ + * context optimizations. */ #define GETNEXT getchar_unlocked #endif /* current tag */ char tag[1024]; size_t taglen; - /* current tag is in shortform ? <tag /> */ + /* current tag is a short tag ? <tag /> */ int isshorttag; /* current attribute name */ char name[1024];