sfeed

simple feed reader - forked from git.codemadness.org/sfeed
git clone git://src.gearsix.net/sfeed
Log | Files | Refs | Atom | README | LICENSE

commit 875f4880bc520ca25b718cb7c0ede141f13aeab5
parent c712897c002590dda0aceca09d0a2d5a59313908
Author: gearsix <gearsix@tuta.io>
Date:   Tue, 21 Sep 2021 20:44:14 +0100

Merge branch 'gearsix' of https://notabug.org/gearsix/sfeed into gearsix

Diffstat:
M Makefile | 8 ++++++--
M README | 49 +++++++++++++++++++++++++++++++++++--------------
M README.xml | 1 -
M sfeed.1 | 45 ++++++++++++++++++++-------------------------
M sfeed.5 | 38 ++++++++++++++++++--------------------
M sfeed.c | 136 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
M sfeed_atom.1 | 17 ++++++++++-------
M sfeed_atom.c | 5 ++---
M sfeed_frames.1 | 31 ++++++++++++++++++++-----------
M sfeed_frames.c | 10 +++++-----
M sfeed_gopher.1 | 14 ++++++++------
M sfeed_gopher.c | 5 ++---
M sfeed_html.1 | 17 ++++++++++++-----
M sfeed_html.c | 8 ++++----
M sfeed_mbox.1 | 34 +++++++++++++++++++++++++++-------
M sfeed_mbox.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M sfeed_opml_export | 20 +++++++++++---------
M sfeed_opml_import.c | 1 -
M sfeed_plain.1 | 11 ++++++-----
M sfeed_plain.c | 2 +-
M sfeed_read | 2 +-
M sfeed_twtxt.1 | 20 +++++++++++++++-----
M sfeed_twtxt.c | 6 +++---
M sfeed_update | 12 +++++++-----
M sfeed_update.1 | 22 ++++++++++++++++------
M sfeed_web.1 | 17 +++++++++++------
M sfeed_web.c | 1 -
M sfeed_xmlenc.1 | 6 +++---
M sfeed_xmlenc.c | 1 -
M sfeedrc.5 | 25 +++++++++++++++++++++----
M util.c | 39 +++++++++++++++++++++++++++++++++++++++
M util.h | 8 ++++++++
M xml.h | 2 +-
33 files changed, 461 insertions(+), 238 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,7 +1,7 @@ .POSIX: NAME = sfeed -VERSION = 0.9.22 +VERSION = 1.0 # paths PREFIX = /usr/local @@ -13,7 +13,11 @@ RANLIB = ranlib # use system flags. SFEED_CFLAGS = ${CFLAGS} SFEED_LDFLAGS = ${LDFLAGS} -SFEED_CPPFLAGS = -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE=700 -D_BSD_SOURCE +SFEED_CPPFLAGS = -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 -D_BSD_SOURCE + +# uncomment for conservative locked I/O. +#SFEED_CPPFLAGS = -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 -D_BSD_SOURCE \ +# -DGETNEXT=getchar BIN = \ sfeed\ diff --git a/README b/README @@ -9,6 +9,16 @@ other formats. There are also some programs and scripts included to import and export OPML and to fetch, filter, merge and order feed items. +gearsix changes +--------------- + +I've just added the sfeed_read script, which runs sfeed_updated and uses the +specified sfeed_* tool (defaults to html, specified with -e) to xdg-user-dir +DOCUMENTS (or ~/Documents if missing). +Output files are named after the current time. +Finally, it opens the generated output file using xdg-open. + + Build and install ----------------- @@ -117,7 +127,8 @@ OS tested - FreeBSD - DragonFlyBSD - Windows (cygwin gcc, mingw). -- HaikuOS (using libbsd). +- HaikuOS +- SerenityOS - FreeDOS (djgpp). - FUZIX (sdcc -mz80). @@ -308,7 +319,7 @@ advertisements, strip tracking parameters and more. $8 = filterlink($8); # enclosure # try to remove tracking pixels: <img/> tags with 1px width or height. - gsub("<img[^>]*(width|height)[\s]*=[\s]*[\"'"'"' ]?1[\"'"'"' ]?[^0-9>]+[^>]*>", "", $4); + gsub("<img[^>]*(width|height)[[:space:]]*=[[:space:]]*[\"'"'"' ]?1[\"'"'"' ]?[^0-9>]+[^>]*>", "", $4); print $0; }' @@ -379,6 +390,10 @@ enclosure URL (probably some audio file): } END { if (length(url)) { print url; } }' +... or on a file already sorted from newest to oldest: + + awk -F '\t' '$8 { print $8; exit }' + - - - Over time your feeds file might become quite big. 
You can archive items of a @@ -407,7 +422,7 @@ fdm config file (~/.sfeed/fdm.conf): cache "${cachepath}" $maildir = "%[home]/feeds/" - # Check if message is in in cache by Message-ID. + # Check if message is in the cache by Message-ID. match case "^Message-ID: (.*)" in headers action { tag "msgid" value "%1" @@ -451,7 +466,7 @@ fdm config file (~/.sfeed/fdm.conf): $cachepath = "%[home]/.sfeed/fdm.cache" cache "${cachepath}" - # Check if message is in in cache by Message-ID. + # Check if message is in the cache by Message-ID. match case "^Message-ID: (.*)" in headers action { tag "msgid" value "%1" @@ -683,7 +698,6 @@ TSV format. # # Usage: create some directory to store the feeds, run this script. # - # Assumes "html" for content-type (Newsboat only handles HTML content). # Assumes feednames are unique and a feed title is set. # newsboat cache.db file. @@ -698,11 +712,10 @@ TSV format. .mode ascii .output SELECT - i.pubDate, i.title, i.url, i.content, i.guid, i.author, - i.enclosure_url, - f.rssurl AS rssurl, f.title AS feedtitle, i.unread --, - -- i.id, i.enclosure_type, i.enqueued, i.flags, i.deleted, - -- i.base + i.pubDate, i.title, i.url, i.content, i.content_mime_type, + i.guid, i.author, i.enclosure_url, + f.rssurl AS rssurl, f.title AS feedtitle, i.unread + -- i.id, i.enclosure_type, i.enqueued, i.flags, i.deleted, i.base FROM rss_feed f INNER JOIN rss_item i ON i.feedurl = f.rssurl ORDER BY @@ -738,17 +751,25 @@ TSV format. 
return title; } { - fname = feedname($8, $9); + fname = feedname($9, $10); if (!feed[fname]++) { - print "Writing file: \"" fname "\" (title: " $9 ", url: " $8 ")" > "/dev/stderr"; + print "Writing file: \"" fname "\" (title: " $10 ", url: " $9 ")" > "/dev/stderr"; } + contenttype = field($5); + if (contenttype == "") + contenttype = "html"; + else if (index(contenttype, "/html") || index(contenttype, "/xhtml")) + contenttype = "html"; + else + contenttype = "plain"; + print $1 "\t" field($2) "\t" field($3) "\t" content($4) "\t" \ - "html" "\t" field($5) "\t" field($6) "\t" field($7) \ + contenttype "\t" field($6) "\t" field($7) "\t" field($8) "\t" \ > fname; # write URLs of the read items to a file line by line. - if ($10 == "0") { + if ($11 == "0") { print $3 > "urls"; } }' diff --git a/README.xml b/README.xml @@ -83,4 +83,3 @@ License ------- ISC, see LICENSE file. - diff --git a/sfeed.1 b/sfeed.1 @@ -1,4 +1,4 @@ -.Dd February 28, 2021 +.Dd July 29, 2021 .Dt SFEED 1 .Os .Sh NAME @@ -11,45 +11,42 @@ .Nm reads RSS or Atom feed data (XML) from stdin. It writes the feed data in a TAB-separated format to stdout. -A -.Ar baseurl -can be specified if the links or enclosures in the feed are relative URLs. If the .Ar baseurl -is a valid absolute URL then the relative links or enclosures will be -made absolute. +argument is a valid absolute URL then the relative links or enclosures will be +made an absolute URL. .Sh TAB-SEPARATED FORMAT FIELDS -The items are output per line in a TSV-like format. +The items are output per line in a TAB-separated format. .Pp -The fields: title, id, author are not allowed to have newlines and TABs, all -whitespace characters are replaced by a single space character. +For the fields title, id and author each whitespace character is replaced by a +SPACE character. Control characters are removed. .Pp -The content field can contain newlines and is escaped. +The content field can contain newlines and these are escaped. 
TABs, newlines and '\\' are escaped with '\\', so it becomes: '\\t', '\\n' and '\\\\'. Other whitespace characters except spaces are removed. Control characters are removed. .Pp The order and content of the fields are: -.Bl -tag -width 12n -.It timestamp +.Bl -tag -width 15n +.It 1. timestamp UNIX timestamp in UTC+0, empty if missing or on parse failure. -.It title +.It 2. title Title text, HTML code in titles is ignored and is treated as plain-text. -.It link +.It 3. link Link -.It content +.It 4. content Content, can have plain-text or HTML code depending on the content-type field. -.It content-type -"html" or "plain". -.It id +.It 5. content-type +"html" or "plain" if it has content. +.It 6. id RSS item GUID or Atom id. -.It author +.It 7. author Item author. -.It enclosure +.It 8. enclosure Item, first enclosure. -.It category +.It 9. category Item, categories, multiple values are separated by |. .El .Sh EXIT STATUS @@ -66,7 +63,5 @@ curl -s 'https://codemadness.org/atom.xml' | sfeed .Sh AUTHORS .An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org .Sh CAVEATS -If a timezone is not in the RFC-822 or RFC-3339 format it is not supported and -the UNIX timestamp is interpreted as UTC+0. -.Pp -HTML in titles is treated as plain-text. +If a timezone for the timestamp field is not in the RFC822 or RFC3339 format it +is not supported and the timezone is interpreted as UTC+0. diff --git a/sfeed.5 b/sfeed.5 @@ -1,4 +1,4 @@ -.Dd February 19, 2021 +.Dd July 29, 2021 .Dt SFEED 5 .Os .Sh NAME @@ -10,37 +10,37 @@ .Xr sfeed 1 writes the feed data in a TAB-separated format to stdout. .Sh TAB-SEPARATED FORMAT FIELDS -The items are output per line in a TSV-like format. +The items are output per line in a TAB-separated format. .Pp -The fields: title, id, author are not allowed to have newlines and TABs, all -whitespace characters are replaced by a single space character. +For the fields title, id and author each whitespace character is replaced by a +SPACE character. 
Control characters are removed. .Pp -The content field can contain newlines and is escaped. +The content field can contain newlines and these are escaped. TABs, newlines and '\\' are escaped with '\\', so it becomes: '\\t', '\\n' and '\\\\'. Other whitespace characters except spaces are removed. Control characters are removed. .Pp The order and content of the fields are: -.Bl -tag -width 12n -.It timestamp +.Bl -tag -width 15n +.It 1. timestamp UNIX timestamp in UTC+0, empty if missing or on parse failure. -.It title +.It 2. title Title text, HTML code in titles is ignored and is treated as plain-text. -.It link +.It 3. link Link -.It content +.It 4. content Content, can have plain-text or HTML code depending on the content-type field. -.It content-type -"html" or "plain". -.It id +.It 5. content-type +"html" or "plain" if it has content. +.It 6. id RSS item GUID or Atom id. -.It author +.It 7. author Item author. -.It enclosure +.It 8. enclosure Item, first enclosure. -.It category +.It 9. category Item, categories, multiple values are separated by |. .El .Sh SEE ALSO @@ -49,7 +49,5 @@ Item, categories, multiple values are separated by |. .Sh AUTHORS .An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org .Sh CAVEATS -If a timezone is not in the RFC-822 or RFC-3339 format it is not supported and -the UNIX timestamp is interpreted as UTC+0. -.Pp -HTML in titles is treated as plain-text. +If a timezone for the timestamp field is not in the RFC822 or RFC3339 format it +is not supported and the timezone is interpreted as UTC+0. 
diff --git a/sfeed.c b/sfeed.c @@ -1,7 +1,6 @@ #include <sys/types.h> #include <ctype.h> -#include <err.h> #include <errno.h> #include <stdint.h> #include <stdio.h> @@ -58,7 +57,8 @@ enum TagId { RSSTagAuthor, RSSTagDccreator, RSSTagCategory, /* Atom */ - AtomTagUpdated, AtomTagPublished, /* creation date has higher priority */ + /* creation date has higher priority */ + AtomTagModified, AtomTagUpdated, AtomTagIssued, AtomTagPublished, AtomTagTitle, AtomTagMediaDescription, AtomTagSummary, AtomTagContent, AtomTagId, @@ -151,9 +151,11 @@ static FeedTag atomtags[] = { { STRP("category"), AtomTagCategory }, { STRP("content"), AtomTagContent }, { STRP("id"), AtomTagId }, + { STRP("issued"), AtomTagIssued }, /* Atom 0.3 */ /* Atom: <link href="" />, RSS has <link></link> */ { STRP("link"), AtomTagLink }, { STRP("media:description"), AtomTagMediaDescription }, + { STRP("modified"), AtomTagModified }, /* Atom 0.3 */ { STRP("published"), AtomTagPublished }, { STRP("summary"), AtomTagSummary }, { STRP("title"), AtomTagTitle }, @@ -186,7 +188,9 @@ static int fieldmap[TagLast] = { [RSSTagDccreator] = FeedFieldAuthor, [RSSTagCategory] = FeedFieldCategory, /* Atom */ + [AtomTagModified] = FeedFieldTime, [AtomTagUpdated] = FeedFieldTime, + [AtomTagIssued] = FeedFieldTime, [AtomTagPublished] = FeedFieldTime, [AtomTagTitle] = FeedFieldTitle, [AtomTagMediaDescription] = FeedFieldContent, @@ -209,8 +213,7 @@ static const char *baseurl; static FeedContext ctx; static XMLParser parser; /* XML parser state */ -static String tmpstr; -static enum ContentType tmpcontenttype; /* content-type for item */ +static String attrispermalink, attrrel, attrtype, tmpstr; int tagcmp(const void *v1, const void *v2) @@ -333,7 +336,7 @@ string_print_encoded(String *s) } static void -printtrimmed(char *s) +printtrimmed(const char *s) { char *p, *e; @@ -382,7 +385,7 @@ string_print_trimmed_multi(String *s) } } -/* always print absolute URLs (using global baseurl) */ +/* print URL, if it's a relative URL 
then it uses global baseurl */ void printuri(char *s) { @@ -408,7 +411,7 @@ printuri(char *s) *e = c; /* restore NUL byte to original character */ } -/* always print absolute URLs (using global baseurl) */ +/* print URL, if it's a relative URL then it uses global baseurl */ void string_print_uri(String *s) { @@ -608,6 +611,9 @@ parsetime(const char *s, time_t *tp) ; for (v = 0, i = 0; i < 4 && isdigit((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); + /* obsolete short year: RFC2822 4.3 */ + if (i <= 3) + v += (v >= 0 && v <= 49) ? 2000 : 1900; va[0] = v; /* year */ for (; isspace((unsigned char)*s); s++) ; @@ -702,18 +708,8 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, /* content-type may be: Atom: text, xhtml, html or mime-type. MRSS (media:description): plain, html. */ if (ISCONTENTTAG(ctx)) { - if (isattr(n, nl, STRP("type"))) { - if (isattr(v, vl, STRP("html")) || - isattr(v, vl, STRP("xhtml")) || - isattr(v, vl, STRP("text/html")) || - isattr(v, vl, STRP("text/xhtml"))) { - tmpcontenttype = ContentTypeHTML; - } else if (isattr(v, vl, STRP("text")) || - isattr(v, vl, STRP("plain")) || - isattr(v, vl, STRP("text/plain"))) { - tmpcontenttype = ContentTypePlain; - } - } + if (isattr(n, nl, STRP("type"))) + string_append(&attrtype, v, vl); return; } @@ -721,30 +717,15 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, if (ctx.tag.id == RSSTagEnclosure && isattr(n, nl, STRP("url"))) { string_append(&tmpstr, v, vl); - } else if ((ctx.tag.id == RSSTagGuid || - ctx.tag.id == RSSTagGuidPermalinkFalse || - ctx.tag.id == RSSTagGuidPermalinkTrue) && + } else if (ctx.tag.id == RSSTagGuid && isattr(n, nl, STRP("ispermalink"))) { - if (isattr(v, vl, STRP("true"))) - ctx.tag.id = RSSTagGuidPermalinkTrue; - else - ctx.tag.id = RSSTagGuidPermalinkFalse; + string_append(&attrispermalink, v, vl); } } else if (ctx.feedtype == FeedTypeAtom) { - if (ctx.tag.id == AtomTagLink || - ctx.tag.id == AtomTagLinkAlternate || - 
ctx.tag.id == AtomTagLinkEnclosure) { + if (ctx.tag.id == AtomTagLink) { if (isattr(n, nl, STRP("rel"))) { - /* empty or "alternate": other types could be - "enclosure", "related", "self" or "via" */ - if (!vl || isattr(v, vl, STRP("alternate"))) - ctx.tag.id = AtomTagLinkAlternate; - else if (isattr(v, vl, STRP("enclosure"))) - ctx.tag.id = AtomTagLinkEnclosure; - else - ctx.tag.id = AtomTagLink; /* unknown */ - } else if (ctx.tag.id != AtomTagLink && - isattr(n, nl, STRP("href"))) { + string_append(&attrrel, v, vl); + } else if (isattr(n, nl, STRP("href"))) { string_append(&tmpstr, v, vl); } } else if (ctx.tag.id == AtomTagCategory && @@ -806,6 +787,18 @@ xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl) } return; } + + if (attrispermalink.len && isattr(n, nl, STRP("ispermalink"))) + string_clear(&attrispermalink); + else if (attrrel.len && isattr(n, nl, STRP("rel"))) + string_clear(&attrrel); + else if (attrtype.len && isattr(n, nl, STRP("type"))) + string_clear(&attrtype); + else if (tmpstr.len && + (isattr(n, nl, STRP("href")) || + isattr(n, nl, STRP("term")) || + isattr(n, nl, STRP("url")))) + string_clear(&tmpstr); /* use the last value for multiple attribute values */ } /* NOTE: this handler can be called multiple times if the data in this @@ -880,29 +873,10 @@ xmltagstart(XMLParser *p, const char *t, size_t tl) memcpy(&(ctx.tag), f, sizeof(ctx.tag)); } - switch (ctx.tag.id) { - case AtomTagLink: - /* without a rel attribute the default link type is "alternate" */ - ctx.tag.id = AtomTagLinkAlternate; - break; - case RSSTagGuid: - /* without a ispermalink attribute the default value is "true" */ - ctx.tag.id = RSSTagGuidPermalinkTrue; - break; - case RSSTagContentEncoded: - case RSSTagDescription: - tmpcontenttype = ContentTypeHTML; /* default content-type */ - break; - case RSSTagMediaDescription: - case AtomTagContent: - case AtomTagMediaDescription: - case AtomTagSummary: - tmpcontenttype = ContentTypePlain; /* default 
content-type */ - break; - default: - break; - } ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent); + string_clear(&attrispermalink); + string_clear(&attrrel); + string_clear(&attrtype); } static void @@ -920,6 +894,25 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) return; } + /* set tag type based on it's attribute value */ + if (ctx.tag.id == RSSTagGuid) { + /* if empty the default is "true" */ + if (!attrispermalink.len || + isattr(attrispermalink.data, attrispermalink.len, STRP("true"))) + ctx.tag.id = RSSTagGuidPermalinkTrue; + else + ctx.tag.id = RSSTagGuidPermalinkFalse; + } else if (ctx.tag.id == AtomTagLink) { + /* empty or "alternate": other types could be + "enclosure", "related", "self" or "via" */ + if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate"))) + ctx.tag.id = AtomTagLinkAlternate; + else if (isattr(attrrel.data, attrrel.len, STRP("enclosure"))) + ctx.tag.id = AtomTagLinkEnclosure; + else + ctx.tag.id = AtomTagLink; /* unknown */ + } + tagid = ctx.tag.id; /* map tag type to field: unknown or lesser priority is ignored, @@ -933,7 +926,24 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) if (ctx.iscontenttag) { ctx.iscontent = 1; ctx.iscontenttag = 0; - ctx.contenttype = tmpcontenttype; + + /* detect content-type based on type attribute */ + if (attrtype.len) { + if (isattr(attrtype.data, attrtype.len, STRP("html")) || + isattr(attrtype.data, attrtype.len, STRP("xhtml")) || + isattr(attrtype.data, attrtype.len, STRP("text/html")) || + isattr(attrtype.data, attrtype.len, STRP("text/xhtml")) || + isattr(attrtype.data, attrtype.len, STRP("application/xhtml+xml"))) + ctx.contenttype = ContentTypeHTML; + else /* unknown: handle as base64 text data */ + ctx.contenttype = ContentTypePlain; + } else { + /* default content-type */ + if (tagid == RSSTagContentEncoded || tagid == RSSTagDescription) + ctx.contenttype = ContentTypeHTML; + else + ctx.contenttype = 
ContentTypePlain; + } } ctx.field = &(ctx.fields[fieldmap[tagid]].str); @@ -966,7 +976,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) { /* matched tag end: close it */ /* copy also to the link field if the attribute isPermaLink="true" - and it is not set by a tag with higher prio. */ + and it is not set by a tag with higher prio. */ if (ctx.tag.id == RSSTagGuidPermalinkTrue && ctx.field && ctx.tag.id > ctx.fields[FeedFieldLink].tagid) { string_clear(&ctx.fields[FeedFieldLink].str); diff --git a/sfeed_atom.1 b/sfeed_atom.1 @@ -1,4 +1,4 @@ -.Dd March 15, 2020 +.Dd July 31, 2021 .Dt SFEED_ATOM 1 .Os .Sh NAME @@ -6,32 +6,35 @@ .Nd format feed data to an Atom feed .Sh SYNOPSIS .Nm -.Op Ar file... +.Op Ar .Sh DESCRIPTION .Nm formats feed data (TSV) from .Xr sfeed 1 -from stdin or +from stdin or for each .Ar file to stdout as an Atom (XML) feed. If one or more .Ar file -are specified, the basename of the +arguments are specified then the basename of the .Ar file is used as the feed name in the output. If no .Ar file -parameters are specified and so the data is read from stdin the feed name +arguments are specified and so the data is read from stdin then the feed name is empty. .Pp If .Nm is reading from one or more .Ar file -it will prefix the entry title with the feed name which is the basename of the -input file. +arguments it will prefix the entry title with "[feed name] ". 
.Sh EXIT STATUS .Ex -std +.Sh EXAMPLES +.Bd -literal +sfeed_atom ~/.sfeed/feeds/* +.Ed .Sh SEE ALSO .Xr sfeed 1 , .Xr sfeed_plain 1 , diff --git a/sfeed_atom.c b/sfeed_atom.c @@ -1,6 +1,5 @@ #include <sys/types.h> -#include <err.h> #include <stdio.h> #include <string.h> #include <time.h> @@ -39,7 +38,7 @@ static void printfeed(FILE *fp, const char *feedname) { char *fields[FieldLast]; - struct tm *tm; + struct tm parsedtm, *tm; time_t parsedtime; ssize_t linelen; @@ -76,7 +75,7 @@ printfeed(FILE *fp, const char *feedname) parsedtime = 0; if (strtotime(fields[FieldUnixTimestamp], &parsedtime) || - !(tm = gmtime(&parsedtime))) + !(tm = gmtime_r(&parsedtime, &parsedtm))) tm = &tmnow; fprintf(stdout, "\t<updated>%04d-%02d-%02dT%02d:%02d:%02dZ</updated>\n", tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, diff --git a/sfeed_frames.1 b/sfeed_frames.1 @@ -1,4 +1,4 @@ -.Dd March 15, 2020 +.Dd July 31, 2021 .Dt SFEED_FRAMES 1 .Os .Sh NAME @@ -6,34 +6,43 @@ .Nd format feed data to HTML with frames .Sh SYNOPSIS .Nm -.Op Ar file... +.Op Ar .Sh DESCRIPTION .Nm formats feed data (TSV) from .Xr sfeed 1 -to HTML. -It reads TSV data from stdin or +from stdin or for each .Ar file -and writes HTML files for the frameset to the current directory. +to HTML. +It writes HTML files for the frameset to the current directory. If no .Ar file -parameters are specified and therefore the data is read from stdin then the -menu.html file is not written. +arguments are specified and so the data is read from stdin then the menu.html +file is not written. .Pp Items with a timestamp from the last day compared to the system time at the time of formatting are counted and marked as new. +Items are marked as new using a bold markup. +.Pp +There is an example style.css stylesheet file included in the distribution. .Sh FILES WRITTEN .Bl -tag -width 13n .It index.html -The main HTML file referencing to the frames items.html and menu.html. 
+The main HTML file referencing the files for the frames: items.html and +menu.html. .It items.html -The items frame contains all the item HTML links to the remote content. +The HTML file of the items frame which contains all the item links to the +feeds. .It menu.html -The menu frame which contains navigation "anchor" links to the feed names in -items.html. +The HTML file of the menu frame which contains navigation "anchor" links (like +"#feedname") to the feed names in items.html. .El .Sh EXIT STATUS .Ex -std +.Sh EXAMPLES +.Bd -literal +sfeed_frames ~/.sfeed/feeds/* +.Ed .Sh SEE ALSO .Xr sfeed 1 , .Xr sfeed_html 1 , diff --git a/sfeed_frames.c b/sfeed_frames.c @@ -1,6 +1,5 @@ #include <sys/types.h> -#include <err.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -12,7 +11,7 @@ static struct feed *feeds; static char *line; static size_t linesize; static time_t comparetime; -static unsigned long totalnew; +static unsigned long totalnew, total; static void printfeed(FILE *fpitems, FILE *fpin, struct feed *f) @@ -54,6 +53,7 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) fputs(" ", fpitems); } f->total++; + total++; if (fields[FieldLink][0]) { fputs("<a href=\"", fpitems); @@ -159,9 +159,9 @@ main(int argc, char *argv[]) fputs("<!DOCTYPE html>\n<html>\n<head>\n" "\t<meta name=\"referrer\" content=\"no-referrer\" />\n" "\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n" - "\t<title>Newsfeed (", fpindex); - fprintf(fpindex, "%lu", totalnew); - fputs(")</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n" + "\t<title>(", fpindex); + fprintf(fpindex, "%lu/%lu", totalnew, total); + fputs(") - Newsfeed</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n" "</head>\n", fpindex); if (showsidebar) { fputs("<frameset framespacing=\"0\" cols=\"250,*\" frameborder=\"1\">\n" diff --git a/sfeed_gopher.1 b/sfeed_gopher.1 @@ -1,4 +1,4 @@ -.Dd January 3, 2021 +.Dd July 31, 2021 .Dt 
SFEED_GOPHER 1 .Os .Sh NAME @@ -6,31 +6,33 @@ .Nd format feed data to Gopher files .Sh SYNOPSIS .Nm -.Op Ar file... +.Op Ar .Sh DESCRIPTION .Nm formats feed data (TSV) from .Xr sfeed 1 -from stdin or +from stdin or for each .Ar file to stdout in the raw Gopher output format. .Pp If one or more .Ar file -are specified, the basename of the +arguments are specified then the basename of the .Ar file is used as the feed name in the output. .Nm creates an index file named "index" and for each feed it creates a file with -the same name as the feedname. +the same name as the basename of the feed +.Ar file . .Pp If no .Ar file -parameters are specified and the data is read from stdin then the data is +arguments are specified and so the data is read from stdin then the data is written to stdout and no files are written. .Pp Items with a timestamp from the last day compared to the system time at the time of formatting are counted and marked as new. +Items are marked as new with the prefix "N". .Sh ENVIRONMENT .Bl -tag -width Ds .It Ev SFEED_GOPHER_PATH diff --git a/sfeed_gopher.c b/sfeed_gopher.c @@ -1,6 +1,5 @@ #include <sys/types.h> -#include <err.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> @@ -131,9 +130,9 @@ main(int argc, char *argv[]) err(1, "pledge"); } else { if (unveil("/", "r") == -1) - err(1, "unveil"); + err(1, "unveil: /"); if (unveil(".", "rwc") == -1) - err(1, "unveil"); + err(1, "unveil: ."); if (pledge("stdio rpath wpath cpath", NULL) == -1) err(1, "pledge"); } diff --git a/sfeed_html.1 b/sfeed_html.1 @@ -1,4 +1,4 @@ -.Dd March 15, 2020 +.Dd July 31, 2021 .Dt SFEED_HTML 1 .Os .Sh NAME @@ -6,28 +6,35 @@ .Nd format feed data to HTML .Sh SYNOPSIS .Nm -.Op Ar file... +.Op Ar .Sh DESCRIPTION .Nm formats feed data (TSV) from .Xr sfeed 1 -from stdin or +from stdin or for each .Ar file to stdout in HTML. 
If one or more .Ar file -are specified, the basename of the +arguments are specified then the basename of the .Ar file is used as the feed name in the output. If no .Ar file -parameters are specified and so the data is read from stdin the feed name +arguments are specified and so the data is read from stdin then the feed name is empty. .Pp Items with a timestamp from the last day compared to the system time at the time of formatting are counted and marked as new. +Items are marked as new using a bold markup. +.Pp +There is an example style.css stylesheet file included in the distribution. .Sh EXIT STATUS .Ex -std +.Sh EXAMPLES +.Bd -literal +sfeed_html ~/.sfeed/feeds/* +.Ed .Sh SEE ALSO .Xr sfeed 1 , .Xr sfeed_frames 1 , diff --git a/sfeed_html.c b/sfeed_html.c @@ -1,6 +1,5 @@ #include <sys/types.h> -#include <err.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -12,7 +11,7 @@ static struct feed *feeds; static int showsidebar; static char *line; static size_t linesize; -static unsigned long totalnew; +static unsigned long totalnew, total; static time_t comparetime; static void @@ -55,6 +54,7 @@ printfeed(FILE *fp, struct feed *f) fputs(" ", stdout); } f->total++; + total++; if (fields[FieldLink][0]) { fputs("<a href=\"", stdout); @@ -147,8 +147,8 @@ main(int argc, char *argv[]) fputs("\t\t</ul>\n\t</div>\n", stdout); } - fprintf(stdout, "\t</body>\n\t<title>Newsfeed (%lu)</title>\n</html>\n", - totalnew); + fprintf(stdout, "\t</body>\n\t<title>(%lu/%lu) - Newsfeed</title>\n</html>\n", + totalnew, total); return 0; } diff --git a/sfeed_mbox.1 b/sfeed_mbox.1 @@ -1,4 +1,4 @@ -.Dd March 15, 2020 +.Dd August 4, 2021 .Dt SFEED_MBOX 1 .Os .Sh NAME @@ -6,37 +6,57 @@ .Nd format feed data to mboxrd .Sh SYNOPSIS .Nm -.Op Ar file... +.Op Ar .Sh DESCRIPTION .Nm formats feed data (TSV) from .Xr sfeed 1 -from stdin or +from stdin or for each .Ar file to stdout in the mboxrd format. 
If one or more .Ar file -are specified, the basename of the +arguments are specified then the basename of the .Ar file is used as the feed name in the output. If no .Ar file -parameters are specified and so the data is read from stdin the feed name +arguments are specified and so the data is read from stdin then the feed name is empty. The mbox data can be further processed by tools like .Xr procmail 1 or .Xr fdm 1 for example. -See the README file for some useful examples. +See the README file for some examples. .Sh CUSTOM HEADERS To make further filtering simpler some custom headers are set: .Bl -tag -width Ds .It X-Feedname -The feedname (as set in sfeedrc). +The feed name, this is the basename of the feed +.Ar file . +.El +.Sh ENVIRONMENT VARIABLES +.Bl -tag -width Ds +.It Ev SFEED_MBOX_CONTENT +Include the content. +This can be insecure for some of the mail clients that interpret HTML code in +an unsafe way. +By default this is set to "0". .El .Sh EXIT STATUS .Ex -std +.Sh EXAMPLES +.Bd -literal +sfeed_mbox ~/.sfeed/feeds/* +.Ed +.Pp +To include the content. +This can be insecure for some of the mail clients that interpret HTML code in +an unsafe way: +.Bd -literal +SFEED_MBOX_CONTENT=1 sfeed_mbox ~/.sfeed/feeds/* +.Ed .Sh SEE ALSO .Xr fdm 1 , .Xr procmail 1 , diff --git a/sfeed_mbox.c b/sfeed_mbox.c @@ -1,4 +1,3 @@ -#include <err.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -10,6 +9,7 @@ static char *line; static size_t linesize; static char host[256], *user, dtimebuf[32], mtimebuf[32]; +static int usecontent = 0; /* env variable: $SFEED_MBOX_CONTENT */ static unsigned long djb2(unsigned char *s, unsigned long hash) @@ -21,14 +21,47 @@ djb2(unsigned char *s, unsigned long hash) return hash; } +/* Unescape / decode fields printed by string_print_encoded() + * "\\" to "\", "\t", to TAB, "\n" to newline. Unrecognised escape sequences + * are ignored: "\z" etc. Mangle "From " in mboxrd style (always prefix >). 
*/ +static void +printcontent(const char *s, FILE *fp) +{ +escapefrom: + for (; *s == '>'; s++) + fputc('>', fp); + /* escape "From ", mboxrd-style. */ + if (!strncmp(s, "From ", 5)) + fputc('>', fp); + + for (; *s; s++) { + switch (*s) { + case '\\': + s++; + switch (*s) { + case 'n': + fputc('\n', fp); + s++; + goto escapefrom; + case '\\': fputc('\\', fp); break; + case 't': fputc('\t', fp); break; + } + break; + default: + fputc(*s, fp); break; + } + } +} + static void printfeed(FILE *fp, const char *feedname) { char *fields[FieldLast], timebuf[32]; - struct tm *tm; + struct tm parsedtm, *tm; time_t parsedtime; unsigned long hash; ssize_t linelen; + int ishtml; while ((linelen = getline(&line, &linesize, fp)) > 0) { if (line[linelen - 1] == '\n') @@ -41,7 +74,7 @@ printfeed(FILE *fp, const char *feedname) parsedtime = 0; if (!strtotime(fields[FieldUnixTimestamp], &parsedtime) && - (tm = gmtime(&parsedtime)) && + (tm = gmtime_r(&parsedtime, &parsedtm)) && strftime(timebuf, sizeof(timebuf), "%a, %d %b %Y %H:%M:%S +0000", tm)) { printf("Date: %s\n", timebuf); } else { @@ -55,14 +88,44 @@ printfeed(FILE *fp, const char *feedname) fields[FieldUnixTimestamp], fields[FieldUnixTimestamp][0] ? "." 
: "", hash, feedname); - printf("Content-Type: text/plain; charset=\"utf-8\"\n"); - printf("Content-Transfer-Encoding: binary\n"); - printf("X-Feedname: %s\n\n", feedname); - printf("%s\n", fields[FieldLink]); - if (fields[FieldEnclosure][0]) - printf("\nEnclosure:\n%s\n", fields[FieldEnclosure]); + ishtml = usecontent && !strcmp(fields[FieldContentType], "html"); + if (ishtml) + fputs("Content-Type: text/html; charset=\"utf-8\"\n", stdout); + else + fputs("Content-Type: text/plain; charset=\"utf-8\"\n", stdout); + fputs("Content-Transfer-Encoding: binary\n", stdout); + printf("X-Feedname: %s\n", feedname); fputs("\n", stdout); + + if (ishtml) { + fputs("<p>\n", stdout); + if (fields[FieldLink][0]) { + fputs("Link: <a href=\"", stdout); + xmlencode(fields[FieldLink], stdout); + fputs("\">", stdout); + fputs(fields[FieldLink], stdout); + fputs("</a><br/>\n", stdout); + } + if (fields[FieldEnclosure][0]) { + fputs("Enclosure: <a href=\"", stdout); + xmlencode(fields[FieldEnclosure], stdout); + fputs("\">", stdout); + fputs(fields[FieldEnclosure], stdout); + fputs("</a><br/>\n", stdout); + } + fputs("</p>\n", stdout); + } else { + if (fields[FieldLink][0]) + printf("Link: %s\n", fields[FieldLink]); + if (fields[FieldEnclosure][0]) + printf("Enclosure: %s\n", fields[FieldEnclosure]); + } + if (usecontent) { + fputs("\n", stdout); + printcontent(fields[FieldContent], stdout); + } + fputs("\n\n", stdout); } } @@ -72,12 +135,14 @@ main(int argc, char *argv[]) struct tm tmnow; time_t now; FILE *fp; - char *name; + char *name, *tmp; int i; if (pledge(argc == 1 ? 
"stdio" : "stdio rpath", NULL) == -1) err(1, "pledge"); + if ((tmp = getenv("SFEED_MBOX_CONTENT"))) + usecontent = !strcmp(tmp, "1"); if (!(user = getenv("USER"))) user = "you"; if (gethostname(host, sizeof(host)) == -1) @@ -104,5 +169,6 @@ main(int argc, char *argv[]) fclose(fp); } } + return 0; } diff --git a/sfeed_opml_export b/sfeed_opml_export @@ -4,20 +4,22 @@ # loadconfig(configfile) loadconfig() { # allow to specify config via argv[1]. - if [ ! x"$1" = x"" ]; then - # get absolute path of config file. - config=$(readlink -f "$1") + if [ "$1" != "" ]; then + # get absolute path of config file required for including. + config="$1" + path=$(readlink -f "${config}" 2>/dev/null) else # default config location. config="$HOME/.sfeed/sfeedrc" + path="${config}" fi - # load config: config is loaded here to be able to override above variables - # (sfeedpath, sfeedfile, etc). - if [ -r "$config" ]; then - . "$config" + # config is loaded here to be able to override $sfeedpath or functions. + if [ -r "${path}" ]; then + . "${path}" else - echo "Configuration file \"$config\" does not exist or is not readable." >&2 + echo "Configuration file \"${config}\" cannot be read." >&2 + echo "See sfeedrc.example for an example." >&2 exit 1 fi } @@ -36,7 +38,7 @@ cat <<! <?xml version="1.0" encoding="UTF-8"?> <opml version="1.0"> <head> - <title>opml export from sfeed</title> + <title>OPML export from sfeed</title> </head> <body> ! diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c @@ -1,5 +1,4 @@ #include <ctype.h> -#include <err.h> #include <stdio.h> #include <strings.h> diff --git a/sfeed_plain.1 b/sfeed_plain.1 @@ -1,4 +1,4 @@ -.Dd January 1, 2021 +.Dd July 25, 2021 .Dt SFEED_PLAIN 1 .Os .Sh NAME @@ -6,26 +6,27 @@ .Nd format feed data to a plain-text list .Sh SYNOPSIS .Nm -.Op Ar file... +.Op Ar .Sh DESCRIPTION .Nm formats feed data (TSV) from .Xr sfeed 1 -from stdin or +from stdin or for each .Ar file to stdout as a plain-text list. 
If one or more .Ar file -are specified, the basename of the +arguments are specified then the basename of the .Ar file is used as the feed name in the output. If no .Ar file -parameters are specified and so the data is read from stdin the feed name +arguments are specified and so the data is read from stdin then the feed name is empty. .Pp Items with a timestamp from the last day compared to the system time at the time of formatting are marked as new. +Items are marked as new with the prefix "N". .Pp .Nm aligns the output. diff --git a/sfeed_plain.c b/sfeed_plain.c @@ -1,6 +1,5 @@ #include <sys/types.h> -#include <err.h> #include <locale.h> #include <stdio.h> #include <string.h> @@ -81,5 +80,6 @@ main(int argc, char *argv[]) fclose(fp); } } + return 0; } diff --git a/sfeed_read b/sfeed_read @@ -2,7 +2,7 @@ # sfeed_read # author: gearsix -# description: sfeed_update; generate $FOUT && xdg-open $FOUT +# description: sfeed_update; generate $FOUT && xdg-open $FEEDS (xdg-user-dir DOCUMENTS or ~/Documents) # usage: "sfeed_read [ENGINE]" ENGINE (optional) sets the sfeed_X tool to use (default is sfeed_html) TIMESTAMP=$(date +%F) diff --git a/sfeed_twtxt.1 b/sfeed_twtxt.1 @@ -1,4 +1,4 @@ -.Dd March 15, 2020 +.Dd July 31, 2021 .Dt SFEED_TWTXT 1 .Os .Sh NAME @@ -6,25 +6,35 @@ .Nd format feed data to a twtxt feed .Sh SYNOPSIS .Nm -.Op Ar file... +.Op Ar .Sh DESCRIPTION .Nm formats feed data (TSV) from .Xr sfeed 1 -from stdin or +from stdin or for each .Ar file to stdout as a twtxt feed. If one or more .Ar file -are specified, the basename of the +arguments are specified then the basename of the .Ar file is used as the feed name in the output. If no .Ar file -parameters are specified and so the data is read from stdin the feed name +arguments are specified and so the data is read from stdin then the feed name is empty. +.Pp +If +.Nm +is reading from one or more +.Ar file +arguments it will prefix the entry title with "[feed name] ". 
.Sh EXIT STATUS .Ex -std +.Sh EXAMPLES +.Bd -literal +curl -s 'https://codemadness.org/atom.xml' | sfeed | sfeed_twtxt +.Ed .Sh SEE ALSO .Xr sfeed 1 , .Xr sfeed_plain 1 , diff --git a/sfeed_twtxt.c b/sfeed_twtxt.c @@ -1,6 +1,5 @@ #include <sys/types.h> -#include <err.h> #include <stdio.h> #include <string.h> #include <time.h> @@ -14,7 +13,7 @@ static void printfeed(FILE *fp, const char *feedname) { char *fields[FieldLast]; - struct tm *tm; + struct tm parsedtm, *tm; time_t parsedtime; ssize_t linelen; @@ -25,7 +24,7 @@ printfeed(FILE *fp, const char *feedname) parsedtime = 0; if (!strtotime(fields[FieldUnixTimestamp], &parsedtime) && - (tm = gmtime(&parsedtime))) { + (tm = gmtime_r(&parsedtime, &parsedtm))) { fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02dZ\t", tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); @@ -66,5 +65,6 @@ main(int argc, char *argv[]) fclose(fp); } } + return 0; } diff --git a/sfeed_update b/sfeed_update @@ -14,18 +14,20 @@ maxjobs=8 loadconfig() { # allow to specify config via argv[1]. if [ "$1" != "" ]; then - # get absolute path of config file. - config=$(readlink -f "$1" 2>/dev/null) + # get absolute path of config file required for including. + config="$1" + path=$(readlink -f "${config}" 2>/dev/null) else # default config location. config="$HOME/.sfeed/sfeedrc" + path="${config}" fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${config}" ]; then - . "${config}" + if [ -r "${path}" ]; then + . "${path}" else - echo "Configuration file \"$1\" does not exist or is not readable." >&2 + echo "Configuration file \"${config}\" cannot be read." >&2 echo "See sfeedrc.example for an example." >&2 exit 1 fi diff --git a/sfeed_update.1 b/sfeed_update.1 @@ -1,4 +1,4 @@ -.Dd February 5, 2021 +.Dd August 3, 2021 .Dt SFEED_UPDATE 1 .Os .Sh NAME @@ -11,7 +11,7 @@ .Nm writes TAB-separated feed files and merges new items with the items in any existing files. 
-The items are stored in one file per feed in the directory +The items are stored as one file per feed in the directory .Pa $HOME/.sfeed/feeds by default. The directory can be changed in the @@ -42,7 +42,17 @@ format containing all items per feed. The .Nm script merges new items with this file. -The feedname cannot contain '/' characters, they will be replaced with '_'. +The feed name cannot contain the '/' character because it is a path separator, +they will be replaced with '_'. +.El +.Sh ENVIRONMENT VARIABLES +.Bl -tag -width Ds +.It SFEED_UPDATE_INCLUDE +When set to "1" +.Nm +can be sourced as a script, but it won't run the +.Fn main +entry-point. .El .Sh ENVIRONMENT VARIABLES .Bl -tag -width Ds @@ -61,11 +71,11 @@ To update your feeds and format them in various formats: # Update sfeed_update "configfile" # Plain-text list -sfeed_plain $HOME/.sfeed/feeds/* > $HOME/.sfeed/feeds.txt +sfeed_plain ~/.sfeed/feeds/* > ~/.sfeed/feeds.txt # HTML -sfeed_html $HOME/.sfeed/feeds/* > $HOME/.sfeed/feeds.html +sfeed_html ~/.sfeed/feeds/* > ~/.sfeed/feeds.html # HTML with frames -mkdir -p somedir && cd somedir && sfeed_frames $HOME/.sfeed/feeds/* +mkdir -p somedir && cd somedir && sfeed_frames ~/.sfeed/feeds/* .Ed .Sh SEE ALSO .Xr sfeed 1 , diff --git a/sfeed_web.1 b/sfeed_web.1 @@ -1,4 +1,4 @@ -.Dd March 12, 2021 +.Dd July 27, 2021 .Dt SFEED_WEB 1 .Os .Sh NAME @@ -9,8 +9,13 @@ .Op Ar baseurl .Sh DESCRIPTION .Nm -reads the HTML website as XML or HTML data from stdin and writes the found -URLs to stdout. +reads the HTML data of the webpage from stdin and writes the found URLs to +stdout. +.Pp +Such a link reference in HTML code looks like: +.Bd -literal + <link rel="alternate" href="atom.xml" type="application/atom+xml" /> +.Ed .Sh OPTIONS .Bl -tag -width 8n .It Ar baseurl @@ -19,7 +24,7 @@ Optional base URL to use for found feed URLs that are relative. .Sh OUTPUT FORMAT url<TAB>content-type<newline> .Bl -tag -width Ds -.It url +.It URL Found relative or absolute URL. 
.Pp For relative URLs if a <base href="..." /> tag is found it will be used, @@ -33,9 +38,9 @@ Usually application/atom+xml or application/rss+xml. .Sh EXIT STATUS .Ex -std .Sh EXAMPLES -Get URLs from xkcd website: +Get URLs from a website: .Bd -literal -curl -s -L 'http://www.xkcd.com/' | sfeed_web 'http://www.xkcd.com/' +curl -s -L 'https://codemadness.org/' | sfeed_web 'https://codemadness.org/' .Ed .Sh SEE ALSO .Xr sfeed_update 1 , diff --git a/sfeed_web.c b/sfeed_web.c @@ -1,5 +1,4 @@ #include <ctype.h> -#include <err.h> #include <stdio.h> #include <strings.h> diff --git a/sfeed_xmlenc.1 b/sfeed_xmlenc.1 @@ -1,4 +1,4 @@ -.Dd March 12, 2021 +.Dd July 25, 2021 .Dt SFEED_XMLENC 1 .Os .Sh NAME @@ -14,9 +14,9 @@ which are not allowed in an encoding-name. .Sh EXIT STATUS .Ex -std .Sh EXAMPLES -Get text-encoding from xkcd Atom feed: +Get text-encoding from an Atom feed: .Bd -literal -curl -s -L http://www.xkcd.com/atom.xml | sfeed_xmlenc +curl -s -L 'https://codemadness.org/atom.xml' | sfeed_xmlenc .Ed .Sh SEE ALSO .Xr sfeed_update 1 , diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c @@ -1,5 +1,4 @@ #include <ctype.h> -#include <err.h> #include <stdio.h> #include <stdlib.h> #include <strings.h> diff --git a/sfeedrc.5 b/sfeedrc.5 @@ -1,4 +1,4 @@ -.Dd January 26, 2021 +.Dd August 5, 2021 .Dt SFEEDRC 5 .Os .Sh NAME @@ -35,11 +35,12 @@ function, its arguments are: .It Fa name Name of the feed, this is also used as the filename for the TAB-separated feed file. -The feedname cannot contain '/' characters, they will be replaced with '_'. +The feed name cannot contain the '/' character because it is a path separator, +they will be replaced with '_'. .It Fa feedurl URL to fetch the RSS/Atom data from, usually a HTTP or HTTPS URL. .It Op Fa basesiteurl -Baseurl of the feed links. +Base URL of the feed links. This argument allows to fix relative item links. .Pp According to the RSS and Atom specification feeds should always have absolute @@ -96,7 +97,7 @@ Name of the feed. 
.It Fa feedurl URL of the feed. .It Fa basesiteurl -Baseurl of the feed links. +Base URL of the feed links. This argument allows to fix relative item links. .El .It Fn filter "name" @@ -147,6 +148,22 @@ feeds() { feed "xkcd" "https://xkcd.com/atom.xml" "https://xkcd.com" } .Ed +.Pp +To change the default +.Xr curl 1 +options for fetching the data, the +.Fn fetch +function can be overridden and added at the top of the +.Nm +file: +.Bd -literal +# fetch(name, url, feedfile) +fetch() { + # allow for 1 redirect, hide User-Agent, timeout is 15 seconds. + curl -L --max-redirs 1 -H "User-Agent:" -f -s -m 15 \\ + "$2" 2>/dev/null +} +.Ed .Sh SEE ALSO .Xr curl 1 , .Xr iconv 1 , diff --git a/util.c b/util.c @@ -1,5 +1,6 @@ #include <ctype.h> #include <errno.h> +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -7,6 +8,44 @@ #include "util.h" +/* print to stderr, print error message of errno and exit(). + Unlike BSD err() it does not prefix __progname */ +__dead void +err(int exitstatus, const char *fmt, ...) +{ + va_list ap; + int saved_errno; + + saved_errno = errno; + + if (fmt) { + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputs(": ", stderr); + } + fprintf(stderr, "%s\n", strerror(saved_errno)); + + exit(exitstatus); +} + +/* print to stderr and exit(). + Unlike BSD errx() it does not prefix __progname */ +__dead void +errx(int exitstatus, const char *fmt, ...) 
+{ + va_list ap; + + if (fmt) { + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } + fputs("\n", stderr); + + exit(exitstatus); +} + /* check if string has a non-empty scheme / protocol part */ int uri_hasscheme(const char *s) diff --git a/util.h b/util.h @@ -38,6 +38,14 @@ enum { FieldLast }; +/* hint for compilers and static analyzers that a function exits */ +#ifndef __dead +#define __dead +#endif + +__dead void err(int, const char *, ...); +__dead void errx(int, const char *, ...); + int uri_format(char *, size_t, struct uri *); int uri_hasscheme(const char *); int uri_makeabs(struct uri *, struct uri *, struct uri *); diff --git a/xml.h b/xml.h @@ -24,7 +24,7 @@ typedef struct xmlparser { #ifndef GETNEXT /* GETNEXT overridden to reduce function call overhead and further context optimizations. */ - #define GETNEXT getchar + #define GETNEXT getchar_unlocked #endif /* current tag */