commit a1d56564fdf8aa700468fb9feebe9cb05e4ab584
parent 356e7d79925f91b9b703ee63e3680694c53a59a4
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 31 Jul 2015 21:54:36 +0200
update and improve documentation (WIP)
Diffstat:
9 files changed, 132 insertions(+), 109 deletions(-)
diff --git a/README b/README
@@ -23,10 +23,7 @@ Optional dependencies
used by sfeed_update. If the text in your RSS/Atom feeds is already UTF-8
encoded then you don't need this. For an alternative minimal iconv
implementation: http://git.etalabs.net/cgit/noxcuse/tree/src/iconv.c
-- mandoc for documentation: http://mdocml.bsd.lv/ . If your host
- system doesn't have mandoc you can copy the legacy man-pages from doc/man
- to your $MANPATH. For the most up-to-date documentation you can convert
- the pages to the legacy format with mandoc -Tman (make doc-oldman).
+- mandoc for documentation: http://mdocml.bsd.lv/ .
Platforms tested
@@ -42,11 +39,12 @@ Files
sfeed - Binary (from sfeed.c); read XML RSS or Atom feed data from
stdin. Write feed data in tab-separated format to stdout.
-sfeed_html - Format feeds file (TSV) from sfeed_update to HTML.
-sfeed_frames - Format feeds as a HTML file with frames.
+sfeed_html - Format feeds file (TSV) to HTML.
+sfeed_frames - Format feeds file (TSV) to HTML file(s) with frames.
+sfeed_mbox - Format feeds file (TSV) to mbox.
sfeed_opml_import - Generate a sfeedrc config file based on an opml file.
sfeed_opml_export - Generate an opml file based on a sfeedrc config file.
-sfeed_plain - Format feeds file (TSV) from sfeed_update to plain text.
+sfeed_plain - Format feeds file (TSV) to a plain-text list.
sfeed_update - Shellscript; update feeds and merge with old feeds in the
file $HOME/.sfeed/feeds by default.
sfeed_web - Find urls to RSS/Atom feed from a webpage.
@@ -80,13 +78,13 @@ TAB-separated format
The items are saved in a TSV-like format.
-The fields: title, id, author are not allowed to have newlines, tabs, all
+The fields: title, id, author are not allowed to have newlines and TABs. All
whitespace is replaced by a single space character. Control characters are
removed.
-The content field can contain newlines and is escaped. TABs, newline and '\'
+The content field can contain newlines and is escaped. TABs, newlines and '\'
are escaped with '\', so: '\n', '\t', and '\\'. Other whitespace characters
-except space are removed. Control characters are also removed.
+except space are removed. Control characters are removed.
The timestamp field is converted to a UNIX timestamp. The timestamp is also
stored in formatted form as a separate field.
@@ -96,7 +94,7 @@ The order and format of the fields are:
item UNIX timestamp - string UNIX timestamp (UTC+0).
item formatted timestamp - string timestamp, YYYY-mm-dd HH:MM:SS (UTC[+-]HH:MM)|tz
item title - string
-item link - string, absolute url, unsafe characters are encoded.
+item link - string, absolute url, characters are uri encoded.
item content - string
item contenttype - string, "html" or "plain".
item id - string
@@ -115,8 +113,8 @@ Using make (respects $DESTDIR and $PREFIX):
make install
-Usage
------
+Usage and examples
+------------------
Find RSS/Atom feed urls from a webpage:
@@ -126,18 +124,19 @@ output:
application/rss+xml http://codemadness.org/blog/rss.xml
application/atom+xml http://codemadness.org/blog/atom.xml
+- - -
To update feeds and format the feeds file (configfile argument is optional):
sfeed_update "configfile"
- sfeed_html < $HOME/.sfeed/feeds/* > $HOME/.sfeed/feeds.html
- sfeed_plain < $HOME/.sfeed/feeds/* > $HOME/.sfeed/feeds.txt
+ sfeed_html $HOME/.sfeed/feeds/* > $HOME/.sfeed/feeds.html
+ sfeed_plain $HOME/.sfeed/feeds/* > $HOME/.sfeed/feeds.txt
mkdir -p somedir && cd somedir && sfeed_frames $HOME/.sfeed/feeds/*
Example script to view feeds with dmenu, opens selected url in $BROWSER:
#!/bin/sh
- url=$(sfeed_plain < $HOME/.sfeed/feeds/* | dmenu -l 35 -i |
+ url=$(sfeed_plain $HOME/.sfeed/feeds/* | dmenu -l 35 -i |
sed 's@^.* \([a-zA-Z]*://\)\(.*\)$@\1\2@')
[ ! "$url" = "" ] && $BROWSER "$url"
@@ -157,12 +156,14 @@ format:
sfeed_opml_import < opmlfile.xml > $HOME/.sfeed/sfeedrc
+- - -
Export an opml file of your feeds from a sfeedrc config file (configfile
argument is optional):
sfeed_opml_export configfile > myfeeds.opml
+- - -
Over time your feeds file might become quite big. You can archive items from a
specific date by doing for example: (make sure to change
@@ -181,6 +182,103 @@ mktime("YYYY mm dd HH mm ss")):
mv feeds feeds.old
mv feeds.clean feeds
+- - -
+
+Convert mbox to separate maildirs per feed and filter duplicate messages
+using fdm: https://github.com/nicm/fdm .
+
+For example using the following config (~/.sfeed/fdm.conf):
+
+ set unmatched-mail keep
+
+ account "sfeed" mbox "%[home]/.sfeed/mbox"
+ $cachepath = "%[home]/.sfeed/mbox.cache"
+ cache "${cachepath}"
+ $feedsdir = "%[home]/feeds/"
+
+ # check if in cache by message-id.
+ match case "^Message-ID: (.*)" in headers
+ action {
+ tag "msgid" value "%1"
+ }
+ continue
+ # if in cache, stop.
+ match matched and in-cache "${cachepath}" key "%[msgid]"
+ action {
+ keep
+ }
+
+ # not in cache, process it and add to cache.
+ match case "^X-Feedname: (.*)" in headers
+ action {
+ maildir "${feedsdir}%1"
+ add-to-cache "${cachepath}" key "%[msgid]"
+ keep
+ }
+
+Now run:
+
+$ sfeed_mbox ~/.sfeed/feeds/* > ~/.sfeed/mbox
+$ fdm -f ~/.sfeed/fdm.conf fetch
+
+Now you can view feeds in mutt(1) for example.
+
+- - -
+
+Use procmail to format mbox to separate maildirs per feed.
+Depends on: procmail, formail, sfeed_mbox.
+
+procmail_maildirs.sh file:
+
+ maildir="$HOME/feeds"
+ feedsdir="$HOME/.sfeed/feeds"
+ procmailconfig="$HOME/.sfeed/procmailrc"
+
+ # message-id cache to prevent duplicates.
+ mkdir -p "${maildir}/.cache"
+
+ if ! test -r "${procmailconfig}"; then
+ echo "Procmail configuration file \"${procmailconfig}\" does not exist or is not readable." >&2
+ echo "See procmailrc.example for an example." >&2
+ exit 1
+ fi
+
+ find "${feedsdir}" -type f -exec printf '%s\n' {} \; | while read -r d; do
+ (name=$(basename "${d}")
+ mkdir -p "${maildir}/${name}/cur"
+ mkdir -p "${maildir}/${name}/new"
+ mkdir -p "${maildir}/${name}/tmp"
+ printf 'Mailbox %s\n' "${name}"
+ sfeed_mbox "${d}" | formail -s procmail "${procmailconfig}") &
+ done
+ wait
+
+Procmailrc file:
+
+ # Example for use with sfeed_maildir.
+ # The header X-Feedname is used to split into separate maildirs. It is assumed
+ # this name is sane.
+
+ MAILDIR="$HOME/feeds/"
+
+ :0
+ * ^X-Feedname: \/.*
+ {
+ FEED="$MATCH"
+
+ :0 Wh: "msgid_$FEED.lock"
+ | formail -D 1024000 ".cache/msgid_$FEED.cache"
+
+ :0
+ "$FEED"/
+ }
+
+Now run:
+
+$ procmail_maildirs.sh
+
+Now you can view feeds in mutt(1) for example.
+
License
-------
diff --git a/fdm.conf.example b/fdm.conf.example
@@ -1,31 +0,0 @@
-# Convert mbox to separate maildirs per feed and filter duplicate messages.
-# Usage:
-# $ sfeed_mbox ~/.sfeed/feeds/* > ~/.sfeed/mbox
-# $ fdm -f thisconfig fetch
-
-set unmatched-mail keep
-
-account "sfeed" mbox "%[home]/.sfeed/mbox"
- $cachepath = "%[home]/.sfeed/mbox.cache"
- cache "${cachepath}"
- $feedsdir = "%[home]/feeds/"
-
- # check if in cache by message-id.
- match case "^Message-ID: (.*)" in headers
- action {
- tag "msgid" value "%1"
- }
- continue
- # if in cache, stop.
- match matched and in-cache "${cachepath}" key "%[msgid]"
- action {
- keep
- }
-
- # not in cache, process it and add to cache.
- match case "^X-Feedname: (.*)" in headers
- action {
- maildir "${feedsdir}%1"
- add-to-cache "${cachepath}" key "%[msgid]"
- keep
- }
diff --git a/procmailrc.example b/procmailrc.example
@@ -1,17 +0,0 @@
-# Example for use with sfeed_maildir.
-# The header X-Feedname is used to split into separate maildirs. It is assumes
-# this name is sane.
-
-MAILDIR="$HOME/feeds/"
-
-:0
-* ^X-Feedname: \/.*
-{
- FEED="$MATCH"
-
- :0 Wh: "msgid_$FEED.lock"
- | formail -D 1024000 ".cache/msgid_$FEED.cache"
-
- :0
- "$FEED"/
-}
diff --git a/sfeed.c b/sfeed.c
@@ -601,9 +601,8 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen)
case AtomTagPublished:
case AtomTagUpdated:
/* prefer published over updated if set */
- if (ctx.tagid != AtomTagUpdated || !ctx.item.timestamp.len) {
+ if (ctx.tagid != AtomTagUpdated || !ctx.item.timestamp.len)
ctx.field = &ctx.item.timestamp;
- }
break;
case RSSTagTitle:
case AtomTagTitle:
diff --git a/sfeed_frames.1 b/sfeed_frames.1
@@ -10,10 +10,10 @@
.Sh DESCRIPTION
.Nm
formats a feeds file (TSV) from
-.Xr sfeed_update 1
-to HTML. It reads TSV data from stdin and writes HTML to the specified
+.Xr sfeed 1
+to HTML. It reads TSV data from stdin and writes HTML to the current
directory. For the exact TSV format see
-.Xr sfeed_update 1 .
+.Xr sfeed 1 .
.Sh OPTIONS
.Bl -tag -width 14n
.It Ar directory path
@@ -37,7 +37,6 @@ with a \-, multiple whitespaces are replaced by a single \- and trailing
whitespace will be removed.
.Sh SEE ALSO
.Xr sfeed 1 ,
-.Xr sfeed_plain 1 ,
-.Xr sfeed_update 1
+.Xr sfeed_plain 1
.Sh AUTHORS
.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
diff --git a/sfeed_html.1 b/sfeed_html.1
@@ -14,7 +14,6 @@ to HTML. It reads TSV data from stdin and writes HTML to stdout. For the exact T
.Xr sfeed_update 1 .
.Sh SEE ALSO
.Xr sfeed 1 ,
-.Xr sfeed_plain 1 ,
-.Xr sfeed_update 1
+.Xr sfeed_plain 1
.Sh AUTHORS
.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
diff --git a/sfeed_maildir b/sfeed_maildir
@@ -1,27 +0,0 @@
-#!/bin/sh
-# Uses procmail to format mbox to maildir, see procmailrc.example.
-# Copy procmailrc.example to $procmailconfig (see below).
-# Depends on: procmail, formail, sfeed_mbox.
-
-maildir="$HOME/feeds"
-feedsdir="$HOME/.sfeed/feeds"
-procmailconfig="$HOME/.sfeed/procmailrc"
-
-# message-id cache to prevent duplicates.
-mkdir -p "${maildir}/.cache"
-
-if ! test -r "${procmailconfig}"; then
- echo "Procmail configuration file \"${procmailconfig}\" does not exist or is not readable." >&2
- echo "See procmailrc.example for an example." >&2
- exit 1
-fi
-
-find "${feedsdir}" -type f -exec printf '%s\n' {} \; | while read -r d; do
- (name=$(basename "${d}")
- mkdir -p "${maildir}/${name}/cur"
- mkdir -p "${maildir}/${name}/new"
- mkdir -p "${maildir}/${name}/tmp"
- printf 'Mailbox %s\n' "${name}"
- sfeed_mbox "${d}" | formail -s procmail "${procmailconfig}") &
-done
-wait
diff --git a/sfeed_mbox.1 b/sfeed_mbox.1
@@ -3,16 +3,19 @@
.Os
.Sh NAME
.Nm sfeed_mbox
-.Nd formats a feeds file to mail
+.Nd formats a feeds file to mbox
.Sh SYNOPSIS
.Nm
.Sh DESCRIPTION
.Nm
formats a feeds file (TSV) from
-.Xr sfeed_update 1
-to mail. It reads TSV data from stdin and writes e-mails to stdout. These can
-be further processed by tools like
-.Xr procmail 1 .
+.Xr sfeed 1
+to mbox. It reads TSV data from stdin and writes mail in the mbox format
+to stdout. These can be further processed by tools like
+.Xr procmail 1
+or
+.Xr fdm 1
+for example.
.Sh FORMAT
Depending on the original content\-type the mail will be formatted as
plain-text (text/plain) or HTML (text/html).
@@ -23,8 +26,8 @@ To make filtering simpler some custom headers are set:
The feedname (set in sfeedrc).
.El
.Sh SEE ALSO
+.Xr fdm 1 ,
.Xr procmail 1 ,
-.Xr sfeed 1 ,
-.Xr sfeed_update 1
+.Xr sfeed 1
.Sh AUTHORS
.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
diff --git a/util.c b/util.c
@@ -207,7 +207,7 @@ print(const char *s, FILE *fp, int (*fn)(int, FILE *))
fn((int)*s, fp);
}
-/* unescape / decode fields printed by string_print_encode()
+/* Unescape / decode fields printed by string_print_encode()
* "\\" to "\", "\t" to TAB, "\n" to newline. Unrecognised escape sequences
* are ignored: "\z" etc. Call `fn` on each escaped character. */
void
@@ -227,7 +227,7 @@ decodefield(const char *s, FILE *fp, int (*fn)(int, FILE *))
}
}
-/* print some HTML 2.0 / XML 1.0 as normal text */
+/* Escape characters below as HTML 2.0 / XML 1.0. */
int
xmlencode(int c, FILE *fp)
{