Various improvements - sfeed - simple feed reader

commit 356e7d79925f91b9b703ee63e3680694c53a59a4
parent eb586eda26967183de91c314a57d323b124110bb
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Fri, 31 Jul 2015 21:06:52 +0200

Various improvements

- Only escape characters in "content" field, these can contain newlines.
- Trim newlines and tabs, etc from the title, id and author fields.
- Make decodefield, xmlencode functions easier to "chain" without allocating
  new buffers.
- Move printutf8pad from util (only used by sfeed_plain) to sfeed_plain.
- Update README, still need to update the man-page and improve the documentation
  in general.
- Code cleanup.

Diffstat:
M README  | 23 ++++++++++++++---------
M sfeed.c  | 172 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
M sfeed_frames.c  | 21 ++++++++++++---------
M sfeed_html.c  | 14 +++++++-------
M sfeed_mbox.c  | 15 ++++++++++-----
M sfeed_plain.c  | 32 +++++++++++++++++++++++++++++---
M util.c  | 105 ++++++++++++++++++++++++++-----------------------------------------------------
M util.h  | 8 +++-----

8 files changed, 214 insertions(+), 176 deletions(-)
diff --git a/README b/README
@@ -78,25 +78,30 @@ feeds.new - Temporary file used by sfeed_update to merge items.
 TAB-separated format
 --------------------
 
-The items are saved in a TSV-like format except newlines, tabs and
-backslash are escaped with \ (\n, \t and \\). Other whitespace except
-spaces are removed.
+The items are saved in a TSV-like format.
+
+The fields title, id and author are not allowed to contain newlines or tabs:
+each whitespace character is replaced by a single space character. Control
+characters are removed.
+
+The content field can contain newlines and is escaped. TABs, newline and '\'
+are escaped with '\', so: '\n', '\t', and '\\'. Other whitespace characters
+except space are removed. Control characters are also removed.
 
 The timestamp field is converted to a UNIX timestamp. The timestamp is also
-stored as formatted as a separate field. The other fields are left untouched
-(including HTML).
+stored as formatted as a separate field.
 
 The order and format of the fields are:
 
-item UNIX timestamp      - string UNIX timestamp (UTC+0)
+item UNIX timestamp      - string UNIX timestamp (UTC+0).
 item formatted timestamp - string timestamp, YYYY-mm-dd HH:MM:SS (UTC[+-]HH:MM)|tz
 item title               - string
-item link                - string, absolute url, unsafe characters are encoded
+item link                - string, absolute url, unsafe characters are encoded.
 item content             - string
-item contenttype         - string, "html" or "plain"
+item contenttype         - string, "html" or "plain".
 item id                  - string
 item author              - string
-feed type                - string, "rss" or "atom"
+feed type                - string, "rss" or "atom".
 
 CAVEAT: if a timezone is not supported (non-RFC-822) the UNIX timestamp is
         interpreted as UTC+0.
diff --git a/sfeed.c b/sfeed.c
@@ -18,16 +18,16 @@
 /* length of string */
 #define STRSIZ(s)         (sizeof(s)-1)
 
-enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2 };
+enum FeedType { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2 };
 static const char *feedtypes[] = { "", "rss", "atom" };
 
-enum { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2 };
+enum ContentType { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2 };
 static const char *contenttypes[] = { "", "plain", "html" };
 
 static const int FieldSeparator = '\t'; /* output field seperator character */
 static const char *baseurl = "";
 
-enum {
+enum TagId {
 	TagUnknown = 0,
 	/* RSS */
 	RSSTagDcdate, RSSTagPubdate, RSSTagTitle,
@@ -59,9 +59,9 @@ typedef struct feeditem {
 } FeedItem;
 
 typedef struct feedtag {
-	char   *name;
-	size_t  namelen;
-	int id;
+	char       *name;
+	size_t      namelen;
+	enum TagId  id;
 } FeedTag;
 
 typedef struct feedcontext {
@@ -75,7 +75,7 @@ typedef struct feedcontext {
 	int       attrcount;
 } FeedContext;
 
-static int    gettag(int, const char *, size_t);
+static enum TagId gettag(enum FeedType, const char *, size_t);
 static int    gettimetz(const char *, char *, size_t, int *);
 static int    isattr(const char *, size_t, const char *, size_t);
 static int    istag(const char *, size_t, const char *, size_t);
@@ -85,7 +85,8 @@ static void   string_append(String *, const char *, size_t);
 static void   string_buffer_init(String *, size_t);
 static void   string_buffer_realloc(String *, size_t);
 static void   string_clear(String *);
-static void   string_print(String *);
+static void   string_print_encoded(String *);
+static void   string_print_trimmed(String *);
 static void   xml_handler_attr(XMLParser *, const char *, size_t,
                                const char *, size_t, const char *, size_t);
 static void   xml_handler_attr_start(XMLParser *, const char *, size_t,
@@ -104,8 +105,8 @@ static FeedContext ctx;
 static XMLParser parser; /* XML parser state */
 
 /* unique number for parsed tag (faster comparison) */
-static int
-gettag(int feedtype, const char *name, size_t namelen)
+static enum TagId
+gettag(enum FeedType feedtype, const char *name, size_t namelen)
 {
 	/* RSS, alphabetical order */
 	static FeedTag rsstag[] = {
@@ -138,24 +139,29 @@ gettag(int feedtype, const char *name, size_t namelen)
 	if (namelen < 2 || namelen > 15)
 		return TagUnknown;
 
-	if (feedtype == FeedTypeRSS) {
+	switch (feedtype) {
+	case FeedTypeRSS:
 		for (i = 0; rsstag[i].name; i++) {
 			if (!(n = strncasecmp(rsstag[i].name, name, rsstag[i].namelen)))
-				return rsstag[i].id;
+				return rsstag[i].id; /* found */
 			/* optimization: it's sorted so nothing after it matches. */
 			if (n > 0)
 				return TagUnknown;
 		}
-	} else if (feedtype == FeedTypeAtom) {
+		break;
+	case FeedTypeAtom:
 		for (i = 0; atomtag[i].name; i++) {
 			if (!(n = strncasecmp(atomtag[i].name, name, atomtag[i].namelen)))
-				return atomtag[i].id;
+				return atomtag[i].id; /* found */
 			/* optimization: it's sorted so nothing after it matches. */
 			if (n > 0)
 				return TagUnknown;
 		}
+		break;
+	default:
+		return TagUnknown;
 	}
-	return TagUnknown;
+	return TagUnknown; /* NOTREACHED */
 }
 
 /* clear string only; don't free, prevents unnecessary reallocation */
@@ -334,23 +340,26 @@ parsetime(const char *s, char *buf, size_t bufsiz, time_t *tp)
 	return -1;
 }
 
-/* print text, escape tabs, newline and carriage return etc */
+/* Print text, encode TABs, newlines and '\', remove other whitespace.
+ * Remove leading and trailing whitespace. */
 static void
-string_print(String *s)
+string_print_encoded(String *s)
 {
 	const char *p, *e;
 
 	/* skip leading whitespace */
-	p = trimstart(s->data);
-	e = trimend(p);
+	for (p = s->data; *p && isspace((int)*p); p++)
+		;
+	/* seek offset of trailing whitespace */
+	for (e = p + strlen(p); e > p && isspace((int)*(e - 1)); e--)
+		;
 
 	for (; *p && p != e; p++) {
-		/* isspace(c) && c != ' '. */
-		if (((unsigned)*p - '\t') < 5) {
-			switch(*p) {
-			case '\n': fputs("\\n", stdout); break;
+		if (isspace((int)*p) && *p != ' ') {
+			switch (*p) {
+			case '\n': fputs("\\n",  stdout); break;
 			case '\\': fputs("\\\\", stdout); break;
-			case '\t': fputs("\\t", stdout); break;
+			case '\t': fputs("\\t",  stdout); break;
 			default: break; /* ignore other whitespace chars */
 			}
 		} else if (!iscntrl((int)*p)) { /* ignore control chars */
@@ -359,6 +368,29 @@ string_print(String *s)
 	}
 }
 
+/* Print text, replace TABs, carriage return and other whitespace with ' '.
+ * Other control chars are removed. Remove leading and trailing whitespace. */
+static void
+string_print_trimmed(String *s)
+{
+	const char *p, *e;
+
+	/* skip leading whitespace */
+	for (p = s->data; *p && isspace((int)*p); p++)
+		;
+	/* seek offset of trailing whitespace */
+	for (e = p + strlen(p); e > p && isspace((int)*(e - 1)); e--)
+		;
+
+	for (; *p && p != e; p++) {
+		if (isspace((int)*p))
+			putchar(' ');
+		else if (!iscntrl((int)*p))
+			/* ignore other control chars */
+			putchar((int)*p);
+	}
+}
+
 static void
 printfields(void)
 {
@@ -376,19 +408,19 @@ printfields(void)
 	if (r != -1)
 		fputs(timebuf, stdout);
 	putchar(FieldSeparator);
-	string_print(&ctx.item.title);
+	string_print_trimmed(&ctx.item.title);
 	putchar(FieldSeparator);
 	/* always print absolute urls */
 	if (absuri(ctx.item.link.data, baseurl, link, sizeof(link)) != -1)
 		fputs(link, stdout);
 	putchar(FieldSeparator);
-	string_print(&ctx.item.content);
+	string_print_encoded(&ctx.item.content);
 	putchar(FieldSeparator);
 	fputs(contenttypes[ctx.item.contenttype], stdout);
 	putchar(FieldSeparator);
-	string_print(&ctx.item.id);
+	string_print_trimmed(&ctx.item.id);
 	putchar(FieldSeparator);
-	string_print(&ctx.item.author);
+	string_print_trimmed(&ctx.item.author);
 	putchar(FieldSeparator);
 	fputs(feedtypes[ctx.item.feedtype], stdout);
 	putchar('\n');
@@ -555,53 +587,59 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen)
 	/* tag already set: return */
 	if (ctx.tag[0] != '\0')
 		return;
+
 	/* in item */
 	strlcpy(ctx.tag, name, sizeof(ctx.tag)); /* NOTE: truncation ignored */
 	ctx.taglen = namelen;
 	ctx.tagid = gettag(ctx.item.feedtype, ctx.tag, ctx.taglen);
-	if (ctx.tagid == TagUnknown)
-		ctx.field = NULL;
 
-	if (ctx.item.feedtype == FeedTypeRSS) {
-		if (ctx.tagid == RSSTagPubdate || ctx.tagid == RSSTagDcdate)
+	switch (ctx.tagid) {
+	case RSSTagPubdate:
+	case RSSTagDcdate:
+		ctx.field = &ctx.item.timestamp;
+		break;
+	case AtomTagPublished:
+	case AtomTagUpdated:
+		/* prefer published over updated if set */
+		if (ctx.tagid != AtomTagUpdated || !ctx.item.timestamp.len) {
 			ctx.field = &ctx.item.timestamp;
-		else if (ctx.tagid == RSSTagTitle)
-			ctx.field = &ctx.item.title;
-		else if (ctx.tagid == RSSTagLink)
-			ctx.field = &ctx.item.link;
-		else if (ctx.tagid == RSSTagDescription ||
-		        ctx.tagid == RSSTagContentencoded) {
-			/* ignore, prefer content:encoded over description */
-			if (ctx.tagid != RSSTagDescription || !ctx.item.content.len) {
-				ctx.iscontenttag = 1;
-				ctx.field = &ctx.item.content;
-			}
-		} else if (ctx.tagid == RSSTagGuid) {
-			ctx.field = &ctx.item.id;
-		} else if (ctx.tagid == RSSTagAuthor || ctx.tagid == RSSTagDccreator) {
-			ctx.field = &ctx.item.author;
 		}
-	} else if (ctx.item.feedtype == FeedTypeAtom) {
-		if (ctx.tagid == AtomTagPublished || ctx.tagid == AtomTagUpdated) {
-			/* ignore, prefer published over updated */
-			if (ctx.tagid != AtomTagUpdated || !ctx.item.timestamp.len) {
-				ctx.field = &ctx.item.timestamp;
-			}
-		} else if (ctx.tagid == AtomTagTitle) {
-			ctx.field = &ctx.item.title;
-		} else if (ctx.tagid == AtomTagSummary || ctx.tagid == AtomTagContent) {
-			/* ignore, prefer content:encoded over description */
-			if (ctx.tagid != AtomTagSummary || !ctx.item.content.len) {
-				ctx.iscontenttag = 1;
-				ctx.field = &ctx.item.content;
-			}
-		} else if (ctx.tagid == AtomTagId) {
-			ctx.field = &ctx.item.id;
-		} else if (ctx.tagid == AtomTagLink) {
-			ctx.field = &ctx.item.link;
-		} else if (ctx.tagid == AtomTagAuthor) {
-			ctx.field = &ctx.item.author;
+		break;
+	case RSSTagTitle:
+	case AtomTagTitle:
+		ctx.field = &ctx.item.title;
+		break;
+	case RSSTagLink:
+	case AtomTagLink:
+		ctx.field = &ctx.item.link;
+		break;
+	case RSSTagDescription:
+	case RSSTagContentencoded:
+		/* prefer content:encoded over description if set */
+		if (ctx.tagid != RSSTagDescription || !ctx.item.content.len) {
+			ctx.iscontenttag = 1;
+			ctx.field = &ctx.item.content;
 		}
+		break;
+	case AtomTagSummary:
+	case AtomTagContent:
+		/* prefer content over summary if set */
+		if (ctx.tagid != AtomTagSummary || !ctx.item.content.len) {
+			ctx.iscontenttag = 1;
+			ctx.field = &ctx.item.content;
+		}
+		break;
+	case RSSTagGuid:
+	case AtomTagId:
+		ctx.field = &ctx.item.id;
+		break;
+	case RSSTagAuthor:
+	case RSSTagDccreator:
+	case AtomTagAuthor:
+		ctx.field = &ctx.item.author;
+		break;
+	default:
+		ctx.field = NULL;
 	}
 	/* clear field */
 	if (ctx.field)
diff --git a/sfeed_frames.c b/sfeed_frames.c
@@ -83,11 +83,11 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
 	/* menu if not unnamed */
 	if (f->name[0]) {
 		fputs("<h2 id=\"", fpitems);
-		printxmlencoded(f->name, fpitems);
+		print(f->name, fpitems, xmlencode);
 		fputs("\"><a href=\"#", fpitems);
-		printxmlencoded(f->name, fpitems);
+		print(f->name, fpitems, xmlencode);
 		fputs("\">", fpitems);
-		printxmlencoded(f->name, fpitems);
+		print(f->name, fpitems, xmlencode);
 		fputs("</a></h2>\n", fpitems);
 	}
 
@@ -108,11 +108,14 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
 			      "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /></head>\n"
 			      "<body class=\"frame\"><div class=\"content\">"
 			      "<h2><a href=\"", fpcontent);
-			printxmlencoded(fields[FieldLink], fpcontent);
+			print(fields[FieldLink], fpcontent, xmlencode);
 			fputs("\">", fpcontent);
-			printxmlencoded(fields[FieldTitle], fpcontent);
+			print(fields[FieldTitle], fpcontent, xmlencode);
 			fputs("</a></h2>", fpcontent);
-			printcontent(fields[FieldContent], fpcontent);
+			/* NOTE: this prints the raw HTML of the feed, this is
+			 * potentially dangerous, it is up to the user / browser
+			 * to trust a feed's HTML content. */
+			decodefield(fields[FieldContent], fpcontent, fputc);
 			fputs("</div></body></html>", fpcontent);
 			fclose(fpcontent);
 		}
@@ -141,7 +144,7 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
 		fputs("<a href=\"", fpitems);
 		fputs(filepath, fpitems);
 		fputs("\" target=\"content\">", fpitems);
-		printxmlencoded(fields[FieldTitle], fpitems);
+		print(fields[FieldTitle], fpitems, xmlencode);
 		fputs("</a>", fpitems);
 		if (isnew)
 			fputs("</u></b>", fpitems);
@@ -210,11 +213,11 @@ main(int argc, char *argv[])
 				fputs("<a class=\"n\" href=\"items.html#", fpmenu);
 			else
 				fputs("<a href=\"items.html#", fpmenu);
-			printxmlencoded(f->name, fpmenu);
+			print(f->name, fpmenu, xmlencode);
 			fputs("\" target=\"items\">", fpmenu);
 			if (f->totalnew > 0)
 				fputs("<b><u>", fpmenu);
-			printxmlencoded(f->name, fpmenu);
+			print(f->name, fpmenu, xmlencode);
 			fprintf(fpmenu, " (%lu)", f->totalnew);
 			if (f->totalnew > 0)
 				fputs("</u></b>", fpmenu);
diff --git a/sfeed_html.c b/sfeed_html.c
@@ -24,11 +24,11 @@ printfeed(FILE *fp, struct feed *f)
 
 	if (f->name[0] != '\0') {
 		fputs("<h2 id=\"", stdout);
-		printxmlencoded(f->name, stdout);
+		print(f->name, stdout, xmlencode);
 		fputs("\"><a href=\"#", stdout);
-		printxmlencoded(f->name, stdout);
+		print(f->name, stdout, xmlencode);
 		fputs("\">", stdout);
-		printxmlencoded(f->name, stdout);
+		print(f->name, stdout, xmlencode);
 		fputs("</a></h2>\n", stdout);
 	}
 	fputs("<table cellpadding=\"0\" cellspacing=\"0\">\n", stdout);
@@ -53,10 +53,10 @@ printfeed(FILE *fp, struct feed *f)
 			fputs("<b><u>", stdout);
 		if (islink) {
 			fputs("<a href=\"", stdout);
-			printxmlencoded(fields[FieldLink], stdout);
+			print(fields[FieldLink], stdout, xmlencode);
 			fputs("\">", stdout);
 		}
-		printxmlencoded(fields[FieldTitle], stdout);
+		print(fields[FieldTitle], stdout, xmlencode);
 		if (islink)
 			fputs("</a>", stdout);
 		if (isnew)
@@ -126,11 +126,11 @@ main(int argc, char *argv[])
 				fputs("<li class=\"n\"><a href=\"#", stdout);
 			else
 				fputs("<li><a href=\"#", stdout);
-			printxmlencoded(f->name, stdout);
+			print(f->name, stdout, xmlencode);
 			fputs("\">", stdout);
 			if (f->totalnew > 0)
 				fputs("<b><u>", stdout);
-			printxmlencoded(f->name, stdout);
+			print(f->name, stdout, xmlencode);
 			fprintf(stdout, " (%lu)", f->totalnew);
 			if (f->totalnew > 0)
 				fputs("</u></b>", stdout);
diff --git a/sfeed_mbox.c b/sfeed_mbox.c
@@ -60,12 +60,17 @@ printfeed(FILE *fp, const char *feedname)
 			fields[FieldContentType], feedname);
 
 		if (!strcmp(fields[FieldContentType], "html")) {
-			printf("<p>Link: <a href=\"%s\">%s</a></p>\n\n",
-			fields[FieldLink], fields[FieldLink]);
-			printcontent(fields[FieldContent], stdout);
+			fputs("<p>Link: <a href=\"", stdout);
+			decodefield(fields[FieldLink], stdout, fputc);
+			fputs("\">", stdout);
+			decodefield(fields[FieldLink], stdout, fputc);
+			fputs("</a></p>\n\n", stdout);
+			decodefield(fields[FieldContent], stdout, fputc);
 		} else {
-			printf("Link: %s\n\n", fields[FieldLink]);
-			printcontent(fields[FieldContent], stdout);
+			fputs("Link: ", stdout);
+			decodefield(fields[FieldLink], stdout, fputc);
+			fputs("\n\n", stdout);
+			decodefield(fields[FieldContent], stdout, fputc);
 		}
 		fputs("\n\n", stdout);
 	}
diff --git a/sfeed_plain.c b/sfeed_plain.c
@@ -1,8 +1,10 @@
+#include <ctype.h>
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#include <wchar.h>
 
 #include "util.h"
 
@@ -10,6 +12,32 @@ static time_t comparetime;
 static char *line = NULL;
 static size_t size = 0;
 
+/* print `len` columns of characters. If string is shorter pad the rest
+ * with characters `pad`. */
+static void
+printutf8pad(FILE *fp, const char *s, size_t len, int pad)
+{
+	wchar_t w;
+	size_t n = 0, i;
+	int r;
+
+	for (i = 0; *s && n < len; i++, s++) {
+		/* skip control characters */
+		if (iscntrl(*s))
+			continue;
+		if (ISUTF8(*s)) {
+			if ((r = mbtowc(&w, s, 4)) == -1)
+				break;
+			if ((r = wcwidth(w)) == -1)
+				r = 1;
+			n += (size_t)r;
+		}
+		putc(*s, fp);
+	}
+	for (; n < len; n++)
+		putc(pad, fp);
+}
+
 static void
 printfeed(FILE *fp, const char *feedname)
 {
@@ -27,9 +55,7 @@ printfeed(FILE *fp, const char *feedname)
 			printf("%-15.15s ", feedname);
 		printf(" %-30.30s  ", fields[FieldTimeFormatted]);
 		printutf8pad(stdout, fields[FieldTitle], 70, ' ');
-		fputs("  ", stdout);
-		fputs(fields[FieldLink], stdout);
-		putchar('\n');
+		printf("  %s\n", fields[FieldLink]);
 	}
 }
 
diff --git a/util.c b/util.c
@@ -10,7 +10,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
-#include <wchar.h>
 
 #include "util.h"
 
@@ -73,7 +72,8 @@ readpath:
 	return strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path) ? -1 : 0;
 }
 
-/* get absolute uri; if `link` is relative use `base` to make it absolute. */
+/* get absolute uri; if `link` is relative use `base` to make it absolute.
+ * the returned string in `buf` is uri encoded, see: encodeuri().  */
 int
 absuri(const char *link, const char *base, char *buf, size_t bufsiz)
 {
@@ -185,63 +185,6 @@ parseline(char **line, size_t *size, char **fields,
 	return (int)i;
 }
 
-char *
-trimend(const char *s)
-{
-	size_t len = strlen(s);
-
-	for (; len > 0 && isspace((int)s[len - 1]); len--)
-		;
-	return (char*)&s[len];
-}
-
-char *
-trimstart(const char *s)
-{
-	for (; *s && isspace((int)*s); s++)
-		;
-	return (char *)s;
-}
-
-void
-printxmlencoded(const char *s, FILE *fp)
-{
-	for (; *s; s++) {
-		switch(*s) {
-		case '<':  fputs("&lt;", fp);   break;
-		case '>':  fputs("&gt;", fp);   break;
-		case '\'': fputs("&apos;", fp); break;
-		case '&':  fputs("&amp;", fp);  break;
-		case '"':  fputs("&quot;", fp); break;
-		default:
-			fputc((int)*s, fp);
-		}
-	}
-}
-
-/* print `len` columns of characters. If string is shorter pad the rest
- * with characters `pad`. */
-void
-printutf8pad(FILE *fp, const char *s, size_t len, int pad)
-{
-	wchar_t w;
-	size_t n = 0, i;
-	int r;
-
-	for (i = 0; *s && n < len; i++, s++) {
-		if (ISUTF8(*s)) {
-			if ((r = mbtowc(&w, s, 4)) == -1)
-				break;
-			if ((r = wcwidth(w)) == -1)
-				r = 1;
-			n += (size_t)r;
-		}
-		putc(*s, fp);
-	}
-	for (; n < len; n++)
-		putc(pad, fp);
-}
-
 /* parse time to time_t, assumes time_t is signed */
 int
 strtotime(const char *s, time_t *t)
@@ -257,27 +200,47 @@ strtotime(const char *s, time_t *t)
 	return 0;
 }
 
-/* print text, ignore tabs, newline and carriage return etc
- * print some HTML 2.0 / XML 1.0 as normal text */
 void
-printcontent(const char *s, FILE *fp)
+print(const char *s, FILE *fp, int (*fn)(int, FILE *))
 {
-	const char *p;
+	for (; *s; s++)
+		fn((int)*s, fp);
+}
 
-	for (p = s; *p; p++) {
-		if (*p == '\\') {
-			switch (*(++p)) {
-			case '\\': fputc('\\', fp); break;
-			case 't':  fputc('\t', fp); break;
-			case 'n':  fputc('\n', fp); break;
-			default:   fputc(*p,   fp);
+/* unescape / decode fields printed by string_print_encoded():
+ * "\\" to "\", "\t" to TAB, "\n" to newline. Unrecognised escape sequences
+ * are ignored: "\z" etc. Call `fn` on each decoded character. */
+void
+decodefield(const char *s, FILE *fp, int (*fn)(int, FILE *))
+{
+	for (; *s; s++) {
+		if (*s == '\\') {
+			switch (*(++s)) {
+			case '\\': fn('\\', fp); break;
+			case 't':  fn('\t', fp); break;
+			case 'n':  fn('\n', fp); break;
+			case '\0': return;
 			}
 		} else {
-			fputc(*p, fp);
+			fn((int)*s, fp);
 		}
 	}
 }
 
+/* print some HTML 2.0 / XML 1.0 as normal text */
+int
+xmlencode(int c, FILE *fp)
+{
+	switch(c) {
+	case '<':  return fputs("&lt;",   fp);
+	case '>':  return fputs("&gt;",   fp);
+	case '\'': return fputs("&apos;", fp);
+	case '&':  return fputs("&amp;",  fp);
+	case '"':  return fputs("&quot;", fp);
+	}
+	return fputc(c, fp);
+}
+
 /* Some implementations of basename(3) return a pointer to a static
  * internal buffer (OpenBSD). Others modify the contents of `path` (POSIX).
  * This is a wrapper function that is compatible with both versions.
diff --git a/util.h b/util.h
@@ -26,15 +26,13 @@ enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink,
        FieldLast };
 
 int    absuri(const char *, const char *, char *, size_t);
+void   decodefield(const char *, FILE *, int (*)(int, FILE *));
 int    encodeuri(const char *, char *, size_t);
 int    parseline(char **, size_t *, char **, unsigned int, int, FILE *);
 int    parseuri(const char *, struct uri *, int);
-void   printcontent(const char *, FILE *);
-void   printxmlencoded(const char *, FILE *);
-void   printutf8pad(FILE *, const char *, size_t, int);
+void   print(const char *, FILE *, int (*)(int, FILE *));
 int    strtotime(const char *, time_t *);
-char * trimstart(const char *);
-char * trimend(const char *);
 char * xbasename(const char *);
+int    xmlencode(int, FILE *);

	sfeed simple feed reader - forked from git.codemadness.org/sfeed
	git clone git://src.gearsix.net/sfeed	sfeed.zip
	Log \| Files \| Refs \| Atom \| README \| LICENSE

M	README	\|	23	++++++++++++++---------
M	sfeed.c	\|	172	++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
M	sfeed_frames.c	\|	21	++++++++++++---------
M	sfeed_html.c	\|	14	+++++++-------
M	sfeed_mbox.c	\|	15	++++++++++-----
M	sfeed_plain.c	\|	32	+++++++++++++++++++++++++++++---
M	util.c	\|	105	++++++++++++++++++++++++++-----------------------------------------------------
M	util.h	\|	8	+++-----