general cleanups - sfeed - simple feed reader

commit 19430fb45636614d96a08342cd7d83774e888c2e
parent 46b756cc19e199c89fe3b090885243d1c501262b
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Thu,  6 Aug 2015 17:54:09 +0200

general cleanups

Diffstat:
M sfeed.c  | 144 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M sfeed_html.1  | 6 ++----
M sfeed_mbox.1  | 11 ++++-------
M sfeed_opml_import.c  | 5 ++---
M xml.c  | 19 ++++++++-----------

5 files changed, 88 insertions(+), 97 deletions(-)
diff --git a/sfeed.c b/sfeed.c
@@ -432,29 +432,56 @@ isattr(const char *name, size_t len, const char *name2, size_t len2)
 	return (len == len2 && !strcasecmp(name, name2));
 }
 
-/* NOTE: this handler can be called multiple times if the data in this
- * block is bigger than the buffer. */
 static void
-xml_handler_data(XMLParser *p, const char *s, size_t len)
+xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
+	const char *name, size_t namelen, const char *value,
+	size_t valuelen)
 {
-	if (!ctx.field)
+	(void)tag;
+	(void)taglen;
+
+	/* handles transforming inline XML to data */
+	if (ISINCONTENT(ctx)) {
+		xml_handler_data(p, value, valuelen);
 		return;
+	}
 
-	/* add only data from <name> inside <author> tag
-	 * or any other non-<author> tag */
-	if (ctx.tagid != AtomTagAuthor || !strcmp(p->tag, "name"))
-		string_append(ctx.field, s, len);
+	if (ctx.item.feedtype == FeedTypeAtom) {
+		if (ISCONTENTTAG(ctx)) {
+			if (isattr(name, namelen, STRP("type")) &&
+			   (isattr(value, valuelen, STRP("xhtml")) ||
+			    isattr(value, valuelen, STRP("text/xhtml")) ||
+			    isattr(value, valuelen, STRP("html")) ||
+			    isattr(value, valuelen, STRP("text/html"))))
+			{
+				ctx.item.contenttype = ContentTypeHTML;
+				p->xmlattrstart = xml_handler_attr_start;
+				p->xmlattrend = xml_handler_attr_end;
+			}
+		} else if (ctx.tagid == AtomTagLink &&
+		          isattr(name, namelen, STRP("href")))
+		{
+			/* link href attribute */
+			string_append(&ctx.item.link, value, valuelen);
+		}
+	}
 }
 
 static void
-xml_handler_cdata(XMLParser *p, const char *s, size_t len)
+xml_handler_attr_end(XMLParser *p, const char *tag, size_t taglen,
+	const char *name, size_t namelen)
 {
-	(void)p;
+	(void)tag;
+	(void)taglen;
+	(void)name;
+	(void)namelen;
 
-	if (!ctx.field)
+	if (!ISINCONTENT(ctx))
 		return;
 
-	string_append(ctx.field, s, len);
+	/* handles transforming inline XML to data */
+	xml_handler_data(p, "\"", 1);
+	ctx.attrcount = 0;
 }
 
 static void
@@ -476,55 +503,50 @@ xml_handler_attr_start(XMLParser *p, const char *tag, size_t taglen,
 }
 
 static void
-xml_handler_attr_end(XMLParser *p, const char *tag, size_t taglen,
-	const char *name, size_t namelen)
+xml_handler_cdata(XMLParser *p, const char *s, size_t len)
 {
-	(void)tag;
-	(void)taglen;
-	(void)name;
-	(void)namelen;
+	(void)p;
 
-	if (!ISINCONTENT(ctx))
+	if (!ctx.field)
 		return;
 
-	/* handles transforming inline XML to data */
-	xml_handler_data(p, "\"", 1);
-	ctx.attrcount = 0;
+	string_append(ctx.field, s, len);
 }
 
+/* NOTE: this handler can be called multiple times if the data in this
+ * block is bigger than the buffer. */
 static void
-xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
-	const char *name, size_t namelen, const char *value,
-	size_t valuelen)
+xml_handler_data(XMLParser *p, const char *s, size_t len)
 {
-	(void)tag;
-	(void)taglen;
+	if (!ctx.field)
+		return;
 
-	/* handles transforming inline XML to data */
-	if (ISINCONTENT(ctx)) {
-		xml_handler_data(p, value, valuelen);
+	/* add only data from <name> inside <author> tag
+	 * or any other non-<author> tag */
+	if (ctx.tagid != AtomTagAuthor || !strcmp(p->tag, "name"))
+		string_append(ctx.field, s, len);
+}
+
+static void
+xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen)
+{
+	char buffer[16];
+	int len;
+
+	if (!ctx.field)
 		return;
-	}
 
-	if (ctx.item.feedtype == FeedTypeAtom) {
-		if (ISCONTENTTAG(ctx)) {
-			if (isattr(name, namelen, STRP("type")) &&
-			   (isattr(value, valuelen, STRP("xhtml")) ||
-			    isattr(value, valuelen, STRP("text/xhtml")) ||
-			    isattr(value, valuelen, STRP("html")) ||
-			    isattr(value, valuelen, STRP("text/html"))))
-			{
-				ctx.item.contenttype = ContentTypeHTML;
-				p->xmlattrstart = xml_handler_attr_start;
-				p->xmlattrend = xml_handler_attr_end;
-			}
-		} else if (ctx.tagid == AtomTagLink &&
-		          isattr(name, namelen, STRP("href")))
-		{
-			/* link href attribute */
-			string_append(&ctx.item.link, value, valuelen);
-		}
-	}
+	/* try to translate entity, else just pass as data to
+	 * xml_data_handler */
+	len = xml_entitytostr(data, buffer, sizeof(buffer));
+	/* this should never happen (buffer too small) */
+	if (len < 0)
+		return;
+
+	if (len > 0)
+		xml_handler_data(p, buffer, (size_t)len);
+	else
+		xml_handler_data(p, data, datalen);
 }
 
 static void
@@ -646,28 +668,6 @@ xml_handler_start_el_parsed(XMLParser *p, const char *tag, size_t taglen,
 }
 
 static void
-xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen)
-{
-	char buffer[16];
-	int len;
-
-	if (!ctx.field)
-		return;
-
-	/* try to translate entity, else just pass as data to
-	 * xml_data_handler */
-	len = xml_entitytostr(data, buffer, sizeof(buffer));
-	/* this should never happen (buffer too small) */
-	if (len < 0)
-		return;
-
-	if (len > 0)
-		xml_handler_data(p, buffer, (size_t)len);
-	else
-		xml_handler_data(p, data, datalen);
-}
-
-static void
 xml_handler_end_el(XMLParser *p, const char *name, size_t namelen, int isshort)
 {
 	if (ctx.item.feedtype == FeedTypeNone)
diff --git a/sfeed_html.1 b/sfeed_html.1
@@ -13,13 +13,11 @@ formats feed data (TSV) from
 .Xr sfeed 1
 from stdin or
 .Ar file
-to stdout in HTML.
-If one or more
+to stdout in HTML. If one or more
 .Ar file
 are specified, the basename of the
 .Ar file
-is used as the feed name in the output.
-If no
+is used as the feed name in the output. If no
 .Ar file
 parameters are specified and so the data is read from stdin the feed name
 is empty.
diff --git a/sfeed_mbox.1 b/sfeed_mbox.1
@@ -13,18 +13,15 @@ formats feed data (TSV) from
 .Xr sfeed 1
 from stdin or
 .Ar file
-to stdout in the mboxrd format.
-If one or more
+to stdout in the mboxrd format. If one or more
 .Ar file
 are specified, the basename of the
 .Ar file
-is used as the feed name in the output.
-If no
+is used as the feed name in the output. If no
 .Ar file
 parameters are specified and so the data is read from stdin the feed name
-is empty.
-Lines starting with "From " will be mangled in the mboxrd-style. The mbox
-data can be further processed by tools like
+is empty. Lines starting with "From " will be mangled in the mboxrd-style.
+The mbox data can be further processed by tools like
 .Xr procmail 1
 or
 .Xr fdm 1
diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c
@@ -74,10 +74,9 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
 int
 main(void)
 {
-	memset(&parser, 0, sizeof(parser));
-	parser.xmltagstart = xml_handler_start_element;
-	parser.xmltagend = xml_handler_end_element;
 	parser.xmlattr = xml_handler_attr;
+	parser.xmltagend = xml_handler_end_element;
+	parser.xmltagstart = xml_handler_start_element;
 
 	fputs(
 	    "# paths\n"
diff --git a/xml.c b/xml.c
@@ -332,6 +332,7 @@ xml_numericentitytostr(const char *e, char *buf, size_t bufsiz)
 	for (b = 0; b < len; b++)
 		buf[b] = (cp >> (8 * (len - 1 - b))) & 0xff;
 	buf[len] = '\0';
+
 	return (ssize_t)len;
 }
 
@@ -359,7 +360,8 @@ xmlparser_parse(XMLParser *x)
 	int c, ispi;
 	size_t datalen, tagdatalen, taglen;
 
-	while ((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */
+	while ((c = xmlparser_getnext(x)) != EOF && c != '<')
+		; /* skip until < */
 
 	while (c != EOF) {
 		if (c == '<') { /* parse tag */
@@ -369,33 +371,28 @@ xmlparser_parse(XMLParser *x)
 			x->taglen = 0;
 			if (c == '!') { /* cdata and comments */
 				for (tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) {
-					if (tagdatalen <= strlen("[CDATA[")) /* if (d < sizeof(x->data)) */
+					if (tagdatalen <= sizeof("[CDATA[") - 1) /* if (d < sizeof(x->data)) */
 						x->data[tagdatalen++] = c; /* TODO: prevent overflow */
 					if (c == '>')
 						break;
-					else if (c == '-' && tagdatalen == strlen("--") &&
+					else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
 							(x->data[0] == '-')) { /* comment */
 						xmlparser_parsecomment(x);
 						break;
 					} else if (c == '[') {
-						if (tagdatalen == strlen("[CDATA[") &&
+						if (tagdatalen == sizeof("[CDATA[") - 1 &&
 							x->data[1] == 'C' && x->data[2] == 'D' &&
 							x->data[3] == 'A' && x->data[4] == 'T' &&
 							x->data[5] == 'A' && x->data[6] == '[') { /* CDATA */
 							xmlparser_parsecdata(x);
 							break;
-						#if 0
-						} else {
-							/* TODO ? */
-							/* markup declaration section */
-							while ((c = xmlparser_getnext(x)) != EOF && c != ']');
-						#endif
 						}
 					}
 				}
 			} else { /* normal tag (open, short open, close), processing instruction. */
 				if (isspace(c))
-					while ((c = xmlparser_getnext(x)) != EOF && isspace(c));
+					while ((c = xmlparser_getnext(x)) != EOF && isspace(c))
+						;
 				if (c == EOF)
 					return;
 				x->tag[0] = c;

	sfeed simple feed reader - forked from git.codemadness.org/sfeed
	git clone git://src.gearsix.net/sfeed	sfeed.zip
	Log | Files | Refs | Atom | README | LICENSE

M	sfeed.c	|	144	++++++++++++++++++++++++++++++++++++++++----------------------------------------
M	sfeed_html.1	|	6	++----
M	sfeed_mbox.1	|	11	++++-------
M	sfeed_opml_import.c	|	5	++---
M	xml.c	|	19	++++++++-----------