commit b9723075c7a1a5beae432a963a20767d7e3d49b6
parent 6ae8a568981dafebff41bd913a157d130c176dda
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sun,  2 Aug 2015 17:07:07 +0200
sfeed: clean up a bit and improve the check for being inside content
Diffstat:
| M | sfeed.c | | | 54 | +++++++++++++++++++----------------------------------- | 
1 file changed, 19 insertions(+), 35 deletions(-)
diff --git a/sfeed.c b/sfeed.c
@@ -447,7 +447,7 @@ xml_handler_data(XMLParser *p, const char *s, size_t len)
 		return;
 
 	/* add only data from <name> inside <author> tag
-	 * or any other non-<author> tag  */
+	 * or any other non-<author> tag */
 	if (ctx.tagid != AtomTagAuthor || !strcmp(p->tag, "name"))
 		string_append(ctx.field, s, len);
 }
@@ -505,6 +505,12 @@ xml_handler_start_element_parsed(XMLParser *p, const char *tag, size_t taglen,
 	(void)tag;
 	(void)taglen;
 
+	if (ctx.iscontenttag) {
+		ctx.iscontent = 1;
+		ctx.iscontenttag = 0;
+		return;
+	}
+
 	if (!ISINCONTENT(ctx))
 		return;
 
@@ -537,11 +543,8 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
 			    isattr(value, valuelen, STRP("text/html"))))
 			{
 				ctx.item.contenttype = ContentTypeHTML;
-				ctx.iscontent = 1;
-/*				p->xmldataentity = NULL;*/ /* TODO: don't convert entities? test this */
 				p->xmlattrstart = xml_handler_attr_start;
 				p->xmlattrend = xml_handler_attr_end;
-				p->xmltagstartparsed = xml_handler_start_element_parsed;
 			}
 		} else if (ctx.tagid == AtomTagLink &&
 		          isattr(name, namelen, STRP("href")))
@@ -555,16 +558,8 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
 static void
 xml_handler_start_element(XMLParser *p, const char *name, size_t namelen)
 {
-	/* starts with div, handle as XML, don't convert entities
-	 * (set handler to NULL) */
-	/* TODO: this behaviour in the XML parser is changed, test this */
-	if (ISCONTENTTAG(ctx) && ctx.item.feedtype == FeedTypeAtom &&
-	   namelen == STRSIZ("div") && !strncmp(name, STRP("div"))) {
-		p->xmldataentity = NULL;
-	}
-	if (ctx.iscontent) {
+	if (ISINCONTENT(ctx)) {
 		ctx.attrcount = 0;
-		ctx.iscontenttag = 0;
 		xml_handler_data(p, "<", 1);
 		xml_handler_data(p, name, namelen);
 		return;
@@ -577,13 +572,11 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen)
 			ctx.item.feedtype = FeedTypeAtom;
 			/* default content type for Atom */
 			ctx.item.contenttype = ContentTypePlain;
-			ctx.field = NULL; /* XXX: optimization */
 		} else if (istag(name, namelen, STRP("item"))) {
 			/* RSS */
 			ctx.item.feedtype = FeedTypeRSS;
 			/* default content type for RSS */
 			ctx.item.contenttype = ContentTypeHTML;
-			ctx.field = NULL; /* XXX: optimization */
 		}
 		return;
 	}
@@ -655,6 +648,9 @@ xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen)
 	char buffer[16];
 	int len;
 
+	if (!ctx.field)
+		return;
+
 	/* try to translate entity, else just pass as data to
 	 * xml_data_handler */
 	len = xml_entitytostr(data, buffer, sizeof(buffer));
@@ -683,12 +679,6 @@ xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int issh
 			ctx.tag[0] = '\0';
 			ctx.taglen = 0;
 			ctx.tagid = TagUnknown;
-
-			p->xmldataentity = xml_handler_data_entity;
-			p->xmlattrstart = NULL;
-			p->xmlattrend = NULL;
-			p->xmltagstartparsed = NULL;
-
 			return;
 		}
 		if (!isshort) {
@@ -715,26 +705,20 @@ xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int issh
 		string_clear(&ctx.item.content);
 		string_clear(&ctx.item.id);
 		string_clear(&ctx.item.author);
+
 		ctx.item.feedtype = FeedTypeNone;
-		ctx.item.contenttype = ContentTypePlain;
+		ctx.item.contenttype = ContentTypeNone;
+
 		ctx.tag[0] = '\0'; /* unset tag */
 		ctx.taglen = 0;
 		ctx.tagid = TagUnknown;
-
-		/* TODO: not sure if needed */
-		ctx.iscontenttag = 0;
-		ctx.iscontent = 0;
-	} else if (ctx.taglen == namelen && !strcmp(ctx.tag, name)) {
-		/* clear */
-		/* XXX: optimize ? */
 		ctx.field = NULL;
+	} else if (ctx.taglen == namelen && !strcmp(ctx.tag, name)) {
+		/* close field tag */
 		ctx.tag[0] = '\0'; /* unset tag */
 		ctx.taglen = 0;
 		ctx.tagid = TagUnknown;
-
-		/* TODO: not sure if needed */
-		ctx.iscontenttag = 0;
-		ctx.iscontent = 0;
+		ctx.field = NULL;
 	}
 }
 
@@ -751,16 +735,16 @@ main(int argc, char *argv[])
 	string_buffer_init(&ctx.item.content, 4096);
 	string_buffer_init(&ctx.item.id, 1024);
 	string_buffer_init(&ctx.item.author, 256);
-	ctx.item.contenttype = ContentTypePlain;
-	ctx.item.feedtype = FeedTypeNone;
 
 	memset(&parser, 0, sizeof(parser));
 	parser.xmltagstart = xml_handler_start_element;
+	parser.xmltagstartparsed = xml_handler_start_element_parsed;
 	parser.xmltagend = xml_handler_end_element;
 	parser.xmldata = xml_handler_data;
 	parser.xmldataentity = xml_handler_data_entity;
 	parser.xmlattr = xml_handler_attr;
 	parser.xmlcdata = xml_handler_cdata;
+
 	xmlparser_parse_fd(&parser, 0);
 
 	return 0;