sfeed

simple feed reader - forked from git.codemadness.org/sfeed
git clone git://src.gearsix.net/sfeed
Log | Files | Refs | Atom | README | LICENSE

commit 1519f5c5f209f3c213d7f18e60ef68b103b389ac
parent 1b4ca40c918664752bde2fa6d06f76a7585a4ecd
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Mon, 20 May 2013 19:25:51 +0200

Update XML parser: many optimizations, DOS-to-Unix newline conversion; much cleanup still to do.

Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org>

Diffstat:
M xml.c | 669 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M xml.h | 73 +++++++++++++++++++++++++++++++++++++------------------------------------
2 files changed, 366 insertions(+), 376 deletions(-)

diff --git a/xml.c b/xml.c @@ -1,340 +1,329 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <ctype.h> - -#include "xml.h" - -void -xmlparser_init(XMLParser *x) { - memset(x, 0, sizeof(XMLParser)); - x->fp = stdin; -} - -static int /* like getc(), but do some smart buffering */ -xmlparser_getnext(XMLParser *x) { - if(x->readoffset >= x->readlastbytes) { - if(feof(x->fp)) - return EOF; - x->readoffset = 0; - if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf), x->fp))) - return EOF; /* 0 bytes read, assume EOF */ - } - return (int)x->readbuf[x->readoffset++]; -} - -static int -xmlparser_parsedata(XMLParser *x, int c) { - size_t datalen = 0; - - if(x->xmldatastart) - x->xmldatastart(x); - do { - if(x->xmldataentity && c == '&') { /* TODO: test this, entity handler */ - x->data[datalen] = '\0'; - x->xmldata(x, x->data, datalen); - x->data[0] = c; - datalen = 1; - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '<') { /* TODO: simplify? duplicate code. */ - goto parsedataend; -/* x->data[datalen] = '\0'; - if(x->xmldata) - x->xmldata(x, x->data, datalen); - if(x->xmldataend) - x->xmldataend(x); - return c;*/ - } - if(datalen < sizeof(x->data) - 1) - x->data[datalen++] = c; - if(isspace(c)) - break; - else if(c == ';') { - x->data[datalen] = '\0'; - x->xmldataentity(x, x->data, datalen); - datalen = 0; - break; - } - } - } else if(c == '<') { /* TODO: simplify? duplicate code. 
*/ -parsedataend: - x->data[datalen] = '\0'; - if(x->xmldata) - x->xmldata(x, x->data, datalen); - if(x->xmldataend) - x->xmldataend(x); - return c; - } else { - if(datalen < sizeof(x->data) - 1) { - x->data[datalen++] = c; - } else { - x->data[datalen] = '\0'; /* TODO: overflow */ - if(x->xmldata) - x->xmldata(x, x->data, datalen); - x->data[0] = c; - datalen = 1; - } - } - } while((c = xmlparser_getnext(x)) != EOF); - return EOF; -} - -static void -xmlparser_parseattrvalue(XMLParser *x, const char *name, size_t namelen, int end) { - size_t valuelen = 0; - int c; - - if(x->xmlattrstart) - x->xmlattrstart(x, x->tag, x->taglen, name, namelen); - for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) { - if(x->xmlattrentity && c == '&') { /* entities */ - x->data[valuelen] = '\0'; - /* call data function with data before entity if there is data */ - if(x->xmlattr && valuelen) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - x->data[0] = c; - valuelen = 1; - while((c = xmlparser_getnext(x)) != EOF) { - if(c == end) { /* TODO: simplify? duplicate code. */ - goto parseattrvalueend; -/* x->data[valuelen] = '\0'; - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - if(x->xmlattrend) - x->xmlattrend(x, x->tag, x->taglen, name, namelen); - return;*/ - } - if(valuelen < sizeof(x->data) - 1) - x->data[valuelen++] = c; - else { /* TODO: entity too long? this should be very strange. */ - x->data[valuelen] = '\0'; - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */ -/* x->data[0] = '\0'; */ - break; - } - if(c == ';') { - x->data[valuelen] = '\0'; - x->xmlattrentity(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */ - break; - } - } - } else if(c == end) { /* TODO: simplify? duplicate code. 
*/ -parseattrvalueend: - x->data[valuelen] = '\0'; - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - if(x->xmlattrend) - x->xmlattrend(x, x->tag, x->taglen, name, namelen); - return; - } else { - if(valuelen < sizeof(x->data) - 1) { /* TODO: overflow */ - x->data[valuelen++] = c; - } else { - x->data[valuelen] = '\0'; - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - x->data[0] = c; - valuelen = 1; - } - } - } -} - -static int -xmlparser_parseattrs(XMLParser *x) { - char name[1024]; /* TODO: dont overflow this bitch, also make it bigger perhaps? */ - size_t namelen = 0, valuelen = 0; - int c, shorttag = 0, endname = 0; - - while((c = xmlparser_getnext(x)) != EOF) { - if(isspace(c) && namelen) /* Do nothing */ - endname = 1; - if(isspace(c) || c == '?') { /* Do nothing */ - } else if(c == '=') { - name[namelen] = '\0'; - } else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) { - /* attribute without value */ - name[namelen] = '\0'; - if(x->xmlattrstart) - x->xmlattrstart(x, x->tag, x->taglen, name, namelen); - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, "", 0); - if(x->xmlattrend) - x->xmlattrend(x, x->tag, x->taglen, name, namelen); - endname = 0; - name[0] = c; - namelen = 1; - } else if(namelen && (c == '\'' || c == '"')) { - /* attribute with value */ - xmlparser_parseattrvalue(x, name, namelen, c); - namelen = 0; - valuelen = 0; - endname = 0; - } else if(namelen < sizeof(name) - 1) - name[namelen++] = c; - if(c == '>') { - break; - } else if(c == '/') { /* TODO: cleanup, ugly. */ - shorttag = 1; - namelen = 0; - name[0] = '\0'; - } - } - return shorttag; /* TODO: cleanup, ugly. 
*/ -} - -static void -xmlparser_parsecomment(XMLParser *x) { - size_t datalen = 0, i = 0; - int c; - - if(x->xmlcommentstart) - x->xmlcommentstart(x); - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '-' && i < 2) - i++; - else if(c == '>') { - if(i == 2) { /* (!memcmp(cd, "-->", strlen("-->"))) { */ - if(datalen >= 2) - datalen -= 2; - else - datalen = 0; - x->data[datalen] = '\0'; /* TODO: possible buffer underflow < 0 */ - if(x->xmlcomment) - x->xmlcomment(x, x->data, datalen); /* TODO: possible buffer underflow < 0 */ - if(x->xmlcommentend) - x->xmlcommentend(x); - break; - } - i = 0; - } - if(datalen < sizeof(x->data) - 1) { /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ /* TODO: what if the end has --, and its cut on the boundary, test this. */ - x->data[datalen++] = c; - } else { - x->data[datalen] = '\0'; /* TODO: overflow */ - if(x->xmlcomment) - x->xmlcomment(x, x->data, datalen); - x->data[0] = c; - datalen = 1; - } - } -} - -static void -xmlparser_parsecdata(XMLParser *x) { - size_t datalen = 0, i = 0; - int c; - - if(x->xmlcdatastart) - x->xmlcdatastart(x); - while((c = xmlparser_getnext(x)) != EOF) { - if(c == ']' && i < 2) { - i++; - } else if(c == '>') { - if(i == 2) { /* (!memcmp(cd, "]]", strlen("]]"))) { */ - if(datalen >= 2) - datalen -= 2; - else - datalen = 0; - x->data[datalen] = '\0'; /* TODO: check d >= 3 */ /* TODO: buffer underflow */ - if(x->xmlcdata && datalen) - x->xmlcdata(x, x->data, datalen); /* TODO: buffer underflow */ - if(x->xmlcdataend) - x->xmlcdataend(x); - break; - } - i = 0; - } - if(datalen < sizeof(x->data) - 1) { /* TODO: what if the end has ]>, and its cut on the boundary */ - x->data[datalen++] = c; - } else { - x->data[datalen] = '\0'; - if(x->xmlcdata) - x->xmlcdata(x, x->data, datalen); - x->data[0] = c; - datalen = 1; - } - } -} - -static void -xmlparser_parsetag(XMLParser *x) { - size_t datalen, taglen; - int c, s, isshorttag = 0; - - x->tag[0] = '\0'; - x->taglen = 0; - while((c = xmlparser_getnext(x)) 
!= EOF && isspace(c)); - if(c == '!') { - for(datalen = 0; (c = xmlparser_getnext(x)) != EOF;) { - if(datalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */ - x->data[datalen++] = c; /* TODO: prevent overflow */ - if(c == '>') - break; - else if(c == '-' && !memcmp(x->data, "--", strlen("--"))) { /* comment */ /* TODO: optimize this bitch */ - xmlparser_parsecomment(x); - break; - } else if(c == '[' && !memcmp(x->data, "[CDATA[", strlen("[CDATA["))) { /* cdata */ /* TODO: optimize this bitch */ - xmlparser_parsecdata(x); - break; - } - } - } else if(c == '?') { - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '"' || c == '\'') { - s = c; - while((c = xmlparser_getnext(x)) != EOF) { - if(c == s) - break; - } - } else if(c == '>') - break; - } - } else if(c != EOF && c != '>') { - x->tag[0] = c; - taglen = 1; - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '/') - isshorttag = 1; /* short tag */ - else if(isspace(c) || c == '>') { - x->tag[taglen] = '\0'; - if(x->tag[0] == '/') { /* end tag */ - x->taglen = --taglen; /* len -1 because of / */ - if(x->xmltagend) - x->xmltagend(x, &(x->tag)[1], x->taglen, 0); - } else { - x->taglen = taglen; - if(x->xmltagstart) - x->xmltagstart(x, x->tag, x->taglen); /* start tag */ - if(isspace(c) && xmlparser_parseattrs(x)) - isshorttag = 1; - if(x->xmltagstartparsed) - x->xmltagstartparsed(x, x->tag, x->taglen, isshorttag); - } - if(isshorttag && x->xmltagend) - x->xmltagend(x, x->tag, x->taglen, 1); - break; - } else if(taglen < sizeof(x->tag) - 1) - x->tag[taglen++] = c; /* TODO: prevent overflow */ - } - } -} - -void -xmlparser_parse(XMLParser *x) { - int c; - - while((c = xmlparser_getnext(x)) != EOF) { - /*if(isspace(c));*/ /* Do nothing */ - /*else*/ if(c == '<') /* tag */ - xmlparser_parsetag(x); - else { - xmlparser_parsedata(x, c); - xmlparser_parsetag(x); - } - } - return; -} +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> +#include "xml.h" + + +void 
+xmlparser_init(XMLParser *x) { + memset(x, 0, sizeof(XMLParser)); + x->fp = stdin; +} + +__inline__ int /* like getc(), but do some smart buffering */ +xmlparser_getnext(XMLParser *x) { + if(x->readoffset >= x->readlastbytes) { + x->readoffset = 0; + if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf), x->fp))) + return EOF; /* 0 bytes read, assume EOF */ + } + return (int)x->readbuf[x->readoffset++]; +} + +__inline__ void +xmlparser_parseattrvalue(XMLParser *x, const char *name, size_t namelen, int end) { + size_t valuelen = 0; + int c; + + if(x->xmlattrstart) + x->xmlattrstart(x, x->tag, x->taglen, name, namelen); + for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) { + if(c == '&' && x->xmlattrentity) { /* entities */ + x->data[valuelen] = '\0'; + /* call data function with data before entity if there is data */ + if(valuelen && x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + while((c = xmlparser_getnext(x)) != EOF) { + if(c == end) + goto parseattrvalueend; + if(valuelen < sizeof(x->data) - 1) + x->data[valuelen++] = c; + else { /* TODO: entity too long? this should be very strange. */ + x->data[valuelen] = '\0'; + if(x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); + valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */ +/* x->data[0] = '\0'; */ + break; + } + if(c == ';') { + x->data[valuelen] = '\0'; + x->xmlattrentity(x, x->tag, x->taglen, name, namelen, x->data, valuelen); + valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */ + break; + } + } + } else if(c == end) { /* TODO: ugly, remove goto?, simplify? duplicate code. 
*/ +parseattrvalueend: + x->data[valuelen] = '\0'; + if(x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); + if(x->xmlattrend) + x->xmlattrend(x, x->tag, x->taglen, name, namelen); + return; + } else { + if(valuelen < sizeof(x->data) - 1) { + x->data[valuelen++] = c; + } else { + x->data[valuelen] = '\0'; + if(x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + } + } + } +} + +__inline__ void +xmlparser_parseattrs(XMLParser *x, int *isshorttag) { + size_t namelen = 0; + int c, endname = 0; + + while((c = xmlparser_getnext(x)) != EOF) { + if(isspace(c)) { + if(namelen) /* Do nothing */ + endname = 1; + else + continue; + } + if(c == '?' && isspace(c)) { /* Do nothing */ + } else if(c == '=') { + x->name[namelen] = '\0'; + } else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) { + /* attribute without value */ + x->name[namelen] = '\0'; + if(x->xmlattrstart) + x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); + if(x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0); + if(x->xmlattrend) + x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); + endname = 0; + x->name[0] = c; + namelen = 1; + } else if(namelen && (c == '\'' || c == '"')) { + /* attribute with value */ + xmlparser_parseattrvalue(x, x->name, namelen, c); + namelen = 0; + endname = 0; + } else if(namelen < sizeof(x->name) - 1) + x->name[namelen++] = c; + if(c == '>') { + break; + } else if(c == '/') { + *isshorttag = 1; + namelen = 0; + x->name[0] = '\0'; + } + } +} + +__inline__ void +xmlparser_parsecomment(XMLParser *x) { + size_t datalen = 0, i = 0; + int c; + + if(x->xmlcommentstart) + x->xmlcommentstart(x); + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '-' && i < 2) + i++; + else if(c == '>') { + if(i == 2) { /* (!memcmp(cd, "-->", strlen("-->"))) { */ + if(datalen >= 2) { + datalen -= 2; + x->data[datalen] = '\0'; + if(x->xmlcomment) + 
x->xmlcomment(x, x->data, datalen); +/* } else { + datalen = 0; + x->data[datalen] = '\0';*/ + } + if(x->xmlcommentend) + x->xmlcommentend(x); + break; + } + i = 0; + } + if(datalen < sizeof(x->data) - 1) { /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ /* TODO: what if the end has --, and its cut on the boundary, test this. */ + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if(x->xmlcomment) + x->xmlcomment(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } +} + +__inline__ void +xmlparser_parsecdata(XMLParser *x) { + size_t datalen = 0, i = 0; + int c; + + if(x->xmlcdatastart) + x->xmlcdatastart(x); + while((c = xmlparser_getnext(x)) != EOF) { + if(c == ']' && i < 2) { + i++; + } else if(c == '>') { + if(i == 2) { /* (!memcmp(cd, "]]", strlen("]]"))) { */ + if(datalen >= 2) { + datalen -= 2; + x->data[datalen] = '\0'; + if(x->xmlcdata) + x->xmlcdata(x, x->data, datalen); +/* } else { + datalen = 0; + x->data[datalen] = '\0';*/ + } + if(x->xmlcdataend) + x->xmlcdataend(x); + break; + } + i = 0; + } + if(datalen < sizeof(x->data) - 1) { /* TODO: what if the end has ]>, and its cut on the boundary */ + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if(x->xmlcdata) + x->xmlcdata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } +} + +__inline__ void +xmlparser_parsetag(XMLParser *x) { + size_t datalen, taglen; + int c, s, isshorttag = 0; + + x->tag[0] = '\0'; + x->taglen = 0; + while((c = xmlparser_getnext(x)) != EOF && isspace(c)); + if(c == '!') { + for(datalen = 0; (c = xmlparser_getnext(x)) != EOF;) { + if(datalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */ + x->data[datalen++] = c; /* TODO: prevent overflow */ + if(c == '>') + break; + else if(c == '-' && datalen == strlen("--") && + (x->data[0] == '-')) { /* comment */ /* TODO: optimize this bitch */ + xmlparser_parsecomment(x); + break; + } else if(c == '[' && datalen == strlen("[CDATA[") && + x->data[1] == 'C' && x->data[2] == 'D' && + 
x->data[3] == 'A' && x->data[4] == 'T' && + x->data[5] == 'A' && x->data[6] == '[') { /* cdata */ + xmlparser_parsecdata(x); + break; + } + } + } else if(c == '?') { + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '"' || c == '\'') + for(s = c; (c = xmlparser_getnext(x)) != EOF && c != s;); + else if(c == '>') + break; + } + /* TODO: find out why checking isalpha(c) gives "not enough memory" + * also check if maybe when there is << or <> it might go into an infinite loop (unsure) */ + } else if(c != EOF && c != '>') { /* TODO: optimize and put above the other conditions ? */ + x->tag[0] = c; + taglen = 1; + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '/') + isshorttag = 1; /* short tag */ + else if(c == '>' || isspace(c)) { + x->tag[taglen] = '\0'; + if(x->tag[0] == '/') { /* end tag, starts with </ */ + x->taglen = --taglen; /* len -1 because of / */ + if(x->xmltagend) + x->xmltagend(x, &(x->tag)[1], x->taglen, 0); + } else { + x->taglen = taglen; + if(x->xmltagstart) + x->xmltagstart(x, x->tag, x->taglen); /* start tag */ + if(isspace(c)) + xmlparser_parseattrs(x, &isshorttag); + if(x->xmltagstartparsed) + x->xmltagstartparsed(x, x->tag, x->taglen, isshorttag); + } + if(isshorttag && x->xmltagend) + x->xmltagend(x, x->tag, x->taglen, 1); + break; + } else if(taglen < sizeof(x->tag) - 1) + x->tag[taglen++] = c; + } + } +} + +void +xmlparser_parsedata(XMLParser *x, int c) { /* TODO: remove int c, ugly */ + size_t datalen = 0; + + if(x->xmldatastart) + x->xmldatastart(x); + do { + if(c == '&' && x->xmldataentity) { /* TODO: test this, entity handler */ + x->data[datalen] = '\0'; + x->xmldata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '<') + goto parsedataend; + if(datalen < sizeof(x->data) - 1) + x->data[datalen++] = c; + if(isspace(c)) + break; + else if(c == ';') { + x->data[datalen] = '\0'; + x->xmldataentity(x, x->data, datalen); + datalen = 0; + break; + } + } + } else if(c == 
'<') { /* TODO: ugly, remove goto ? simplify? duplicate code. */ +parsedataend: + x->data[datalen] = '\0'; + if(x->xmldata) + x->xmldata(x, x->data, datalen); + if(x->xmldataend) + x->xmldataend(x); + break; + } else { + if(datalen < sizeof(x->data) - 1) { + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if(x->xmldata) + x->xmldata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } + } while((c = xmlparser_getnext(x)) != EOF); +} + +void +xmlparser_parse(XMLParser *x) { + int c; + + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '<') /* tag */ + xmlparser_parsetag(x); + else { + xmlparser_parsedata(x, c); + xmlparser_parsetag(x); + } + } + return; +} diff --git a/xml.h b/xml.h @@ -1,36 +1,37 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -typedef struct xmlparser { - /* handlers */ - void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t taglen); - void (*xmltagstartparsed)(struct xmlparser *p, const char *tag, size_t taglen, int isshort); - void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen, int isshort); - void (*xmldatastart)(struct xmlparser *p); - void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen); - void (*xmldataend)(struct xmlparser *p); - void (*xmldataentity)(struct xmlparser *p, const char *data, size_t datalen); - void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen); - void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen, const char *value, size_t valuelen); - void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen); - void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen, const char *value, size_t valuelen); - void (*xmlcdatastart)(struct xmlparser *p); - void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen); - void 
(*xmlcdataend)(struct xmlparser *p); - void (*xmlcommentstart)(struct xmlparser *p); - void (*xmlcomment)(struct xmlparser *p, const char *comment, size_t commentlen); - void (*xmlcommentend)(struct xmlparser *p); - - FILE *fp; /* stream to read from */ - /* private; internal state */ - char tag[1024]; /* current tag */ - size_t taglen; - char data[BUFSIZ]; /* data buffer used for tag and attribute data */ - size_t readoffset; - size_t readlastbytes; - unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnext() */ -} XMLParser; - -void xmlparser_init(XMLParser *x); -void xmlparser_parse(XMLParser *x); +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +typedef struct xmlparser { + /* handlers */ + void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t taglen); + void (*xmltagstartparsed)(struct xmlparser *p, const char *tag, size_t taglen, int isshort); + void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen, int isshort); + void (*xmldatastart)(struct xmlparser *p); + void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen); + void (*xmldataend)(struct xmlparser *p); + void (*xmldataentity)(struct xmlparser *p, const char *data, size_t datalen); + void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen); + void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen, const char *value, size_t valuelen); + void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen); + void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen, const char *value, size_t valuelen); + void (*xmlcdatastart)(struct xmlparser *p); + void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen); + void (*xmlcdataend)(struct xmlparser *p); + void (*xmlcommentstart)(struct xmlparser *p); + void (*xmlcomment)(struct xmlparser *p, 
const char *comment, size_t commentlen); + void (*xmlcommentend)(struct xmlparser *p); + + FILE *fp; /* stream to read from */ + /* private; internal state */ + char tag[1024]; /* current tag */ + size_t taglen; + char name[256]; /* current attribute name */ + char data[BUFSIZ]; /* data buffer used for tag and attribute data */ + size_t readoffset; + size_t readlastbytes; + unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnext() */ +} XMLParser; + +void xmlparser_init(XMLParser *x); +void xmlparser_parse(XMLParser *x);