sfeed

simple feed reader - forked from git.codemadness.org/sfeed
git clone git://src.gearsix.net/sfeed
Log | Files | Refs | Atom | README | LICENSE

commit d8b0c45812890670943becd45383f75d57056e52
parent 1fa71087c9d754b687d52059ee88ca82b45ec1eb
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Mon, 31 Mar 2014 22:46:58 +0200

new version

Lots of things changed, but cleanup is still to do. A changelog and a consistent stream of small updates will come in the future.

Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org>

Diffstat:
MCHANGELOG | 10+++++++++-
MLICENSE | 9++++++---
MMakefile | 63+++++++++++++++++++++++++++++++++++++++++----------------------
MREADME | 21+--------------------
Dcommon.c | 128-------------------------------------------------------------------------------
Dcommon.h | 18------------------
Dcompat.c | 41-----------------------------------------
Dcompat.h | 17-----------------
Mconfig.mk | 6+++---
Msfeed.c | 305++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
Msfeed_frames.c | 344++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Msfeed_html.c | 76++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Msfeed_opml_import.c | 20++++++++++----------
Msfeed_plain.c | 14+++++++-------
Asfeed_stats.c | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msfeed_update | 67++++++++++++++++++++++++++++++++++++++-----------------------------
Msfeed_update.1 | 6+++---
Asfeed_web.c | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asfeed_xmlenc.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Msfeedrc.example | 2+-
Mstyle.css | 11+++++++++++
Autil.c | 202+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Autil.h | 26++++++++++++++++++++++++++
Mxml.c | 375++++++++++++++++++++++++++++++++++++++-----------------------------------------
Mxml.h | 4+++-
25 files changed, 1203 insertions(+), 770 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG @@ -3,9 +3,17 @@ v0.9 Features: --------- + + * Feeds are now by default updated in parallel for a huge speedup in performance. + * Added hotkeys to sfeed_html to toggle showing only new items (n key) and + focusing the menu (s key) or items (i key) using a tiny bit of + javascript. + * Auto-detect XML encoding. + + * Added sfeed_frames, a formatting program to output feeds as a HTML file with frames. It's optimized to look good in older browsers that don't necesarily - support CSS or modern HTML like links. See the man page for more details. + support CSS or modern HTML, like links. See the man page for more details. * Removed the dependency on libexpat, using a custom XML parser (xml.*), this parser is also non-validating, it will not check the XML for errors or stop parsing if it contains errors (which is good). diff --git a/LICENSE b/LICENSE @@ -1,6 +1,6 @@ MIT/X Consortium License -© 2011-2013 Hiltjo Posthuma <hiltjo@codemadness.org> +© 2011-2014 Hiltjo Posthuma <hiltjo@codemadness.org> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), @@ -21,9 +21,12 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-tm_to_time and str*case functions from the Musl project, it's license is: +tmtotime and some libc functions: +str*case, strlcpy and macros in compat.c -Copyright © 2005-2012 Rich Felker +from the Musl project, it's license is: + +Copyright © 2005-2013 Rich Felker Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Makefile b/Makefile @@ -3,10 +3,12 @@ include config.mk NAME = sfeed -SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_import.c xml.c sfeed_frames.c common.c compat.c +SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_import.c sfeed_frames.c \ + sfeed_xmlenc.c sfeed_web.c xml.c OBJ = ${SRC:.c=.o} -all: options sfeed sfeed_plain sfeed_html sfeed_opml_import sfeed_frames +all: options sfeed sfeed_plain sfeed_html sfeed_opml_import sfeed_frames \ + sfeed_xmlenc sfeed_web options: @echo ${NAME} build options: @@ -20,37 +22,46 @@ options: ${OBJ}: config.mk -sfeed: sfeed.o xml.o compat.o +sfeed: sfeed.o xml.o util.o @echo CC -o $@ - @${CC} -o $@ sfeed.o xml.o compat.o ${LDFLAGS} + @${CC} -o $@ sfeed.o xml.o util.o ${LDFLAGS} -sfeed_opml_import: sfeed_opml_import.o xml.o compat.o +sfeed_opml_import: sfeed_opml_import.o xml.o @echo CC -o $@ - @${CC} -o $@ sfeed_opml_import.o xml.o compat.o ${LDFLAGS} + @${CC} -o $@ sfeed_opml_import.o xml.o ${LDFLAGS} -sfeed_plain: sfeed_plain.o common.o compat.o +sfeed_plain: sfeed_plain.o util.o @echo CC -o $@ - @${CC} -o $@ sfeed_plain.o common.o compat.o ${LDFLAGS} + @${CC} -o $@ sfeed_plain.o util.o ${LDFLAGS} -sfeed_html: sfeed_html.o common.o compat.o +sfeed_html: sfeed_html.o util.o @echo CC -o $@ - @${CC} -o $@ sfeed_html.o common.o compat.o ${LDFLAGS} + @${CC} -o $@ sfeed_html.o util.o ${LDFLAGS} -sfeed_frames: sfeed_frames.o common.o compat.o +sfeed_frames: sfeed_frames.o util.o @echo CC -o $@ - @${CC} -o $@ sfeed_frames.o common.o compat.o ${LDFLAGS} + @${CC} -o $@ sfeed_frames.o util.o ${LDFLAGS} + +sfeed_xmlenc: 
sfeed_xmlenc.o xml.o + @echo CC -o $@ + @${CC} -o $@ sfeed_xmlenc.o xml.o ${LDFLAGS} + +sfeed_web: sfeed_web.o xml.o util.o + @echo CC -o $@ + @${CC} -o $@ sfeed_web.o xml.o util.o ${LDFLAGS} clean: @echo cleaning - @rm -f sfeed sfeed_plain sfeed_html sfeed_frames sfeed_opml_import ${OBJ} ${NAME}-${VERSION}.tar.gz + @rm -f sfeed sfeed_plain sfeed_html sfeed_frames sfeed_opml_import \ + ${OBJ} ${NAME}-${VERSION}.tar.gz dist: clean @echo creating dist tarball @mkdir -p ${NAME}-${VERSION} @cp -R CHANGELOG LICENSE Makefile README config.mk \ - TODO CREDITS sfeedrc.example style.css ${SRC} common.c sfeed_update sfeed_opml_export \ + TODO CREDITS sfeedrc.example style.css ${SRC} sfeed_update \ sfeed.1 sfeed_update.1 sfeed_plain.1 sfeed_html.1 sfeed_opml_import.1 \ - sfeed_frames.c sfeed_frames.1 sfeed_opml_export.1 ${NAME}-${VERSION} + sfeed_frames.1 sfeed_opml_export sfeed_opml_export.1 ${NAME}-${VERSION} @tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION} @gzip ${NAME}-${VERSION}.tar @rm -rf ${NAME}-${VERSION} @@ -58,13 +69,14 @@ dist: clean install: all @echo installing executable file to ${DESTDIR}${PREFIX}/bin @mkdir -p ${DESTDIR}${PREFIX}/bin - @cp -f sfeed sfeed_update sfeed_plain sfeed_html sfeed_frames \ + @cp -f sfeed sfeed_update sfeed_plain sfeed_html sfeed_frames sfeed_xmlenc \ sfeed_opml_import sfeed_opml_export ${DESTDIR}${PREFIX}/bin @chmod 755 ${DESTDIR}${PREFIX}/bin/sfeed \ ${DESTDIR}${PREFIX}/bin/sfeed_update \ ${DESTDIR}${PREFIX}/bin/sfeed_plain \ ${DESTDIR}${PREFIX}/bin/sfeed_html \ ${DESTDIR}${PREFIX}/bin/sfeed_frames \ + ${DESTDIR}${PREFIX}/bin/sfeed_xmlenc \ ${DESTDIR}${PREFIX}/bin/sfeed_opml_import \ ${DESTDIR}${PREFIX}/bin/sfeed_opml_export @mkdir -p ${DESTDIR}${PREFIX}/share/sfeed @@ -73,12 +85,18 @@ install: all @echo installing manual pages to ${DESTDIR}${MANPREFIX}/man1 @mkdir -p ${DESTDIR}${MANPREFIX}/man1 @sed "s/VERSION/${VERSION}/g" < sfeed.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed.1 - @sed "s/VERSION/${VERSION}/g" < sfeed_update.1 > 
${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 - @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 - @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 - @sed "s/VERSION/${VERSION}/g" < sfeed_frames.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_frames.1 - @sed "s/VERSION/${VERSION}/g" < sfeed_opml_import.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_import.1 - @sed "s/VERSION/${VERSION}/g" < sfeed_opml_export.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_export.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_update.1 > \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_frames.1 > \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_frames.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_opml_import.1 > \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_import.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_opml_export.1 > \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_export.1 @chmod 644 ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \ ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \ ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \ @@ -94,6 +112,7 @@ uninstall: ${DESTDIR}${PREFIX}/bin/sfeed_plain \ ${DESTDIR}${PREFIX}/bin/sfeed_html \ ${DESTDIR}${PREFIX}/bin/sfeed_frames \ + ${DESTDIR}${PREFIX}/bin/sfeed_xmlenc \ ${DESTDIR}${PREFIX}/bin/sfeed_opml_import \ ${DESTDIR}${PREFIX}/bin/sfeed_opml_export \ ${DESTDIR}${PREFIX}/share/${NAME}/sfeedrc.example \ diff --git a/README b/README @@ -1,4 +1,4 @@ -sfeed v0.8 +sfeed v0.9 ---------- Simple RSS and Atom parser (and some format programs). 
@@ -144,25 +144,6 @@ gawk -F '\t' 'BEGIN { mv feeds.clean feeds -Common errors and solutions ---------------------------- - -If you execute sfeed_update and see the error: - - "sfeed: error parsing xml not well-formed (invalid token) at line <linenumber> - column <column>" - -it's possible sfeed is trying to parse a feed which is non-UTF8 encoded. You can -specify the encoding in your sfeedrc file so it will be converted to UTF-8 -using iconv, for example for iso-8859-1 change: - - feed "feedname" "feedurl" "baseurl" - -to: - - feed "feedname" "feedurl" "baseurl" "iso-8859-1" - - License ------- diff --git a/common.c b/common.c @@ -1,128 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <time.h> -#include <ctype.h> -#include "common.h" - -char * -afgets(char **p, size_t *size, FILE *fp) { - char buf[BUFSIZ], *alloc = NULL; - size_t n, len = 0, allocsiz; - int end = 0; - - while(fgets(buf, sizeof(buf), fp)) { - n = strlen(buf); - if(buf[n - 1] == '\n') { /* dont store newlines. */ - buf[n - 1] = '\0'; - n--; - end = 1; /* newline found, end */ - } - len += n; - allocsiz = len + 1; - if(allocsiz > *size) { - if((alloc = realloc(*p, allocsiz))) { - *p = alloc; - *size = allocsiz; - } else { - free(*p); - *p = NULL; - fputs("error: could not realloc\n", stderr); - exit(EXIT_FAILURE); - return NULL; - } - } - strncpy((*p + (len - n)), buf, n); - if(end || feof(fp)) - break; - } - if(*p && len > 0) { - (*p)[len] = '\0'; - return *p; - } - return NULL; -} - -void /* print link; if link is relative use baseurl to make it absolute */ -printlink(const char *link, const char *baseurl, FILE *fp) { - const char *ebaseproto, *ebasedomain, *p; - int isrelative; - - /* protocol part */ - for(p = link; *p && (isalpha((int)*p) || isdigit((int)*p) || *p == '+' || *p == '-' || *p == '.'); p++); - isrelative = strncmp(p, "://", strlen("://")); - if(isrelative) { /* relative link (baseurl is used). 
*/ - if((ebaseproto = strstr(baseurl, "://"))) { - ebaseproto += strlen("://"); - fwrite(baseurl, 1, ebaseproto - baseurl, fp); - } else { - ebaseproto = baseurl; - if(*baseurl || (link[0] == '/' && link[1] == '/')) - fputs("http://", fp); - } - if(link[0] == '/') { /* relative to baseurl domain (not path). */ - if(link[1] == '/') /* absolute url but with protocol from baseurl. */ - link += 2; - else if((ebasedomain = strchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */ - fwrite(ebaseproto, 1, ebasedomain - ebaseproto, fp); - else - fputs(ebaseproto, stdout); - } else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */ - fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, fp); - else { - fputs(ebaseproto, fp); - if(*baseurl && *link) - fputc('/', fp); - } - } - fputs(link, fp); -} - -unsigned int -parseline(char **line, size_t *size, char **fields, unsigned int maxfields, int separator, FILE *fp) { - unsigned int i = 0; - char *prev, *s; - - if(afgets(line, size, fp)) { - for(prev = *line; (s = strchr(prev, separator)) && i <= maxfields; i++) { - *s = '\0'; /* null terminate string. */ - fields[i] = prev; - prev = s + 1; - } - fields[i] = prev; - for(i++; i < maxfields; i++) /* make non-parsed fields empty. */ - fields[i] = ""; - } - return i; -} - -/* print feed name for id; spaces and tabs in string as "-" (spaces in anchors are not valid). */ -void -printfeednameid(const char *s, FILE *fp) { - for(; *s; s++) - fputc(isspace((int)*s) ? '-' : tolower((int)*s), fp); -} - -void -printhtmlencoded(const char *s, FILE *fp) { - for(; *s; s++) { - switch(*s) { - case '<': fputs("&lt;", fp); break; - case '>': fputs("&gt;", fp); break; -/* case '&': fputs("&amp;", fp); break;*/ - default: - fputc(*s, fp); - } - } -} - -void -feedsfree(struct feed *f) { - struct feed *next; - while(f) { - next = f->next; - free(f->name); - free(f); - f = next; - } -} diff --git a/common.h b/common.h @@ -1,18 +0,0 @@ -/* Feed info. 
*/ -struct feed { - char *name; /* feed name */ - unsigned long totalnew; /* amount of new items per feed */ - unsigned long total; /* total items */ - struct feed *next; /* linked list */ -}; - -enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink, - FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType, - FieldFeedName, FieldFeedUrl, FieldBaseSiteUrl, FieldLast }; - -char * afgets(char **p, size_t *size, FILE *fp); -void feedsfree(struct feed *f); -unsigned int parseline(char **line, size_t *size, char **fields, unsigned int maxfields, int separator, FILE *fp); -void printfeednameid(const char *s, FILE *fp); -void printhtmlencoded(const char *s, FILE *fp); -void printlink(const char *link, const char *baseurl, FILE *fp); diff --git a/compat.c b/compat.c @@ -1,41 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#include <sys/stat.h> -#include <sys/types.h> - -int -xstrcasecmp(const char *_l, const char *_r) { - const unsigned char *l = (void *)_l, *r = (void *)_r; - for(; *l && *r && (*l == *r || tolower(*l) == tolower(*r)); l++, r++); - return tolower(*l) - tolower(*r); -} - -int -xstrncasecmp(const char *_l, const char *_r, size_t n) { - const unsigned char *l=(void *)_l, *r=(void *)_r; - if(!n--) - return 0; - for(; *l && *r && n && (*l == *r || tolower(*l) == tolower(*r)); l++, r++, n--); - return tolower(*l) - tolower(*r); -} - -void * -xstrdup(const char *s) { - size_t len = strlen(s) + 1; - void *p = malloc(len); - if(p) - memcpy(p, s, len); - return p; -} - -int -xmkdir(const char *path, mode_t mode) { -/* TODO: fix for mingw */ -#if MINGW - return mkdir(path); -#else - return mkdir(path, mode); -#endif -} diff --git a/compat.h b/compat.h @@ -1,17 +0,0 @@ -#if 1 -#include <strings.h> -#include <string.h> -#define xstrcasecmp strcasecmp -#define xstrncasecmp strncasecmp -#else -int xstrcasecmp(const char *s1, const char *s2); -int xstrncasecmp(const char *s1, const char *s2, size_t 
len); -#endif - -/* non-ansi */ -void * xstrdup(const char *s); - -/* for mingw */ -#include <sys/stat.h> -#include <sys/types.h> -int xmkdir(const char *path, mode_t mode); diff --git a/config.mk b/config.mk @@ -1,5 +1,5 @@ # sfeed version -VERSION = 0.8 +VERSION = 0.9 # customize below to fit your system @@ -12,12 +12,12 @@ INCS = LIBS = -lc # debug -#CFLAGS = -fstack-protector-all -O0 -g -ansi -Wall -Wextra -pedantic -DVERSION=\"${VERSION}\" +CFLAGS = -fstack-protector-all -O0 -g -ansi -Wall -Wextra -pedantic -DVERSION=\"${VERSION}\" CFLAGS = -O0 -g -ansi -Wall -Wextra -pedantic -DVERSION=\"${VERSION}\" LDFLAGS = ${LIBS} # optimized -#CFLAGS = -O2 -ansi -DVERSION=\"${VERSION}\" -DVERSION=\"${VERSION}\" +#CFLAGS = -O2 -ansi -DVERSION=\"${VERSION}\" #LDFLAGS = -s ${LIBS} # Solaris diff --git a/sfeed.c b/sfeed.c @@ -1,10 +1,14 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <strings.h> #include <time.h> #include <ctype.h> + +#include "util.h" #include "xml.h" -#include "compat.h" + +#define ISWSNOSPACE(c) (((unsigned)c - '\t') < 5) /* isspace(c) && c != ' ' */ enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2 }; const char *feedtypes[] = { "", "rss", "atom" }; @@ -12,6 +16,20 @@ const char *feedtypes[] = { "", "rss", "atom" }; enum { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2 }; const char *contenttypes[] = { "", "plain", "html" }; +const int FieldSeparator = '\t'; /* output field seperator character */ + +enum { + TagUnknown = 0, + /* RSS */ + RSSTagDcdate, RSSTagPubdate, RSSTagTitle, + RSSTagLink, RSSTagDescription, RSSTagContentencoded, + RSSTagGuid, RSSTagAuthor, RSSTagDccreator, + /* Atom */ + AtomTagPublished, AtomTagUpdated, AtomTagTitle, + AtomTagSummary, AtomTagContent, + AtomTagId, AtomTagLink, AtomTagAuthor +}; + typedef struct string { /* String data / pool */ char *data; /* data */ size_t len; /* string length */ @@ -29,41 +47,29 @@ typedef struct feeditem { /* Feed item */ int feedtype; /* 
FeedTypeRSS or FeedTypeAtom */ } FeedItem; -void die(const char *s); -void cleanup(void); - -String *currentfield = NULL; /* TODO */ -const int FieldSeparator = '\t'; -FeedItem feeditem; /* data for current feed item */ -char feeditemtag[256] = ""; /* current tag _inside_ a feeditem */ -size_t feeditemtaglen = 0; -int feeditemtagid = 0; -int iscontent = 0; -int iscontenttag = 0; -size_t attrcount = 0; -char *standardtz = NULL; /* TZ variable at start of program */ -XMLParser parser; /* XML parser state */ - -enum { - TagUnknown = 0, - /* RSS */ - RSSTagDcdate, RSSTagPubdate, RSSTagTitle, - RSSTagLink, RSSTagDescription, RSSTagContentencoded, - RSSTagGuid, RSSTagAuthor, RSSTagDccreator, - /* Atom */ - AtomTagPublished, AtomTagUpdated, AtomTagTitle, - AtomTagSummary, AtomTagContent, - AtomTagId, AtomTagLink, AtomTagAuthor -}; - typedef struct feedtag { char *name; size_t namelen; int id; } FeedTag; +static void die(const char *s); +static void cleanup(void); + +static String *currentfield = NULL; /* pointer to current FeedItem field String */ +static FeedItem feeditem; /* data for current feed item */ +static char feeditemtag[256] = ""; /* current tag _inside_ a feeditem */ +static size_t feeditemtaglen = 0; +static int feeditemtagid = 0; /* unique number for parsed tag (faster comparison) */ +static int iscontent = 0; +static int iscontenttag = 0; +static size_t attrcount = 0; +static char *standardtz = NULL; /* TZ variable at start of program */ +static XMLParser parser; /* XML parser state */ +static char *append = NULL; + /* TODO: optimize lookup */ -int +static int /* unique number for parsed tag (faster comparison) */ gettag(int feedtype, const char *name, size_t namelen) { /* RSS, alphabetical order */ static FeedTag rsstag[] = { @@ -91,11 +97,11 @@ gettag(int feedtype, const char *name, size_t namelen) { { NULL, 0, -1 } }; int i, n; - + if(namelen >= 2 && namelen <= 15) { if(feedtype == FeedTypeRSS) { for(i = 0; rsstag[i].name; i++) { - if(!(n = 
xstrncasecmp(rsstag[i].name, name, rsstag[i].namelen))) + if(!(n = strncasecmp(rsstag[i].name, name, rsstag[i].namelen))) return rsstag[i].id; /* optimization: it's sorted so nothing after it matches. */ if(n > 0) @@ -103,7 +109,7 @@ gettag(int feedtype, const char *name, size_t namelen) { } } else if(feedtype == FeedTypeAtom) { for(i = 0; atomtag[i].name; i++) { - if(!(n = xstrncasecmp(atomtag[i].name, name, atomtag[i].namelen))) + if(!(n = strncasecmp(atomtag[i].name, name, atomtag[i].namelen))) return atomtag[i].id; /* optimization: it's sorted so nothing after it matches. */ if(n > 0) @@ -114,8 +120,21 @@ gettag(int feedtype, const char *name, size_t namelen) { return TagUnknown; } -int -entitytostr(const char *e, char *buffer, size_t bufsiz) { +static unsigned long +codepointtoutf8(unsigned long cp) { + if(cp >= 0x10000) /* 4 bytes */ + return 0xf0808080 | ((cp & 0xfc0000) << 6) | ((cp & 0x3f000) << 4) | + ((cp & 0xfc0) << 2) | (cp & 0x3f); + else if(cp >= 0x00800) /* 3 bytes */ + return 0xe08080 | ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) | + (cp & 0x3f); + else if(cp >= 0x80) /* 2 bytes */ + return 0xc080 | ((cp & 0xfc0) << 2) | (cp & 0x3f); + return cp; /* 1 byte */ +} + +static int +namedentitytostr(const char *e, char *buffer, size_t bufsiz) { /* TODO: optimize lookup? 
*/ char *entities[6][2] = { { "&lt;", "<" }, @@ -130,7 +149,7 @@ entitytostr(const char *e, char *buffer, size_t bufsiz) { return 0; for(i = 0; entities[i][0]; i++) { /* NOTE: compares max 7 chars */ - if(!xstrncasecmp(e, entities[i][0], 6)) { + if(!strncasecmp(e, entities[i][0], 6)) { buffer[0] = *(entities[i][1]); buffer[1] = '\0'; return 1; @@ -139,7 +158,49 @@ entitytostr(const char *e, char *buffer, size_t bufsiz) { return 0; } -void +static int +entitytostr(const char *e, char *buffer, size_t bufsiz) { + unsigned long l = 0, cp = 0; + if(*e != '&' || bufsiz < 5) /* doesnt start with & */ + return 0; + e++; + if(*e == '#') { + e++; + if(*e == 'x') { + e++; + l = strtol(e, NULL, 16); /* hex */ + } else + l = strtol(e, NULL, 10); /* decimal */ + if((cp = codepointtoutf8(l))) { + buffer[0] = l & 0xff; + buffer[1] = (l >> 8) & 0xff; + buffer[2] = (l >> 16) & 0xff; + buffer[3] = (l >> 24) & 0xff; + buffer[4] = '\0'; + /* escape whitespace */ + if(ISWSNOSPACE(buffer[0])) { /* isspace(c) && c != ' ' */ + if(buffer[0] == '\n') { /* escape newline */ + buffer[0] = '\\'; + buffer[1] = 'n'; + buffer[2] = '\0'; + } else if(buffer[0] == '\\') { /* escape \ */ + buffer[0] = '\\'; + buffer[1] = '\\'; + buffer[2] = '\0'; + } else if(buffer[0] == '\t') { /* tab */ + buffer[0] = '\\'; + buffer[1] = 't'; + buffer[2] = '\0'; + } + } + } + return 1; + } else /* named entity */ + return namedentitytostr(e, buffer, bufsiz); + return 0; +} + +static void string_clear(String *s) { if(s->data) s->data[0] = '\0'; /* clear string only; don't free, prevents @@ -147,7 +208,7 @@ string_clear(String *s) { s->len = 0; } -void +static void string_buffer_init(String *s, size_t len) { if(!(s->data = malloc(len))) die("can't allocate enough memory"); @@ -155,7 +216,7 @@ string_buffer_init(String *s, size_t len) { string_clear(s); } -void +static void string_free(String *s) { free(s->data); s->data = NULL; @@ -163,12 +224,10 @@ string_free(String *s) { s->len = 0; } -int 
-string_buffer_expand(String *s, size_t newlen) { +static int +string_buffer_realloc(String *s, size_t newlen) { char *p; size_t alloclen; - /* check if allocation is necesary, dont shrink buffer - should be more than bufsiz ofcourse */ for(alloclen = 16; alloclen <= newlen; alloclen *= 2); if(!(p = realloc(s->data, alloclen))) { string_free(s); /* free previous allocation */ @@ -179,18 +238,20 @@ string_buffer_expand(String *s, size_t newlen) { return s->bufsiz; } -void +static void string_append(String *s, const char *data, size_t len) { if(!len || *data == '\0') return; + /* check if allocation is necesary, dont shrink buffer + should be more than bufsiz ofcourse */ if(s->len + len > s->bufsiz) - string_buffer_expand(s, s->len + len); + string_buffer_realloc(s, s->len + len); memcpy(s->data + s->len, data, len); s->len += len; s->data[s->len] = '\0'; } -void /* cleanup parser, free allocated memory, etc */ +static void /* cleanup, free allocated memory, etc */ cleanup(void) { string_free(&feeditem.timestamp); string_free(&feeditem.title); @@ -200,7 +261,7 @@ cleanup(void) { string_free(&feeditem.author); } -void /* print error message to stderr */ +static void /* print error message to stderr */ die(const char *s) { fputs("sfeed: ", stderr); fputs(s, stderr); @@ -210,7 +271,7 @@ die(const char *s) { /* get timezone from string, return as formatted string and time offset, * for the offset it assumes GMT */ -int +static int gettimetz(const char *s, char *buf, size_t bufsiz) { const char *p = s; char tzname[16] = "", *t = NULL; @@ -239,7 +300,7 @@ gettimetz(const char *s, char *buf, size_t bufsiz) { } else memcpy(tzname, "GMT", strlen("GMT") + 1); if(!(*p)) { - strncpy(buf, tzname, bufsiz); + strlcpy(buf, tzname, bufsiz); /* TODO: dont depend on strlcpy? 
*/ return 0; } if((sscanf(p, "%c%02d:%02d", &c, &tzhour, &tzmin)) > 0); @@ -254,31 +315,30 @@ gettimetz(const char *s, char *buf, size_t bufsiz) { /* parses everything in a format similar to: * "%a, %d %b %Y %H:%M:%S" or "%Y-%m-%d %H:%M:%S" */ /* TODO: calculate time offset (GMT only) from gettimetz ? */ -int +static int parsetimeformat(const char *s, struct tm *t, const char **end) { - static const char *months[] = { + const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; - const char *p = s; unsigned int i, fm; unsigned long l; memset(t, 0, sizeof(struct tm)); - if((l = strtoul(p, (void *)&p, 10))) { + if((l = strtoul(s, (void *)&s, 10))) { t->tm_year = abs(l) - 1900; - if(!(l = strtoul(p, (void *)&p, 10))) + if(!(l = strtoul(s, (void *)&s, 10))) return 0; t->tm_mon = abs(l) - 1; - if(!(t->tm_mday = abs(strtoul(p, (void *)&p, 10)))) + if(!(t->tm_mday = abs(strtoul(s, (void *)&s, 10)))) return 0; } else { - for(; *p && !isdigit((int)*p); p++); - if(!(t->tm_mday = abs(strtoul(p, (void *)&p, 10)))) + for(; *s && !isdigit((int)*s); s++); + if(!(t->tm_mday = abs(strtoul(s, (void *)&s, 10)))) return 0; - for(; *p && !isalpha((int)*p); p++); /* skip non-alpha */ + for(; *s && !isalpha((int)*s); s++); /* skip non-alpha */ for(fm = 0, i = 0; i < 12; i++) { /* parse month names */ - if(!xstrncasecmp(p, months[i], 3)) { + if(!strncasecmp(s, months[i], 3)) { t->tm_mon = i; fm = 1; break; @@ -286,22 +346,22 @@ parsetimeformat(const char *s, struct tm *t, const char **end) { } if(!fm) /* can't find month */ return 0; - for(; *p && !isdigit((int)*p); p++); /* skip non-digit */ - if(!(l = strtoul(p, (void *)&p, 10))) + for(; *s && !isdigit((int)*s); s++); /* skip non-digit */ + if(!(l = strtoul(s, (void *)&s, 10))) return 0; t->tm_year = abs(l) - 1900; } - for(; *p && !isdigit((int)*p); p++); /* skip non-digit */ - if((t->tm_hour = abs(strtoul(p, (void *)&p, 10))) > 23) + for(; *s && !isdigit((int)*s); s++); /* skip 
non-digit */ + if((t->tm_hour = abs(strtoul(s, (void *)&s, 10))) > 23) return 0; - for(; *p && !isdigit((int)*p); p++); /* skip non-digit */ - if((t->tm_min = abs(strtoul(p, (void *)&p, 10))) > 59) + for(; *s && !isdigit((int)*s); s++); /* skip non-digit */ + if((t->tm_min = abs(strtoul(s, (void *)&s, 10))) > 59) return 0; - for(; *p && !isdigit((int)*p); p++); /* skip non-digit */ - if((t->tm_sec = abs(strtoul(p, (void *)&p, 10))) > 60) + for(; *s && !isdigit((int)*s); s++); /* skip non-digit */ + if((t->tm_sec = abs(strtoul(s, (void *)&s, 10))) > 60) return 0; if(end) - *end = p; + *end = s; return 1; } @@ -309,8 +369,8 @@ parsetimeformat(const char *s, struct tm *t, const char **end) { #define Q(a,b) ((a)>0 ? (a)/(b) : -(((b)-(a)-1)/(b))) /* copied from Musl C awesome small implementation, see LICENSE. */ -time_t -tm_to_time(struct tm *tm) { +static time_t +tmtotime(struct tm *tm) { time_t year = tm->tm_year - 100; int month = tm->tm_mon; int day = tm->tm_mday; @@ -338,7 +398,7 @@ tm_to_time(struct tm *tm) { 946684800; /* the dawn of time, aka 1970 (30 years of seconds) :) */ } -time_t +static time_t parsetime(const char *s, char *buf) { struct tm tm; char tz[64]; @@ -355,11 +415,12 @@ parsetime(const char *s, char *buf) { tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, tz); /* return UNIX time, reverse offset to GMT+0 */ - return tm_to_time(&tm) - offset; + return tmtotime(&tm) - offset; } return -1; /* can't parse */ } +#if 0 /* print text, ignore tabs, newline and carriage return etc * print some HTML 2.0 / XML 1.0 as normal text */ void @@ -379,8 +440,7 @@ string_print_trimmed(String *s) { return; for(p = s->data; isspace((int)*p); p++); /* strip leading whitespace */ for(; *p; ) { /* ignore tabs, newline and carriage return etc, except space */ - /*if(!isspace((int)*p) || *p == ' ') {*/ - if(!((unsigned)*p - '\t' < 5)) { + if(!ISWSNOSPACE(*p)) { /* !isspace(c) || c == ' ' */ if(*p == '<') { /* skip tags */ if((n = strchr(p, 
'>'))) { p = n + 1; @@ -389,9 +449,9 @@ string_print_trimmed(String *s) { } buffer[buflen++] = *p; } - if(buflen >= BUFSIZ) { - fwrite(buffer, 1, buflen, stdout); - buflen = 0; + if(buflen >= BUFSIZ) { /* align write size with BUFSIZ */ + fwrite(buffer, 1, BUFSIZ, stdout); + buflen -= BUFSIZ; } p++; } @@ -410,7 +470,7 @@ string_print_textblock(String *s) { /* skip leading whitespace */ for(p = s->data; *p && isspace((int)*p); p++); for(i = 0; *p; p++) { - if(((unsigned)*p - '\t') < 5) { + if(ISWSNOSPACE(*p)) { /* isspace(c) && c != ' ' */ if(*p == '\n') { /* escape newline */ buffer[i++] = '\\'; buffer[i++] = 'n'; @@ -421,32 +481,67 @@ string_print_textblock(String *s) { buffer[i++] = '\\'; buffer[i++] = 't'; } - /* ignore other whitespace chars, except space */ } else { buffer[i++] = *p; } - if(i >= BUFSIZ) { /* TODO: align */ - fwrite(buffer, 1, i, stdout); - i = 0; + if(i >= BUFSIZ) { /* align write size with BUFSIZ */ + fwrite(buffer, 1, BUFSIZ, stdout); + i -= BUFSIZ; } } if(i) fwrite(buffer, 1, i, stdout); } +#endif -int +static void /* print text, escape tabs, newline and carriage return etc */ +string_print(String *s) { + const char *p; + char buffer[BUFSIZ + 4]; + size_t i; + + if(!s->len) + return; + /* skip leading whitespace */ + for(p = s->data; *p && isspace((int)*p); p++); + for(i = 0; *p; p++) { + if(ISWSNOSPACE(*p)) { /* isspace(c) && c != ' ' */ + if(*p == '\n') { /* escape newline */ + buffer[i++] = '\\'; + buffer[i++] = 'n'; + } else if(*p == '\\') { /* escape \ */ + buffer[i++] = '\\'; + buffer[i++] = '\\'; + } else if(*p == '\t') { /* tab */ + buffer[i++] = '\\'; + buffer[i++] = 't'; + } + /* ignore other whitespace chars, except space */ + } else { + buffer[i++] = *p; + } + if(i >= BUFSIZ) { /* align write size with BUFSIZ */ + fwrite(buffer, 1, BUFSIZ, stdout); + i -= BUFSIZ; + } + } + if(i) /* write remaining */ + fwrite(buffer, 1, i, stdout); +} + +static int istag(const char *name, size_t len, const char *name2, size_t len2) { - return 
(len == len2 && !xstrcasecmp(name, name2)); + return (len == len2 && !strcasecmp(name, name2)); } -int +static int isattr(const char *name, size_t len, const char *name2, size_t len2) { - return (len == len2 && !xstrcasecmp(name, name2)); + return (len == len2 && !strcasecmp(name, name2)); } /* NOTE: this handler can be called multiple times if the data in this * block is bigger than the buffer */ -void +static void xml_handler_data(XMLParser *p, const char *s, size_t len) { if(currentfield) { if(feeditemtagid != AtomTagAuthor || !strcmp(p->tag, "name")) /* author>name */ @@ -454,13 +549,13 @@ xml_handler_data(XMLParser *p, const char *s, size_t len) { } } -void +static void xml_handler_cdata(XMLParser *p, const char *s, size_t len) { if(currentfield) string_append(currentfield, s, len); } -void +static void xml_handler_attr_start(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen) { if(iscontent && !iscontenttag) { if(!attrcount) @@ -472,7 +567,7 @@ xml_handler_attr_start(struct xmlparser *p, const char *tag, size_t taglen, cons } } -void +static void xml_handler_attr_end(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen) { if(iscontent && !iscontenttag) { xml_handler_data(p, "\"", 1); @@ -480,7 +575,7 @@ xml_handler_attr_end(struct xmlparser *p, const char *tag, size_t taglen, const } } -void +static void xml_handler_start_element_parsed(XMLParser *p, const char *tag, size_t taglen, int isshort) { if(iscontent && !iscontenttag) { if(isshort) @@ -490,7 +585,7 @@ xml_handler_start_element_parsed(XMLParser *p, const char *tag, size_t taglen, i } } -void +static void xml_handler_attr(XMLParser *p, const char *tag, size_t taglen, const char *name, size_t namelen, const char *value, size_t valuelen) { @@ -516,7 +611,7 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen, } } -void +static void xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) { if(iscontenttag) { 
/* starts with div, handle as XML, dont convert entities */ @@ -541,7 +636,6 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) { memcpy(feeditemtag, name, namelen + 1); /* copy including nul byte */ feeditemtaglen = namelen; feeditemtagid = gettag(feeditem.feedtype, feeditemtag, feeditemtaglen); - if(feeditem.feedtype == FeedTypeRSS) { if(feeditemtagid == TagUnknown) currentfield = NULL; @@ -587,7 +681,7 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) { else if(feeditemtagid == AtomTagAuthor) currentfield = &feeditem.author; } - /* TODO: prefer content encoded over content? */ + /* TODO: prefer content encoded over content? test */ } } else { /* start of RSS or Atom item / entry */ if(istag(name, namelen, "entry", strlen("entry"))) { /* Atom */ @@ -602,7 +696,7 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) { } } -void +static void xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen) { char buffer[16]; size_t len; @@ -620,7 +714,7 @@ xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen) { xml_handler_data(p, data, datalen); /* can't convert entity, just use it's data */ } -void +static void xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int isshort) { char timebuf[64]; int tagid; @@ -660,19 +754,23 @@ xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int issh putchar(FieldSeparator); fputs(timebuf, stdout); putchar(FieldSeparator); - string_print_trimmed(&feeditem.title); + string_print(&feeditem.title); putchar(FieldSeparator); - string_print_trimmed(&feeditem.link); + string_print(&feeditem.link); putchar(FieldSeparator); - string_print_textblock(&feeditem.content); + string_print(&feeditem.content); putchar(FieldSeparator); fputs(contenttypes[feeditem.contenttype], stdout); putchar(FieldSeparator); - string_print_trimmed(&feeditem.id); + string_print(&feeditem.id); putchar(FieldSeparator); - 
string_print_trimmed(&feeditem.author); + string_print(&feeditem.author); putchar(FieldSeparator); fputs(feedtypes[feeditem.feedtype], stdout); + if(append) { + putchar(FieldSeparator); + fputs(append, stdout); + } putchar('\n'); /* clear strings */ @@ -705,9 +803,12 @@ xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int issh } int -main(void) { +main(int argc, char **argv) { atexit(cleanup); + if(argc > 1) + append = argv[1]; + /* init strings and initial memory pool size */ string_buffer_init(&feeditem.timestamp, 64); string_buffer_init(&feeditem.title, 256); diff --git a/sfeed_frames.c b/sfeed_frames.c @@ -7,12 +7,16 @@ #include <sys/types.h> #include <sys/stat.h> #include <utime.h> -#include "common.h" -#include "compat.h" -static int showsidebar = 1; /* show sidebar ? */ +#include "util.h" -void /* print error message to stderr */ +static unsigned int showsidebar = 1; /* show sidebar ? */ + +static FILE *fpindex = NULL, *fpitems = NULL, *fpmenu = NULL, *fpcontent = NULL; +static char *line = NULL; +static struct feed *feeds = NULL; /* start of feeds linked-list. */ + +static void /* print error message to stderr */ die(const char *s) { fputs("sfeed_frames: ", stderr); fputs(s, stderr); @@ -20,9 +24,23 @@ die(const char *s) { exit(EXIT_FAILURE); } +static void +cleanup(void) { + if(fpmenu) + fclose(fpmenu); + if(fpitems) + fclose(fpitems); + if(fpindex) + fclose(fpindex); + if(fpcontent) + fclose(fpcontent); + free(line); /* free line */ + feedsfree(feeds); /* free feeds linked-list */ +} + /* print text, ignore tabs, newline and carriage return etc * print some HTML 2.0 / XML 1.0 as normal text */ -void +static void printcontent(const char *s, FILE *fp) { const char *p; int len = 0; @@ -42,7 +60,8 @@ printcontent(const char *s, FILE *fp) { } } -size_t +/* TODO: bufsiz - 1 ? 
*/ +static size_t makepathname(char *buffer, size_t bufsiz, const char *path) { const char *p = path; size_t i = 0, r = 0; @@ -64,164 +83,208 @@ makepathname(char *buffer, size_t bufsiz, const char *path) { return i; } -int +static int fileexists(const char *path) { return (!access(path, F_OK)); } int main(int argc, char **argv) { - char *line = NULL, *fields[FieldLast]; + char *fields[FieldLast]; + char name[256]; /* TODO: bigger size? */ + char *basepath = "feeds"; + /* TODO: max path size? */ + char dirpath[1024], filepath[1024], reldirpath[1024], relfilepath[1024]; unsigned long totalfeeds = 0, totalnew = 0; unsigned int isnew; - struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */ + struct feed *f, *feedcurrent = NULL; time_t parsedtime, comparetime; - size_t size = 0; - char name[256]; - char dirpath[1024]; - char filepath[1024]; - char reldirpath[1024]; - char relfilepath[1024]; - FILE *fpindex, *fpitems, *fpmenu, *fpcontent; - char *basepath = "feeds"; + size_t size = 0, namelen = 0, basepathlen = 0; + struct utimbuf contenttime; - size_t namelen = 0; + atexit(cleanup); memset(&contenttime, 0, sizeof(contenttime)); if(argc > 1 && argv[1][0] != '\0') basepath = argv[1]; comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ - xmkdir(basepath, S_IRWXU); + mkdir(basepath, S_IRWXU); /* write main index page */ - if(strlen(basepath) + strlen("/index.html") < sizeof(dirpath) - 1) + basepathlen = strlen(basepath); + if(basepathlen + strlen("/index.html") < sizeof(dirpath) - 1) sprintf(dirpath, "%s/index.html", basepath); - if((fpindex = fopen(dirpath, "w+b"))) { - } - if(strlen(basepath) + strlen("/menu.html") < sizeof(dirpath) - 1) + if(!(fpindex = fopen(dirpath, "w+b"))) + die("can't write index.html"); + if(basepathlen + strlen("/menu.html") < sizeof(dirpath) - 1) sprintf(dirpath, "%s/menu.html", basepath); - if(!(fpmenu = fopen(dirpath, "w+b"))) { - /* TODO: error */ - fclose(fpindex); - return EXIT_FAILURE; - } - 
if(strlen(basepath) + strlen("/items.html") < sizeof(dirpath) - 1) + if(!(fpmenu = fopen(dirpath, "w+b"))) + die("can't write menu.html"); + if(basepathlen + strlen("/items.html") < sizeof(dirpath) - 1) sprintf(dirpath, "%s/items.html", basepath); - if(!(fpitems = fopen(dirpath, "w+b"))) { - /* TODO: error */ - fclose(fpmenu); - fclose(fpindex); - return EXIT_FAILURE; - } - fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" /></head>", fpitems); - fputs("<body class=\"frame\"><div id=\"items\">", fpitems); - + if(!(fpitems = fopen(dirpath, "w+b"))) + die("can't write items.html"); + fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" /></head>" + "<body class=\"frame\"><div id=\"items\">", fpitems); while(parseline(&line, &size, fields, FieldLast, '\t', stdin) > 0) { + + +/* + dirpath[0] = '\0'; filepath[0] = '\0'; reldirpath[0] = '\0'; relfilepath[0] = '\0'; - namelen = makepathname(name, sizeof(name) - 1, fields[FieldFeedName]); - if(namelen) { - if(strlen(basepath) + namelen + 1 < sizeof(dirpath) - 1) + + +*/ + + + + + + + + /* first of feed section or new feed section. */ + if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) { + + + /* TODO: makepathname isnt necesary if fields[FieldFeedName] is the same as the previous line */ + /* TODO: move this part below where FieldFeedName is checked if its different ? */ + + /* make directory for feedname */ + namelen = makepathname(name, sizeof(name) - 1, fields[FieldFeedName]); + if(!namelen) + continue; + + if(basepathlen + namelen + 1 < sizeof(dirpath) - 1) sprintf(dirpath, "%s/%s", basepath, name); /* TODO: handle error. 
*/ - if(xmkdir(dirpath, S_IRWXU) != -1) { + if(mkdir(dirpath, S_IRWXU) != -1) { + fprintf(stderr, "sfeed_frames: can't write '%s'\n", dirpath); + exit(EXIT_FAILURE); + } + /* TODO: test, replaces strncpy (strncpy is slow) */ + reldirpath[0] = '\0'; + if(namelen < sizeof(reldirpath) - 2) { + memcpy(reldirpath, name, namelen + 1); /* copy including nul byte */ + /* reldirpath[namelen] = '\0';*/ } - strncpy(reldirpath, name, sizeof(reldirpath) - 1); - namelen = makepathname(name, sizeof(name), fields[FieldTitle]); - if(namelen) { - if(strlen(dirpath) + namelen + strlen("/.html") < sizeof(filepath) - 1) - sprintf(filepath, "%s/%s.html", dirpath, name); - if(strlen(reldirpath) + namelen + strlen("/.html") < sizeof(relfilepath) - 1) - sprintf(relfilepath, "%s/%s.html", reldirpath, name); - if(!fileexists(filepath) && (fpcontent = fopen(filepath, "w+b"))) { - fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../../style.css\" /></head>", fpcontent); - fputs("<body class=\"frame\"><div class=\"content\">", fpcontent); - fputs("<h2><a href=\"", fpcontent); - if(fields[FieldBaseSiteUrl][0] != '\0') - printlink(fields[FieldLink], fields[FieldBaseSiteUrl], fpcontent); - else - printlink(fields[FieldLink], fields[FieldFeedUrl], fpcontent); - fputs("\">", fpcontent); - printhtmlencoded(fields[FieldTitle], fpcontent); - fputs("</a></h2>", fpcontent); - printcontent(fields[FieldContent], fpcontent); - fputs("</div></body></html>", fpcontent); - fclose(fpcontent); - } + /* strncpy(reldirpath, name, sizeof(reldirpath) - 1);*/ + + + + + + if(!(f = calloc(1, sizeof(struct feed)))) + die("can't allocate enough memory"); + + - /* first of feed section or new feed section. */ - if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) { - if(totalfeeds) { /* end previous one. 
*/ - fputs("</table>\n", fpitems); - if(!(feedcurrent->next = calloc(1, sizeof(struct feed)))) - die("can't allocate enough memory"); - feedcurrent = feedcurrent->next; - } else { - if(!(feedcurrent = calloc(1, sizeof(struct feed)))) - die("can't allocate enough memory"); - feeds = feedcurrent; /* first item. */ - if(fields[FieldFeedName][0] == '\0') { - showsidebar = 0; - } - } - /* write menu link if new. */ - if(!(feedcurrent->name = xstrdup(fields[FieldFeedName]))) - die("can't allocate enough memory"); - if(fields[FieldFeedName][0] != '\0') { - fputs("<h2 id=\"", fpitems); - printfeednameid(feedcurrent->name, fpitems); - fputs("\"><a href=\"#", fpitems); - printfeednameid(feedcurrent->name, fpitems); - fputs("\">", fpitems); - fputs(feedcurrent->name, fpitems); - fputs("</a></h2>\n", fpitems); - } - fputs("<table cellpadding=\"0\" cellspacing=\"0\">\n", fpitems); - totalfeeds++; + if(totalfeeds) { /* end previous one. */ + fputs("</table>\n", fpitems); + + + feedcurrent->next = f; + feedcurrent = feedcurrent->next; + + + + } else { + + + feedcurrent = f; + + + feeds = feedcurrent; /* first item. */ + if(fields[FieldFeedName][0] == '\0') { + showsidebar = 0; } - /* write item. */ - parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); - /* set modified and access time of file to time of item. 
*/ - contenttime.actime = parsedtime; - contenttime.modtime = parsedtime; - utime(filepath, &contenttime); - - isnew = (parsedtime >= comparetime); - totalnew += isnew; - feedcurrent->totalnew += isnew; - feedcurrent->total++; - if(isnew) - fputs("<tr class=\"n\"><td nowrap valign=\"top\">", fpitems); - else - fputs("<tr><td nowrap valign=\"top\">", fpitems); - fputs("<tr><td nowrap valign=\"top\">", fpitems); - fputs(fields[FieldTimeFormatted], fpitems); - fputs("</td><td nowrap valign=\"top\">", fpitems); - if(isnew) - fputs("<b><u>", fpitems); - fputs("<a href=\"", fpitems); - fputs(relfilepath, fpitems); - fputs("\" target=\"content\">", fpitems); - printhtmlencoded(fields[FieldTitle], fpitems); - fputs("</a>", fpitems); - if(isnew) - fputs("</u></b>", fpitems); - fputs("</td></tr>\n", fpitems); } + /* write menu link if new. */ + if(!(feedcurrent->name = strdup(fields[FieldFeedName]))) + die("can't allocate enough memory"); + if(fields[FieldFeedName][0] != '\0') { + fputs("<h2 id=\"", fpitems); + printfeednameid(feedcurrent->name, fpitems); + fputs("\"><a href=\"#", fpitems); + printfeednameid(feedcurrent->name, fpitems); + fputs("\">", fpitems); + fputs(feedcurrent->name, fpitems); + fputs("</a></h2>\n", fpitems); + } + fputs("<table cellpadding=\"0\" cellspacing=\"0\">\n", fpitems); + totalfeeds++; } + + + + /* write content */ + namelen = makepathname(name, sizeof(name), fields[FieldTitle]); + if(!namelen) + continue; + if(strlen(dirpath) + namelen + strlen("/.html") < sizeof(filepath) - 1) + sprintf(filepath, "%s/%s.html", dirpath, name); + if(strlen(reldirpath) + namelen + strlen("/.html") < sizeof(relfilepath) - 1) + sprintf(relfilepath, "%s/%s.html", reldirpath, name); + if(!fileexists(filepath) && (fpcontent = fopen(filepath, "w+b"))) { + fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../../style.css\" /></head>" + "<body class=\"frame\"><div class=\"content\">" + "<h2><a href=\"", fpcontent); + if(fields[FieldBaseSiteUrl][0] != 
'\0') + printlink(fields[FieldLink], fields[FieldBaseSiteUrl], fpcontent); + else + printlink(fields[FieldLink], fields[FieldFeedUrl], fpcontent); + fputs("\">", fpcontent); + printhtmlencoded(fields[FieldTitle], fpcontent); + fputs("</a></h2>", fpcontent); + printcontent(fields[FieldContent], fpcontent); + fputs("</div></body></html>", fpcontent); + fclose(fpcontent); + } + + + + + /* write item. */ + parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); + /* set modified and access time of file to time of item. */ + contenttime.actime = parsedtime; + contenttime.modtime = parsedtime; + utime(filepath, &contenttime); + + isnew = (parsedtime >= comparetime); + totalnew += isnew; + feedcurrent->totalnew += isnew; + feedcurrent->total++; + if(isnew) + fputs("<tr class=\"n\">", fpitems); + else + fputs("<tr>", fpitems); + fputs("<td nowrap valign=\"top\">", fpitems); + fputs(fields[FieldTimeFormatted], fpitems); + fputs("</td><td nowrap valign=\"top\">", fpitems); + if(isnew) + fputs("<b><u>", fpitems); + fputs("<a href=\"", fpitems); + fputs(relfilepath, fpitems); + fputs("\" target=\"content\">", fpitems); + printhtmlencoded(fields[FieldTitle], fpitems); + fputs("</a>", fpitems); + if(isnew) + fputs("</u></b>", fpitems); + fputs("</td></tr>\n", fpitems); } if(totalfeeds) { fputs("</table>\n", fpitems); } fputs("\n</div></body>\n</html>", fpitems); /* div items */ if(showsidebar) { - fputs("<html><head>", fpmenu); - fputs("<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />", fpmenu); - fputs("</head><body class=\"frame\"><div id=\"sidebar\">", fpmenu); + fputs("<html><head>" + "<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />" + "</head><body class=\"frame\"><div id=\"sidebar\">", fpmenu); for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) { if(!feedcurrent->name || feedcurrent->name[0] == '\0') continue; @@ -241,33 +304,24 @@ main(int argc, char **argv) { } fputs("</div></body></html>", fpmenu); } 
- - fputs("<!DOCTYPE html><html><head>\n", fpindex); - fprintf(fpindex, "\t<title>Newsfeed (%lu)</title>\n", totalnew); - fputs("\t<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n", fpindex); - fputs("</head>\n", fpindex); + fputs("<!DOCTYPE html><html><head>\n\t<title>Newsfeed (", fpindex); + fprintf(fpindex, "%lu", totalnew); + fputs(")</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n" + "</head>\n", fpindex); if(showsidebar) { fputs( - "<frameset framespacing=\"0\" cols=\"200,*\" frameborder=\"1\">" - " <frame name=\"menu\" src=\"menu.html\" target=\"menu\">", fpindex); + "<frameset framespacing=\"0\" cols=\"200,*\" frameborder=\"1\">\n" + " <frame name=\"menu\" src=\"menu.html\" target=\"menu\">\n", fpindex); } else { - fputs( - "<frameset framespacing=\"0\" cols=\"*\" frameborder=\"1\">", fpindex); + fputs("<frameset framespacing=\"0\" cols=\"*\" frameborder=\"1\">\n", fpindex); } fputs( - " <frameset id=\"frameset\" framespacing=\"0\" cols=\"50%,50%\" frameborder=\"1\">" - " <frame name=\"items\" src=\"items.html\" target=\"items\">" - " <frame name=\"content\" target=\"content\">" - " </frameset>" - "</frameset>" + " <frameset id=\"frameset\" framespacing=\"0\" cols=\"50%,50%\" frameborder=\"1\">\n" + " <frame name=\"items\" src=\"items.html\" target=\"items\">\n" + " <frame name=\"content\" target=\"content\">\n" + " </frameset>\n" + "</frameset>\n" "</html>", fpindex); - fclose(fpmenu); - fclose(fpitems); - fclose(fpindex); - - free(line); /* free line */ - feedsfree(feeds); /* free feeds linked-list */ - return EXIT_SUCCESS; } diff --git a/sfeed_html.c b/sfeed_html.c @@ -3,12 +3,21 @@ #include <stdlib.h> #include <time.h> #include <ctype.h> -#include "common.h" -#include "compat.h" + +#include "util.h" static int showsidebar = 1; /* show sidebar ? */ -void /* print error message to stderr */ +static struct feed *feeds = NULL; /* start of feeds linked-list. 
*/ +static char *line = NULL; + +static void +cleanup(void) { + free(line); /* free line */ + feedsfree(feeds); /* free feeds linked-list */ +} + +static void /* print error message to stderr */ die(const char *s) { fputs("sfeed_html: ", stderr); fputs(s, stderr); @@ -18,13 +27,14 @@ die(const char *s) { int main(void) { - char *line = NULL, *fields[FieldLast]; + char *fields[FieldLast]; unsigned long totalfeeds = 0, totalnew = 0; - int islink, isnew; - struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */ + unsigned int islink, isnew; + struct feed *f, *feedcurrent = NULL; time_t parsedtime, comparetime; size_t size = 0; + atexit(cleanup); comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ fputs( "<!DOCTYPE HTML>\n" @@ -36,20 +46,25 @@ main(void) { " <body class=\"noframe\">\n", stdout); + if(!(feedcurrent = calloc(1, sizeof(struct feed)))) + die("can't allocate enough memory"); + feeds = feedcurrent; + while(parseline(&line, &size, fields, FieldLast, '\t', stdin) > 0) { parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); isnew = (parsedtime >= comparetime); islink = (fields[FieldLink][0] != '\0'); /* first of feed section or new feed section. */ - if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) { + if(!totalfeeds || (feedcurrent && strcmp(feedcurrent->name, fields[FieldFeedName]))) { /* TODO: allocate feedcurrent before here, feedcurrent can be NULL */ + if(!(f = calloc(1, sizeof(struct feed)))) + die("can't allocate enough memory"); + /*f->next = NULL;*/ if(totalfeeds) { /* end previous one. */ fputs("</table>\n", stdout); - if(!(feedcurrent->next = calloc(1, sizeof(struct feed)))) - die("can't allocate enough memory"); - feedcurrent = feedcurrent->next; + feedcurrent->next = f; + feedcurrent = f; } else { - if(!(feedcurrent = calloc(1, sizeof(struct feed)))) - die("can't allocate enough memory"); + feedcurrent = f; feeds = feedcurrent; /* first item. 
*/ if(fields[FieldFeedName][0] == '\0' || !showsidebar) { /* set nosidebar class on div for styling */ @@ -58,8 +73,17 @@ main(void) { } else fputs("\t\t<div id=\"items\">\n", stdout); } - if(!(feedcurrent->name = xstrdup(fields[FieldFeedName]))) + + /* TODO: memcpy and make feedcurrent->name static? */ + if(!(feedcurrent->name = strdup(fields[FieldFeedName]))) die("can't allocate enough memory"); + + + /* + feedcurrent->totalnew = 0; + feedcurrent->total = 0; + feedcurrent->next = NULL;*/ + if(fields[FieldFeedName][0] != '\0') { fputs("<h2 id=\"", stdout); printfeednameid(feedcurrent->name, stdout); @@ -75,14 +99,13 @@ main(void) { totalnew += isnew; feedcurrent->totalnew += isnew; feedcurrent->total++; - if(isnew) - fputs("<tr class=\"n\"><td nowrap valign=\"top\">", stdout); + fputs("<tr class=\"n\">", stdout); else - fputs("<tr><td nowrap valign=\"top\">", stdout); + fputs("<tr>", stdout); + fputs("<td nowrap valign=\"top\">", stdout); fputs(fields[FieldTimeFormatted], stdout); fputs("</td><td nowrap valign=\"top\">", stdout); - if(isnew) fputs("<b><u>", stdout); if(islink) { @@ -100,10 +123,8 @@ main(void) { fputs("</u></b>", stdout); fputs("</td></tr>\n", stdout); } - if(totalfeeds) { - fputs("</table>\n", stdout); - fputs("\t\t</div>\n", stdout); /* div items */ - } + if(totalfeeds) + fputs("</table>\n\t\t</div>\n", stdout); /* div items */ if(showsidebar) { fputs("\t<div id=\"sidebar\">\n\t\t<ul>\n", stdout); for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) { @@ -125,6 +146,16 @@ main(void) { } fputs("\t\t</ul>\n\t</div>\n", stdout); } + /* toggle showing only new with "n" */ + fputs("<script type=\"text/javascript\">" + "var b=document.body;window.onkeypress=function(e){" + "switch(String.fromCharCode(e.which)){" + "case 'n':var n='newonly';b.className=/*toggle new only*/" + "b.className.indexOf(n)==-1?b.className+' '+n:b.className.replace(n,'');break;" + "case 'm':case 's':b.querySelector('#sidebar a').focus();break; /*focus menu*/" 
+ "case 'i':b.querySelector('#items').focus();break;/*focus items*/" + "}};" + "</script>", stdout); fputs( " </body>\n" " <title>Newsfeed (", @@ -132,8 +163,5 @@ main(void) { fprintf(stdout, "%lu", totalnew); fputs(")</title>\n</html>", stdout); - free(line); /* free line */ - feedsfree(feeds); /* free feeds linked-list */ - return EXIT_SUCCESS; } diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c @@ -3,23 +3,23 @@ #include <stdlib.h> #include <string.h> #include <strings.h> + #include "xml.h" -#include "compat.h" -XMLParser parser; /* XML parser state */ -char feedurl[2048], feedname[2048], basesiteurl[2048]; +static XMLParser parser; /* XML parser state */ +static char feedurl[2048], feedname[2048], basesiteurl[2048]; -int +static int istag(const char *s1, const char *s2) { - return !xstrcasecmp(s1, s2); + return !strcasecmp(s1, s2); } -int +static int isattr(const char *s1, const char *s2) { - return !xstrcasecmp(s1, s2); + return !strcasecmp(s1, s2); } -void +static void xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) { if(istag(tag, "outline")) { feedurl[0] = '\0'; @@ -28,7 +28,7 @@ xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) { } } -void +static void xml_handler_end_element(XMLParser *p, const char *tag, size_t taglen, int isshort) { if(istag(tag, "outline")) { @@ -39,7 +39,7 @@ xml_handler_end_element(XMLParser *p, const char *tag, size_t taglen, } } -void +static void xml_handler_attr(XMLParser *p, const char *tag, size_t taglen, const char *name, size_t namelen, const char *value, size_t valuelen) { if(istag(tag, "outline")) { diff --git a/sfeed_plain.c b/sfeed_plain.c @@ -2,27 +2,27 @@ #include <string.h> #include <stdlib.h> #include <time.h> -#include "common.h" -#include "compat.h" + +#include "util.h" void -printutf8padded(const char *s, size_t len) { +printutf8padded(const char *s, size_t len, FILE *fp, int pad) { size_t n = 0, i; for(i = 0; s[i] && n < len; i++) { if((s[i] & 0xc0) != 0x80) /* 
start of character */ n++; - putchar(s[i]); + putc(s[i], fp); } for(; n < len; n++) - putchar(' '); + putc(pad, fp); } int main(void) { char *line = NULL, *fields[FieldLast]; time_t parsedtime, comparetime; - int isnew; + unsigned int isnew; size_t size = 0; comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ @@ -34,7 +34,7 @@ main(void) { printf("%-15.15s ", fields[FieldFeedName]); printf("%-30.30s", fields[FieldTimeFormatted]); fputs(" ", stdout); - printutf8padded(fields[FieldTitle], 70); + printutf8padded(fields[FieldTitle], 70, stdout, ' '); fputs(" ", stdout); if(fields[FieldBaseSiteUrl][0] != '\0') printlink(fields[FieldLink], fields[FieldBaseSiteUrl], stdout); diff --git a/sfeed_stats.c b/sfeed_stats.c @@ -0,0 +1,91 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <ctype.h> + +#include "util.h" + +static struct feed *feeds = NULL; /* start of feeds linked-list. */ +static char *line = NULL; + +static void +cleanup(void) { + free(line); /* free line */ + feedsfree(feeds); /* free feeds linked-list */ +} + +static void /* print error message to stderr */ +die(const char *s) { + fputs("sfeed_stats: ", stderr); + fputs(s, stderr); + fputc('\n', stderr); + exit(EXIT_FAILURE); +} + +int +main(void) { + char *fields[FieldLast]; + unsigned long totalfeeds = 0, totalnew = 0; + unsigned int islink, isnew; + struct feed *f, *feedcurrent = NULL; + time_t parsedtime, comparetime; + size_t size = 0; + + atexit(cleanup); + comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ + + if(!(feedcurrent = calloc(1, sizeof(struct feed)))) + die("can't allocate enough memory"); + feeds = feedcurrent; + + while(parseline(&line, &size, fields, FieldLast, '\t', stdin) > 0) { + parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); + isnew = (parsedtime >= comparetime); + islink = (fields[FieldLink][0] != '\0'); + /* first of feed section or new feed section. 
*/ + if(!totalfeeds || (feedcurrent && strcmp(feedcurrent->name, fields[FieldFeedName]))) { /* TODO: allocate feedcurrent before here, feedcurrent can be NULL */ + if(!(f = calloc(1, sizeof(struct feed)))) + die("can't allocate enough memory"); + /*f->next = NULL;*/ + if(totalfeeds) { /* end previous one. */ + feedcurrent->next = f; + feedcurrent = f; + } else { + feedcurrent = f; + feeds = feedcurrent; /* first item. */ + } + if(isnew && parsedtime > feedcurrent->timenewest) { + feedcurrent->timenewest = parsedtime; + strncpy(feedcurrent->timenewestformat, fields[FieldTimeFormatted], + sizeof(feedcurrent->timenewestformat)); + } + + /* TODO: memcpy and make feedcurrent->name static? */ + if(!(feedcurrent->name = strdup(fields[FieldFeedName]))) + die("can't allocate enough memory"); + + /* + feedcurrent->totalnew = 0; + feedcurrent->total = 0; + feedcurrent->next = NULL;*/ + + totalfeeds++; + } + totalnew += isnew; + feedcurrent->totalnew += isnew; + feedcurrent->total++; + } + printf("Total new: %lu\n", totalnew); + for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) { + if(!feedcurrent->name || feedcurrent->name[0] == '\0') + continue; +/* printfeednameid(feedcurrent->name, stdout);*/ + fprintf(stdout, "[%4lu / %4lu] %-20s", feedcurrent->totalnew, feedcurrent->total, + feedcurrent->name); + if(feedcurrent->timenewestformat && feedcurrent->timenewestformat[0]) + fprintf(stdout, " (newest %s)", feedcurrent->timenewestformat); + putchar('\n'); + } + return EXIT_SUCCESS; +} diff --git a/sfeed_update b/sfeed_update @@ -1,6 +1,6 @@ #!/bin/sh # update feeds, merge with old feeds. -# NOTE: assumes "sfeed_*" files are in $PATH. +# NOTE: assumes "sfeed_*" executables are in $PATH. # defaults sfeedpath="$HOME/.sfeed" @@ -42,39 +42,40 @@ merge() { } # fetch a feed via HTTP/HTTPS etc. 
-# fetchfeed(url, name) +# fetchfeed(url, name, lastupdated) fetchfeed() { - if (curl -f -s -S -L --max-time 30 -z "$lastupdated" "$1"); then - printf "%s\n" "[`date`] Fetching $2 [$1] ... done" >&2 + if curl -f -s -S -L --max-time 30 -z "$3" "$1"; then + printf "[ OK] %s %s\n" "[`date '+%Y-%m-%d %H:%M:%S %Z'`]" "$2" >&2 else - printf "%s\n" "[`date`] Fetching $2 [$1] ... fail" >&2 + printf "[FAIL] %s %s\n" "[`date '+%Y-%m-%d %H:%M:%S %Z'`]" "$2" >&2 fi } -# add field after line, output to stdout. -# addfield(field) -addfield() { - # NOTE: IFS is set and restored to prevent stripping whitespace. - OLDIFS="$IFS" - IFS=" -" - while read -r line; do - printf "%s %s\n" "${line}" "$1" - done - IFS="$OLDIFS" +# convert encoding from one encoding to another. +# convertencoding(from, to) +convertencoding() { + if [ ! "$1" = "" ] && [ ! "$2" = "" ] && [ ! "$1" = "$2" ]; then # from != to + iconv -cs -f "$1" -t "$2" 2> /dev/null + else + cat # no convert, just output + fi } # fetch and parse feed. -# feed(name, feedurl, basesiteurl, [encoding]) +# feed(name, feedurl, [basesiteurl], [encoding]) feed() { - tmpfile=$(mktemp -p "$TMPDIR") - (if [ "$4" = "" ]; then - # don't use iconv if encoding not set in config. - fetchfeed "$2" "$1" - else - # use iconv to convert encoding to UTF-8. - fetchfeed "$2" "$1" | iconv -cs -f "$4" -t "utf-8" - fi) | sfeed | addfield "$1 $2 $3" > "$tmpfile" + (tmpfeedfile=$(mktemp -p "$TMPDIR") + tmpencfile="" + encoding="$4" + if [ ! "$encoding" = "" ]; then + fetchfeed "$2" "$1" "$lastupdated" | convertencoding "$encoding" "utf-8" + else # detect encoding. 
+ tmpencfile=$(mktemp -p "$TMPDIR") + fetchfeed "$2" "$1" "$lastupdated" > "$tmpencfile" + detectenc=$(sfeed_xmlenc < "$tmpencfile") + convertencoding "$detectenc" "utf-8" < "$tmpencfile" + rm -f "$tmpencfile" + fi | sfeed "$1 $2 $3" > "$tmpfeedfile") & } terminated() { @@ -86,6 +87,11 @@ cleanup() { rm -rf "$tmpfile" "$TMPDIR" } +feeds() { + echo "Configuration file \"$config\" is invalid or does not contain a \"feeds\" function." >&2 + echo "See sfeedrc.example for an example." >&2 +} + # load config file. loadconfig "$1" # fetch feeds and store in temporary file. @@ -93,17 +99,20 @@ TMPDIR=$(mktemp -d -t "sfeed_XXXXXX") # get date of last modified feedfile in format: # YYYYmmdd HH:MM:SS [+-][0-9]* lastupdated=$(stat -c "%y" "$sfeedfile" 2> /dev/null | cut -c 1-4,6-7,9-10,11-19,30-) -# Kill whole current process group on ^C. +# kill whole current process group on ^C. isrunning="1" -trap -- "terminated" "15" # SIGTERM: signal to terminate parent. -trap -- "kill -TERM -$$" "2" # SIGINT: kill all running childs >:D +# SIGTERM: signal to terminate parent. +trap -- "terminated" "15" +# SIGINT: kill all running childs >:D +trap -- "kill -TERM -$$" "2" # fetch feeds specified in config file. feeds # make sure path exists. mkdir -p "$sfeedpath" # wait till all feeds are fetched (allows running in parallel). wait -[ "$isrunning" = "0" ] && cleanup && exit 1 # if terminated cleanup. +# if terminated cleanup. +[ "$isrunning" = "0" ] && cleanup && exit 1 # concat all individual feed files to a single file. # NOTE: mktemp uses $TMPDIR for temporary directory. tmpfile=$(mktemp -t "sfeed_XXXXXX") diff --git a/sfeed_update.1 b/sfeed_update.1 @@ -59,9 +59,9 @@ This file is evaluated as a shellscript in sfeed_update. You can for example override the fetchfeed() function to use wget, fetch or an other download program or you can override the merge() function to change the merge logic. -The function feeds() is called to fetch the feeds. 
The -function feed() can safely be executed as a parallel job -in your sfeedrc config file to speedup updating. +The function feeds() is called to fetch the feeds. By +default the function feed() is executed as a parallel +job to speedup updating. .SH FILES WRITTEN .TP .B feeds diff --git a/sfeed_web.c b/sfeed_web.c @@ -0,0 +1,72 @@ +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <stdlib.h> +#include <ctype.h> + +#include "util.h" +#include "xml.h" + +static unsigned int isbase = 0, islink = 0, isfeedlink = 0, found = 0; +static char feedlink[4096], basehref[4096]; + +static void +xmltagstart(XMLParser *p, const char *tag, size_t taglen) { + isbase = islink = isfeedlink = 0; + if(taglen == 4) { /* optimization */ + if(!strncasecmp(tag, "base", taglen)) + isbase = 1; + else if(!strncasecmp(tag, "link", taglen)) + islink = 1; + } +} + +static void +xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) { + if(isfeedlink) { + printlink(feedlink, basehref, stdout); + putchar('\n'); + found++; + } +} + +static void +xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name, + size_t namelen, const char *value, size_t valuelen) { + + if(namelen != 4) /* optimization */ + return; + if(isbase) { + if(!strncasecmp(name, "href", namelen)) + strlcpy(basehref, value, sizeof(basehref) - 1); + } else if(islink) { + if(!strncasecmp(name, "type", namelen)) { + if(!strncasecmp(value, "application/atom", strlen("application/atom")) || + !strncasecmp(value, "application/rss", strlen("application/rss"))) { + isfeedlink = 1; + } + } else if(!strncasecmp(name, "href", namelen)) + strlcpy(feedlink, value, sizeof(feedlink) - 1); + } +} + +int +main(int argc, char **argv) { + XMLParser x; + + feedlink[0] = '\0'; + /* base href */ + if(argc > 1) + strlcpy(basehref, argv[1], sizeof(basehref) - 1); + else + basehref[0] = '\0'; + + xmlparser_init(&x); + x.xmltagstart = xmltagstart; + x.xmlattr = xmlattr; + x.xmltagstartparsed = 
xmltagstartparsed; + + xmlparser_parse(&x); + + return found ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c @@ -0,0 +1,45 @@ +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <stdlib.h> +#include <ctype.h> + +#include "xml.h" + +static int isxmlpi = 0, tags = 0; + +static void +xmltagstart(XMLParser *p, const char *tag, size_t taglen) { + if(tags > 3) /* optimization: try to find processing instruction at start */ + exit(EXIT_FAILURE); + isxmlpi = (tag[0] == '?' && (!strncasecmp(tag, "?xml", taglen))) ? 1 : 0; + tags++; +} + +static void +xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort) { + isxmlpi = 0; +} + +static void +xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name, size_t namelen, const char *value, size_t valuelen) { + if(isxmlpi && (!strncasecmp(name, "encoding", namelen))) { + for(; *value; value++) + putc(tolower((int)*value), stdout); /* output lowercase */ + exit(EXIT_SUCCESS); + } +} + +int +main(int argc, char **argv) { + XMLParser x; + + xmlparser_init(&x); + x.xmltagstart = xmltagstart; + x.xmltagend = xmltagend; + x.xmlattr = xmlattr; + + xmlparser_parse(&x); + + return EXIT_FAILURE; +} diff --git a/sfeedrc.example b/sfeedrc.example @@ -6,7 +6,7 @@ # list of feeds to fetch: feeds() { - # feed <name> <feedurl> <basesiteurl> [encoding] + # feed <name> <feedurl> [basesiteurl] [encoding] feed "codemadness" "http://www.codemadness.nl/blog/rss.xml" feed "explosm" "http://feeds.feedburner.com/Explosm" feed "linux kernel" "http://kernel.org/kdist/rss.xml" "http://kernel.org" "iso-8859-1" diff --git a/style.css b/style.css @@ -69,3 +69,14 @@ body.frame .content { font-family: sans; font-size: medium; } +/* show only new when body has newonly class */ +body.newonly tr, +body.newonly li { + display: none; +} +body.newonly li.n { + display: list-item; +} +body.newonly tr.n { + display: table-row; +} diff --git a/util.c b/util.c @@ -0,0 +1,202 @@ +#include <stdio.h> 
+#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <ctype.h> +#include <sys/types.h> + +#include "util.h" + +#if 0 +/* TODO: optimize */ +char * +afgets(char **p, size_t *size, FILE *fp) { + char buf[BUFSIZ], *alloc = NULL; + size_t n, len = 0, allocsiz; + int end = 0; + + while(fgets(buf, sizeof(buf), fp)) { + n = strlen(buf); + if(buf[n - 1] == '\n') { /* dont store newlines. */ + buf[n - 1] = '\0'; + n--; + end = 1; /* newline found, end */ + } + len += n; + allocsiz = len + 1; + if(allocsiz > *size) { + if((alloc = realloc(*p, allocsiz))) { + *p = alloc; + *size = allocsiz; + } else { + free(*p); + *p = NULL; + fputs("error: could not realloc\n", stderr); + exit(EXIT_FAILURE); + return NULL; + } + } + strncpy((*p + (len - n)), buf, n); + if(end || feof(fp)) + break; + } + if(*p && len > 0) { + (*p)[len] = '\0'; + return *p; + } + return NULL; +} +#endif + +/* + * Taken from OpenBSD. + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. + */ +size_t +strlcpy(char *dst, const char *src, size_t siz) { + char *d = dst; + const char *s = src; + size_t n = siz; + + /* copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + /* not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + return(s - src - 1); /* count does not include NUL */ +} + +/* TODO: optimize */ +char * +afgets(char **p, size_t *size, FILE *fp) { + char buf[BUFSIZ], *alloc = NULL; + size_t n, len = 0, allocsiz; + int end = 0; + + while(!end && !feof(fp) && fgets(buf, sizeof(buf), fp)) { + n = strlen(buf); + if(buf[n - 1] == '\n') { /* dont store newlines. 
*/ + buf[n - 1] = '\0'; + n--; + end = 1; /* newline found, end */ + } + len += n; + allocsiz = len + 1; + if(allocsiz > *size) { + if((alloc = realloc(*p, allocsiz))) { + *p = alloc; + *size = allocsiz; + } else { + free(*p); + *p = NULL; + fputs("error: could not realloc\n", stderr); + exit(EXIT_FAILURE); + return NULL; + } + } + strlcpy((*p + (len - n)), buf, n + 1); /* TODO: dont depend on strlcpy */ +/* strncpy((*p + (len - n)), buf, n);*/ + } + if(*p && len > 0) { + (*p)[len] = '\0'; + return *p; + } + return NULL; +} + +void /* print link; if link is relative use baseurl to make it absolute */ +printlink(const char *link, const char *baseurl, FILE *fp) { + const char *ebaseproto, *ebasedomain, *p; + int isrelative; + + /* protocol part */ + for(p = link; *p && (isalpha((int)*p) || isdigit((int)*p) || *p == '+' || *p == '-' || *p == '.'); p++); + isrelative = strncmp(p, "://", strlen("://")); + if(isrelative) { /* relative link (baseurl is used). */ + if((ebaseproto = strstr(baseurl, "://"))) { + ebaseproto += strlen("://"); + fwrite(baseurl, 1, ebaseproto - baseurl, fp); + } else { + ebaseproto = baseurl; + if(*baseurl || (link[0] == '/' && link[1] == '/')) + fputs("http://", fp); + } + if(link[0] == '/') { /* relative to baseurl domain (not path). */ + if(link[1] == '/') /* absolute url but with protocol from baseurl. */ + link += 2; + else if((ebasedomain = strchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */ + fwrite(ebaseproto, 1, ebasedomain - ebaseproto, fp); + else + fputs(ebaseproto, stdout); + } else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. 
*/ + fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, fp); + else { + fputs(ebaseproto, fp); + if(*baseurl && *link) + fputc('/', fp); + } + } + fputs(link, fp); +} + +unsigned int +parseline(char **line, size_t *size, char **fields, unsigned int maxfields, int separator, FILE *fp) { + unsigned int i = 0; + char *prev, *s; + + if(afgets(line, size, fp)) { + for(prev = *line; (s = strchr(prev, separator)) && i <= maxfields; i++) { + *s = '\0'; /* null terminate string. */ + fields[i] = prev; + prev = s + 1; + } + fields[i] = prev; + for(i++; i < maxfields; i++) /* make non-parsed fields empty. */ + fields[i] = ""; + } + return i; +} + +/* print feed name for id; spaces and tabs in string as "-" (spaces in anchors are not valid). */ +void +printfeednameid(const char *s, FILE *fp) { + for(; *s; s++) + fputc(isspace((int)*s) ? '-' : tolower((int)*s), fp); +} + +void +printhtmlencoded(const char *s, FILE *fp) { + for(; *s; s++) { + switch(*s) { + case '<': fputs("&lt;", fp); break; + case '>': fputs("&gt;", fp); break; +/* case '&': fputs("&amp;", fp); break;*/ + default: + fputc(*s, fp); + } + } +} + +void +feedsfree(struct feed *f) { + struct feed *next = NULL; + + for(; f; f = next) { + next = f->next; + /*f->next = NULL;*/ + free(f->name); + /*f->name = NULL;*/ + free(f); + } +} diff --git a/util.h b/util.h @@ -0,0 +1,26 @@ +#include <time.h> + +/* feed info */ +struct feed { + char *name; /* feed name */ + unsigned long totalnew; /* amount of new items per feed */ + unsigned long total; /* total items */ + time_t timenewest; + char timenewestformat[64]; + struct feed *next; /* linked list */ +}; + +enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink, + FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType, + FieldFeedName, FieldFeedUrl, FieldBaseSiteUrl, FieldLast }; + +#undef strlcpy +size_t strlcpy(char *, const char *, size_t); + +char * afgets(char **p, size_t *size, FILE *fp); +void feedsfree(struct feed *f); +unsigned int 
parseline(char **line, size_t *size, char **fields, + unsigned int maxfields, int separator, FILE *fp); +void printfeednameid(const char *s, FILE *fp); +void printhtmlencoded(const char *s, FILE *fp); +void printlink(const char *link, const char *baseurl, FILE *fp); diff --git a/xml.c b/xml.c @@ -2,8 +2,8 @@ #include <string.h> #include <stdlib.h> #include <ctype.h> -#include "xml.h" +#include "xml.h" void xmlparser_init(XMLParser *x) { @@ -22,76 +22,19 @@ xmlparser_getnext(XMLParser *x) { } __inline__ void -xmlparser_parseattrvalue(XMLParser *x, const char *name, size_t namelen, int end) { - size_t valuelen = 0; - int c; - - if(x->xmlattrstart) - x->xmlattrstart(x, x->tag, x->taglen, name, namelen); - for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) { - if(c == '&' && x->xmlattrentity) { /* entities */ - x->data[valuelen] = '\0'; - /* call data function with data before entity if there is data */ - if(valuelen && x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - x->data[0] = c; - valuelen = 1; - while((c = xmlparser_getnext(x)) != EOF) { - if(c == end) - goto parseattrvalueend; - if(valuelen < sizeof(x->data) - 1) - x->data[valuelen++] = c; - else { /* TODO: entity too long? this should be very strange. */ - x->data[valuelen] = '\0'; - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */ -/* x->data[0] = '\0'; */ - break; - } - if(c == ';') { - x->data[valuelen] = '\0'; - x->xmlattrentity(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */ - break; - } - } - } else if(c == end) { /* TODO: ugly, remove goto?, simplify? duplicate code. 
*/ -parseattrvalueend: - x->data[valuelen] = '\0'; - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - if(x->xmlattrend) - x->xmlattrend(x, x->tag, x->taglen, name, namelen); - return; - } else { - if(valuelen < sizeof(x->data) - 1) { - x->data[valuelen++] = c; - } else { - x->data[valuelen] = '\0'; - if(x->xmlattr) - x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen); - x->data[0] = c; - valuelen = 1; - } - } - } -} - -__inline__ void -xmlparser_parseattrs(XMLParser *x, int *isshorttag) { - size_t namelen = 0; - int c, endname = 0; +xmlparser_parseattrs(XMLParser *x) { + size_t namelen = 0, valuelen; + int c, endsep, endname = 0; while((c = xmlparser_getnext(x)) != EOF) { - if(isspace(c)) { - if(namelen) /* Do nothing */ + if(isspace(c)) { /* TODO: simplify endname ? */ + if(namelen) /* do nothing */ endname = 1; else continue; } - if(c == '?' && isspace(c)) { /* Do nothing */ - } else if(c == '=') { + if(c == '?'); /* ignore */ + else if(c == '=') { x->name[namelen] = '\0'; } else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) { /* attribute without value */ @@ -107,7 +50,56 @@ xmlparser_parseattrs(XMLParser *x, int *isshorttag) { namelen = 1; } else if(namelen && (c == '\'' || c == '"')) { /* attribute with value */ - xmlparser_parseattrvalue(x, x->name, namelen, c); + endsep = c; /* c is end separator */ + if(x->xmlattrstart) + x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); + for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) { + if(c == '&' && x->xmlattrentity) { /* entities */ + x->data[valuelen] = '\0'; + /* call data function with data before entity if there is data */ + if(valuelen && x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + while((c = xmlparser_getnext(x)) != EOF) { + if(c == endsep) + break; + if(valuelen < sizeof(x->data) - 1) + x->data[valuelen++] = c; + else { /* TODO: entity too long? 
this should be very strange. */ + x->data[valuelen] = '\0'; + if(x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + valuelen = 0; + break; + } + if(c == ';') { + x->data[valuelen] = '\0'; + x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + valuelen = 0; + break; + } + } + } else if(c != endsep) { + if(valuelen < sizeof(x->data) - 1) { + x->data[valuelen++] = c; + } else { + x->data[valuelen] = '\0'; + if(x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + } + } + if(c == endsep) { + x->data[valuelen] = '\0'; + if(x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + if(x->xmlattrend) + x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); + break; + } + } namelen = 0; endname = 0; } else if(namelen < sizeof(x->name) - 1) @@ -115,7 +107,7 @@ xmlparser_parseattrs(XMLParser *x, int *isshorttag) { if(c == '>') { break; } else if(c == '/') { - *isshorttag = 1; + x->isshorttag = 1; namelen = 0; x->name[0] = '\0'; } @@ -133,15 +125,12 @@ xmlparser_parsecomment(XMLParser *x) { if(c == '-' && i < 2) i++; else if(c == '>') { - if(i == 2) { /* (!memcmp(cd, "-->", strlen("-->"))) { */ + if(i == 2) { /* -- */ if(datalen >= 2) { datalen -= 2; x->data[datalen] = '\0'; if(x->xmlcomment) x->xmlcomment(x, x->data, datalen); -/* } else { - datalen = 0; - x->data[datalen] = '\0';*/ } if(x->xmlcommentend) x->xmlcommentend(x); @@ -149,9 +138,9 @@ xmlparser_parsecomment(XMLParser *x) { } i = 0; } - if(datalen < sizeof(x->data) - 1) { /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ /* TODO: what if the end has --, and its cut on the boundary, test this. */ + if(datalen < sizeof(x->data) - 1) /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ /* TODO: what if the end has --, and its cut on the boundary, test this. 
*/ x->data[datalen++] = c; - } else { + else { x->data[datalen] = '\0'; if(x->xmlcomment) x->xmlcomment(x, x->data, datalen); @@ -161,6 +150,13 @@ xmlparser_parsecomment(XMLParser *x) { } } +/* TODO: + * <test><![CDATA[1234567dddd8]]]> + * + * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1 + * test comment function too for similar bug? + * + */ __inline__ void xmlparser_parsecdata(XMLParser *x) { size_t datalen = 0, i = 0; @@ -172,15 +168,12 @@ xmlparser_parsecdata(XMLParser *x) { if(c == ']' && i < 2) { i++; } else if(c == '>') { - if(i == 2) { /* (!memcmp(cd, "]]", strlen("]]"))) { */ + if(i == 2) { /* ]] */ if(datalen >= 2) { datalen -= 2; x->data[datalen] = '\0'; - if(x->xmlcdata) + if(x->xmlcdata && datalen) x->xmlcdata(x, x->data, datalen); -/* } else { - datalen = 0; - x->data[datalen] = '\0';*/ } if(x->xmlcdataend) x->xmlcdataend(x); @@ -200,130 +193,122 @@ xmlparser_parsecdata(XMLParser *x) { } } -__inline__ void -xmlparser_parsetag(XMLParser *x) { - size_t datalen, taglen; - int c, s, isshorttag = 0; - - x->tag[0] = '\0'; - x->taglen = 0; - while((c = xmlparser_getnext(x)) != EOF && isspace(c)); - if(c == '!') { - for(datalen = 0; (c = xmlparser_getnext(x)) != EOF;) { - if(datalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */ - x->data[datalen++] = c; /* TODO: prevent overflow */ - if(c == '>') - break; - else if(c == '-' && datalen == strlen("--") && - (x->data[0] == '-')) { /* comment */ /* TODO: optimize this bitch */ - xmlparser_parsecomment(x); - break; - } else if(c == '[' && datalen == strlen("[CDATA[") && - x->data[1] == 'C' && x->data[2] == 'D' && - x->data[3] == 'A' && x->data[4] == 'T' && - x->data[5] == 'A' && x->data[6] == '[') { /* cdata */ - xmlparser_parsecdata(x); - break; - } - } - } else if(c == '?') { - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '"' || c == '\'') - for(s = c; (c = xmlparser_getnext(x)) != EOF && c != s;); - else if(c == '>') - break; - } - /* TODO: find out why checking isalpha(c) 
gives "not enough memory" - * also check if maybe when there is << or <> it might go into an infinite loop (unsure) */ - } else if(c != EOF && c != '>') { /* TODO: optimize and put above the other conditions ? */ - x->tag[0] = c; - taglen = 1; - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '/') - isshorttag = 1; /* short tag */ - else if(c == '>' || isspace(c)) { - x->tag[taglen] = '\0'; - if(x->tag[0] == '/') { /* end tag, starts with </ */ - x->taglen = --taglen; /* len -1 because of / */ - if(x->xmltagend) - x->xmltagend(x, &(x->tag)[1], x->taglen, 0); - } else { - x->taglen = taglen; - if(x->xmltagstart) - x->xmltagstart(x, x->tag, x->taglen); /* start tag */ - if(isspace(c)) - xmlparser_parseattrs(x, &isshorttag); - if(x->xmltagstartparsed) - x->xmltagstartparsed(x, x->tag, x->taglen, isshorttag); - } - if(isshorttag && x->xmltagend) - x->xmltagend(x, x->tag, x->taglen, 1); - break; - } else if(taglen < sizeof(x->tag) - 1) - x->tag[taglen++] = c; - } - } -} - void -xmlparser_parsedata(XMLParser *x, int c) { /* TODO: remove int c, ugly */ - size_t datalen = 0; +xmlparser_parse(XMLParser *x) { + int c, ispi; + size_t datalen, tagdatalen, taglen; - if(x->xmldatastart) - x->xmldatastart(x); - do { - if(c == '&' && x->xmldataentity) { /* TODO: test this, entity handler */ - x->data[datalen] = '\0'; - x->xmldata(x, x->data, datalen); - x->data[0] = c; - datalen = 1; - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '<') - goto parsedataend; - if(datalen < sizeof(x->data) - 1) - x->data[datalen++] = c; + while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */ + + while(c != EOF) { + if(c == '<') { /* parse tag */ + if((c = xmlparser_getnext(x)) == EOF) + return; + x->tag[0] = '\0'; + x->taglen = 0; + if(c == '!') { /* cdata and comments */ + for(tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) { + if(tagdatalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */ + x->data[tagdatalen++] = c; /* TODO: prevent overflow */ + if(c == '>') 
+ break; + else if(c == '-' && tagdatalen == strlen("--") && + (x->data[0] == '-')) { /* comment */ + xmlparser_parsecomment(x); + break; + } else if(c == '[') { + if(tagdatalen == strlen("[CDATA[") && + x->data[1] == 'C' && x->data[2] == 'D' && + x->data[3] == 'A' && x->data[4] == 'T' && + x->data[5] == 'A' && x->data[6] == '[') { /* cdata */ + xmlparser_parsecdata(x); + break; + } else { + /* markup declaration section */ + while((c = xmlparser_getnext(x)) != EOF && c != ']'); + } + } + } + } else { /* normal tag (open, short open, close), processing instruction. */ if(isspace(c)) - break; - else if(c == ';') { + while((c = xmlparser_getnext(x)) != EOF && isspace(c)); + if(c == EOF) + return; + x->tag[0] = c; + ispi = (c == '?') ? 1 : 0; + x->isshorttag = ispi; + taglen = 1; + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '/') /* TODO: simplify short tag? */ + x->isshorttag = 1; /* short tag */ + else if(c == '>' || isspace(c)) { + x->tag[taglen] = '\0'; + if(x->tag[0] == '/') { /* end tag, starts with </ */ + x->taglen = --taglen; /* len -1 because of / */ + if(taglen && x->xmltagend) + x->xmltagend(x, &(x->tag)[1], x->taglen, 0); + } else { + x->taglen = taglen; + if(x->xmltagstart) + x->xmltagstart(x, x->tag, x->taglen); /* start tag */ + if(isspace(c)) + xmlparser_parseattrs(x); + if(x->xmltagstartparsed) + x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); + } + if((x->isshorttag || ispi) && x->xmltagend) /* call tagend for shortform or processing instruction */ + x->xmltagend(x, x->tag, x->taglen, 1); + break; + } else if(taglen < sizeof(x->tag) - 1) + x->tag[taglen++] = c; + } + } + } else { /* parse data */ + datalen = 0; + if(x->xmldatastart) + x->xmldatastart(x); + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '&' && x->xmldataentity) { + if(datalen) { + x->data[datalen] = '\0'; + x->xmldata(x, x->data, datalen); + } + x->data[0] = c; + datalen = 1; + while((c = xmlparser_getnext(x)) != EOF) { + if(c == '<') + break; + if(datalen < 
sizeof(x->data) - 1) + x->data[datalen++] = c; + if(isspace(c)) + break; + else if(c == ';') { + x->data[datalen] = '\0'; + x->xmldataentity(x, x->data, datalen); + datalen = 0; + break; + } + } + } else if(c != '<') { + if(datalen < sizeof(x->data) - 1) { + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if(x->xmldata) + x->xmldata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } + if(c == '<') { x->data[datalen] = '\0'; - x->xmldataentity(x, x->data, datalen); - datalen = 0; + if(x->xmldata && datalen) + x->xmldata(x, x->data, datalen); + if(x->xmldataend) + x->xmldataend(x); break; } } - } else if(c == '<') { /* TODO: ugly, remove goto ? simplify? duplicate code. */ -parsedataend: - x->data[datalen] = '\0'; - if(x->xmldata) - x->xmldata(x, x->data, datalen); - if(x->xmldataend) - x->xmldataend(x); - break; - } else { - if(datalen < sizeof(x->data) - 1) { - x->data[datalen++] = c; - } else { - x->data[datalen] = '\0'; - if(x->xmldata) - x->xmldata(x, x->data, datalen); - x->data[0] = c; - datalen = 1; - } - } - } while((c = xmlparser_getnext(x)) != EOF); -} - -void -xmlparser_parse(XMLParser *x) { - int c; - - while((c = xmlparser_getnext(x)) != EOF) { - if(c == '<') /* tag */ - xmlparser_parsetag(x); - else { - xmlparser_parsedata(x, c); - xmlparser_parsetag(x); } } - return; } diff --git a/xml.h b/xml.h @@ -22,9 +22,11 @@ typedef struct xmlparser { void (*xmlcomment)(struct xmlparser *p, const char *comment, size_t commentlen); void (*xmlcommentend)(struct xmlparser *p); - FILE *fp; /* stream to read from */ + FILE *fp; /* file stream to read from */ + /* private; internal state */ char tag[1024]; /* current tag */ + int isshorttag; /* current tag is in short form ? */ size_t taglen; char name[256]; /* current attribute name */ char data[BUFSIZ]; /* data buffer used for tag and attribute data */