commit d8b0c45812890670943becd45383f75d57056e52
parent 1fa71087c9d754b687d52059ee88ca82b45ec1eb
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Mon, 31 Mar 2014 22:46:58 +0200
new version
lots of things changed, but cleanup todo. changelog and consistent stream of small updates will come in the future.
Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org>
Diffstat:
M | CHANGELOG | | | 10 | +++++++++- |
M | LICENSE | | | 9 | ++++++--- |
M | Makefile | | | 63 | +++++++++++++++++++++++++++++++++++++++++---------------------- |
M | README | | | 21 | +-------------------- |
D | common.c | | | 128 | ------------------------------------------------------------------------------- |
D | common.h | | | 18 | ------------------ |
D | compat.c | | | 41 | ----------------------------------------- |
D | compat.h | | | 17 | ----------------- |
M | config.mk | | | 6 | +++--- |
M | sfeed.c | | | 305 | ++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------- |
M | sfeed_frames.c | | | 344 | ++++++++++++++++++++++++++++++++++++++++++++++--------------------------------- |
M | sfeed_html.c | | | 76 | ++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------ |
M | sfeed_opml_import.c | | | 20 | ++++++++++---------- |
M | sfeed_plain.c | | | 14 | +++++++------- |
A | sfeed_stats.c | | | 91 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | sfeed_update | | | 67 | ++++++++++++++++++++++++++++++++++++++----------------------------- |
M | sfeed_update.1 | | | 6 | +++--- |
A | sfeed_web.c | | | 72 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | sfeed_xmlenc.c | | | 45 | +++++++++++++++++++++++++++++++++++++++++++++ |
M | sfeedrc.example | | | 2 | +- |
M | style.css | | | 11 | +++++++++++ |
A | util.c | | | 202 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | util.h | | | 26 | ++++++++++++++++++++++++++ |
M | xml.c | | | 375 | ++++++++++++++++++++++++++++++++++++++----------------------------------------- |
M | xml.h | | | 4 | +++- |
25 files changed, 1203 insertions(+), 770 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
@@ -3,9 +3,17 @@ v0.9
Features:
---------
+
+ * Feeds are now by default updated in parallel for a huge speedup in performance.
+ * Added hotkeys to sfeed_html to toggle showing only new items (n key) and
+ focusing the menu (s key) or items (i key) using a tiny bit of
+ javascript.
+ * Auto-detect XML encoding.
+
+
* Added sfeed_frames, a formatting program to output feeds as a HTML file with
frames. It's optimized to look good in older browsers that don't necessarily
- support CSS or modern HTML like links. See the man page for more details.
+ support CSS or modern HTML, like links. See the man page for more details.
* Removed the dependency on libexpat, using a custom XML parser (xml.*),
this parser is also non-validating, it will not check the XML for errors
or stop parsing if it contains errors (which is good).
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
MIT/X Consortium License
-© 2011-2013 Hiltjo Posthuma <hiltjo@codemadness.org>
+© 2011-2014 Hiltjo Posthuma <hiltjo@codemadness.org>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
@@ -21,9 +21,12 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
-tm_to_time and str*case functions from the Musl project, it's license is:
+tmtotime and some libc functions:
+str*case, strlcpy and macros in compat.c
-Copyright © 2005-2012 Rich Felker
+from the Musl project, its license is:
+
+Copyright © 2005-2013 Rich Felker
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/Makefile b/Makefile
@@ -3,10 +3,12 @@
include config.mk
NAME = sfeed
-SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_import.c xml.c sfeed_frames.c common.c compat.c
+SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_import.c sfeed_frames.c \
+ sfeed_xmlenc.c sfeed_web.c util.c xml.c
OBJ = ${SRC:.c=.o}
-all: options sfeed sfeed_plain sfeed_html sfeed_opml_import sfeed_frames
+all: options sfeed sfeed_plain sfeed_html sfeed_opml_import sfeed_frames \
+ sfeed_xmlenc sfeed_web
options:
@echo ${NAME} build options:
@@ -20,37 +22,46 @@ options:
${OBJ}: config.mk
-sfeed: sfeed.o xml.o compat.o
+sfeed: sfeed.o xml.o util.o
@echo CC -o $@
- @${CC} -o $@ sfeed.o xml.o compat.o ${LDFLAGS}
+ @${CC} -o $@ sfeed.o xml.o util.o ${LDFLAGS}
-sfeed_opml_import: sfeed_opml_import.o xml.o compat.o
+sfeed_opml_import: sfeed_opml_import.o xml.o
@echo CC -o $@
- @${CC} -o $@ sfeed_opml_import.o xml.o compat.o ${LDFLAGS}
+ @${CC} -o $@ sfeed_opml_import.o xml.o ${LDFLAGS}
-sfeed_plain: sfeed_plain.o common.o compat.o
+sfeed_plain: sfeed_plain.o util.o
@echo CC -o $@
- @${CC} -o $@ sfeed_plain.o common.o compat.o ${LDFLAGS}
+ @${CC} -o $@ sfeed_plain.o util.o ${LDFLAGS}
-sfeed_html: sfeed_html.o common.o compat.o
+sfeed_html: sfeed_html.o util.o
@echo CC -o $@
- @${CC} -o $@ sfeed_html.o common.o compat.o ${LDFLAGS}
+ @${CC} -o $@ sfeed_html.o util.o ${LDFLAGS}
-sfeed_frames: sfeed_frames.o common.o compat.o
+sfeed_frames: sfeed_frames.o util.o
@echo CC -o $@
- @${CC} -o $@ sfeed_frames.o common.o compat.o ${LDFLAGS}
+ @${CC} -o $@ sfeed_frames.o util.o ${LDFLAGS}
+
+sfeed_xmlenc: sfeed_xmlenc.o xml.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_xmlenc.o xml.o ${LDFLAGS}
+
+sfeed_web: sfeed_web.o xml.o util.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_web.o xml.o util.o ${LDFLAGS}
clean:
@echo cleaning
- @rm -f sfeed sfeed_plain sfeed_html sfeed_frames sfeed_opml_import ${OBJ} ${NAME}-${VERSION}.tar.gz
+ @rm -f sfeed sfeed_plain sfeed_html sfeed_frames sfeed_opml_import \
+ sfeed_xmlenc sfeed_web ${OBJ} ${NAME}-${VERSION}.tar.gz
dist: clean
@echo creating dist tarball
@mkdir -p ${NAME}-${VERSION}
@cp -R CHANGELOG LICENSE Makefile README config.mk \
- TODO CREDITS sfeedrc.example style.css ${SRC} common.c sfeed_update sfeed_opml_export \
+ TODO CREDITS sfeedrc.example style.css ${SRC} sfeed_update \
sfeed.1 sfeed_update.1 sfeed_plain.1 sfeed_html.1 sfeed_opml_import.1 \
- sfeed_frames.c sfeed_frames.1 sfeed_opml_export.1 ${NAME}-${VERSION}
+ sfeed_frames.1 sfeed_opml_export sfeed_opml_export.1 ${NAME}-${VERSION}
@tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION}
@gzip ${NAME}-${VERSION}.tar
@rm -rf ${NAME}-${VERSION}
@@ -58,13 +69,14 @@ dist: clean
install: all
@echo installing executable file to ${DESTDIR}${PREFIX}/bin
@mkdir -p ${DESTDIR}${PREFIX}/bin
- @cp -f sfeed sfeed_update sfeed_plain sfeed_html sfeed_frames \
+ @cp -f sfeed sfeed_update sfeed_plain sfeed_html sfeed_frames sfeed_xmlenc \
sfeed_opml_import sfeed_opml_export ${DESTDIR}${PREFIX}/bin
@chmod 755 ${DESTDIR}${PREFIX}/bin/sfeed \
${DESTDIR}${PREFIX}/bin/sfeed_update \
${DESTDIR}${PREFIX}/bin/sfeed_plain \
${DESTDIR}${PREFIX}/bin/sfeed_html \
${DESTDIR}${PREFIX}/bin/sfeed_frames \
+ ${DESTDIR}${PREFIX}/bin/sfeed_xmlenc \
${DESTDIR}${PREFIX}/bin/sfeed_opml_import \
${DESTDIR}${PREFIX}/bin/sfeed_opml_export
@mkdir -p ${DESTDIR}${PREFIX}/share/sfeed
@@ -73,12 +85,18 @@ install: all
@echo installing manual pages to ${DESTDIR}${MANPREFIX}/man1
@mkdir -p ${DESTDIR}${MANPREFIX}/man1
@sed "s/VERSION/${VERSION}/g" < sfeed.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed.1
- @sed "s/VERSION/${VERSION}/g" < sfeed_update.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1
- @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1
- @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1
- @sed "s/VERSION/${VERSION}/g" < sfeed_frames.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_frames.1
- @sed "s/VERSION/${VERSION}/g" < sfeed_opml_import.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_import.1
- @sed "s/VERSION/${VERSION}/g" < sfeed_opml_export.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_export.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_update.1 > \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_frames.1 > \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_frames.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_opml_import.1 > \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_import.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_opml_export.1 > \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_export.1
@chmod 644 ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \
${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \
${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \
@@ -94,6 +112,7 @@ uninstall:
${DESTDIR}${PREFIX}/bin/sfeed_plain \
${DESTDIR}${PREFIX}/bin/sfeed_html \
${DESTDIR}${PREFIX}/bin/sfeed_frames \
+ ${DESTDIR}${PREFIX}/bin/sfeed_xmlenc \
${DESTDIR}${PREFIX}/bin/sfeed_opml_import \
${DESTDIR}${PREFIX}/bin/sfeed_opml_export \
${DESTDIR}${PREFIX}/share/${NAME}/sfeedrc.example \
diff --git a/README b/README
@@ -1,4 +1,4 @@
-sfeed v0.8
+sfeed v0.9
----------
Simple RSS and Atom parser (and some format programs).
@@ -144,25 +144,6 @@ gawk -F '\t' 'BEGIN {
mv feeds.clean feeds
-Common errors and solutions
----------------------------
-
-If you execute sfeed_update and see the error:
-
- "sfeed: error parsing xml not well-formed (invalid token) at line <linenumber>
- column <column>"
-
-it's possible sfeed is trying to parse a feed which is non-UTF8 encoded. You can
-specify the encoding in your sfeedrc file so it will be converted to UTF-8
-using iconv, for example for iso-8859-1 change:
-
- feed "feedname" "feedurl" "baseurl"
-
-to:
-
- feed "feedname" "feedurl" "baseurl" "iso-8859-1"
-
-
License
-------
diff --git a/common.c b/common.c
@@ -1,128 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-#include <ctype.h>
-#include "common.h"
-
-char *
-afgets(char **p, size_t *size, FILE *fp) {
- char buf[BUFSIZ], *alloc = NULL;
- size_t n, len = 0, allocsiz;
- int end = 0;
-
- while(fgets(buf, sizeof(buf), fp)) {
- n = strlen(buf);
- if(buf[n - 1] == '\n') { /* dont store newlines. */
- buf[n - 1] = '\0';
- n--;
- end = 1; /* newline found, end */
- }
- len += n;
- allocsiz = len + 1;
- if(allocsiz > *size) {
- if((alloc = realloc(*p, allocsiz))) {
- *p = alloc;
- *size = allocsiz;
- } else {
- free(*p);
- *p = NULL;
- fputs("error: could not realloc\n", stderr);
- exit(EXIT_FAILURE);
- return NULL;
- }
- }
- strncpy((*p + (len - n)), buf, n);
- if(end || feof(fp))
- break;
- }
- if(*p && len > 0) {
- (*p)[len] = '\0';
- return *p;
- }
- return NULL;
-}
-
-void /* print link; if link is relative use baseurl to make it absolute */
-printlink(const char *link, const char *baseurl, FILE *fp) {
- const char *ebaseproto, *ebasedomain, *p;
- int isrelative;
-
- /* protocol part */
- for(p = link; *p && (isalpha((int)*p) || isdigit((int)*p) || *p == '+' || *p == '-' || *p == '.'); p++);
- isrelative = strncmp(p, "://", strlen("://"));
- if(isrelative) { /* relative link (baseurl is used). */
- if((ebaseproto = strstr(baseurl, "://"))) {
- ebaseproto += strlen("://");
- fwrite(baseurl, 1, ebaseproto - baseurl, fp);
- } else {
- ebaseproto = baseurl;
- if(*baseurl || (link[0] == '/' && link[1] == '/'))
- fputs("http://", fp);
- }
- if(link[0] == '/') { /* relative to baseurl domain (not path). */
- if(link[1] == '/') /* absolute url but with protocol from baseurl. */
- link += 2;
- else if((ebasedomain = strchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */
- fwrite(ebaseproto, 1, ebasedomain - ebaseproto, fp);
- else
- fputs(ebaseproto, stdout);
- } else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */
- fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, fp);
- else {
- fputs(ebaseproto, fp);
- if(*baseurl && *link)
- fputc('/', fp);
- }
- }
- fputs(link, fp);
-}
-
-unsigned int
-parseline(char **line, size_t *size, char **fields, unsigned int maxfields, int separator, FILE *fp) {
- unsigned int i = 0;
- char *prev, *s;
-
- if(afgets(line, size, fp)) {
- for(prev = *line; (s = strchr(prev, separator)) && i <= maxfields; i++) {
- *s = '\0'; /* null terminate string. */
- fields[i] = prev;
- prev = s + 1;
- }
- fields[i] = prev;
- for(i++; i < maxfields; i++) /* make non-parsed fields empty. */
- fields[i] = "";
- }
- return i;
-}
-
-/* print feed name for id; spaces and tabs in string as "-" (spaces in anchors are not valid). */
-void
-printfeednameid(const char *s, FILE *fp) {
- for(; *s; s++)
- fputc(isspace((int)*s) ? '-' : tolower((int)*s), fp);
-}
-
-void
-printhtmlencoded(const char *s, FILE *fp) {
- for(; *s; s++) {
- switch(*s) {
- case '<': fputs("<", fp); break;
- case '>': fputs(">", fp); break;
-/* case '&': fputs("&", fp); break;*/
- default:
- fputc(*s, fp);
- }
- }
-}
-
-void
-feedsfree(struct feed *f) {
- struct feed *next;
- while(f) {
- next = f->next;
- free(f->name);
- free(f);
- f = next;
- }
-}
diff --git a/common.h b/common.h
@@ -1,18 +0,0 @@
-/* Feed info. */
-struct feed {
- char *name; /* feed name */
- unsigned long totalnew; /* amount of new items per feed */
- unsigned long total; /* total items */
- struct feed *next; /* linked list */
-};
-
-enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink,
- FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType,
- FieldFeedName, FieldFeedUrl, FieldBaseSiteUrl, FieldLast };
-
-char * afgets(char **p, size_t *size, FILE *fp);
-void feedsfree(struct feed *f);
-unsigned int parseline(char **line, size_t *size, char **fields, unsigned int maxfields, int separator, FILE *fp);
-void printfeednameid(const char *s, FILE *fp);
-void printhtmlencoded(const char *s, FILE *fp);
-void printlink(const char *link, const char *baseurl, FILE *fp);
diff --git a/compat.c b/compat.c
@@ -1,41 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-int
-xstrcasecmp(const char *_l, const char *_r) {
- const unsigned char *l = (void *)_l, *r = (void *)_r;
- for(; *l && *r && (*l == *r || tolower(*l) == tolower(*r)); l++, r++);
- return tolower(*l) - tolower(*r);
-}
-
-int
-xstrncasecmp(const char *_l, const char *_r, size_t n) {
- const unsigned char *l=(void *)_l, *r=(void *)_r;
- if(!n--)
- return 0;
- for(; *l && *r && n && (*l == *r || tolower(*l) == tolower(*r)); l++, r++, n--);
- return tolower(*l) - tolower(*r);
-}
-
-void *
-xstrdup(const char *s) {
- size_t len = strlen(s) + 1;
- void *p = malloc(len);
- if(p)
- memcpy(p, s, len);
- return p;
-}
-
-int
-xmkdir(const char *path, mode_t mode) {
-/* TODO: fix for mingw */
-#if MINGW
- return mkdir(path);
-#else
- return mkdir(path, mode);
-#endif
-}
diff --git a/compat.h b/compat.h
@@ -1,17 +0,0 @@
-#if 1
-#include <strings.h>
-#include <string.h>
-#define xstrcasecmp strcasecmp
-#define xstrncasecmp strncasecmp
-#else
-int xstrcasecmp(const char *s1, const char *s2);
-int xstrncasecmp(const char *s1, const char *s2, size_t len);
-#endif
-
-/* non-ansi */
-void * xstrdup(const char *s);
-
-/* for mingw */
-#include <sys/stat.h>
-#include <sys/types.h>
-int xmkdir(const char *path, mode_t mode);
diff --git a/config.mk b/config.mk
@@ -1,5 +1,5 @@
# sfeed version
-VERSION = 0.8
+VERSION = 0.9
# customize below to fit your system
@@ -12,12 +12,12 @@ INCS =
LIBS = -lc
# debug
-#CFLAGS = -fstack-protector-all -O0 -g -ansi -Wall -Wextra -pedantic -DVERSION=\"${VERSION}\"
+CFLAGS = -fstack-protector-all -O0 -g -ansi -Wall -Wextra -pedantic -DVERSION=\"${VERSION}\"
CFLAGS = -O0 -g -ansi -Wall -Wextra -pedantic -DVERSION=\"${VERSION}\"
LDFLAGS = ${LIBS}
# optimized
-#CFLAGS = -O2 -ansi -DVERSION=\"${VERSION}\" -DVERSION=\"${VERSION}\"
+#CFLAGS = -O2 -ansi -DVERSION=\"${VERSION}\"
#LDFLAGS = -s ${LIBS}
# Solaris
diff --git a/sfeed.c b/sfeed.c
@@ -1,10 +1,14 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <strings.h>
#include <time.h>
#include <ctype.h>
+
+#include "util.h"
#include "xml.h"
-#include "compat.h"
+
+#define ISWSNOSPACE(c) (((unsigned)(c) - '\t') < 5) /* true for \t \n \v \f \r: isspace(c) && c != ' ' */
enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2 };
const char *feedtypes[] = { "", "rss", "atom" };
@@ -12,6 +16,20 @@ const char *feedtypes[] = { "", "rss", "atom" };
enum { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2 };
const char *contenttypes[] = { "", "plain", "html" };
+const int FieldSeparator = '\t'; /* output field seperator character */
+
+enum {
+ TagUnknown = 0,
+ /* RSS */
+ RSSTagDcdate, RSSTagPubdate, RSSTagTitle,
+ RSSTagLink, RSSTagDescription, RSSTagContentencoded,
+ RSSTagGuid, RSSTagAuthor, RSSTagDccreator,
+ /* Atom */
+ AtomTagPublished, AtomTagUpdated, AtomTagTitle,
+ AtomTagSummary, AtomTagContent,
+ AtomTagId, AtomTagLink, AtomTagAuthor
+};
+
typedef struct string { /* String data / pool */
char *data; /* data */
size_t len; /* string length */
@@ -29,41 +47,29 @@ typedef struct feeditem { /* Feed item */
int feedtype; /* FeedTypeRSS or FeedTypeAtom */
} FeedItem;
-void die(const char *s);
-void cleanup(void);
-
-String *currentfield = NULL; /* TODO */
-const int FieldSeparator = '\t';
-FeedItem feeditem; /* data for current feed item */
-char feeditemtag[256] = ""; /* current tag _inside_ a feeditem */
-size_t feeditemtaglen = 0;
-int feeditemtagid = 0;
-int iscontent = 0;
-int iscontenttag = 0;
-size_t attrcount = 0;
-char *standardtz = NULL; /* TZ variable at start of program */
-XMLParser parser; /* XML parser state */
-
-enum {
- TagUnknown = 0,
- /* RSS */
- RSSTagDcdate, RSSTagPubdate, RSSTagTitle,
- RSSTagLink, RSSTagDescription, RSSTagContentencoded,
- RSSTagGuid, RSSTagAuthor, RSSTagDccreator,
- /* Atom */
- AtomTagPublished, AtomTagUpdated, AtomTagTitle,
- AtomTagSummary, AtomTagContent,
- AtomTagId, AtomTagLink, AtomTagAuthor
-};
-
typedef struct feedtag {
char *name;
size_t namelen;
int id;
} FeedTag;
+static void die(const char *s);
+static void cleanup(void);
+
+static String *currentfield = NULL; /* pointer to current FeedItem field String */
+static FeedItem feeditem; /* data for current feed item */
+static char feeditemtag[256] = ""; /* current tag _inside_ a feeditem */
+static size_t feeditemtaglen = 0;
+static int feeditemtagid = 0; /* unique number for parsed tag (faster comparison) */
+static int iscontent = 0;
+static int iscontenttag = 0;
+static size_t attrcount = 0;
+static char *standardtz = NULL; /* TZ variable at start of program */
+static XMLParser parser; /* XML parser state */
+static char *append = NULL;
+
/* TODO: optimize lookup */
-int
+static int /* unique number for parsed tag (faster comparison) */
gettag(int feedtype, const char *name, size_t namelen) {
/* RSS, alphabetical order */
static FeedTag rsstag[] = {
@@ -91,11 +97,11 @@ gettag(int feedtype, const char *name, size_t namelen) {
{ NULL, 0, -1 }
};
int i, n;
-
+
if(namelen >= 2 && namelen <= 15) {
if(feedtype == FeedTypeRSS) {
for(i = 0; rsstag[i].name; i++) {
- if(!(n = xstrncasecmp(rsstag[i].name, name, rsstag[i].namelen)))
+ if(!(n = strncasecmp(rsstag[i].name, name, rsstag[i].namelen)))
return rsstag[i].id;
/* optimization: it's sorted so nothing after it matches. */
if(n > 0)
@@ -103,7 +109,7 @@ gettag(int feedtype, const char *name, size_t namelen) {
}
} else if(feedtype == FeedTypeAtom) {
for(i = 0; atomtag[i].name; i++) {
- if(!(n = xstrncasecmp(atomtag[i].name, name, atomtag[i].namelen)))
+ if(!(n = strncasecmp(atomtag[i].name, name, atomtag[i].namelen)))
return atomtag[i].id;
/* optimization: it's sorted so nothing after it matches. */
if(n > 0)
@@ -114,8 +120,21 @@ gettag(int feedtype, const char *name, size_t namelen) {
return TagUnknown;
}
-int
-entitytostr(const char *e, char *buffer, size_t bufsiz) {
+/* pack the UTF-8 bytes of codepoint cp into one integer (lead byte highest) */
+static unsigned long
+codepointtoutf8(unsigned long cp) {
+	unsigned long lo = cp & 0x3f, mid = (cp & 0xfc0) << 2; /* low two 6-bit groups */
+	if(cp < 0x80) /* 1 byte: returned unchanged */
+		return cp;
+	if(cp < 0x800) /* 2 bytes */
+		return 0xc080 | mid | lo;
+	if(cp < 0x10000) /* 3 bytes */
+		return 0xe08080 | ((cp & 0x3f000) << 4) | mid | lo;
+	return 0xf0808080 | ((cp & 0xfc0000) << 6) | ((cp & 0x3f000) << 4) | mid | lo; /* 4 bytes */
+}
+
+static int
+namedentitytostr(const char *e, char *buffer, size_t bufsiz) {
/* TODO: optimize lookup? */
char *entities[6][2] = {
{ "<", "<" },
@@ -130,7 +149,7 @@ entitytostr(const char *e, char *buffer, size_t bufsiz) {
return 0;
for(i = 0; entities[i][0]; i++) {
/* NOTE: compares max 7 chars */
- if(!xstrncasecmp(e, entities[i][0], 6)) {
+ if(!strncasecmp(e, entities[i][0], 6)) {
buffer[0] = *(entities[i][1]);
buffer[1] = '\0';
return 1;
@@ -139,7 +158,49 @@ entitytostr(const char *e, char *buffer, size_t bufsiz) {
return 0;
}
-void
+/* Convert the XML entity at e (which must start with '&') into buffer.
+ * Numeric references (&#NNN; and &#xHHH;) are written as UTF-8, except that
+ * newline and tab become the two-character escapes "\n" and "\t".
+ * Anything else is handed to namedentitytostr().
+ * bufsiz must be at least 5: up to 4 UTF-8 bytes plus the terminating NUL.
+ * Returns 1 when buffer holds a NUL-terminated result and 0 when the entity
+ * could not be converted, so the caller can fall back to the raw text. */
+static int
+entitytostr(const char *e, char *buffer, size_t bufsiz) {
+	unsigned long l, cp;
+	int shift, n = 0;
+
+	if(*e != '&' || bufsiz < 5) /* doesnt start with & */
+		return 0;
+	/* named entity: hand over the text with its '&' still attached.
+	 * NOTE(review): assumes the lookup table is keyed on "&name;" - confirm */
+	if(e[1] != '#')
+		return namedentitytostr(e, buffer, bufsiz);
+	e += 2; /* skip "&#" */
+	if(*e == 'x')
+		l = strtoul(e + 1, NULL, 16); /* hex */
+	else
+		l = strtoul(e, NULL, 10); /* decimal */
+	/* no digits, NUL, or beyond the Unicode range: nothing sane to emit */
+	if(!l || l > 0x10ffff)
+		return 0;
+	if(l == '\n' || l == '\t') { /* written as an escape sequence */
+		buffer[0] = '\\';
+		buffer[1] = (l == '\n') ? 'n' : 't';
+		buffer[2] = '\0';
+		return 1;
+	}
+	/* codepointtoutf8() packs the bytes into one integer, lead byte highest:
+	 * skip the unused high bytes, then copy from most to least significant */
+	cp = codepointtoutf8(l);
+	for(shift = 24; shift > 0 && !(cp >> shift); shift -= 8);
+	for(; shift >= 0; shift -= 8)
+		buffer[n++] = (char)((cp >> shift) & 0xff);
+	buffer[n] = '\0';
+	return 1;
+}
+
+static void
string_clear(String *s) {
if(s->data)
s->data[0] = '\0'; /* clear string only; don't free, prevents
@@ -147,7 +208,7 @@ string_clear(String *s) {
s->len = 0;
}
-void
+static void
string_buffer_init(String *s, size_t len) {
if(!(s->data = malloc(len)))
die("can't allocate enough memory");
@@ -155,7 +216,7 @@ string_buffer_init(String *s, size_t len) {
string_clear(s);
}
-void
+static void
string_free(String *s) {
free(s->data);
s->data = NULL;
@@ -163,12 +224,10 @@ string_free(String *s) {
s->len = 0;
}
-int
-string_buffer_expand(String *s, size_t newlen) {
+static int
+string_buffer_realloc(String *s, size_t newlen) {
char *p;
size_t alloclen;
- /* check if allocation is necesary, dont shrink buffer
- should be more than bufsiz ofcourse */
for(alloclen = 16; alloclen <= newlen; alloclen *= 2);
if(!(p = realloc(s->data, alloclen))) {
string_free(s); /* free previous allocation */
@@ -179,18 +238,20 @@ string_buffer_expand(String *s, size_t newlen) {
return s->bufsiz;
}
-void
+static void
string_append(String *s, const char *data, size_t len) {
if(!len || *data == '\0')
return;
+ /* check if allocation is necesary, dont shrink buffer
+ should be more than bufsiz ofcourse */
if(s->len + len > s->bufsiz)
- string_buffer_expand(s, s->len + len);
+ string_buffer_realloc(s, s->len + len);
memcpy(s->data + s->len, data, len);
s->len += len;
s->data[s->len] = '\0';
}
-void /* cleanup parser, free allocated memory, etc */
+static void /* cleanup, free allocated memory, etc */
cleanup(void) {
string_free(&feeditem.timestamp);
string_free(&feeditem.title);
@@ -200,7 +261,7 @@ cleanup(void) {
string_free(&feeditem.author);
}
-void /* print error message to stderr */
+static void /* print error message to stderr */
die(const char *s) {
fputs("sfeed: ", stderr);
fputs(s, stderr);
@@ -210,7 +271,7 @@ die(const char *s) {
/* get timezone from string, return as formatted string and time offset,
* for the offset it assumes GMT */
-int
+static int
gettimetz(const char *s, char *buf, size_t bufsiz) {
const char *p = s;
char tzname[16] = "", *t = NULL;
@@ -239,7 +300,7 @@ gettimetz(const char *s, char *buf, size_t bufsiz) {
} else
memcpy(tzname, "GMT", strlen("GMT") + 1);
if(!(*p)) {
- strncpy(buf, tzname, bufsiz);
+ strlcpy(buf, tzname, bufsiz); /* TODO: dont depend on strlcpy? */
return 0;
}
if((sscanf(p, "%c%02d:%02d", &c, &tzhour, &tzmin)) > 0);
@@ -254,31 +315,30 @@ gettimetz(const char *s, char *buf, size_t bufsiz) {
/* parses everything in a format similar to:
* "%a, %d %b %Y %H:%M:%S" or "%Y-%m-%d %H:%M:%S" */
/* TODO: calculate time offset (GMT only) from gettimetz ? */
-int
+static int
parsetimeformat(const char *s, struct tm *t, const char **end) {
- static const char *months[] = {
+ const char *months[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
"Nov", "Dec"
};
- const char *p = s;
unsigned int i, fm;
unsigned long l;
memset(t, 0, sizeof(struct tm));
- if((l = strtoul(p, (void *)&p, 10))) {
+ if((l = strtoul(s, (void *)&s, 10))) {
t->tm_year = abs(l) - 1900;
- if(!(l = strtoul(p, (void *)&p, 10)))
+ if(!(l = strtoul(s, (void *)&s, 10)))
return 0;
t->tm_mon = abs(l) - 1;
- if(!(t->tm_mday = abs(strtoul(p, (void *)&p, 10))))
+ if(!(t->tm_mday = abs(strtoul(s, (void *)&s, 10))))
return 0;
} else {
- for(; *p && !isdigit((int)*p); p++);
- if(!(t->tm_mday = abs(strtoul(p, (void *)&p, 10))))
+ for(; *s && !isdigit((int)*s); s++);
+ if(!(t->tm_mday = abs(strtoul(s, (void *)&s, 10))))
return 0;
- for(; *p && !isalpha((int)*p); p++); /* skip non-alpha */
+ for(; *s && !isalpha((int)*s); s++); /* skip non-alpha */
for(fm = 0, i = 0; i < 12; i++) { /* parse month names */
- if(!xstrncasecmp(p, months[i], 3)) {
+ if(!strncasecmp(s, months[i], 3)) {
t->tm_mon = i;
fm = 1;
break;
@@ -286,22 +346,22 @@ parsetimeformat(const char *s, struct tm *t, const char **end) {
}
if(!fm) /* can't find month */
return 0;
- for(; *p && !isdigit((int)*p); p++); /* skip non-digit */
- if(!(l = strtoul(p, (void *)&p, 10)))
+ for(; *s && !isdigit((int)*s); s++); /* skip non-digit */
+ if(!(l = strtoul(s, (void *)&s, 10)))
return 0;
t->tm_year = abs(l) - 1900;
}
- for(; *p && !isdigit((int)*p); p++); /* skip non-digit */
- if((t->tm_hour = abs(strtoul(p, (void *)&p, 10))) > 23)
+ for(; *s && !isdigit((int)*s); s++); /* skip non-digit */
+ if((t->tm_hour = abs(strtoul(s, (void *)&s, 10))) > 23)
return 0;
- for(; *p && !isdigit((int)*p); p++); /* skip non-digit */
- if((t->tm_min = abs(strtoul(p, (void *)&p, 10))) > 59)
+ for(; *s && !isdigit((int)*s); s++); /* skip non-digit */
+ if((t->tm_min = abs(strtoul(s, (void *)&s, 10))) > 59)
return 0;
- for(; *p && !isdigit((int)*p); p++); /* skip non-digit */
- if((t->tm_sec = abs(strtoul(p, (void *)&p, 10))) > 60)
+ for(; *s && !isdigit((int)*s); s++); /* skip non-digit */
+ if((t->tm_sec = abs(strtoul(s, (void *)&s, 10))) > 60)
return 0;
if(end)
- *end = p;
+ *end = s;
return 1;
}
@@ -309,8 +369,8 @@ parsetimeformat(const char *s, struct tm *t, const char **end) {
#define Q(a,b) ((a)>0 ? (a)/(b) : -(((b)-(a)-1)/(b)))
/* copied from Musl C awesome small implementation, see LICENSE. */
-time_t
-tm_to_time(struct tm *tm) {
+static time_t
+tmtotime(struct tm *tm) {
time_t year = tm->tm_year - 100;
int month = tm->tm_mon;
int day = tm->tm_mday;
@@ -338,7 +398,7 @@ tm_to_time(struct tm *tm) {
946684800; /* the dawn of time, aka 1970 (30 years of seconds) :) */
}
-time_t
+static time_t
parsetime(const char *s, char *buf) {
struct tm tm;
char tz[64];
@@ -355,11 +415,12 @@ parsetime(const char *s, char *buf) {
tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
tm.tm_hour, tm.tm_min, tm.tm_sec, tz);
/* return UNIX time, reverse offset to GMT+0 */
- return tm_to_time(&tm) - offset;
+ return tmtotime(&tm) - offset;
}
return -1; /* can't parse */
}
+#if 0
/* print text, ignore tabs, newline and carriage return etc
* print some HTML 2.0 / XML 1.0 as normal text */
void
@@ -379,8 +440,7 @@ string_print_trimmed(String *s) {
return;
for(p = s->data; isspace((int)*p); p++); /* strip leading whitespace */
for(; *p; ) { /* ignore tabs, newline and carriage return etc, except space */
- /*if(!isspace((int)*p) || *p == ' ') {*/
- if(!((unsigned)*p - '\t' < 5)) {
+ if(!ISWSNOSPACE(*p)) { /* !isspace(c) || c == ' ' */
if(*p == '<') { /* skip tags */
if((n = strchr(p, '>'))) {
p = n + 1;
@@ -389,9 +449,9 @@ string_print_trimmed(String *s) {
}
buffer[buflen++] = *p;
}
- if(buflen >= BUFSIZ) {
- fwrite(buffer, 1, buflen, stdout);
- buflen = 0;
+ if(buflen >= BUFSIZ) { /* align write size with BUFSIZ */
+ fwrite(buffer, 1, BUFSIZ, stdout);
+ buflen -= BUFSIZ;
}
p++;
}
@@ -410,7 +470,7 @@ string_print_textblock(String *s) {
/* skip leading whitespace */
for(p = s->data; *p && isspace((int)*p); p++);
for(i = 0; *p; p++) {
- if(((unsigned)*p - '\t') < 5) {
+ if(ISWSNOSPACE(*p)) { /* isspace(c) && c != ' ' */
if(*p == '\n') { /* escape newline */
buffer[i++] = '\\';
buffer[i++] = 'n';
@@ -421,32 +481,67 @@ string_print_textblock(String *s) {
buffer[i++] = '\\';
buffer[i++] = 't';
}
- /* ignore other whitespace chars, except space */
} else {
buffer[i++] = *p;
}
- if(i >= BUFSIZ) { /* TODO: align */
- fwrite(buffer, 1, i, stdout);
- i = 0;
+ if(i >= BUFSIZ) { /* align write size with BUFSIZ */
+ fwrite(buffer, 1, BUFSIZ, stdout);
+ i -= BUFSIZ;
}
}
if(i)
fwrite(buffer, 1, i, stdout);
}
+#endif
-int
+/* Print field s to stdout: leading whitespace is skipped, newline and tab
+ * are written as the two-character escapes "\n" and "\t", the other control
+ * whitespace (\v, \f, \r) is dropped and every other byte is copied as-is.
+ * NOTE(review): a literal backslash is not escaped (it is not in the range
+ * ISWSNOSPACE() tests for), so the text "\n" in a feed is indistinguishable
+ * from an escaped newline in the output. */
+static void
+string_print(String *s) {
+	const char *p;
+	char buffer[BUFSIZ + 4]; /* slack: an escape pair may pass BUFSIZ by one */
+	size_t i = 0;
+
+	if(!s->len)
+		return;
+	/* skip leading whitespace */
+	for(p = s->data; *p && isspace((unsigned char)*p); p++);
+	for(; *p; p++) {
+		if(!ISWSNOSPACE(*p)) { /* ordinary byte, including ' ' */
+			buffer[i++] = *p;
+		} else if(*p == '\n' || *p == '\t') {
+			buffer[i++] = '\\';
+			buffer[i++] = (*p == '\n') ? 'n' : 't';
+		}
+		if(i >= BUFSIZ) { /* align write size with BUFSIZ */
+			fwrite(buffer, 1, BUFSIZ, stdout);
+			/* carry the overshoot byte to the front instead of
+			 * dropping it and re-sending a stale buffer[0] */
+			i -= BUFSIZ;
+			memmove(buffer, buffer + BUFSIZ, i);
+		}
+	}
+	if(i) /* write remaining */
+		fwrite(buffer, 1, i, stdout);
+}
+
+static int
istag(const char *name, size_t len, const char *name2, size_t len2) {
- return (len == len2 && !xstrcasecmp(name, name2));
+ return (len == len2 && !strcasecmp(name, name2));
}
-int
+static int
isattr(const char *name, size_t len, const char *name2, size_t len2) {
- return (len == len2 && !xstrcasecmp(name, name2));
+ return (len == len2 && !strcasecmp(name, name2));
}
/* NOTE: this handler can be called multiple times if the data in this
* block is bigger than the buffer */
-void
+static void
xml_handler_data(XMLParser *p, const char *s, size_t len) {
if(currentfield) {
if(feeditemtagid != AtomTagAuthor || !strcmp(p->tag, "name")) /* author>name */
@@ -454,13 +549,13 @@ xml_handler_data(XMLParser *p, const char *s, size_t len) {
}
}
-void
+static void
xml_handler_cdata(XMLParser *p, const char *s, size_t len) {
if(currentfield)
string_append(currentfield, s, len);
}
-void
+static void
xml_handler_attr_start(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen) {
if(iscontent && !iscontenttag) {
if(!attrcount)
@@ -472,7 +567,7 @@ xml_handler_attr_start(struct xmlparser *p, const char *tag, size_t taglen, cons
}
}
-void
+static void
xml_handler_attr_end(struct xmlparser *p, const char *tag, size_t taglen, const char *name, size_t namelen) {
if(iscontent && !iscontenttag) {
xml_handler_data(p, "\"", 1);
@@ -480,7 +575,7 @@ xml_handler_attr_end(struct xmlparser *p, const char *tag, size_t taglen, const
}
}
-void
+static void
xml_handler_start_element_parsed(XMLParser *p, const char *tag, size_t taglen, int isshort) {
if(iscontent && !iscontenttag) {
if(isshort)
@@ -490,7 +585,7 @@ xml_handler_start_element_parsed(XMLParser *p, const char *tag, size_t taglen, i
}
}
-void
+static void
xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
const char *name, size_t namelen, const char *value,
size_t valuelen) {
@@ -516,7 +611,7 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
}
}
-void
+static void
xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) {
if(iscontenttag) {
/* starts with div, handle as XML, dont convert entities */
@@ -541,7 +636,6 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) {
memcpy(feeditemtag, name, namelen + 1); /* copy including nul byte */
feeditemtaglen = namelen;
feeditemtagid = gettag(feeditem.feedtype, feeditemtag, feeditemtaglen);
-
if(feeditem.feedtype == FeedTypeRSS) {
if(feeditemtagid == TagUnknown)
currentfield = NULL;
@@ -587,7 +681,7 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) {
else if(feeditemtagid == AtomTagAuthor)
currentfield = &feeditem.author;
}
- /* TODO: prefer content encoded over content? */
+ /* TODO: prefer content encoded over content? test */
}
} else { /* start of RSS or Atom item / entry */
if(istag(name, namelen, "entry", strlen("entry"))) { /* Atom */
@@ -602,7 +696,7 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen) {
}
}
-void
+static void
xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen) {
char buffer[16];
size_t len;
@@ -620,7 +714,7 @@ xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen) {
xml_handler_data(p, data, datalen); /* can't convert entity, just use it's data */
}
-void
+static void
xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int isshort) {
char timebuf[64];
int tagid;
@@ -660,19 +754,23 @@ xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int issh
putchar(FieldSeparator);
fputs(timebuf, stdout);
putchar(FieldSeparator);
- string_print_trimmed(&feeditem.title);
+ string_print(&feeditem.title);
putchar(FieldSeparator);
- string_print_trimmed(&feeditem.link);
+ string_print(&feeditem.link);
putchar(FieldSeparator);
- string_print_textblock(&feeditem.content);
+ string_print(&feeditem.content);
putchar(FieldSeparator);
fputs(contenttypes[feeditem.contenttype], stdout);
putchar(FieldSeparator);
- string_print_trimmed(&feeditem.id);
+ string_print(&feeditem.id);
putchar(FieldSeparator);
- string_print_trimmed(&feeditem.author);
+ string_print(&feeditem.author);
putchar(FieldSeparator);
fputs(feedtypes[feeditem.feedtype], stdout);
+ if(append) {
+ putchar(FieldSeparator);
+ fputs(append, stdout);
+ }
putchar('\n');
/* clear strings */
@@ -705,9 +803,12 @@ xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int issh
}
int
-main(void) {
+main(int argc, char **argv) {
atexit(cleanup);
+ if(argc > 1)
+ append = argv[1];
+
/* init strings and initial memory pool size */
string_buffer_init(&feeditem.timestamp, 64);
string_buffer_init(&feeditem.title, 256);
diff --git a/sfeed_frames.c b/sfeed_frames.c
@@ -7,12 +7,16 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <utime.h>
-#include "common.h"
-#include "compat.h"
-static int showsidebar = 1; /* show sidebar ? */
+#include "util.h"
-void /* print error message to stderr */
+static unsigned int showsidebar = 1; /* show sidebar ? */
+
+static FILE *fpindex = NULL, *fpitems = NULL, *fpmenu = NULL, *fpcontent = NULL;
+static char *line = NULL;
+static struct feed *feeds = NULL; /* start of feeds linked-list. */
+
+static void /* print error message to stderr */
die(const char *s) {
fputs("sfeed_frames: ", stderr);
fputs(s, stderr);
@@ -20,9 +24,23 @@ die(const char *s) {
exit(EXIT_FAILURE);
}
+static void
+cleanup(void) {
+ if(fpmenu)
+ fclose(fpmenu);
+ if(fpitems)
+ fclose(fpitems);
+ if(fpindex)
+ fclose(fpindex);
+ if(fpcontent)
+ fclose(fpcontent);
+ free(line); /* free line */
+ feedsfree(feeds); /* free feeds linked-list */
+}
+
/* print text, ignore tabs, newline and carriage return etc
* print some HTML 2.0 / XML 1.0 as normal text */
-void
+static void
printcontent(const char *s, FILE *fp) {
const char *p;
int len = 0;
@@ -42,7 +60,8 @@ printcontent(const char *s, FILE *fp) {
}
}
-size_t
+/* TODO: bufsiz - 1 ? */
+static size_t
makepathname(char *buffer, size_t bufsiz, const char *path) {
const char *p = path;
size_t i = 0, r = 0;
@@ -64,164 +83,208 @@ makepathname(char *buffer, size_t bufsiz, const char *path) {
return i;
}
-int
+static int
fileexists(const char *path) {
return (!access(path, F_OK));
}
int
main(int argc, char **argv) {
- char *line = NULL, *fields[FieldLast];
+ char *fields[FieldLast];
+ char name[256]; /* TODO: bigger size? */
+ char *basepath = "feeds";
+ /* TODO: max path size? */
+ char dirpath[1024], filepath[1024], reldirpath[1024], relfilepath[1024];
unsigned long totalfeeds = 0, totalnew = 0;
unsigned int isnew;
- struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */
+ struct feed *f, *feedcurrent = NULL;
time_t parsedtime, comparetime;
- size_t size = 0;
- char name[256];
- char dirpath[1024];
- char filepath[1024];
- char reldirpath[1024];
- char relfilepath[1024];
- FILE *fpindex, *fpitems, *fpmenu, *fpcontent;
- char *basepath = "feeds";
+ size_t size = 0, namelen = 0, basepathlen = 0;
+
struct utimbuf contenttime;
- size_t namelen = 0;
+ atexit(cleanup);
memset(&contenttime, 0, sizeof(contenttime));
if(argc > 1 && argv[1][0] != '\0')
basepath = argv[1];
comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
- xmkdir(basepath, S_IRWXU);
+ mkdir(basepath, S_IRWXU);
/* write main index page */
- if(strlen(basepath) + strlen("/index.html") < sizeof(dirpath) - 1)
+ basepathlen = strlen(basepath);
+ if(basepathlen + strlen("/index.html") < sizeof(dirpath) - 1)
sprintf(dirpath, "%s/index.html", basepath);
- if((fpindex = fopen(dirpath, "w+b"))) {
- }
- if(strlen(basepath) + strlen("/menu.html") < sizeof(dirpath) - 1)
+ if(!(fpindex = fopen(dirpath, "w+b")))
+ die("can't write index.html");
+ if(basepathlen + strlen("/menu.html") < sizeof(dirpath) - 1)
sprintf(dirpath, "%s/menu.html", basepath);
- if(!(fpmenu = fopen(dirpath, "w+b"))) {
- /* TODO: error */
- fclose(fpindex);
- return EXIT_FAILURE;
- }
- if(strlen(basepath) + strlen("/items.html") < sizeof(dirpath) - 1)
+ if(!(fpmenu = fopen(dirpath, "w+b")))
+ die("can't write menu.html");
+ if(basepathlen + strlen("/items.html") < sizeof(dirpath) - 1)
sprintf(dirpath, "%s/items.html", basepath);
- if(!(fpitems = fopen(dirpath, "w+b"))) {
- /* TODO: error */
- fclose(fpmenu);
- fclose(fpindex);
- return EXIT_FAILURE;
- }
- fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" /></head>", fpitems);
- fputs("<body class=\"frame\"><div id=\"items\">", fpitems);
-
+ if(!(fpitems = fopen(dirpath, "w+b")))
+ die("can't write items.html");
+ fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" /></head>"
+ "<body class=\"frame\"><div id=\"items\">", fpitems);
while(parseline(&line, &size, fields, FieldLast, '\t', stdin) > 0) {
+
+
+/*
+
dirpath[0] = '\0';
filepath[0] = '\0';
reldirpath[0] = '\0';
relfilepath[0] = '\0';
- namelen = makepathname(name, sizeof(name) - 1, fields[FieldFeedName]);
- if(namelen) {
- if(strlen(basepath) + namelen + 1 < sizeof(dirpath) - 1)
+
+
+*/
+
+
+
+
+
+
+
+ /* first of feed section or new feed section. */
+ if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) {
+
+
+ /* TODO: makepathname isn't necessary if fields[FieldFeedName] is the same as the previous line */
+ /* TODO: move this part below where FieldFeedName is checked if its different ? */
+
+ /* make directory for feedname */
+ namelen = makepathname(name, sizeof(name) - 1, fields[FieldFeedName]);
+ if(!namelen)
+ continue;
+
+ if(basepathlen + namelen + 1 < sizeof(dirpath) - 1)
sprintf(dirpath, "%s/%s", basepath, name);
/* TODO: handle error. */
- if(xmkdir(dirpath, S_IRWXU) != -1) {
+ if(mkdir(dirpath, S_IRWXU) != -1) {
+ fprintf(stderr, "sfeed_frames: can't write '%s'\n", dirpath);
+ exit(EXIT_FAILURE);
+ }
+ /* TODO: test, replaces strncpy (strncpy is slow) */
+ reldirpath[0] = '\0';
+ if(namelen < sizeof(reldirpath) - 2) {
+ memcpy(reldirpath, name, namelen + 1); /* copy including nul byte */
+ /* reldirpath[namelen] = '\0';*/
}
- strncpy(reldirpath, name, sizeof(reldirpath) - 1);
- namelen = makepathname(name, sizeof(name), fields[FieldTitle]);
- if(namelen) {
- if(strlen(dirpath) + namelen + strlen("/.html") < sizeof(filepath) - 1)
- sprintf(filepath, "%s/%s.html", dirpath, name);
- if(strlen(reldirpath) + namelen + strlen("/.html") < sizeof(relfilepath) - 1)
- sprintf(relfilepath, "%s/%s.html", reldirpath, name);
- if(!fileexists(filepath) && (fpcontent = fopen(filepath, "w+b"))) {
- fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../../style.css\" /></head>", fpcontent);
- fputs("<body class=\"frame\"><div class=\"content\">", fpcontent);
- fputs("<h2><a href=\"", fpcontent);
- if(fields[FieldBaseSiteUrl][0] != '\0')
- printlink(fields[FieldLink], fields[FieldBaseSiteUrl], fpcontent);
- else
- printlink(fields[FieldLink], fields[FieldFeedUrl], fpcontent);
- fputs("\">", fpcontent);
- printhtmlencoded(fields[FieldTitle], fpcontent);
- fputs("</a></h2>", fpcontent);
- printcontent(fields[FieldContent], fpcontent);
- fputs("</div></body></html>", fpcontent);
- fclose(fpcontent);
- }
+ /* strncpy(reldirpath, name, sizeof(reldirpath) - 1);*/
+
+
+
+
+
+ if(!(f = calloc(1, sizeof(struct feed))))
+ die("can't allocate enough memory");
+
+
- /* first of feed section or new feed section. */
- if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) {
- if(totalfeeds) { /* end previous one. */
- fputs("</table>\n", fpitems);
- if(!(feedcurrent->next = calloc(1, sizeof(struct feed))))
- die("can't allocate enough memory");
- feedcurrent = feedcurrent->next;
- } else {
- if(!(feedcurrent = calloc(1, sizeof(struct feed))))
- die("can't allocate enough memory");
- feeds = feedcurrent; /* first item. */
- if(fields[FieldFeedName][0] == '\0') {
- showsidebar = 0;
- }
- }
- /* write menu link if new. */
- if(!(feedcurrent->name = xstrdup(fields[FieldFeedName])))
- die("can't allocate enough memory");
- if(fields[FieldFeedName][0] != '\0') {
- fputs("<h2 id=\"", fpitems);
- printfeednameid(feedcurrent->name, fpitems);
- fputs("\"><a href=\"#", fpitems);
- printfeednameid(feedcurrent->name, fpitems);
- fputs("\">", fpitems);
- fputs(feedcurrent->name, fpitems);
- fputs("</a></h2>\n", fpitems);
- }
- fputs("<table cellpadding=\"0\" cellspacing=\"0\">\n", fpitems);
- totalfeeds++;
+ if(totalfeeds) { /* end previous one. */
+ fputs("</table>\n", fpitems);
+
+
+ feedcurrent->next = f;
+ feedcurrent = feedcurrent->next;
+
+
+
+ } else {
+
+
+ feedcurrent = f;
+
+
+ feeds = feedcurrent; /* first item. */
+ if(fields[FieldFeedName][0] == '\0') {
+ showsidebar = 0;
}
- /* write item. */
- parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
- /* set modified and access time of file to time of item. */
- contenttime.actime = parsedtime;
- contenttime.modtime = parsedtime;
- utime(filepath, &contenttime);
-
- isnew = (parsedtime >= comparetime);
- totalnew += isnew;
- feedcurrent->totalnew += isnew;
- feedcurrent->total++;
- if(isnew)
- fputs("<tr class=\"n\"><td nowrap valign=\"top\">", fpitems);
- else
- fputs("<tr><td nowrap valign=\"top\">", fpitems);
- fputs("<tr><td nowrap valign=\"top\">", fpitems);
- fputs(fields[FieldTimeFormatted], fpitems);
- fputs("</td><td nowrap valign=\"top\">", fpitems);
- if(isnew)
- fputs("<b><u>", fpitems);
- fputs("<a href=\"", fpitems);
- fputs(relfilepath, fpitems);
- fputs("\" target=\"content\">", fpitems);
- printhtmlencoded(fields[FieldTitle], fpitems);
- fputs("</a>", fpitems);
- if(isnew)
- fputs("</u></b>", fpitems);
- fputs("</td></tr>\n", fpitems);
}
+ /* write menu link if new. */
+ if(!(feedcurrent->name = strdup(fields[FieldFeedName])))
+ die("can't allocate enough memory");
+ if(fields[FieldFeedName][0] != '\0') {
+ fputs("<h2 id=\"", fpitems);
+ printfeednameid(feedcurrent->name, fpitems);
+ fputs("\"><a href=\"#", fpitems);
+ printfeednameid(feedcurrent->name, fpitems);
+ fputs("\">", fpitems);
+ fputs(feedcurrent->name, fpitems);
+ fputs("</a></h2>\n", fpitems);
+ }
+ fputs("<table cellpadding=\"0\" cellspacing=\"0\">\n", fpitems);
+ totalfeeds++;
}
+
+
+
+ /* write content */
+ namelen = makepathname(name, sizeof(name), fields[FieldTitle]);
+ if(!namelen)
+ continue;
+ if(strlen(dirpath) + namelen + strlen("/.html") < sizeof(filepath) - 1)
+ sprintf(filepath, "%s/%s.html", dirpath, name);
+ if(strlen(reldirpath) + namelen + strlen("/.html") < sizeof(relfilepath) - 1)
+ sprintf(relfilepath, "%s/%s.html", reldirpath, name);
+ if(!fileexists(filepath) && (fpcontent = fopen(filepath, "w+b"))) {
+ fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../../style.css\" /></head>"
+ "<body class=\"frame\"><div class=\"content\">"
+ "<h2><a href=\"", fpcontent);
+ if(fields[FieldBaseSiteUrl][0] != '\0')
+ printlink(fields[FieldLink], fields[FieldBaseSiteUrl], fpcontent);
+ else
+ printlink(fields[FieldLink], fields[FieldFeedUrl], fpcontent);
+ fputs("\">", fpcontent);
+ printhtmlencoded(fields[FieldTitle], fpcontent);
+ fputs("</a></h2>", fpcontent);
+ printcontent(fields[FieldContent], fpcontent);
+ fputs("</div></body></html>", fpcontent);
+ fclose(fpcontent);
+ }
+
+
+
+
+ /* write item. */
+ parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
+ /* set modified and access time of file to time of item. */
+ contenttime.actime = parsedtime;
+ contenttime.modtime = parsedtime;
+ utime(filepath, &contenttime);
+
+ isnew = (parsedtime >= comparetime);
+ totalnew += isnew;
+ feedcurrent->totalnew += isnew;
+ feedcurrent->total++;
+ if(isnew)
+ fputs("<tr class=\"n\">", fpitems);
+ else
+ fputs("<tr>", fpitems);
+ fputs("<td nowrap valign=\"top\">", fpitems);
+ fputs(fields[FieldTimeFormatted], fpitems);
+ fputs("</td><td nowrap valign=\"top\">", fpitems);
+ if(isnew)
+ fputs("<b><u>", fpitems);
+ fputs("<a href=\"", fpitems);
+ fputs(relfilepath, fpitems);
+ fputs("\" target=\"content\">", fpitems);
+ printhtmlencoded(fields[FieldTitle], fpitems);
+ fputs("</a>", fpitems);
+ if(isnew)
+ fputs("</u></b>", fpitems);
+ fputs("</td></tr>\n", fpitems);
}
if(totalfeeds) {
fputs("</table>\n", fpitems);
}
fputs("\n</div></body>\n</html>", fpitems); /* div items */
if(showsidebar) {
- fputs("<html><head>", fpmenu);
- fputs("<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />", fpmenu);
- fputs("</head><body class=\"frame\"><div id=\"sidebar\">", fpmenu);
+ fputs("<html><head>"
+ "<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />"
+ "</head><body class=\"frame\"><div id=\"sidebar\">", fpmenu);
for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) {
if(!feedcurrent->name || feedcurrent->name[0] == '\0')
continue;
@@ -241,33 +304,24 @@ main(int argc, char **argv) {
}
fputs("</div></body></html>", fpmenu);
}
-
- fputs("<!DOCTYPE html><html><head>\n", fpindex);
- fprintf(fpindex, "\t<title>Newsfeed (%lu)</title>\n", totalnew);
- fputs("\t<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n", fpindex);
- fputs("</head>\n", fpindex);
+ fputs("<!DOCTYPE html><html><head>\n\t<title>Newsfeed (", fpindex);
+ fprintf(fpindex, "%lu", totalnew);
+ fputs(")</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n"
+ "</head>\n", fpindex);
if(showsidebar) {
fputs(
- "<frameset framespacing=\"0\" cols=\"200,*\" frameborder=\"1\">"
- " <frame name=\"menu\" src=\"menu.html\" target=\"menu\">", fpindex);
+ "<frameset framespacing=\"0\" cols=\"200,*\" frameborder=\"1\">\n"
+ " <frame name=\"menu\" src=\"menu.html\" target=\"menu\">\n", fpindex);
} else {
- fputs(
- "<frameset framespacing=\"0\" cols=\"*\" frameborder=\"1\">", fpindex);
+ fputs("<frameset framespacing=\"0\" cols=\"*\" frameborder=\"1\">\n", fpindex);
}
fputs(
- " <frameset id=\"frameset\" framespacing=\"0\" cols=\"50%,50%\" frameborder=\"1\">"
- " <frame name=\"items\" src=\"items.html\" target=\"items\">"
- " <frame name=\"content\" target=\"content\">"
- " </frameset>"
- "</frameset>"
+ " <frameset id=\"frameset\" framespacing=\"0\" cols=\"50%,50%\" frameborder=\"1\">\n"
+ " <frame name=\"items\" src=\"items.html\" target=\"items\">\n"
+ " <frame name=\"content\" target=\"content\">\n"
+ " </frameset>\n"
+ "</frameset>\n"
"</html>", fpindex);
- fclose(fpmenu);
- fclose(fpitems);
- fclose(fpindex);
-
- free(line); /* free line */
- feedsfree(feeds); /* free feeds linked-list */
-
return EXIT_SUCCESS;
}
diff --git a/sfeed_html.c b/sfeed_html.c
@@ -3,12 +3,21 @@
#include <stdlib.h>
#include <time.h>
#include <ctype.h>
-#include "common.h"
-#include "compat.h"
+
+#include "util.h"
static int showsidebar = 1; /* show sidebar ? */
-void /* print error message to stderr */
+static struct feed *feeds = NULL; /* start of feeds linked-list. */
+static char *line = NULL;
+
+static void
+cleanup(void) {
+ free(line); /* free line */
+ feedsfree(feeds); /* free feeds linked-list */
+}
+
+static void /* print error message to stderr */
die(const char *s) {
fputs("sfeed_html: ", stderr);
fputs(s, stderr);
@@ -18,13 +27,14 @@ die(const char *s) {
int
main(void) {
- char *line = NULL, *fields[FieldLast];
+ char *fields[FieldLast];
unsigned long totalfeeds = 0, totalnew = 0;
- int islink, isnew;
- struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */
+ unsigned int islink, isnew;
+ struct feed *f, *feedcurrent = NULL;
time_t parsedtime, comparetime;
size_t size = 0;
+ atexit(cleanup);
comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
fputs(
"<!DOCTYPE HTML>\n"
@@ -36,20 +46,25 @@ main(void) {
" <body class=\"noframe\">\n",
stdout);
+ if(!(feedcurrent = calloc(1, sizeof(struct feed))))
+ die("can't allocate enough memory");
+ feeds = feedcurrent;
+
while(parseline(&line, &size, fields, FieldLast, '\t', stdin) > 0) {
parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
isnew = (parsedtime >= comparetime);
islink = (fields[FieldLink][0] != '\0');
/* first of feed section or new feed section. */
- if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) {
+ if(!totalfeeds || (feedcurrent && strcmp(feedcurrent->name, fields[FieldFeedName]))) { /* TODO: allocate feedcurrent before here, feedcurrent can be NULL */
+ if(!(f = calloc(1, sizeof(struct feed))))
+ die("can't allocate enough memory");
+ /*f->next = NULL;*/
if(totalfeeds) { /* end previous one. */
fputs("</table>\n", stdout);
- if(!(feedcurrent->next = calloc(1, sizeof(struct feed))))
- die("can't allocate enough memory");
- feedcurrent = feedcurrent->next;
+ feedcurrent->next = f;
+ feedcurrent = f;
} else {
- if(!(feedcurrent = calloc(1, sizeof(struct feed))))
- die("can't allocate enough memory");
+ feedcurrent = f;
feeds = feedcurrent; /* first item. */
if(fields[FieldFeedName][0] == '\0' || !showsidebar) {
/* set nosidebar class on div for styling */
@@ -58,8 +73,17 @@ main(void) {
} else
fputs("\t\t<div id=\"items\">\n", stdout);
}
- if(!(feedcurrent->name = xstrdup(fields[FieldFeedName])))
+
+ /* TODO: memcpy and make feedcurrent->name static? */
+ if(!(feedcurrent->name = strdup(fields[FieldFeedName])))
die("can't allocate enough memory");
+
+
+ /*
+ feedcurrent->totalnew = 0;
+ feedcurrent->total = 0;
+ feedcurrent->next = NULL;*/
+
if(fields[FieldFeedName][0] != '\0') {
fputs("<h2 id=\"", stdout);
printfeednameid(feedcurrent->name, stdout);
@@ -75,14 +99,13 @@ main(void) {
totalnew += isnew;
feedcurrent->totalnew += isnew;
feedcurrent->total++;
-
if(isnew)
- fputs("<tr class=\"n\"><td nowrap valign=\"top\">", stdout);
+ fputs("<tr class=\"n\">", stdout);
else
- fputs("<tr><td nowrap valign=\"top\">", stdout);
+ fputs("<tr>", stdout);
+ fputs("<td nowrap valign=\"top\">", stdout);
fputs(fields[FieldTimeFormatted], stdout);
fputs("</td><td nowrap valign=\"top\">", stdout);
-
if(isnew)
fputs("<b><u>", stdout);
if(islink) {
@@ -100,10 +123,8 @@ main(void) {
fputs("</u></b>", stdout);
fputs("</td></tr>\n", stdout);
}
- if(totalfeeds) {
- fputs("</table>\n", stdout);
- fputs("\t\t</div>\n", stdout); /* div items */
- }
+ if(totalfeeds)
+ fputs("</table>\n\t\t</div>\n", stdout); /* div items */
if(showsidebar) {
fputs("\t<div id=\"sidebar\">\n\t\t<ul>\n", stdout);
for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) {
@@ -125,6 +146,16 @@ main(void) {
}
fputs("\t\t</ul>\n\t</div>\n", stdout);
}
+ /* toggle showing only new with "n" */
+ fputs("<script type=\"text/javascript\">"
+ "var b=document.body;window.onkeypress=function(e){"
+ "switch(String.fromCharCode(e.which)){"
+ "case 'n':var n='newonly';b.className=/*toggle new only*/"
+ "b.className.indexOf(n)==-1?b.className+' '+n:b.className.replace(n,'');break;"
+ "case 'm':case 's':b.querySelector('#sidebar a').focus();break; /*focus menu*/"
+ "case 'i':b.querySelector('#items').focus();break;/*focus items*/"
+ "}};"
+ "</script>", stdout);
fputs(
" </body>\n"
" <title>Newsfeed (",
@@ -132,8 +163,5 @@ main(void) {
fprintf(stdout, "%lu", totalnew);
fputs(")</title>\n</html>", stdout);
- free(line); /* free line */
- feedsfree(feeds); /* free feeds linked-list */
-
return EXIT_SUCCESS;
}
diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c
@@ -3,23 +3,23 @@
#include <stdlib.h>
#include <string.h>
#include <strings.h>
+
#include "xml.h"
-#include "compat.h"
-XMLParser parser; /* XML parser state */
-char feedurl[2048], feedname[2048], basesiteurl[2048];
+static XMLParser parser; /* XML parser state */
+static char feedurl[2048], feedname[2048], basesiteurl[2048];
-int
+static int
istag(const char *s1, const char *s2) {
- return !xstrcasecmp(s1, s2);
+ return !strcasecmp(s1, s2);
}
-int
+static int
isattr(const char *s1, const char *s2) {
- return !xstrcasecmp(s1, s2);
+ return !strcasecmp(s1, s2);
}
-void
+static void
xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) {
if(istag(tag, "outline")) {
feedurl[0] = '\0';
@@ -28,7 +28,7 @@ xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) {
}
}
-void
+static void
xml_handler_end_element(XMLParser *p, const char *tag, size_t taglen,
int isshort) {
if(istag(tag, "outline")) {
@@ -39,7 +39,7 @@ xml_handler_end_element(XMLParser *p, const char *tag, size_t taglen,
}
}
-void
+static void
xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
const char *name, size_t namelen, const char *value, size_t valuelen) {
if(istag(tag, "outline")) {
diff --git a/sfeed_plain.c b/sfeed_plain.c
@@ -2,27 +2,27 @@
#include <string.h>
#include <stdlib.h>
#include <time.h>
-#include "common.h"
-#include "compat.h"
+
+#include "util.h"
void
-printutf8padded(const char *s, size_t len) {
+printutf8padded(const char *s, size_t len, FILE *fp, int pad) {
size_t n = 0, i;
for(i = 0; s[i] && n < len; i++) {
if((s[i] & 0xc0) != 0x80) /* start of character */
n++;
- putchar(s[i]);
+ putc(s[i], fp);
}
for(; n < len; n++)
- putchar(' ');
+ putc(pad, fp);
}
int
main(void) {
char *line = NULL, *fields[FieldLast];
time_t parsedtime, comparetime;
- int isnew;
+ unsigned int isnew;
size_t size = 0;
comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
@@ -34,7 +34,7 @@ main(void) {
printf("%-15.15s ", fields[FieldFeedName]);
printf("%-30.30s", fields[FieldTimeFormatted]);
fputs(" ", stdout);
- printutf8padded(fields[FieldTitle], 70);
+ printutf8padded(fields[FieldTitle], 70, stdout, ' ');
fputs(" ", stdout);
if(fields[FieldBaseSiteUrl][0] != '\0')
printlink(fields[FieldLink], fields[FieldBaseSiteUrl], stdout);
diff --git a/sfeed_stats.c b/sfeed_stats.c
@@ -0,0 +1,91 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <ctype.h>
+
+#include "util.h"
+
+static struct feed *feeds = NULL; /* start of feeds linked-list. */
+static char *line = NULL;
+
+/* atexit() handler (registered in main): releases the feeds linked-list
+ * and the line buffer that parseline() fills. */
+static void
+cleanup(void) {
+	feedsfree(feeds);
+	free(line);
+}
+
+/* Write "sfeed_stats: <s>" and a newline to stderr, then terminate the
+ * program with EXIT_FAILURE. */
+static void
+die(const char *s) {
+	fprintf(stderr, "sfeed_stats: %s\n", s);
+	exit(EXIT_FAILURE);
+}
+
+/* Read feed items from stdin (one record per line, fields separated by
+ * tabs) and print statistics to stdout: first "Total new: <n>", then one
+ * line per named feed in the form
+ *   [new / total] name (newest <formatted time>)
+ * An item counts as new when its UNIX timestamp lies within the last 24
+ * hours. Consecutive lines carrying the same feed name form one feed.
+ * Returns EXIT_SUCCESS; allocation failures exit through die(). */
+int
+main(void) {
+	char *fields[FieldLast];
+	unsigned long totalfeeds = 0, totalnew = 0;
+	unsigned int isnew;
+	struct feed *f, *feedcurrent = NULL;
+	time_t parsedtime, comparetime;
+	size_t size = 0;
+
+	atexit(cleanup);
+	comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
+
+	while(parseline(&line, &size, fields, FieldLast, '\t', stdin) > 0) {
+		parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
+		isnew = (parsedtime >= comparetime);
+		/* first of feed section or new feed section.
+		 * feedcurrent is only NULL while totalfeeds is 0, so the
+		 * short-circuit keeps strcmp() away from a NULL pointer. */
+		if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) {
+			/* calloc: counters, timenewest and next start zeroed */
+			if(!(f = calloc(1, sizeof(*f))))
+				die("can't allocate enough memory");
+			if(totalfeeds)
+				feedcurrent->next = f; /* append to previous feed */
+			else
+				feeds = f; /* first item. */
+			feedcurrent = f;
+			/* the node is linked before strdup() so that cleanup()
+			 * still reaches it when die() is called */
+			if(!(feedcurrent->name = strdup(fields[FieldFeedName])))
+				die("can't allocate enough memory");
+			totalfeeds++;
+		}
+		/* remember the newest new item; checked for every item so the
+		 * result does not depend on the first line of a feed */
+		if(isnew && parsedtime > feedcurrent->timenewest) {
+			feedcurrent->timenewest = parsedtime;
+			/* snprintf always NUL-terminates, unlike strncpy with
+			 * the full buffer size */
+			snprintf(feedcurrent->timenewestformat,
+			         sizeof(feedcurrent->timenewestformat),
+			         "%s", fields[FieldTimeFormatted]);
+		}
+		totalnew += isnew;
+		feedcurrent->totalnew += isnew;
+		feedcurrent->total++;
+	}
+	printf("Total new: %lu\n", totalnew);
+	for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) {
+		/* feeds without a name are not listed */
+		if(!feedcurrent->name || feedcurrent->name[0] == '\0')
+			continue;
+		fprintf(stdout, "[%4lu / %4lu] %-20s", feedcurrent->totalnew,
+		        feedcurrent->total, feedcurrent->name);
+		/* NOTE(review): timenewestformat is treated as an in-struct
+		 * buffer (see the sizeof above), so only its content is
+		 * tested - confirm against util.h */
+		if(feedcurrent->timenewestformat[0])
+			fprintf(stdout, " (newest %s)", feedcurrent->timenewestformat);
+		putchar('\n');
+	}
+	return EXIT_SUCCESS;
+}
diff --git a/sfeed_update b/sfeed_update
@@ -1,6 +1,6 @@
#!/bin/sh
# update feeds, merge with old feeds.
-# NOTE: assumes "sfeed_*" files are in $PATH.
+# NOTE: assumes "sfeed_*" executables are in $PATH.
# defaults
sfeedpath="$HOME/.sfeed"
@@ -42,39 +42,40 @@ merge() {
}
# fetch a feed via HTTP/HTTPS etc.
-# fetchfeed(url, name)
+# fetchfeed(url, name, lastupdated)
fetchfeed() {
- if (curl -f -s -S -L --max-time 30 -z "$lastupdated" "$1"); then
- printf "%s\n" "[`date`] Fetching $2 [$1] ... done" >&2
+ if curl -f -s -S -L --max-time 30 -z "$3" "$1"; then
+ printf "[ OK] %s %s\n" "[`date '+%Y-%m-%d %H:%M:%S %Z'`]" "$2" >&2
else
- printf "%s\n" "[`date`] Fetching $2 [$1] ... fail" >&2
+ printf "[FAIL] %s %s\n" "[`date '+%Y-%m-%d %H:%M:%S %Z'`]" "$2" >&2
fi
}
-# add field after line, output to stdout.
-# addfield(field)
-addfield() {
- # NOTE: IFS is set and restored to prevent stripping whitespace.
- OLDIFS="$IFS"
- IFS="
-"
- while read -r line; do
- printf "%s %s\n" "${line}" "$1"
- done
- IFS="$OLDIFS"
+# convert encoding from one encoding to another.
+# convertencoding(from, to)
+convertencoding() {
+ if [ ! "$1" = "" ] && [ ! "$2" = "" ] && [ ! "$1" = "$2" ]; then # from != to
+ iconv -cs -f "$1" -t "$2" 2> /dev/null
+ else
+ cat # no convert, just output
+ fi
}
# fetch and parse feed.
-# feed(name, feedurl, basesiteurl, [encoding])
+# feed(name, feedurl, [basesiteurl], [encoding])
feed() {
- tmpfile=$(mktemp -p "$TMPDIR")
- (if [ "$4" = "" ]; then
- # don't use iconv if encoding not set in config.
- fetchfeed "$2" "$1"
- else
- # use iconv to convert encoding to UTF-8.
- fetchfeed "$2" "$1" | iconv -cs -f "$4" -t "utf-8"
- fi) | sfeed | addfield "$1 $2 $3" > "$tmpfile"
+ (tmpfeedfile=$(mktemp -p "$TMPDIR")
+ tmpencfile=""
+ encoding="$4"
+ if [ ! "$encoding" = "" ]; then
+ fetchfeed "$2" "$1" "$lastupdated" | convertencoding "$encoding" "utf-8"
+ else # detect encoding.
+ tmpencfile=$(mktemp -p "$TMPDIR")
+ fetchfeed "$2" "$1" "$lastupdated" > "$tmpencfile"
+ detectenc=$(sfeed_xmlenc < "$tmpencfile")
+ convertencoding "$detectenc" "utf-8" < "$tmpencfile"
+ rm -f "$tmpencfile"
+ fi | sfeed "$1 $2 $3" > "$tmpfeedfile") &
}
terminated() {
@@ -86,6 +87,11 @@ cleanup() {
rm -rf "$tmpfile" "$TMPDIR"
}
+feeds() {
+ echo "Configuration file \"$config\" is invalid or does not contain a \"feeds\" function." >&2
+ echo "See sfeedrc.example for an example." >&2
+}
+
# load config file.
loadconfig "$1"
# fetch feeds and store in temporary file.
@@ -93,17 +99,20 @@ TMPDIR=$(mktemp -d -t "sfeed_XXXXXX")
# get date of last modified feedfile in format:
# YYYYmmdd HH:MM:SS [+-][0-9]*
lastupdated=$(stat -c "%y" "$sfeedfile" 2> /dev/null | cut -c 1-4,6-7,9-10,11-19,30-)
-# Kill whole current process group on ^C.
+# kill whole current process group on ^C.
isrunning="1"
-trap -- "terminated" "15" # SIGTERM: signal to terminate parent.
-trap -- "kill -TERM -$$" "2" # SIGINT: kill all running childs >:D
+# SIGTERM: signal to terminate parent.
+trap -- "terminated" "15"
+# SIGINT: kill all running children >:D
+trap -- "kill -TERM -$$" "2"
# fetch feeds specified in config file.
feeds
# make sure path exists.
mkdir -p "$sfeedpath"
# wait till all feeds are fetched (allows running in parallel).
wait
-[ "$isrunning" = "0" ] && cleanup && exit 1 # if terminated cleanup.
+# if terminated cleanup.
+[ "$isrunning" = "0" ] && cleanup && exit 1
# concat all individual feed files to a single file.
# NOTE: mktemp uses $TMPDIR for temporary directory.
tmpfile=$(mktemp -t "sfeed_XXXXXX")
diff --git a/sfeed_update.1 b/sfeed_update.1
@@ -59,9 +59,9 @@ This file is evaluated as a shellscript in sfeed_update.
You can for example override the fetchfeed() function to
use wget, fetch or another download program or you can
override the merge() function to change the merge logic.
-The function feeds() is called to fetch the feeds. The
-function feed() can safely be executed as a parallel job
-in your sfeedrc config file to speedup updating.
+The function feeds() is called to fetch the feeds. By
+default the function feed() is executed as a parallel
+job to speed up updating.
.SH FILES WRITTEN
.TP
.B feeds
diff --git a/sfeed_web.c b/sfeed_web.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "util.h"
+#include "xml.h"
+
+static unsigned int isbase = 0, islink = 0, isfeedlink = 0, found = 0;
+static char feedlink[4096], basehref[4096];
+
+/* Tag start handler (installed as x.xmltagstart): clears the per-tag flags
+ * and marks the tag when its name is "base" or "link" (case-insensitive). */
+static void
+xmltagstart(XMLParser *p, const char *tag, size_t taglen) {
+	isbase = islink = isfeedlink = 0;
+	/* optimization: both names of interest are 4 bytes long */
+	if(taglen != 4)
+		return;
+	if(!strncasecmp(tag, "base", taglen))
+		isbase = 1;
+	else if(!strncasecmp(tag, "link", taglen))
+		islink = 1;
+}
+
+/* Handler installed as x.xmltagstartparsed. It relies on the flags and the
+ * href collected by xmlattr() for the current tag: when the tag is a feed
+ * <link> with an href, the link is printed to stdout (resolved against
+ * basehref by printlink()), followed by a newline, and counted in found. */
+static void
+xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) {
+	/* a feed <link> without href has nothing to print */
+	if(isfeedlink && feedlink[0] != '\0') {
+		printlink(feedlink, basehref, stdout);
+		putchar('\n');
+		found++;
+	}
+	/* forget the href: nothing else clears it, so a later <link> lacking
+	 * one would otherwise print the href of an earlier <link> */
+	feedlink[0] = '\0';
+}
+
+/* Attribute handler (installed as x.xmlattr):
+ * - <base href="...">: store the value in basehref.
+ * - <link href="...">: store the value in feedlink.
+ * - <link type="...">: set isfeedlink when the value starts with
+ *   "application/atom" or "application/rss" (case-insensitive). */
+static void
+xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
+        size_t namelen, const char *value, size_t valuelen) {
+	static const char atom[] = "application/atom";
+	static const char rss[] = "application/rss";
+
+	/* optimization: "href" and "type" are both 4 bytes long; other
+	 * tags than <base> and <link> are of no interest */
+	if(namelen != 4 || (!isbase && !islink))
+		return;
+
+	if(!strncasecmp(name, "href", namelen)) {
+		if(isbase)
+			strlcpy(basehref, value, sizeof(basehref) - 1);
+		else
+			strlcpy(feedlink, value, sizeof(feedlink) - 1);
+		return;
+	}
+	/* the type attribute only matters on <link> */
+	if(isbase || strncasecmp(name, "type", namelen))
+		return;
+	if(!strncasecmp(value, atom, sizeof(atom) - 1) ||
+	   !strncasecmp(value, rss, sizeof(rss) - 1))
+		isfeedlink = 1;
+}
+
+/* Parse the input with the handlers above and print every feed link found.
+ * The optional first argument is used as the initial base href.
+ * Exit status: EXIT_SUCCESS when at least one link was printed, otherwise
+ * EXIT_FAILURE. */
+int
+main(int argc, char **argv) {
+	XMLParser x;
+
+	/* base href: empty unless given on the command line */
+	basehref[0] = '\0';
+	if(argc > 1)
+		strlcpy(basehref, argv[1], sizeof(basehref) - 1);
+	feedlink[0] = '\0';
+
+	xmlparser_init(&x);
+	x.xmlattr = xmlattr;
+	x.xmltagstart = xmltagstart;
+	x.xmltagstartparsed = xmltagstartparsed;
+
+	xmlparser_parse(&x);
+
+	if(!found)
+		return EXIT_FAILURE;
+	return EXIT_SUCCESS;
+}
diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c
@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "xml.h"
+
+/* isxmlpi: set by xmltagstart() for a "?xml" tag and cleared by xmltagend();
+ * tags: number of start tags seen so far. */
+static int isxmlpi = 0, tags = 0;
+
+/* Start-of-tag callback: flag whether the tag is the "?xml" declaration
+ * (case-insensitive). Gives up with EXIT_FAILURE once more than a few tags
+ * have been seen, since the declaration is only expected at the start.
+ * The tag length is checked too: strncasecmp() limited to taglen alone is a
+ * prefix match and would also accept the tags "?" and "?x". */
+static void
+xmltagstart(XMLParser *p, const char *tag, size_t taglen) {
+	if(tags > 3) /* optimization: try to find processing instruction at start */
+		exit(EXIT_FAILURE);
+	isxmlpi = (taglen == strlen("?xml") &&
+	           !strncasecmp(tag, "?xml", taglen)) ? 1 : 0;
+	tags++;
+}
+
+/* End-of-tag callback: whatever tag just ended, the parser is no longer
+ * inside the XML declaration. None of the arguments are used. */
+static void
+xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort) {
+	(void)p;
+	(void)tag;
+	(void)taglen;
+	(void)isshort;
+
+	isxmlpi = 0;
+}
+
+/* Attribute callback: when the "encoding" attribute of the XML declaration
+ * is seen, print its value in lowercase to stdout and exit successfully.
+ * The name must match "encoding" in full (case-insensitive); comparing only
+ * namelen bytes would also accept an empty name or a prefix such as "enc".
+ * Bytes go through unsigned char before tolower(), because passing a
+ * negative char value to a <ctype.h> function is undefined. */
+static void
+xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
+	size_t namelen, const char *value, size_t valuelen) {
+	if(!isxmlpi || namelen != strlen("encoding") ||
+	   strncasecmp(name, "encoding", namelen))
+		return;
+	for(; *value; value++)
+		putc(tolower((unsigned char)*value), stdout); /* output lowercase */
+	exit(EXIT_SUCCESS);
+}
+
+/* Entry point of sfeed_xmlenc: run the XML parser with the callbacks above.
+ * xmlattr() exits with EXIT_SUCCESS as soon as it has printed the encoding,
+ * so returning from the parser means no encoding was found. */
+int
+main(int argc, char **argv) {
+	XMLParser parser;
+
+	xmlparser_init(&parser);
+	parser.xmltagstart = xmltagstart;
+	parser.xmltagend = xmltagend;
+	parser.xmlattr = xmlattr;
+
+	xmlparser_parse(&parser);
+
+	/* only reached when no encoding attribute was printed */
+	return EXIT_FAILURE;
+}
diff --git a/sfeedrc.example b/sfeedrc.example
@@ -6,7 +6,7 @@
# list of feeds to fetch:
feeds() {
- # feed <name> <feedurl> <basesiteurl> [encoding]
+ # feed <name> <feedurl> [basesiteurl] [encoding]
feed "codemadness" "http://www.codemadness.nl/blog/rss.xml"
feed "explosm" "http://feeds.feedburner.com/Explosm"
feed "linux kernel" "http://kernel.org/kdist/rss.xml" "http://kernel.org" "iso-8859-1"
diff --git a/style.css b/style.css
@@ -69,3 +69,14 @@ body.frame .content {
font-family: sans;
font-size: medium;
}
+/* show only new items when the body has the "newonly" class */
+body.newonly tr,
+body.newonly li {
+ display: none;
+}
+body.newonly li.n {
+ display: list-item;
+}
+body.newonly tr.n {
+ display: table-row;
+}
diff --git a/util.c b/util.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <ctype.h>
+#include <sys/types.h>
+
+#include "util.h"
+
+#if 0
+/* Disabled (compiled out) variant of afgets(); the active definition is
+ * further down in this file. This copy appends each chunk with strncpy()
+ * and tests feof() at the end of every loop iteration. */
+/* TODO: optimize */
+char *
+afgets(char **p, size_t *size, FILE *fp) {
+ char buf[BUFSIZ], *alloc = NULL;
+ size_t n, len = 0, allocsiz;
+ int end = 0;
+
+ while(fgets(buf, sizeof(buf), fp)) {
+ n = strlen(buf);
+ if(buf[n - 1] == '\n') { /* dont store newlines. */
+ buf[n - 1] = '\0';
+ n--;
+ end = 1; /* newline found, end */
+ }
+ len += n;
+ allocsiz = len + 1;
+ if(allocsiz > *size) {
+ if((alloc = realloc(*p, allocsiz))) {
+ *p = alloc;
+ *size = allocsiz;
+ } else {
+ free(*p);
+ *p = NULL;
+ fputs("error: could not realloc\n", stderr);
+ exit(EXIT_FAILURE);
+ return NULL;
+ }
+ }
+ strncpy((*p + (len - n)), buf, n);
+ if(end || feof(fp))
+ break;
+ }
+ if(*p && len > 0) {
+ (*p)[len] = '\0';
+ return *p;
+ }
+ return NULL;
+}
+#endif
+
+/*
+ * Bounded string copy with the same contract as OpenBSD's strlcpy().
+ * Copies at most siz - 1 bytes of src into dst and always NUL-terminates
+ * dst, unless siz is 0, in which case dst is not touched at all.
+ * Returns strlen(src); a return value >= siz means the copy was truncated.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz) {
+	size_t srclen = strlen(src);
+	size_t ncopy, i;
+
+	if(siz != 0) {
+		/* keep one byte of dst for the terminating NUL */
+		ncopy = (srclen < siz - 1) ? srclen : siz - 1;
+		for(i = 0; i < ncopy; i++)
+			dst[i] = src[i];
+		dst[ncopy] = '\0';
+	}
+	return srclen; /* count does not include NUL */
+}
+
+/*
+ * Read one line from fp into the growable buffer *p, whose capacity is
+ * tracked in *size. The trailing newline is not stored. Lines longer than
+ * BUFSIZ are assembled from several fgets() chunks; the buffer is enlarged
+ * with realloc() when needed, and the process exits if that fails.
+ *
+ * Returns *p when at least one byte was read into the line, otherwise NULL.
+ * Note that an empty line therefore also yields NULL, not only end of file.
+ * The caller owns *p and frees it; start with *p = NULL and *size = 0.
+ */
+char *
+afgets(char **p, size_t *size, FILE *fp) {
+	char buf[BUFSIZ], *alloc = NULL;
+	size_t n, len = 0, allocsiz;
+	int end = 0;
+
+	while(!end && !feof(fp) && fgets(buf, sizeof(buf), fp)) {
+		n = strlen(buf);
+		/* n is 0 when the chunk starts with a NUL byte; buf[n - 1]
+		 * would then be read out of bounds. */
+		if(n > 0 && buf[n - 1] == '\n') { /* dont store newlines. */
+			buf[--n] = '\0';
+			end = 1; /* newline found, end */
+		}
+		len += n;
+		allocsiz = len + 1;
+		if(allocsiz > *size) {
+			if(!(alloc = realloc(*p, allocsiz))) {
+				free(*p);
+				*p = NULL;
+				fputs("error: could not realloc\n", stderr);
+				exit(EXIT_FAILURE);
+			}
+			*p = alloc;
+			*size = allocsiz;
+		}
+		/* append the chunk including its NUL: n + 1 bytes fit, since
+		 * the buffer holds at least len + 1 bytes at this point. */
+		memcpy(*p + (len - n), buf, n + 1);
+	}
+	if(*p && len > 0) {
+		(*p)[len] = '\0';
+		return *p;
+	}
+	return NULL;
+}
+
+/*
+ * Print link to fp; a relative link is made absolute using baseurl.
+ *
+ * A link counts as absolute when it starts with a scheme (letters, digits,
+ * '+', '-', '.') followed by "://"; it is then printed unchanged.
+ * Otherwise the scheme of baseurl is printed first ("http://" when baseurl
+ * has none and is non-empty, or when link starts with "//"), followed by:
+ *  - link starting with "//": the link without those two slashes;
+ *  - link starting with "/":  the host part of baseurl, then the link;
+ *  - anything else:           baseurl up to and including its last '/'
+ *                             (or all of baseurl plus a '/'), then the link.
+ * All output goes to fp.
+ */
+void
+printlink(const char *link, const char *baseurl, FILE *fp) {
+	const char *ebaseproto, *ebasedomain, *p;
+	int isrelative;
+
+	/* protocol part; cast because <ctype.h> needs unsigned char values */
+	for(p = link; *p && (isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
+	    *p == '+' || *p == '-' || *p == '.'); p++);
+	isrelative = strncmp(p, "://", strlen("://"));
+	if(isrelative) { /* relative link (baseurl is used). */
+		if((ebaseproto = strstr(baseurl, "://"))) {
+			ebaseproto += strlen("://");
+			fwrite(baseurl, 1, ebaseproto - baseurl, fp);
+		} else {
+			ebaseproto = baseurl;
+			if(*baseurl || (link[0] == '/' && link[1] == '/'))
+				fputs("http://", fp);
+		}
+		if(link[0] == '/') { /* relative to baseurl domain (not path). */
+			if(link[1] == '/') /* absolute url but with protocol from baseurl. */
+				link += 2;
+			else if((ebasedomain = strchr(ebaseproto, '/'))) /* baseurl has a path: print only its host. */
+				fwrite(ebaseproto, 1, ebasedomain - ebaseproto, fp);
+			else /* baseurl is just a host; this used to go to stdout by mistake. */
+				fputs(ebaseproto, fp);
+		} else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */
+			fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, fp);
+		else {
+			fputs(ebaseproto, fp);
+			if(*baseurl && *link)
+				fputc('/', fp);
+		}
+	}
+	fputs(link, fp);
+}
+
+/*
+ * Read one line from fp with afgets() and split it in place on separator.
+ *
+ * line, size: growable line buffer handed to afgets(); the field pointers
+ *             point into this buffer and stay valid until the next call.
+ * fields:     array with room for maxfields pointers.
+ * maxfields:  number of fields wanted. Missing fields are set to "". When
+ *             the line holds more fields, the last one keeps the unsplit
+ *             remainder of the line.
+ *
+ * Returns maxfields when a line was read, or 0 when afgets() returned NULL
+ * or maxfields is 0.
+ *
+ * The split loop is bounded so that at most maxfields entries are written;
+ * the old bound (i <= maxfields) could write two entries past the array.
+ */
+unsigned int
+parseline(char **line, size_t *size, char **fields, unsigned int maxfields, int separator, FILE *fp) {
+	unsigned int i = 0;
+	char *prev, *s;
+
+	if(!maxfields || !afgets(line, size, fp))
+		return 0;
+	/* keep the last slot free for the final field stored after the loop */
+	for(prev = *line; i + 1 < maxfields && (s = strchr(prev, separator)); i++) {
+		*s = '\0'; /* null terminate string. */
+		fields[i] = prev;
+		prev = s + 1;
+	}
+	fields[i] = prev;
+	for(i++; i < maxfields; i++) /* make non-parsed fields empty. */
+		fields[i] = "";
+	return i;
+}
+
+/* Print the feed name s to fp in a form usable as an id: whitespace
+ * characters become '-' (spaces are not valid in anchors) and all other
+ * characters are lowercased.
+ * Bytes are converted to unsigned char first: passing a negative char value
+ * (any byte >= 0x80 where char is signed) to a <ctype.h> function is
+ * undefined behaviour. */
+void
+printfeednameid(const char *s, FILE *fp) {
+	unsigned char c;
+
+	for(; *s; s++) {
+		c = (unsigned char)*s;
+		fputc(isspace(c) ? '-' : tolower(c), fp);
+	}
+}
+
+/* Write s to fp with '<' and '>' replaced by the HTML entities "&lt;" and
+ * "&gt;"; every other byte is written unchanged.
+ * NOTE(review): '&' is passed through as-is (its escape was already
+ * disabled in this function); presumably the input may contain entities
+ * that must not be escaped twice. Confirm against the callers. */
+void
+printhtmlencoded(const char *s, FILE *fp) {
+	for(; *s; s++) {
+		switch(*s) {
+		case '<': fputs("&lt;", fp); break;
+		case '>': fputs("&gt;", fp); break;
+		default:
+			fputc(*s, fp);
+		}
+	}
+}
+
+/* Free a linked list of feeds: for every node, its name and then the node
+ * itself. Passing NULL is fine and does nothing. */
+void
+feedsfree(struct feed *f) {
+	struct feed *cur;
+
+	while(f) {
+		cur = f;
+		f = f->next; /* fetch the successor before the node is freed */
+		free(cur->name);
+		free(cur);
+	}
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,26 @@
+/* Shared helpers: feed bookkeeping, line/field parsing and output helpers. */
+#ifndef UTIL_H
+#define UTIL_H
+
+/* This header uses size_t, FILE and time_t, so it includes what it needs
+ * itself instead of relying on the include order of each source file. */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+
+/* feed info */
+struct feed {
+	char *name; /* feed name; released by feedsfree() */
+	unsigned long totalnew; /* amount of new items per feed */
+	unsigned long total; /* total items */
+	time_t timenewest; /* presumably the timestamp of the newest item - TODO confirm */
+	char timenewestformat[64]; /* presumably timenewest as formatted text - TODO confirm */
+	struct feed *next; /* linked list */
+};
+
+/* Field indices, in order; FieldLast equals the number of fields.
+ * Presumably the column order of one parsed feed line - verify against the
+ * callers of parseline(). */
+enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink,
+       FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType,
+       FieldFeedName, FieldFeedUrl, FieldBaseSiteUrl, FieldLast };
+
+/* Drop any macro named strlcpy so the declaration below is not rewritten
+ * by it. */
+#undef strlcpy
+/* Bounded string copy; returns strlen(src). */
+size_t strlcpy(char *, const char *, size_t);
+
+/* Read one line from fp into the growable buffer *p (capacity *size). */
+char * afgets(char **p, size_t *size, FILE *fp);
+/* Free a linked list of feeds, including each name. */
+void feedsfree(struct feed *f);
+/* Read one line from fp and split it on separator into fields[]. */
+unsigned int parseline(char **line, size_t *size, char **fields,
+                       unsigned int maxfields, int separator, FILE *fp);
+/* Print s lowercased, with whitespace replaced by '-'. */
+void printfeednameid(const char *s, FILE *fp);
+/* Print s to fp with HTML special characters escaped. */
+void printhtmlencoded(const char *s, FILE *fp);
+/* Print link; a relative link is made absolute using baseurl. */
+void printlink(const char *link, const char *baseurl, FILE *fp);
+
+#endif /* UTIL_H */
diff --git a/xml.c b/xml.c
@@ -2,8 +2,8 @@
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
-#include "xml.h"
+#include "xml.h"
void
xmlparser_init(XMLParser *x) {
@@ -22,76 +22,19 @@ xmlparser_getnext(XMLParser *x) {
}
__inline__ void
-xmlparser_parseattrvalue(XMLParser *x, const char *name, size_t namelen, int end) {
- size_t valuelen = 0;
- int c;
-
- if(x->xmlattrstart)
- x->xmlattrstart(x, x->tag, x->taglen, name, namelen);
- for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
- if(c == '&' && x->xmlattrentity) { /* entities */
- x->data[valuelen] = '\0';
- /* call data function with data before entity if there is data */
- if(valuelen && x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen);
- x->data[0] = c;
- valuelen = 1;
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == end)
- goto parseattrvalueend;
- if(valuelen < sizeof(x->data) - 1)
- x->data[valuelen++] = c;
- else { /* TODO: entity too long? this should be very strange. */
- x->data[valuelen] = '\0';
- if(x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen);
- valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */
-/* x->data[0] = '\0'; */
- break;
- }
- if(c == ';') {
- x->data[valuelen] = '\0';
- x->xmlattrentity(x, x->tag, x->taglen, name, namelen, x->data, valuelen);
- valuelen = 0; /* TODO: incorrect ? ';' is read in c below? */
- break;
- }
- }
- } else if(c == end) { /* TODO: ugly, remove goto?, simplify? duplicate code. */
-parseattrvalueend:
- x->data[valuelen] = '\0';
- if(x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen);
- if(x->xmlattrend)
- x->xmlattrend(x, x->tag, x->taglen, name, namelen);
- return;
- } else {
- if(valuelen < sizeof(x->data) - 1) {
- x->data[valuelen++] = c;
- } else {
- x->data[valuelen] = '\0';
- if(x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, name, namelen, x->data, valuelen);
- x->data[0] = c;
- valuelen = 1;
- }
- }
- }
-}
-
-__inline__ void
-xmlparser_parseattrs(XMLParser *x, int *isshorttag) {
- size_t namelen = 0;
- int c, endname = 0;
+xmlparser_parseattrs(XMLParser *x) {
+ size_t namelen = 0, valuelen;
+ int c, endsep, endname = 0;
while((c = xmlparser_getnext(x)) != EOF) {
- if(isspace(c)) {
- if(namelen) /* Do nothing */
+ if(isspace(c)) { /* TODO: simplify endname ? */
+ if(namelen) /* do nothing */
endname = 1;
else
continue;
}
- if(c == '?' && isspace(c)) { /* Do nothing */
- } else if(c == '=') {
+ if(c == '?'); /* ignore */
+ else if(c == '=') {
x->name[namelen] = '\0';
} else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
/* attribute without value */
@@ -107,7 +50,56 @@ xmlparser_parseattrs(XMLParser *x, int *isshorttag) {
namelen = 1;
} else if(namelen && (c == '\'' || c == '"')) {
/* attribute with value */
- xmlparser_parseattrvalue(x, x->name, namelen, c);
+ endsep = c; /* c is end separator */
+ if(x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+ for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
+ if(c == '&' && x->xmlattrentity) { /* entities */
+ x->data[valuelen] = '\0';
+ /* call data function with data before entity if there is data */
+ if(valuelen && x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == endsep)
+ break;
+ if(valuelen < sizeof(x->data) - 1)
+ x->data[valuelen++] = c;
+ else { /* TODO: entity too long? this should be very strange. */
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ valuelen = 0;
+ break;
+ }
+ if(c == ';') {
+ x->data[valuelen] = '\0';
+ x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ valuelen = 0;
+ break;
+ }
+ }
+ } else if(c != endsep) {
+ if(valuelen < sizeof(x->data) - 1) {
+ x->data[valuelen++] = c;
+ } else {
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ }
+ }
+ if(c == endsep) {
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ if(x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ break;
+ }
+ }
namelen = 0;
endname = 0;
} else if(namelen < sizeof(x->name) - 1)
@@ -115,7 +107,7 @@ xmlparser_parseattrs(XMLParser *x, int *isshorttag) {
if(c == '>') {
break;
} else if(c == '/') {
- *isshorttag = 1;
+ x->isshorttag = 1;
namelen = 0;
x->name[0] = '\0';
}
@@ -133,15 +125,12 @@ xmlparser_parsecomment(XMLParser *x) {
if(c == '-' && i < 2)
i++;
else if(c == '>') {
- if(i == 2) { /* (!memcmp(cd, "-->", strlen("-->"))) { */
+ if(i == 2) { /* -- */
if(datalen >= 2) {
datalen -= 2;
x->data[datalen] = '\0';
if(x->xmlcomment)
x->xmlcomment(x, x->data, datalen);
-/* } else {
- datalen = 0;
- x->data[datalen] = '\0';*/
}
if(x->xmlcommentend)
x->xmlcommentend(x);
@@ -149,9 +138,9 @@ xmlparser_parsecomment(XMLParser *x) {
}
i = 0;
}
- if(datalen < sizeof(x->data) - 1) { /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ /* TODO: what if the end has --, and its cut on the boundary, test this. */
+ if(datalen < sizeof(x->data) - 1) /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ /* TODO: what if the end has --, and its cut on the boundary, test this. */
x->data[datalen++] = c;
- } else {
+ else {
x->data[datalen] = '\0';
if(x->xmlcomment)
x->xmlcomment(x, x->data, datalen);
@@ -161,6 +150,13 @@ xmlparser_parsecomment(XMLParser *x) {
}
}
+/* TODO:
+ * <test><![CDATA[1234567dddd8]]]>
+ *
+ * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
+ * test comment function too for similar bug?
+ *
+ */
__inline__ void
xmlparser_parsecdata(XMLParser *x) {
size_t datalen = 0, i = 0;
@@ -172,15 +168,12 @@ xmlparser_parsecdata(XMLParser *x) {
if(c == ']' && i < 2) {
i++;
} else if(c == '>') {
- if(i == 2) { /* (!memcmp(cd, "]]", strlen("]]"))) { */
+ if(i == 2) { /* ]] */
if(datalen >= 2) {
datalen -= 2;
x->data[datalen] = '\0';
- if(x->xmlcdata)
+ if(x->xmlcdata && datalen)
x->xmlcdata(x, x->data, datalen);
-/* } else {
- datalen = 0;
- x->data[datalen] = '\0';*/
}
if(x->xmlcdataend)
x->xmlcdataend(x);
@@ -200,130 +193,122 @@ xmlparser_parsecdata(XMLParser *x) {
}
}
-__inline__ void
-xmlparser_parsetag(XMLParser *x) {
- size_t datalen, taglen;
- int c, s, isshorttag = 0;
-
- x->tag[0] = '\0';
- x->taglen = 0;
- while((c = xmlparser_getnext(x)) != EOF && isspace(c));
- if(c == '!') {
- for(datalen = 0; (c = xmlparser_getnext(x)) != EOF;) {
- if(datalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */
- x->data[datalen++] = c; /* TODO: prevent overflow */
- if(c == '>')
- break;
- else if(c == '-' && datalen == strlen("--") &&
- (x->data[0] == '-')) { /* comment */ /* TODO: optimize this bitch */
- xmlparser_parsecomment(x);
- break;
- } else if(c == '[' && datalen == strlen("[CDATA[") &&
- x->data[1] == 'C' && x->data[2] == 'D' &&
- x->data[3] == 'A' && x->data[4] == 'T' &&
- x->data[5] == 'A' && x->data[6] == '[') { /* cdata */
- xmlparser_parsecdata(x);
- break;
- }
- }
- } else if(c == '?') {
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '"' || c == '\'')
- for(s = c; (c = xmlparser_getnext(x)) != EOF && c != s;);
- else if(c == '>')
- break;
- }
- /* TODO: find out why checking isalpha(c) gives "not enough memory"
- * also check if maybe when there is << or <> it might go into an infinite loop (unsure) */
- } else if(c != EOF && c != '>') { /* TODO: optimize and put above the other conditions ? */
- x->tag[0] = c;
- taglen = 1;
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '/')
- isshorttag = 1; /* short tag */
- else if(c == '>' || isspace(c)) {
- x->tag[taglen] = '\0';
- if(x->tag[0] == '/') { /* end tag, starts with </ */
- x->taglen = --taglen; /* len -1 because of / */
- if(x->xmltagend)
- x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
- } else {
- x->taglen = taglen;
- if(x->xmltagstart)
- x->xmltagstart(x, x->tag, x->taglen); /* start tag */
- if(isspace(c))
- xmlparser_parseattrs(x, &isshorttag);
- if(x->xmltagstartparsed)
- x->xmltagstartparsed(x, x->tag, x->taglen, isshorttag);
- }
- if(isshorttag && x->xmltagend)
- x->xmltagend(x, x->tag, x->taglen, 1);
- break;
- } else if(taglen < sizeof(x->tag) - 1)
- x->tag[taglen++] = c;
- }
- }
-}
-
void
-xmlparser_parsedata(XMLParser *x, int c) { /* TODO: remove int c, ugly */
- size_t datalen = 0;
+xmlparser_parse(XMLParser *x) {
+ int c, ispi;
+ size_t datalen, tagdatalen, taglen;
- if(x->xmldatastart)
- x->xmldatastart(x);
- do {
- if(c == '&' && x->xmldataentity) { /* TODO: test this, entity handler */
- x->data[datalen] = '\0';
- x->xmldata(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '<')
- goto parsedataend;
- if(datalen < sizeof(x->data) - 1)
- x->data[datalen++] = c;
+ while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */
+
+ while(c != EOF) {
+ if(c == '<') { /* parse tag */
+ if((c = xmlparser_getnext(x)) == EOF)
+ return;
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ if(c == '!') { /* cdata and comments */
+ for(tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) {
+ if(tagdatalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */
+ x->data[tagdatalen++] = c; /* TODO: prevent overflow */
+ if(c == '>')
+ break;
+ else if(c == '-' && tagdatalen == strlen("--") &&
+ (x->data[0] == '-')) { /* comment */
+ xmlparser_parsecomment(x);
+ break;
+ } else if(c == '[') {
+ if(tagdatalen == strlen("[CDATA[") &&
+ x->data[1] == 'C' && x->data[2] == 'D' &&
+ x->data[3] == 'A' && x->data[4] == 'T' &&
+ x->data[5] == 'A' && x->data[6] == '[') { /* cdata */
+ xmlparser_parsecdata(x);
+ break;
+ } else {
+ /* markup declaration section */
+ while((c = xmlparser_getnext(x)) != EOF && c != ']');
+ }
+ }
+ }
+ } else { /* normal tag (open, short open, close), processing instruction. */
if(isspace(c))
- break;
- else if(c == ';') {
+ while((c = xmlparser_getnext(x)) != EOF && isspace(c));
+ if(c == EOF)
+ return;
+ x->tag[0] = c;
+ ispi = (c == '?') ? 1 : 0;
+ x->isshorttag = ispi;
+ taglen = 1;
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '/') /* TODO: simplify short tag? */
+ x->isshorttag = 1; /* short tag */
+ else if(c == '>' || isspace(c)) {
+ x->tag[taglen] = '\0';
+ if(x->tag[0] == '/') { /* end tag, starts with </ */
+ x->taglen = --taglen; /* len -1 because of / */
+ if(taglen && x->xmltagend)
+ x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
+ } else {
+ x->taglen = taglen;
+ if(x->xmltagstart)
+ x->xmltagstart(x, x->tag, x->taglen); /* start tag */
+ if(isspace(c))
+ xmlparser_parseattrs(x);
+ if(x->xmltagstartparsed)
+ x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
+ }
+ if((x->isshorttag || ispi) && x->xmltagend) /* call tagend for shortform or processing instruction */
+ x->xmltagend(x, x->tag, x->taglen, 1);
+ break;
+ } else if(taglen < sizeof(x->tag) - 1)
+ x->tag[taglen++] = c;
+ }
+ }
+ } else { /* parse data */
+ datalen = 0;
+ if(x->xmldatastart)
+ x->xmldatastart(x);
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '&' && x->xmldataentity) {
+ if(datalen) {
+ x->data[datalen] = '\0';
+ x->xmldata(x, x->data, datalen);
+ }
+ x->data[0] = c;
+ datalen = 1;
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '<')
+ break;
+ if(datalen < sizeof(x->data) - 1)
+ x->data[datalen++] = c;
+ if(isspace(c))
+ break;
+ else if(c == ';') {
+ x->data[datalen] = '\0';
+ x->xmldataentity(x, x->data, datalen);
+ datalen = 0;
+ break;
+ }
+ }
+ } else if(c != '<') {
+ if(datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ x->data[datalen] = '\0';
+ if(x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+ if(c == '<') {
x->data[datalen] = '\0';
- x->xmldataentity(x, x->data, datalen);
- datalen = 0;
+ if(x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen);
+ if(x->xmldataend)
+ x->xmldataend(x);
break;
}
}
- } else if(c == '<') { /* TODO: ugly, remove goto ? simplify? duplicate code. */
-parsedataend:
- x->data[datalen] = '\0';
- if(x->xmldata)
- x->xmldata(x, x->data, datalen);
- if(x->xmldataend)
- x->xmldataend(x);
- break;
- } else {
- if(datalen < sizeof(x->data) - 1) {
- x->data[datalen++] = c;
- } else {
- x->data[datalen] = '\0';
- if(x->xmldata)
- x->xmldata(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- }
- }
- } while((c = xmlparser_getnext(x)) != EOF);
-}
-
-void
-xmlparser_parse(XMLParser *x) {
- int c;
-
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '<') /* tag */
- xmlparser_parsetag(x);
- else {
- xmlparser_parsedata(x, c);
- xmlparser_parsetag(x);
}
}
- return;
}
diff --git a/xml.h b/xml.h
@@ -22,9 +22,11 @@ typedef struct xmlparser {
void (*xmlcomment)(struct xmlparser *p, const char *comment, size_t commentlen);
void (*xmlcommentend)(struct xmlparser *p);
- FILE *fp; /* stream to read from */
+ FILE *fp; /* file stream to read from */
+
/* private; internal state */
char tag[1024]; /* current tag */
+ int isshorttag; /* current tag is in short form ? */
size_t taglen;
char name[256]; /* current attribute name */
char data[BUFSIZ]; /* data buffer used for tag and attribute data */