commit 13927fc6083c3d134e456ccfafb953c6cea17662
parent fc6c2a381742aba4deaf8538fa2c85750361a2d9
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 27 Feb 2016 16:21:30 +0100
various improvements
- pledge tools and add define to enable it on platforms that support it, currently
only OpenBSD 5.9+
- separate getline and parseline functionality.
- use murmur3 hash instead of jenkins1: faster and fewer collisions.
- make some error messages a bit more clear, for example with path truncation.
- some small cleanups, move printutf8pad to util.
Diffstat:
12 files changed, 175 insertions(+), 126 deletions(-)
diff --git a/config.mk b/config.mk
@@ -22,5 +22,8 @@ LDFLAGS = -s ${LIBS}
# -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE=700 -D_BSD_SOURCE
#LDFLAGS = -static -s ${LIBS}
+# OpenBSD 5.9+: use pledge(2)
+#CFLAGS += -DUSE_PLEDGE
+
# compiler and linker
#CC = cc
diff --git a/sfeed.c b/sfeed.c
@@ -709,6 +709,9 @@ xml_handler_end_el(XMLParser *p, const char *name, size_t namelen, int isshort)
int
main(int argc, char *argv[])
{
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
if (argc > 1)
baseurl = argv[1];
diff --git a/sfeed_frames.c b/sfeed_frames.c
@@ -138,13 +138,15 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f)
}
fputs("<table cellpadding=\"0\" cellspacing=\"0\">\n", fpitems);
- while (parseline(&line, &linesize, fields, fpin) > 0) {
+ while (getline(&line, &linesize, fpin) > 0) {
+ if (!parseline(line, fields))
+ break;
/* write content */
if (!(namelen = normalizepath(fields[FieldTitle], name, sizeof(name))))
continue;
r = snprintf(filepath, sizeof(filepath), "%s/%s.html", dirpath, name);
if (r == -1 || (size_t)r >= sizeof(filepath))
- errx(1, "snprintf: path truncation");
+ errx(1, "snprintf: path truncation: '%s/%s.html'", dirpath, name);
/* content file doesn't exist yet and has write access */
if (access(filepath, F_OK) != 0) {
@@ -215,6 +217,9 @@ main(int argc, char *argv[])
int i;
struct feed *f;
+ if (pledge("stdio rpath wpath cpath", NULL) == -1)
+ err(1, "pledge");
+
if (!(feeds = calloc(argc, sizeof(struct feed *))))
err(1, "calloc");
diff --git a/sfeed_html.c b/sfeed_html.c
@@ -31,7 +31,9 @@ printfeed(FILE *fp, struct feed *f)
}
fputs("<table cellpadding=\"0\" cellspacing=\"0\">\n", stdout);
- while (parseline(&line, &linesize, fields, fp) > 0) {
+ while (getline(&line, &linesize, fp) > 0) {
+ if (!parseline(line, fields))
+ break;
parsedtime = 0;
strtotime(fields[FieldUnixTimestamp], &parsedtime);
@@ -73,6 +75,9 @@ main(int argc, char *argv[])
FILE *fp;
int i;
+ if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
+ err(1, "pledge");
+
if (!(feeds = calloc(argc, sizeof(struct feed *))))
err(1, "calloc");
if ((comparetime = time(NULL)) == -1)
diff --git a/sfeed_mbox.c b/sfeed_mbox.c
@@ -15,22 +15,7 @@ static char *line;
static size_t linesize;
static char host[256], *user, mtimebuf[32];
-/* jenkins one-at-a-time hash, used for Message-Id */
-static uint32_t
-jenkins1(const char *s)
-{
- uint32_t hash = 0;
-
- for (; *s; s++) {
- hash += (int)*s;
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
- hash += (hash << 3);
- hash ^= (hash >> 11);
-
- return hash + (hash << 15);
-}
+static const uint32_t seed = 0x45931287;
/* Unescape / decode fields printed by string_print_encoded()
* "\\" to "\", "\t", to TAB, "\n" to newline. Unrecognised escape sequences
@@ -79,8 +64,11 @@ printfeed(FILE *fp, const char *feedname)
struct tm tm;
char *fields[FieldLast], timebuf[32];
time_t parsedtime;
+ ssize_t linelen;
- while (parseline(&line, &linesize, fields, fp) > 0) {
+ while ((linelen = getline(&line, &linesize, fp)) > 0) {
+ if (!parseline(line, fields))
+ break;
parsedtime = 0;
strtotime(fields[FieldUnixTimestamp], &parsedtime);
/* can't convert: default to formatted time for time_t 0. */
@@ -106,7 +94,7 @@ printfeed(FILE *fp, const char *feedname)
user, user, host, fields[FieldTitle],
fields[FieldUnixTimestamp],
fields[FieldUnixTimestamp][0] ? "." : "",
- jenkins1(fields[FieldTitle]),
+ murmur3_32(line, (size_t)linelen, seed),
feedname[0] ? feedname : "unnamed",
fields[FieldContentType], feedname);
@@ -134,6 +122,9 @@ main(int argc, char *argv[])
char *name;
int i;
+ if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
+ err(1, "pledge");
+
if (!(user = getenv("USER")))
user = "you";
if (gethostname(host, sizeof(host)) == -1)
diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c
@@ -1,4 +1,5 @@
#include <ctype.h>
+#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -84,6 +85,9 @@ xml_handler_attrentity(XMLParser *p, const char *tag, size_t taglen,
int
main(void)
{
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
parser.xmlattr = xml_handler_attr;
parser.xmlattrentity = xml_handler_attrentity;
parser.xmltagend = xml_handler_end_element;
diff --git a/sfeed_plain.c b/sfeed_plain.c
@@ -4,7 +4,6 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#include <wchar.h>
#include "util.h"
@@ -12,36 +11,15 @@ static time_t comparetime;
static char *line;
static size_t linesize;
-/* print `len' columns of characters. If string is shorter pad the rest
- * with characters `pad`. */
-static void
-printutf8pad(FILE *fp, const char *s, size_t len, int pad)
-{
- wchar_t w;
- size_t n = 0, i;
- int r;
-
- for (i = 0; *s && n < len; i++, s++) {
- if (ISUTF8(*s)) {
- if ((r = mbtowc(&w, s, 4)) == -1)
- break;
- if ((r = wcwidth(w)) == -1)
- r = 1;
- n += (size_t)r;
- }
- putc(*s, fp);
- }
- for (; n < len; n++)
- putc(pad, fp);
-}
-
static void
printfeed(FILE *fp, const char *feedname)
{
char *fields[FieldLast];
time_t parsedtime;
- while (parseline(&line, &linesize, fields, fp) > 0) {
+ while (getline(&line, &linesize, fp) > 0) {
+ if (!parseline(line, fields))
+ break;
parsedtime = 0;
strtotime(fields[FieldUnixTimestamp], &parsedtime);
@@ -65,6 +43,9 @@ main(int argc, char *argv[])
char *name;
int i;
+ if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
+ err(1, "pledge");
+
if ((comparetime = time(NULL)) == -1)
err(1, "time");
/* 1 day is old news */
diff --git a/sfeed_tail.c b/sfeed_tail.c
@@ -27,65 +27,7 @@ struct bucket {
static struct bucket *buckets;
static struct bucket *bucket;
-static char *
-estrdup(const char *s)
-{
- char *p;
-
- if (!(p = strdup(s)))
- err(1, "strdup");
- return p;
-}
-
-static void *
-ecalloc(size_t nmemb, size_t size)
-{
- void *p;
-
- if (!(p = calloc(nmemb, size)))
- err(1, "calloc");
- return p;
-}
-
-/* jenkins one-at-a-time hash */
-static uint32_t
-jenkins1(const char *s)
-{
- uint32_t hash = 0;
-
- for (; *s; s++) {
- hash += (int)*s;
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
- hash += (hash << 3);
- hash ^= (hash >> 11);
-
- return hash + (hash << 15);
-}
-
-/* print `len' columns of characters. If string is shorter pad the rest
- * with characters `pad`. */
-static void
-printutf8pad(FILE *fp, const char *s, size_t len, int pad)
-{
- wchar_t w;
- size_t n = 0, i;
- int r;
-
- for (i = 0; *s && n < len; i++, s++) {
- if (ISUTF8(*s)) {
- if ((r = mbtowc(&w, s, 4)) == -1)
- break;
- if ((r = wcwidth(w)) == -1)
- r = 1;
- n += (size_t)r;
- }
- putc(*s, fp);
- }
- for (; n < len; n++)
- putc(pad, fp);
-}
+static const uint32_t seed = 0x45931287;
static void
printfeed(FILE *fp, const char *feedname)
@@ -94,29 +36,37 @@ printfeed(FILE *fp, const char *feedname)
char *fields[FieldLast];
uint32_t hash;
int uniq;
+ ssize_t n;
+
+ while ((n = getline(&line, &linesize, fp)) > 0) {
+ /* getline() counts the newline in n and stores the NUL at line[n],
+  * so the delimiter to strip is the last byte read: line[n - 1].
+  * n > 0 is guaranteed by the loop condition. */
+ if (line[n - 1] == '\n')
+ line[--n] = '\0';
+ hash = murmur3_32(line, n, seed) % BUCKET_SIZE;
- while (parseline(&line, &linesize, fields, fp) > 0) {
- hash = (jenkins1(fields[FieldUnixTimestamp]) +
- jenkins1(fields[FieldId])) % BUCKET_SIZE;
for (uniq = 1, match = &(bucket->cols[hash]);
match;
match = match->next) {
/* check for collision, can still be unique. */
- if (match->id && !strcmp(match->id, fields[FieldId]) &&
- match->timestamp && !strcmp(match->timestamp, fields[FieldUnixTimestamp])) {
+ if (match->s && match->len == (size_t)n &&
+ !strcmp(line, match->s)) {
uniq = 0;
break;
}
/* nonexistent or no collision */
if (!match->next) {
- match = match->next = ecalloc(1, sizeof(struct line));
- match->id = estrdup(fields[FieldId]);
- match->timestamp = estrdup(fields[FieldUnixTimestamp]);
- break;
+ if (!(match = match->next = calloc(1, sizeof(struct line))))
+ err(1, "calloc");
+ if (!(match->s = strdup(line)))
+ err(1, "strdup");
+ match->len = (size_t)n;
+ break;
}
}
+
if (!uniq || firsttime)
continue;
+ if (!parseline(line, fields))
+ break;
if (feedname[0])
printf("%-15.15s %-30.30s",
feedname, fields[FieldTimeFormatted]);
@@ -132,7 +82,11 @@ main(int argc, char *argv[])
FILE *fp;
int i;
- bucket = buckets = ecalloc(argc, sizeof(struct bucket));
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
+ if (!(bucket = buckets = calloc(argc, sizeof(struct bucket))))
+ err(1, "calloc");
for (firsttime = (argc > 1); ; firsttime = 0) {
if (argc == 1) {
printfeed(stdin, "");
diff --git a/sfeed_web.c b/sfeed_web.c
@@ -1,4 +1,5 @@
#include <ctype.h>
+#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -19,10 +20,9 @@ static char abslink[4096], feedlink[4096], basehref[4096], feedtype[256];
+/* Write `s' to `fp', skipping every whitespace character. */
static void
printfeedtype(const char *s, FILE *fp)
{
- for (; *s; s++) {
- if (!isspace((int)*s))
+ for (; *s; s++)
+ /* <ctype.h> functions need an unsigned char value (or EOF); a
+  * negative plain char from a byte >= 0x80 is undefined behaviour */
+ if (!isspace((unsigned char)*s))
fputc(*s, fp);
- }
}
static void
@@ -90,6 +90,9 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
int
main(int argc, char *argv[])
{
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
if (argc > 1)
strlcpy(basehref, argv[1], sizeof(basehref));
diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c
@@ -1,10 +1,12 @@
#include <ctype.h>
+#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
+#include "util.h"
#include "xml.h"
static XMLParser parser;
@@ -57,6 +59,9 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
int
main(void)
{
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
parser.xmlattr = xmlattr;
parser.xmltagend = xmltagend;
parser.xmltagstart = xmltagstart;
diff --git a/util.c b/util.c
@@ -7,12 +7,25 @@
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
+#include <wchar.h>
#include "util.h"
+#ifndef USE_PLEDGE /* fallback definition for builds without -DUSE_PLEDGE */
+int
+pledge(const char *promises, const char *paths[]) /* stub: restricts nothing */
+{
+ (void)promises; /* unused; the cast marks the parameter as deliberately ignored */
+ (void)paths; /* unused */
+
+ return 0; /* always succeeds, so the callers' `== -1' checks never trigger */
+}
+#endif
+
static void
encodehex(unsigned char c, char *s)
{
@@ -130,7 +143,7 @@ absuri(const char *link, const char *base, char *buf, size_t bufsiz)
port[0] ? ":" : "",
port);
if (r == -1 || (size_t)r >= sizeof(tmp))
- return -1;
+ return -1; /* error or truncation */
/* relative to root */
if (!ulink.host[0] && ulink.path[0] != '/') {
@@ -191,16 +204,13 @@ encodeuri(const char *s, char *buf, size_t bufsiz)
* 'line' buffer is allocated using malloc, 'size' will contain the allocated
* buffer size.
* returns: amount of fields read (>0) or -1 on error. */
-ssize_t
-parseline(char **line, size_t *size, char *fields[FieldLast], FILE *fp)
+size_t
+parseline(char *line, char *fields[FieldLast])
{
char *prev, *s;
size_t i;
- if (getline(line, size, fp) <= 0)
- return -1;
-
- for (prev = *line, i = 0;
+ for (prev = line, i = 0;
(s = strchr(prev, '\t')) && i < FieldLast - 1;
i++) {
*s = '\0';
@@ -212,7 +222,7 @@ parseline(char **line, size_t *size, char *fields[FieldLast], FILE *fp)
for (; i < FieldLast; i++)
fields[i] = "";
- return (ssize_t)i;
+ return i;
}
/* Parse time to time_t, assumes time_t is signed. */
@@ -267,3 +277,78 @@ xbasename(const char *path)
free(p);
return b;
}
+
+/* Write `s' to `fp' until `len' display columns are used up or the string
+ * ends, then fill the columns that remain with the character `pad'. */
+void
+printutf8pad(FILE *fp, const char *s, size_t len, int pad)
+{
+	size_t col = 0;
+	wchar_t wc;
+	int width;
+
+	while (*s && col < len) {
+		/* only bytes accepted by ISUTF8() are decoded and counted;
+		 * every other byte is copied without advancing the column */
+		if (ISUTF8(*s)) {
+			if (mbtowc(&wc, s, 4) == -1)
+				break; /* undecodable: stop copying, padding still follows */
+			width = wcwidth(wc);
+			if (width == -1)
+				width = 1; /* no width known: count a single column */
+			col += (size_t)width;
+		}
+		putc(*s, fp);
+		s++;
+	}
+	while (col < len) {
+		putc(pad, fp);
+		col++;
+	}
+}
+
+/* 32-bit MurmurHash3 of the `len' bytes at `key', starting from `seed'.
+ * Whole 4-byte blocks are loaded in host byte order, so the value is only
+ * stable between machines of the same endianness.
+ * returns: the 32-bit hash value. */
+uint32_t
+murmur3_32(const char *key, uint32_t len, uint32_t seed)
+{
+	static const uint32_t c1 = 0xcc9e2d51;
+	static const uint32_t c2 = 0x1b873593;
+	static const uint32_t r1 = 15;
+	static const uint32_t r2 = 13;
+	static const uint32_t m = 5;
+	static const uint32_t n = 0xe6546b64;
+	/* unsigned block count: a signed int could overflow in `nblocks * 4' */
+	const uint32_t nblocks = len / 4;
+	const uint8_t *tail;
+	uint32_t hash = seed, i, k, k1;
+
+	for (i = 0; i < nblocks; i++) {
+		/* memcpy instead of reading through a uint32_t pointer: `key'
+		 * is a char buffer with no alignment guarantee, so a direct
+		 * 32-bit load would be undefined behaviour */
+		memcpy(&k, key + (size_t)i * 4, sizeof(k));
+		k *= c1;
+		k = ROT32(k, r1);
+		k *= c2;
+
+		hash ^= k;
+		hash = ROT32(hash, r2) * m + n;
+	}
+	tail = (const uint8_t *)key + (size_t)nblocks * 4;
+
+	/* mix in the 1-3 bytes left over after the last whole block */
+	k1 = 0;
+	switch (len & 3) {
+	case 3:
+		k1 ^= (uint32_t)tail[2] << 16;
+		/* fallthrough */
+	case 2:
+		k1 ^= (uint32_t)tail[1] << 8;
+		/* fallthrough */
+	case 1:
+		k1 ^= tail[0];
+
+		k1 *= c1;
+		k1 = ROT32(k1, r1);
+		k1 *= c2;
+		hash ^= k1;
+		break;
+	default:
+		break;
+	}
+
+	/* finalization: fold in the length and avalanche the bits */
+	hash ^= len;
+	hash ^= (hash >> 16);
+	hash *= 0x85ebca6b;
+	hash ^= (hash >> 13);
+	hash *= 0xc2b2ae35;
+	hash ^= (hash >> 16);
+
+	return hash;
+}
diff --git a/util.h b/util.h
@@ -31,8 +31,18 @@ enum {
int absuri(const char *, const char *, char *, size_t);
int encodeuri(const char *, char *, size_t);
-ssize_t parseline(char **, size_t *, char *[FieldLast], FILE *);
+size_t parseline(char *, char *[FieldLast]);
int parseuri(const char *, struct uri *, int);
+void printutf8pad(FILE *, const char *, size_t, int);
int strtotime(const char *, time_t *);
char * xbasename(const char *);
void xmlencode(const char *, FILE *);
+
+#ifdef USE_PLEDGE
+#include <unistd.h>
+#else
+int pledge(const char *, const char *[]);
+#endif
+
+/* Rotate the 32-bit unsigned value x left by y bits. y must be in 1..31
+ * (a shift by 32 is undefined) and x must have type uint32_t. Both
+ * arguments are evaluated twice, so they must be free of side effects. */
+#define ROT32(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
+uint32_t murmur3_32(const char *, uint32_t, uint32_t);