sfeed

simple feed reader - forked from git.codemadness.org/sfeed
git clone git://src.gearsix.net/sfeed
Log | Files | Refs | Atom | README | LICENSE

util.c (9023B)


      1 #include <errno.h>
      2 #include <stdarg.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 #include <wchar.h>
      7 
      8 #include "util.h"
      9 
     10 /* print to stderr, print error message of errno and exit().
     11    Unlike BSD err() it does not prefix __progname */
     12 __dead void
     13 err(int exitstatus, const char *fmt, ...)
     14 {
     15 	va_list ap;
     16 	int saved_errno;
     17 
     18 	saved_errno = errno;
     19 
     20 	if (fmt) {
     21 		va_start(ap, fmt);
     22 		vfprintf(stderr, fmt, ap);
     23 		va_end(ap);
     24 		fputs(": ", stderr);
     25 	}
     26 	fprintf(stderr, "%s\n", strerror(saved_errno));
     27 
     28 	exit(exitstatus);
     29 }
     30 
     31 /* print to stderr and exit().
     32    Unlike BSD errx() it does not prefix __progname */
     33 __dead void
     34 errx(int exitstatus, const char *fmt, ...)
     35 {
     36 	va_list ap;
     37 
     38 	if (fmt) {
     39 		va_start(ap, fmt);
     40 		vfprintf(stderr, fmt, ap);
     41 		va_end(ap);
     42 	}
     43 	fputs("\n", stderr);
     44 
     45 	exit(exitstatus);
     46 }
     47 
     48 /* Handle read or write errors for a FILE * stream */
     49 void
     50 checkfileerror(FILE *fp, const char *name, int mode)
     51 {
     52 	if (mode == 'r' && ferror(fp))
     53 		errx(1, "read error: %s", name);
     54 	else if (mode == 'w' && (fflush(fp) || ferror(fp)))
     55 		errx(1, "write error: %s", name);
     56 }
     57 
     58 /* strcasestr() included for portability */
     59 char *
     60 strcasestr(const char *h, const char *n)
     61 {
     62 	size_t i;
     63 
     64 	if (!n[0])
     65 		return (char *)h;
     66 
     67 	for (; *h; ++h) {
     68 		for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) ==
     69 		            TOLOWER((unsigned char)h[i]); ++i)
     70 			;
     71 		if (n[i] == '\0')
     72 			return (char *)h;
     73 	}
     74 
     75 	return NULL;
     76 }
     77 
     78 /* Check if string has a non-empty scheme / protocol part. */
     79 int
     80 uri_hasscheme(const char *s)
     81 {
     82 	const char *p = s;
     83 
     84 	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
     85 		       *p == '+' || *p == '-' || *p == '.'; p++)
     86 		;
     87 	/* scheme, except if empty and starts with ":" then it is a path */
     88 	return (*p == ':' && p != s);
     89 }
     90 
     91 /* Parse URI string `s` into an uri structure `u`.
     92    Returns 0 on success or -1 on failure */
     93 int
     94 uri_parse(const char *s, struct uri *u)
     95 {
     96 	const char *p = s;
     97 	char *endptr;
     98 	size_t i;
     99 	long l;
    100 
    101 	u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
    102 	u->path[0] = u->query[0] = u->fragment[0] = '\0';
    103 
    104 	/* protocol-relative */
    105 	if (*p == '/' && *(p + 1) == '/') {
    106 		p += 2; /* skip "//" */
    107 		goto parseauth;
    108 	}
    109 
    110 	/* scheme / protocol part */
    111 	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
    112 		       *p == '+' || *p == '-' || *p == '.'; p++)
    113 		;
    114 	/* scheme, except if empty and starts with ":" then it is a path */
    115 	if (*p == ':' && p != s) {
    116 		if (*(p + 1) == '/' && *(p + 2) == '/')
    117 			p += 3; /* skip "://" */
    118 		else
    119 			p++; /* skip ":" */
    120 
    121 		if ((size_t)(p - s) >= sizeof(u->proto))
    122 			return -1; /* protocol too long */
    123 		memcpy(u->proto, s, p - s);
    124 		u->proto[p - s] = '\0';
    125 
    126 		if (*(p - 1) != '/')
    127 			goto parsepath;
    128 	} else {
    129 		p = s; /* no scheme format, reset to start */
    130 		goto parsepath;
    131 	}
    132 
    133 parseauth:
    134 	/* userinfo (username:password) */
    135 	i = strcspn(p, "@/?#");
    136 	if (p[i] == '@') {
    137 		if (i >= sizeof(u->userinfo))
    138 			return -1; /* userinfo too long */
    139 		memcpy(u->userinfo, p, i);
    140 		u->userinfo[i] = '\0';
    141 		p += i + 1;
    142 	}
    143 
    144 	/* IPv6 address */
    145 	if (*p == '[') {
    146 		/* bracket not found, host too short or too long */
    147 		i = strcspn(p, "]");
    148 		if (p[i] != ']' || i < 3)
    149 			return -1;
    150 		i++; /* including "]" */
    151 	} else {
    152 		/* domain / host part, skip until port, path or end. */
    153 		i = strcspn(p, ":/?#");
    154 	}
    155 	if (i >= sizeof(u->host))
    156 		return -1; /* host too long */
    157 	memcpy(u->host, p, i);
    158 	u->host[i] = '\0';
    159 	p += i;
    160 
    161 	/* port */
    162 	if (*p == ':') {
    163 		p++;
    164 		if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
    165 			return -1; /* port too long */
    166 		memcpy(u->port, p, i);
    167 		u->port[i] = '\0';
    168 		/* check for valid port: range 1 - 65535, may be empty */
    169 		errno = 0;
    170 		l = strtol(u->port, &endptr, 10);
    171 		if (i && (errno || *endptr || l <= 0 || l > 65535))
    172 			return -1;
    173 		p += i;
    174 	}
    175 
    176 parsepath:
    177 	/* path */
    178 	if ((i = strcspn(p, "?#")) >= sizeof(u->path))
    179 		return -1; /* path too long */
    180 	memcpy(u->path, p, i);
    181 	u->path[i] = '\0';
    182 	p += i;
    183 
    184 	/* query */
    185 	if (*p == '?') {
    186 		p++;
    187 		if ((i = strcspn(p, "#")) >= sizeof(u->query))
    188 			return -1; /* query too long */
    189 		memcpy(u->query, p, i);
    190 		u->query[i] = '\0';
    191 		p += i;
    192 	}
    193 
    194 	/* fragment */
    195 	if (*p == '#') {
    196 		p++;
    197 		if ((i = strlen(p)) >= sizeof(u->fragment))
    198 			return -1; /* fragment too long */
    199 		memcpy(u->fragment, p, i);
    200 		u->fragment[i] = '\0';
    201 	}
    202 
    203 	return 0;
    204 }
    205 
    206 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
    207    Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
    208    Returns 0 on success, -1 on error or truncation. */
    209 int
    210 uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
    211 {
    212 	char *p;
    213 	int c;
    214 
    215 	strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
    216 
    217 	if (u->proto[0] || u->host[0]) {
    218 		strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
    219 		strlcpy(a->host, u->host, sizeof(a->host));
    220 		strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
    221 		strlcpy(a->host, u->host, sizeof(a->host));
    222 		strlcpy(a->port, u->port, sizeof(a->port));
    223 		strlcpy(a->path, u->path, sizeof(a->path));
    224 		strlcpy(a->query, u->query, sizeof(a->query));
    225 		return 0;
    226 	}
    227 
    228 	strlcpy(a->proto, b->proto, sizeof(a->proto));
    229 	strlcpy(a->host, b->host, sizeof(a->host));
    230 	strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
    231 	strlcpy(a->host, b->host, sizeof(a->host));
    232 	strlcpy(a->port, b->port, sizeof(a->port));
    233 
    234 	if (!u->path[0]) {
    235 		strlcpy(a->path, b->path, sizeof(a->path));
    236 	} else if (u->path[0] == '/') {
    237 		strlcpy(a->path, u->path, sizeof(a->path));
    238 	} else {
    239 		a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
    240 		a->path[1] = '\0';
    241 
    242 		if ((p = strrchr(b->path, '/'))) {
    243 			c = *(++p);
    244 			*p = '\0'; /* temporary NUL-terminate */
    245 			if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
    246 				return -1;
    247 			*p = c; /* restore */
    248 		}
    249 		if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
    250 			return -1;
    251 	}
    252 
    253 	if (u->path[0] || u->query[0])
    254 		strlcpy(a->query, u->query, sizeof(a->query));
    255 	else
    256 		strlcpy(a->query, b->query, sizeof(a->query));
    257 
    258 	return 0;
    259 }
    260 
    261 int
    262 uri_format(char *buf, size_t bufsiz, struct uri *u)
    263 {
    264 	return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
    265 		u->proto,
    266 		u->userinfo[0] ? u->userinfo : "",
    267 		u->userinfo[0] ? "@" : "",
    268 		u->host,
    269 		u->port[0] ? ":" : "",
    270 		u->port,
    271 		u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
    272 		u->path,
    273 		u->query[0] ? "?" : "",
    274 		u->query,
    275 		u->fragment[0] ? "#" : "",
    276 		u->fragment);
    277 }
    278 
    279 /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
    280  * terminators and assign these fields as pointers. If there are less fields
    281  * than expected then the field is an empty string constant. */
    282 void
    283 parseline(char *line, char *fields[FieldLast])
    284 {
    285 	char *prev, *s;
    286 	size_t i;
    287 
    288 	for (prev = line, i = 0;
    289 	    (s = strchr(prev, '\t')) && i < FieldLast - 1;
    290 	    i++) {
    291 		*s = '\0';
    292 		fields[i] = prev;
    293 		prev = s + 1;
    294 	}
    295 	fields[i++] = prev;
    296 	/* make non-parsed fields empty. */
    297 	for (; i < FieldLast; i++)
    298 		fields[i] = "";
    299 }
    300 
    301 /* Parse time to time_t, assumes time_t is signed, ignores fractions. */
    302 int
    303 strtotime(const char *s, time_t *t)
    304 {
    305 	long long l;
    306 	char *e;
    307 
    308 	errno = 0;
    309 	l = strtoll(s, &e, 10);
    310 	if (errno || *s == '\0' || *e)
    311 		return -1;
    312 
    313 	/* NOTE: the type long long supports the 64-bit range. If time_t is
    314 	   64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
    315 	if (t)
    316 		*t = (time_t)l;
    317 
    318 	return 0;
    319 }
    320 
    321 /* Escape characters below as HTML 2.0 / XML 1.0. */
    322 void
    323 xmlencode(const char *s, FILE *fp)
    324 {
    325 	for (; *s; ++s) {
    326 		switch (*s) {
    327 		case '<':  fputs("&lt;",   fp); break;
    328 		case '>':  fputs("&gt;",   fp); break;
    329 		case '\'': fputs("&#39;",  fp); break;
    330 		case '&':  fputs("&amp;",  fp); break;
    331 		case '"':  fputs("&quot;", fp); break;
    332 		default:   putc(*s, fp);
    333 		}
    334 	}
    335 }
    336 
    337 /* print `len` columns of characters. If string is shorter pad the rest with
    338  * characters `pad`. */
    339 void
    340 printutf8pad(FILE *fp, const char *s, size_t len, int pad)
    341 {
    342 	wchar_t wc;
    343 	size_t col = 0, i, slen;
    344 	int inc, rl, w;
    345 
    346 	if (!len)
    347 		return;
    348 
    349 	slen = strlen(s);
    350 	for (i = 0; i < slen; i += inc) {
    351 		inc = 1; /* next byte */
    352 		if ((unsigned char)s[i] < 32) {
    353 			continue; /* skip control characters */
    354 		} else if ((unsigned char)s[i] >= 127) {
    355 			rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
    356 			inc = rl;
    357 			if (rl < 0) {
    358 				mbtowc(NULL, NULL, 0); /* reset state */
    359 				inc = 1; /* invalid, seek next byte */
    360 				w = 1; /* replacement char is one width */
    361 			} else if ((w = wcwidth(wc)) == -1) {
    362 				continue;
    363 			}
    364 
    365 			if (col + w > len || (col + w == len && s[i + inc])) {
    366 				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
    367 				col++;
    368 				break;
    369 			} else if (rl < 0) {
    370 				fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
    371 				col++;
    372 				continue;
    373 			}
    374 			fwrite(&s[i], 1, rl, fp);
    375 			col += w;
    376 		} else {
    377 			/* optimization: simple ASCII character */
    378 			if (col + 1 > len || (col + 1 == len && s[i + 1])) {
    379 				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
    380 				col++;
    381 				break;
    382 			}
    383 			putc(s[i], fp);
    384 			col++;
    385 		}
    386 
    387 	}
    388 	for (; col < len; ++col)
    389 		putc(pad, fp);
    390 }