sfeed

simple feed reader - forked from https://git.codemadness.org/sfeed
git clone git://src.gearsix.net/sfeed | sfeed.zip
Log | Files | Refs | Atom | README | LICENSE

util.c (raw) (9023B)


   1 #include <errno.h>
   2 #include <stdarg.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6 #include <wchar.h>
   7 
   8 #include "util.h"
   9 
  10 /* print to stderr, print error message of errno and exit().
  11    Unlike BSD err() it does not prefix __progname */
  12 __dead void
  13 err(int exitstatus, const char *fmt, ...)
  14 {
  15 	va_list ap;
  16 	int saved_errno;
  17 
  18 	saved_errno = errno;
  19 
  20 	if (fmt) {
  21 		va_start(ap, fmt);
  22 		vfprintf(stderr, fmt, ap);
  23 		va_end(ap);
  24 		fputs(": ", stderr);
  25 	}
  26 	fprintf(stderr, "%s\n", strerror(saved_errno));
  27 
  28 	exit(exitstatus);
  29 }
  30 
  31 /* print to stderr and exit().
  32    Unlike BSD errx() it does not prefix __progname */
  33 __dead void
  34 errx(int exitstatus, const char *fmt, ...)
  35 {
  36 	va_list ap;
  37 
  38 	if (fmt) {
  39 		va_start(ap, fmt);
  40 		vfprintf(stderr, fmt, ap);
  41 		va_end(ap);
  42 	}
  43 	fputs("\n", stderr);
  44 
  45 	exit(exitstatus);
  46 }
  47 
  48 /* Handle read or write errors for a FILE * stream */
  49 void
  50 checkfileerror(FILE *fp, const char *name, int mode)
  51 {
  52 	if (mode == 'r' && ferror(fp))
  53 		errx(1, "read error: %s", name);
  54 	else if (mode == 'w' && (fflush(fp) || ferror(fp)))
  55 		errx(1, "write error: %s", name);
  56 }
  57 
  58 /* strcasestr() included for portability */
  59 char *
  60 strcasestr(const char *h, const char *n)
  61 {
  62 	size_t i;
  63 
  64 	if (!n[0])
  65 		return (char *)h;
  66 
  67 	for (; *h; ++h) {
  68 		for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) ==
  69 		            TOLOWER((unsigned char)h[i]); ++i)
  70 			;
  71 		if (n[i] == '\0')
  72 			return (char *)h;
  73 	}
  74 
  75 	return NULL;
  76 }
  77 
  78 /* Check if string has a non-empty scheme / protocol part. */
  79 int
  80 uri_hasscheme(const char *s)
  81 {
  82 	const char *p = s;
  83 
  84 	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
  85 		       *p == '+' || *p == '-' || *p == '.'; p++)
  86 		;
  87 	/* scheme, except if empty and starts with ":" then it is a path */
  88 	return (*p == ':' && p != s);
  89 }
  90 
  91 /* Parse URI string `s` into an uri structure `u`.
  92    Returns 0 on success or -1 on failure */
  93 int
  94 uri_parse(const char *s, struct uri *u)
  95 {
  96 	const char *p = s;
  97 	char *endptr;
  98 	size_t i;
  99 	long l;
 100 
 101 	u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
 102 	u->path[0] = u->query[0] = u->fragment[0] = '\0';
 103 
 104 	/* protocol-relative */
 105 	if (*p == '/' && *(p + 1) == '/') {
 106 		p += 2; /* skip "//" */
 107 		goto parseauth;
 108 	}
 109 
 110 	/* scheme / protocol part */
 111 	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
 112 		       *p == '+' || *p == '-' || *p == '.'; p++)
 113 		;
 114 	/* scheme, except if empty and starts with ":" then it is a path */
 115 	if (*p == ':' && p != s) {
 116 		if (*(p + 1) == '/' && *(p + 2) == '/')
 117 			p += 3; /* skip "://" */
 118 		else
 119 			p++; /* skip ":" */
 120 
 121 		if ((size_t)(p - s) >= sizeof(u->proto))
 122 			return -1; /* protocol too long */
 123 		memcpy(u->proto, s, p - s);
 124 		u->proto[p - s] = '\0';
 125 
 126 		if (*(p - 1) != '/')
 127 			goto parsepath;
 128 	} else {
 129 		p = s; /* no scheme format, reset to start */
 130 		goto parsepath;
 131 	}
 132 
 133 parseauth:
 134 	/* userinfo (username:password) */
 135 	i = strcspn(p, "@/?#");
 136 	if (p[i] == '@') {
 137 		if (i >= sizeof(u->userinfo))
 138 			return -1; /* userinfo too long */
 139 		memcpy(u->userinfo, p, i);
 140 		u->userinfo[i] = '\0';
 141 		p += i + 1;
 142 	}
 143 
 144 	/* IPv6 address */
 145 	if (*p == '[') {
 146 		/* bracket not found, host too short or too long */
 147 		i = strcspn(p, "]");
 148 		if (p[i] != ']' || i < 3)
 149 			return -1;
 150 		i++; /* including "]" */
 151 	} else {
 152 		/* domain / host part, skip until port, path or end. */
 153 		i = strcspn(p, ":/?#");
 154 	}
 155 	if (i >= sizeof(u->host))
 156 		return -1; /* host too long */
 157 	memcpy(u->host, p, i);
 158 	u->host[i] = '\0';
 159 	p += i;
 160 
 161 	/* port */
 162 	if (*p == ':') {
 163 		p++;
 164 		if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
 165 			return -1; /* port too long */
 166 		memcpy(u->port, p, i);
 167 		u->port[i] = '\0';
 168 		/* check for valid port: range 1 - 65535, may be empty */
 169 		errno = 0;
 170 		l = strtol(u->port, &endptr, 10);
 171 		if (i && (errno || *endptr || l <= 0 || l > 65535))
 172 			return -1;
 173 		p += i;
 174 	}
 175 
 176 parsepath:
 177 	/* path */
 178 	if ((i = strcspn(p, "?#")) >= sizeof(u->path))
 179 		return -1; /* path too long */
 180 	memcpy(u->path, p, i);
 181 	u->path[i] = '\0';
 182 	p += i;
 183 
 184 	/* query */
 185 	if (*p == '?') {
 186 		p++;
 187 		if ((i = strcspn(p, "#")) >= sizeof(u->query))
 188 			return -1; /* query too long */
 189 		memcpy(u->query, p, i);
 190 		u->query[i] = '\0';
 191 		p += i;
 192 	}
 193 
 194 	/* fragment */
 195 	if (*p == '#') {
 196 		p++;
 197 		if ((i = strlen(p)) >= sizeof(u->fragment))
 198 			return -1; /* fragment too long */
 199 		memcpy(u->fragment, p, i);
 200 		u->fragment[i] = '\0';
 201 	}
 202 
 203 	return 0;
 204 }
 205 
 206 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
 207    Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
 208    Returns 0 on success, -1 on error or truncation. */
 209 int
 210 uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
 211 {
 212 	char *p;
 213 	int c;
 214 
 215 	strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
 216 
 217 	if (u->proto[0] || u->host[0]) {
 218 		strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
 219 		strlcpy(a->host, u->host, sizeof(a->host));
 220 		strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
 221 		strlcpy(a->host, u->host, sizeof(a->host));
 222 		strlcpy(a->port, u->port, sizeof(a->port));
 223 		strlcpy(a->path, u->path, sizeof(a->path));
 224 		strlcpy(a->query, u->query, sizeof(a->query));
 225 		return 0;
 226 	}
 227 
 228 	strlcpy(a->proto, b->proto, sizeof(a->proto));
 229 	strlcpy(a->host, b->host, sizeof(a->host));
 230 	strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
 231 	strlcpy(a->host, b->host, sizeof(a->host));
 232 	strlcpy(a->port, b->port, sizeof(a->port));
 233 
 234 	if (!u->path[0]) {
 235 		strlcpy(a->path, b->path, sizeof(a->path));
 236 	} else if (u->path[0] == '/') {
 237 		strlcpy(a->path, u->path, sizeof(a->path));
 238 	} else {
 239 		a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
 240 		a->path[1] = '\0';
 241 
 242 		if ((p = strrchr(b->path, '/'))) {
 243 			c = *(++p);
 244 			*p = '\0'; /* temporary NUL-terminate */
 245 			if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
 246 				return -1;
 247 			*p = c; /* restore */
 248 		}
 249 		if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
 250 			return -1;
 251 	}
 252 
 253 	if (u->path[0] || u->query[0])
 254 		strlcpy(a->query, u->query, sizeof(a->query));
 255 	else
 256 		strlcpy(a->query, b->query, sizeof(a->query));
 257 
 258 	return 0;
 259 }
 260 
 261 int
 262 uri_format(char *buf, size_t bufsiz, struct uri *u)
 263 {
 264 	return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
 265 		u->proto,
 266 		u->userinfo[0] ? u->userinfo : "",
 267 		u->userinfo[0] ? "@" : "",
 268 		u->host,
 269 		u->port[0] ? ":" : "",
 270 		u->port,
 271 		u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
 272 		u->path,
 273 		u->query[0] ? "?" : "",
 274 		u->query,
 275 		u->fragment[0] ? "#" : "",
 276 		u->fragment);
 277 }
 278 
 279 /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
 280  * terminators and assign these fields as pointers. If there are less fields
 281  * than expected then the field is an empty string constant. */
 282 void
 283 parseline(char *line, char *fields[FieldLast])
 284 {
 285 	char *prev, *s;
 286 	size_t i;
 287 
 288 	for (prev = line, i = 0;
 289 	    (s = strchr(prev, '\t')) && i < FieldLast - 1;
 290 	    i++) {
 291 		*s = '\0';
 292 		fields[i] = prev;
 293 		prev = s + 1;
 294 	}
 295 	fields[i++] = prev;
 296 	/* make non-parsed fields empty. */
 297 	for (; i < FieldLast; i++)
 298 		fields[i] = "";
 299 }
 300 
 301 /* Parse time to time_t, assumes time_t is signed, ignores fractions. */
 302 int
 303 strtotime(const char *s, time_t *t)
 304 {
 305 	long long l;
 306 	char *e;
 307 
 308 	errno = 0;
 309 	l = strtoll(s, &e, 10);
 310 	if (errno || *s == '\0' || *e)
 311 		return -1;
 312 
 313 	/* NOTE: the type long long supports the 64-bit range. If time_t is
 314 	   64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
 315 	if (t)
 316 		*t = (time_t)l;
 317 
 318 	return 0;
 319 }
 320 
 321 /* Escape characters below as HTML 2.0 / XML 1.0. */
 322 void
 323 xmlencode(const char *s, FILE *fp)
 324 {
 325 	for (; *s; ++s) {
 326 		switch (*s) {
 327 		case '<':  fputs("&lt;",   fp); break;
 328 		case '>':  fputs("&gt;",   fp); break;
 329 		case '\'': fputs("&#39;",  fp); break;
 330 		case '&':  fputs("&amp;",  fp); break;
 331 		case '"':  fputs("&quot;", fp); break;
 332 		default:   putc(*s, fp);
 333 		}
 334 	}
 335 }
 336 
 337 /* print `len` columns of characters. If string is shorter pad the rest with
 338  * characters `pad`. */
 339 void
 340 printutf8pad(FILE *fp, const char *s, size_t len, int pad)
 341 {
 342 	wchar_t wc;
 343 	size_t col = 0, i, slen;
 344 	int inc, rl, w;
 345 
 346 	if (!len)
 347 		return;
 348 
 349 	slen = strlen(s);
 350 	for (i = 0; i < slen; i += inc) {
 351 		inc = 1; /* next byte */
 352 		if ((unsigned char)s[i] < 32) {
 353 			continue; /* skip control characters */
 354 		} else if ((unsigned char)s[i] >= 127) {
 355 			rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
 356 			inc = rl;
 357 			if (rl < 0) {
 358 				mbtowc(NULL, NULL, 0); /* reset state */
 359 				inc = 1; /* invalid, seek next byte */
 360 				w = 1; /* replacement char is one width */
 361 			} else if ((w = wcwidth(wc)) == -1) {
 362 				continue;
 363 			}
 364 
 365 			if (col + w > len || (col + w == len && s[i + inc])) {
 366 				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
 367 				col++;
 368 				break;
 369 			} else if (rl < 0) {
 370 				fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
 371 				col++;
 372 				continue;
 373 			}
 374 			fwrite(&s[i], 1, rl, fp);
 375 			col += w;
 376 		} else {
 377 			/* optimization: simple ASCII character */
 378 			if (col + 1 > len || (col + 1 == len && s[i + 1])) {
 379 				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
 380 				col++;
 381 				break;
 382 			}
 383 			putc(s[i], fp);
 384 			col++;
 385 		}
 386 
 387 	}
 388 	for (; col < len; ++col)
 389 		putc(pad, fp);
 390 }