/* util.c */
1 #include <errno.h> 2 #include <stdarg.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 #include <wchar.h> 7 8 #include "util.h" 9 10 /* print to stderr, print error message of errno and exit(). 11 Unlike BSD err() it does not prefix __progname */ 12 __dead void 13 err(int exitstatus, const char *fmt, ...) 14 { 15 va_list ap; 16 int saved_errno; 17 18 saved_errno = errno; 19 20 if (fmt) { 21 va_start(ap, fmt); 22 vfprintf(stderr, fmt, ap); 23 va_end(ap); 24 fputs(": ", stderr); 25 } 26 fprintf(stderr, "%s\n", strerror(saved_errno)); 27 28 exit(exitstatus); 29 } 30 31 /* print to stderr and exit(). 32 Unlike BSD errx() it does not prefix __progname */ 33 __dead void 34 errx(int exitstatus, const char *fmt, ...) 35 { 36 va_list ap; 37 38 if (fmt) { 39 va_start(ap, fmt); 40 vfprintf(stderr, fmt, ap); 41 va_end(ap); 42 } 43 fputs("\n", stderr); 44 45 exit(exitstatus); 46 } 47 48 /* Handle read or write errors for a FILE * stream */ 49 void 50 checkfileerror(FILE *fp, const char *name, int mode) 51 { 52 if (mode == 'r' && ferror(fp)) 53 errx(1, "read error: %s", name); 54 else if (mode == 'w' && (fflush(fp) || ferror(fp))) 55 errx(1, "write error: %s", name); 56 } 57 58 /* strcasestr() included for portability */ 59 char * 60 strcasestr(const char *h, const char *n) 61 { 62 size_t i; 63 64 if (!n[0]) 65 return (char *)h; 66 67 for (; *h; ++h) { 68 for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) == 69 TOLOWER((unsigned char)h[i]); ++i) 70 ; 71 if (n[i] == '\0') 72 return (char *)h; 73 } 74 75 return NULL; 76 } 77 78 /* Check if string has a non-empty scheme / protocol part. */ 79 int 80 uri_hasscheme(const char *s) 81 { 82 const char *p = s; 83 84 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || 85 *p == '+' || *p == '-' || *p == '.'; p++) 86 ; 87 /* scheme, except if empty and starts with ":" then it is a path */ 88 return (*p == ':' && p != s); 89 } 90 91 /* Parse URI string `s` into an uri structure `u`. 
92 Returns 0 on success or -1 on failure */ 93 int 94 uri_parse(const char *s, struct uri *u) 95 { 96 const char *p = s; 97 char *endptr; 98 size_t i; 99 long l; 100 101 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; 102 u->path[0] = u->query[0] = u->fragment[0] = '\0'; 103 104 /* protocol-relative */ 105 if (*p == '/' && *(p + 1) == '/') { 106 p += 2; /* skip "//" */ 107 goto parseauth; 108 } 109 110 /* scheme / protocol part */ 111 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || 112 *p == '+' || *p == '-' || *p == '.'; p++) 113 ; 114 /* scheme, except if empty and starts with ":" then it is a path */ 115 if (*p == ':' && p != s) { 116 if (*(p + 1) == '/' && *(p + 2) == '/') 117 p += 3; /* skip "://" */ 118 else 119 p++; /* skip ":" */ 120 121 if ((size_t)(p - s) >= sizeof(u->proto)) 122 return -1; /* protocol too long */ 123 memcpy(u->proto, s, p - s); 124 u->proto[p - s] = '\0'; 125 126 if (*(p - 1) != '/') 127 goto parsepath; 128 } else { 129 p = s; /* no scheme format, reset to start */ 130 goto parsepath; 131 } 132 133 parseauth: 134 /* userinfo (username:password) */ 135 i = strcspn(p, "@/?#"); 136 if (p[i] == '@') { 137 if (i >= sizeof(u->userinfo)) 138 return -1; /* userinfo too long */ 139 memcpy(u->userinfo, p, i); 140 u->userinfo[i] = '\0'; 141 p += i + 1; 142 } 143 144 /* IPv6 address */ 145 if (*p == '[') { 146 /* bracket not found, host too short or too long */ 147 i = strcspn(p, "]"); 148 if (p[i] != ']' || i < 3) 149 return -1; 150 i++; /* including "]" */ 151 } else { 152 /* domain / host part, skip until port, path or end. 
*/ 153 i = strcspn(p, ":/?#"); 154 } 155 if (i >= sizeof(u->host)) 156 return -1; /* host too long */ 157 memcpy(u->host, p, i); 158 u->host[i] = '\0'; 159 p += i; 160 161 /* port */ 162 if (*p == ':') { 163 p++; 164 if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) 165 return -1; /* port too long */ 166 memcpy(u->port, p, i); 167 u->port[i] = '\0'; 168 /* check for valid port: range 1 - 65535, may be empty */ 169 errno = 0; 170 l = strtol(u->port, &endptr, 10); 171 if (i && (errno || *endptr || l <= 0 || l > 65535)) 172 return -1; 173 p += i; 174 } 175 176 parsepath: 177 /* path */ 178 if ((i = strcspn(p, "?#")) >= sizeof(u->path)) 179 return -1; /* path too long */ 180 memcpy(u->path, p, i); 181 u->path[i] = '\0'; 182 p += i; 183 184 /* query */ 185 if (*p == '?') { 186 p++; 187 if ((i = strcspn(p, "#")) >= sizeof(u->query)) 188 return -1; /* query too long */ 189 memcpy(u->query, p, i); 190 u->query[i] = '\0'; 191 p += i; 192 } 193 194 /* fragment */ 195 if (*p == '#') { 196 p++; 197 if ((i = strlen(p)) >= sizeof(u->fragment)) 198 return -1; /* fragment too long */ 199 memcpy(u->fragment, p, i); 200 u->fragment[i] = '\0'; 201 } 202 203 return 0; 204 } 205 206 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`. 207 Follows some of the logic from "RFC 3986 - 5.2.2. Transform References". 208 Returns 0 on success, -1 on error or truncation. */ 209 int 210 uri_makeabs(struct uri *a, struct uri *u, struct uri *b) 211 { 212 char *p; 213 int c; 214 215 strlcpy(a->fragment, u->fragment, sizeof(a->fragment)); 216 217 if (u->proto[0] || u->host[0]) { 218 strlcpy(a->proto, u->proto[0] ? 
u->proto : b->proto, sizeof(a->proto)); 219 strlcpy(a->host, u->host, sizeof(a->host)); 220 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo)); 221 strlcpy(a->host, u->host, sizeof(a->host)); 222 strlcpy(a->port, u->port, sizeof(a->port)); 223 strlcpy(a->path, u->path, sizeof(a->path)); 224 strlcpy(a->query, u->query, sizeof(a->query)); 225 return 0; 226 } 227 228 strlcpy(a->proto, b->proto, sizeof(a->proto)); 229 strlcpy(a->host, b->host, sizeof(a->host)); 230 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo)); 231 strlcpy(a->host, b->host, sizeof(a->host)); 232 strlcpy(a->port, b->port, sizeof(a->port)); 233 234 if (!u->path[0]) { 235 strlcpy(a->path, b->path, sizeof(a->path)); 236 } else if (u->path[0] == '/') { 237 strlcpy(a->path, u->path, sizeof(a->path)); 238 } else { 239 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0'; 240 a->path[1] = '\0'; 241 242 if ((p = strrchr(b->path, '/'))) { 243 c = *(++p); 244 *p = '\0'; /* temporary NUL-terminate */ 245 if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path)) 246 return -1; 247 *p = c; /* restore */ 248 } 249 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path)) 250 return -1; 251 } 252 253 if (u->path[0] || u->query[0]) 254 strlcpy(a->query, u->query, sizeof(a->query)); 255 else 256 strlcpy(a->query, b->query, sizeof(a->query)); 257 258 return 0; 259 } 260 261 int 262 uri_format(char *buf, size_t bufsiz, struct uri *u) 263 { 264 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s", 265 u->proto, 266 u->userinfo[0] ? u->userinfo : "", 267 u->userinfo[0] ? "@" : "", 268 u->host, 269 u->port[0] ? ":" : "", 270 u->port, 271 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "", 272 u->path, 273 u->query[0] ? "?" : "", 274 u->query, 275 u->fragment[0] ? "#" : "", 276 u->fragment); 277 } 278 279 /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0') 280 * terminators and assign these fields as pointers. 
If there are less fields 281 * than expected then the field is an empty string constant. */ 282 void 283 parseline(char *line, char *fields[FieldLast]) 284 { 285 char *prev, *s; 286 size_t i; 287 288 for (prev = line, i = 0; 289 (s = strchr(prev, '\t')) && i < FieldLast - 1; 290 i++) { 291 *s = '\0'; 292 fields[i] = prev; 293 prev = s + 1; 294 } 295 fields[i++] = prev; 296 /* make non-parsed fields empty. */ 297 for (; i < FieldLast; i++) 298 fields[i] = ""; 299 } 300 301 /* Parse time to time_t, assumes time_t is signed, ignores fractions. */ 302 int 303 strtotime(const char *s, time_t *t) 304 { 305 long long l; 306 char *e; 307 308 errno = 0; 309 l = strtoll(s, &e, 10); 310 if (errno || *s == '\0' || *e) 311 return -1; 312 313 /* NOTE: the type long long supports the 64-bit range. If time_t is 314 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */ 315 if (t) 316 *t = (time_t)l; 317 318 return 0; 319 } 320 321 /* Escape characters below as HTML 2.0 / XML 1.0. */ 322 void 323 xmlencode(const char *s, FILE *fp) 324 { 325 for (; *s; ++s) { 326 switch (*s) { 327 case '<': fputs("<", fp); break; 328 case '>': fputs(">", fp); break; 329 case '\'': fputs("'", fp); break; 330 case '&': fputs("&", fp); break; 331 case '"': fputs(""", fp); break; 332 default: putc(*s, fp); 333 } 334 } 335 } 336 337 /* print `len` columns of characters. If string is shorter pad the rest with 338 * characters `pad`. */ 339 void 340 printutf8pad(FILE *fp, const char *s, size_t len, int pad) 341 { 342 wchar_t wc; 343 size_t col = 0, i, slen; 344 int inc, rl, w; 345 346 if (!len) 347 return; 348 349 slen = strlen(s); 350 for (i = 0; i < slen; i += inc) { 351 inc = 1; /* next byte */ 352 if ((unsigned char)s[i] < 32) { 353 continue; /* skip control characters */ 354 } else if ((unsigned char)s[i] >= 127) { 355 rl = mbtowc(&wc, s + i, slen - i < 4 ? 
slen - i : 4); 356 inc = rl; 357 if (rl < 0) { 358 mbtowc(NULL, NULL, 0); /* reset state */ 359 inc = 1; /* invalid, seek next byte */ 360 w = 1; /* replacement char is one width */ 361 } else if ((w = wcwidth(wc)) == -1) { 362 continue; 363 } 364 365 if (col + w > len || (col + w == len && s[i + inc])) { 366 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */ 367 col++; 368 break; 369 } else if (rl < 0) { 370 fputs(UTF_INVALID_SYMBOL, fp); /* replacement */ 371 col++; 372 continue; 373 } 374 fwrite(&s[i], 1, rl, fp); 375 col += w; 376 } else { 377 /* optimization: simple ASCII character */ 378 if (col + 1 > len || (col + 1 == len && s[i + 1])) { 379 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */ 380 col++; 381 break; 382 } 383 putc(s[i], fp); 384 col++; 385 } 386 387 } 388 for (; col < len; ++col) 389 putc(pad, fp); 390 }