commit d5ee385b4b5f19934a00408a2addc70f965ea4a9
parent 880256b8bfde746cd54993f3abcb4dc648895af7
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Tue, 29 Mar 2022 11:03:54 +0200
compatibility: reduce the assumption the builtin libc locale is ASCII-compatible
This is not clearly defined by the C99 standard.
Define ctype-like macros to force it to be ASCII / UTF-8 (not extended ASCII or
something similar, as noticed on OpenBSD 3.8).
(In practice, modern libc implementations are all ASCII and UTF-8-compatible.
Otherwise this would break many programs.)
Diffstat:
7 files changed, 48 insertions(+), 46 deletions(-)
diff --git a/sfeed.c b/sfeed.c
@@ -246,7 +246,7 @@ gettag(enum FeedType feedtype, const char *name, size_t namelen)
static char *
ltrim(const char *s)
{
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
return (char *)s;
}
@@ -256,7 +256,7 @@ rtrim(const char *s)
{
const char *e;
- for (e = s + strlen(s); e > s && isspace((unsigned char)*(e - 1)); e--)
+ for (e = s + strlen(s); e > s && ISSPACE((unsigned char)*(e - 1)); e--)
;
return (char *)e;
}
@@ -341,7 +341,7 @@ printtrimmed(const char *s)
p = ltrim(s);
e = rtrim(p);
for (; *p && p != e; p++) {
- if (isspace((unsigned char)*p))
+ if (ISSPACE((unsigned char)*p))
putchar(' '); /* any whitespace to space */
else if (!ISCNTRL((unsigned char)*p))
/* ignore other control chars */
@@ -514,20 +514,20 @@ gettzoffset(const char *s)
long tzhour = 0, tzmin = 0;
size_t i;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
switch (*s) {
case '-': /* offset */
case '+':
- for (i = 0, p = s + 1; i < 2 && isdigit((unsigned char)*p); i++, p++)
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
tzhour = (tzhour * 10) + (*p - '0');
if (*p == ':')
p++;
- for (i = 0; i < 2 && isdigit((unsigned char)*p); i++, p++)
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
tzmin = (tzmin * 10) + (*p - '0');
return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
default: /* timezone name */
- for (i = 0; isalpha((unsigned char)s[i]); i++)
+ for (i = 0; ISALPHA((unsigned char)s[i]); i++)
;
if (i != 3)
return 0;
@@ -565,35 +565,35 @@ parsetime(const char *s, long long *tp)
int va[6] = { 0 }, i, j, v, vi;
size_t m;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
- if (!isdigit((unsigned char)*s) && !isalpha((unsigned char)*s))
+ if (!ISDIGIT((unsigned char)*s) && !ISALPHA((unsigned char)*s))
return -1;
- if (isdigit((unsigned char)s[0]) &&
- isdigit((unsigned char)s[1]) &&
- isdigit((unsigned char)s[2]) &&
- isdigit((unsigned char)s[3])) {
+ if (ISDIGIT((unsigned char)s[0]) &&
+ ISDIGIT((unsigned char)s[1]) &&
+ ISDIGIT((unsigned char)s[2]) &&
+ ISDIGIT((unsigned char)s[3])) {
/* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
vi = 0;
} else {
/* format: "[%a, ]%d %b %Y %H:%M:%S" */
/* parse "[%a, ]%d %b %Y " part, then use time parsing as above */
- for (; isalpha((unsigned char)*s); s++)
+ for (; ISALPHA((unsigned char)*s); s++)
;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
if (*s == ',')
s++;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
- for (v = 0, i = 0; i < 2 && isdigit((unsigned char)*s); s++, i++)
+ for (v = 0, i = 0; i < 2 && ISDIGIT((unsigned char)*s); s++, i++)
v = (v * 10) + (*s - '0');
va[2] = v; /* day */
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
/* end of word month */
- for (j = 0; isalpha((unsigned char)s[j]); j++)
+ for (j = 0; ISALPHA((unsigned char)s[j]); j++)
;
/* check month name */
if (j < 3 || j > 9)
@@ -609,15 +609,15 @@ parsetime(const char *s, long long *tp)
}
if (m >= 12)
return -1; /* no month found */
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
- for (v = 0, i = 0; i < 4 && isdigit((unsigned char)*s); s++, i++)
+ for (v = 0, i = 0; i < 4 && ISDIGIT((unsigned char)*s); s++, i++)
v = (v * 10) + (*s - '0');
/* obsolete short year: RFC2822 4.3 */
if (i <= 3)
v += (v >= 0 && v <= 49) ? 2000 : 1900;
va[0] = v; /* year */
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
/* parse only regular time part, see below */
vi = 3;
@@ -626,20 +626,20 @@ parsetime(const char *s, long long *tp)
/* parse time parts (and possibly remaining date parts) */
for (; *s && vi < 6; vi++) {
for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
- isdigit((unsigned char)*s); s++, i++) {
+ ISDIGIT((unsigned char)*s); s++, i++) {
v = (v * 10) + (*s - '0');
}
va[vi] = v;
if ((vi < 2 && *s == '-') ||
- (vi == 2 && (*s == 'T' || isspace((unsigned char)*s))) ||
+ (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
(vi > 2 && *s == ':'))
s++;
}
/* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
if (*s == '.') {
- for (s++; isdigit((unsigned char)*s); s++)
+ for (s++; ISDIGIT((unsigned char)*s); s++)
;
}
diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
#include <stdio.h>
#include <strings.h>
diff --git a/sfeed_web.c b/sfeed_web.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
#include <stdio.h>
#include <strings.h>
diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
@@ -26,10 +25,10 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
return;
for (; *v; v++) {
- if (isalpha((unsigned char)*v) ||
- isdigit((unsigned char)*v) ||
+ if (ISALPHA((unsigned char)*v) ||
+ ISDIGIT((unsigned char)*v) ||
*v == '.' || *v == ':' || *v == '-' || *v == '_')
- putchar(tolower((unsigned char)*v));
+ putchar(TOLOWER((unsigned char)*v));
}
}
diff --git a/util.c b/util.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
@@ -66,8 +65,8 @@ strcasestr(const char *h, const char *n)
return (char *)h;
for (; *h; ++h) {
- for (i = 0; n[i] && tolower((unsigned char)n[i]) ==
- tolower((unsigned char)h[i]); ++i)
+ for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) ==
+ TOLOWER((unsigned char)h[i]); ++i)
;
if (n[i] == '\0')
return (char *)h;
@@ -82,7 +81,7 @@ uri_hasscheme(const char *s)
{
const char *p = s;
- for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
+ for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
*p == '+' || *p == '-' || *p == '.'; p++)
;
/* scheme, except if empty and starts with ":" then it is a path */
@@ -109,7 +108,7 @@ uri_parse(const char *s, struct uri *u)
}
/* scheme / protocol part */
- for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
+ for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
*p == '+' || *p == '-' || *p == '.'; p++)
;
/* scheme, except if empty and starts with ":" then it is a path */
diff --git a/util.h b/util.h
@@ -8,8 +8,12 @@
#define unveil(p1,p2) 0
#endif
-/* control-character in the ASCII range 0-127: compatible with UTF-8 */
+/* ctype-like macros, but always compatible with ASCII / UTF-8 */
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? ((c) | 32) : (c))
#undef strcasestr
char *strcasestr(const char *, const char *);
diff --git a/xml.c b/xml.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
@@ -6,6 +5,9 @@
#include "xml.h"
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+
static void
xml_parseattrs(XMLParser *x)
{
@@ -13,7 +15,7 @@ xml_parseattrs(XMLParser *x)
int c, endsep, endname = 0, valuestart = 0;
while ((c = GETNEXT()) != EOF) {
- if (isspace(c)) {
+ if (ISSPACE(c)) {
if (namelen)
endname = 1;
continue;
@@ -23,7 +25,7 @@ xml_parseattrs(XMLParser *x)
x->name[namelen] = '\0';
valuestart = 1;
endname = 1;
- } else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) {
+ } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
/* attribute without value */
x->name[namelen] = '\0';
if (x->xmlattrstart)
@@ -44,7 +46,7 @@ xml_parseattrs(XMLParser *x)
if (c == '\'' || c == '"') {
endsep = c;
} else {
- endsep = ' '; /* isspace() */
+ endsep = ' '; /* ISSPACE() */
goto startvalue;
}
@@ -58,7 +60,7 @@ startvalue:
x->data[0] = c;
valuelen = 1;
while ((c = GETNEXT()) != EOF) {
- if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c))))
+ if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
break;
if (valuelen < sizeof(x->data) - 1)
x->data[valuelen++] = c;
@@ -79,7 +81,7 @@ startvalue:
break;
}
}
- } else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) {
+ } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
if (valuelen < sizeof(x->data) - 1) {
x->data[valuelen++] = c;
} else {
@@ -90,7 +92,7 @@ startvalue:
valuelen = 1;
}
}
- if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) {
+ if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
x->data[valuelen] = '\0';
if (x->xmlattr)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
@@ -328,7 +330,7 @@ xml_parse(XMLParser *x)
while ((c = GETNEXT()) != EOF) {
if (c == '/')
x->isshorttag = 1; /* short tag */
- else if (c == '>' || isspace(c)) {
+ else if (c == '>' || ISSPACE(c)) {
x->tag[x->taglen] = '\0';
if (isend) { /* end tag, starts with </ */
if (x->xmltagend)
@@ -339,7 +341,7 @@ xml_parse(XMLParser *x)
/* start tag */
if (x->xmltagstart)
x->xmltagstart(x, x->tag, x->taglen);
- if (isspace(c))
+ if (ISSPACE(c))
xml_parseattrs(x);
if (x->xmltagstartparsed)
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);