Skip to content

Commit 9eed62a

Browse files
committed
Use locale-independent alternatives to isalpha/isalnum/iscntrl
- Avoid registering/detecting stream wrappers in locale-dependent ways. - Avoid this in libmagic for detecting magic file headers. I don't believe these should be locale-dependent. - Avoid locale dependence for http/ftp/network protocols. - Avoid locale dependence for Windows drive letter names in zend_virtual_cwd. - Make parse_url stop depending on locale. Related to https://bugs.php.net/bug.php?id=52923 - iscntrl is locale-dependent, which seems to corrupt certain bytes. Somewhat related to https://wiki.php.net/rfc/strtolower-ascii, but I don't think most of these should have been locale-dependent in the first place - the code may not have considered locales. E.g. on Linux, `setlocale(LC_ALL, 'de_DE');` (if the locale is installed and it succeeds) will have some values for alpha/cntrl in the range 128-255 where the C locale has no values. To avoid this locale-dependence in older php versions, applications can call `setlocale(LC_CTYPE, 'C')`.
1 parent 9b968ff commit 9eed62a

30 files changed

+113
-46
lines changed

Zend/zend_compile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1890,7 +1890,7 @@ ZEND_API size_t zend_dirname(char *path, size_t len)
18901890
/* Note that on Win32 CWD is per drive (heritage from CP/M).
18911891
* This means dirname("c:foo") maps to "c:." or "c:" - which means CWD on C: drive.
18921892
*/
1893-
if ((2 <= len) && isalpha((int)((unsigned char *)path)[0]) && (':' == path[1])) {
1893+
if ((2 <= len) && zend_isalpha_ascii((int)((unsigned char *)path)[0]) && (':' == path[1])) {
18941894
/* Skip over the drive spec (if any) so as not to change */
18951895
path += 2;
18961896
len_adjust += 2;

Zend/zend_operators.c

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,70 @@ ZEND_API const unsigned char zend_toupper_map[256] = {
118118
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
119119
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
120120
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
121-
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
121+
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
122+
};
123+
124+
/* ctype's isalpha varies based on locale, which is not what we want for many use cases.
125+
* This is what it'd be in the "C" locale. */
126+
ZEND_API const bool zend_isalpha_map[256] = {
127+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
128+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
129+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
130+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
131+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
132+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
133+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
134+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
135+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
136+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
137+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
138+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
139+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
140+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
141+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
142+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
122143
};
123144

145+
/* ctype's isalnum is isalpha + isdigit(0-9) */
146+
ZEND_API const bool zend_isalnum_map[256] = {
147+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
148+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
149+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
150+
1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
151+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
152+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
153+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
154+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
155+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
156+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
157+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
158+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
159+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
160+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
161+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
162+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
163+
};
164+
165+
/* ctype's iscntrl varies based on locale, which is not what we want for many use cases.
166+
* This is what it'd be in the "C" locale. */
167+
ZEND_API const bool zend_iscntrl_map[256] = {
168+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
169+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
170+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
171+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
172+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
173+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
174+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
175+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
176+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
177+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
178+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
179+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
180+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
181+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
182+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
183+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184+
};
124185

125186
/**
126187
* Functions using locale lowercase:

Zend/zend_operators.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,9 +435,15 @@ ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2);
435435

436436
ZEND_API extern const unsigned char zend_tolower_map[256];
437437
ZEND_API extern const unsigned char zend_toupper_map[256];
438+
ZEND_API extern const bool zend_isalpha_map[256];
439+
ZEND_API extern const bool zend_isalnum_map[256];
440+
ZEND_API extern const bool zend_iscntrl_map[256];
438441

439442
#define zend_tolower_ascii(c) (zend_tolower_map[(unsigned char)(c)])
440443
#define zend_toupper_ascii(c) (zend_toupper_map[(unsigned char)(c)])
444+
#define zend_isalpha_ascii(c) (zend_isalpha_map[(unsigned char)(c)])
445+
#define zend_isalnum_ascii(c) (zend_isalnum_map[(unsigned char)(c)])
446+
#define zend_iscntrl_ascii(c) (zend_iscntrl_map[(unsigned char)(c)])
441447

442448
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length);
443449
ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length);

Zend/zend_virtual_cwd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ typedef unsigned short mode_t;
8282
#define IS_UNC_PATH(path, len) \
8383
(len >= 2 && IS_SLASH(path[0]) && IS_SLASH(path[1]))
8484
#define IS_ABSOLUTE_PATH(path, len) \
85-
(len >= 2 && (/* is local */isalpha(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))
85+
(len >= 2 && (/* is local */zend_isalpha_ascii(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))
8686

8787
#else
8888
#ifdef HAVE_DIRENT_H

ext/fileinfo/libmagic/apprentice.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ get_standard_integer_type(const char *l, const char **t)
299299
{
300300
int type;
301301

302-
if (isalpha(CAST(unsigned char, l[1]))) {
302+
if (zend_isalpha_ascii(CAST(unsigned char, l[1]))) {
303303
switch (l[1]) {
304304
case 'C':
305305
/* "dC" and "uC" */
@@ -1187,7 +1187,7 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs,
11871187
continue;
11881188
}
11891189
if ((*bang[i].fun)(ms, &me,
1190-
line + bang[i].len + 2,
1190+
line + bang[i].len + 2,
11911191
len - bang[i].len - 2) != 0) {
11921192
(*errs)++;
11931193
continue;
@@ -1419,7 +1419,7 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
14191419
/* coalesce per file arrays into a single one, if needed */
14201420
if (mset[j].count == 0)
14211421
continue;
1422-
1422+
14231423
if (coalesce_entries(ms, mset[j].me, mset[j].count,
14241424
&map->magic[j], &map->nmagic[j]) == -1) {
14251425
errs++;
@@ -2071,7 +2071,7 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
20712071
if (*l == 'd')
20722072
m->type = get_standard_integer_type(l, &l);
20732073
else if (*l == 's'
2074-
&& !isalpha(CAST(unsigned char, l[1]))) {
2074+
&& !zend_isalpha_ascii(CAST(unsigned char, l[1]))) {
20752075
m->type = FILE_STRING;
20762076
++l;
20772077
}
@@ -2287,7 +2287,7 @@ parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
22872287
private int
22882288
goodchar(unsigned char x, const char *extra)
22892289
{
2290-
return (isascii(x) && isalnum(x)) || strchr(extra, x);
2290+
return (zend_isalnum_ascii(x)) || strchr(extra, x);
22912291
}
22922292

22932293
private int

ext/fileinfo/libmagic/compress.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
237237
return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
238238

239239
for (p = buf; *p; p++)
240-
if (!isalnum(*p))
240+
if (!zend_isalnum_ascii(*p))
241241
*p = '-';
242242

243243
return file_printf(ms, "application/x-decompression-error-%s-%s",

ext/fileinfo/libmagic/encoding.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ looks_ucs32(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
536536
| (CAST(file_unichar_t, bf[i]) << 24);
537537
else
538538
ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 0])
539-
| (CAST(file_unichar_t, bf[i + 1]) << 8)
539+
| (CAST(file_unichar_t, bf[i + 1]) << 8)
540540
| (CAST(file_unichar_t, bf[i + 2]) << 16)
541541
| (CAST(file_unichar_t, bf[i + 3]) << 24);
542542

ext/fileinfo/libmagic/funcs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
120120
return -1;
121121
}
122122

123-
if (!isalpha((unsigned char)*p)) {
123+
if (!zend_isalpha_ascii((unsigned char)*p)) {
124124
if (msg)
125125
snprintf(msg, mlen, "bad format char: %c", *p);
126126
return -1;

ext/filter/logical_filters.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -528,21 +528,21 @@ static int _php_filter_validate_domain(char * domain, int len, zend_long flags)
528528
}
529529

530530
/* First char must be alphanumeric */
531-
if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
531+
if(*s == '.' || (hostname && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
532532
return 0;
533533
}
534534

535535
while (s < e) {
536536
if (*s == '.') {
537537
/* The first and the last character of a label must be alphanumeric */
538-
if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
538+
if (*(s + 1) == '.' || (hostname && (!zend_isalnum_ascii((int)*(unsigned char *)(s - 1)) || !zend_isalnum_ascii((int)*(unsigned char *)(s + 1))))) {
539539
return 0;
540540
}
541541

542542
/* Reset label length counter */
543543
i = 1;
544544
} else {
545-
if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
545+
if (i > 63 || (hostname && *s != '-' && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
546546
return 0;
547547
}
548548

@@ -569,7 +569,7 @@ static int is_userinfo_valid(zend_string *str)
569569
const char *valid = "-._~!$&'()*+,;=:";
570570
const char *p = ZSTR_VAL(str);
571571
while (p - ZSTR_VAL(str) < ZSTR_LEN(str)) {
572-
if (isalpha(*p) || isdigit(*p) || strchr(valid, *p)) {
572+
if (zend_isalnum_ascii(*p) || strchr(valid, *p)) {
573573
p++;
574574
} else if (*p == '%' && p - ZSTR_VAL(str) <= ZSTR_LEN(str) - 3 && isdigit(*(p+1)) && isxdigit(*(p+2))) {
575575
p += 3;

ext/gd/libgd/gd_xbm.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ void gdImageXbmCtx(gdImagePtr image, char* file_name, int fg, gdIOCtx * out)
190190
name = estrdup("image");
191191
} else {
192192
for (i=0; i<l; i++) {
193-
/* only in C-locale isalnum() would work */
194-
if (!isupper(name[i]) && !islower(name[i]) && !isdigit(name[i])) {
193+
/* Locale-independent check */
194+
if (!zend_isalnum_ascii(name[i])) {
195195
name[i] = '_';
196196
}
197197
}

0 commit comments

Comments
 (0)