Skip to content

Commit 9c39a33

Browse files
committed
regmatch: Use new utf8_to_uv
There really is no good option for continuing a pattern match once we discover that the input contains malformed (illegal) UTF-8. This commit changes the code to die when that happens, rather than carrying on with the match.
1 parent 194d4ea commit 9c39a33

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

regexec.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7135,10 +7135,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
71357135

71367136
while (chars) {
71377137
if (utf8_target) {
7138-
/* XXX This assumes the length is well-formed, as
7139-
* does the UTF8SKIP below */
7140-
uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
7141-
uniflags);
7138+
(void) utf8_to_uv_flags((U8*)uc, uc + UTF8_MAXLEN,
7139+
&uvc, &len,
7140+
( uniflags|UTF8_DIE_IF_MALFORMED
7141+
|UTF8_NO_CONFIDENCE_IN_CURLEN_));
71427142
uc += len;
71437143
}
71447144
else {
@@ -7150,8 +7150,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
71507150
while (foldlen) {
71517151
if (!--chars)
71527152
break;
7153-
uvc = utf8n_to_uvchr(uscan, foldlen, &len,
7154-
uniflags);
7153+
(void) utf8_to_uv_flags((U8*)uscan,
7154+
uscan + foldlen, &uvc, &len,
7155+
uniflags|UTF8_DIE_IF_MALFORMED);
71557156
uscan += len;
71567157
foldlen -= len;
71577158
}

0 commit comments

Comments
 (0)