Skip to content

Commit 0777daa

Browse files
committed
pcre2test: refactor replication implementations
Allow for a nicer fallback in case of syntax errors in the pattern implementation while avoiding negative values masquerading as positive ones. While at it, make sure that in non-LP64 environments UINT32_MAX is still considered valid. Update the data implementation to behave similarly and clean up fixes that had accumulated unorganically.
1 parent 5281d67 commit 0777daa

File tree

4 files changed

+170
-85
lines changed

4 files changed

+170
-85
lines changed

doc/pcre2test.1

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,9 @@ part of the file. For example:
563563
\e[abc]{4}
564564
.sp
565565
is converted to "abcabcabcabc". This feature does not support nesting. To
566-
include a closing square bracket in the characters, code it as \ex5D.
566+
include a closing square bracket in the characters, code it with \ex followed
567+
by two hexadecimal digits that represent that character in the character set used
568+
(e.g. \ex5D for ASCII or UTF-8).
567569
.P
568570
A backslash followed by an equals sign marks the end of the subject string and
569571
the start of a modifier list. For example:
@@ -1286,7 +1288,7 @@ the subject string. For more detail of REG_STARTEND, see the
12861288
\fBpcre2posix\fP
12871289
.\"
12881290
documentation. If the subject string contains binary zeros (coded as escapes
1289-
such as \ex{00} because \fBpcre2test\fP does not support actual binary zeros in
1291+
such as \ex00 because \fBpcre2test\fP does not support actual binary zeros in
12901292
its input), you must use \fBposix_startend\fP to specify its length.
12911293
.
12921294
.

src/pcre2test.c

Lines changed: 80 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -343,12 +343,24 @@ ints. They are defined not to be shorter. */
343343
#define U32OVERFLOW(x) (x == UINT32_MAX)
344344
#endif
345345

346+
#if ULONG_MAX > UINT32_MAX
347+
#define U32OVERFLOWE(x) U32OVERFLOW(x)
348+
#else
349+
#define U32OVERFLOWE(x) (errno != 0 && x == UINT32_MAX)
350+
#endif
351+
346352
#if LONG_MAX > INT32_MAX
347-
#define S32OVERFLOW(x) (x > INT32_MAX || x < INT32_MIN)
353+
#define S32OVERFLOW(x) (x > INT32_MAX || INT32_MIN > x)
348354
#else
349355
#define S32OVERFLOW(x) (x == INT32_MAX || x == INT32_MIN)
350356
#endif
351357

358+
#if LONG_MAX > INT32_MAX
359+
#define S32OVERFLOWE(x) S32OVERFLOW(x)
360+
#else
361+
#define S32OVERFLOWE(x) (errno != 0 && (x == INT32_MAX || x == INT32_MIN))
362+
#endif
363+
352364
/* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
353365
pcre2_intmodedep.h, which is where mode-dependent macros and structures are
354366
defined. We can now include it for each supported code unit width. Because
@@ -6007,7 +6019,6 @@ static int
60076019
process_pattern(void)
60086020
{
60096021
BOOL utf;
6010-
uint32_t k;
60116022
uint8_t *p = buffer;
60126023
unsigned int delimiter = *p++;
60136024
int errorcode;
@@ -6100,7 +6111,7 @@ if (pat_patctl.convert_type != CONVERT_UNSET &&
61006111
/* Check for mutually exclusive control modifiers. At present, these are all in
61016112
the first control word. */
61026113

6103-
for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
6114+
for (uint32_t k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
61046115
{
61056116
uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
61066117
if (c != 0 && c != (c & (~c+1)))
@@ -6198,54 +6209,62 @@ else if ((pat_patctl.control & CTL_EXPAND) != 0)
61986209
uint8_t *pc = pp;
61996210
uint32_t count = 1;
62006211
size_t length = 1;
6212+
size_t m = 1;
62016213

62026214
/* Check for replication syntax; if not found, the defaults just set will
6203-
prevail and one character will be copied. */
6215+
prevail and `length` characters will be copied once. */
62046216

62056217
if (pp[0] == '\\' && pp[1] == '[')
62066218
{
62076219
uint8_t *pe;
6208-
for (pe = pp + 2; *pe != 0; pe++)
6220+
6221+
/* Start the capture after skipping the prefix. This pointer will need
6222+
to be rolled back if a syntax problem is found later. */
6223+
pc += 2;
6224+
for (pe = pc; *pe != 0; pe++)
62096225
{
6210-
if (pe[0] == ']' && pe[1] == '{')
6226+
if (pe[0] == ']' && pe[1] == '{' && isdigit(pe[2]))
62116227
{
6212-
size_t clen = pe - pc - 2;
6213-
uint32_t i = 0;
62146228
unsigned long uli;
62156229
char *endptr;
62166230

6217-
pe += 2;
6218-
uli = strtoul((const char *)pe, &endptr, 10);
6219-
if (U32OVERFLOW(uli))
6231+
errno = 0;
6232+
uli = strtoul((const char *)pe + 2, &endptr, 10);
6233+
if (uli == 0 || U32OVERFLOWE(uli))
62206234
{
6221-
fprintf(outfile, "** Pattern repeat count too large\n");
6235+
fprintf(outfile, "** Invalid replication count (1..UINT_MAX)\n");
62226236
return PR_SKIP;
62236237
}
62246238

6225-
i = (uint32_t)uli;
6226-
pe = (uint8_t *)endptr;
6227-
if (*pe == '}')
6239+
if (*endptr == '}') count = (uint32_t)uli;
6240+
length = pe - pc;
6241+
if (length >= SIZE_MAX/count)
62286242
{
6229-
if (i == 0)
6230-
{
6231-
fprintf(outfile, "** Zero repeat not allowed\n");
6232-
return PR_SKIP;
6233-
}
6234-
pc += 2;
6235-
count = i;
6236-
length = clen;
6237-
pp = pe;
6238-
break;
6243+
fprintf(outfile, "** Expanded content too large\n");
6244+
return PR_SKIP;
62396245
}
6246+
pe = (uint8_t *)endptr;
6247+
break;
62406248
}
62416249
}
6250+
if (*pe != '}')
6251+
{
6252+
pc -= 2;
6253+
length = pe - pc;
6254+
}
6255+
m = length * count;
6256+
pp = pe;
6257+
6258+
/* The main loop increments pp, so if we are already at the end of
6259+
the pattern we need to backtrack to avoid jumping over the NUL. */
6260+
if (*pe == 0) pp--;
62426261
}
62436262

62446263
/* Add to output. If the buffer is too small expand it. The function for
62456264
expanding buffers always keeps buffer and pbuffer8 in step as far as their
62466265
size goes. */
62476266

6248-
while (pt + count * length > pbuffer8 + pbuffer8_size)
6267+
while (pt + m > pbuffer8 + pbuffer8_size)
62496268
{
62506269
size_t pc_offset = pc - buffer;
62516270
size_t pp_offset = pp - buffer;
@@ -7990,7 +8009,7 @@ process_data(void)
79908009
{
79918010
PCRE2_SIZE len, ulen, arg_ulen;
79928011
uint32_t gmatched;
7993-
uint32_t c, k;
8012+
uint32_t c;
79948013
uint32_t g_notempty = 0;
79958014
uint8_t *p, *pp, *start_rep;
79968015
size_t needlen;
@@ -8121,14 +8140,15 @@ while ((c = *p++) != 0)
81218140

81228141
if (*p++ != '{')
81238142
{
8124-
fprintf(outfile, "** Expected '{' after \\[....]\n");
8143+
fprintf(outfile, "** Expected '{' after \\[...]\n");
81258144
return PR_OK;
81268145
}
81278146

8147+
errno = 0;
81288148
li = strtol((const char *)p, &endptr, 10);
8129-
if (S32OVERFLOW(li))
8149+
if (!isdigit(*p) || errno != 0 || li < 1 || S32OVERFLOW(li))
81308150
{
8131-
fprintf(outfile, "** Repeat count too large\n");
8151+
fprintf(outfile, "** Replication count missing or invalid (1..INT_MAX)\n");
81328152
return PR_OK;
81338153
}
81348154
i = (int)li;
@@ -8140,44 +8160,41 @@ while ((c = *p++) != 0)
81408160
return PR_OK;
81418161
}
81428162

8143-
if (i-- <= 0)
8163+
if (i-- > 1)
81448164
{
8145-
fprintf(outfile, "** Zero or negative repeat not allowed\n");
8146-
return PR_OK;
8147-
}
8148-
8149-
replen = CAST8VAR(q) - start_rep;
8150-
if (i > 0 && replen > (SIZE_MAX - needlen) / i)
8151-
{
8152-
fprintf(outfile, "** Expanded content too large\n");
8153-
return PR_OK;
8154-
}
8155-
needlen += replen * i;
8156-
8157-
if (needlen >= dbuffer_size)
8158-
{
8159-
size_t qoffset = CAST8VAR(q) - dbuffer;
8160-
size_t rep_offset = start_rep - dbuffer;
8161-
while (needlen >= dbuffer_size)
8165+
replen = CAST8VAR(q) - start_rep;
8166+
if (replen >= (SIZE_MAX - needlen) / i)
81628167
{
8163-
if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2;
8164-
else dbuffer_size = needlen + 1;
8168+
fprintf(outfile, "** Expanded content too large\n");
8169+
return PR_OK;
81658170
}
8166-
dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
8167-
if (dbuffer == NULL)
8171+
needlen += replen * i;
8172+
8173+
if (needlen >= dbuffer_size)
81688174
{
8169-
fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n",
8170-
dbuffer_size);
8171-
exit(1);
8175+
size_t qoffset = CAST8VAR(q) - dbuffer;
8176+
size_t rep_offset = start_rep - dbuffer;
8177+
while (needlen >= dbuffer_size)
8178+
{
8179+
if (dbuffer_size < SIZE_MAX / 2) dbuffer_size *= 2;
8180+
else dbuffer_size = needlen + 1;
8181+
}
8182+
dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
8183+
if (dbuffer == NULL)
8184+
{
8185+
fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n",
8186+
dbuffer_size);
8187+
exit(1);
8188+
}
8189+
SETCASTPTR(q, dbuffer + qoffset);
8190+
start_rep = dbuffer + rep_offset;
81728191
}
8173-
SETCASTPTR(q, dbuffer + qoffset);
8174-
start_rep = dbuffer + rep_offset;
8175-
}
81768192

8177-
while (i-- > 0)
8178-
{
8179-
memcpy(CAST8VAR(q), start_rep, replen);
8180-
SETPLUS(q, replen/code_unit_size);
8193+
while (i-- > 0)
8194+
{
8195+
memcpy(CAST8VAR(q), start_rep, replen);
8196+
SETPLUS(q, replen/code_unit_size);
8197+
}
81818198
}
81828199

81838200
start_rep = NULL;
@@ -8458,7 +8475,7 @@ if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
84588475
/* Check for mutually exclusive modifiers. At present, these are all in the
84598476
first control word. */
84608477

8461-
for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
8478+
for (uint32_t k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
84628479
{
84638480
c = dat_datctl.control & exclusive_dat_controls[k];
84648481
if (c != 0 && c != (c & (~c+1)))

testdata/testinput2

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4589,9 +4589,6 @@
45894589
/(abc)*/
45904590
\[abc]{1}
45914591

4592-
/(abc)*/
4593-
\[abc]{0}
4594-
45954592
/^/gm
45964593
\n\n\n
45974594

@@ -5136,12 +5133,29 @@ a)"xI
51365133
\= Expect no match
51375134
abc
51385135

5136+
# Expand tests
5137+
5138+
/(abc)*/
5139+
\[abc]{0}
5140+
\[abc]{}
5141+
51395142
/aaa/
5140-
\[abc]{10000000000000000000000000000}
5141-
\[a]{3}
5143+
\[X]{-10}
5144+
\[abc]{10000000000000000000000000000}
5145+
\[a]{3}
51425146

51435147
/\[AB]{6000000000000000000000}/expand
51445148

5149+
/\[AB]{0000000000000000000000}/expand
5150+
5151+
/a\[b]{-1}/BI,expand
5152+
5153+
/a\[b]{/BI,expand
5154+
5155+
/a\[/BI,expand
5156+
5157+
//BI,expand
5158+
51455159
# Hex uses pattern length, not zero-terminated. This tests for overrunning
51465160
# the given length of a pattern.
51475161

@@ -6324,9 +6338,6 @@ a)"xI
63246338
/[Aa]{2,3}/BI
63256339
aabcd
63266340

6327-
--
6328-
\[X]{-10}
6329-
63306341
# Check imposition of maximum by match_data_create().
63316342

63326343
/abcd/

0 commit comments

Comments
 (0)