Skip to content

Use compiler builtins to detect "simple common cases" in pp_add, pp_subtract, and pp_multiply #23503

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: blead
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -3405,6 +3405,188 @@ S_lossless_NV_to_IV(const NV nv, IV *ivp)
return FALSE;
}

/*
 * S_iv_{add,sub,mul}_may_overflow(a, b, p) conceptually compute "a <op> b"
 * (where <op> is +, -, or *) in infinite precision, and, if the result
 * is not (or may not be) representable as an IV, return true.
 * Otherwise (no overflow), store the result in *p and return false.
 * These functions allow false positives (hence the "may" in their names)
 * to speed up simple common cases.
 */

/* Define IV_*_OVERFLOW_IS_EXPENSIVE below to nonzero value
* if strict overflow checks are too expensive
* (for example, for CPUs that have no hardware overflow detection flags).
* If these macros have nonzero value, or overflow-checking compiler intrinsics
* are not available, good-old heuristics (with some false positives)
* will be used. */
# ifndef IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
# define IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE 0
# endif
# ifndef IV_MUL_OVERFLOW_IS_EXPENSIVE
/* Strict overflow check for IV multiplication is generally expensive
* when IV is a multi-word integer. */
# define IV_MUL_OVERFLOW_IS_EXPENSIVE (IVSIZE > LONGSIZE)
# endif

# if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
# define S_iv_add_may_overflow(il, ir, result) ckd_add(result, il, ir)
# elif defined(HAS_BUILTIN_ADD_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
# define S_iv_add_may_overflow __builtin_add_overflow
# else
/*
 * Conservative overflow check for IV addition: if both operands are far
 * enough from the IV boundaries (top two bits 00 or 11), store il + ir
 * in *result and return false.  Otherwise return true ("may overflow")
 * without computing anything, and let the caller take the exact,
 * slower path.
 */
PERL_STATIC_INLINE bool
S_iv_add_may_overflow (IV il, IV ir, IV *const result)
{
    /* The top two bits of each operand, viewed as unsigned. */
    PERL_UINT_FAST8_T const hi_l = ((UV)il) >> (UVSIZE * 8 - 2);
    PERL_UINT_FAST8_T const hi_r = ((UV)ir) >> (UVSIZE * 8 - 2);

    /* A top of 01 or 10 means the operand is within a factor of two of
     * an IV boundary, so the sum might not fit; conservatively report a
     * possible overflow (some such sums would in fact fit -- false
     * positives are allowed here). */
    if (hi_l == 1 || hi_l == 2 || hi_r == 1 || hi_r == 2)
        return true;

    /* Both tops are 00 or 11: each operand's magnitude is below half
     * the IV range, so the sum cannot under/overflow. */
    *result = il + ir;
    return false;
}
# endif

/*
* S_uv_{add,sub,mul}_overflow(a, b, p) are similar, but the results are UV
* and they should perform strict overflow check (no false positives).
*/

# if defined(I_STDCKDINT)
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
# define S_uv_add_overflow(auv, buv, result) ckd_add(result, auv, buv)
# elif defined(HAS_BUILTIN_ADD_OVERFLOW)
# define S_uv_add_overflow __builtin_add_overflow
# else
/*
 * Exact overflow check for UV addition: always stores auv + buv (taken
 * modulo 2**(UVSIZE*8)) in *result, and returns true iff the true sum
 * does not fit in a UV.  No false positives.
 */
PERL_STATIC_INLINE bool
S_uv_add_overflow (UV auv, UV buv, UV *const result)
{
    /* Unsigned arithmetic wraps around modulo 2**N, which -- in
     * contrast to signed overflow, whose behaviour is undefined -- is
     * well-defined in standard C.  The addition wrapped (i.e.
     * overflowed) exactly when the stored sum came out smaller than an
     * operand. */
    const UV sum = auv + buv;
    *result = sum;
    return sum < auv;
}
# endif

# if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
# define S_iv_sub_may_overflow(il, ir, result) ckd_sub(result, il, ir)
# elif defined(HAS_BUILTIN_SUB_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
# define S_iv_sub_may_overflow __builtin_sub_overflow
# else
/*
 * Conservative overflow check for IV subtraction: if both operands are
 * far enough from the IV boundaries (top two bits 00 or 11), store
 * il - ir in *result and return false.  Otherwise return true ("may
 * overflow") and let the caller take the exact, slower path.
 */
PERL_STATIC_INLINE bool
S_iv_sub_may_overflow (IV il, IV ir, IV *const result)
{
    /* The top two bits of each operand, viewed as unsigned. */
    PERL_UINT_FAST8_T const hi_l = ((UV)il) >> (UVSIZE * 8 - 2);
    PERL_UINT_FAST8_T const hi_r = ((UV)ir) >> (UVSIZE * 8 - 2);

    /* A top of 01 or 10 means the operand is close to an IV boundary
     * and the difference might not fit; conservatively report a
     * possible overflow (false positives are allowed here). */
    if (hi_l == 1 || hi_l == 2 || hi_r == 1 || hi_r == 2)
        return true;

    /* Both tops are 00 or 11: each operand's magnitude is below half
     * the IV range, so the difference cannot under/overflow. */
    *result = il - ir;
    return false;
}
# endif

# if defined(I_STDCKDINT)
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
# define S_uv_sub_overflow(auv, buv, result) ckd_sub(result, auv, buv)
# elif defined(HAS_BUILTIN_SUB_OVERFLOW)
# define S_uv_sub_overflow __builtin_sub_overflow
# else
/*
 * Exact underflow check for UV subtraction: always stores auv - buv
 * (taken modulo 2**(UVSIZE*8)) in *result, and returns true iff
 * buv > auv, i.e. the true difference is negative and hence not
 * representable as a UV.  No false positives.
 */
PERL_STATIC_INLINE bool
S_uv_sub_overflow (UV auv, UV buv, UV *const result)
{
    /* Unsigned subtraction wraps; it wrapped exactly when the stored
     * difference is larger than the minuend. */
    const UV diff = auv - buv;
    *result = diff;
    return diff > auv;
}
# endif

# if defined(I_STDCKDINT) && !IV_MUL_OVERFLOW_IS_EXPENSIVE
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
# define S_iv_mul_may_overflow(il, ir, result) ckd_mul(result, il, ir)
# elif defined(HAS_BUILTIN_MUL_OVERFLOW) && !IV_MUL_OVERFLOW_IS_EXPENSIVE
# define S_iv_mul_may_overflow __builtin_mul_overflow
# else
/*
 * Conservative overflow check for IV multiplication: if each operand's
 * magnitude fits in just under half an IV's width, store il * ir in
 * *result and return false -- the product of two such half-width values
 * always fits.  Otherwise return true ("may overflow") and let the
 * caller take the exact, slower path.
 */
PERL_STATIC_INLINE bool
S_iv_mul_may_overflow (IV il, IV ir, IV *const result)
{
    /* Keep the top half-plus-one bits of each operand: for a 32-bit IV
     * that is the top 17 bits, for 64-bit it's the top 33 bits. */
    UV const hi_l = ((UV)il) >> (UVSIZE * 4 - 1);
    UV const hi_r = ((UV)ir) >> (UVSIZE * 4 - 1);
    /* All (UVSIZE*4 + 1) kept bits set: the pattern that a small
     * negative operand leaves after the shift above. */
    UV const all_ones = (((UV)1) << (UVSIZE * 4 + 1)) - 1;

    /* Safe only when each operand's kept bits are all zero (a small
     * non-negative value) or all one (a small negative value); anything
     * else means the operand's magnitude reaches into the top half. */
    if ((hi_l == 0 || hi_l == all_ones) && (hi_r == 0 || hi_r == all_ones)) {
        *result = il * ir;
        return false;
    }
    return true; /* multiplication may overflow */
}
# endif

# if defined(I_STDCKDINT)
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
# define S_uv_mul_overflow(auv, buv, result) ckd_mul(result, auv, buv)
# elif defined(HAS_BUILTIN_MUL_OVERFLOW)
# define S_uv_mul_overflow __builtin_mul_overflow
# else
/*
 * Exact overflow check for UV multiplication, used when no compiler
 * intrinsic is available: computes auv * buv by schoolbook half-word
 * decomposition.  On the non-overflowing path the product is stored in
 * *result and false is returned; true means overflow.
 * NOTE(review): on the early "return true" paths *result is left
 * untouched -- callers must not read it when true is returned.
 */
PERL_STATIC_INLINE bool
S_uv_mul_overflow (UV auv, UV buv, UV *const result)
{
    /* Masks selecting the top and bottom halves of a UV. */
    const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
    const UV botmask = ~topmask;

# if UVSIZE > LONGSIZE && UVSIZE <= 2 * LONGSIZE
    /* If UV is a double-word integer, declare these variables as
       single-word integers to help the compiler avoid double-word
       multiplication. */
    unsigned long alow, ahigh, blow, bhigh;
# else
    UV alow, ahigh, blow, bhigh;
# endif

    /* Split each operand into its high and low halves.
       (If this ever did sign extension on unsigned it's time for
       plan B.) */
    ahigh = auv >> (4 * sizeof (UV));
    alow = auv & botmask;
    bhigh = buv >> (4 * sizeof (UV));
    blow = buv & botmask;

    if (ahigh && bhigh)
        /* Both operands have bits in their top half: eg 32 bit is at
           least 0x10000 * 0x10000 == 0x100000000,
           which is overflow. */
        return true;

    UV product_middle = 0;
    if (ahigh || bhigh) {
        /* One operand is large, 1 small */
        /* Either ahigh or bhigh is zero here (one of the two partial
           products below is zero), so the addition can't overflow. */
        product_middle = (UV)ahigh * blow + (UV)alow * bhigh;
        if (product_middle & topmask)
            /* The middle partial product would lose bits when shifted
               into position: the full product overflows. */
            return true;
        /* OK, product_middle won't lose bits when we shift it. */
        product_middle <<= 4 * sizeof (UV);
    }
    /* else: both high halves are zero; eg 32 bit is at most
       0xFFFF * 0xFFFF == 0xFFFE0001,
       so the unsigned multiply below cannot overflow. */

    /* Low partial product, then fold in the shifted middle part with an
       exact carry check. */
    UV product_low = (UV)alow * blow;
    return S_uv_add_overflow(product_middle, product_low, result);
}
# endif

#endif

/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
Expand Down
6 changes: 6 additions & 0 deletions pod/perldelta.pod
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ There may well be none in a stable release.

=item *

Simple (non-overflowing) addition (C<+>), subtraction (C<->) and
multiplication (C<*>) of IVs are slightly sped up, as long as
sufficient underlying C compiler support is available.

=item *

XXX

=back
Expand Down
84 changes: 8 additions & 76 deletions pp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1336,23 +1336,12 @@ PP(pp_multiply)
U32 flags = (svl->sv_flags & svr->sv_flags);
if (flags & SVf_IOK) {
/* both args are simple IVs */
UV topl, topr;
IV result;
il = SvIVX(svl);
ir = SvIVX(svr);
do_iv:
topl = ((UV)il) >> (UVSIZE * 4 - 1);
topr = ((UV)ir) >> (UVSIZE * 4 - 1);

/* if both are in a range that can't under/overflow, do a
* simple integer multiply: if the top halves(*) of both numbers
* are 00...00 or 11...11, then it's safe.
* (*) for 32-bits, the "top half" is the top 17 bits,
* for 64-bits, its 33 bits */
if (!(
((topl+1) | (topr+1))
& ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
)) {
TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
if (!S_iv_mul_may_overflow(il, ir, &result)) {
TARGi(result, 0); /* args not GMG, so can't be tainted */
goto ret;
}
goto generic;
Expand Down Expand Up @@ -1388,12 +1377,9 @@ PP(pp_multiply)
if (SvIV_please_nomg(svl)) {
bool auvok = SvUOK(svl);
bool buvok = SvUOK(svr);
const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
UV alow;
UV ahigh;
UV blow;
UV bhigh;
UV product;

if (auvok) {
alow = SvUVX(svl);
Expand All @@ -1420,19 +1406,7 @@ PP(pp_multiply)
}
}

/* If this does sign extension on unsigned it's time for plan B */
ahigh = alow >> (4 * sizeof (UV));
alow &= botmask;
bhigh = blow >> (4 * sizeof (UV));
blow &= botmask;
if (ahigh && bhigh) {
NOOP;
/* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
which is overflow. Drop to NVs below. */
} else if (!ahigh && !bhigh) {
/* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
so the unsigned multiply cannot overflow. */
const UV product = alow * blow;
if (!S_uv_mul_overflow(alow, blow, &product)) {
if (auvok == buvok) {
/* -ve * -ve or +ve * +ve gives a +ve result. */
TARGu(product, 1);
Expand All @@ -1442,42 +1416,6 @@ PP(pp_multiply)
TARGi(NEGATE_2IV(product), 1);
goto ret;
} /* else drop to NVs below. */
} else {
/* One operand is large, 1 small */
UV product_middle;
if (bhigh) {
/* swap the operands */
ahigh = bhigh;
bhigh = blow; /* bhigh now the temp var for the swap */
blow = alow;
alow = bhigh;
}
/* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
multiplies can't overflow. shift can, add can, -ve can. */
product_middle = ahigh * blow;
if (!(product_middle & topmask)) {
/* OK, (ahigh * blow) won't lose bits when we shift it. */
UV product_low;
product_middle <<= (4 * sizeof (UV));
product_low = alow * blow;

/* as for pp_add, UV + something mustn't get smaller.
IIRC ANSI mandates this wrapping *behaviour* for
unsigned whatever the actual representation*/
product_low += product_middle;
if (product_low >= product_middle) {
/* didn't overflow */
if (auvok == buvok) {
/* -ve * -ve or +ve * +ve gives a +ve result. */
TARGu(product_low, 1);
goto ret;
} else if (product_low <= ABS_IV_MIN) {
/* -ve result, which could overflow an IV */
TARGi(NEGATE_2IV(product_low), 1);
goto ret;
} /* else drop to NVs below. */
}
} /* product_middle too large */
} /* ahigh && bhigh */
} /* SvIOK(svl) */
} /* SvIOK(svr) */
Expand Down Expand Up @@ -1929,18 +1867,12 @@ PP(pp_subtract)
U32 flags = (svl->sv_flags & svr->sv_flags);
if (flags & SVf_IOK) {
/* both args are simple IVs */
UV topl, topr;
IV result;
il = SvIVX(svl);
ir = SvIVX(svr);
do_iv:
topl = ((UV)il) >> (UVSIZE * 8 - 2);
topr = ((UV)ir) >> (UVSIZE * 8 - 2);

/* if both are in a range that can't under/overflow, do a
* simple integer subtract: if the top of both numbers
* are 00 or 11, then it's safe */
if (!( ((topl+1) | (topr+1)) & 2)) {
TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
if (!S_iv_sub_may_overflow(il, ir, &result)) {
TARGi(result, 0); /* args not GMG, so can't be tainted */
goto ret;
}
goto generic;
Expand Down
12 changes: 3 additions & 9 deletions pp_hot.c
Original file line number Diff line number Diff line change
Expand Up @@ -1827,18 +1827,12 @@ PP(pp_add)
U32 flags = (svl->sv_flags & svr->sv_flags);
if (flags & SVf_IOK) {
/* both args are simple IVs */
UV topl, topr;
IV result;
il = SvIVX(svl);
ir = SvIVX(svr);
do_iv:
topl = ((UV)il) >> (UVSIZE * 8 - 2);
topr = ((UV)ir) >> (UVSIZE * 8 - 2);

/* if both are in a range that can't under/overflow, do a
* simple integer add: if the top of both numbers
* are 00 or 11, then it's safe */
if (!( ((topl+1) | (topr+1)) & 2)) {
TARGi(il + ir, 0); /* args not GMG, so can't be tainted */
if (!S_iv_add_may_overflow(il, ir, &result)) {
TARGi(result, 0); /* args not GMG, so can't be tainted */
goto ret;
}
goto generic;
Expand Down
26 changes: 26 additions & 0 deletions t/op/64bitint.t
Original file line number Diff line number Diff line change
Expand Up @@ -469,4 +469,30 @@ cmp_ok 0x3ffffffffffffffe % -0xc000000000000000, '==', -0x8000000000000002, 'mo
cmp_ok 0x3fffffffffffffff % -0xc000000000000000, '==', -0x8000000000000001, 'modulo is (IV_MIN-1)';
cmp_ok 0x4000000000000000 % -0xc000000000000000, '==', -0x8000000000000000, 'modulo is IV_MIN';

# Arithmetic close to IV overflow

# These used to be handled by the generic (slower) code, but are now
# taken by the fast path (as a "simple common case").  Either way,
# these tests should pass.
$q = 9223372036854775800;
cmp_ok 5 + $q, '==', 9223372036854775805, "5 + $q";
cmp_ok $q - -5, '==', 9223372036854775805, "$q - -5";
$q = 1111111111111111111;
cmp_ok $q * 5, '==', 5555555555555555555, "$q * 5";

# IV <op> IV -> UV/NV promotion

$q = 7777777777777777777;
$r = 2222222222222222223;
# Note 10000000000000000000 can be represented accurately in both
# IEEE double (binary64; 0x1.158e460913dp+63) and decimal format (1e+19),
# so these comparisons are exact whichever representation Perl picks.
cmp_ok $q + $r, '==', 10000000000000000000, 'IV + IV promotes to UV';
cmp_ok -$q + -$r, '==', -10000000000000000000, 'IV + IV promotes to NV';
cmp_ok $q - -$r, '==', 10000000000000000000, 'IV - IV promotes to UV';
cmp_ok -$q - $r, '==', -10000000000000000000, 'IV - IV promotes to NV';
$q = 3000000000;
$r = 4000000000;
cmp_ok $q * $r, '==', 12000000000000000000, 'IV * IV promotes to UV';
cmp_ok $q * -$r, '==', -12000000000000000000, 'IV * IV promotes to UV then NV';
cmp_ok +($q * 2) * $r, '==', 24000000000000000000, 'IV * IV promotes to NV';

done_testing();
Loading