From f288bce4b2b4205e8b40056317c7cf0525e9033b Mon Sep 17 00:00:00 2001 From: TAKAI Kousuke <62541129+t-a-k@users.noreply.github.com> Date: Sat, 26 Apr 2025 02:15:40 +0900 Subject: [PATCH 1/3] pp_{add,subtract,multiply}: use __builtin_{add,sub,mul}_overflow if available This will hopefully make the code faster and smaller, and make more cases to be handled as "simple common cases". Note that this change uses HAS_BUILTIN_{ADD,SUB,MUL}_OVERFLOW macros which have already been defined in config.h but seem not to have been used by existing code. t/op/64bitint.t: Add tests to exercise "simple common cases". Note that these tests should pass even before this change. --- inline.h | 176 ++++++++++++++++++++++++++++++++++++++++++++++ pod/perldelta.pod | 6 ++ pp.c | 84 +++------------------- pp_hot.c | 12 +--- t/op/64bitint.t | 26 +++++++ 5 files changed, 219 insertions(+), 85 deletions(-) diff --git a/inline.h b/inline.h index bd428e011ac6..cace7315547d 100644 --- a/inline.h +++ b/inline.h @@ -3405,6 +3405,182 @@ S_lossless_NV_to_IV(const NV nv, IV *ivp) return FALSE; } +/* + * S_iv_{add,sub,mul}_may_overflow(a, b, p) virtually compute "a b" + * (where is +, -, or *) in infinite precision, and, if the result + * is (or may be) not representable with IV, return true. + * Otherwise (no overflow), store the result to *p and return false. + * These functions allow false positives (so their names contain "may") + * to speed up simple common cases. + */ + +/* Define IV_*_OVERFLOW_IS_EXPENSIVE below to nonzero value + * if strict overflow checks are too expensive + * (for example, for CPUs that has no hardware overflow detection flag). + * If these macro has nonzero value, or overflow-checking compiler intrinsics + * are not available, good-old heuristics (with some false positives) + * will be used. */ +# ifndef IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE +# define IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE 0 +# endif +# ifndef IV_MUL_OVERFLOW_IS_EXPENSIVE +/* Strict overflow check for IV multiplication is generally expensive + * when IV is a multi-word integer. */ +# define IV_MUL_OVERFLOW_IS_EXPENSIVE (IVSIZE > LONGSIZE) +# endif + +# if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE +/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */ +# define S_iv_add_may_overflow(il, ir, result) ckd_add(result, il, ir) +# elif defined(HAS_BUILTIN_ADD_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE +# define S_iv_add_may_overflow __builtin_add_overflow +# else +PERL_STATIC_INLINE bool +S_iv_add_may_overflow (IV il, IV ir, IV *const result) +{ + /* topl and topr hold only 2 bits */ + PERL_UINT_FAST8_T const topl = ((UV)il) >> (UVSIZE * 8 - 2); + PERL_UINT_FAST8_T const topr = ((UV)ir) >> (UVSIZE * 8 - 2); + + /* if both are in a range that can't under/overflow, do a simple integer + * add: if the top of both numbers are 00 or 11, then it's safe */ + if (!( ((topl+1) | (topr+1)) & 2)) { + *result = il + ir; + return false; + } + return true; /* addition may overflow */ +} +# endif + +/* + * S_uv_{add,sub,mul}_overflow(a, b, p) are similar, but the results are UV + * and they should perform strict overflow check (no false positives). 
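+ *
+ * For instance, assuming 64-bit UVs, the intended behaviour is:
+ *
+ *     UV r;
+ *     S_uv_add_overflow(2, 3, &r)          => false, r set to 5
+ *     S_uv_add_overflow(UV_MAX, 1, &r)     => true  (2**64 does not fit)
+ *     S_uv_mul_overflow((UV)1 << 32, (UV)1 << 32, &r)
+ *                                          => true  (2**64 does not fit)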
+ */ + +# if defined(I_STDCKDINT) +/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */ +# define S_uv_add_overflow(auv, buv, result) ckd_add(result, auv, buv) +# elif defined(HAS_BUILTIN_ADD_OVERFLOW) +# define S_uv_add_overflow __builtin_add_overflow +# else +PERL_STATIC_INLINE bool +S_uv_add_overflow (UV auv, UV buv, UV *const result) +{ + return (*result = auv + buv) < auv; +} +# endif + +# if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE +/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */ +# define S_iv_sub_may_overflow(il, ir, result) ckd_sub(result, il, ir) +# elif defined(HAS_BUILTIN_SUB_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE +# define S_iv_sub_may_overflow __builtin_sub_overflow +# else +PERL_STATIC_INLINE bool +S_iv_sub_may_overflow (IV il, IV ir, IV *const result) +{ + PERL_UINT_FAST8_T const topl = ((UV)il) >> (UVSIZE * 8 - 2); + PERL_UINT_FAST8_T const topr = ((UV)ir) >> (UVSIZE * 8 - 2); + + /* if both are in a range that can't under/overflow, do a simple integer + * subtract: if the top of both numbers are 00 or 11, then it's safe */ + if (!( ((topl+1) | (topr+1)) & 2)) { + *result = il - ir; + return false; + } + return true; /* subtraction may overflow */ +} +# endif + +# if defined(I_STDCKDINT) +/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */ +# define S_uv_sub_overflow(auv, buv, result) ckd_sub(result, auv, buv) +# elif defined(HAS_BUILTIN_SUB_OVERFLOW) +# define S_uv_sub_overflow __builtin_sub_overflow +# else +PERL_STATIC_INLINE bool +S_uv_sub_overflow (UV auv, UV buv, UV *const result) +{ + return (*result = auv - buv) > auv; +} +# endif + +# if defined(I_STDCKDINT) && !IV_MUL_OVERFLOW_IS_EXPENSIVE +/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */ +# define S_iv_mul_may_overflow(il, ir, result) ckd_mul(result, il, ir) +# elif defined(HAS_BUILTIN_MUL_OVERFLOW) && !IV_MUL_OVERFLOW_IS_EXPENSIVE +# define S_iv_mul_may_overflow __builtin_mul_overflow +# else +PERL_STATIC_INLINE bool +S_iv_mul_may_overflow (IV il, IV ir, IV *const result) +{ + UV const topl = ((UV)il) >> (UVSIZE * 4 - 1); + UV const topr = ((UV)ir) >> (UVSIZE * 4 - 1); + + /* if both are in a range that can't under/overflow, do a simple integer + * multiply: if the top halves(*) of both numbers are 00...00 or 11...11, + * then it's safe. + * (*) for 32-bits, the "top half" is the top 17 bits, + * for 64-bits, its 33 bits */ + if (!( + ((topl+1) | (topr+1)) + & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */ + )) { + *result = il * ir; + return false; + } + return true; /* multiplication may overflow */ +} +# endif + +# if defined(I_STDCKDINT) +/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */ +# define S_uv_mul_overflow(auv, buv, result) ckd_mul(result, auv, buv) +# elif defined(HAS_BUILTIN_MUL_OVERFLOW) +# define S_uv_mul_overflow __builtin_mul_overflow +# else +PERL_STATIC_INLINE bool +S_uv_mul_overflow (UV auv, UV buv, UV *const result) +{ + const UV topmask = (~ (UV)0) << (4 * sizeof (UV)); + const UV botmask = ~topmask; + +# if UVSIZE > LONGSIZE && UVSIZE <= 2 * LONGSIZE + unsigned long alow, ahigh, blow, bhigh; +# else + UV alow, ahigh, blow, bhigh; +# endif + + /* If this does sign extension on unsigned it's time for plan B */ + ahigh = auv >> (4 * sizeof (UV)); + alow = auv & botmask; + bhigh = buv >> (4 * sizeof (UV)); + blow = buv & botmask; + + if (ahigh && bhigh) + /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000 + which is overflow. 
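+           In general, with H = UVSIZE * 4 (half the bits of a UV) and
+           auv == ahigh * 2**H + alow (likewise for buv):
+               auv * buv == ahigh * bhigh * 2**(2*H)
+                            + (ahigh * blow + alow * bhigh) * 2**H
+                            + alow * blow
+           and the first term alone is >= 2**(2*H) > UV_MAX whenever
+           both high halves are nonzero.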
*/ + return true; + + UV product_middle = 0; + if (ahigh || bhigh) { + /* One operand is large, 1 small */ + /* Either ahigh or bhigh is zero here, so the addition below + can't overflow. */ + product_middle = ahigh * blow + alow * bhigh; + if (product_middle & topmask) + return true; + /* OK, product_middle won't lose bits when we shift it. */ + product_middle <<= 4 * sizeof (UV); + } + /* else: eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001 + so the unsigned multiply cannot overflow. */ + + UV product_low = alow * blow; + return S_uv_add_overflow(product_middle, product_low, result); +} +# endif + #endif /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */ diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 495b14061d77..b0bbf9433623 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -89,6 +89,12 @@ There may well be none in a stable release. =item * +Simple (non-overflowing) addition (C<+>), subtraction (C<->) and +multiplication (C<*>) of IVs are slightly sped up, as long as +sufficient underlying C compiler support is available. + +=item * + XXX =back diff --git a/pp.c b/pp.c index 189b2ddbdd34..1e39cbf437b2 100644 --- a/pp.c +++ b/pp.c @@ -1336,23 +1336,12 @@ PP(pp_multiply) U32 flags = (svl->sv_flags & svr->sv_flags); if (flags & SVf_IOK) { /* both args are simple IVs */ - UV topl, topr; + IV result; il = SvIVX(svl); ir = SvIVX(svr); do_iv: - topl = ((UV)il) >> (UVSIZE * 4 - 1); - topr = ((UV)ir) >> (UVSIZE * 4 - 1); - - /* if both are in a range that can't under/overflow, do a - * simple integer multiply: if the top halves(*) of both numbers - * are 00...00 or 11...11, then it's safe. - * (*) for 32-bits, the "top half" is the top 17 bits, - * for 64-bits, its 33 bits */ - if (!( - ((topl+1) | (topr+1)) - & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */ - )) { - TARGi(il * ir, 0); /* args not GMG, so can't be tainted */ + if (!S_iv_mul_may_overflow(il, ir, &result)) { + TARGi(result, 0); /* args not GMG, so can't be tainted */ goto ret; } goto generic; @@ -1388,12 +1377,9 @@ PP(pp_multiply) if (SvIV_please_nomg(svl)) { bool auvok = SvUOK(svl); bool buvok = SvUOK(svr); - const UV topmask = (~ (UV)0) << (4 * sizeof (UV)); - const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV))); UV alow; - UV ahigh; UV blow; - UV bhigh; + UV product; if (auvok) { alow = SvUVX(svl); @@ -1420,19 +1406,7 @@ PP(pp_multiply) } } - /* If this does sign extension on unsigned it's time for plan B */ - ahigh = alow >> (4 * sizeof (UV)); - alow &= botmask; - bhigh = blow >> (4 * sizeof (UV)); - blow &= botmask; - if (ahigh && bhigh) { - NOOP; - /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000 - which is overflow. Drop to NVs below. */ - } else if (!ahigh && !bhigh) { - /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001 - so the unsigned multiply cannot overflow. */ - const UV product = alow * blow; + if (!S_uv_mul_overflow(alow, blow, &product)) { if (auvok == buvok) { /* -ve * -ve or +ve * +ve gives a +ve result. */ TARGu(product, 1); @@ -1442,42 +1416,6 @@ PP(pp_multiply) TARGi(NEGATE_2IV(product), 1); goto ret; } /* else drop to NVs below. */ - } else { - /* One operand is large, 1 small */ - UV product_middle; - if (bhigh) { - /* swap the operands */ - ahigh = bhigh; - bhigh = blow; /* bhigh now the temp var for the swap */ - blow = alow; - alow = bhigh; - } - /* now, ((ahigh * blow) << half_UV_len) + (alow * blow) - multiplies can't overflow. shift can, add can, -ve can. 
*/ - product_middle = ahigh * blow; - if (!(product_middle & topmask)) { - /* OK, (ahigh * blow) won't lose bits when we shift it. */ - UV product_low; - product_middle <<= (4 * sizeof (UV)); - product_low = alow * blow; - - /* as for pp_add, UV + something mustn't get smaller. - IIRC ANSI mandates this wrapping *behaviour* for - unsigned whatever the actual representation*/ - product_low += product_middle; - if (product_low >= product_middle) { - /* didn't overflow */ - if (auvok == buvok) { - /* -ve * -ve or +ve * +ve gives a +ve result. */ - TARGu(product_low, 1); - goto ret; - } else if (product_low <= ABS_IV_MIN) { - /* -ve result, which could overflow an IV */ - TARGi(NEGATE_2IV(product_low), 1); - goto ret; - } /* else drop to NVs below. */ - } - } /* product_middle too large */ } /* ahigh && bhigh */ } /* SvIOK(svl) */ } /* SvIOK(svr) */ @@ -1929,18 +1867,12 @@ PP(pp_subtract) U32 flags = (svl->sv_flags & svr->sv_flags); if (flags & SVf_IOK) { /* both args are simple IVs */ - UV topl, topr; + IV result; il = SvIVX(svl); ir = SvIVX(svr); do_iv: - topl = ((UV)il) >> (UVSIZE * 8 - 2); - topr = ((UV)ir) >> (UVSIZE * 8 - 2); - - /* if both are in a range that can't under/overflow, do a - * simple integer subtract: if the top of both numbers - * are 00 or 11, then it's safe */ - if (!( ((topl+1) | (topr+1)) & 2)) { - TARGi(il - ir, 0); /* args not GMG, so can't be tainted */ + if (!S_iv_sub_may_overflow(il, ir, &result)) { + TARGi(result, 0); /* args not GMG, so can't be tainted */ goto ret; } goto generic; diff --git a/pp_hot.c b/pp_hot.c index 3f6cd29611bc..b0d5861c4f2f 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1827,18 +1827,12 @@ PP(pp_add) U32 flags = (svl->sv_flags & svr->sv_flags); if (flags & SVf_IOK) { /* both args are simple IVs */ - UV topl, topr; + IV result; il = SvIVX(svl); ir = SvIVX(svr); do_iv: - topl = ((UV)il) >> (UVSIZE * 8 - 2); - topr = ((UV)ir) >> (UVSIZE * 8 - 2); - - /* if both are in a range that can't under/overflow, do a - * simple integer add: if the top of both numbers - * are 00 or 11, then it's safe */ - if (!( ((topl+1) | (topr+1)) & 2)) { - TARGi(il + ir, 0); /* args not GMG, so can't be tainted */ + if (!S_iv_add_may_overflow(il, ir, &result)) { + TARGi(result, 0); /* args not GMG, so can't be tainted */ goto ret; } goto generic; diff --git a/t/op/64bitint.t b/t/op/64bitint.t index dcaa94b7bbab..bbf52f92bdf6 100644 --- a/t/op/64bitint.t +++ b/t/op/64bitint.t @@ -469,4 +469,30 @@ cmp_ok 0x3ffffffffffffffe % -0xc000000000000000, '==', -0x8000000000000002, 'mo cmp_ok 0x3fffffffffffffff % -0xc000000000000000, '==', -0x8000000000000001, 'modulo is (IV_MIN-1)'; cmp_ok 0x4000000000000000 % -0xc000000000000000, '==', -0x8000000000000000, 'modulo is IV_MIN'; +# Arithmetic close to IV overflow + +# These had been handled in generic (slower) code, but now in fast path +# (as "simple common case"). Either way, these tests should pass. 
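+# For example, sums and differences that land exactly on IV_MAX or IV_MIN:
+cmp_ok 9223372036854775806 + 1, '==', 9223372036854775807, 'sum is exactly IV_MAX';
+cmp_ok -9223372036854775807 - 1, '==', -0x8000000000000000, 'difference is exactly IV_MIN';
+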
+$q = 9223372036854775800; +cmp_ok 5 + $q, '==', 9223372036854775805, "5 + $q"; +cmp_ok $q - -5, '==', 9223372036854775805, "$q - -5"; +$q = 1111111111111111111; +cmp_ok $q * 5, '==', 5555555555555555555, "$q * 5"; + +# IV IV -> UV/NV promotion + +$q = 7777777777777777777; +$r = 2222222222222222223; +# Note 10000000000000000000 can be represented accurately in both +# IEEE double (binary64; 0x1.158e460913dp+63) and decimal format (1e+19) +cmp_ok $q + $r, '==', 10000000000000000000, 'IV + IV promotes to UV'; +cmp_ok -$q + -$r, '==', -10000000000000000000, 'IV + IV promotes to NV'; +cmp_ok $q - -$r, '==', 10000000000000000000, 'IV - IV promotes to UV'; +cmp_ok -$q - $r, '==', -10000000000000000000, 'IV - IV promotes to NV'; +$q = 3000000000; +$r = 4000000000; +cmp_ok $q * $r, '==', 12000000000000000000, 'IV * IV promotes to UV'; +cmp_ok $q * -$r, '==', -12000000000000000000, 'IV * IV promotes to UV then NV'; +cmp_ok +($q * 2) * $r, '==', 24000000000000000000, 'IV * IV promotes to NV'; + done_testing(); From 4f3d212d416088ba0352529c24a0b0e3c7cd66c9 Mon Sep 17 00:00:00 2001 From: TAKAI Kousuke <62541129+t-a-k@users.noreply.github.com> Date: Thu, 31 Jul 2025 17:11:46 +0900 Subject: [PATCH 2/3] S_uv_mul_overflow in inline.h: Add casts to make multiplications done in UV If C compiler doesn't know __builtin_mul_overflow, S_uv_mul_overflow will be implemented with fallback "long multiplication" algorithm, but it had a bug that elemental multiplications were done in unsigned long precision instead of UV precision. It will lead wrong result when unsigned long is narrower than UV (for example -Duse64bitint on 32-bit platform). --- inline.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/inline.h b/inline.h index cace7315547d..4b1fccbf1d3a 100644 --- a/inline.h +++ b/inline.h @@ -3546,6 +3546,8 @@ S_uv_mul_overflow (UV auv, UV buv, UV *const result) const UV botmask = ~topmask; # if UVSIZE > LONGSIZE && UVSIZE <= 2 * LONGSIZE + /* If UV is double-word integer, declare these variables as single-word + integers to help compiler to avoid double-word multiplication. */ unsigned long alow, ahigh, blow, bhigh; # else UV alow, ahigh, blow, bhigh; @@ -3567,7 +3569,7 @@ S_uv_mul_overflow (UV auv, UV buv, UV *const result) /* One operand is large, 1 small */ /* Either ahigh or bhigh is zero here, so the addition below can't overflow. */ - product_middle = ahigh * blow + alow * bhigh; + product_middle = (UV)ahigh * blow + (UV)alow * bhigh; if (product_middle & topmask) return true; /* OK, product_middle won't lose bits when we shift it. */ @@ -3576,7 +3578,7 @@ S_uv_mul_overflow (UV auv, UV buv, UV *const result) /* else: eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001 so the unsigned multiply cannot overflow. 
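+       Even so, the (UV) cast below still matters: if UV is wider than
+       unsigned long, an uncast alow * blow would be evaluated in unsigned
+       long precision and truncated before being widened to UV.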
*/ - UV product_low = alow * blow; + UV product_low = (UV)alow * blow; return S_uv_add_overflow(product_middle, product_low, result); } # endif From 1e6ea20e26b84d313d555b6f7d4f0b3ee08da640 Mon Sep 17 00:00:00 2001 From: TAKAI Kousuke <62541129+t-a-k@users.noreply.github.com> Date: Wed, 6 Aug 2025 02:13:09 +0900 Subject: [PATCH 3/3] inline.h: Comments fixed and added (intended to be squashed before merge) --- inline.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/inline.h b/inline.h index 4b1fccbf1d3a..104a44adc596 100644 --- a/inline.h +++ b/inline.h @@ -3416,8 +3416,8 @@ S_lossless_NV_to_IV(const NV nv, IV *ivp) /* Define IV_*_OVERFLOW_IS_EXPENSIVE below to nonzero value * if strict overflow checks are too expensive - * (for example, for CPUs that has no hardware overflow detection flag). - * If these macro has nonzero value, or overflow-checking compiler intrinsics + * (for example, for CPUs that have no hardware overflow detection flags). + * If these macros have nonzero value, or overflow-checking compiler intrinsics * are not available, good-old heuristics (with some false positives) * will be used. */ # ifndef IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE @@ -3466,6 +3466,10 @@ S_iv_add_may_overflow (IV il, IV ir, IV *const result) PERL_STATIC_INLINE bool S_uv_add_overflow (UV auv, UV buv, UV *const result) { + /* (auv + buv) < auv means that the addition wrapped around, + i.e. overflowed. Note that unsigned integer overflow is well-defined + in standard C to wrap around, in constrast to signed integer overflow + whose behaviour is undefined. */ return (*result = auv + buv) < auv; } # endif
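
As a self-contained illustration of the check described in the comment above
(plain C, no perl headers; uint64_t stands in for a 64-bit UV, and
uv_add_overflow mirrors the patch's S_uv_add_overflow), the wrap-around test
agrees with __builtin_add_overflow wherever that intrinsic is available:

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t UV;    /* stand-in for Perl's UV on a 64-bit build */

    #ifndef __has_builtin
    # define __has_builtin(x) 0
    #endif

    static bool uv_add_overflow(UV a, UV b, UV *r)
    {
    #if __has_builtin(__builtin_add_overflow)
        return __builtin_add_overflow(a, b, r);
    #else
        /* Unsigned overflow wraps around, so a sum smaller than either
           operand means the true result did not fit in a UV. */
        return (*r = a + b) < a;
    #endif
    }

    int main(void)
    {
        UV r;
        printf("%d\n", (int)uv_add_overflow(2, 3, &r));          /* 0, r == 5 */
        printf("%d\n", (int)uv_add_overflow(UINT64_MAX, 1, &r)); /* 1: 2**64 wraps to 0 */
        return 0;
    }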