diff --git a/src/libc/ldexpf.src b/src/libc/ldexpf.src index 0d3edeafe..70f892696 100644 --- a/src/libc/ldexpf.src +++ b/src/libc/ldexpf.src @@ -16,159 +16,94 @@ _scalbn := _ldexpf else -if 1 +; (set to 0 or 1) avoid returning negative zero on underflow with Ti's floats +__ldexpf_avoid_negative_zero := 1 -; NOTE: since Ti floats are used, negative zero will not be returned unless the -; input was negative zero. -; -; normal inputs are handled correctly, unless the output is subnormal -; subnormal inputs/outputs return zero and set ERANGE and FE_INEXACT -; zero/infinite/NaN inputs are handled correctly -_ldexpf: -_ldexp: -_scalbnf: -_scalbn: - ld iy, 0 - lea bc, iy + 0 - add iy, sp +; ldexpf behaviour: +; - signed zero, infinity, and NaN inputs are returned unmodified +; - ERRNO and FE_INEXACT are set if a finite value becomes zero or infinite +; - FE_INEXACT is set if rounding occured +;------------------------------------------------------------------------------- + + private __ldexpf_helper +__ldexpf_helper: +.maybe_subnormal: + ; A = zero, carry = signbit + rra ; restore signbit and clear carry + adc hl, bc ; BC is zero +.ret_self: ld hl, (iy + 3) ; mant - add hl, hl - ld a, (iy + 6) ; expon - adc a, a - jr z, .maybe_subnormal - inc a - jr z, .ret_self ; inf NaN - dec a - ex de, hl - ld hl, (iy + 9) ; scale - ld c, a - add hl, bc ; add expon - ld a, l + ret z ; return zero/inf/NaN + dec bc ; BC is now -1 +; .subnormal_input: + ; BC is -1 here bit 7, (iy + 11) ; scale sign - jr z, .scale_up -.scale_down: - ; HL is not INT_MIN here - dec hl + ld de, (iy + 9) ; scale + jr nz, _ldexpf.move_subnormal_down +; .move_subnormal_up: +.norm_loop: add hl, hl - jr nc, .finish ; expon > 0 - ; expon <= 0 or subnormal -.underflow_to_zero: - ld hl, ___fe_cur_env - set 5, (hl) ; FE_INEXACT + jr c, .normalized + ex de, hl + add hl, bc ; --scale + ex de, hl + jr c, .norm_loop +; .still_subnormal: + ; DE is -1 here + ; saves 8F for this path at a cost of 3 bytes: if 0 - ld a, (iy + 6) - and a, $80 ; copysign -else - xor a, a ; avoid returning negative zero with Ti's floats + inc de ; ld e, 0 + jr _ldexpf.finish_subnormal end if - sbc hl, hl -.common_erange: - ld bc, 5 ; ERANGE - ld (_errno), bc - ld e, a - ret -.overflow_to_inf: - ld hl, $800000 - ld a, (iy + 6) - or a, $7F ; copysign - jr .common_erange - -.scale_up: - ld bc, -255 - add hl, bc - jr c, .overflow_to_inf -.finish: - ld l, a +.normalized: + inc de ; don't touch the Z flag ex de, hl - ; signbit(A) E:UHL >>= 1 - ld a, (iy + 6) ; expon - push hl - rla - rr e - rr (iy - 1) - pop hl - rr h - rr l - ret - -.maybe_subnormal: - dec bc ; BC is now -1 - add hl, bc - jr c, .underflow_to_zero - ; return zero -.ret_self: - ld hl, (iy + 3) - ld e, (iy + 6) - ret + ; Z is set here + jr _ldexpf.scale_up_subnormal -else - -; normal inputs are handled correctly, unless the output is subnormal -; subnormal inputs are handled correctly for positive scaling values -; subnormal outputs return zero and set ERANGE and FE_INEXACT for negative scaling values -; zero/infinite/NaN inputs are handled correctly -_ldexpf: -_ldexp: -_scalbnf: +;------------------------------------------------------------------------------- +; When the input and output are normal: +; scaling up : 60F + 12R + 4W + 2 +; scaling down: 60F + 12R + 4W + 4 _scalbn: +_scalbnf: +_ldexp: +_ldexpf: ld iy, 0 lea bc, iy + 0 add iy, sp ld hl, (iy + 3) ; mant add hl, hl - ld e, (iy + 6) ; expon - ld a, e - rl e - jr z, .maybe_subnormal - inc e - jr z, .ret_self ; inf NaN - dec e - ld c, e + ld a, (iy + 6) ; expon + ld e, a ; signbit + adc a, a + jr z, __ldexpf_helper.maybe_subnormal + ld c, a + inc a + jr z, __ldexpf_helper.ret_self ; inf NaN + ld a, e ; signbit + ld de, (iy + 9) ; scale ex de, hl - ld hl, (iy + 9) ; scale add hl, bc ; add expon - ld a, l bit 7, (iy + 11) ; scale sign - jr z, .scale_up -.scale_down: - ; test signbit - push hl - ; HL is not INT_MIN here - dec hl - add hl, hl - pop hl - jr nc, .finish ; expon > 0 - ; expon <= 0 or subnormal -; jr .underflow_to_zero -.underflow_to_zero: - ld hl, ___fe_cur_env - set 5, (hl) ; FE_INEXACT - ld a, (iy + 6) - and a, $80 ; copysign - sbc hl, hl -.common_erange: - ld bc, 5 ; ERANGE - ld (_errno), bc - ld e, a - ret -.overflow_to_inf: - ld hl, $800000 - ld a, (iy + 6) - or a, $7F ; copysign - jr .common_erange - + jr nz, .scale_down .scale_up: - ld bc, -255 + ; HL is [1, $8000FD] + ld c, b ; ld bc, 0 + dec bc ; ld bc, -1 +.scale_up_subnormal: ; <-- HL is [0, $7FFFFE] + inc c ; ld bc, $FFFF00 + inc c ; ld bc, $FFFF01 ; BC is -255 ; sets NZ + ; ld bc, -255 add hl, bc jr c, .overflow_to_inf -.finish: - ld l, a + ; sbc hl, bc ; restore hl + dec l ; we only care about the low 8 bits ex de, hl .finish_subnormal: - ; signbit(A) E:UHL >>= 1 - ld a, (iy + 6) ; expon push hl - rla +.finish: + rla ; extract signbit rr e rr (iy - 1) pop hl @@ -176,42 +111,118 @@ _scalbn: rr l ret -.maybe_subnormal: - dec bc ; BC is now -1 - add hl, bc -; jr c, .underflow_to_zero - jr c, .subnormal - ; return zero -.ret_self: - ld hl, (iy + 3) - ld e, (iy + 6) - ret +;------------------------------------------------------------------------------- -.subnormal: +.move_subnormal_down: + ; DE = scale ; BC is -1 here - bit 7, (iy + 11) ; scale sign - jr nz, .underflow_to_zero - ld de, (iy + 9) ; scale - ld hl, (iy + 3) ; mant -.norm_loop: - add hl, hl - jr c, .normalized - ex de, hl - add hl, bc ; --scale + ; first we need to test that the result won't be zero + call __ictlz + ex de, hl ; HL = scale + ; A is [1, 23] + ; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale) + add a, -24 ; A is [-23, -1] and carry is cleared + ld c, a ; sign extend A + ld a, l + sbc hl, bc + cpl + jr nc, _ldexpf.shru_common +; .underflow: + inc b ; ld b, 0 ; sets Z +.overflow_to_inf: ; <-- NZ is set when infinite + ld a, $28 ; FE_OVERFLOW | FE_INEXACT +.underflow_to_zero: ; <-- Z is set when underflowing to zero +.raise_erange: + ld hl, $800000 + jr nz, .overflow + ld a, $24 ; FE_UNDERFLOW | FE_INEXACT + add hl, hl ; ld hl, 0 +if __ldexpf_avoid_negative_zero + ; prevents negative zero from being emitted on underflow + res 7, (iy + 6) +end if +.overflow: ex de, hl - jr c, .norm_loop -; .still_subnormal: - ld e, 0 - jr .finish_subnormal -.normalized: - inc de + ld hl, 5 ; ERANGE + ld (_errno), hl +.raise_inexact: + ld hl, ___fe_cur_env + or a, (hl) + ld (hl), a +.result_is_exact: + ld a, (iy + 6) ; expon + rla ; extract signbit ex de, hl - ld a, l - jr .scale_up + ; B is $FF if infinite and $00 otherwise + rr b + ld e, b + ret -end if +;------------------------------------------------------------------------------- +.scale_down: + push de ; mant <<= 1 + ld e, l ; shift amount + ; HL is not INT_MIN here + dec hl + add hl, hl + jr nc, .finish ; expon > 0 +;------------------------------------------------------------------------------- +.shru_to_subnormal: + ; Z is set here + xor a, a + ld c, 48 ; ld bc, 24 << 1 + add hl, bc + pop hl ; reset SP + jr nc, .underflow_to_zero + sub a, e + set 7, (iy + 5) ; set implicit mantissa bit +.shru_common: + ; A should be [0, 23] + ld b, a + ld hl, (iy + 3) ; mantissa + push hl ; ld (iy - 3), hl + xor a, a + inc b + ; shift amount will be [1, 24] + ld d, a ; ld d, 0 + ld c, (iy - 1) +.shru_loop: + adc a, d ; collect sticky bits + srl c + rr h + rr l + djnz .shru_loop + ld (iy - 1), c + pop de + ld d, h + ld e, l + + ; round upwards to even if (round && (guard || sticky)) + jr nc, .no_round + ; we must ensure that FE_INEXACT is raised since rounding has occured + or a, a + jr nz, .round_up + inc a ; ld a, 1 + and a, e ; test guard bit + jr z, .no_round_inexact +.round_up: + inc de ; round upwards to even (wont overflow) +.no_round: + adc a, a ; test the sticky and round bits + jr z, .result_is_exact + ; carry wont be set +.no_round_inexact: + ; we need to raise ERANGE if the mantissa was rounded down to zero + ld a, c ; UDE + or a, d + or a, e + ld a, $20 ; FE_INEXACT + jr nz, .raise_inexact + ; NZ needs to be set here + jr .raise_erange - extern ___fe_cur_env extern _errno + extern ___fe_cur_env + extern __ictlz end if diff --git a/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h b/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h index f33c9e88b..319a0e06f 100644 --- a/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h +++ b/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h @@ -9,7 +9,7 @@ typedef struct { uint32_t value; int expon; } input_type; typedef uint32_t output_type; -static const input_type f32_ldexp_LUT_input[1080] = { +static const input_type f32_ldexp_LUT_input[1100] = { /* 0 */ {UINT32_C(0x00000000), 0}, /* 1 */ {UINT32_C(0x00000001), 0}, /* 2 */ {UINT32_C(0x00800000), 0}, @@ -1090,9 +1090,31 @@ static const input_type f32_ldexp_LUT_input[1080] = { /* 1077 */ {UINT32_C(0x7A9CE72A), 125}, /* 1078 */ {UINT32_C(0x3479E345), 129}, /* 1079 */ {UINT32_C(0xF96A3263), 19}, +/* 1080 */ {UINT32_C(0x1E8739EC), 109}, +/* extra tests */ +/* 1081 */ {UINT32_C(0x0001D191), -17}, +/* 1082 */ {UINT32_C(0x0003EF18), -18}, +/* 1083 */ {UINT32_C(0x000426F3), -19}, +/* 1084 */ {UINT32_C(0x000C70E3), -20}, +/* 1085 */ {UINT32_C(0x800FA056), -20}, +/* 1086 */ {UINT32_C(0x001BB2FA), -21}, +/* 1087 */ {UINT32_C(0x8016A81D), -21}, +/* 1088 */ {UINT32_C(0x002ECA33), -22}, +/* 1089 */ {UINT32_C(0x803655E0), -22}, +/* 1090 */ {UINT32_C(0x004A3993), -23}, +/* 1091 */ {UINT32_C(0x804024DC), -23}, +/* other tests */ +/* 1092 */ {UINT32_C(0x367B228A), -131}, +/* 1093 */ {UINT32_C(0xE1AFE9E3), -218}, +/* 1094 */ {UINT32_C(0x2B683524), -108}, +/* 1095 */ {UINT32_C(0xBF55E15C), -148}, +/* 1096 */ {UINT32_C(0x728C6D7F), -252}, +/* 1097 */ {UINT32_C(0xD9E79CEB), -202}, +/* 1098 */ {UINT32_C(0x19F6F465), -74}, +/* 1099 */ {UINT32_C(0xD014C299), -183}, }; -static const output_type f32_ldexp_LUT_output[1080] = { +static const output_type f32_ldexp_LUT_output[1100] = { /* 0 */ UINT32_C(0x00000000), /* 1 */ UINT32_C(0x00000001), /* 2 */ UINT32_C(0x00800000), @@ -2173,6 +2195,28 @@ static const output_type f32_ldexp_LUT_output[1080] = { /* 1077 */ UINT32_C(0x7F800000), /* 1078 */ UINT32_C(0x74F9E345), /* 1079 */ UINT32_C(0xFF800000), +/* 1080 */ UINT32_C(0x550739EC), +/* extra tests */ +/* 1081 */ UINT32_C(0x00000001), +/* 1082 */ UINT32_C(0x00000001), +/* 1083 */ UINT32_C(0x00000001), +/* 1084 */ UINT32_C(0x00000001), +/* 1085 */ UINT32_C(0x80000001), +/* 1086 */ UINT32_C(0x00000001), +/* 1087 */ UINT32_C(0x80000001), +/* 1088 */ UINT32_C(0x00000001), +/* 1089 */ UINT32_C(0x80000001), +/* 1090 */ UINT32_C(0x00000001), +/* 1091 */ UINT32_C(0x80000001), +/* other tests */ +/* 1092 */ UINT32_C(0x00000001), +/* 1093 */ UINT32_C(0x80000001), +/* 1094 */ UINT32_C(0x00000002), +/* 1095 */ UINT32_C(0x80000002), +/* 1096 */ UINT32_C(0x00000001), +/* 1097 */ UINT32_C(0x80000001), +/* 1098 */ UINT32_C(0x00000001), +/* 1099 */ UINT32_C(0x80000001), }; #endif /* F32_LDEXP_LUT_H */ diff --git a/test/floating_point/float32_ldexp/src/main.c b/test/floating_point/float32_ldexp/src/main.c index 12280819c..d3ef166ec 100644 --- a/test/floating_point/float32_ldexp/src/main.c +++ b/test/floating_point/float32_ldexp/src/main.c @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -11,42 +13,77 @@ #include "f32_ldexp_LUT.h" - #define ARRAY_LENGTH(x) (sizeof(x) / sizeof(x[0])) typedef union F32_pun { float flt; uint32_t bin; + struct { + uint24_t mant; + uint8_t expon; + }; } F32_pun; +#if 0 +#define test_printf printf +#else +#define test_printf(...) +#endif + size_t run_test(void) { - typedef struct { float value; int expon; } input_t; + typedef struct { F32_pun value; int expon; } input_t; typedef F32_pun output_t; const size_t length = ARRAY_LENGTH(f32_ldexp_LUT_input); const input_t *input = (const input_t* )((const void*)f32_ldexp_LUT_input ); const output_t *output = (const output_t*)((const void*)f32_ldexp_LUT_output); for (size_t i = 0; i < length; i++) { + feclearexcept(FE_ALL_EXCEPT); + errno = 0; F32_pun result; - result.flt = ldexpf(input[i].value, input[i].expon); + result.flt = ldexpf(input[i].value.flt, input[i].expon); if (result.bin != output[i].bin) { - // ignore NaN's with differing payloads - // treat signed zeros as equal for now if ( - (!(isnan(result.flt) && isnan(output[i].flt))) && - (!(result.bin == 0 && iszero(output[i].flt))) + /* ignore NaN's with differing payloads */ + (!(isnan(result.flt) && isnan(output[i].flt))) + #if 1 + /* treat signed zeros as equal for now */ + && (!((result.bin == 0) && (output[i].bin == UINT32_C(0x80000000)))) + #endif ) { - /* Float multiplication does not handle subnormals yet */ - if (!(iszero(result.flt) && (issubnormal(output[i].flt) || issubnormal(input[i].value)))) { - #if 1 - printf( - "%zu:\nI: %08lX %+d\nG: %08lX\nT: %08lX\n", - i, *(uint32_t*)(void*)&(input[i].value), input[i].expon, - result.bin, output[i].bin - ); - #endif - return i; - } + test_printf( + "%zu:\nI: %08lX %+d\nG: %08lX\nT: %08lX\n", + i, input[i].value.bin, input[i].expon, + result.bin, output[i].bin + ); + return i; + } + } + /* test exceptions */ + if (!(isnan(input[i].value.flt) || isnan(output[i].flt))) { + int temp; + F32_pun mant_0, mant_1; + mant_0.flt = frexpf(fabsf(input[i].value.flt), &temp); + mant_1.flt = frexpf(fabsf(output[i].flt ), &temp); + unsigned char fe_val = __fe_cur_env; + bool inexact_raised = (fe_val & FE_INEXACT); + bool underflow_raised = (fe_val & FE_UNDERFLOW); + bool overflow_raised = (fe_val & FE_OVERFLOW); + bool mant_equal = (mant_0.bin == mant_1.bin); + bool became_zero = (mant_0.bin != 0 && mant_1.bin == 0); + bool became_infinite = (mant_0.bin != UINT32_C(0x7F800000) && mant_1.bin == UINT32_C(0x7F800000)); + if (!( + (mant_equal != inexact_raised) && + ((became_zero || became_infinite) == (errno == ERANGE)) && + (became_zero == underflow_raised) && (became_infinite == overflow_raised) + )) { + test_printf( + "%zu: FE: %02X errno: %d\nI: %08lX %+d\nO: %08lX\n", + i, (unsigned int)__fe_cur_env, errno, + input[i].value.bin, input[i].expon, output[i].bin + ); + fputs("fenv/errno\n", stdout); + return i; } } }