Skip to content

ldexpf now correctly handles/rounds subnormal inputs #620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
331 changes: 171 additions & 160 deletions src/libc/ldexpf.src
Original file line number Diff line number Diff line change
Expand Up @@ -16,202 +16,213 @@ _scalbn := _ldexpf

else

if 1
; (set to 0 or 1) avoid returning negative zero on underflow with Ti's floats
__ldexpf_avoid_negative_zero := 1

; NOTE: since Ti floats are used, negative zero will not be returned unless the
; input was negative zero.
;
; normal inputs are handled correctly, unless the output is subnormal
; subnormal inputs/outputs return zero and set ERANGE and FE_INEXACT
; zero/infinite/NaN inputs are handled correctly
_ldexpf:
_ldexp:
_scalbnf:
_scalbn:
ld iy, 0
lea bc, iy + 0
add iy, sp
; ldexpf behaviour:
; - signed zero, infinity, and NaN inputs are returned unmodified
; - ERRNO and FE_INEXACT are set if a finite value becomes zero or infinite
; - FE_INEXACT is set if rounding occured
;-------------------------------------------------------------------------------

private __ldexpf_helper
__ldexpf_helper:
.maybe_subnormal:
; A = zero, carry = signbit
rra ; restore signbit and clear carry
adc hl, bc ; BC is zero
.ret_self:
ld hl, (iy + 3) ; mant
add hl, hl
ld a, (iy + 6) ; expon
adc a, a
jr z, .maybe_subnormal
inc a
jr z, .ret_self ; inf NaN
dec a
ex de, hl
ld hl, (iy + 9) ; scale
ld c, a
add hl, bc ; add expon
ld a, l
ret z ; return zero/inf/NaN
dec bc ; BC is now -1
; .subnormal_input:
; BC is -1 here
bit 7, (iy + 11) ; scale sign
jr z, .scale_up
.scale_down:
; HL is not INT_MIN here
dec hl
ld de, (iy + 9) ; scale
jr nz, _ldexpf.move_subnormal_down
; .move_subnormal_up:
.norm_loop:
add hl, hl
jr nc, .finish ; expon > 0
; expon <= 0 or subnormal
.underflow_to_zero:
ld hl, ___fe_cur_env
set 5, (hl) ; FE_INEXACT
jr c, .normalized
ex de, hl
add hl, bc ; --scale
ex de, hl
jr c, .norm_loop
; .still_subnormal:
; DE is -1 here
; saves 8F for this path at a cost of 3 bytes:
if 0
ld a, (iy + 6)
and a, $80 ; copysign
else
xor a, a ; avoid returning negative zero with Ti's floats
inc de ; ld e, 0
jr _ldexpf.finish_subnormal
end if
sbc hl, hl
.common_erange:
ld bc, 5 ; ERANGE
ld (_errno), bc
ld e, a
ret
.overflow_to_inf:
ld hl, $800000
ld a, (iy + 6)
or a, $7F ; copysign
jr .common_erange

.scale_up:
ld bc, -255
add hl, bc
jr c, .overflow_to_inf
.finish:
ld l, a
.normalized:
inc de ; don't touch the Z flag
ex de, hl
; signbit(A) E:UHL >>= 1
ld a, (iy + 6) ; expon
push hl
rla
rr e
rr (iy - 1)
pop hl
rr h
rr l
ret

.maybe_subnormal:
dec bc ; BC is now -1
add hl, bc
jr c, .underflow_to_zero
; return zero
.ret_self:
ld hl, (iy + 3)
ld e, (iy + 6)
ret
; Z is set here
jr _ldexpf.scale_up_subnormal

else

; normal inputs are handled correctly, unless the output is subnormal
; subnormal inputs are handled correctly for positive scaling values
; subnormal outputs return zero and set ERANGE and FE_INEXACT for negative scaling values
; zero/infinite/NaN inputs are handled correctly
_ldexpf:
_ldexp:
_scalbnf:
;-------------------------------------------------------------------------------
; When the input and output are normal:
; scaling up : 60F + 12R + 4W + 2
; scaling down: 60F + 12R + 4W + 4
_scalbn:
_scalbnf:
_ldexp:
_ldexpf:
ld iy, 0
lea bc, iy + 0
add iy, sp
ld hl, (iy + 3) ; mant
add hl, hl
ld e, (iy + 6) ; expon
ld a, e
rl e
jr z, .maybe_subnormal
inc e
jr z, .ret_self ; inf NaN
dec e
ld c, e
ld a, (iy + 6) ; expon
ld e, a ; signbit
adc a, a
jr z, __ldexpf_helper.maybe_subnormal
ld c, a
inc a
jr z, __ldexpf_helper.ret_self ; inf NaN
ld a, e ; signbit
ld de, (iy + 9) ; scale
ex de, hl
ld hl, (iy + 9) ; scale
add hl, bc ; add expon
ld a, l
bit 7, (iy + 11) ; scale sign
jr z, .scale_up
.scale_down:
; test signbit
push hl
; HL is not INT_MIN here
dec hl
add hl, hl
pop hl
jr nc, .finish ; expon > 0
; expon <= 0 or subnormal
; jr .underflow_to_zero
.underflow_to_zero:
ld hl, ___fe_cur_env
set 5, (hl) ; FE_INEXACT
ld a, (iy + 6)
and a, $80 ; copysign
sbc hl, hl
.common_erange:
ld bc, 5 ; ERANGE
ld (_errno), bc
ld e, a
ret
.overflow_to_inf:
ld hl, $800000
ld a, (iy + 6)
or a, $7F ; copysign
jr .common_erange

jr nz, .scale_down
.scale_up:
ld bc, -255
; HL is [1, $8000FD]
ld c, b ; ld bc, 0
dec bc ; ld bc, -1
.scale_up_subnormal: ; <-- HL is [0, $7FFFFE]
inc c ; ld bc, $FFFF00
inc c ; ld bc, $FFFF01 ; BC is -255 ; sets NZ
; ld bc, -255
add hl, bc
jr c, .overflow_to_inf
.finish:
ld l, a
; sbc hl, bc ; restore hl
dec l ; we only care about the low 8 bits
ex de, hl
.finish_subnormal:
; signbit(A) E:UHL >>= 1
ld a, (iy + 6) ; expon
push hl
rla
.finish:
rla ; extract signbit
rr e
rr (iy - 1)
pop hl
rr h
rr l
ret

.maybe_subnormal:
dec bc ; BC is now -1
add hl, bc
; jr c, .underflow_to_zero
jr c, .subnormal
; return zero
.ret_self:
ld hl, (iy + 3)
ld e, (iy + 6)
ret
;-------------------------------------------------------------------------------

.subnormal:
.move_subnormal_down:
; DE = scale
; BC is -1 here
bit 7, (iy + 11) ; scale sign
jr nz, .underflow_to_zero
ld de, (iy + 9) ; scale
ld hl, (iy + 3) ; mant
.norm_loop:
add hl, hl
jr c, .normalized
ex de, hl
add hl, bc ; --scale
; first we need to test that the result won't be zero
call __ictlz
ex de, hl ; HL = scale
; A is [1, 23]
; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
add a, -24 ; A is [-23, -1] and carry is cleared
ld c, a ; sign extend A
ld a, l
sbc hl, bc
cpl
jr nc, _ldexpf.shru_common
; .underflow:
inc b ; ld b, 0 ; sets Z
.overflow_to_inf: ; <-- NZ is set when infinite
ld a, $28 ; FE_OVERFLOW | FE_INEXACT
.underflow_to_zero: ; <-- Z is set when underflowing to zero
.raise_erange:
ld hl, $800000
jr nz, .overflow
ld a, $24 ; FE_UNDERFLOW | FE_INEXACT
add hl, hl ; ld hl, 0
if __ldexpf_avoid_negative_zero
; prevents negative zero from being emitted on underflow
res 7, (iy + 6)
end if
.overflow:
ex de, hl
jr c, .norm_loop
; .still_subnormal:
ld e, 0
jr .finish_subnormal
.normalized:
inc de
ld hl, 5 ; ERANGE
ld (_errno), hl
.raise_inexact:
ld hl, ___fe_cur_env
or a, (hl)
ld (hl), a
.result_is_exact:
ld a, (iy + 6) ; expon
rla ; extract signbit
ex de, hl
ld a, l
jr .scale_up
; B is $FF if infinite and $00 otherwise
rr b
ld e, b
ret

end if
;-------------------------------------------------------------------------------
.scale_down:
push de ; mant <<= 1
ld e, l ; shift amount
; HL is not INT_MIN here
dec hl
add hl, hl
jr nc, .finish ; expon > 0
;-------------------------------------------------------------------------------
.shru_to_subnormal:
; Z is set here
xor a, a
ld c, 48 ; ld bc, 24 << 1
add hl, bc
pop hl ; reset SP
jr nc, .underflow_to_zero
sub a, e
set 7, (iy + 5) ; set implicit mantissa bit
.shru_common:
; A should be [0, 23]
ld b, a
ld hl, (iy + 3) ; mantissa
push hl ; ld (iy - 3), hl
xor a, a
inc b
; shift amount will be [1, 24]
ld d, a ; ld d, 0
ld c, (iy - 1)
.shru_loop:
adc a, d ; collect sticky bits
srl c
rr h
rr l
djnz .shru_loop
ld (iy - 1), c
pop de
ld d, h
ld e, l

; round upwards to even if (round && (guard || sticky))
jr nc, .no_round
; we must ensure that FE_INEXACT is raised since rounding has occured
or a, a
jr nz, .round_up
inc a ; ld a, 1
and a, e ; test guard bit
jr z, .no_round_inexact
.round_up:
inc de ; round upwards to even (wont overflow)
.no_round:
adc a, a ; test the sticky and round bits
jr z, .result_is_exact
; carry wont be set
.no_round_inexact:
; we need to raise ERANGE if the mantissa was rounded down to zero
ld a, c ; UDE
or a, d
or a, e
ld a, $20 ; FE_INEXACT
jr nz, .raise_inexact
; NZ needs to be set here
jr .raise_erange

extern ___fe_cur_env
extern _errno
extern ___fe_cur_env
extern __ictlz

end if
Loading
Loading