diff --git a/src/libc/ldexpf.src b/src/libc/ldexpf.src
index 0d3edeafe..70f892696 100644
--- a/src/libc/ldexpf.src
+++ b/src/libc/ldexpf.src
@@ -16,159 +16,94 @@ _scalbn := _ldexpf
 
 else
 
-if 1
+; (set to 0 or 1) avoid returning negative zero on underflow with Ti's floats
+__ldexpf_avoid_negative_zero := 1
 
-; NOTE: since Ti floats are used, negative zero will not be returned unless the
-; input was negative zero.
-;
-; normal inputs are handled correctly, unless the output is subnormal
-; subnormal inputs/outputs return zero and set ERANGE and FE_INEXACT
-; zero/infinite/NaN inputs are handled correctly
-_ldexpf:
-_ldexp:
-_scalbnf:
-_scalbn:
-	ld	iy, 0
-	lea	bc, iy + 0
-	add	iy, sp
+; ldexpf behaviour:
+; - signed zero, infinity, and NaN inputs are returned unmodified
+; - ERRNO and FE_INEXACT are set if a finite value becomes zero or infinite
+; - FE_INEXACT is set if rounding occured
+;-------------------------------------------------------------------------------
+
+	private	__ldexpf_helper
+__ldexpf_helper:
+.maybe_subnormal:
+	; A = zero, carry = signbit
+	rra	; restore signbit and clear carry
+	adc	hl, bc	; BC is zero
+.ret_self:
 	ld	hl, (iy + 3)	; mant
-	add	hl, hl
-	ld	a, (iy + 6)	; expon
-	adc	a, a
-	jr	z, .maybe_subnormal
-	inc	a
-	jr	z, .ret_self	; inf NaN
-	dec	a
-	ex	de, hl
-	ld	hl, (iy + 9)	; scale
-	ld	c, a
-	add	hl, bc	; add expon
-	ld	a, l
+	ret	z	; return zero/inf/NaN
+	dec	bc	; BC is now -1
+; .subnormal_input:
+	; BC is -1 here
 	bit	7, (iy + 11)	; scale sign
-	jr	z, .scale_up
-.scale_down:
-	; HL is not INT_MIN here
-	dec	hl
+	ld	de, (iy + 9)	; scale
+	jr	nz, _ldexpf.move_subnormal_down
+; .move_subnormal_up:
+.norm_loop:
 	add	hl, hl
-	jr	nc, .finish	; expon > 0
-	; expon <= 0 or subnormal
-.underflow_to_zero:
-	ld	hl, ___fe_cur_env
-	set	5, (hl)	; FE_INEXACT
+	jr	c, .normalized
+	ex	de, hl
+	add	hl, bc	; --scale
+	ex	de, hl
+	jr	c, .norm_loop
+; .still_subnormal:
+	; DE is -1 here
+	; saves 8F for this path at a cost of 3 bytes:
 if 0
-	ld	a, (iy + 6)
-	and	a, $80	; copysign
-else
-	xor	a, a	; avoid returning negative zero with Ti's floats
+	inc	de	; ld e, 0
+	jr	_ldexpf.finish_subnormal
 end if
-	sbc	hl, hl
-.common_erange:
-	ld	bc, 5	; ERANGE
-	ld	(_errno), bc
-	ld	e, a
-	ret
-.overflow_to_inf:
-	ld	hl, $800000
-	ld	a, (iy + 6)
-	or	a, $7F	; copysign
-	jr	.common_erange
-
-.scale_up:
-	ld	bc, -255
-	add	hl, bc
-	jr	c, .overflow_to_inf
-.finish:
-	ld	l, a
+.normalized:
+	inc	de	; don't touch the Z flag
 	ex	de, hl
-	; signbit(A) E:UHL >>= 1
-	ld	a, (iy + 6)	; expon
-	push	hl
-	rla
-	rr	e
-	rr	(iy - 1)
-	pop	hl
-	rr	h
-	rr	l
-	ret
-
-.maybe_subnormal:
-	dec	bc	; BC is now -1
-	add	hl, bc
-	jr	c, .underflow_to_zero
-	; return zero
-.ret_self:
-	ld	hl, (iy + 3)
-	ld	e, (iy + 6)
-	ret
+	; Z is set here
+	jr	_ldexpf.scale_up_subnormal
 
-else
-
-; normal inputs are handled correctly, unless the output is subnormal
-; subnormal inputs are handled correctly for positive scaling values
-; subnormal outputs return zero and set ERANGE and FE_INEXACT for negative scaling values
-; zero/infinite/NaN inputs are handled correctly
-_ldexpf:
-_ldexp:
-_scalbnf:
+;-------------------------------------------------------------------------------
+; When the input and output are normal:
+; scaling up  : 60F + 12R + 4W + 2
+; scaling down: 60F + 12R + 4W + 4
 _scalbn:
+_scalbnf:
+_ldexp:
+_ldexpf:
 	ld	iy, 0
 	lea	bc, iy + 0
 	add	iy, sp
 	ld	hl, (iy + 3)	; mant
 	add	hl, hl
-	ld	e, (iy + 6)	; expon
-	ld	a, e
-	rl	e
-	jr	z, .maybe_subnormal
-	inc	e
-	jr	z, .ret_self	; inf NaN
-	dec	e
-	ld	c, e
+	ld	a, (iy + 6)	; expon
+	ld	e, a		; signbit
+	adc	a, a
+	jr	z, __ldexpf_helper.maybe_subnormal
+	ld	c, a
+	inc	a
+	jr	z, __ldexpf_helper.ret_self	; inf NaN
+	ld	a, e		; signbit
+	ld	de, (iy + 9)	; scale
 	ex	de, hl
-	ld	hl, (iy + 9)	; scale
 	add	hl, bc	; add expon
-	ld	a, l
 	bit	7, (iy + 11)	; scale sign
-	jr	z, .scale_up
-.scale_down:
-	; test signbit
-	push	hl
-	; HL is not INT_MIN here
-	dec	hl
-	add	hl, hl
-	pop	hl
-	jr	nc, .finish	; expon > 0
-	; expon <= 0 or subnormal
-;	jr	.underflow_to_zero
-.underflow_to_zero:
-	ld	hl, ___fe_cur_env
-	set	5, (hl)	; FE_INEXACT
-	ld	a, (iy + 6)
-	and	a, $80	; copysign
-	sbc	hl, hl
-.common_erange:
-	ld	bc, 5	; ERANGE
-	ld	(_errno), bc
-	ld	e, a
-	ret
-.overflow_to_inf:
-	ld	hl, $800000
-	ld	a, (iy + 6)
-	or	a, $7F	; copysign
-	jr	.common_erange
-
+	jr	nz, .scale_down
 .scale_up:
-	ld	bc, -255
+	; HL is [1, $8000FD]
+	ld	c, b	; ld bc, 0
+	dec	bc	; ld bc, -1
+.scale_up_subnormal:	; <-- HL is [0, $7FFFFE]
+	inc	c	; ld bc, $FFFF00
+	inc	c	; ld bc, $FFFF01 ; BC is -255 ; sets NZ
+	; ld	bc, -255
 	add	hl, bc
 	jr	c, .overflow_to_inf
-.finish:
-	ld	l, a
+	; sbc	hl, bc	; restore hl
+	dec	l	; we only care about the low 8 bits
 	ex	de, hl
 .finish_subnormal:
-	; signbit(A) E:UHL >>= 1
-	ld	a, (iy + 6)	; expon
 	push	hl
-	rla
+.finish:
+	rla	; extract signbit
 	rr	e
 	rr	(iy - 1)
 	pop	hl
@@ -176,42 +111,118 @@ _scalbn:
 	rr	l
 	ret
 
-.maybe_subnormal:
-	dec	bc	; BC is now -1
-	add	hl, bc
-;	jr	c, .underflow_to_zero
-	jr	c, .subnormal
-	; return zero
-.ret_self:
-	ld	hl, (iy + 3)
-	ld	e, (iy + 6)
-	ret
+;-------------------------------------------------------------------------------
 
-.subnormal:
+.move_subnormal_down:
+	; DE = scale
 	; BC is -1 here
-	bit	7, (iy + 11)	; scale sign
-	jr	nz, .underflow_to_zero
-	ld	de, (iy + 9)	; scale
-	ld	hl, (iy + 3)	; mant
-.norm_loop:
-	add	hl, hl
-	jr	c, .normalized
-	ex	de, hl
-	add	hl, bc	; --scale
+	; first we need to test that the result won't be zero
+	call	__ictlz
+	ex	de, hl	; HL = scale
+	; A is [1, 23]
+	; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
+	add	a, -24	; A is [-23, -1] and carry is cleared
+	ld	c, a	; sign extend A
+	ld	a, l
+	sbc	hl, bc
+	cpl
+	jr	nc, _ldexpf.shru_common
+; .underflow:
+	inc	b	; ld b, 0 ; sets Z
+.overflow_to_inf:	; <-- NZ is set when infinite
+	ld	a, $28	; FE_OVERFLOW | FE_INEXACT
+.underflow_to_zero:	; <-- Z is set when underflowing to zero
+.raise_erange:
+	ld	hl, $800000
+	jr	nz, .overflow
+	ld	a, $24	; FE_UNDERFLOW | FE_INEXACT
+	add	hl, hl	; ld hl, 0
+if __ldexpf_avoid_negative_zero
+	; prevents negative zero from being emitted on underflow
+	res	7, (iy + 6)
+end if
+.overflow:
 	ex	de, hl
-	jr	c, .norm_loop
-; .still_subnormal:
-	ld	e, 0
-	jr	.finish_subnormal
-.normalized:
-	inc	de
+	ld	hl, 5	; ERANGE
+	ld	(_errno), hl
+.raise_inexact:
+	ld	hl, ___fe_cur_env
+	or	a, (hl)
+	ld	(hl), a
+.result_is_exact:
+	ld	a, (iy + 6)	; expon
+	rla	; extract signbit
 	ex	de, hl
-	ld	a, l
-	jr	.scale_up
+	; B is $FF if infinite and $00 otherwise
+	rr	b
+	ld	e, b
+	ret
 
-end if
+;-------------------------------------------------------------------------------
+.scale_down:
+	push	de	; mant <<= 1
+	ld	e, l	; shift amount
+	; HL is not INT_MIN here
+	dec	hl
+	add	hl, hl
+	jr	nc, .finish	; expon > 0
+;-------------------------------------------------------------------------------
+.shru_to_subnormal:
+	; Z is set here
+	xor	a, a
+	ld	c, 48	; ld bc, 24 << 1
+	add	hl, bc
+	pop	hl	; reset SP
+	jr	nc, .underflow_to_zero
+	sub	a, e
+	set	7, (iy + 5)	; set implicit mantissa bit
+.shru_common:
+	; A should be [0, 23]
+	ld	b, a
+	ld	hl, (iy + 3)	; mantissa
+	push	hl	; ld (iy - 3), hl
+	xor	a, a
+	inc	b
+	; shift amount will be [1, 24]
+	ld	d, a	; ld d, 0
+	ld	c, (iy - 1)
+.shru_loop:
+	adc	a, d	; collect sticky bits
+	srl	c
+	rr	h
+	rr	l
+	djnz	.shru_loop
+	ld	(iy - 1), c
+	pop	de
+	ld	d, h
+	ld	e, l
+
+	; round upwards to even if (round && (guard || sticky))
+	jr	nc, .no_round
+	; we must ensure that FE_INEXACT is raised since rounding has occured
+	or	a, a
+	jr	nz, .round_up
+	inc	a	; ld a, 1
+	and	a, e	; test guard bit
+	jr	z, .no_round_inexact
+.round_up:
+	inc	de	; round upwards to even (wont overflow)
+.no_round:
+	adc	a, a	; test the sticky and round bits
+	jr	z, .result_is_exact
+	; carry wont be set
+.no_round_inexact:
+	; we need to raise ERANGE if the mantissa was rounded down to zero
+	ld	a, c	; UDE
+	or	a, d
+	or	a, e
+	ld	a, $20	; FE_INEXACT
+	jr	nz, .raise_inexact
+	; NZ needs to be set here
+	jr	.raise_erange
 
-	extern	___fe_cur_env
 	extern	_errno
+	extern	___fe_cur_env
+	extern	__ictlz
 
 end if
diff --git a/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h b/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h
index f33c9e88b..319a0e06f 100644
--- a/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h
+++ b/test/floating_point/float32_ldexp/src/f32_ldexp_LUT.h
@@ -9,7 +9,7 @@ typedef struct { uint32_t value; int expon; } input_type;
 
 typedef uint32_t output_type;
 
-static const input_type f32_ldexp_LUT_input[1080] = {
+static const input_type f32_ldexp_LUT_input[1100] = {
 /*    0 */ {UINT32_C(0x00000000), 0},
 /*    1 */ {UINT32_C(0x00000001), 0},
 /*    2 */ {UINT32_C(0x00800000), 0},
@@ -1090,9 +1090,31 @@ static const input_type f32_ldexp_LUT_input[1080] = {
 /* 1077 */ {UINT32_C(0x7A9CE72A), 125},
 /* 1078 */ {UINT32_C(0x3479E345), 129},
 /* 1079 */ {UINT32_C(0xF96A3263), 19},
+/* 1080 */ {UINT32_C(0x1E8739EC), 109},
+/* extra tests */
+/* 1081 */ {UINT32_C(0x0001D191), -17},
+/* 1082 */ {UINT32_C(0x0003EF18), -18},
+/* 1083 */ {UINT32_C(0x000426F3), -19},
+/* 1084 */ {UINT32_C(0x000C70E3), -20},
+/* 1085 */ {UINT32_C(0x800FA056), -20},
+/* 1086 */ {UINT32_C(0x001BB2FA), -21},
+/* 1087 */ {UINT32_C(0x8016A81D), -21},
+/* 1088 */ {UINT32_C(0x002ECA33), -22},
+/* 1089 */ {UINT32_C(0x803655E0), -22},
+/* 1090 */ {UINT32_C(0x004A3993), -23},
+/* 1091 */ {UINT32_C(0x804024DC), -23},
+/* other tests */
+/* 1092 */ {UINT32_C(0x367B228A), -131},
+/* 1093 */ {UINT32_C(0xE1AFE9E3), -218},
+/* 1094 */ {UINT32_C(0x2B683524), -108},
+/* 1095 */ {UINT32_C(0xBF55E15C), -148},
+/* 1096 */ {UINT32_C(0x728C6D7F), -252},
+/* 1097 */ {UINT32_C(0xD9E79CEB), -202},
+/* 1098 */ {UINT32_C(0x19F6F465), -74},
+/* 1099 */ {UINT32_C(0xD014C299), -183},
 };
 
-static const output_type f32_ldexp_LUT_output[1080] = {
+static const output_type f32_ldexp_LUT_output[1100] = {
 /*    0 */ UINT32_C(0x00000000),
 /*    1 */ UINT32_C(0x00000001),
 /*    2 */ UINT32_C(0x00800000),
@@ -2173,6 +2195,28 @@ static const output_type f32_ldexp_LUT_output[1080] = {
 /* 1077 */ UINT32_C(0x7F800000),
 /* 1078 */ UINT32_C(0x74F9E345),
 /* 1079 */ UINT32_C(0xFF800000),
+/* 1080 */ UINT32_C(0x550739EC),
+/* extra tests */
+/* 1081 */ UINT32_C(0x00000001),
+/* 1082 */ UINT32_C(0x00000001),
+/* 1083 */ UINT32_C(0x00000001),
+/* 1084 */ UINT32_C(0x00000001),
+/* 1085 */ UINT32_C(0x80000001),
+/* 1086 */ UINT32_C(0x00000001),
+/* 1087 */ UINT32_C(0x80000001),
+/* 1088 */ UINT32_C(0x00000001),
+/* 1089 */ UINT32_C(0x80000001),
+/* 1090 */ UINT32_C(0x00000001),
+/* 1091 */ UINT32_C(0x80000001),
+/* other tests */
+/* 1092 */ UINT32_C(0x00000001),
+/* 1093 */ UINT32_C(0x80000001),
+/* 1094 */ UINT32_C(0x00000002),
+/* 1095 */ UINT32_C(0x80000002),
+/* 1096 */ UINT32_C(0x00000001),
+/* 1097 */ UINT32_C(0x80000001),
+/* 1098 */ UINT32_C(0x00000001),
+/* 1099 */ UINT32_C(0x80000001),
 };
 
 #endif /* F32_LDEXP_LUT_H */
diff --git a/test/floating_point/float32_ldexp/src/main.c b/test/floating_point/float32_ldexp/src/main.c
index 12280819c..d3ef166ec 100644
--- a/test/floating_point/float32_ldexp/src/main.c
+++ b/test/floating_point/float32_ldexp/src/main.c
@@ -3,6 +3,8 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <math.h>
+#include <fenv.h>
+#include <errno.h>
 #include <assert.h>
 #include <ti/screen.h>
 #include <ti/getcsc.h>
@@ -11,42 +13,77 @@
 
 #include "f32_ldexp_LUT.h"
 
-
 #define ARRAY_LENGTH(x) (sizeof(x) / sizeof(x[0]))
 
 typedef union F32_pun {
     float flt;
     uint32_t bin;
+    struct {
+        uint24_t mant;
+        uint8_t expon;
+    };
 } F32_pun;
 
+#if 0
+#define test_printf printf
+#else
+#define test_printf(...)
+#endif
+
 size_t run_test(void) {
-    typedef struct { float value; int expon; } input_t;
+    typedef struct { F32_pun value; int expon; } input_t;
     typedef F32_pun output_t;
 
     const size_t length = ARRAY_LENGTH(f32_ldexp_LUT_input);
     const input_t  *input  = (const input_t* )((const void*)f32_ldexp_LUT_input );
     const output_t *output = (const output_t*)((const void*)f32_ldexp_LUT_output);
     for (size_t i = 0; i < length; i++) {
+        feclearexcept(FE_ALL_EXCEPT);
+        errno = 0;
         F32_pun result;
-        result.flt = ldexpf(input[i].value, input[i].expon);
+        result.flt = ldexpf(input[i].value.flt, input[i].expon);
         if (result.bin != output[i].bin) {
-            // ignore NaN's with differing payloads
-            // treat signed zeros as equal for now
             if (
-                (!(isnan(result.flt) && isnan(output[i].flt))) &&
-                (!(result.bin == 0 && iszero(output[i].flt)))
+                /* ignore NaN's with differing payloads */
+                (!(isnan(result.flt) && isnan(output[i].flt)))
+                #if 1
+                    /* treat signed zeros as equal for now */
+                    && (!((result.bin == 0) && (output[i].bin == UINT32_C(0x80000000))))
+                #endif
             ) {
-                /* Float multiplication does not handle subnormals yet */
-                if (!(iszero(result.flt) && (issubnormal(output[i].flt) || issubnormal(input[i].value)))) {
-                    #if 1
-                        printf(
-                            "%zu:\nI: %08lX %+d\nG: %08lX\nT: %08lX\n",
-                            i, *(uint32_t*)(void*)&(input[i].value), input[i].expon,
-                            result.bin, output[i].bin
-                        );
-                    #endif
-                    return i;
-                }
+                test_printf(
+                    "%zu:\nI: %08lX %+d\nG: %08lX\nT: %08lX\n",
+                    i, input[i].value.bin, input[i].expon,
+                    result.bin, output[i].bin
+                );
+                return i;
+            }
+        }
+        /* test exceptions */
+        if (!(isnan(input[i].value.flt) || isnan(output[i].flt))) {
+            int temp;
+            F32_pun mant_0, mant_1;
+            mant_0.flt = frexpf(fabsf(input[i].value.flt), &temp);
+            mant_1.flt = frexpf(fabsf(output[i].flt     ), &temp);
+            unsigned char fe_val = __fe_cur_env;
+            bool inexact_raised = (fe_val & FE_INEXACT);
+            bool underflow_raised = (fe_val & FE_UNDERFLOW);
+            bool overflow_raised = (fe_val & FE_OVERFLOW);
+            bool mant_equal = (mant_0.bin == mant_1.bin);
+            bool became_zero = (mant_0.bin != 0 && mant_1.bin == 0);
+            bool became_infinite = (mant_0.bin != UINT32_C(0x7F800000) && mant_1.bin == UINT32_C(0x7F800000));
+            if (!(
+                (mant_equal != inexact_raised) &&
+                ((became_zero || became_infinite) == (errno == ERANGE)) &&
+                (became_zero == underflow_raised) && (became_infinite == overflow_raised)
+            )) {
+                test_printf(
+                    "%zu: FE: %02X errno: %d\nI: %08lX %+d\nO: %08lX\n",
+                    i, (unsigned int)__fe_cur_env, errno,
+                    input[i].value.bin, input[i].expon, output[i].bin
+                );
+                fputs("fenv/errno\n", stdout);
+                return i;
             }
         }
     }