diff --git a/demo/test.c b/demo/test.c index f8685364..0132baee 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1151,13 +1151,19 @@ static int test_mp_montgomery_reduce(void) } +#include static int test_mp_read_radix(void) { char buf[4096]; - size_t written; + size_t written, maxlen; + int bignum, i; - mp_int a; - DOR(mp_init_multi(&a, NULL)); + char *buffer, *bcpy, *startb; + + clock_t start, stop, t_slow, t_fast; + + mp_int a, b; + DOR(mp_init_multi(&a, &b, NULL)); DO(mp_read_radix(&a, "123456", 10)); @@ -1183,6 +1189,84 @@ static int test_mp_read_radix(void) DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10)); printf("\r '0' a == %s, length = %zu", buf, written); + /* Test the fast method with a slightly larger number */ + + /* Needs to be big enough to make a sufficiently large timing difference */ + bignum = 30000; + buffer = (char *)malloc((size_t)(bignum + 2)); + if (buffer == NULL) { + goto LBL_ERR; + } + DO(mp_rand(&a, bignum / MP_DIGIT_BIT)); + fprintf(stderr,"\nNumber of limbs in &b = %d, bit_count of &b = %d\n", bignum / MP_DIGIT_BIT, mp_count_bits(&a)); + start = clock(); + for (i = 2; i < 65; i++) { + /* printf("FAST radix = %d\n",i); */ + DO(mp_to_radix(&a, buffer, (size_t)(bignum + 1), &written, i)); + mp_zero(&b); + DO(mp_read_radix(&b, buffer, i)); + EXPECT(mp_cmp(&a, &b) == MP_EQ); + } + stop = clock(); + t_fast = stop - start; + + printf("Same number, slow radix conversions\n"); + start = clock(); + for (i = 2; i < 65; i++) { + /* printf("SLOW radix = %d\n",i); */ + maxlen = (size_t)(bignum + 1); + bcpy = buffer; + /* s_mp_slower_to_radix is very rudimentary as a stand-alone */ + startb = bcpy; + DO(s_mp_slower_to_radix(&a, &bcpy, &maxlen, &written, i, false)); + bcpy = startb; + mp_zero(&b); + DO(s_mp_slower_read_radix(&b, bcpy, 0, strlen(bcpy), i)); + EXPECT(mp_cmp(&a, &b) == MP_EQ); + } + stop = clock(); + t_slow = stop - start; + + /* It is "long int" in GLibC but can be bigger and/or even a floating point elsewhere */ + fprintf(stderr,"SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, + (double)t_fast/(double)CLOCKS_PER_SEC); + + /* Check if the branching works. */ + if (MP_HAS(S_MP_FASTER_READ_RADIX) && MP_HAS(S_MP_FASTER_TO_RADIX)) { + if (t_fast > t_slow) { + fprintf(stderr, "Timing suspicious in test_mp_read_radix. No fast multiplication? Cut-off too low?\n"); + DO(mp_fwrite(&a, 16, stderr)); + goto LBL_ERR; + } + } + + free(buffer); + +#if ((MP_DIGIT_BIT <= 16) && (defined MP_CHECK_RADIX_OVF)) + /* Check a number of size (MP_MAX_DIGIT_COUNT * MP_DIGIT_BIT - 1) at fixed radix "10". */ + /* Will not work if test is run on platforms with larger int's because + #define MP_MAX_DIGIT_COUNT ((INT_MAX - 2) / MP_DIGIT_BIT) + So we have to replace the value for INT_MAX with 2^15 - 1 = 32767 to test 16-bit int's. Not + very elegant but it works. + */ + bignum = ((32767 - 2) / MP_DIGIT_BIT); + bignum = ((bignum - 1) * MP_DIGIT_BIT) + (MP_DIGIT_BIT - 1); + /* Manual computation because the automatic methods might not have been included in the build */ + buffer = (char *)malloc(((bignum + 2)/1000) * 333); + if (buffer == NULL) { + goto LBL_ERR; + } + DO(mp_2expt(&a, bignum)); + DO(mp_decr(&a)); + printf("Number of limbs in &b = %d, bit_count of &b = %d\n", bignum / MP_DIGIT_BIT, mp_count_bits(&a)); + DO(mp_to_radix(&a, buffer, ((bignum + 2)/1000) * 333, &written, 10)); + DO(mp_read_radix(&b, buffer, 10)); + EXPECT(mp_cmp(&a, &b) == MP_EQ); + free(buffer); +#endif + + + while (0) { char *s = fgets(buf, sizeof(buf), stdin); if (s != buf) break; @@ -1192,10 +1276,10 @@ static int test_mp_read_radix(void) printf("%s, %lu\n", buf, (unsigned long)a.dp[0] & 3uL); } - mp_clear(&a); + mp_clear_multi(&a, &b, NULL); return EXIT_SUCCESS; LBL_ERR: - mp_clear(&a); + mp_clear_multi(&a, &b, NULL); return EXIT_FAILURE; } @@ -2479,7 +2563,7 @@ static int unit_tests(int argc, char **argv) T1(mp_prime_next_prime, MP_PRIME_NEXT_PRIME), T1(mp_prime_rand, MP_PRIME_RAND), T1(mp_rand, MP_RAND), - T1(mp_read_radix, MP_READ_RADIX), + T3(mp_read_radix, ONLY_PUBLIC_API, MP_READ_RADIX, MP_TO_RADIX), T1(mp_read_write_ubin, MP_TO_UBIN), T1(mp_read_write_sbin, MP_TO_SBIN), T1(mp_reduce_2k, MP_REDUCE_2K), diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj index 13158a09..c90ff4b6 100644 --- a/libtommath_VS2008.vcproj +++ b/libtommath_VS2008.vcproj @@ -832,6 +832,18 @@ RelativePath="s_mp_exptmod_fast.c" > + + + + + + @@ -908,6 +920,14 @@ RelativePath="s_mp_rand_platform.c" > + + + + diff --git a/makefile b/makefile index f8feff7c..21a40483 100644 --- a/makefile +++ b/makefile @@ -44,12 +44,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o #END_INS diff --git a/makefile.mingw b/makefile.mingw index 532747be..3cd8f5b5 100644 --- a/makefile.mingw +++ b/makefile.mingw @@ -46,12 +46,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.msvc b/makefile.msvc index 5d128549..903c3f4b 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -42,12 +42,13 @@ mp_reduce_setup.obj mp_root_n.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj mp_set mp_set_l.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj mp_shrink.obj mp_signed_rsh.obj mp_sqrmod.obj mp_sqrt.obj \ mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj \ mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj s_mp_div_recursive.obj \ -s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_fp_log.obj s_mp_fp_log_d.obj \ -s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \ -s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \ -s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj \ -s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj \ -s_mp_sqr_toom.obj s_mp_sub.obj s_mp_zero_buf.obj s_mp_zero_digs.obj +s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_faster_read_radix.obj \ +s_mp_faster_to_radix.obj s_mp_floor_ilog2.obj s_mp_fp_log.obj s_mp_fp_log_d.obj s_mp_get_bit.obj s_mp_invmod.obj \ +s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj s_mp_mul_balance.obj \ +s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj s_mp_mul_toom.obj \ +s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj s_mp_radix_size_overestimate.obj \ +s_mp_rand_platform.obj s_mp_slower_read_radix.obj s_mp_slower_to_radix.obj s_mp_sqr.obj s_mp_sqr_comba.obj \ +s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_zero_buf.obj s_mp_zero_digs.obj HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.shared b/makefile.shared index c9b93351..c48efdd6 100644 --- a/makefile.shared +++ b/makefile.shared @@ -41,12 +41,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o #END_INS diff --git a/makefile.unix b/makefile.unix index 9fe939f0..737a7337 100644 --- a/makefile.unix +++ b/makefile.unix @@ -47,12 +47,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h diff --git a/mp_prime_is_prime.c b/mp_prime_is_prime.c index bb24f594..55c054e8 100644 --- a/mp_prime_is_prime.c +++ b/mp_prime_is_prime.c @@ -3,16 +3,6 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* portable integer log of two with small footprint */ -static unsigned int s_floor_ilog2(int value) -{ - unsigned int r = 0; - while ((value >>= 1) != 0) { - r++; - } - return r; -} - mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) { mp_int b; @@ -186,7 +176,7 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) * Hence the ugly type-fiddling in the following code. */ size_a = mp_count_bits(a); - mask = (1u << s_floor_ilog2(size_a)) - 1u; + mask = (1u << s_mp_floor_ilog2((unsigned int)size_a)) - 1u; /* Assuming the General Rieman hypothesis (never thought to write that in a comment) the upper bound can be lowered to 2*(log a)^2. diff --git a/mp_read_radix.c b/mp_read_radix.c index 28e6eb60..4b4313a6 100644 --- a/mp_read_radix.c +++ b/mp_read_radix.c @@ -3,11 +3,30 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ +#ifdef MP_USE_MEMOPS +# include + +# define MP_STRLEN(s) strlen(s) +#else +static size_t s_mp_strlen(const char *s) +{ + const char *p; + p = s; + while (*p != '\0') { + p++; + } + return (size_t)(p - s); +} +# define MP_STRLEN(s) s_mp_strlen(s) +#endif + /* read a string [ASCII] in a given radix */ mp_err mp_read_radix(mp_int *a, const char *str, int radix) { + mp_err err; mp_sign sign = MP_ZPOS; + size_t slen; /* make sure the radix is ok */ if ((radix < 2) || (radix > 64)) { @@ -22,48 +41,24 @@ mp_err mp_read_radix(mp_int *a, const char *str, int radix) sign = MP_NEG; } - /* set the integer to the default of zero */ - mp_zero(a); + slen = MP_STRLEN(str); - /* process each digit of the string */ - while (*str != '\0') { - /* if the radix <= 36 the conversion is case insensitive - * this allows numbers like 1AB and 1ab to represent the same value - * [e.g. in hex] - */ - uint8_t y; - char ch = (radix <= 36) ? (char)MP_TOUPPER((int)*str) : *str; - unsigned pos = (unsigned)(ch - '+'); - if (MP_RADIX_MAP_REVERSE_SIZE <= pos) { - break; - } - y = s_mp_radix_map_reverse[pos]; + mp_zero(a); - /* if the char was found in the map - * and is less than the given radix add it - * to the number, otherwise exit the loop. - */ - if (y >= radix) { - break; - } - if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) { - return err; - } - if ((err = mp_add_d(a, y, a)) != MP_OKAY) { - return err; - } - ++str; - } - - /* if an illegal character was found, fail. */ - if ((*str != '\0') && (*str != '\r') && (*str != '\n')) { - return MP_VAL; + /* Try faster version first */ + if (MP_HAS(S_MP_FASTER_READ_RADIX)) { + if ((err = s_mp_faster_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR; + } else if (MP_HAS(S_MP_SLOWER_READ_RADIX)) { + if ((err = s_mp_slower_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR; } /* set the sign only if a != 0 */ if (!mp_iszero(a)) { a->sign = sign; } - return MP_OKAY; + +LTM_ERR: + return err; } + #endif diff --git a/mp_to_radix.c b/mp_to_radix.c index 1e5e6711..e7509efe 100644 --- a/mp_to_radix.c +++ b/mp_to_radix.c @@ -3,17 +3,6 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* reverse an array, used for radix code */ -static void s_reverse(char *s, size_t len) -{ - size_t ix = 0, iy = len - 1u; - while (ix < iy) { - MP_EXCH(char, s[ix], s[iy]); - ++ix; - --iy; - } -} - /* stores a bignum as a ASCII string in a given radix (2..64) * * Stores upto "size - 1" chars and always a NULL byte, puts the number of characters @@ -21,11 +10,9 @@ static void s_reverse(char *s, size_t len) */ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) { - size_t digs; - mp_err err; - mp_int t; - mp_digit d; - char *_s = str; + mp_err err; + mp_int a_bar = *a; + size_t part_written = 0; /* check range of radix and size*/ if (maxlen < 2u) { @@ -45,50 +32,38 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i return MP_OKAY; } - if ((err = mp_init_copy(&t, a)) != MP_OKAY) { - return err; - } - /* if it is negative output a - */ - if (mp_isneg(&t)) { - /* we have to reverse our digits later... but not the - sign!! */ - ++_s; - + if (mp_isneg(a)) { /* store the flag and mark the number as positive */ *str++ = '-'; - t.sign = MP_ZPOS; + a_bar.sign = MP_ZPOS; /* subtract a char */ --maxlen; } - digs = 0u; - while (!mp_iszero(&t)) { - if (--maxlen < 1u) { - /* no more room */ - err = MP_BUF; - goto LBL_ERR; - } - if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) { - goto LBL_ERR; + + /* TODO: check if it can be done better */ + if (MP_HAS(S_MP_FASTER_TO_RADIX)) { + if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY) goto LBL_ERR; + } else { + if (MP_HAS(S_MP_SLOWER_TO_RADIX)) { + char *start = str; + if ((err = s_mp_slower_to_radix(&a_bar, &str, &maxlen, &part_written, radix, false)) != MP_OKAY) goto LBL_ERR; + str = start; + /* part_written does not count EOS */ + part_written++; } - *str++ = s_mp_radix_map[d]; - ++digs; } - /* reverse the digits of the string. In this case _s points - * to the first digit [excluding the sign] of the number - */ - s_reverse(_s, digs); - /* append a NULL so the string is properly terminated */ - *str = '\0'; - digs++; + /* TODO: Think about adding a function for base-2 radices only although + s_faster_to_radix is rather quick with such radices. */ if (written != NULL) { - *written = mp_isneg(a) ? (digs + 1u): digs; + part_written += mp_isneg(a) ? 1: 0; + *written = part_written; } LBL_ERR: - mp_clear(&t); return err; } diff --git a/s_mp_faster_read_radix.c b/s_mp_faster_read_radix.c new file mode 100644 index 00000000..f35d8947 --- /dev/null +++ b/s_mp_faster_read_radix.c @@ -0,0 +1,60 @@ +#include "tommath_private.h" +#ifdef S_MP_FASTER_READ_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* TODO: It is tunable. Or is it? */ +#define MP_READ_RADIX_CUTOFF_MULTIPLICATOR 3 + +/* for(n=2,64,t = ceil(log(2^100)/log(n)); printf(t", "); ) */ +static const uint8_t s_read_radix_cutoff[65] = { 0, 0, /* 0 .. 1*/ + 100, 64, 50, 44, 39, 36, 34, 32, /* 2 .. 9 */ + 31, 29, 28, 28, 27, 26, 25, 25, /* 10 .. 17 */ + 24, 24, 24, 23, 23, 23, 22, 22, /* 18 .. 25 */ + 22, 22, 21, 21, 21, 21, 20, 20, /* 26 .. 33 */ + 20, 20, 20, 20, 20, 19, 19, 19, /* 34 .. 41 */ + 19, 19, 19, 19, 19, 19, 18, 18, /* 42 .. 49 */ + 18, 18, 18, 18, 18, 18, 18, 18, /* 51 .. 57 */ + 18, 17, 17, 17, 17, 17, 17 /* 58 .. 64 */ + }; + +mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) +{ + size_t len, mid; + mp_int A, B, m; + mp_digit radix_ = (mp_digit)radix; + mp_err err = MP_OKAY; + + len = end - start; + + if (len < (size_t)(s_read_radix_cutoff[radix] * MP_READ_RADIX_CUTOFF_MULTIPLICATOR)) { + return s_mp_slower_read_radix(a, str, start, end, radix); + } + + mid = len / 2u; + + if ((err = mp_init_set(&m, radix_)) != MP_OKAY) { + return err; + } + if ((err = mp_init_multi(&A, &B, NULL)) != MP_OKAY) { + mp_clear(&m); + return err; + } + + if ((err = s_mp_faster_read_radix(&A, str, start, start + mid + 1, radix)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_faster_read_radix(&B, str, start + mid +1, end, radix)) != MP_OKAY) goto LTM_ERR; + + if (MP_IS_2EXPT(radix_)) { + if ((err = mp_mul_2d(&A, (int)(((len - mid) - 1u) * s_mp_floor_ilog2(radix_)), &A)) != MP_OKAY) goto LTM_ERR; + } else { + if ((err = mp_expt_n(&m, (int)((len - mid) - 1u), &m)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_mul(&A, &m, &A)) != MP_OKAY) goto LTM_ERR; + } + if ((err = mp_add(&A, &B, a)) != MP_OKAY) goto LTM_ERR; + +LTM_ERR: + mp_clear_multi(&A, &B, &m, NULL); + return err; +} + +#endif diff --git a/s_mp_faster_to_radix.c b/s_mp_faster_to_radix.c new file mode 100644 index 00000000..e45a059d --- /dev/null +++ b/s_mp_faster_to_radix.c @@ -0,0 +1,260 @@ +#include "tommath_private.h" +#ifdef S_MP_FASTER_TO_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* Portable integer log of two with small footprint */ +static int32_t s_floor_ilog2(int32_t value) +{ + int r = 0; + while ((value /= 2) != 0) { + r++; + } + return r; +} + +/* Exponentiation with small footprint */ + +static int32_t s_pow(int32_t base, int32_t exponent) +{ + int32_t result = 1; + while (exponent != 0) { + if ((exponent % 2) == 1) { + result *= base; + } + exponent /= 2; + if (exponent == 0) { + break; + } + base *= base; + } + return result; +} + +#define MP_COMPUTE_ESS(T) ((int)((int32_t)((uint32_t)1 << (T)) * k)) + +static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, + int radix, int32_t k, int32_t t, bool pad, bool first, mp_int *P, mp_int *R) +{ + + mp_int r, q, a1; + mp_err err; + int Beta; + + if (t < 0) { + /* Print the string from the number given*/ + if ((err = s_mp_slower_to_radix(a, str, part_maxlen, part_written, radix, pad)) != MP_OKAY) goto LTM_ERR; + + } else { + if ((err = mp_init_multi(&q, &r, &a1, NULL)) != MP_OKAY) goto LTM_ERR; + if (first) { + if ((err = mp_div(a, &P[t], &q, &r)) != MP_OKAY) goto LTM_ERR; + } else { + /* + Barrett reduction. A step by step proof can be found at + https://www.nayuki.io/page/barrett-reduction-algorithm + + See also: Modern Computer Arithmetic, version 0.5.9, page 59 + */ + + Beta = MP_COMPUTE_ESS(t+1); + + /* Q = floor(A1 * I / 2^Beta) */ + /* I = floor( (2^(2*Beta)) / B) Here we have R[t] = I, P[t] = B */ + if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY) goto LTM_ERR; + + /* R = A - Q*B */ + if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_sub(a, &r, &r)) != MP_OKAY) goto LTM_ERR; + + /* We can use this simple correction because of the way we computed the reciprocal */ + if (r.sign == MP_NEG) { + if ((err = mp_decr(&q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + } + } + /* Go down the lists while climbing up the tree. */ + t--; + + /* Follow branches */ + if (mp_iszero(&q) && (!pad)) { + if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, + k, t, false, false, P, R)) != MP_OKAY) goto LTM_ERR; + } else { + if ((err = s_mp_to_radix_recursive(&q, str, part_maxlen, part_written, radix, + k, t, pad, false, P, R)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, + k, t, true, false, P, R)) != MP_OKAY) goto LTM_ERR; + } + mp_clear_multi(&q, &r, &a1, NULL); + } + + err = MP_OKAY; +LTM_ERR: + return err; +} + +mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) +{ + mp_err err; + int32_t n = 0, k, t = 0, steps = 0; + int ilog2a; + + /* Use given buffer directly, no temporary buffers for the individual chunks */ + char **sptr = &str; + /* Size of the chunk */ + size_t part_written = 0; + size_t part_maxlen = maxlen; + + bool num_ovf = false; + + /* List of reciprocals */ + mp_int *R = NULL; + /* List of moduli */ + mp_int *P = NULL; + + /* Denominator for the reciprocal: b^y */ + n = s_pow((int32_t)radix, (int32_t)s_mp_radix_exponent_y[radix]); + + /* Numerator of the reciprocal: ceil(log_2(n)) */ + k = s_floor_ilog2(n) + 1; + + /* steps = floor(log_2(floor(log_2(a))))*/ + ilog2a = mp_count_bits(a) - 1; + + /* Cutoff at about twice the size of P[0]. */ + if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR)) { + if ((err = s_mp_slower_to_radix(a, sptr, &part_maxlen, &part_written, radix, false)) != MP_OKAY) goto LTM_ERR; + /* part_written does not count EOS */ + *written = part_written + 1; + return err; + } + /* + floor(log_2(floor(log_2(a)))) is not enough but we check for + the end inside the loop and the list is just a list of pointers, + not much memory wasted here. + */ + steps = s_floor_ilog2((int32_t)ilog2a) + 2; + + /* Allocate memory for list of reciprocals */ + R = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int)); + if (R == NULL) { + return MP_MEM; + } + /* Allocate memory for list of moduli */ + P = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int)); + if (P == NULL) { + MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int)); + return MP_MEM; + } + + /* + The approximation to the reciprocal used in Barrett's method is + R_t = ceil(2^((2^t)*k)/n^(2^t)) + with R_0 = (2^(2*k))/b^y and k = ceil(log_2(n)) as computed above. + */ + + /* To get the tree a bit flatter. Alternative: do it iteratively instead of recursively */ + k = k * MP_RADIX_BARRETT_START_MULTIPLICATOR; + + /* Compute initial reciprocal R[0] and expand it (R[0]^(2^k) */ + if ((err = mp_init_i32(&P[0], n)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_expt_n(&P[0], MP_RADIX_BARRETT_START_MULTIPLICATOR, &P[0])) != MP_OKAY) goto LTM_ERR; + + if ((err = mp_init(&R[0])) != MP_OKAY) goto LTM_ERR; + if ((err = mp_2expt(&R[0], 2*k)) != MP_OKAY) goto LTM_ERR; + + if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_incr(&R[0])) != MP_OKAY) goto LTM_ERR; + + + /* Compute the rest of the reciprocals if as needed */ + for (t = 1; t < steps; t++) { + /* P_t = (b^y)^(2^t) = n^(2^t) */ + /* + We cannot just square because it can + a) overflow MP_MAX_DIGIT_COUNT + b) it can get bigger than "a" which it shouldn't + which also means that + c) if it gets bigger than "a" we have all necessary + reciprocals and can break out of the loop + */ + /* Check for overflow of 2^((2^t)*k) i.e. bigger than 2^MP_MAX_DIGIT_COUNT */ + if (((int)(1u << t)*k) > MP_MAX_DIGIT_COUNT) { + /* TODO: This can only happen near MP_MAX_DIGIT_COUNT and we can use + the reciprocal R[t-1] to do the division but R[t] != R[t-1]^2 + so we cannot just divide by R[t-1] twice. + + But as it is the root of the tree it is used only once and caching + makes no sense in the first place, we can divide a/P[last] directly + + This is always the case for the first division and we can do it + in general to save about half of the cache memory and a bit of + computation time by avoiding the overhead of the Barrett division. + + We can set a flag (MP_OVF is an error and it might be frowned upon + using it as a flag) or R[last] to zero (minus one) or just start + with a plain division every time as described above. + + Problem with the "always dividing directly" is that it is not known + for sure if P[t-1]^2 > a without actualy computing P[t-1]^2 but it + is a rare event that the heuristic check below fails, so the cost is + not as high as it seems. + */ + num_ovf = true; + } + + /* P[t-1]^2 > a is most likely more than just a bit or too, so check if we + can bail out early without actually computing the square. */ + if ((2 * mp_count_bits(&P[t-1]) - 4) > ilog2a) { + /* Correct index */ + t--; + break; + } + + /* Compute denominator */ + if ((err = mp_init(&P[t])) != MP_OKAY) goto LTM_ERR; + /* P[t] = P[t-1]^2 */ + if ((err = mp_sqr(&P[t-1], &P[t])) != MP_OKAY) goto LTM_ERR; + /* Check if P[t]^2 > a */ + if (mp_cmp(&P[t],a) == MP_GT) { + /* We don't need P[t] anymore */ + mp_clear(&P[t]); + /* Correct index */ + t--; + break; + } + + /* We cannot evaluate the numerator if the computation would overflow */ + if (!num_ovf) { + /* Compute numerator */ + if ((err = mp_init(&R[t])) != MP_OKAY) goto LTM_ERR; + /* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */ + if ((err = mp_2expt(&(R[t]), MP_COMPUTE_ESS(t + 1))) != MP_OKAY) goto LTM_ERR; + /* Compute reciprocal */ + /* R[t] = floor(2^(2^t * k) / P[t] */ + if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR; + /* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */ + if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR; + } + } + + /* And finally: start the recursion. */ + if ((err = s_mp_to_radix_recursive(a, sptr, &part_maxlen, &part_written, radix, + k, t, false, num_ovf, P, R)) != MP_OKAY) goto LTM_ERR; + /* part_written does not account for EOS */ + *written = part_written + 1; + + err = MP_OKAY; +LTM_ERR: + do { + mp_clear(&P[t]); + mp_clear(&R[t]); + } while (t--); + MP_FREE_BUF(P, (size_t) steps * sizeof(mp_int)); + MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int)); + return err; +} + +#endif diff --git a/s_mp_floor_ilog2.c b/s_mp_floor_ilog2.c new file mode 100644 index 00000000..ec32b133 --- /dev/null +++ b/s_mp_floor_ilog2.c @@ -0,0 +1,15 @@ +#include "tommath_private.h" +#ifdef S_MP_FLOOR_ILOG2_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +size_t s_mp_floor_ilog2(mp_word value) +{ + size_t r = 0u; + while ((value >>= 1) != 0u) { + r++; + } + return r; +} + +#endif diff --git a/s_mp_fp_log_d.c b/s_mp_fp_log_d.c index 71d82dc4..e7ce5f6e 100644 --- a/s_mp_fp_log_d.c +++ b/s_mp_fp_log_d.c @@ -3,22 +3,13 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -static mp_word s_mp_flog2_mp_word_d(mp_word value) -{ - mp_word r = 0u; - while ((value >>= 1) != 0u) { - r++; - } - return r; -} - /* Fixed point bitwise logarithm base two of "x" with precision "p" */ static mp_err s_mp_fp_log_fraction_d(mp_word x, int p, mp_word *c) { mp_word b, L_out, L, a_bar, twoep; int i; - L = s_mp_flog2_mp_word_d(x); + L = s_mp_floor_ilog2(x); if ((L + (mp_word)p) > MP_UPPER_LIMIT_FIXED_LOG) { return MP_VAL; diff --git a/s_mp_radix_map.c b/s_mp_radix_map.c index 68e21f32..950a698f 100644 --- a/s_mp_radix_map.c +++ b/s_mp_radix_map.c @@ -15,5 +15,21 @@ const uint8_t s_mp_radix_map_reverse[] = { 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, /* ghijklmnop */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d /* qrstuvwxyz */ }; + MP_STATIC_ASSERT(correct_radix_map_reverse_size, sizeof(s_mp_radix_map_reverse) == MP_RADIX_MAP_REVERSE_SIZE) + +/* Exponents chosen such that b^(y) <= 2^20 */ +const uint8_t s_mp_radix_exponent_y[] = { 0, 0, /* 0 .. 1*/ + 20, 12, 10, 8, 7, 7, 6, 6, /* 2 .. 9 */ + 6, 5, 5, 5, 5, 5, 5, 4, /* 10 .. 17 */ + 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 25 */ + 4, 4, 4, 4, 4, 4, 4, 3, /* 26 .. 33 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 34 .. 41 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 42 .. 49 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 51 .. 57 */ + 3, 3, 3, 3, 3, 3, 3 /* 58 .. 64 */ + }; + +MP_STATIC_ASSERT(correct_mp_radix_exponent_y, sizeof(s_mp_radix_exponent_y) == MP_RADIX_EXPONENT_Y_SIZE) + #endif diff --git a/s_mp_slower_read_radix.c b/s_mp_slower_read_radix.c new file mode 100644 index 00000000..852c47cb --- /dev/null +++ b/s_mp_slower_read_radix.c @@ -0,0 +1,39 @@ +#include "tommath_private.h" +#ifdef S_MP_SLOWER_READ_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) +{ + mp_err err; + size_t i; + + /* checks are done by caller */ + + char *_s = (char *)(str + start); + for (i = start; (i < end) && (*_s != '\0'); i++) { + uint8_t y; + + char ch = (radix <= 36) ? (char)MP_TOUPPER((int)*_s) : *_s; + unsigned int pos = (unsigned int)(ch - '+'); + if (MP_RADIX_MAP_REVERSE_SIZE <= pos) { + err = MP_VAL; + goto LBL_ERR; + } + y = s_mp_radix_map_reverse[pos]; + if (y >= radix) { + err = MP_VAL; + goto LBL_ERR; + } + if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY) goto LBL_ERR; + _s++; + } + + return MP_OKAY; +LBL_ERR: + mp_zero(a); + return err; +} + +#endif diff --git a/s_mp_slower_to_radix.c b/s_mp_slower_to_radix.c new file mode 100644 index 00000000..0dffb2b7 --- /dev/null +++ b/s_mp_slower_to_radix.c @@ -0,0 +1,76 @@ +#include "tommath_private.h" +#ifdef S_MP_SLOWER_TO_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +static void s_reverse(char *s, size_t len) +{ + size_t ix = 0, iy = len - 1u; + while (ix < iy) { + MP_EXCH(char, s[ix], s[iy]); + ++ix; + --iy; + } +} + +mp_err s_mp_slower_to_radix(const mp_int *a, char **str, + size_t *part_maxlen, size_t *part_written, int radix, bool pad) +{ + size_t digs = 0u; + mp_int t; + mp_digit d; + mp_err err = MP_OKAY; + int ybar = 0; + + /* A temporary pointer to the output string to make reversal simpler */ + char *s = *str; + + /* The number of digits of "radix" to be filled if this chunk is not the most significant one. */ + if (pad) { + ybar = s_mp_radix_exponent_y[radix] * MP_RADIX_BARRETT_START_MULTIPLICATOR; + } + + if ((err = mp_init_copy(&t, a)) != MP_OKAY) goto LTM_ERR; + + while (!mp_iszero(&t)) { + if ((--(*part_maxlen)) < 1u) { + /* no more room */ + err = MP_BUF; + goto LTM_ERR; + } + if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) goto LTM_ERR; + *s++ = s_mp_radix_map[d]; + ++digs; + if (pad) { + ybar--; + } + } + + /* Fill in leading zeros if this chunk does not contain the most significant digits. */ + if (pad) { + while ((ybar-- > 0) && (((*part_maxlen)--) > 0)) { + *s++ = '0'; + digs++; + } + } + + /* "rewind" */ + s = *str; + /* reverse */ + s_reverse(s, digs); + /* step forward */ + *str += digs; + /* Add EOS at the end of every chunk to allow this function to be used stand-alone */ + **str = '\0'; + + if (part_written != NULL) { + *part_written = *part_written + digs; + } + + err = MP_OKAY; +LTM_ERR: + mp_clear(&t); + return err; +} + +#endif diff --git a/sources.cmake b/sources.cmake index bbb2aeab..87c6b982 100644 --- a/sources.cmake +++ b/sources.cmake @@ -132,6 +132,9 @@ s_mp_div_school.c s_mp_div_small.c s_mp_exptmod.c s_mp_exptmod_fast.c +s_mp_faster_read_radix.c +s_mp_faster_to_radix.c +s_mp_floor_ilog2.c s_mp_fp_log.c s_mp_fp_log_d.c s_mp_get_bit.c @@ -151,6 +154,8 @@ s_mp_prime_tab.c s_mp_radix_map.c s_mp_radix_size_overestimate.c s_mp_rand_platform.c +s_mp_slower_read_radix.c +s_mp_slower_to_radix.c s_mp_sqr.c s_mp_sqr_comba.c s_mp_sqr_karatsuba.c diff --git a/tommath_class.h b/tommath_class.h index e08bc5f3..c5edbc64 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -141,6 +141,9 @@ # define S_MP_DIV_SMALL_C # define S_MP_EXPTMOD_C # define S_MP_EXPTMOD_FAST_C +# define S_MP_FASTER_READ_RADIX_C +# define S_MP_FASTER_TO_RADIX_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_FP_LOG_C # define S_MP_FP_LOG_D_C # define S_MP_GET_BIT_C @@ -160,6 +163,8 @@ # define S_MP_RADIX_MAP_C # define S_MP_RADIX_SIZE_OVERESTIMATE_C # define S_MP_RAND_PLATFORM_C +# define S_MP_SLOWER_READ_RADIX_C +# define S_MP_SLOWER_TO_RADIX_C # define S_MP_SQR_C # define S_MP_SQR_COMBA_C # define S_MP_SQR_KARATSUBA_C @@ -647,6 +652,7 @@ # define MP_RAND_C # define MP_READ_RADIX_C # define MP_SET_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_PRIME_IS_DIVISIBLE_C #endif @@ -734,9 +740,10 @@ #endif #if defined(MP_READ_RADIX_C) -# define MP_ADD_D_C -# define MP_MUL_D_C # define MP_ZERO_C +# define S_MP_FASTER_READ_RADIX_C +# define S_MP_SLOWER_READ_RADIX_C +# define S_MP_STRLEN_C #endif #if defined(MP_REDUCE_C) @@ -931,9 +938,8 @@ #endif #if defined(MP_TO_RADIX_C) -# define MP_CLEAR_C -# define MP_DIV_D_C -# define MP_INIT_COPY_C +# define S_MP_FASTER_TO_RADIX_C +# define S_MP_SLOWER_TO_RADIX_C #endif #if defined(MP_TO_SBIN_C) @@ -1069,6 +1075,44 @@ # define S_MP_MONTGOMERY_REDUCE_COMBA_C #endif +#if defined(S_MP_FASTER_READ_RADIX_C) +# define MP_ADD_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_EXPT_N_C +# define MP_INIT_MULTI_C +# define MP_INIT_SET_C +# define MP_MUL_2D_C +# define MP_MUL_C +# define S_MP_FLOOR_ILOG2_C +# define S_MP_SLOWER_READ_RADIX_C +#endif + +#if defined(S_MP_FASTER_TO_RADIX_C) +# define MP_2EXPT_C +# define MP_ADD_C +# define MP_ADD_D_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_CMP_C +# define MP_COUNT_BITS_C +# define MP_DIV_2D_C +# define MP_DIV_C +# define MP_EXPT_N_C +# define MP_INIT_C +# define MP_INIT_I32_C +# define MP_INIT_MULTI_C +# define MP_MUL_C +# define MP_SUB_C +# define MP_SUB_D_C +# define S_MP_SLOWER_TO_RADIX_C +# define S_MP_TO_RADIX_RECURSIVE_C +# define S_MP_ZERO_BUF_C +#endif + +#if defined(S_MP_FLOOR_ILOG2_C) +#endif + #if defined(S_MP_FP_LOG_C) # define MP_2EXPT_C # define MP_ADD_C @@ -1091,7 +1135,7 @@ # define MP_DIV_2D_C # define MP_GET_I64_C # define MP_INIT_C -# define S_MP_FLOG2_MP_WORD_D_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_FP_LOG_FRACTION_D_C #endif @@ -1234,6 +1278,18 @@ #if defined(S_MP_RAND_PLATFORM_C) #endif +#if defined(S_MP_SLOWER_READ_RADIX_C) +# define MP_ADD_D_C +# define MP_MUL_D_C +# define MP_ZERO_C +#endif + +#if defined(S_MP_SLOWER_TO_RADIX_C) +# define MP_CLEAR_C +# define MP_DIV_D_C +# define MP_INIT_COPY_C +#endif + #if defined(S_MP_SQR_C) # define MP_CLAMP_C # define MP_CLEAR_C diff --git a/tommath_private.h b/tommath_private.h index d319a1db..1c506140 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -223,16 +223,38 @@ MP_PRIVATE void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits); MP_PRIVATE void s_mp_zero_buf(void *mem, size_t size); MP_PRIVATE void s_mp_zero_digs(mp_digit *d, int digits); MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix, size_t *size); +MP_PRIVATE size_t s_mp_floor_ilog2(mp_word value); #define MP_PRECISION_FIXED_LOG ( (int) (((sizeof(mp_word) * CHAR_BIT) / 2) - 1)) #define MP_UPPER_LIMIT_FIXED_LOG ( (int) ( (sizeof(mp_word) * CHAR_BIT) - 1)) MP_PRIVATE mp_err s_mp_fp_log(const mp_int *a, mp_int *c) MP_WUR; MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR; +/* Radix conversion O(M(n) log n) Schoenhage method */ +MP_PRIVATE mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) MP_WUR; +MP_PRIVATE mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR; +/* Radix conversion O(n^2) method */ +MP_PRIVATE mp_err s_mp_slower_to_radix(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, + int radix, bool pad) MP_WUR; +MP_PRIVATE mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR; + + + #define MP_RADIX_MAP_REVERSE_SIZE 80u +#define MP_RADIX_EXPONENT_Y_SIZE 65u extern MP_PRIVATE const char s_mp_radix_map[]; extern MP_PRIVATE const uint8_t s_mp_radix_map_reverse[]; extern MP_PRIVATE const mp_digit s_mp_prime_tab[]; +extern MP_PRIVATE const uint8_t s_mp_radix_exponent_y[]; + +/* + This is the value without the Newton-Raphson optimization. + Tuneable? + */ +#ifndef MP_RADIX_BARRETT_START_MULTIPLICATOR +# define MP_RADIX_BARRETT_START_MULTIPLICATOR 50 +#endif + /* number of primes */ #define MP_PRIME_TAB_SIZE 256