From c1bcc9e6a6e7a0b48479deeb41bd996aaf91a294 Mon Sep 17 00:00:00 2001 From: czurnieden Date: Thu, 24 Dec 2020 01:44:59 +0100 Subject: [PATCH 1/8] Addition of faster to_radix function --- demo/test.c | 33 +++++- mp_to_radix.c | 63 ++++------- s_mp_faster_to_radix.c | 235 +++++++++++++++++++++++++++++++++++++++++ s_mp_radix_map.c | 16 +++ s_mp_slower_to_radix.c | 77 ++++++++++++++ tommath_private.h | 17 +++ 6 files changed, 392 insertions(+), 49 deletions(-) create mode 100644 s_mp_faster_to_radix.c create mode 100644 s_mp_slower_to_radix.c diff --git a/demo/test.c b/demo/test.c index f86853644..8f2b0381d 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1155,9 +1155,10 @@ static int test_mp_read_radix(void) { char buf[4096]; size_t written; + int bignum, i; - mp_int a; - DOR(mp_init_multi(&a, NULL)); + mp_int a, b; + DOR(mp_init_multi(&a, &b, NULL)); DO(mp_read_radix(&a, "123456", 10)); @@ -1183,6 +1184,30 @@ static int test_mp_read_radix(void) DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10)); printf("\r '0' a == %s, length = %zu", buf, written); + /* Test the fast method with a slightly larger number */ + + /* Must be bigger than the cut-off value, of course */ + bignum = 2* (2 * s_mp_radix_exponent_y[2] * MP_RADIX_BARRETT_START_MULTIPLICATOR); + printf("Size of bignum_size = %d\n", bignum); + /* Check if "bignum" is small enough for the result to fit into "buf" + otherwise lead tester to this function */ + if (bignum >= 4096) { + fprintf(stderr, "Buffer too small, please check function \"test_mp_read_radix\" in \"test.c\""); + goto LBL_ERR; + } + /* Produce a random number */ + bignum /= MP_DIGIT_BIT; + DO(mp_rand(&b, bignum)); + /* Check if it makes the round */ + printf("Number of limbs in &b = %d, bit_count of &b = %d\n", bignum, mp_count_bits(&b)); + for (i = 2; i < 65; i++) { + DO(mp_to_radix(&b, buf, sizeof(buf), &written, i)); + DO(mp_read_radix(&a, buf, i)); + EXPECT(mp_cmp(&a, &b) == MP_EQ); + /* fprintf(stderr,"radix = %d\n",i); */ + } + + while (0) { char *s = fgets(buf, sizeof(buf), stdin); if (s != buf) break; @@ -1192,10 +1217,10 @@ static int test_mp_read_radix(void) printf("%s, %lu\n", buf, (unsigned long)a.dp[0] & 3uL); } - mp_clear(&a); + mp_clear_multi(&a, &b, NULL); return EXIT_SUCCESS; LBL_ERR: - mp_clear(&a); + mp_clear_multi(&a, &b, NULL); return EXIT_FAILURE; } diff --git a/mp_to_radix.c b/mp_to_radix.c index 1e5e67110..114902f36 100644 --- a/mp_to_radix.c +++ b/mp_to_radix.c @@ -3,17 +3,6 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* reverse an array, used for radix code */ -static void s_reverse(char *s, size_t len) -{ - size_t ix = 0, iy = len - 1u; - while (ix < iy) { - MP_EXCH(char, s[ix], s[iy]); - ++ix; - --iy; - } -} - /* stores a bignum as a ASCII string in a given radix (2..64) * * Stores upto "size - 1" chars and always a NULL byte, puts the number of characters @@ -21,11 +10,9 @@ static void s_reverse(char *s, size_t len) */ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) { - size_t digs; - mp_err err; - mp_int t; - mp_digit d; - char *_s = str; + mp_err err; + mp_int a_bar = *a; + size_t part_written = 0; /* check range of radix and size*/ if (maxlen < 2u) { @@ -45,50 +32,36 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i return MP_OKAY; } - if ((err = mp_init_copy(&t, a)) != MP_OKAY) { - return err; - } - /* if it is negative output a - */ - if (mp_isneg(&t)) { - /* we have to reverse our digits later... but not the - sign!! */ - ++_s; - + if (mp_isneg(a)) { /* store the flag and mark the number as positive */ *str++ = '-'; - t.sign = MP_ZPOS; + a_bar.sign = MP_ZPOS; /* subtract a char */ --maxlen; } - digs = 0u; - while (!mp_iszero(&t)) { - if (--maxlen < 1u) { - /* no more room */ - err = MP_BUF; - goto LBL_ERR; - } - if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) { - goto LBL_ERR; + + /* TODO: check if it can be done better */ + if (MP_HAS(S_MP_FASTER_TO_RADIX)) { + if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY) goto LBL_ERR; + } else { + if (MP_HAS(S_MP_SLOWER_TO_RADIX)) { + if ((err = s_mp_slower_to_radix(&a_bar, &str, &maxlen, &part_written, radix, false)) != MP_OKAY) goto LBL_ERR; + /* part_written does not count EOS */ + part_written++; } - *str++ = s_mp_radix_map[d]; - ++digs; } - /* reverse the digits of the string. In this case _s points - * to the first digit [excluding the sign] of the number - */ - s_reverse(_s, digs); - /* append a NULL so the string is properly terminated */ - *str = '\0'; - digs++; + /* TODO: Think about adding a function for base-2 radices only although + s_faster_to_radix is rather quick with such radices. */ if (written != NULL) { - *written = mp_isneg(a) ? (digs + 1u): digs; + part_written += mp_isneg(a) ? 1: 0; + *written = part_written; } LBL_ERR: - mp_clear(&t); return err; } diff --git a/s_mp_faster_to_radix.c b/s_mp_faster_to_radix.c new file mode 100644 index 000000000..c621bf2e4 --- /dev/null +++ b/s_mp_faster_to_radix.c @@ -0,0 +1,235 @@ +#include "tommath_private.h" +#ifdef S_MP_FASTER_TO_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* Portable integer log of two with small footprint */ +static int32_t s_floor_ilog2(int32_t value) +{ + int r = 0; + while ((value /= 2) != 0) { + r++; + } + return r; +} + +/* Exponentiation with small footprint */ +static int32_t s_pow(int32_t base, int32_t exponent) +{ + int32_t result = 1; + while (exponent != 0) { + if ((exponent % 2) == 1) { + result *= base; + } + exponent /= 2; + base *= base; + } + return result; +} + +static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, + int radix, int32_t k, int32_t t, bool pad, mp_int *P, mp_int *R) +{ + + mp_int r, q, a1; + mp_err err; + int Beta; + + if (t < 0) { + /* Print the string from the number given*/ + if ((err = s_mp_slower_to_radix(a, str, part_maxlen, part_written, radix, pad)) != MP_OKAY) goto LTM_ERR; + + } else { + if ((err = mp_init_multi(&q, &r, &a1, NULL)) != MP_OKAY) goto LTM_ERR; + /* + Barrett reduction. A step by step proof can be found at + https://www.nayuki.io/page/barrett-reduction-algorithm + + See also: Modern Computer Arithmetic, version 0.5.9, page 59 + */ + + /* If this cast-feast looks familiar: it is the numerator from computing the reciprocal*/ + Beta = (int)((int32_t)((uint32_t)1 << (t+1)) * k); + + /* Q = floor(A1 * I / 2^Beta) */ + /* I = floor( (2^(2*Beta)) / B) Here we have R[t] = I, P[t] = B */ + if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY) goto LTM_ERR; + + /* R = A - Q*B */ + if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_sub(a, &r, &r)) != MP_OKAY) goto LTM_ERR; + + /* We can use this simple correction because of the way we computed the reciprocal */ + if (r.sign == MP_NEG) { + if ((err = mp_decr(&q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + } + + /* Go down the lists while climbing up the tree. */ + t--; + + /* Follow branches */ + if (mp_iszero(&q) && (!pad)) { + if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, + k, t, false, P, R)) != MP_OKAY) goto LTM_ERR; + } else { + if ((err = s_mp_to_radix_recursive(&q, str, part_maxlen, part_written, radix, + k, t, pad, P, R)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, + k, t, true, P, R)) != MP_OKAY) goto LTM_ERR; + } + mp_clear_multi(&q, &r, &a1, NULL); + } + + err = MP_OKAY; +LTM_ERR: + return err; +} + + +mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) +{ + mp_err err; + int32_t n = 0, k, t = 0, steps; + int ilog2a; + + /* Use given buffer directly, no temporary buffers for the individual chunks */ + char **sptr = &str; + /* Size of the chunk */ + size_t part_written = 0; + size_t part_maxlen = maxlen; + + /* List of reciprocals */ + mp_int *R = NULL; + /* List of moduli */ + mp_int *P = NULL; + + /* Denominator for the reciprocal: b^y */ + n = s_pow((int32_t)radix, (int32_t)s_mp_radix_exponent_y[radix]); + + /* Numerator of the reciprocal: ceil(log_2(n)) */ + k = s_floor_ilog2(n) + 1; + + /* steps = floor(log_2(floor(log_2(a))))*/ + ilog2a = mp_count_bits(a) - 1; + + /* Cutoff at about twice the size of P[0]. Interestingly far below Karatsuba cut-off. */ + if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR)) { + if ((err = s_mp_slower_to_radix(a, sptr, &part_maxlen, &part_written, radix, false)) != MP_OKAY) goto LTM_ERR; + /* part_written does not count EOS */ + *written = part_written + 1; + return err; + } + /* + floor(log_2(floor(log_2(a)))) is not enough but we check for + the end inside the loop and the list is just a list of pointers, + not much memory wasted here. + */ + steps = s_floor_ilog2((int32_t)ilog2a) + 2; + + /* Allocate memory for list of reciprocals */ + R = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int)); + if (R == NULL) { + return MP_MEM; + } + /* Allocate memory for list of moduli */ + P = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int)); + if (P == NULL) { + MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int)); + return MP_MEM; + } + + /* + The approximation to the reciprocal used in Barrett's method is + R_t = ceil(2^((2^t)*k)/n^(2^t)) + with R_0 = (2^(2*k))/b^y and k = ceil(log_2(n)) as computed above. + */ + + /* To get the tree a bit flatter. Alternative: do it iteratively instead of recursively */ + k = k * MP_RADIX_BARRETT_START_MULTIPLICATOR; + + /* Compute initial reciprocal R[0] and expand it (R[0]^(2^k) */ + if ((err = mp_init_i32(&P[0], n)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_expt_n(&P[0], MP_RADIX_BARRETT_START_MULTIPLICATOR, &P[0])) != MP_OKAY) goto LTM_ERR; + + if ((err = mp_init(&R[0])) != MP_OKAY) goto LTM_ERR; + if ((err = mp_2expt(&R[0], 2*k)) != MP_OKAY) goto LTM_ERR; + + if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_incr(&R[0])) != MP_OKAY) goto LTM_ERR; + + /* Compute the rest of the reciprocals if as needed */ + for (t = 1; t < steps; t++) { + /* P_t = (b^y)^(2^t) = n^(2^t) */ + /* + We cannot just square because it can + a) overflow MP_MAX_DIGIT_COUNT + b) it can get bigger than "a" which it shouldn't + which also means that + c) if it gets bigger than "a" we have all necessary + reciprocals and can break out of the loop + */ + /* Check for overflow of 2^((2^t)*k) i.e. bigger than 2^MP_MAX_DIGIT_COUNT */ + if (((int)(1u << t)*k) > MP_MAX_DIGIT_COUNT) { + /* TODO: This can only happen near MP_MAX_DIGIT_COUNT and we can use + the reciprocal R[t-1] to do the division but R[t] != R[t-1]^2 + so we cannot just divide by R[t-1] twice. + */ + err = MP_OVF; + goto LTM_ERR; + } + + /* P[t-1]^2 > a at most likely more than just a bit or too, so check if we + can bail out early without actually computing the square. The + constant "10" is comprised of unity plus some angst-allowance */ + if ((2 * mp_count_bits(&P[t-1]) - 10) > ilog2a) { + /* Correct index */ + t--; + break; + } + + /* Compute denominator */ + if ((err = mp_init(&P[t])) != MP_OKAY) goto LTM_ERR; + /* P[t] = P[t-1]^2 */ + if ((err = mp_sqr(&P[t-1], &P[t])) != MP_OKAY) goto LTM_ERR; + /* Check if P[t]^2 > a */ + if (mp_cmp(&P[t],a) == MP_GT) { + /* We don't need P[t] anymore */ + mp_clear(&P[t]); + /* Correct index */ + t--; + break; + } + /* Compute numerator */ + if ((err = mp_init(&R[t])) != MP_OKAY) goto LTM_ERR; + + /* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */ + /* TODO: these are more castings than in the ER in Mayrhofen at New Year's Eve! */ + if ((err = mp_2expt(&(R[t]), (int)((int32_t)((uint32_t)1 << (t+1)) * k))) != MP_OKAY) goto LTM_ERR; + + /* Compute reciprocal */ + /* R[t] = floor(2^(2^t * k) / P[t] */ + if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR; + /* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */ + if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR; + } + + /* And finally: start the recursion. */ + if ((err = s_mp_to_radix_recursive(a, sptr, &part_maxlen, &part_written, radix, + k, t, false, P, R)) != MP_OKAY) goto LTM_ERR; + /* part_written does not account for EOS */ + *written = part_written + 1; + + err = MP_OKAY; +LTM_ERR: + do { + mp_clear(&P[t]); + mp_clear(&R[t]); + } while (t--); + MP_FREE_BUF(P, (size_t) steps * sizeof(mp_int)); + MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int)); + return err; +} + +#endif diff --git a/s_mp_radix_map.c b/s_mp_radix_map.c index 68e21f32e..950a698fe 100644 --- a/s_mp_radix_map.c +++ b/s_mp_radix_map.c @@ -15,5 +15,21 @@ const uint8_t s_mp_radix_map_reverse[] = { 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, /* ghijklmnop */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d /* qrstuvwxyz */ }; + MP_STATIC_ASSERT(correct_radix_map_reverse_size, sizeof(s_mp_radix_map_reverse) == MP_RADIX_MAP_REVERSE_SIZE) + +/* Exponents chosen such that b^(y) <= 2^20 */ +const uint8_t s_mp_radix_exponent_y[] = { 0, 0, /* 0 .. 1*/ + 20, 12, 10, 8, 7, 7, 6, 6, /* 2 .. 9 */ + 6, 5, 5, 5, 5, 5, 5, 4, /* 10 .. 17 */ + 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 25 */ + 4, 4, 4, 4, 4, 4, 4, 3, /* 26 .. 33 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 34 .. 41 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 42 .. 49 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 51 .. 57 */ + 3, 3, 3, 3, 3, 3, 3 /* 58 .. 64 */ + }; + +MP_STATIC_ASSERT(correct_mp_radix_exponent_y, sizeof(s_mp_radix_exponent_y) == MP_RADIX_EXPONENT_Y_SIZE) + #endif diff --git a/s_mp_slower_to_radix.c b/s_mp_slower_to_radix.c new file mode 100644 index 000000000..f0f5caf52 --- /dev/null +++ b/s_mp_slower_to_radix.c @@ -0,0 +1,77 @@ +#include "tommath_private.h" +#ifdef S_MP_SLOWER_TO_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +static void s_reverse(char *s, size_t len) +{ + size_t ix = 0, iy = len - 1u; + while (ix < iy) { + MP_EXCH(char, s[ix], s[iy]); + ++ix; + --iy; + } +} + +mp_err s_mp_slower_to_radix(const mp_int *a, char **str, + size_t *part_maxlen, size_t *part_written, int radix, bool pad) +{ + size_t digs = 0u; + mp_int t; + mp_digit d; + mp_err err = MP_OKAY; + + /* The number of digits of "radix" to be filled if this chunk is not the most significant one. */ + int ybar = s_mp_radix_exponent_y[radix] * MP_RADIX_BARRETT_START_MULTIPLICATOR; + + /* A temporary pointer to the output string to make reversal simpler */ + char *s = *str; + + /* TODO: input a is already a copy of the original and we could use it destructively? */ + if ((err = mp_init_copy(&t, a)) != MP_OKAY) goto LTM_ERR; + + while (!mp_iszero(&t)) { + /* TODO: this method to decrease "maxlen" is not threadsafe! */ + if ((--(*part_maxlen)) < 1u) { + /* no more room */ + err = MP_BUF; + goto LTM_ERR; + } + if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) { + goto LTM_ERR; + } + *s++ = s_mp_radix_map[d]; + ++digs; + ybar--; + } + + /* Fill in leading zeros if this chunk contains the most significant digits. */ + if (pad) { + while ((ybar-- > 0) && (((*part_maxlen)--) > 0)) { + *s++ = '0'; + digs++; + } + } + + /* TODO: I think that can be done more elegantly */ + /* "rewind" */ + s = *str; + /* reverse */ + s_reverse(s, digs); + /* step forward */ + *str += digs; + /* Add EOS at teh end of every chunk to allow this function to be used stand-alone */ + **str = '\0'; + + /* TODO: this method to increase "written" is not threadsafe! */ + if (part_written != NULL) { + *part_written = *part_written + digs; + } + + err = MP_OKAY; +LTM_ERR: + mp_clear(&t); + return err; +} + +#endif diff --git a/tommath_private.h b/tommath_private.h index d319a1db0..b55392c49 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -229,10 +229,27 @@ MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix, MP_PRIVATE mp_err s_mp_fp_log(const mp_int *a, mp_int *c) MP_WUR; MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR; +/* Radix conversion O(M(n) log n) Schoenhage method */ +MP_PRIVATE mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) MP_WUR; +/* Radix conversion O(n^2) method */ +MP_PRIVATE mp_err s_mp_slower_to_radix(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, + int radix, bool pad) MP_WUR; + #define MP_RADIX_MAP_REVERSE_SIZE 80u +#define MP_RADIX_EXPONENT_Y_SIZE 65u extern MP_PRIVATE const char s_mp_radix_map[]; extern MP_PRIVATE const uint8_t s_mp_radix_map_reverse[]; extern MP_PRIVATE const mp_digit s_mp_prime_tab[]; +extern MP_PRIVATE const uint8_t s_mp_radix_exponent_y[]; + +/* + There is not much to tune here, the steps are of the form 2^k and too large + for tuning to make a alot of sense. + */ +#ifndef MP_RADIX_BARRETT_START_MULTIPLICATOR +# define MP_RADIX_BARRETT_START_MULTIPLICATOR 8 +#endif + /* number of primes */ #define MP_PRIME_TAB_SIZE 256 From 6c9fa6b0af90c899d4a086da43bc39b6729601b8 Mon Sep 17 00:00:00 2001 From: czurnieden Date: Thu, 24 Dec 2020 04:09:09 +0100 Subject: [PATCH 2/8] Addition of faster read_radix method --- demo/test.c | 85 +++++++++++++++++++++++------ mp_read_radix.c | 58 ++++++++------------ s_mp_faster_read_radix.c | 69 +++++++++++++++++++++++ s_mp_faster_to_radix.c | 115 ++++++++++++++++++++++++--------------- s_mp_slower_read_radix.c | 39 +++++++++++++ s_mp_slower_to_radix.c | 4 +- tommath_private.h | 10 +++- 7 files changed, 279 insertions(+), 101 deletions(-) create mode 100644 s_mp_faster_read_radix.c create mode 100644 s_mp_slower_read_radix.c diff --git a/demo/test.c b/demo/test.c index 8f2b0381d..c48d5d80f 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1151,12 +1151,17 @@ static int test_mp_montgomery_reduce(void) } +#include static int test_mp_read_radix(void) { char buf[4096]; - size_t written; + size_t written, maxlen; int bignum, i; + char *buffer, *bcpy; + + clock_t start, stop, t_slow, t_fast; + mp_int a, b; DOR(mp_init_multi(&a, &b, NULL)); @@ -1187,25 +1192,73 @@ static int test_mp_read_radix(void) /* Test the fast method with a slightly larger number */ /* Must be bigger than the cut-off value, of course */ - bignum = 2* (2 * s_mp_radix_exponent_y[2] * MP_RADIX_BARRETT_START_MULTIPLICATOR); - printf("Size of bignum_size = %d\n", bignum); - /* Check if "bignum" is small enough for the result to fit into "buf" - otherwise lead tester to this function */ - if (bignum >= 4096) { - fprintf(stderr, "Buffer too small, please check function \"test_mp_read_radix\" in \"test.c\""); + bignum = (2 * 20 * MP_RADIX_BARRETT_START_MULTIPLICATOR) * 10; + buffer = (char *)malloc((size_t)(bignum + 2)); + if (buffer == NULL) { goto LBL_ERR; } - /* Produce a random number */ - bignum /= MP_DIGIT_BIT; - DO(mp_rand(&b, bignum)); - /* Check if it makes the round */ - printf("Number of limbs in &b = %d, bit_count of &b = %d\n", bignum, mp_count_bits(&b)); + DO(mp_rand(&a, bignum / MP_DIGIT_BIT)); + printf("\nNumber of limbs in &b = %d, bit_count of &b = %d\n", bignum / MP_DIGIT_BIT, mp_count_bits(&a)); + start = clock(); + for (i = 2; i < 65; i++) { + /* printf("FAST radix = %d\n",i); */ + DO(mp_to_radix(&a, buffer, (size_t)(bignum + 1), &written, i)); + DO(mp_read_radix(&b, buffer, i)); + EXPECT(mp_cmp(&a, &b) == MP_EQ); + } + stop = clock(); + t_fast = stop - start; + + printf("Same number, slow radix conversions\n"); + start = clock(); for (i = 2; i < 65; i++) { - DO(mp_to_radix(&b, buf, sizeof(buf), &written, i)); - DO(mp_read_radix(&a, buf, i)); + /* printf("SLOW radix = %d\n",i); */ + maxlen = (size_t)(bignum + 1); + bcpy = buffer; + DO(s_mp_slower_to_radix(&a, &bcpy, &maxlen, &written, i, false)); + DO(s_mp_slower_read_radix(&b, bcpy, 0, strlen(bcpy), i)); EXPECT(mp_cmp(&a, &b) == MP_EQ); - /* fprintf(stderr,"radix = %d\n",i); */ } + stop = clock(); + t_slow = stop - start; + + /* It is "long int" in GLibC but can be bigger and/or even a floating point elsewhere */ + printf("SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, (double)t_fast/(double)CLOCKS_PER_SEC); + + /* Check if the branching works. */ + if (MP_HAS(S_MP_FASTER_READ_RADIX) && MP_HAS(S_MP_FASTER_TO_RADIX)) { + if (t_fast > t_slow) { + fprintf(stderr, "Timing suspicious in test_mp_read_radix. No fast multiplication? Cut-off too low?\n"); + goto LBL_ERR; + } + } + + + free(buffer); + +#if ((MP_DIGIT_BIT <= 16) && (defined MP_CHECK_RADIX_OVF)) + /* Check a number of size (MP_MAX_DIGIT_COUNT * MP_DIGIT_BIT - 1) at fixed radix "10". */ + /* Will not work if test is run on platforms with larger int's because + #define MP_MAX_DIGIT_COUNT ((INT_MAX - 2) / MP_DIGIT_BIT) + So we have to replace the value for INT_MAX with 2^15 - 1 = 32767 to test 16-bit int's. Not + very elegant but it works. + */ + bignum = ((32767 - 2) / MP_DIGIT_BIT); + bignum = ((bignum - 1) * MP_DIGIT_BIT) + (MP_DIGIT_BIT - 1); + /* Manual computation because the automatic methods might not have been included in the build */ + buffer = (char *)malloc(((bignum + 2)/1000) * 333); + if (buffer == NULL) { + goto LBL_ERR; + } + DO(mp_2expt(&a, bignum)); + DO(mp_decr(&a)); + printf("Number of limbs in &b = %d, bit_count of &b = %d\n", bignum / MP_DIGIT_BIT, mp_count_bits(&a)); + DO(mp_to_radix(&a, buffer, ((bignum + 2)/1000) * 333, &written, 10)); + DO(mp_read_radix(&b, buffer, 10)); + EXPECT(mp_cmp(&a, &b) == MP_EQ); + free(buffer); +#endif + while (0) { @@ -2504,7 +2557,7 @@ static int unit_tests(int argc, char **argv) T1(mp_prime_next_prime, MP_PRIME_NEXT_PRIME), T1(mp_prime_rand, MP_PRIME_RAND), T1(mp_rand, MP_RAND), - T1(mp_read_radix, MP_READ_RADIX), + T3(mp_read_radix, ONLY_PUBLIC_API, MP_READ_RADIX, MP_TO_RADIX), T1(mp_read_write_ubin, MP_TO_UBIN), T1(mp_read_write_sbin, MP_TO_SBIN), T1(mp_reduce_2k, MP_REDUCE_2K), diff --git a/mp_read_radix.c b/mp_read_radix.c index 28e6eb60a..f02d8c592 100644 --- a/mp_read_radix.c +++ b/mp_read_radix.c @@ -3,11 +3,23 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ +static size_t s_mp_strlen(const char *s) +{ + const char *p; + p = s; + while (*p != '\0') { + p++; + } + return (size_t)(p - s); +} + /* read a string [ASCII] in a given radix */ mp_err mp_read_radix(mp_int *a, const char *str, int radix) { + mp_err err; mp_sign sign = MP_ZPOS; + size_t slen; /* make sure the radix is ok */ if ((radix < 2) || (radix > 64)) { @@ -22,48 +34,24 @@ mp_err mp_read_radix(mp_int *a, const char *str, int radix) sign = MP_NEG; } - /* set the integer to the default of zero */ - mp_zero(a); - - /* process each digit of the string */ - while (*str != '\0') { - /* if the radix <= 36 the conversion is case insensitive - * this allows numbers like 1AB and 1ab to represent the same value - * [e.g. in hex] - */ - uint8_t y; - char ch = (radix <= 36) ? (char)MP_TOUPPER((int)*str) : *str; - unsigned pos = (unsigned)(ch - '+'); - if (MP_RADIX_MAP_REVERSE_SIZE <= pos) { - break; - } - y = s_mp_radix_map_reverse[pos]; + slen = s_mp_strlen(str); - /* if the char was found in the map - * and is less than the given radix add it - * to the number, otherwise exit the loop. - */ - if (y >= radix) { - break; - } - if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) { - return err; - } - if ((err = mp_add_d(a, y, a)) != MP_OKAY) { - return err; - } - ++str; - } + mp_zero(a); - /* if an illegal character was found, fail. */ - if ((*str != '\0') && (*str != '\r') && (*str != '\n')) { - return MP_VAL; + /* Try faster version first */ + if (MP_HAS(S_MP_FASTER_READ_RADIX)) { + if ((err = s_mp_faster_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR; + } else if (MP_HAS(S_MP_SLOWER_READ_RADIX)) { + if ((err = s_mp_slower_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR; } /* set the sign only if a != 0 */ if (!mp_iszero(a)) { a->sign = sign; } - return MP_OKAY; + +LTM_ERR: + return err; } + #endif diff --git a/s_mp_faster_read_radix.c b/s_mp_faster_read_radix.c new file mode 100644 index 000000000..76c0441b9 --- /dev/null +++ b/s_mp_faster_read_radix.c @@ -0,0 +1,69 @@ +#include "tommath_private.h" +#ifdef S_MP_FASTER_READ_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* TODO: It is tunable */ +#define MP_READ_RADIX_CUTOFF_MULTIPLICATOR 3 + +/* for(n=2,64,t = ceil(log(2^100)/log(n)); printf(t", "); ) */ +static const uint8_t s_read_radix_cutoff[65] = { 0, 0, /* 0 .. 1*/ + 100, 64, 50, 44, 39, 36, 34, 32, /* 2 .. 9 */ + 31, 29, 28, 28, 27, 26, 25, 25, /* 10 .. 17 */ + 24, 24, 24, 23, 23, 23, 22, 22, /* 18 .. 25 */ + 22, 22, 21, 21, 21, 21, 20, 20, /* 26 .. 33 */ + 20, 20, 20, 20, 20, 19, 19, 19, /* 34 .. 41 */ + 19, 19, 19, 19, 19, 19, 18, 18, /* 42 .. 49 */ + 18, 18, 18, 18, 18, 18, 18, 18, /* 51 .. 57 */ + 18, 17, 17, 17, 17, 17, 17 /* 58 .. 64 */ + }; + +/* This is in mp_prime_is_prime.c and can be reused */ +static int s_floor_ilog2(int value) +{ + int r = 0; + while ((value >>= 1) != 0) { + r++; + } + return r; +} + +mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) +{ + size_t len, mid; + mp_int A, B, m; + mp_err err = MP_OKAY; + + len = end - start; + + if (len < (size_t)(s_read_radix_cutoff[radix] * MP_READ_RADIX_CUTOFF_MULTIPLICATOR)) { + return s_mp_slower_read_radix(a, str, start, end, radix); + } + + mid = len / 2u; + + if ((err = mp_init_set(&m, (mp_digit)radix)) != MP_OKAY) { + return err; + } + if ((err = mp_init_multi(&A, &B, NULL)) != MP_OKAY) { + mp_clear(&m); + return err; + } + + if ((err = s_mp_slower_read_radix(&A, str, start, start + mid + 1, radix)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_slower_read_radix(&B, str, start + mid +1, end, radix)) != MP_OKAY) goto LTM_ERR; + + if (MP_IS_2EXPT((unsigned int)radix)) { + if ((err = mp_mul_2d(&A, (int)(((len - mid) - 1u) * (size_t)s_floor_ilog2(radix)), &A)) != MP_OKAY)goto LTM_ERR; + } else { + if ((err = mp_expt_n(&m, (int)((len - mid) - 1u), &m)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_mul(&A, &m, &A)) != MP_OKAY) goto LTM_ERR; + } + if ((err = mp_add(&A, &B, a)) != MP_OKAY) goto LTM_ERR; + +LTM_ERR: + mp_clear_multi(&A, &B, &m, NULL); + return err; +} + +#endif diff --git a/s_mp_faster_to_radix.c b/s_mp_faster_to_radix.c index c621bf2e4..e45a059dd 100644 --- a/s_mp_faster_to_radix.c +++ b/s_mp_faster_to_radix.c @@ -14,6 +14,7 @@ static int32_t s_floor_ilog2(int32_t value) } /* Exponentiation with small footprint */ + static int32_t s_pow(int32_t base, int32_t exponent) { int32_t result = 1; @@ -22,13 +23,18 @@ static int32_t s_pow(int32_t base, int32_t exponent) result *= base; } exponent /= 2; + if (exponent == 0) { + break; + } base *= base; } return result; } +#define MP_COMPUTE_ESS(T) ((int)((int32_t)((uint32_t)1 << (T)) * k)) + static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, - int radix, int32_t k, int32_t t, bool pad, mp_int *P, mp_int *R) + int radix, int32_t k, int32_t t, bool pad, bool first, mp_int *P, mp_int *R) { mp_int r, q, a1; @@ -41,43 +47,45 @@ static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_ } else { if ((err = mp_init_multi(&q, &r, &a1, NULL)) != MP_OKAY) goto LTM_ERR; - /* - Barrett reduction. A step by step proof can be found at - https://www.nayuki.io/page/barrett-reduction-algorithm + if (first) { + if ((err = mp_div(a, &P[t], &q, &r)) != MP_OKAY) goto LTM_ERR; + } else { + /* + Barrett reduction. A step by step proof can be found at + https://www.nayuki.io/page/barrett-reduction-algorithm - See also: Modern Computer Arithmetic, version 0.5.9, page 59 - */ + See also: Modern Computer Arithmetic, version 0.5.9, page 59 + */ - /* If this cast-feast looks familiar: it is the numerator from computing the reciprocal*/ - Beta = (int)((int32_t)((uint32_t)1 << (t+1)) * k); + Beta = MP_COMPUTE_ESS(t+1); - /* Q = floor(A1 * I / 2^Beta) */ - /* I = floor( (2^(2*Beta)) / B) Here we have R[t] = I, P[t] = B */ - if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY) goto LTM_ERR; - if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY) goto LTM_ERR; + /* Q = floor(A1 * I / 2^Beta) */ + /* I = floor( (2^(2*Beta)) / B) Here we have R[t] = I, P[t] = B */ + if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY) goto LTM_ERR; - /* R = A - Q*B */ - if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY) goto LTM_ERR; - if ((err = mp_sub(a, &r, &r)) != MP_OKAY) goto LTM_ERR; + /* R = A - Q*B */ + if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_sub(a, &r, &r)) != MP_OKAY) goto LTM_ERR; - /* We can use this simple correction because of the way we computed the reciprocal */ - if (r.sign == MP_NEG) { - if ((err = mp_decr(&q)) != MP_OKAY) goto LTM_ERR; - if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + /* We can use this simple correction because of the way we computed the reciprocal */ + if (r.sign == MP_NEG) { + if ((err = mp_decr(&q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + } } - /* Go down the lists while climbing up the tree. */ t--; /* Follow branches */ if (mp_iszero(&q) && (!pad)) { if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, - k, t, false, P, R)) != MP_OKAY) goto LTM_ERR; + k, t, false, false, P, R)) != MP_OKAY) goto LTM_ERR; } else { if ((err = s_mp_to_radix_recursive(&q, str, part_maxlen, part_written, radix, - k, t, pad, P, R)) != MP_OKAY) goto LTM_ERR; + k, t, pad, false, P, R)) != MP_OKAY) goto LTM_ERR; if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, - k, t, true, P, R)) != MP_OKAY) goto LTM_ERR; + k, t, true, false, P, R)) != MP_OKAY) goto LTM_ERR; } mp_clear_multi(&q, &r, &a1, NULL); } @@ -87,11 +95,10 @@ static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_ return err; } - mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) { mp_err err; - int32_t n = 0, k, t = 0, steps; + int32_t n = 0, k, t = 0, steps = 0; int ilog2a; /* Use given buffer directly, no temporary buffers for the individual chunks */ @@ -100,6 +107,8 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w size_t part_written = 0; size_t part_maxlen = maxlen; + bool num_ovf = false; + /* List of reciprocals */ mp_int *R = NULL; /* List of moduli */ @@ -114,7 +123,7 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w /* steps = floor(log_2(floor(log_2(a))))*/ ilog2a = mp_count_bits(a) - 1; - /* Cutoff at about twice the size of P[0]. Interestingly far below Karatsuba cut-off. */ + /* Cutoff at about twice the size of P[0]. */ if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR)) { if ((err = s_mp_slower_to_radix(a, sptr, &part_maxlen, &part_written, radix, false)) != MP_OKAY) goto LTM_ERR; /* part_written does not count EOS */ @@ -159,6 +168,7 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY) goto LTM_ERR; if ((err = mp_incr(&R[0])) != MP_OKAY) goto LTM_ERR; + /* Compute the rest of the reciprocals if as needed */ for (t = 1; t < steps; t++) { /* P_t = (b^y)^(2^t) = n^(2^t) */ @@ -175,15 +185,29 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w /* TODO: This can only happen near MP_MAX_DIGIT_COUNT and we can use the reciprocal R[t-1] to do the division but R[t] != R[t-1]^2 so we cannot just divide by R[t-1] twice. + + But as it is the root of the tree it is used only once and caching + makes no sense in the first place, we can divide a/P[last] directly + + This is always the case for the first division and we can do it + in general to save about half of the cache memory and a bit of + computation time by avoiding the overhead of the Barrett division. + + We can set a flag (MP_OVF is an error and it might be frowned upon + using it as a flag) or R[last] to zero (minus one) or just start + with a plain division every time as described above. + + Problem with the "always dividing directly" is that it is not known + for sure if P[t-1]^2 > a without actualy computing P[t-1]^2 but it + is a rare event that the heuristic check below fails, so the cost is + not as high as it seems. */ - err = MP_OVF; - goto LTM_ERR; + num_ovf = true; } - /* P[t-1]^2 > a at most likely more than just a bit or too, so check if we - can bail out early without actually computing the square. The - constant "10" is comprised of unity plus some angst-allowance */ - if ((2 * mp_count_bits(&P[t-1]) - 10) > ilog2a) { + /* P[t-1]^2 > a is most likely more than just a bit or too, so check if we + can bail out early without actually computing the square. */ + if ((2 * mp_count_bits(&P[t-1]) - 4) > ilog2a) { /* Correct index */ t--; break; @@ -201,23 +225,24 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w t--; break; } - /* Compute numerator */ - if ((err = mp_init(&R[t])) != MP_OKAY) goto LTM_ERR; - - /* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */ - /* TODO: these are more castings than in the ER in Mayrhofen at New Year's Eve! */ - if ((err = mp_2expt(&(R[t]), (int)((int32_t)((uint32_t)1 << (t+1)) * k))) != MP_OKAY) goto LTM_ERR; - - /* Compute reciprocal */ - /* R[t] = floor(2^(2^t * k) / P[t] */ - if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR; - /* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */ - if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR; + + /* We cannot evaluate the numerator if the computation would overflow */ + if (!num_ovf) { + /* Compute numerator */ + if ((err = mp_init(&R[t])) != MP_OKAY) goto LTM_ERR; + /* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */ + if ((err = mp_2expt(&(R[t]), MP_COMPUTE_ESS(t + 1))) != MP_OKAY) goto LTM_ERR; + /* Compute reciprocal */ + /* R[t] = floor(2^(2^t * k) / P[t] */ + if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR; + /* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */ + if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR; + } } /* And finally: start the recursion. */ if ((err = s_mp_to_radix_recursive(a, sptr, &part_maxlen, &part_written, radix, - k, t, false, P, R)) != MP_OKAY) goto LTM_ERR; + k, t, false, num_ovf, P, R)) != MP_OKAY) goto LTM_ERR; /* part_written does not account for EOS */ *written = part_written + 1; diff --git a/s_mp_slower_read_radix.c b/s_mp_slower_read_radix.c new file mode 100644 index 000000000..852c47cb4 --- /dev/null +++ b/s_mp_slower_read_radix.c @@ -0,0 +1,39 @@ +#include "tommath_private.h" +#ifdef S_MP_SLOWER_READ_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) +{ + mp_err err; + size_t i; + + /* checks are done by caller */ + + char *_s = (char *)(str + start); + for (i = start; (i < end) && (*_s != '\0'); i++) { + uint8_t y; + + char ch = (radix <= 36) ? (char)MP_TOUPPER((int)*_s) : *_s; + unsigned int pos = (unsigned int)(ch - '+'); + if (MP_RADIX_MAP_REVERSE_SIZE <= pos) { + err = MP_VAL; + goto LBL_ERR; + } + y = s_mp_radix_map_reverse[pos]; + if (y >= radix) { + err = MP_VAL; + goto LBL_ERR; + } + if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY) goto LBL_ERR; + _s++; + } + + return MP_OKAY; +LBL_ERR: + mp_zero(a); + return err; +} + +#endif diff --git a/s_mp_slower_to_radix.c b/s_mp_slower_to_radix.c index f0f5caf52..6dacb13a1 100644 --- a/s_mp_slower_to_radix.c +++ b/s_mp_slower_to_radix.c @@ -45,7 +45,7 @@ mp_err s_mp_slower_to_radix(const mp_int *a, char **str, ybar--; } - /* Fill in leading zeros if this chunk contains the most significant digits. */ + /* Fill in leading zeros if this chunk does not contain the most significant digits. */ if (pad) { while ((ybar-- > 0) && (((*part_maxlen)--) > 0)) { *s++ = '0'; @@ -60,7 +60,7 @@ mp_err s_mp_slower_to_radix(const mp_int *a, char **str, s_reverse(s, digs); /* step forward */ *str += digs; - /* Add EOS at teh end of every chunk to allow this function to be used stand-alone */ + /* Add EOS at the end of every chunk to allow this function to be used stand-alone */ **str = '\0'; /* TODO: this method to increase "written" is not threadsafe! */ diff --git a/tommath_private.h b/tommath_private.h index b55392c49..b510be254 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -231,9 +231,13 @@ MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR; /* Radix conversion O(M(n) log n) Schoenhage method */ MP_PRIVATE mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) MP_WUR; +MP_PRIVATE mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR; /* Radix conversion O(n^2) method */ MP_PRIVATE mp_err s_mp_slower_to_radix(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, int radix, bool pad) MP_WUR; +MP_PRIVATE mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR; + + #define MP_RADIX_MAP_REVERSE_SIZE 80u #define MP_RADIX_EXPONENT_Y_SIZE 65u @@ -243,11 +247,11 @@ extern MP_PRIVATE const mp_digit s_mp_prime_tab[]; extern MP_PRIVATE const uint8_t s_mp_radix_exponent_y[]; /* - There is not much to tune here, the steps are of the form 2^k and too large - for tuning to make a alot of sense. + This is the value without the Newton-Raphson optimization. + Tuneable? */ #ifndef MP_RADIX_BARRETT_START_MULTIPLICATOR -# define MP_RADIX_BARRETT_START_MULTIPLICATOR 8 +# define MP_RADIX_BARRETT_START_MULTIPLICATOR 50 #endif From 11e735039e5eeabf68a0fa2f5a6e09a3920e4097 Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Tue, 11 Apr 2023 14:36:40 +0200 Subject: [PATCH 3/8] add option to use standard `strlen()` instead of home-baked version Signed-off-by: Steffen Jaeckel --- mp_read_radix.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mp_read_radix.c b/mp_read_radix.c index f02d8c592..4b4313a68 100644 --- a/mp_read_radix.c +++ b/mp_read_radix.c @@ -3,6 +3,11 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ +#ifdef MP_USE_MEMOPS +# include + +# define MP_STRLEN(s) strlen(s) +#else static size_t s_mp_strlen(const char *s) { const char *p; @@ -12,6 +17,8 @@ static size_t s_mp_strlen(const char *s) } return (size_t)(p - s); } +# define MP_STRLEN(s) s_mp_strlen(s) +#endif /* read a string [ASCII] in a given radix */ mp_err mp_read_radix(mp_int *a, const char *str, int radix) @@ -34,7 +41,7 @@ mp_err mp_read_radix(mp_int *a, const char *str, int radix) sign = MP_NEG; } - slen = s_mp_strlen(str); + slen = MP_STRLEN(str); mp_zero(a); From fec4f5ca4c9c69e247c9ab669c3d1420ccd23682 Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Tue, 11 Apr 2023 16:08:55 +0200 Subject: [PATCH 4/8] add `s_mp_floor_ilog2()` instead of all those copies Signed-off-by: Steffen Jaeckel --- libtommath_VS2008.vcproj | 20 ++++++++++++ makefile | 13 ++++---- makefile.mingw | 13 ++++---- makefile.msvc | 13 ++++---- makefile.shared | 13 ++++---- makefile.unix | 13 ++++---- mp_prime_is_prime.c | 12 +------ s_mp_faster_read_radix.c | 17 +++------- s_mp_floor_ilog2.c | 15 +++++++++ s_mp_fp_log_d.c | 11 +------ sources.cmake | 5 +++ tommath_class.h | 68 ++++++++++++++++++++++++++++++++++++---- tommath_private.h | 1 + 13 files changed, 144 insertions(+), 70 deletions(-) create mode 100644 s_mp_floor_ilog2.c diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj index 13158a09d..c90ff4b6f 100644 --- a/libtommath_VS2008.vcproj +++ b/libtommath_VS2008.vcproj @@ -832,6 +832,18 @@ RelativePath="s_mp_exptmod_fast.c" > + + + + + + @@ -908,6 +920,14 @@ RelativePath="s_mp_rand_platform.c" > + + + + diff --git a/makefile b/makefile index f8feff7cc..21a40483f 100644 --- a/makefile +++ b/makefile @@ -44,12 +44,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o #END_INS diff --git a/makefile.mingw b/makefile.mingw index 532747be0..3cd8f5b53 100644 --- a/makefile.mingw +++ b/makefile.mingw @@ -46,12 +46,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.msvc b/makefile.msvc index 5d1285490..903c3f4b1 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -42,12 +42,13 @@ mp_reduce_setup.obj mp_root_n.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj mp_set mp_set_l.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj mp_shrink.obj mp_signed_rsh.obj mp_sqrmod.obj mp_sqrt.obj \ mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj \ mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj s_mp_div_recursive.obj \ -s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_fp_log.obj s_mp_fp_log_d.obj \ -s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \ -s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \ -s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj \ -s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj \ -s_mp_sqr_toom.obj s_mp_sub.obj s_mp_zero_buf.obj s_mp_zero_digs.obj +s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_faster_read_radix.obj \ +s_mp_faster_to_radix.obj s_mp_floor_ilog2.obj s_mp_fp_log.obj s_mp_fp_log_d.obj s_mp_get_bit.obj s_mp_invmod.obj \ +s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj s_mp_mul_balance.obj \ +s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj s_mp_mul_toom.obj \ +s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj s_mp_radix_size_overestimate.obj \ +s_mp_rand_platform.obj s_mp_slower_read_radix.obj s_mp_slower_to_radix.obj s_mp_sqr.obj s_mp_sqr_comba.obj \ +s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_zero_buf.obj s_mp_zero_digs.obj HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.shared b/makefile.shared index c9b933513..c48efdd6a 100644 --- a/makefile.shared +++ b/makefile.shared @@ -41,12 +41,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o #END_INS diff --git a/makefile.unix b/makefile.unix index 9fe939f04..737a73377 100644 --- a/makefile.unix +++ b/makefile.unix @@ -47,12 +47,13 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \ -s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \ -s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ -s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ -s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ -s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \ -s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_faster_read_radix.o \ +s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o \ +s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o \ +s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o \ +s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_radix_size_overestimate.o \ +s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o s_mp_sqr.o s_mp_sqr_comba.o \ +s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h diff --git a/mp_prime_is_prime.c b/mp_prime_is_prime.c index bb24f5944..55c054e8a 100644 --- a/mp_prime_is_prime.c +++ b/mp_prime_is_prime.c @@ -3,16 +3,6 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* portable integer log of two with small footprint */ -static unsigned int s_floor_ilog2(int value) -{ - unsigned int r = 0; - while ((value >>= 1) != 0) { - r++; - } - return r; -} - mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) { mp_int b; @@ -186,7 +176,7 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) * Hence the ugly type-fiddling in the following code. */ size_a = mp_count_bits(a); - mask = (1u << s_floor_ilog2(size_a)) - 1u; + mask = (1u << s_mp_floor_ilog2((unsigned int)size_a)) - 1u; /* Assuming the General Rieman hypothesis (never thought to write that in a comment) the upper bound can be lowered to 2*(log a)^2. diff --git a/s_mp_faster_read_radix.c b/s_mp_faster_read_radix.c index 76c0441b9..0843c0da4 100644 --- a/s_mp_faster_read_radix.c +++ b/s_mp_faster_read_radix.c @@ -18,20 +18,11 @@ static const uint8_t s_read_radix_cutoff[65] = { 0, 0, 18, 17, 17, 17, 17, 17, 17 /* 58 .. 64 */ }; -/* This is in mp_prime_is_prime.c and can be reused */ -static int s_floor_ilog2(int value) -{ - int r = 0; - while ((value >>= 1) != 0) { - r++; - } - return r; -} - mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) { size_t len, mid; mp_int A, B, m; + mp_digit radix_ = (mp_digit)radix; mp_err err = MP_OKAY; len = end - start; @@ -42,7 +33,7 @@ mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t e mid = len / 2u; - if ((err = mp_init_set(&m, (mp_digit)radix)) != MP_OKAY) { + if ((err = mp_init_set(&m, radix_)) != MP_OKAY) { return err; } if ((err = mp_init_multi(&A, &B, NULL)) != MP_OKAY) { @@ -53,8 +44,8 @@ mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t e if ((err = s_mp_slower_read_radix(&A, str, start, start + mid + 1, radix)) != MP_OKAY) goto LTM_ERR; if ((err = s_mp_slower_read_radix(&B, str, start + mid +1, end, radix)) != MP_OKAY) goto LTM_ERR; - if (MP_IS_2EXPT((unsigned int)radix)) { - if ((err = mp_mul_2d(&A, (int)(((len - mid) - 1u) * (size_t)s_floor_ilog2(radix)), &A)) != MP_OKAY)goto LTM_ERR; + if (MP_IS_2EXPT(radix_)) { + if ((err = mp_mul_2d(&A, (int)(((len - mid) - 1u) * s_mp_floor_ilog2(radix_)), &A)) != MP_OKAY) goto LTM_ERR; } else { if ((err = mp_expt_n(&m, (int)((len - mid) - 1u), &m)) != MP_OKAY) goto LTM_ERR; if ((err = mp_mul(&A, &m, &A)) != MP_OKAY) goto LTM_ERR; diff --git a/s_mp_floor_ilog2.c b/s_mp_floor_ilog2.c new file mode 100644 index 000000000..ec32b1337 --- /dev/null +++ b/s_mp_floor_ilog2.c @@ -0,0 +1,15 @@ +#include "tommath_private.h" +#ifdef S_MP_FLOOR_ILOG2_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +size_t s_mp_floor_ilog2(mp_word value) +{ + size_t r = 0u; + while ((value >>= 1) != 0u) { + r++; + } + return r; +} + +#endif diff --git a/s_mp_fp_log_d.c b/s_mp_fp_log_d.c index 71d82dc41..e7ce5f6e9 100644 --- a/s_mp_fp_log_d.c +++ b/s_mp_fp_log_d.c @@ -3,22 +3,13 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -static mp_word s_mp_flog2_mp_word_d(mp_word value) -{ - mp_word r = 0u; - while ((value >>= 1) != 0u) { - r++; - } - return r; -} - /* Fixed point bitwise logarithm base two of "x" with precision "p" */ static mp_err s_mp_fp_log_fraction_d(mp_word x, int p, mp_word *c) { mp_word b, L_out, L, a_bar, twoep; int i; - L = s_mp_flog2_mp_word_d(x); + L = s_mp_floor_ilog2(x); if ((L + (mp_word)p) > MP_UPPER_LIMIT_FIXED_LOG) { return MP_VAL; diff --git a/sources.cmake b/sources.cmake index bbb2aeab6..87c6b9820 100644 --- a/sources.cmake +++ b/sources.cmake @@ -132,6 +132,9 @@ s_mp_div_school.c s_mp_div_small.c s_mp_exptmod.c s_mp_exptmod_fast.c +s_mp_faster_read_radix.c +s_mp_faster_to_radix.c +s_mp_floor_ilog2.c s_mp_fp_log.c s_mp_fp_log_d.c s_mp_get_bit.c @@ -151,6 +154,8 @@ s_mp_prime_tab.c s_mp_radix_map.c s_mp_radix_size_overestimate.c s_mp_rand_platform.c +s_mp_slower_read_radix.c +s_mp_slower_to_radix.c s_mp_sqr.c s_mp_sqr_comba.c s_mp_sqr_karatsuba.c diff --git a/tommath_class.h b/tommath_class.h index e08bc5f3c..c5edbc641 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -141,6 +141,9 @@ # define S_MP_DIV_SMALL_C # define S_MP_EXPTMOD_C # define S_MP_EXPTMOD_FAST_C +# define S_MP_FASTER_READ_RADIX_C +# define S_MP_FASTER_TO_RADIX_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_FP_LOG_C # define S_MP_FP_LOG_D_C # define S_MP_GET_BIT_C @@ -160,6 +163,8 @@ # define S_MP_RADIX_MAP_C # define S_MP_RADIX_SIZE_OVERESTIMATE_C # define S_MP_RAND_PLATFORM_C +# define S_MP_SLOWER_READ_RADIX_C +# define S_MP_SLOWER_TO_RADIX_C # define S_MP_SQR_C # define S_MP_SQR_COMBA_C # define S_MP_SQR_KARATSUBA_C @@ -647,6 +652,7 @@ # define MP_RAND_C # define MP_READ_RADIX_C # define MP_SET_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_PRIME_IS_DIVISIBLE_C #endif @@ -734,9 +740,10 @@ #endif #if defined(MP_READ_RADIX_C) -# define MP_ADD_D_C -# define MP_MUL_D_C # define MP_ZERO_C +# define S_MP_FASTER_READ_RADIX_C +# define S_MP_SLOWER_READ_RADIX_C +# define S_MP_STRLEN_C #endif #if defined(MP_REDUCE_C) @@ -931,9 +938,8 @@ #endif #if defined(MP_TO_RADIX_C) -# define MP_CLEAR_C -# define MP_DIV_D_C -# define MP_INIT_COPY_C +# define S_MP_FASTER_TO_RADIX_C +# define S_MP_SLOWER_TO_RADIX_C #endif #if defined(MP_TO_SBIN_C) @@ -1069,6 +1075,44 @@ # define S_MP_MONTGOMERY_REDUCE_COMBA_C #endif +#if defined(S_MP_FASTER_READ_RADIX_C) +# define MP_ADD_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_EXPT_N_C +# define MP_INIT_MULTI_C +# define MP_INIT_SET_C +# define MP_MUL_2D_C +# define MP_MUL_C +# define S_MP_FLOOR_ILOG2_C +# define S_MP_SLOWER_READ_RADIX_C +#endif + +#if defined(S_MP_FASTER_TO_RADIX_C) +# define MP_2EXPT_C +# define MP_ADD_C +# define MP_ADD_D_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_CMP_C +# define MP_COUNT_BITS_C +# define MP_DIV_2D_C +# define MP_DIV_C +# define MP_EXPT_N_C +# define MP_INIT_C +# define MP_INIT_I32_C +# define MP_INIT_MULTI_C +# define MP_MUL_C +# define MP_SUB_C +# define MP_SUB_D_C +# define S_MP_SLOWER_TO_RADIX_C +# define S_MP_TO_RADIX_RECURSIVE_C +# define S_MP_ZERO_BUF_C +#endif + +#if defined(S_MP_FLOOR_ILOG2_C) +#endif + #if defined(S_MP_FP_LOG_C) # define MP_2EXPT_C # define MP_ADD_C @@ -1091,7 +1135,7 @@ # define MP_DIV_2D_C # define MP_GET_I64_C # define MP_INIT_C -# define S_MP_FLOG2_MP_WORD_D_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_FP_LOG_FRACTION_D_C #endif @@ -1234,6 +1278,18 @@ #if defined(S_MP_RAND_PLATFORM_C) #endif +#if defined(S_MP_SLOWER_READ_RADIX_C) +# define MP_ADD_D_C +# define MP_MUL_D_C +# define MP_ZERO_C +#endif + +#if defined(S_MP_SLOWER_TO_RADIX_C) +# define MP_CLEAR_C +# define MP_DIV_D_C +# define MP_INIT_COPY_C +#endif + #if defined(S_MP_SQR_C) # define MP_CLAMP_C # define MP_CLEAR_C diff --git a/tommath_private.h b/tommath_private.h index b510be254..1c5061404 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -223,6 +223,7 @@ MP_PRIVATE void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits); MP_PRIVATE void s_mp_zero_buf(void *mem, size_t size); MP_PRIVATE void s_mp_zero_digs(mp_digit *d, int digits); MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix, size_t *size); +MP_PRIVATE size_t s_mp_floor_ilog2(mp_word value); #define MP_PRECISION_FIXED_LOG ( (int) (((sizeof(mp_word) * CHAR_BIT) / 2) - 1)) #define MP_UPPER_LIMIT_FIXED_LOG ( (int) ( (sizeof(mp_word) * CHAR_BIT) - 1)) From f7d5d32b3ab44cc21d1e14ac46d497875c2513a7 Mon Sep 17 00:00:00 2001 From: czurnieden Date: Wed, 21 Jun 2023 15:21:05 +0200 Subject: [PATCH 5/8] Print additional debugging information in test.c --- demo/test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo/test.c b/demo/test.c index c48d5d80f..c3983fd2b 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1223,12 +1223,13 @@ static int test_mp_read_radix(void) t_slow = stop - start; /* It is "long int" in GLibC but can be bigger and/or even a floating point elsewhere */ - printf("SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, (double)t_fast/(double)CLOCKS_PER_SEC); + fprintf(stderr,"SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, (double)t_fast/(double)CLOCKS_PER_SEC); /* Check if the branching works. */ if (MP_HAS(S_MP_FASTER_READ_RADIX) && MP_HAS(S_MP_FASTER_TO_RADIX)) { if (t_fast > t_slow) { fprintf(stderr, "Timing suspicious in test_mp_read_radix. No fast multiplication? Cut-off too low?\n"); + DO(mp_fwrite(&a, 16, stderr)); goto LBL_ERR; } } From 7d8209acb379cc91dce0adf9eeeced8f8697f80a Mon Sep 17 00:00:00 2001 From: czurnieden Date: Wed, 21 Jun 2023 15:22:15 +0200 Subject: [PATCH 6/8] formatted --- demo/test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo/test.c b/demo/test.c index c3983fd2b..c793de897 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1223,7 +1223,8 @@ static int test_mp_read_radix(void) t_slow = stop - start; /* It is "long int" in GLibC but can be bigger and/or even a floating point elsewhere */ - fprintf(stderr,"SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, (double)t_fast/(double)CLOCKS_PER_SEC); + fprintf(stderr,"SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, + (double)t_fast/(double)CLOCKS_PER_SEC); /* Check if the branching works. */ if (MP_HAS(S_MP_FASTER_READ_RADIX) && MP_HAS(S_MP_FASTER_TO_RADIX)) { From 816833c730c2757e341f7b8d9486a043375ddd71 Mon Sep 17 00:00:00 2001 From: czurnieden Date: Wed, 21 Jun 2023 16:13:42 +0200 Subject: [PATCH 7/8] temporarily removed timing --- demo/test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/test.c b/demo/test.c index c793de897..a93620366 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1226,7 +1226,7 @@ static int test_mp_read_radix(void) fprintf(stderr,"SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, (double)t_fast/(double)CLOCKS_PER_SEC); - /* Check if the branching works. */ + /* Check if the branching works. if (MP_HAS(S_MP_FASTER_READ_RADIX) && MP_HAS(S_MP_FASTER_TO_RADIX)) { if (t_fast > t_slow) { fprintf(stderr, "Timing suspicious in test_mp_read_radix. No fast multiplication? Cut-off too low?\n"); @@ -1234,7 +1234,7 @@ static int test_mp_read_radix(void) goto LBL_ERR; } } - + */ free(buffer); From 28b373dae6409f7bd771fe36760a1dddb9290b0c Mon Sep 17 00:00:00 2001 From: czurnieden Date: Sat, 1 Jul 2023 01:50:49 +0200 Subject: [PATCH 8/8] clean up --- demo/test.c | 16 ++++++++++------ mp_to_radix.c | 2 ++ s_mp_faster_read_radix.c | 6 +++--- s_mp_slower_to_radix.c | 21 ++++++++++----------- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/demo/test.c b/demo/test.c index a93620366..0132baeea 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1158,7 +1158,7 @@ static int test_mp_read_radix(void) size_t written, maxlen; int bignum, i; - char *buffer, *bcpy; + char *buffer, *bcpy, *startb; clock_t start, stop, t_slow, t_fast; @@ -1191,18 +1191,19 @@ static int test_mp_read_radix(void) /* Test the fast method with a slightly larger number */ - /* Must be bigger than the cut-off value, of course */ - bignum = (2 * 20 * MP_RADIX_BARRETT_START_MULTIPLICATOR) * 10; + /* Needs to be big enough to make a sufficiently large timing difference */ + bignum = 30000; buffer = (char *)malloc((size_t)(bignum + 2)); if (buffer == NULL) { goto LBL_ERR; } DO(mp_rand(&a, bignum / MP_DIGIT_BIT)); - printf("\nNumber of limbs in &b = %d, bit_count of &b = %d\n", bignum / MP_DIGIT_BIT, mp_count_bits(&a)); + fprintf(stderr,"\nNumber of limbs in &b = %d, bit_count of &b = %d\n", bignum / MP_DIGIT_BIT, mp_count_bits(&a)); start = clock(); for (i = 2; i < 65; i++) { /* printf("FAST radix = %d\n",i); */ DO(mp_to_radix(&a, buffer, (size_t)(bignum + 1), &written, i)); + mp_zero(&b); DO(mp_read_radix(&b, buffer, i)); EXPECT(mp_cmp(&a, &b) == MP_EQ); } @@ -1215,7 +1216,11 @@ static int test_mp_read_radix(void) /* printf("SLOW radix = %d\n",i); */ maxlen = (size_t)(bignum + 1); bcpy = buffer; + /* s_mp_slower_to_radix is very rudimentary as a stand-alone */ + startb = bcpy; DO(s_mp_slower_to_radix(&a, &bcpy, &maxlen, &written, i, false)); + bcpy = startb; + mp_zero(&b); DO(s_mp_slower_read_radix(&b, bcpy, 0, strlen(bcpy), i)); EXPECT(mp_cmp(&a, &b) == MP_EQ); } @@ -1226,7 +1231,7 @@ static int test_mp_read_radix(void) fprintf(stderr,"SLOW: %.10f, FAST: %.10f\n", (double)t_slow/(double)CLOCKS_PER_SEC, (double)t_fast/(double)CLOCKS_PER_SEC); - /* Check if the branching works. + /* Check if the branching works. */ if (MP_HAS(S_MP_FASTER_READ_RADIX) && MP_HAS(S_MP_FASTER_TO_RADIX)) { if (t_fast > t_slow) { fprintf(stderr, "Timing suspicious in test_mp_read_radix. No fast multiplication? Cut-off too low?\n"); @@ -1234,7 +1239,6 @@ static int test_mp_read_radix(void) goto LBL_ERR; } } - */ free(buffer); diff --git a/mp_to_radix.c b/mp_to_radix.c index 114902f36..e7509efe5 100644 --- a/mp_to_radix.c +++ b/mp_to_radix.c @@ -47,7 +47,9 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY) goto LBL_ERR; } else { if (MP_HAS(S_MP_SLOWER_TO_RADIX)) { + char *start = str; if ((err = s_mp_slower_to_radix(&a_bar, &str, &maxlen, &part_written, radix, false)) != MP_OKAY) goto LBL_ERR; + str = start; /* part_written does not count EOS */ part_written++; } diff --git a/s_mp_faster_read_radix.c b/s_mp_faster_read_radix.c index 0843c0da4..f35d89472 100644 --- a/s_mp_faster_read_radix.c +++ b/s_mp_faster_read_radix.c @@ -3,7 +3,7 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* TODO: It is tunable */ +/* TODO: It is tunable. Or is it? */ #define MP_READ_RADIX_CUTOFF_MULTIPLICATOR 3 /* for(n=2,64,t = ceil(log(2^100)/log(n)); printf(t", "); ) */ @@ -41,8 +41,8 @@ mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t e return err; } - if ((err = s_mp_slower_read_radix(&A, str, start, start + mid + 1, radix)) != MP_OKAY) goto LTM_ERR; - if ((err = s_mp_slower_read_radix(&B, str, start + mid +1, end, radix)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_faster_read_radix(&A, str, start, start + mid + 1, radix)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_faster_read_radix(&B, str, start + mid +1, end, radix)) != MP_OKAY) goto LTM_ERR; if (MP_IS_2EXPT(radix_)) { if ((err = mp_mul_2d(&A, (int)(((len - mid) - 1u) * s_mp_floor_ilog2(radix_)), &A)) != MP_OKAY) goto LTM_ERR; diff --git a/s_mp_slower_to_radix.c b/s_mp_slower_to_radix.c index 6dacb13a1..0dffb2b79 100644 --- a/s_mp_slower_to_radix.c +++ b/s_mp_slower_to_radix.c @@ -20,29 +20,30 @@ mp_err s_mp_slower_to_radix(const mp_int *a, char **str, mp_int t; mp_digit d; mp_err err = MP_OKAY; - - /* The number of digits of "radix" to be filled if this chunk is not the most significant one. */ - int ybar = s_mp_radix_exponent_y[radix] * MP_RADIX_BARRETT_START_MULTIPLICATOR; + int ybar = 0; /* A temporary pointer to the output string to make reversal simpler */ char *s = *str; - /* TODO: input a is already a copy of the original and we could use it destructively? */ + /* The number of digits of "radix" to be filled if this chunk is not the most significant one. */ + if (pad) { + ybar = s_mp_radix_exponent_y[radix] * MP_RADIX_BARRETT_START_MULTIPLICATOR; + } + if ((err = mp_init_copy(&t, a)) != MP_OKAY) goto LTM_ERR; while (!mp_iszero(&t)) { - /* TODO: this method to decrease "maxlen" is not threadsafe! */ if ((--(*part_maxlen)) < 1u) { /* no more room */ err = MP_BUF; goto LTM_ERR; } - if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) { - goto LTM_ERR; - } + if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) goto LTM_ERR; *s++ = s_mp_radix_map[d]; ++digs; - ybar--; + if (pad) { + ybar--; + } } /* Fill in leading zeros if this chunk does not contain the most significant digits. */ @@ -53,7 +54,6 @@ mp_err s_mp_slower_to_radix(const mp_int *a, char **str, } } - /* TODO: I think that can be done more elegantly */ /* "rewind" */ s = *str; /* reverse */ @@ -63,7 +63,6 @@ mp_err s_mp_slower_to_radix(const mp_int *a, char **str, /* Add EOS at the end of every chunk to allow this function to be used stand-alone */ **str = '\0'; - /* TODO: this method to increase "written" is not threadsafe! */ if (part_written != NULL) { *part_written = *part_written + digs; }