diff --git a/demo/test.c b/demo/test.c index 2fa6e08d..9cc8f7cb 100644 --- a/demo/test.c +++ b/demo/test.c @@ -4,6 +4,19 @@ #define S_MP_RAND_JENKINS_C #include "s_mp_rand_jenkins.c" +/* TODO: Make it an environment variable via main.yml? + This is for testing only, so no reason to add checks to the build process. */ +#ifdef __has_include +# if __has_include () +# include +# else +# define RUNNING_ON_VALGRIND 1 +# endif +#else +# define RUNNING_ON_VALGRIND 1 +#endif + + static long rand_long(void) { long x; @@ -1157,30 +1170,27 @@ static int test_mp_montgomery_reduce(void) } +#include static int test_mp_read_radix(void) { char buf[4096]; - size_t written; + size_t written, maxlen; - mp_int a; - DOR(mp_init_multi(&a, NULL)); + int bignum, i, j, k, limit_test; + char *buffer, *bcpy, *startb; + clock_t start, stop, t_slow, t_fast; + double slow = 0.0, fast = 0.0, sum_slow = 0.0, sum_fast = 0.0; + double s_bases_slow[65] = {0.0}; + double s_bases_fast[65] = {0.0}; + + mp_int a, b, c; + DOR(mp_init_multi(&a, &b, &c, NULL)); DO(mp_read_radix(&a, "123456", 10)); DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10)); printf(" '123456' a == %s, length = %zu", buf, written); - /* See comment in mp_to_radix.c */ - /* - if( (err = mp_to_radix(&a, buf, 3u, &written, 10) ) != MP_OKAY) goto LBL_ERR; - printf(" '56' a == %s, length = %zu\n", buf, written); - - if( (err = mp_to_radix(&a, buf, 4u, &written, 10) ) != MP_OKAY) goto LBL_ERR; - printf(" '456' a == %s, length = %zu\n", buf, written); - if( (err = mp_to_radix(&a, buf, 30u, &written, 10) ) != MP_OKAY) goto LBL_ERR; - printf(" '123456' a == %s, length = %zu, error = %s\n", - buf, written, mp_error_to_string(err)); - */ DO(mp_read_radix(&a, "-123456", 10)); DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10)); printf("\r '-123456' a == %s, length = %zu", buf, written); @@ -1198,10 +1208,81 @@ static int test_mp_read_radix(void) printf("%s, %lu\n", buf, (unsigned long)a.dp[0] & 3uL); } - mp_clear(&a); + /* Safe a bit of testing 
time */ + if (RUNNING_ON_VALGRIND != 0) { + limit_test = 2000; + } else { + limit_test = 6000; + } + + /* Test the fast method with a slightly larger number (about a minute on an older machine) */ + for (k = 100; k < limit_test; k += 1000) { + bignum = k; + buffer = (char *)malloc((size_t)(bignum + 2)); + if (buffer == NULL) { + goto LBL_ERR; + } + DO(mp_rand(&a, bignum / MP_DIGIT_BIT)); + for (i = 2; i < 65; i++) { + start = clock(); + for (j = 0; j < 100; j++) { + DO(mp_to_radix(&a, buffer, (size_t)(bignum + 1), &written, i)); + mp_zero(&b); + DO(mp_read_radix(&b, buffer, i)); + /* Check roundabout */ + EXPECT(mp_cmp(&a, &b) == MP_EQ); + } + stop = clock(); + t_fast = stop - start; + + start = clock(); + for (j = 0; j < 100; j++) { + maxlen = (size_t)(bignum + 1); + bcpy = buffer; + /* s_mp_slower_to_radix is very rudimentary and needs some help to work as a stand-alone */ + startb = bcpy; + DO(s_mp_slower_to_radix(&a, &bcpy, &maxlen, &written, i, false)); + bcpy = startb; + mp_zero(&c); + DO(s_mp_slower_read_radix(&c, bcpy, 0, strlen(bcpy), i)); + /* Check roundabout */ + EXPECT(mp_cmp(&a, &c) == MP_EQ); + /* Check against result of fast algorithms above */ + EXPECT(mp_cmp(&b, &c) == MP_EQ); + } + stop = clock(); + t_slow = stop - start; + + slow = (double)t_slow/(double)CLOCKS_PER_SEC; + fast = (double)t_fast/(double)CLOCKS_PER_SEC; + + fprintf(stderr,"Bits %d Base %d SLOW: %.10f, FAST: %.10f\n", mp_count_bits(&a), i, slow, fast); + + sum_slow += slow; + sum_fast += fast; + s_bases_slow[i] += slow; + s_bases_fast[i] += fast; + } + free(buffer); + } + + fprintf(stderr,"\nSUM: SLOW: %.10f, FAST: %.10f\n",sum_slow, sum_fast); + + for (i = 2; i < 65; i++) { + fprintf(stderr,"Sums for Base %d SLOW: %.10f, FAST: %.10f\n",i, s_bases_slow[i], s_bases_fast[i]); + } + + /* Valgrind overhead does not allow for timings. */ + if ((RUNNING_ON_VALGRIND == 0) && (MP_DIGIT_BIT >= 20)) { + /* Very basic check if the fast algorithms are actually faster. 
*/ + EXPECT(sum_slow > sum_fast); + } + + + mp_clear_multi(&a, &b, &c, NULL); return EXIT_SUCCESS; LBL_ERR: - mp_clear(&a); + mp_clear_multi(&a, &b, &c, NULL); return EXIT_FAILURE; } @@ -2583,7 +2664,7 @@ static int unit_tests(int argc, char **argv) T1(mp_prime_next_prime, MP_PRIME_NEXT_PRIME), T1(mp_prime_rand, MP_PRIME_RAND), T1(mp_rand, MP_RAND), - T1(mp_read_radix, MP_READ_RADIX), + T2(mp_read_radix,ONLY_PUBLIC_API, MP_READ_RADIX), T1(mp_read_write_ubin, MP_TO_UBIN), T1(mp_read_write_sbin, MP_TO_SBIN), T1(mp_reduce_2k, MP_REDUCE_2K), @@ -2600,13 +2681,16 @@ static int unit_tests(int argc, char **argv) T3(s_mp_div_recursive, ONLY_PUBLIC_API, S_MP_DIV_RECURSIVE, S_MP_DIV_SCHOOL), T3(s_mp_div_small, ONLY_PUBLIC_API, S_MP_DIV_SMALL, S_MP_DIV_SCHOOL), T2(s_mp_sqr, ONLY_PUBLIC_API, S_MP_SQR), + /* s_mp_mul_comba not (yet) testable because s_mp_mul branches to s_mp_mul_comba automatically */ + T2(s_mp_sqr_comba, ONLY_PUBLIC_API, S_MP_SQR_COMBA), T2(s_mp_mul_balance, ONLY_PUBLIC_API, S_MP_MUL_BALANCE), T2(s_mp_mul_karatsuba, ONLY_PUBLIC_API, S_MP_MUL_KARATSUBA), T2(s_mp_sqr_karatsuba, ONLY_PUBLIC_API, S_MP_SQR_KARATSUBA), T2(s_mp_mul_toom, ONLY_PUBLIC_API, S_MP_MUL_TOOM), T2(s_mp_sqr_toom, ONLY_PUBLIC_API, S_MP_SQR_TOOM) + #undef T3 #undef T2 #undef T1 diff --git a/etc/tune.c b/etc/tune.c index 8ad20289..3f44cb12 100644 --- a/etc/tune.c +++ b/etc/tune.c @@ -148,6 +148,169 @@ static uint64_t s_time_sqr(int size) return t1; } +/* Set cutoff for radix conversion (base 10 only for now but should be good enough) */ +#include +static mp_err random_number(char **string, size_t length) +{ + char alphabet[] = "0123456789", *str_cpy; + + *string = malloc(length + 1); + if (*string == NULL) { + return MP_MEM; + } + str_cpy = *string; + /* No leading zeros; the caller must pass length >= 1 */ + do { + *str_cpy = alphabet[rand() % 10]; + } while (*str_cpy == '0'); + length--; + str_cpy++; + + for (; length > 0u; length--) { /* counted loop: a do/while here wrapped around for length == 1 */ + *str_cpy = alphabet[rand() % 10]; + str_cpy++; + } + + *str_cpy = '\0'; + + return MP_OKAY; +} 
+ +#include +static uint64_t s_time_radix_conversion_read(int size) +{ + int x; + size_t length; + size_t written; + mp_err err; + mp_int a; + char *str_a, *str_b; + uint64_t t1; + + /* "size" is given as "number of limbs" and starts at 8 */ + length = (size_t)size * (size_t)MP_DIGIT_BIT; + + /* Over-estimate number of base 10 digits + Magic number: 28/93 = CF(log_10(2))_(p_3, q_3) + */ + written = (length * 28u); + /* May happen e.g. if size > 2184 with MP_16BIT + but cutoff should be about a couple of thousand bits + at most (around or above Karatsuba cutoff). + */ + if (length != written / 28u) { + t1 = UINT64_MAX; + goto LBL_ERR_1; + } + length = written / 93u + 2u; + + if ((err = random_number(&str_a, length)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR_1; + } + + if ((err = mp_init(&a)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR_2; + } + s_timer_start(); + for (x = 0; x < s_number_of_test_loops; x++) { + if ((err = mp_read_radix(&a, str_a, 10)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR_3; + } + } + t1 = s_timer_stop(); + + if ((err = mp_radix_size(&a, 10, &length)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR_3; + } + + str_b = malloc(length + 1); + if (str_b == NULL) { + t1 = UINT64_MAX; + goto LBL_ERR_3; + } + if ((err = mp_to_radix(&a, str_b, length, &written, 10)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR; + } + + if (strcmp(str_a, str_b) != 0) { + t1 = 0u; + goto LBL_ERR; + } + +LBL_ERR: + free(str_b); +LBL_ERR_3: + mp_clear(&a); +LBL_ERR_2: + free(str_a); +LBL_ERR_1: + return t1; +} + +static uint64_t s_time_radix_conversion_write(int size) +{ + int x; + size_t written, length; + mp_err err; + mp_int a, b; + char *str_a; + uint64_t t1; + + + if ((err = mp_init_multi(&a, &b, NULL)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR_1; + } + if ((err = mp_rand(&a, size)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR_2; + } + + if ((err = mp_radix_size(&a, 10, &length)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR_2; + } + + str_a = 
malloc(length + 1); + if (str_a == NULL) { + t1 = UINT64_MAX; + goto LBL_ERR_2; + } + + s_timer_start(); + for (x = 0; x < s_number_of_test_loops; x++) { + if ((err = mp_to_radix(&a, str_a, length, &written, 10)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR; /* str_a is allocated by now and must be freed */ + } + } + t1 = s_timer_stop(); + + if ((err = mp_read_radix(&b, str_a, 10)) != MP_OKAY) { + t1 = UINT64_MAX; + goto LBL_ERR; + } + + if (mp_cmp(&a, &b) != MP_EQ) { + t1 = 0u; + goto LBL_ERR; + } + + +LBL_ERR: + free(str_a); +LBL_ERR_2: + mp_clear_multi(&a, &b, NULL); +LBL_ERR_1: + return t1; +} + + struct tune_args { int testmode; int verbose; @@ -238,11 +401,13 @@ static void s_usage(char *s) fprintf(stderr," (Not for computing the cut-offs!)\n"); fprintf(stderr," -s 'preset' use values in 'preset' for printing.\n"); fprintf(stderr," 'preset' is a comma separated string with cut-offs for\n"); - fprintf(stderr," ksm, kss, tc3m, tc3s in that order\n"); + fprintf(stderr," ksm, kss, tc3m, tc3s, rcr, rcw in that order\n"); fprintf(stderr," ksm = karatsuba multiplication\n"); fprintf(stderr," kss = karatsuba squaring\n"); fprintf(stderr," tc3m = Toom-Cook 3-way multiplication\n"); fprintf(stderr," tc3s = Toom-Cook 3-way squaring\n"); + fprintf(stderr," rcr = Fast radix conversion, reading\n"); + fprintf(stderr," rcw = Fast radix conversion, writing\n"); fprintf(stderr," Implies '-p'\n"); fprintf(stderr," -h this message\n"); exit(s_exit_code); @@ -251,10 +416,11 @@ static void s_usage(char *s) struct cutoffs { int MUL_KARATSUBA, SQR_KARATSUBA; int MUL_TOOM, SQR_TOOM; + int RADIX_READ, RADIX_WRITE; }; const struct cutoffs max_cutoffs = -{ INT_MAX, INT_MAX, INT_MAX, INT_MAX }; +{ INT_MAX, INT_MAX, INT_MAX, INT_MAX,INT_MAX, INT_MAX }; static void set_cutoffs(const struct cutoffs *c) { @@ -262,6 +428,8 @@ static void set_cutoffs(const struct cutoffs *c) MP_SQR_KARATSUBA_CUTOFF = c->SQR_KARATSUBA; MP_MUL_TOOM_CUTOFF = c->MUL_TOOM; MP_SQR_TOOM_CUTOFF = c->SQR_TOOM; + MP_RADIX_READ_CUTOFF = c->RADIX_READ; + 
MP_RADIX_WRITE_CUTOFF = c->RADIX_WRITE; } static void get_cutoffs(struct cutoffs *c) @@ -270,7 +438,8 @@ static void get_cutoffs(struct cutoffs *c) c->SQR_KARATSUBA = MP_SQR_KARATSUBA_CUTOFF; c->MUL_TOOM = MP_MUL_TOOM_CUTOFF; c->SQR_TOOM = MP_SQR_TOOM_CUTOFF; - + c->RADIX_READ = MP_RADIX_READ_CUTOFF; + c->RADIX_WRITE = MP_RADIX_WRITE_CUTOFF; } int main(int argc, char **argv) @@ -416,13 +585,17 @@ int main(int argc, char **argv) s_usage(argv[0]); } str = argv[opt]; - MP_MUL_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[1/4] No value for MP_MUL_KARATSUBA_CUTOFF given"); + MP_MUL_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[1/6] No value for MP_MUL_KARATSUBA_CUTOFF given"); + str = endptr + 1; + MP_SQR_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[2/6] No value for MP_SQR_KARATSUBA_CUTOFF given"); str = endptr + 1; - MP_SQR_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[2/4] No value for MP_SQR_KARATSUBA_CUTOFF given"); + MP_MUL_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[3/6] No value for MP_MUL_TOOM_CUTOFF given"); str = endptr + 1; - MP_MUL_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[3/4] No value for MP_MUL_TOOM_CUTOFF given"); + MP_SQR_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[4/6] No value for MP_SQR_TOOM_CUTOFF given"); str = endptr + 1; - MP_SQR_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[4/4] No value for MP_SQR_TOOM_CUTOFF given"); + MP_RADIX_READ_CUTOFF = (int)s_strtol(str, &endptr, "[5/6] No value for MP_RADIX_READ_CUTOFF given"); + str = endptr + 1; + MP_RADIX_WRITE_CUTOFF = (int)s_strtol(str, &endptr, "[6/6] No value for MP_RADIX_WRITE_CUTOFF given"); break; case 'h': s_exit_code = EXIT_SUCCESS; @@ -461,31 +634,64 @@ int main(int argc, char **argv) T_MUL_SQR("Karatsuba squaring", SQR_KARATSUBA, s_time_sqr), T_MUL_SQR("Toom-Cook 3-way multiplying", MUL_TOOM, s_time_mul), T_MUL_SQR("Toom-Cook 3-way squaring", SQR_TOOM, s_time_sqr), + /* TODO: adapt macro above (or the names of the cutoffs and/or functions) */ + { + "\"Faster radix conversion 
(reading)\"", &MP_RADIX_READ_CUTOFF, + &(updated.RADIX_READ),MP_HAS(S_MP_FASTER_READ_RADIX) ? s_time_radix_conversion_read : NULL + }, + { + "\"Faster radix conversion (writing)\"", &MP_RADIX_WRITE_CUTOFF, + &(updated.RADIX_WRITE),MP_HAS(S_MP_FASTER_TO_RADIX) ? s_time_radix_conversion_write : NULL + } + #undef T_MUL_SQR }; /* Turn all limits from bncore.c to the max */ set_cutoffs(&max_cutoffs); - for (n = 0; n < sizeof(test)/sizeof(test[0]); ++n) { + + for (n = 0; n < (sizeof(test)/sizeof(test[0]) - 2); ++n) { if (test[n].fn != NULL) { s_run(test[n].name, test[n].fn, test[n].cutoff); *test[n].update = *test[n].cutoff; *test[n].cutoff = INT_MAX; + }; + } + + /* We need the updated fast multiplication cutoffs for the radix conversion, set them */ + for (n = 0; n < (sizeof(test)/sizeof(test[0]) - 2); ++n) { + if (test[n].fn != NULL) { + *test[n].cutoff = *test[n].update; + }; + } + + /* Cutoffs for radix conversions are in bits to make handling of 62 different radices easier */ + for (; n < sizeof(test)/sizeof(test[0]); ++n) { + if (test[n].fn != NULL) { + s_run(test[n].name, test[n].fn, test[n].cutoff); + /* TODO: can overflow for small INT_MAX */ + *test[n].update = (*test[n].cutoff) * MP_DIGIT_BIT; + *test[n].cutoff = INT_MAX; } } + } if (args.terse == 1) { - printf("%d %d %d %d\n", + printf("%d %d %d %d %d %d\n", updated.MUL_KARATSUBA, updated.SQR_KARATSUBA, updated.MUL_TOOM, - updated.SQR_TOOM); + updated.SQR_TOOM, + updated.RADIX_READ, + updated.RADIX_WRITE); } else { printf("MUL_KARATSUBA_CUTOFF = %d\n", updated.MUL_KARATSUBA); printf("SQR_KARATSUBA_CUTOFF = %d\n", updated.SQR_KARATSUBA); printf("MUL_TOOM_CUTOFF = %d\n", updated.MUL_TOOM); printf("SQR_TOOM_CUTOFF = %d\n", updated.SQR_TOOM); + printf("RADIX_READ_CUTOFF = %d\n", updated.RADIX_READ); + printf("RADIX_WRITE_CUTOFF = %d\n", updated.RADIX_WRITE); } - + /* TODO: add graphs for radix conversion, too? 
*/ if (args.print == 1) { printf("Printing data for graphing to \"%s\" and \"%s\"\n",mullog, sqrlog); diff --git a/etc/tune_it.sh b/etc/tune_it.sh index dba5b696..625a20be 100755 --- a/etc/tune_it.sh +++ b/etc/tune_it.sh @@ -56,7 +56,7 @@ KEEP_TEMP=1 echo "You might like to watch the numbers go up to $LIMIT but it will take a long time!" # Might not have sufficient rights or disc full. -echo "km ks tc3m tc3s" > $FILE_NAME || die "Writing header to $FILE_NAME" $? +echo "km ks tc3m tc3s rcr rcw" > $FILE_NAME || die "Writing header to $FILE_NAME" $? i=1 while [ $i -le $LIMIT ]; do RNUM=$(LCG) @@ -104,3 +104,23 @@ echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die TMP=$(median $FILE_NAME 4 $i) echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP" echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3s) Appending to $TOMMATH_CUTOFFS_H" $? + +TMP=$(median $FILE_NAME 5 $i) +echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP" +echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcr) Appending to $TOMMATH_CUTOFFS_H" $? +TMP=$(median $FILE_NAME 6 $i) +echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP" +echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcw) Appending to $TOMMATH_CUTOFFS_H" $? 
+ + + + + + + + + + + + + diff --git a/helper.pl b/helper.pl index ffc592a7..7026f53c 100755 --- a/helper.pl +++ b/helper.pl @@ -476,6 +476,8 @@ sub generate_def { MP_SQR_KARATSUBA_CUTOFF MP_MUL_TOOM_CUTOFF MP_SQR_TOOM_CUTOFF + MP_RADIX_READ_CUTOFF + MP_RADIX_WRITE_CUTOFF "; return 0; } diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj index 71dd3807..56b6f044 100644 --- a/libtommath_VS2008.vcproj +++ b/libtommath_VS2008.vcproj @@ -836,6 +836,18 @@ RelativePath="s_mp_exptmod_fast.c" > + + + + + + @@ -912,6 +924,14 @@ RelativePath="s_mp_rand_platform.c" > + + + + diff --git a/makefile b/makefile index 8f211f5f..92fa9690 100644 --- a/makefile +++ b/makefile @@ -44,13 +44,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \ -s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ -s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ -s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ -s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ -s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ -s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ -s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \ +s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \ +s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o 
s_mp_mul.o \ +s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ +s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ +s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \ +s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \ +s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o #END_INS diff --git a/makefile.mingw b/makefile.mingw index e2445e8a..f90d3ecc 100644 --- a/makefile.mingw +++ b/makefile.mingw @@ -46,13 +46,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \ -s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ -s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ -s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ -s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ -s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ -s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ -s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \ +s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \ +s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ +s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o 
s_mp_mul_karatsuba.o \ +s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ +s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \ +s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \ +s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.msvc b/makefile.msvc index 8feb425c..60462911 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -42,13 +42,14 @@ mp_reduce_setup.obj mp_root_n.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj mp_set mp_set_l.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj mp_shrink.obj mp_signed_rsh.obj mp_sqrmod.obj mp_sqrt.obj \ mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj \ mp_unpack.obj mp_warray_free.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj \ -s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_fp_log.obj \ -s_mp_fp_log_d.obj s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj \ -s_mp_montgomery_reduce_comba.obj s_mp_mul.obj s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj \ -s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj \ -s_mp_radix_map.obj s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj \ -s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_warray.obj s_mp_warray_get.obj s_mp_warray_put.obj \ -s_mp_zero_buf.obj s_mp_zero_digs.obj +s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj \ +s_mp_faster_read_radix.obj s_mp_faster_to_radix.obj s_mp_floor_ilog2.obj s_mp_fp_log.obj s_mp_fp_log_d.obj \ +s_mp_get_bit.obj s_mp_invmod.obj 
s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \ +s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \ +s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj \ +s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_slower_read_radix.obj s_mp_slower_to_radix.obj \ +s_mp_sqr.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_warray.obj \ +s_mp_warray_get.obj s_mp_warray_put.obj s_mp_zero_buf.obj s_mp_zero_digs.obj HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.shared b/makefile.shared index 50c33526..887303a3 100644 --- a/makefile.shared +++ b/makefile.shared @@ -41,13 +41,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \ -s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ -s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ -s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ -s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ -s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ -s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ -s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \ +s_mp_faster_read_radix.o s_mp_faster_to_radix.o 
s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \ +s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ +s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ +s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ +s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \ +s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \ +s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o #END_INS diff --git a/makefile.unix b/makefile.unix index 58642098..00bc3265 100644 --- a/makefile.unix +++ b/makefile.unix @@ -47,13 +47,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \ mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \ -s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \ -s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \ -s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \ -s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \ -s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \ -s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \ -s_mp_zero_buf.o s_mp_zero_digs.o +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \ +s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \ +s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o 
s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \ +s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \ +s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \ +s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \ +s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \ +s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o HEADERS_PUB=tommath.h diff --git a/mp_cutoffs.c b/mp_cutoffs.c index 45b0beec..6e6bf005 100644 --- a/mp_cutoffs.c +++ b/mp_cutoffs.c @@ -8,7 +8,9 @@ int MP_MUL_KARATSUBA_CUTOFF = MP_DEFAULT_MUL_KARATSUBA_CUTOFF, MP_SQR_KARATSUBA_CUTOFF = MP_DEFAULT_SQR_KARATSUBA_CUTOFF, MP_MUL_TOOM_CUTOFF = MP_DEFAULT_MUL_TOOM_CUTOFF, - MP_SQR_TOOM_CUTOFF = MP_DEFAULT_SQR_TOOM_CUTOFF; + MP_SQR_TOOM_CUTOFF = MP_DEFAULT_SQR_TOOM_CUTOFF, + MP_RADIX_READ_CUTOFF = MP_DEFAULT_RADIX_READ_CUTOFF, + MP_RADIX_WRITE_CUTOFF = MP_DEFAULT_RADIX_WRITE_CUTOFF; #endif #endif diff --git a/mp_fread.c b/mp_fread.c index 53c35e82..baf539f6 100644 --- a/mp_fread.c +++ b/mp_fread.c @@ -4,10 +4,11 @@ /* SPDX-License-Identifier: Unlicense */ #ifndef MP_NO_FILE + /* read a bigint from a file stream in ASCII */ mp_err mp_fread(mp_int *a, int radix, FILE *stream) { - mp_err err; + mp_err err = MP_OKAY; mp_sign sign = MP_ZPOS; int ch; @@ -47,19 +48,17 @@ mp_err mp_fread(mp_int *a, int radix, FILE *stream) } /* shift up and add */ - if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) { - return err; - } - if ((err = mp_add_d(a, y, a)) != MP_OKAY) { - return err; - } + if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add_d(a, y, a)) != MP_OKAY) goto LBL_ERR; + } while ((ch = fgetc(stream)) != EOF); if (!mp_iszero(a)) { a->sign = sign; } - return MP_OKAY; +LBL_ERR: + return err; } #endif diff --git a/mp_fwrite.c b/mp_fwrite.c index 8ea9d327..93f59d58 100644 --- a/mp_fwrite.c +++ 
b/mp_fwrite.c @@ -7,7 +7,7 @@ mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream) { char *buf; - mp_err err; + mp_err err = MP_OKAY; size_t size, written; if ((err = mp_radix_size_overestimate(a, radix, &size)) != MP_OKAY) { diff --git a/mp_prime_is_prime.c b/mp_prime_is_prime.c index bb24f594..99bfb0f8 100644 --- a/mp_prime_is_prime.c +++ b/mp_prime_is_prime.c @@ -3,16 +3,6 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* portable integer log of two with small footprint */ -static unsigned int s_floor_ilog2(int value) -{ - unsigned int r = 0; - while ((value >>= 1) != 0) { - r++; - } - return r; -} - mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) { mp_int b; @@ -186,7 +176,7 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) * Hence the ugly type-fiddling in the following code. */ size_a = mp_count_bits(a); - mask = (1u << s_floor_ilog2(size_a)) - 1u; + mask = (1u << s_mp_floor_ilog2(size_a)) - 1u; /* Assuming the General Rieman hypothesis (never thought to write that in a comment) the upper bound can be lowered to 2*(log a)^2. 
diff --git a/mp_read_radix.c b/mp_read_radix.c index 28e6eb60..adfbc70d 100644 --- a/mp_read_radix.c +++ b/mp_read_radix.c @@ -3,11 +3,29 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ +#ifdef MP_USE_MEMOPS +# include +# define MP_STRLEN(s) strlen(s) +#else +static size_t s_mp_strlen(const char *s) +{ + const char *p; + p = s; + while (*p != '\0') { + p++; + } + return (size_t)(p - s); +} +# define MP_STRLEN(s) s_mp_strlen(s) +#endif + /* read a string [ASCII] in a given radix */ mp_err mp_read_radix(mp_int *a, const char *str, int radix) { - mp_err err; + + mp_err err = MP_OKAY; mp_sign sign = MP_ZPOS; + size_t slen, slen_2; /* make sure the radix is ok */ if ((radix < 2) || (radix > 64)) { @@ -22,48 +40,67 @@ mp_err mp_read_radix(mp_int *a, const char *str, int radix) sign = MP_NEG; } - /* set the integer to the default of zero */ - mp_zero(a); + slen = MP_STRLEN(str); + /* "slen" is log_b(str) with b = radix and with log_a(x) = log_b(x)/log_b(a) we can use log_2(str) = slen/log_b(2) + but we do not have floats (there is a fixed point version of log_2(x) in s_mp_fp_log_d.c, though). + We could use a table with rational approximations instead which costs quite some stack-memory because + we would need it for every MP_DIGIT_BIT size.. - /* process each digit of the string */ - while (*str != '\0') { - /* if the radix <= 36 the conversion is case insensitive - * this allows numbers like 1AB and 1ab to represent the same value - * [e.g. in hex] - */ - uint8_t y; - char ch = (radix <= 36) ? (char)MP_TOUPPER((int)*str) : *str; - unsigned pos = (unsigned)(ch - '+'); - if (MP_RADIX_MAP_REVERSE_SIZE <= pos) { - break; - } - y = s_mp_radix_map_reverse[pos]; - - /* if the char was found in the map - * and is less than the given radix add it - * to the number, otherwise exit the loop. 
- */ - if (y >= radix) { - break; - } - if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) { - return err; - } - if ((err = mp_add_d(a, y, a)) != MP_OKAY) { - return err; - } - ++str; + So to keep things simple we restrict our length checks to bases 10 and powers of two for now. + + Bases that are a power of two are the easiest: just multiply "slen" with log_2(radix) to get the bits + necessary to compute the bit-size. The result shall not be bigger than (MP_MAX_DIGIT_COUNT - 2) * MP_DIGIT_BIT. + This is in almost all cases an overestimate because the MSD is most likely not full but only by a couple of bits, + at most 63 in case of radix = 64. That means that at most 5 limbs (MP_16BIT), 3 limbs (MP_28BIT and MP_31BIT), + or 2 limbs (MP_64BIT) are going to waste. + + For base 10 and (INT_MAX = 2^(31) - 1) the limits are: + MP_DIGIT_BIT limit tested notes + 15 2183 y (with INT_MAX = 32767) which is MP_MAX_DIGIT_COUNT - 1 + 28 76695844 y + 31 69273664 n Not tested because there is no FFT for MP_31BIT and I + don't have the patients of a Buddha. + 60 35791392 y + + */ + + if (MP_IS_2EXPT((unsigned int)radix) && + ((slen * (size_t) s_mp_log2_radix[radix]) > ((MP_MAX_DIGIT_COUNT - 2) * MP_DIGIT_BIT))) { + return MP_OVF; + } else if ((radix == 10) && (slen > +#if (MP_DIGIT_BIT == 15) + 2183 +#elif (MP_DIGIT_BIT == 28) + 76695844 +#elif (MP_DIGIT_BIT == 31) + 69273664 +#elif (MP_DIGIT_BIT == 60) + 35791392 +#endif + )) { + return MP_OVF; } - /* if an illegal character was found, fail. 
*/ - if ((*str != '\0') && (*str != '\r') && (*str != '\n')) { - return MP_VAL; + /* Roughly (over)estimate bit-size for cutoff by assuming slen to be ceil(log_{radix}(input)) + so bits(slen) ~ slen_{radix} * ceil(log_2(radix)) */ + slen_2 = slen * (size_t)(s_mp_log2_radix[radix] + 1); + + mp_zero(a); + + /* Use the faster version at or above the cutoff; below it the slower one wins (the faster one would only defer to it) */ + if (MP_HAS(S_MP_FASTER_READ_RADIX) && (slen_2 >= (size_t)MP_RADIX_READ_CUTOFF)) { + if ((err = s_mp_faster_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR; + } else if (MP_HAS(S_MP_SLOWER_READ_RADIX)) { + if ((err = s_mp_slower_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR; } /* set the sign only if a != 0 */ if (!mp_iszero(a)) { a->sign = sign; } - return MP_OKAY; + +LTM_ERR: + return err; } + #endif diff --git a/mp_to_radix.c b/mp_to_radix.c index 1e5e6711..09435ac8 100644 --- a/mp_to_radix.c +++ b/mp_to_radix.c @@ -3,17 +3,6 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* reverse an array, used for radix code */ -static void s_reverse(char *s, size_t len) -{ - size_t ix = 0, iy = len - 1u; - while (ix < iy) { - MP_EXCH(char, s[ix], s[iy]); - ++ix; - --iy; - } -} - /* stores a bignum as a ASCII string in a given radix (2..64) * * Stores upto "size - 1" chars and always a NULL byte, puts the number of characters @@ -21,11 +10,9 @@ static void s_reverse(char *s, size_t len) */ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) { - size_t digs; - mp_err err; - mp_int t; - mp_digit d; - char *_s = str; + mp_err err = MP_VAL; /* returned as-is if no conversion backend is compiled in */ + mp_int a_bar = *a; + size_t part_written = 0; /* check range of radix and size*/ if (maxlen < 2u) { @@ -35,6 +22,11 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i return MP_VAL; } + /* Check upper limits */ + if (!MP_IS_2EXPT((unsigned int)radix) && (a->used > (MP_MAX_DIGIT_COUNT - 4))) { + return MP_OVF; + } + /* quick out if its zero */ if 
(mp_iszero(a)) { *str++ = '0'; @@ -45,50 +37,33 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i return MP_OKAY; } - if ((err = mp_init_copy(&t, a)) != MP_OKAY) { - return err; - } - /* if it is negative output a - */ - if (mp_isneg(&t)) { - /* we have to reverse our digits later... but not the - sign!! */ - ++_s; - + if (mp_isneg(a)) { /* store the flag and mark the number as positive */ *str++ = '-'; - t.sign = MP_ZPOS; + a_bar.sign = MP_ZPOS; /* subtract a char */ --maxlen; } - digs = 0u; - while (!mp_iszero(&t)) { - if (--maxlen < 1u) { - /* no more room */ - err = MP_BUF; - goto LBL_ERR; - } - if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) { - goto LBL_ERR; - } - *str++ = s_mp_radix_map[d]; - ++digs; - } - /* reverse the digits of the string. In this case _s points - * to the first digit [excluding the sign] of the number - */ - s_reverse(_s, digs); - /* append a NULL so the string is properly terminated */ - *str = '\0'; - digs++; + + if (MP_HAS(S_MP_FASTER_TO_RADIX) && (a->used > (MP_RADIX_WRITE_CUTOFF / MP_DIGIT_BIT))) { + if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY) goto LBL_ERR; + } else if (MP_HAS(S_MP_SLOWER_TO_RADIX)) { + char *start = str; + if ((err = s_mp_slower_to_radix(&a_bar, &str, &maxlen, &part_written, radix, false)) != MP_OKAY) goto LBL_ERR; + str = start; + /* part_written does not count EOS */ + part_written++; + } if (written != NULL) { - *written = mp_isneg(a) ? (digs + 1u): digs; + part_written += mp_isneg(a) ? 
1: 0; + *written = part_written; } LBL_ERR: - mp_clear(&t); return err; } diff --git a/s_mp_faster_read_radix.c b/s_mp_faster_read_radix.c new file mode 100644 index 00000000..99e2d9c8 --- /dev/null +++ b/s_mp_faster_read_radix.c @@ -0,0 +1,46 @@ +#include "tommath_private.h" +#ifdef S_MP_FASTER_READ_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + + +mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) +{ + size_t len, mid; + mp_int A, B, m; + mp_digit radix_ = (mp_digit)radix; + mp_err err = MP_OKAY; + + len = end - start; + + if ((len * (size_t)(s_mp_log2_radix[radix]) + 1) < (size_t)MP_RADIX_READ_CUTOFF) { + return s_mp_slower_read_radix(a, str, start, end, radix); + } + mid = len / 2u; + + if ((err = mp_init_set(&m, radix_)) != MP_OKAY) { + return err; + } + if ((err = mp_init_multi(&A, &B, NULL)) != MP_OKAY) { + mp_clear(&m); + return err; + } + + if ((err = s_mp_faster_read_radix(&A, str, start, start + mid + 1, radix)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_faster_read_radix(&B, str, start + mid + 1, end, radix)) != MP_OKAY) goto LTM_ERR; + + if (MP_IS_2EXPT((unsigned int)radix_)) { + if ((err = mp_mul_2d(&A, (int)(((len - mid) - 1u) * s_mp_log2_radix[radix_]), &A)) != MP_OKAY)goto LTM_ERR; + } else { + if ((err = mp_expt_n(&m, (int)((len - mid) - 1u), &m)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_mul(&A, &m, &A)) != MP_OKAY) goto LTM_ERR; + } + if ((err = mp_add(&A, &B, a)) != MP_OKAY) goto LTM_ERR; + +LTM_ERR: + mp_clear_multi(&A, &B, &m, NULL); + return err; +} + + +#endif diff --git a/s_mp_faster_to_radix.c b/s_mp_faster_to_radix.c new file mode 100644 index 00000000..8a6bb69f --- /dev/null +++ b/s_mp_faster_to_radix.c @@ -0,0 +1,252 @@ +#include "tommath_private.h" +#ifdef S_MP_FASTER_TO_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +static int 
s_mp_compute_s(int t, int k) +{ + uint32_t r = 0u; + int log2_intmax, log2_k; + + log2_k = (int)s_mp_floor_ilog2(k) + 1; + log2_intmax = (int)s_mp_floor_ilog2(INT_MAX) + 1; + + /* Rough first check for overflow */ + if (t > (log2_intmax - log2_k)) { + return 0; + } + + r = 1u << t; + r = r * (uint32_t)k; + + /* Final check for overflow */ + return (r > (MP_MAX_DIGIT_COUNT * MP_DIGIT_BIT)) ? 0 : (int)r; +} + +static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, + int radix, int32_t k, int32_t t, bool pad, bool first, mp_int *P, mp_int *R) +{ + mp_int r, q, a1; + mp_err err; + int Beta; + + if (t < 0) { + /* Print the string from the number given */ + if ((err = s_mp_slower_to_radix(a, str, part_maxlen, part_written, radix, pad)) != MP_OKAY) goto LTM_ERR; + } else { + if ((err = mp_init_multi(&q, &r, &a1, NULL)) != MP_OKAY) goto LTM_ERR; + if (MP_IS_POWER_OF_TWO(&P[t])) { + if ((err = mp_div_2d(a, mp_count_bits(&P[t]) - 1, &q, &r)) != MP_OKAY) goto LTM_ERR; + } else if (first) { + /* Largest division, only one time, no reason for Barrett division in the first place */ + if ((err = mp_div(a, &P[t], &q, &r)) != MP_OKAY) goto LTM_ERR; + /* Release early and often, they say. */ + mp_clear(&P[t]); + mp_clear(&R[t]); + } else { + /* + Barrett reduction. A step by step proof can be found at + https://www.nayuki.io/page/barrett-reduction-algorithm + + See also: Modern Computer Arithmetic, version 0.5.9, page 59 + */ + Beta = (int)s_mp_compute_s(t+1, k); + if (Beta == 0) { + err = MP_OVF; + goto LTM_ERR; + } + /* Q = floor(A1 * I / 2^Beta) */ + /* I = floor( (2^(2*Beta)) / B) Here we have R[t] = I, P[t] = B */ + /* TODO: We don't need the full "a" only the upper part: a = a_1\beta + a_0 with 0 < a_0 < \beta + The cutoff with s_mp_mul_high is so low that the gap between that and the general cutoff + is too small to be worth the hassle. + But if somebody implements Thom Mulders' short products... + (There are successors.
See e.g. D. Harvey and P. Zimmermann "Short Division of Long Integers", + Laszlo Hars "Fast Truncated Multiplication for Cryptographic Applications", Daniel Lemire "Exact Short + Products From Truncated Multipliers", and many^Wsome more. + */ + if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY) goto LTM_ERR; + + /* R = A - Q*B */ + /* TODO: Q*B can be a low short-product */ + if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_sub(a, &r, &r)) != MP_OKAY) goto LTM_ERR; + + /* We can use this simple correction because of the way we computed the reciprocal */ + if (r.sign == MP_NEG) { + if ((err = mp_decr(&q)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY) goto LTM_ERR; + } + } + /* Go down the lists while climbing up the tree. */ + t--; + + if (mp_iszero(&q) && (!pad)) { + if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, + k, t, false, false, P, R)) != MP_OKAY) goto LTM_ERR; + } else { + if ((err = s_mp_to_radix_recursive(&q, str, part_maxlen, part_written, radix, + k, t, pad, false, P, R)) != MP_OKAY) goto LTM_ERR; + if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, + k, t, true, false, P, R)) != MP_OKAY) goto LTM_ERR; + } + mp_clear_multi(&q, &r, &a1, NULL); + } + + err = MP_OKAY; +LTM_ERR: + return err; +} + +mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) +{ + mp_err err = MP_OKAY; + int n = 0, k, t = 0, steps = 0, ilog2a, s; + + /* Use given buffer directly, no temporary buffers for the individual chunks */ + char **sptr = &str; + /* Size of the chunk */ + size_t part_written = 0; + size_t part_maxlen = maxlen; + + /* List of reciprocals */ + mp_int *R = NULL; + /* List of moduli */ + mp_int *P = NULL; + + mp_int T; + + + /* Denominator for the reciprocal: b^y. 
*/ + if ((err = mp_init_set(&T, (mp_digit)radix)) != MP_OKAY) { + return err; + } + if ((err = mp_expt_n(&T, (int)s_mp_radix_exponent_y[radix], &T)) != MP_OKAY) goto LTM_ERR0; + n = (int)T.dp[0]; + + /* Numerator of the reciprocal: ceil(log_2(n)) */ + k = (int)s_mp_floor_ilog2(n) + 1; + + /* steps = floor(log_2(floor(log_2(a))))*/ + ilog2a = mp_count_bits(a) - 1; + + /* Cutoff at about twice the size of P[0]. */ + if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR)) { + if ((err = s_mp_slower_to_radix(a, sptr, &part_maxlen, &part_written, radix, false)) != MP_OKAY) goto LTM_ERR; + /* part_written does not count EOS */ + *written = part_written + 1; + mp_clear(&T); + return err; + } + /* + floor(log_2(floor(log_2(a)))) is a bit too much (we do not start at zero) + but we check for the end inside the loop and the list is just a list of pointers, + not much memory wasted here if we allocate too much steps. + */ + steps = (int)s_mp_floor_ilog2(ilog2a) + 1; + /* Allocate memory for list of reciprocals */ + R = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int)); + if (R == NULL) { + return MP_MEM; + } + /* Allocate memory for list of moduli */ + P = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int)); + if (P == NULL) { + MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int)); + return MP_MEM; + } + + /* + The approximation for the reciprocal used in Barrett's method is + R_t = ceil(2^((2^t)*k)/n^(2^t)) + with R_0 = (2^(2*k))/b^y and k = ceil(log_2(n)) as computed above. + */ + + /* To get the tree a bit flatter. 
Alternative: do it iteratively instead of recursively */ + k = k * MP_RADIX_BARRETT_START_MULTIPLICATOR; + + + /* Compute initial reciprocal R[0] and expand it (R[0]^(2^k)) */ + if ((err = mp_init_i32(&P[0], n)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_expt_n(&P[0], MP_RADIX_BARRETT_START_MULTIPLICATOR, &P[0])) != MP_OKAY) goto LTM_ERR; + if ((err = mp_init(&R[0])) != MP_OKAY) goto LTM_ERR; + if ((err = mp_2expt(&R[0], 2*k)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_incr(&R[0])) != MP_OKAY) goto LTM_ERR; + + + /* Compute the rest of the reciprocals as needed */ + for (t = 1; t < steps; t++) { + /* P_t = (b^y)^(2^t) = n^(2^t) */ + /* + We cannot just square because it can + a) overflow MP_MAX_DIGIT_COUNT + b) it can get bigger than "a" which it shouldn't + which also means that + c) if it gets bigger than "a" we have all necessary + reciprocals and can break out of the loop + */ + + /* P[t-1]^2 > a is most likely more than just a bit or two, so check if we + can bail out early without actually computing the square.
*/ + if ((2 * mp_count_bits(&P[t-1])) > ilog2a) { + /* Correct index */ + t--; + break; + } + + /* Compute denominator */ + if ((err = mp_init(&P[t])) != MP_OKAY) goto LTM_ERR; + /* P[t] = P[t-1]^2 */ + if ((err = mp_sqr(&P[t-1], &P[t])) != MP_OKAY) goto LTM_ERR; + /* Check if P[t] = P[t-1]^2 > a */ + if (mp_cmp(&P[t],a) == MP_GT) { + /* We don't need P[t] anymore */ + mp_clear(&P[t]); + /* Correct index */ + t--; + break; + } + + /* Compute numerator */ + if ((err = mp_init(&R[t])) != MP_OKAY) goto LTM_ERR; + s = s_mp_compute_s(t + 1, k); + /* Overflow, we have enough divisors */ + if (s == 0) { + break; + } + if ((err = mp_2expt(&(R[t]), s)) != MP_OKAY) goto LTM_ERR; + /* Compute reciprocal */ + /* R[t] = floor(2^(2^(t+1) * k) / P[t]) + 1 */ + if (MP_IS_POWER_OF_TWO(&P[t])) { + if ((err = mp_div_2d(&R[t], mp_count_bits(&P[t]) - 1, &R[t], NULL)) != MP_OKAY) goto LTM_ERR; + } else { + if ((radix == 10) && ((2 * mp_count_bits(&P[t])) > ilog2a)) { + break; + } + if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR; + } + if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR; + } + + /* And finally: start the recursion.
*/ + if ((err = s_mp_to_radix_recursive(a, sptr, &part_maxlen, &part_written, radix, + k, t, false, true, P, R)) != MP_OKAY) goto LTM_ERR; + /* part_written does not account for EOS */ + *written = part_written + 1; + +LTM_ERR: + do { + mp_clear(&P[t]); + mp_clear(&R[t]); + } while (t--); + MP_FREE_BUF(P, (size_t) steps * sizeof(mp_int)); + MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int)); +LTM_ERR0: + mp_clear(&T); + return err; +} + + +#endif diff --git a/s_mp_floor_ilog2.c b/s_mp_floor_ilog2.c new file mode 100644 index 00000000..50d44c3b --- /dev/null +++ b/s_mp_floor_ilog2.c @@ -0,0 +1,18 @@ +#include "tommath_private.h" +#ifdef S_MP_FLOOR_ILOG2_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + + + +unsigned int s_mp_floor_ilog2(int value) +{ + unsigned int r = 0; + while ((value >>= 1) != 0) { + r++; + } + return r; +} + + +#endif diff --git a/s_mp_radix_map.c b/s_mp_radix_map.c index 68e21f32..34d77c43 100644 --- a/s_mp_radix_map.c +++ b/s_mp_radix_map.c @@ -16,4 +16,46 @@ const uint8_t s_mp_radix_map_reverse[] = { 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d /* qrstuvwxyz */ }; MP_STATIC_ASSERT(correct_radix_map_reverse_size, sizeof(s_mp_radix_map_reverse) == MP_RADIX_MAP_REVERSE_SIZE) + +/* TODO: Branch out (preproc) if not used */ +/* Exponents chosen such that b^(y) < 2^15 */ +#if ((INT_MAX < 1048576) || (MP_DIGIT_BIT <= 20) ) +const uint8_t s_mp_radix_exponent_y[] = { 0, 0, /* 0 .. 1*/ + 14, 9, 7, 6, 5, 5, 4, 4, /* 2 .. 9 */ + 4, 4, 4, 3, 3, 3, 3, 3, /* 10 .. 17 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 18 .. 25 */ + 3, 3, 3, 3, 3, 3, 2, 2, /* 26 .. 33 */ + 2, 2, 2, 2, 2, 2, 2, 2, /* 34 .. 41 */ + 2, 2, 2, 2, 2, 2, 2, 2, /* 42 .. 49 */ + 2, 2, 2, 2, 2, 2, 2, 2, /* 50 .. 57 */ + 2, 2, 2, 2, 2, 2, 2 /* 58 .. 64 */ + }; +#else +/* Exponents chosen such that b^(y) <= 2^20 */ +const uint8_t s_mp_radix_exponent_y[] = { 0, 0, /* 0 .. 1*/ + 20, 12, 10, 8, 7, 7, 6, 6, /* 2 ..
9 */ + 6, 5, 5, 5, 5, 5, 5, 4, /* 10 .. 17 */ + 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 25 */ + 4, 4, 4, 4, 4, 4, 4, 3, /* 26 .. 33 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 34 .. 41 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 42 .. 49 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 50 .. 57 */ + 3, 3, 3, 3, 3, 3, 3 /* 58 .. 64 */ + }; +#endif +MP_STATIC_ASSERT(correct_radix_exponent_y, sizeof(s_mp_radix_exponent_y) == MP_RADIX_EXPONENT_Y_SIZE) + +/* floor(log_2(radix)) */ +const uint8_t s_mp_log2_radix[] = { 0, 0, /* 0 .. 1*/ + 1, 1, 2, 2, 2, 2, 3, 3, /* 2 .. 9 */ + 3, 3, 3, 3, 3, 3, 4, 4, /* 10 .. 17 */ + 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 25 */ + 4, 4, 4, 4, 4, 4, 5, 5, /* 26 .. 33 */ + 5, 5, 5, 5, 5, 5, 5, 5, /* 34 .. 41 */ + 5, 5, 5, 5, 5, 5, 5, 5, /* 42 .. 49 */ + 5, 5, 5, 5, 5, 5, 5, 5, /* 50 .. 57 */ + 5, 5, 5, 5, 5, 5, 6 /* 58 .. 64 */ + }; +MP_STATIC_ASSERT(correct_log2_radix, sizeof(s_mp_log2_radix) == MP_LOG2_RADIX_SIZE) + #endif diff --git a/s_mp_slower_read_radix.c b/s_mp_slower_read_radix.c new file mode 100644 index 00000000..7e0e74d6 --- /dev/null +++ b/s_mp_slower_read_radix.c @@ -0,0 +1,41 @@ +#include "tommath_private.h" +#ifdef S_MP_SLOWER_READ_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) +{ + mp_err err; + size_t i; + + /* checks are done by caller */ + char *_s = (char *)(str + start); + + for (i = start; (i < end) && (*_s != '\0') ; i++) { + uint8_t y; + + char ch = (radix <= 36) ?
(char)MP_TOUPPER((int)*_s) : *_s; + unsigned int pos = (unsigned int)(ch - '+'); + + if (MP_RADIX_MAP_REVERSE_SIZE <= pos) { + err = MP_VAL; + goto LBL_ERR; + } + y = s_mp_radix_map_reverse[pos]; + if (y >= radix) { + err = MP_VAL; + goto LBL_ERR; + } + if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY) goto LBL_ERR; + _s++; + } + + return MP_OKAY; +LBL_ERR: + mp_zero(a); + return err; +} + + +#endif diff --git a/s_mp_slower_to_radix.c b/s_mp_slower_to_radix.c new file mode 100644 index 00000000..a21e6c08 --- /dev/null +++ b/s_mp_slower_to_radix.c @@ -0,0 +1,76 @@ +#include "tommath_private.h" +#ifdef S_MP_SLOWER_TO_RADIX_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +static void s_reverse(char *s, size_t len) +{ + size_t ix = 0, iy = len - 1u; + while (ix < iy) { + MP_EXCH(char, s[ix], s[iy]); + ++ix; + --iy; + } +} + +mp_err s_mp_slower_to_radix(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, int radix, bool pad) +{ + size_t digs = 0u; + mp_int t; + mp_digit d; + mp_err err = MP_OKAY; + int ybar = 0; + + /* A temporary pointer to the output string to make reversal simpler */ + char *s = *str; + + /* The number of digits of "radix" to be filled if this chunk is not the most significant one. */ + if (pad) { + ybar = s_mp_radix_exponent_y[radix] * MP_RADIX_BARRETT_START_MULTIPLICATOR; + } + + if ((err = mp_init_copy(&t, a)) != MP_OKAY) goto LTM_ERR; + + while (!mp_iszero(&t)) { + if ((--(*part_maxlen)) < 1u) { + /* no more room */ + err = MP_BUF; + goto LTM_ERR; + } + if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) goto LTM_ERR; + *s++ = s_mp_radix_map[d]; + ++digs; + if (pad) { + ybar--; + } + } + + /* Fill in leading zeros if this chunk does not contain the most significant digits. 
*/ + if (pad) { + while ((ybar-- > 0) && (((*part_maxlen)--) > 0)) { + *s++ = '0'; + digs++; + } + } + + /* "rewind" */ + s = *str; + /* reverse */ + s_reverse(s, digs); + /* step forward */ + *str += digs; + /* Add EOS at the end of every chunk to allow this function to be used stand-alone */ + **str = '\0'; + + if (part_written != NULL) { + *part_written = *part_written + digs; + } + + err = MP_OKAY; +LTM_ERR: + mp_clear(&t); + return err; +} + + +#endif diff --git a/sources.cmake b/sources.cmake index 103e9c09..14262202 100644 --- a/sources.cmake +++ b/sources.cmake @@ -133,6 +133,9 @@ s_mp_div_school.c s_mp_div_small.c s_mp_exptmod.c s_mp_exptmod_fast.c +s_mp_faster_read_radix.c +s_mp_faster_to_radix.c +s_mp_floor_ilog2.c s_mp_fp_log.c s_mp_fp_log_d.c s_mp_get_bit.c @@ -152,6 +155,8 @@ s_mp_prime_tab.c s_mp_radix_map.c s_mp_radix_size_overestimate.c s_mp_rand_platform.c +s_mp_slower_read_radix.c +s_mp_slower_to_radix.c s_mp_sqr.c s_mp_sqr_comba.c s_mp_sqr_karatsuba.c diff --git a/tommath.def b/tommath.def index ed5aa8b0..2628095c 100644 --- a/tommath.def +++ b/tommath.def @@ -132,3 +132,5 @@ EXPORTS MP_SQR_KARATSUBA_CUTOFF MP_MUL_TOOM_CUTOFF MP_SQR_TOOM_CUTOFF + MP_RADIX_READ_CUTOFF + MP_RADIX_WRITE_CUTOFF diff --git a/tommath.h b/tommath.h index 1820d243..36111da4 100644 --- a/tommath.h +++ b/tommath.h @@ -121,7 +121,9 @@ extern int MP_MUL_KARATSUBA_CUTOFF, MP_SQR_KARATSUBA_CUTOFF, MP_MUL_TOOM_CUTOFF, -MP_SQR_TOOM_CUTOFF; +MP_SQR_TOOM_CUTOFF, +MP_RADIX_READ_CUTOFF, +MP_RADIX_WRITE_CUTOFF; #endif /* define this to use lower memory usage routines (exptmods mostly) */ diff --git a/tommath_class.h b/tommath_class.h index 09bb3ea6..038748a2 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -142,6 +142,9 @@ # define S_MP_DIV_SMALL_C # define S_MP_EXPTMOD_C # define S_MP_EXPTMOD_FAST_C +# define S_MP_FASTER_READ_RADIX_C +# define S_MP_FASTER_TO_RADIX_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_FP_LOG_C # define S_MP_FP_LOG_D_C # define S_MP_GET_BIT_C @@ -161,6 
+164,8 @@ # define S_MP_RADIX_MAP_C # define S_MP_RADIX_SIZE_OVERESTIMATE_C # define S_MP_RAND_PLATFORM_C +# define S_MP_SLOWER_READ_RADIX_C +# define S_MP_SLOWER_TO_RADIX_C # define S_MP_SQR_C # define S_MP_SQR_COMBA_C # define S_MP_SQR_KARATSUBA_C @@ -651,6 +656,7 @@ # define MP_RAND_C # define MP_READ_RADIX_C # define MP_SET_C +# define S_MP_FLOOR_ILOG2_C # define S_MP_PRIME_IS_DIVISIBLE_C #endif @@ -738,9 +744,10 @@ #endif #if defined(MP_READ_RADIX_C) -# define MP_ADD_D_C -# define MP_MUL_D_C # define MP_ZERO_C +# define S_MP_FASTER_READ_RADIX_C +# define S_MP_SLOWER_READ_RADIX_C +# define S_MP_STRLEN_C #endif #if defined(MP_REDUCE_C) @@ -935,9 +942,8 @@ #endif #if defined(MP_TO_RADIX_C) -# define MP_CLEAR_C -# define MP_DIV_D_C -# define MP_INIT_COPY_C +# define S_MP_FASTER_TO_RADIX_C +# define S_MP_SLOWER_TO_RADIX_C #endif #if defined(MP_TO_SBIN_C) @@ -1077,6 +1083,47 @@ # define S_MP_MONTGOMERY_REDUCE_COMBA_C #endif +#if defined(S_MP_FASTER_READ_RADIX_C) +# define MP_ADD_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_EXPT_N_C +# define MP_INIT_MULTI_C +# define MP_INIT_SET_C +# define MP_MUL_2D_C +# define MP_MUL_C +# define S_MP_SLOWER_READ_RADIX_C +#endif + +#if defined(S_MP_FASTER_TO_RADIX_C) +# define MP_2EXPT_C +# define MP_ADD_C +# define MP_ADD_D_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_CMP_C +# define MP_CNT_LSB_C +# define MP_COUNT_BITS_C +# define MP_DIV_2D_C +# define MP_DIV_C +# define MP_EXPT_N_C +# define MP_INIT_C +# define MP_INIT_I32_C +# define MP_INIT_MULTI_C +# define MP_INIT_SET_C +# define MP_MUL_C +# define MP_SUB_C +# define MP_SUB_D_C +# define S_MP_COMPUTE_S_C +# define S_MP_FLOOR_ILOG2_C +# define S_MP_SLOWER_TO_RADIX_C +# define S_MP_TO_RADIX_RECURSIVE_C +# define S_MP_ZERO_BUF_C +#endif + +#if defined(S_MP_FLOOR_ILOG2_C) +#endif + #if defined(S_MP_FP_LOG_C) # define MP_2EXPT_C # define MP_ADD_C @@ -1248,6 +1295,18 @@ #if defined(S_MP_RAND_PLATFORM_C) #endif +#if 
defined(S_MP_SLOWER_READ_RADIX_C) +# define MP_ADD_D_C +# define MP_MUL_D_C +# define MP_ZERO_C +#endif + +#if defined(S_MP_SLOWER_TO_RADIX_C) +# define MP_CLEAR_C +# define MP_DIV_D_C +# define MP_INIT_COPY_C +#endif + #if defined(S_MP_SQR_C) # define MP_CLAMP_C # define MP_CLEAR_C diff --git a/tommath_cutoffs.h b/tommath_cutoffs.h index fb841601..20f4d4ec 100644 --- a/tommath_cutoffs.h +++ b/tommath_cutoffs.h @@ -11,3 +11,5 @@ #define MP_DEFAULT_SQR_KARATSUBA_CUTOFF 120 #define MP_DEFAULT_MUL_TOOM_CUTOFF 350 #define MP_DEFAULT_SQR_TOOM_CUTOFF 400 +#define MP_DEFAULT_RADIX_READ_CUTOFF 600 +#define MP_DEFAULT_RADIX_WRITE_CUTOFF 600 diff --git a/tommath_private.h b/tommath_private.h index be620dbc..a2c8dd7d 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -86,8 +86,27 @@ do { \ # define MP_SQR_KARATSUBA_CUTOFF MP_DEFAULT_SQR_KARATSUBA_CUTOFF # define MP_MUL_TOOM_CUTOFF MP_DEFAULT_MUL_TOOM_CUTOFF # define MP_SQR_TOOM_CUTOFF MP_DEFAULT_SQR_TOOM_CUTOFF +# define MP_RADIX_READ_CUTOFF MP_DEFAULT_RADIX_READ_CUTOFF +# define MP_RADIX_WRITE_CUTOFF MP_DEFAULT_RADIX_WRITE_CUTOFF #endif +#ifndef MP_RADIX_BARRETT_START_MULTIPLICATOR +# define MP_RADIX_BARRETT_START_MULTIPLICATOR 10 +/* Better safe than sorry */ +# if (MP_RADIX_BARRETT_START_MULTIPLICATOR <= 0) +# ifdef _MSC_VER +# pragma message("MP_RADIX_BARRETT_START_MULTIPLICATOR must be bigger than zero, setting it to one") +# else +# warning "MP_RADIX_BARRETT_START_MULTIPLICATOR must be bigger than zero, setting it to one" +# endif +# define MP_RADIX_BARRETT_START_MULTIPLICATOR 1 +# endif +#endif + + + + + /* define heap macros */ #ifndef MP_MALLOC /* default to libc stuff */ @@ -234,6 +253,14 @@ MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix, MP_PRIVATE mp_err s_mp_fp_log(const mp_int *a, mp_int *c) MP_WUR; MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR; +MP_PRIVATE unsigned int s_mp_floor_ilog2(int value); + +MP_PRIVATE mp_err s_mp_faster_read_radix(mp_int 
*a, const char *str, size_t start, size_t end, int radix) MP_WUR; +MP_PRIVATE mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR; +MP_PRIVATE mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) MP_WUR; +MP_PRIVATE mp_err s_mp_slower_to_radix(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, + int radix, bool pad) MP_WUR; + #ifdef MP_SMALL_STACK_SIZE #if defined(__GNUC__) @@ -276,9 +303,14 @@ MP_PRIVATE void *s_mp_warray_get(void); MP_PRIVATE void s_mp_warray_put(void *w); #define MP_RADIX_MAP_REVERSE_SIZE 80u +#define MP_RADIX_EXPONENT_Y_SIZE 65u +#define MP_LOG2_RADIX_SIZE 65u extern MP_PRIVATE const char s_mp_radix_map[]; extern MP_PRIVATE const uint8_t s_mp_radix_map_reverse[]; extern MP_PRIVATE const mp_digit s_mp_prime_tab[]; +extern MP_PRIVATE const uint8_t s_mp_radix_exponent_y[]; +extern MP_PRIVATE const uint8_t s_mp_log2_radix[]; + /* number of primes */ #define MP_PRIME_TAB_SIZE 256