diff --git a/demo/test.c b/demo/test.c
index 2fa6e08d..9cc8f7cb 100644
--- a/demo/test.c
+++ b/demo/test.c
@@ -4,6 +4,19 @@
 #define S_MP_RAND_JENKINS_C
 #include "s_mp_rand_jenkins.c"
 
+/* TODO: Make it an environment variable via main.yml?
+         This is for testing only, so no reason to add checks to the build process. */
+#ifdef __has_include
+#   if __has_include (<valgrind/valgrind.h>)
+#      include <valgrind/valgrind.h>
+#   else
+#      define RUNNING_ON_VALGRIND 1
+#   endif
+#else
+#   define RUNNING_ON_VALGRIND 1
+#endif
+
+
 static long rand_long(void)
 {
    long x;
@@ -1157,30 +1170,27 @@ static int test_mp_montgomery_reduce(void)
 
 }
 
+#include <time.h>
 static int test_mp_read_radix(void)
 {
    char buf[4096];
-   size_t written;
+   size_t written, maxlen;
 
-   mp_int a;
-   DOR(mp_init_multi(&a, NULL));
+   int bignum, i, j, k, limit_test;
+   char *buffer, *bcpy, *startb;
+   clock_t start, stop, t_slow, t_fast;
+   double slow = 0.0, fast = 0.0, sum_slow = 0.0, sum_fast = 0.0;
+   double s_bases_slow[65] = {0.0};
+   double s_bases_fast[65] = {0.0};
+
+   mp_int a, b, c;
+   DOR(mp_init_multi(&a, &b, &c, NULL));
 
    DO(mp_read_radix(&a, "123456", 10));
 
    DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10));
    printf(" '123456' a == %s, length = %zu", buf, written);
 
-   /* See comment in mp_to_radix.c */
-   /*
-      if( (err = mp_to_radix(&a, buf, 3u, &written, 10) ) != MP_OKAY)              goto LBL_ERR;
-      printf(" '56' a == %s, length = %zu\n", buf, written);
-
-      if( (err = mp_to_radix(&a, buf, 4u, &written, 10) ) != MP_OKAY)              goto LBL_ERR;
-      printf(" '456' a == %s, length = %zu\n", buf, written);
-      if( (err = mp_to_radix(&a, buf, 30u, &written, 10) ) != MP_OKAY)             goto LBL_ERR;
-      printf(" '123456' a == %s, length = %zu, error = %s\n",
-             buf, written, mp_error_to_string(err));
-   */
    DO(mp_read_radix(&a, "-123456", 10));
    DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10));
    printf("\r '-123456' a == %s, length = %zu", buf, written);
@@ -1198,10 +1208,81 @@ static int test_mp_read_radix(void)
       printf("%s, %lu\n", buf, (unsigned long)a.dp[0] & 3uL);
    }
 
-   mp_clear(&a);
+   /* Save a bit of testing time */
+   if (RUNNING_ON_VALGRIND != 0) {
+      limit_test = 2000;
+   } else {
+      limit_test = 6000;
+   }
+
+   /* Test the fast method with a slightly larger number (about a minute on an older machine) */
+   for (k = 100; k < limit_test; k += 1000) {
+      bignum = k;
+      buffer = (char *)malloc((size_t)(bignum + 2));
+      if (buffer == NULL) {
+         goto LBL_ERR;
+      }
+      DO(mp_rand(&a, bignum / MP_DIGIT_BIT));
+      for (i = 2; i < 65; i++) {
+         start = clock();
+         for (j = 0; j < 100; j++) {
+            DO(mp_to_radix(&a, buffer, (size_t)(bignum + 1), &written, i));
+            mp_zero(&b);
+            DO(mp_read_radix(&b, buffer, i));
+            /* Check roundabout */
+            EXPECT(mp_cmp(&a, &b) == MP_EQ);
+         }
+         stop = clock();
+         t_fast = stop - start;
+
+         start = clock();
+         for (j = 0; j < 100; j++) {
+            maxlen = (size_t)(bignum + 1);
+            bcpy = buffer;
+            /* s_mp_slower_to_radix is very rudimentary and needs some help to work as a stand-alone */
+            startb = bcpy;
+            DO(s_mp_slower_to_radix(&a, &bcpy, &maxlen, &written, i, false));
+            bcpy = startb;
+            mp_zero(&c);
+            DO(s_mp_slower_read_radix(&c, bcpy, 0, strlen(bcpy), i));
+            /* Check roundabout */
+            EXPECT(mp_cmp(&a, &c) == MP_EQ);
+            /* Check against result of fast algorithms above */
+            EXPECT(mp_cmp(&b, &c) == MP_EQ);
+         }
+         stop = clock();
+         t_slow = stop - start;
+
+         slow = (double)t_slow/(double)CLOCKS_PER_SEC;
+         fast = (double)t_fast/(double)CLOCKS_PER_SEC;
+
+         fprintf(stderr,"Bits %d Base %d SLOW: %.10f, FAST: %.10f\n", mp_count_bits(&a), i, slow, fast);
+
+         sum_slow += slow;
+         sum_fast += fast;
+         s_bases_slow[i] += slow;
+         s_bases_fast[i] += fast;
+      }
+      free(buffer);
+   }
+
+   fprintf(stderr,"\nSUM: SLOW: %.10f, FAST: %.10f\n",sum_slow, sum_fast);
+
+   for (i = 2; i < 65; i++) {
+      fprintf(stderr,"Sums for Base %d SLOW: %.10f, FAST: %.10f\n",i, s_bases_slow[i], s_bases_fast[i]);
+   }
+
+   /* Valgrind overhead does not allow for timings. */
+   if ((RUNNING_ON_VALGRIND == 0) && (MP_DIGIT_BIT >= 20)) {
+      /* Very basic check if the fast algorithms are actually faster. */
+      EXPECT(sum_slow > sum_fast);
+   }
+
+
+   mp_clear_multi(&a, &b, &c, NULL);
    return EXIT_SUCCESS;
 LBL_ERR:
-   mp_clear(&a);
+   mp_clear_multi(&a, &b, &c, NULL);
    return EXIT_FAILURE;
 }
 
@@ -2583,7 +2664,7 @@ static int unit_tests(int argc, char **argv)
       T1(mp_prime_next_prime, MP_PRIME_NEXT_PRIME),
       T1(mp_prime_rand, MP_PRIME_RAND),
       T1(mp_rand, MP_RAND),
-      T1(mp_read_radix, MP_READ_RADIX),
+      T2(mp_read_radix,ONLY_PUBLIC_API, MP_READ_RADIX),
       T1(mp_read_write_ubin, MP_TO_UBIN),
       T1(mp_read_write_sbin, MP_TO_SBIN),
       T1(mp_reduce_2k, MP_REDUCE_2K),
@@ -2600,13 +2681,16 @@ static int unit_tests(int argc, char **argv)
       T3(s_mp_div_recursive, ONLY_PUBLIC_API, S_MP_DIV_RECURSIVE, S_MP_DIV_SCHOOL),
       T3(s_mp_div_small, ONLY_PUBLIC_API, S_MP_DIV_SMALL, S_MP_DIV_SCHOOL),
       T2(s_mp_sqr, ONLY_PUBLIC_API, S_MP_SQR),
+
       /* s_mp_mul_comba not (yet) testable because s_mp_mul branches to s_mp_mul_comba automatically */
+
       T2(s_mp_sqr_comba, ONLY_PUBLIC_API, S_MP_SQR_COMBA),
       T2(s_mp_mul_balance, ONLY_PUBLIC_API, S_MP_MUL_BALANCE),
       T2(s_mp_mul_karatsuba, ONLY_PUBLIC_API, S_MP_MUL_KARATSUBA),
       T2(s_mp_sqr_karatsuba, ONLY_PUBLIC_API, S_MP_SQR_KARATSUBA),
       T2(s_mp_mul_toom, ONLY_PUBLIC_API, S_MP_MUL_TOOM),
       T2(s_mp_sqr_toom, ONLY_PUBLIC_API, S_MP_SQR_TOOM)
+
 #undef T3
 #undef T2
 #undef T1
diff --git a/etc/tune.c b/etc/tune.c
index 8ad20289..3f44cb12 100644
--- a/etc/tune.c
+++ b/etc/tune.c
@@ -148,6 +148,169 @@ static uint64_t s_time_sqr(int size)
    return t1;
 }
 
+/* Set cutoff for radix conversion (base 10 only for now but should be good enough) */
+#include <stdlib.h>
+/* Store a malloc'ed string of "length" random decimal digits (no leading zero) in *string. */
+static mp_err random_number(char **string, size_t length)
+{
+   char alphabet[] = "0123456789", *str_cpy;
+
+   *string = malloc(length + 1);
+   if (*string == NULL) {
+      return MP_MEM;
+   }
+   str_cpy = *string;
+   if (length > 0) {
+      /* No leading zeros */
+      do {
+         *str_cpy = alphabet[rand() % 10];
+      } while (*str_cpy == '0');
+      str_cpy++;
+      /* Pre-tested loop: a do-while would underflow "length" and overrun the buffer for length < 2 */
+      while (--length > 0) {
+         *str_cpy++ = alphabet[rand() % 10];
+      }
+   }
+   *str_cpy = '\0';
+
+   return MP_OKAY;
+}
+
+#include <string.h>
+/* Time s_number_of_test_loops base-10 mp_read_radix() calls; UINT64_MAX on error, 0 if the round trip differs. */
+static uint64_t s_time_radix_conversion_read(int size)
+{
+   int x;
+   size_t length, written;
+   mp_err  err;
+   mp_int  a;
+   char *str_a, *str_b;
+   uint64_t t1;
+
+   /* "size" is given as "number of limbs" and starts at 8; widen first so the product is not done in int */
+   length = (size_t)size * (size_t)MP_DIGIT_BIT;
+
+   /* Over-estimate number of base 10 digits
+      Magic number: 28/93 = CF(log_10(2))_(p_3, q_3)
+    */
+   written = (length * 28u);
+   /* May happen e.g. if size > 2184  with MP_16BIT
+      but cutoff should be about a couple of thousand bits
+      at most (around or above Karatsuba cutoff).
+    */
+   if (length != written / 28u) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_1;
+   }
+   length = written / 93u + 2u;
+
+   if ((err = random_number(&str_a, length)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_1;
+   }
+
+   if ((err = mp_init(&a)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_2;
+   }
+   s_timer_start();
+   for (x = 0; x < s_number_of_test_loops; x++) {
+      if ((err = mp_read_radix(&a, str_a, 10)) != MP_OKAY) {
+         t1 = UINT64_MAX;
+         goto LBL_ERR_3;
+      }
+   }
+   t1 = s_timer_stop();
+
+   if ((err = mp_radix_size(&a, 10, &length)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_3;
+   }
+
+   str_b = malloc(length + 1);
+   if (str_b == NULL) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_3;
+   }
+   if ((err = mp_to_radix(&a, str_b, length, &written, 10)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR;
+   }
+
+   if (strcmp(str_a, str_b) != 0) {
+      t1 = 0u;
+      goto LBL_ERR;
+   }
+
+LBL_ERR:
+   free(str_b);
+LBL_ERR_3:
+   mp_clear(&a);
+LBL_ERR_2:
+   free(str_a);
+LBL_ERR_1:
+   return t1;
+}
+
+/* Time s_number_of_test_loops base-10 mp_to_radix() calls on the number produced by mp_rand(&a, size).
+   Returns the timer value, UINT64_MAX if a library call or malloc fails, 0 if the round trip differs. */
+static uint64_t s_time_radix_conversion_write(int size)
+{
+   int x;
+   size_t written, length;
+   mp_err  err;
+   mp_int  a, b;
+   char *str_a;
+   uint64_t t1;
+
+   if ((err = mp_init_multi(&a, &b, NULL)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_1;
+   }
+   if ((err = mp_rand(&a, size)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_2;
+   }
+
+   if ((err = mp_radix_size(&a, 10, &length)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_2;
+   }
+
+   str_a = malloc(length + 1);
+   if (str_a == NULL) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR_2;
+   }
+
+   s_timer_start();
+   for (x = 0; x < s_number_of_test_loops; x++) {
+      if ((err =  mp_to_radix(&a, str_a, length, &written, 10)) != MP_OKAY) {
+         t1 = UINT64_MAX;
+         goto LBL_ERR;   /* str_a is already allocated here and must be freed */
+      }
+   }
+   t1 = s_timer_stop();
+
+   if ((err = mp_read_radix(&b, str_a, 10)) != MP_OKAY) {
+      t1 = UINT64_MAX;
+      goto LBL_ERR;
+   }
+
+   if (mp_cmp(&a, &b) != MP_EQ) {
+      t1 = 0u;
+      goto LBL_ERR;
+   }
+
+LBL_ERR:
+   free(str_a);
+LBL_ERR_2:
+   mp_clear_multi(&a, &b, NULL);
+LBL_ERR_1:
+   return t1;
+}
+
+
 struct tune_args {
    int testmode;
    int verbose;
@@ -238,11 +401,13 @@ static void s_usage(char *s)
    fprintf(stderr,"             (Not for computing the cut-offs!)\n");
    fprintf(stderr,"          -s 'preset' use values in 'preset' for printing.\n");
    fprintf(stderr,"             'preset' is a comma separated string with cut-offs for\n");
-   fprintf(stderr,"             ksm, kss, tc3m, tc3s in that order\n");
+   fprintf(stderr,"             ksm, kss, tc3m, tc3s, rcr, rcw in that order\n");
    fprintf(stderr,"             ksm  = karatsuba multiplication\n");
    fprintf(stderr,"             kss  = karatsuba squaring\n");
    fprintf(stderr,"             tc3m = Toom-Cook 3-way multiplication\n");
    fprintf(stderr,"             tc3s = Toom-Cook 3-way squaring\n");
+   fprintf(stderr,"             rcr = Fast radix conversion, reading\n");
+   fprintf(stderr,"             rcw = Fast radix conversion, writing\n");
    fprintf(stderr,"             Implies '-p'\n");
    fprintf(stderr,"          -h this message\n");
    exit(s_exit_code);
@@ -251,10 +416,11 @@ static void s_usage(char *s)
 struct cutoffs {
    int MUL_KARATSUBA, SQR_KARATSUBA;
    int MUL_TOOM, SQR_TOOM;
+   int RADIX_READ, RADIX_WRITE;
 };
 
 const struct cutoffs max_cutoffs =
-{ INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+{ INT_MAX, INT_MAX, INT_MAX, INT_MAX,INT_MAX, INT_MAX };
 
 static void set_cutoffs(const struct cutoffs *c)
 {
@@ -262,6 +428,8 @@ static void set_cutoffs(const struct cutoffs *c)
    MP_SQR_KARATSUBA_CUTOFF = c->SQR_KARATSUBA;
    MP_MUL_TOOM_CUTOFF = c->MUL_TOOM;
    MP_SQR_TOOM_CUTOFF = c->SQR_TOOM;
+   MP_RADIX_READ_CUTOFF = c->RADIX_READ;
+   MP_RADIX_WRITE_CUTOFF = c->RADIX_WRITE;
 }
 
 static void get_cutoffs(struct cutoffs *c)
@@ -270,7 +438,8 @@ static void get_cutoffs(struct cutoffs *c)
    c->SQR_KARATSUBA  = MP_SQR_KARATSUBA_CUTOFF;
    c->MUL_TOOM = MP_MUL_TOOM_CUTOFF;
    c->SQR_TOOM = MP_SQR_TOOM_CUTOFF;
-
+   c->RADIX_READ = MP_RADIX_READ_CUTOFF;
+   c->RADIX_WRITE = MP_RADIX_WRITE_CUTOFF;
 }
 
 int main(int argc, char **argv)
@@ -416,13 +585,17 @@ int main(int argc, char **argv)
                s_usage(argv[0]);
             }
             str = argv[opt];
-            MP_MUL_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[1/4] No value for MP_MUL_KARATSUBA_CUTOFF given");
+            MP_MUL_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[1/6] No value for MP_MUL_KARATSUBA_CUTOFF given");
+            str = endptr + 1;
+            MP_SQR_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[2/6] No value for MP_SQR_KARATSUBA_CUTOFF given");
             str = endptr + 1;
-            MP_SQR_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[2/4] No value for MP_SQR_KARATSUBA_CUTOFF given");
+            MP_MUL_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[3/6] No value for MP_MUL_TOOM_CUTOFF given");
             str = endptr + 1;
-            MP_MUL_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[3/4] No value for MP_MUL_TOOM_CUTOFF given");
+            MP_SQR_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[4/6] No value for MP_SQR_TOOM_CUTOFF given");
             str = endptr + 1;
-            MP_SQR_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[4/4] No value for MP_SQR_TOOM_CUTOFF given");
+            MP_RADIX_READ_CUTOFF = (int)s_strtol(str, &endptr, "[5/6] No value for MP_RADIX_READ_CUTOFF given");
+            str = endptr + 1;
+            MP_RADIX_WRITE_CUTOFF = (int)s_strtol(str, &endptr, "[6/6] No value for MP_RADIX_WRITE_CUTOFF given");
             break;
          case 'h':
             s_exit_code = EXIT_SUCCESS;
@@ -461,31 +634,64 @@ int main(int argc, char **argv)
          T_MUL_SQR("Karatsuba squaring", SQR_KARATSUBA, s_time_sqr),
          T_MUL_SQR("Toom-Cook 3-way multiplying", MUL_TOOM, s_time_mul),
          T_MUL_SQR("Toom-Cook 3-way squaring", SQR_TOOM, s_time_sqr),
+         /* TODO: adapt macro above (or the names of the cutoffs and/or functions) */
+         {
+            "\"Faster radix conversion (reading)\"", &MP_RADIX_READ_CUTOFF,
+            &(updated.RADIX_READ),MP_HAS(S_MP_FASTER_READ_RADIX) ? s_time_radix_conversion_read : NULL
+         },
+         {
+            "\"Faster radix conversion (writing)\"", &MP_RADIX_WRITE_CUTOFF,
+            &(updated.RADIX_WRITE),MP_HAS(S_MP_FASTER_TO_RADIX) ? s_time_radix_conversion_write : NULL
+         }
+
 #undef T_MUL_SQR
       };
       /* Turn all limits from bncore.c to the max */
       set_cutoffs(&max_cutoffs);
-      for (n = 0; n < sizeof(test)/sizeof(test[0]); ++n) {
+
+      for (n = 0; n < (sizeof(test)/sizeof(test[0]) - 2); ++n) {
          if (test[n].fn != NULL) {
             s_run(test[n].name, test[n].fn, test[n].cutoff);
             *test[n].update = *test[n].cutoff;
             *test[n].cutoff = INT_MAX;
+         };
+      }
+
+      /* We need the updated fast multiplication cutoffs for the radix conversion, set them */
+      for (n = 0; n < (sizeof(test)/sizeof(test[0]) - 2); ++n) {
+         if (test[n].fn != NULL) {
+            *test[n].cutoff = *test[n].update;
+         };
+      }
+
+      /* Cutoffs for radix conversions are in bits to make handling of 62 different radices easier  */
+      for (; n < sizeof(test)/sizeof(test[0]); ++n) {
+         if (test[n].fn != NULL) {
+            s_run(test[n].name, test[n].fn, test[n].cutoff);
+            /* TODO: can overflow for small INT_MAX */
+            *test[n].update = (*test[n].cutoff) * MP_DIGIT_BIT;
+            *test[n].cutoff = INT_MAX;
          }
       }
+
    }
    if (args.terse == 1) {
-      printf("%d %d %d %d\n",
+      printf("%d %d %d %d %d %d\n",
              updated.MUL_KARATSUBA,
              updated.SQR_KARATSUBA,
              updated.MUL_TOOM,
-             updated.SQR_TOOM);
+             updated.SQR_TOOM,
+             updated.RADIX_READ,
+             updated.RADIX_WRITE);
    } else {
       printf("MUL_KARATSUBA_CUTOFF = %d\n", updated.MUL_KARATSUBA);
       printf("SQR_KARATSUBA_CUTOFF = %d\n", updated.SQR_KARATSUBA);
       printf("MUL_TOOM_CUTOFF = %d\n", updated.MUL_TOOM);
       printf("SQR_TOOM_CUTOFF = %d\n", updated.SQR_TOOM);
+      printf("RADIX_READ_CUTOFF = %d\n", updated.RADIX_READ);
+      printf("RADIX_WRITE_CUTOFF = %d\n", updated.RADIX_WRITE);
    }
-
+   /* TODO: add graphs for radix conversion, too? */
    if (args.print == 1) {
       printf("Printing data for graphing to \"%s\" and \"%s\"\n",mullog, sqrlog);
 
diff --git a/etc/tune_it.sh b/etc/tune_it.sh
index dba5b696..625a20be 100755
--- a/etc/tune_it.sh
+++ b/etc/tune_it.sh
@@ -56,7 +56,7 @@ KEEP_TEMP=1
 echo "You might like to watch the numbers go up to $LIMIT but it will take a long time!"
 
 # Might not have sufficient rights or disc full.
-echo "km ks tc3m tc3s" > $FILE_NAME || die "Writing header to $FILE_NAME" $?
+echo "km ks tc3m tc3s rcr rcw" > $FILE_NAME || die "Writing header to $FILE_NAME" $?
 i=1
 while [ $i -le $LIMIT ]; do
    RNUM=$(LCG)
@@ -104,3 +104,23 @@ echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF      $TMP" >> $TOMMATH_CUTOFFS_H || die
 TMP=$(median $FILE_NAME 4 $i)
 echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF      $TMP"
 echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF      $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3s) Appending to $TOMMATH_CUTOFFS_H" $?
+
+TMP=$(median $FILE_NAME 5 $i)
+echo "#define MP_DEFAULT_RADIX_READ_CUTOFF      $TMP"
+echo "#define MP_DEFAULT_RADIX_READ_CUTOFF      $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcr) Appending to $TOMMATH_CUTOFFS_H" $?
+TMP=$(median $FILE_NAME 6 $i)
+echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF      $TMP"
+echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF      $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcw) Appending to $TOMMATH_CUTOFFS_H" $?
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helper.pl b/helper.pl
index ffc592a7..7026f53c 100755
--- a/helper.pl
+++ b/helper.pl
@@ -476,6 +476,8 @@ sub generate_def {
     MP_SQR_KARATSUBA_CUTOFF
     MP_MUL_TOOM_CUTOFF
     MP_SQR_TOOM_CUTOFF
+    MP_RADIX_READ_CUTOFF
+    MP_RADIX_WRITE_CUTOFF
 ";
     return 0;
 }
diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj
index 71dd3807..56b6f044 100644
--- a/libtommath_VS2008.vcproj
+++ b/libtommath_VS2008.vcproj
@@ -836,6 +836,18 @@
 			RelativePath="s_mp_exptmod_fast.c"
 			>
 		</File>
+		<File
+			RelativePath="s_mp_faster_read_radix.c"
+			>
+		</File>
+		<File
+			RelativePath="s_mp_faster_to_radix.c"
+			>
+		</File>
+		<File
+			RelativePath="s_mp_floor_ilog2.c"
+			>
+		</File>
 		<File
 			RelativePath="s_mp_fp_log.c"
 			>
@@ -912,6 +924,14 @@
 			RelativePath="s_mp_rand_platform.c"
 			>
 		</File>
+		<File
+			RelativePath="s_mp_slower_read_radix.c"
+			>
+		</File>
+		<File
+			RelativePath="s_mp_slower_to_radix.c"
+			>
+		</File>
 		<File
 			RelativePath="s_mp_sqr.c"
 			>
diff --git a/makefile b/makefile
index 8f211f5f..92fa9690 100644
--- a/makefile
+++ b/makefile
@@ -44,13 +44,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o
 mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
-s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \
-s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \
-s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \
-s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \
-s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \
-s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \
-s_mp_zero_buf.o s_mp_zero_digs.o
+s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
+s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
+s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \
+s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \
+s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o
 
 #END_INS
 
diff --git a/makefile.mingw b/makefile.mingw
index e2445e8a..f90d3ecc 100644
--- a/makefile.mingw
+++ b/makefile.mingw
@@ -46,13 +46,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o
 mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
-s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \
-s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \
-s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \
-s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \
-s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \
-s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \
-s_mp_zero_buf.o s_mp_zero_digs.o
+s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
+s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
+s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \
+s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \
+s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o
 
 HEADERS_PUB=tommath.h
 HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB)
diff --git a/makefile.msvc b/makefile.msvc
index 8feb425c..60462911 100644
--- a/makefile.msvc
+++ b/makefile.msvc
@@ -42,13 +42,14 @@ mp_reduce_setup.obj mp_root_n.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj mp_set
 mp_set_l.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj mp_shrink.obj mp_signed_rsh.obj mp_sqrmod.obj mp_sqrt.obj \
 mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj \
 mp_unpack.obj mp_warray_free.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj \
-s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_fp_log.obj \
-s_mp_fp_log_d.obj s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj \
-s_mp_montgomery_reduce_comba.obj s_mp_mul.obj s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj \
-s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj \
-s_mp_radix_map.obj s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj \
-s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_warray.obj s_mp_warray_get.obj s_mp_warray_put.obj \
-s_mp_zero_buf.obj s_mp_zero_digs.obj
+s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj \
+s_mp_faster_read_radix.obj s_mp_faster_to_radix.obj s_mp_floor_ilog2.obj s_mp_fp_log.obj s_mp_fp_log_d.obj \
+s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \
+s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \
+s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj \
+s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_slower_read_radix.obj s_mp_slower_to_radix.obj \
+s_mp_sqr.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_warray.obj \
+s_mp_warray_get.obj s_mp_warray_put.obj s_mp_zero_buf.obj s_mp_zero_digs.obj
 
 HEADERS_PUB=tommath.h
 HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB)
diff --git a/makefile.shared b/makefile.shared
index 50c33526..887303a3 100644
--- a/makefile.shared
+++ b/makefile.shared
@@ -41,13 +41,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o
 mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
-s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \
-s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \
-s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \
-s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \
-s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \
-s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \
-s_mp_zero_buf.o s_mp_zero_digs.o
+s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
+s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
+s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \
+s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \
+s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o
 
 #END_INS
 
diff --git a/makefile.unix b/makefile.unix
index 58642098..00bc3265 100644
--- a/makefile.unix
+++ b/makefile.unix
@@ -47,13 +47,14 @@ mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o
 mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
-s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \
-s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \
-s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \
-s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \
-s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \
-s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \
-s_mp_zero_buf.o s_mp_zero_digs.o
+s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
+s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
+s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \
+s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o \
+s_mp_warray_get.o s_mp_warray_put.o s_mp_zero_buf.o s_mp_zero_digs.o
 
 
 HEADERS_PUB=tommath.h
diff --git a/mp_cutoffs.c b/mp_cutoffs.c
index 45b0beec..6e6bf005 100644
--- a/mp_cutoffs.c
+++ b/mp_cutoffs.c
@@ -8,7 +8,9 @@
 int MP_MUL_KARATSUBA_CUTOFF = MP_DEFAULT_MUL_KARATSUBA_CUTOFF,
     MP_SQR_KARATSUBA_CUTOFF = MP_DEFAULT_SQR_KARATSUBA_CUTOFF,
     MP_MUL_TOOM_CUTOFF = MP_DEFAULT_MUL_TOOM_CUTOFF,
-    MP_SQR_TOOM_CUTOFF = MP_DEFAULT_SQR_TOOM_CUTOFF;
+    MP_SQR_TOOM_CUTOFF = MP_DEFAULT_SQR_TOOM_CUTOFF,
+    MP_RADIX_READ_CUTOFF = MP_DEFAULT_RADIX_READ_CUTOFF,
+    MP_RADIX_WRITE_CUTOFF = MP_DEFAULT_RADIX_WRITE_CUTOFF;
 #endif
 
 #endif
diff --git a/mp_fread.c b/mp_fread.c
index 53c35e82..baf539f6 100644
--- a/mp_fread.c
+++ b/mp_fread.c
@@ -4,10 +4,11 @@
 /* SPDX-License-Identifier: Unlicense */
 
 #ifndef MP_NO_FILE
+
 /* read a bigint from a file stream in ASCII */
 mp_err mp_fread(mp_int *a, int radix, FILE *stream)
 {
-   mp_err err;
+   mp_err err = MP_OKAY;
    mp_sign sign = MP_ZPOS;
    int ch;
 
@@ -47,19 +48,17 @@ mp_err mp_fread(mp_int *a, int radix, FILE *stream)
       }
 
       /* shift up and add */
-      if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) {
-         return err;
-      }
-      if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
-         return err;
-      }
+      if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY)                                          goto LBL_ERR;
+      if ((err = mp_add_d(a, y, a)) != MP_OKAY)                                                        goto LBL_ERR;
+
    } while ((ch = fgetc(stream)) != EOF);
 
    if (!mp_iszero(a)) {
       a->sign = sign;
    }
 
-   return MP_OKAY;
+LBL_ERR:
+   return err;
 }
 #endif
 
diff --git a/mp_fwrite.c b/mp_fwrite.c
index 8ea9d327..93f59d58 100644
--- a/mp_fwrite.c
+++ b/mp_fwrite.c
@@ -7,7 +7,7 @@
 mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream)
 {
    char *buf;
-   mp_err err;
+   mp_err err = MP_OKAY;
    size_t size, written;
 
    if ((err = mp_radix_size_overestimate(a, radix, &size)) != MP_OKAY) {
diff --git a/mp_prime_is_prime.c b/mp_prime_is_prime.c
index bb24f594..99bfb0f8 100644
--- a/mp_prime_is_prime.c
+++ b/mp_prime_is_prime.c
@@ -3,16 +3,6 @@
 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
 /* SPDX-License-Identifier: Unlicense */
 
-/* portable integer log of two with small footprint */
-static unsigned int s_floor_ilog2(int value)
-{
-   unsigned int r = 0;
-   while ((value >>= 1) != 0) {
-      r++;
-   }
-   return r;
-}
-
 mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result)
 {
    mp_int  b;
@@ -186,7 +176,7 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result)
        * Hence the ugly type-fiddling in the following code.
        */
       size_a = mp_count_bits(a);
-      mask = (1u << s_floor_ilog2(size_a)) - 1u;
+      mask = (1u << s_mp_floor_ilog2(size_a)) - 1u;
       /*
          Assuming the General Rieman hypothesis (never thought to write that in a
          comment) the upper bound can be lowered to  2*(log a)^2.
diff --git a/mp_read_radix.c b/mp_read_radix.c
index 28e6eb60..adfbc70d 100644
--- a/mp_read_radix.c
+++ b/mp_read_radix.c
@@ -3,11 +3,29 @@
 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
 /* SPDX-License-Identifier: Unlicense */
 
+#ifdef MP_USE_MEMOPS
+#  include <string.h>
+#  define MP_STRLEN(s) strlen(s)
+#else
+/* Stand-in for strlen() when MP_USE_MEMOPS is not defined: count the chars before the NUL. */
+static size_t s_mp_strlen(const char *s)
+{
+   size_t len = 0u;
+   while (s[len] != '\0') {
+      len++;
+   }
+   return len;
+}
+#  define MP_STRLEN(s) s_mp_strlen(s)
+#endif
+
 /* read a string [ASCII] in a given radix */
 mp_err mp_read_radix(mp_int *a, const char *str, int radix)
 {
-   mp_err   err;
+
+   mp_err   err = MP_OKAY;
    mp_sign  sign = MP_ZPOS;
+   size_t slen, slen_2;
 
    /* make sure the radix is ok */
    if ((radix < 2) || (radix > 64)) {
@@ -22,48 +40,67 @@ mp_err mp_read_radix(mp_int *a, const char *str, int radix)
       sign = MP_NEG;
    }
 
-   /* set the integer to the default of zero */
-   mp_zero(a);
+   slen = MP_STRLEN(str);
+   /* "slen" is log_b(str) with b = radix and with log_a(x) = log_b(x)/log_b(a) we can use log_2(str) = slen/log_b(2)
+      but we do not have floats (there is a fixed point version of log_2(x) in s_mp_fp_log_d.c, though).
+      We could use a table with rational approximations instead which costs quite some stack-memory because
+      we would need it for every MP_DIGIT_BIT size..
 
-   /* process each digit of the string */
-   while (*str != '\0') {
-      /* if the radix <= 36 the conversion is case insensitive
-       * this allows numbers like 1AB and 1ab to represent the same  value
-       * [e.g. in hex]
-       */
-      uint8_t y;
-      char ch = (radix <= 36) ? (char)MP_TOUPPER((int)*str) : *str;
-      unsigned pos = (unsigned)(ch - '+');
-      if (MP_RADIX_MAP_REVERSE_SIZE <= pos) {
-         break;
-      }
-      y = s_mp_radix_map_reverse[pos];
-
-      /* if the char was found in the map
-       * and is less than the given radix add it
-       * to the number, otherwise exit the loop.
-       */
-      if (y >= radix) {
-         break;
-      }
-      if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) {
-         return err;
-      }
-      if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
-         return err;
-      }
-      ++str;
+      So to keep things simple we restrict our length checks to bases 10 and powers of two for now.
+
+      Bases that are a power of two are the easiest: just multiply "slen" with log_2(radix) to get the bits
+      necessary to compute the bit-size. The result shall not be  bigger than (MP_MAX_DIGIT_COUNT - 2) * MP_DIGIT_BIT.
+      This is in almost all cases an overestimate  because the MSD is most likely not full but only by a couple of bits,
+      at most 63 in case of radix = 64.  That means that at most 5 limbs (MP_16BIT), 3 limbs (MP_28BIT and MP_31BIT),
+      or 2 limbs (MP_64BIT) are going to waste.
+
+      For base 10 and (INT_MAX = 2^(31) - 1) the limits are:
+      MP_DIGIT_BIT   limit     tested          notes
+      15              2183       y        (with INT_MAX = 32767) which is MP_MAX_DIGIT_COUNT - 1
+      28            76695844     y
+      31            69273664     n        Not tested because there is no FFT for MP_31BIT and I
+                                          don't have the patience of a Buddha.
+      60            35791392     y
+
+   */
+
+   if (MP_IS_2EXPT((unsigned int)radix) &&
+       ((slen * (size_t) s_mp_log2_radix[radix]) > ((MP_MAX_DIGIT_COUNT - 2) * MP_DIGIT_BIT))) {
+      return MP_OVF;
+   } else  if ((radix == 10) && (slen >
+#if (MP_DIGIT_BIT == 15)
+                                 2183
+#elif (MP_DIGIT_BIT == 28)
+                                 76695844
+#elif (MP_DIGIT_BIT == 31)
+                                 69273664
+#elif (MP_DIGIT_BIT == 60)
+                                 35791392
+#endif
+                                )) {
+      return MP_OVF;
    }
 
-   /* if an illegal character was found, fail. */
-   if ((*str != '\0') && (*str != '\r') && (*str != '\n')) {
-      return MP_VAL;
+   /* Roughly (over)estimate bit-size for cutoff by assuming slen to be ceil(log_{radix}(input))
+      so  bits(slen) ~ slen_{radix} * ceil(log_2(radix)) */
+   slen_2 = slen * (size_t)(s_mp_log2_radix[radix] + 1);
+
+   mp_zero(a);
+
+   /* Try faster version first */
+   if (MP_HAS(S_MP_FASTER_READ_RADIX) && (slen_2 < (size_t)MP_RADIX_READ_CUTOFF)) {
+      if ((err = s_mp_faster_read_radix(a, str, 0, slen, radix)) != MP_OKAY)                             goto LTM_ERR;
+   } else if (MP_HAS(S_MP_SLOWER_READ_RADIX)) {
+      if ((err = s_mp_slower_read_radix(a, str, 0, slen, radix)) != MP_OKAY)                             goto LTM_ERR;
    }
 
    /* set the sign only if a != 0 */
    if (!mp_iszero(a)) {
       a->sign = sign;
    }
-   return MP_OKAY;
+
+LTM_ERR:
+   return err;
 }
+
 #endif
diff --git a/mp_to_radix.c b/mp_to_radix.c
index 1e5e6711..09435ac8 100644
--- a/mp_to_radix.c
+++ b/mp_to_radix.c
@@ -3,17 +3,6 @@
 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
 /* SPDX-License-Identifier: Unlicense */
 
-/* reverse an array, used for radix code */
-static void s_reverse(char *s, size_t len)
-{
-   size_t ix = 0, iy = len - 1u;
-   while (ix < iy) {
-      MP_EXCH(char, s[ix], s[iy]);
-      ++ix;
-      --iy;
-   }
-}
-
 /* stores a bignum as a ASCII string in a given radix (2..64)
  *
  * Stores upto "size - 1" chars and always a NULL byte, puts the number of characters
@@ -21,11 +10,9 @@ static void s_reverse(char *s, size_t len)
  */
 mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix)
 {
-   size_t  digs;
-   mp_err  err;
-   mp_int  t;
-   mp_digit d;
-   char   *_s = str;
+   mp_err err;
+   mp_int a_bar = *a;
+   size_t part_written = 0;
 
    /* check range of radix and size*/
    if (maxlen < 2u) {
@@ -35,6 +22,11 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i
       return MP_VAL;
    }
 
+   /* Check upper limits */
+   if (!MP_IS_2EXPT((unsigned int)radix)  && (a->used > (MP_MAX_DIGIT_COUNT - 4))) {
+      return MP_OVF;
+   }
+
    /* quick out if its zero */
    if (mp_iszero(a)) {
       *str++ = '0';
@@ -45,50 +37,33 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i
       return MP_OKAY;
    }
 
-   if ((err = mp_init_copy(&t, a)) != MP_OKAY) {
-      return err;
-   }
-
    /* if it is negative output a - */
-   if (mp_isneg(&t)) {
-      /* we have to reverse our digits later... but not the - sign!! */
-      ++_s;
-
+   if (mp_isneg(a)) {
       /* store the flag and mark the number as positive */
       *str++ = '-';
-      t.sign = MP_ZPOS;
+      a_bar.sign = MP_ZPOS;
 
       /* subtract a char */
       --maxlen;
    }
-   digs = 0u;
-   while (!mp_iszero(&t)) {
-      if (--maxlen < 1u) {
-         /* no more room */
-         err = MP_BUF;
-         goto LBL_ERR;
-      }
-      if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
-         goto LBL_ERR;
-      }
-      *str++ = s_mp_radix_map[d];
-      ++digs;
-   }
-   /* reverse the digits of the string.  In this case _s points
-    * to the first digit [excluding the sign] of the number
-    */
-   s_reverse(_s, digs);
 
-   /* append a NULL so the string is properly terminated */
-   *str = '\0';
-   digs++;
+
+   if (MP_HAS(S_MP_FASTER_TO_RADIX) && (a->used > (MP_RADIX_WRITE_CUTOFF / MP_DIGIT_BIT))) {
+      if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY)            goto LBL_ERR;
+   } else if (MP_HAS(S_MP_SLOWER_TO_RADIX)) {
+      char *start = str;
+      if ((err = s_mp_slower_to_radix(&a_bar, &str, &maxlen, &part_written, radix, false)) != MP_OKAY) goto LBL_ERR;
+      str = start;
+      /* part_written does not count EOS */
+      part_written++;
+   }
 
    if (written != NULL) {
-      *written = mp_isneg(a) ? (digs + 1u): digs;
+      part_written += mp_isneg(a) ? 1: 0;
+      *written = part_written;
    }
 
 LBL_ERR:
-   mp_clear(&t);
    return err;
 }
 
diff --git a/s_mp_faster_read_radix.c b/s_mp_faster_read_radix.c
new file mode 100644
index 00000000..99e2d9c8
--- /dev/null
+++ b/s_mp_faster_read_radix.c
@@ -0,0 +1,58 @@
+#include "tommath_private.h"
+#ifdef S_MP_FASTER_READ_RADIX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/*
+ * Divide-and-conquer string to mp_int conversion.
+ *
+ * Splits str[start] .. str[end - 1] in two parts, converts them recursively and
+ * joins the results: a = A * radix^(characters in B) + B. Parts whose estimated
+ * bit-size is below MP_RADIX_READ_CUTOFF are handed to s_mp_slower_read_radix().
+ *
+ * "a" has to be zero on entry, radix and sign are handled by the caller.
+ * Returns the error of the first function that failed, MP_OKAY otherwise.
+ */
+mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix)
+{
+   size_t len, mid;
+   mp_int A, B, m;
+   mp_digit radix_ = (mp_digit)radix;
+   mp_err err = MP_OKAY;
+
+   len = end - start;
+
+   /* With less than three characters the split below does not make the left part any
+      shorter, so these must never recurse, no matter how low the cutoff has been set. */
+   if ((len < 3u) || ((len * (size_t)(s_mp_log2_radix[radix]) + 1) < (size_t)MP_RADIX_READ_CUTOFF)) {
+      return s_mp_slower_read_radix(a, str, start, end, radix);
+   }
+   mid = len / 2u;
+
+   if ((err = mp_init_set(&m, radix_)) != MP_OKAY) {
+      return err;
+   }
+   if ((err = mp_init_multi(&A, &B, NULL)) != MP_OKAY) {
+      mp_clear(&m);
+      return err;
+   }
+
+   /* A = the leading (mid + 1) characters, B = the rest */
+   if ((err = s_mp_faster_read_radix(&A, str, start, start + mid + 1, radix)) != MP_OKAY)                goto LTM_ERR;
+   if ((err = s_mp_faster_read_radix(&B, str, start + mid + 1, end, radix)) != MP_OKAY)                  goto LTM_ERR;
+
+   /* Move A up by the number of characters in B: A = A * radix^(len - mid - 1) */
+   if (MP_IS_2EXPT((unsigned int)radix_)) {
+      if ((err = mp_mul_2d(&A, (int)(((len - mid) - 1u) * s_mp_log2_radix[radix_]), &A)) != MP_OKAY)goto LTM_ERR;
+   } else {
+      if ((err = mp_expt_n(&m, (int)((len - mid) - 1u), &m)) != MP_OKAY)                                 goto LTM_ERR;
+      if ((err = mp_mul(&A, &m, &A)) != MP_OKAY)                                                         goto LTM_ERR;
+   }
+   if ((err = mp_add(&A, &B, a)) != MP_OKAY)                                                             goto LTM_ERR;
+
+LTM_ERR:
+   mp_clear_multi(&A, &B, &m, NULL);
+   return err;
+}
+
+#endif
diff --git a/s_mp_faster_to_radix.c b/s_mp_faster_to_radix.c
new file mode 100644
index 00000000..8a6bb69f
--- /dev/null
+++ b/s_mp_faster_to_radix.c
@@ -0,0 +1,293 @@
+#include "tommath_private.h"
+#ifdef S_MP_FASTER_TO_RADIX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/*
+ * Compute the shift used for the Barrett reduction: 2^t * k.
+ *
+ * Returns 0 if the result does not fit into an int or is bigger than
+ * MP_MAX_DIGIT_COUNT * MP_DIGIT_BIT; the callers treat 0 as an overflow.
+ */
+static int s_mp_compute_s(int t, int k)
+{
+   uint32_t r = 0u;
+   int log2_intmax, log2_k;
+
+   log2_k = (int)s_mp_floor_ilog2(k) + 1;
+   log2_intmax = (int)s_mp_floor_ilog2(INT_MAX) + 1;
+
+   /* Rough first check for overflow */
+   if (t > (log2_intmax - log2_k)) {
+      return 0;
+   }
+
+   r = 1u << t;
+   r = r * (uint32_t)k;
+
+   /* Final check for overflow */
+   return (r > (MP_MAX_DIGIT_COUNT * MP_DIGIT_BIT)) ? 0 : (int)r;
+}
+
+/*
+ * One level of the divide-and-conquer conversion.
+ *
+ * Splits "a" into quotient and remainder by P[t] and converts both with the next
+ * smaller modulus P[t - 1]. For t < 0 the chunk is printed by s_mp_slower_to_radix(),
+ * which also moves *str forward and updates *part_maxlen and *part_written.
+ *
+ *   k      bit-size the Barrett shift is computed from, see s_mp_compute_s()
+ *   t      index into the lists P (moduli) and R (reciprocals)
+ *   pad    fill the chunk with leading zeros, for all but the most significant chunk
+ *   first  outermost call: uses a plain mp_div() and releases P[t] and R[t] early
+ */
+static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written,
+                                      int radix, int32_t k, int32_t t, bool pad, bool first, mp_int *P, mp_int *R)
+{
+   mp_int r, q;
+   mp_err err;
+   int Beta;
+
+   if (t < 0) {
+      /* Print the string from the number given */
+      return s_mp_slower_to_radix(a, str, part_maxlen, part_written, radix, pad);
+   }
+
+   if ((err = mp_init_multi(&q, &r, NULL)) != MP_OKAY) {
+      return err;
+   }
+
+   if (MP_IS_POWER_OF_TWO(&P[t])) {
+      if ((err = mp_div_2d(a, mp_count_bits(&P[t]) - 1, &q, &r)) != MP_OKAY)                             goto LTM_ERR;
+   } else if (first) {
+      /* Largest division, only one time, no reason for Barrett division in the first place */
+      if ((err = mp_div(a, &P[t], &q, &r)) != MP_OKAY)                                                   goto LTM_ERR;
+      /* Release early and often, they say. */
+      mp_clear(&P[t]);
+      mp_clear(&R[t]);
+   } else {
+      /*
+         Barrett reduction. A step by step proof can be found at
+         https://www.nayuki.io/page/barrett-reduction-algorithm
+
+         See also: Modern Computer Arithmetic, version 0.5.9, page 59
+       */
+      Beta = (int)s_mp_compute_s(t+1, k);
+      if (Beta == 0) {
+         err = MP_OVF;
+         goto LTM_ERR;
+      }
+      /* Q = floor(A1 * I / 2^Beta) */
+      /* I = floor( (2^(2*Beta)) / B) Here we have R[t] = I, P[t] = B */
+      /* TODO: We don't need the full "a" only  the upper part: a = a_1\beta + a_0 with 0 < a_0 < \beta
+               The cutoff with s_mp_mul_high is so low that the gap between that and the general cutoff
+               is too small to be worth the hassle.
+               But if somebody implements Thom Mulder's short products...
+               (There are successors. See e.g. D. Harvey and P. Zimmermann "Short Division of Long Integers",
+                Laszlo Hars "Fast Truncated Multiplication for Cryptographic Applications", Daniel Lemire "Exact Short
+                Products From Truncated Multipliers", and many^Wsome more.
+       */
+      if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY)                                                       goto LTM_ERR;
+      if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY)                                              goto LTM_ERR;
+
+      /* R = A - Q*B */
+      /* TODO: Q*B can be a low short-product */
+      if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY)                                                      goto LTM_ERR;
+      if ((err = mp_sub(a, &r, &r)) != MP_OKAY)                                                          goto LTM_ERR;
+
+      /* We can use this simple correction because of the way we computed the reciprocal */
+      if (r.sign == MP_NEG) {
+         if ((err = mp_decr(&q)) != MP_OKAY)                                                             goto LTM_ERR;
+         if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY)                                                   goto LTM_ERR;
+      }
+   }
+   /* Go down the lists while climbing up the tree. */
+   t--;
+
+   if (mp_iszero(&q) && (!pad)) {
+      if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix,
+                                         k, t, false, false, P, R)) != MP_OKAY)                          goto LTM_ERR;
+   } else {
+      if ((err = s_mp_to_radix_recursive(&q, str, part_maxlen, part_written, radix,
+                                         k, t,  pad, false, P, R)) != MP_OKAY)                           goto LTM_ERR;
+      if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix,
+                                         k, t, true, false, P, R)) != MP_OKAY)                           goto LTM_ERR;
+   }
+
+LTM_ERR:
+   /* Reached on success, too: q and r get released on every way out */
+   mp_clear_multi(&q, &r, NULL);
+   return err;
+}
+
+/*
+ * Radix conversion by divide-and-conquer with Barrett reduction.
+ *
+ * Builds a list of moduli P[0] = (radix^y)^MP_RADIX_BARRETT_START_MULTIPLICATOR,
+ * P[t] = P[t - 1]^2 together with their reciprocals R[t] and splits "a" recursively
+ * with them. Small input is handed to s_mp_slower_to_radix() directly.
+ *
+ * Writes at most "maxlen" characters including the NUL to "str" and stores the
+ * number of characters written, NUL included, in *written which must not be NULL.
+ * mp_to_radix() handles sign and zero itself and passes a positive, non-zero "a".
+ */
+mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix)
+{
+   mp_err err = MP_OKAY;
+   int n = 0, k, t = 0, steps = 0, ilog2a, s, i;
+
+   /* Use given buffer directly, no temporary buffers for the individual chunks */
+   char **sptr = &str;
+   /* Size of the chunk */
+   size_t part_written = 0;
+   size_t part_maxlen = maxlen;
+
+   /* List of reciprocals */
+   mp_int *R = NULL;
+   /* List of moduli */
+   mp_int *P = NULL;
+
+   mp_int T;
+
+   /* Denominator for the reciprocal: b^y. */
+   if ((err = mp_init_set(&T, (mp_digit)radix)) != MP_OKAY) {
+      return err;
+   }
+   if ((err = mp_expt_n(&T, (int)s_mp_radix_exponent_y[radix],  &T)) != MP_OKAY)                         goto LTM_ERR0;
+   n = (int)T.dp[0];
+
+   /* Numerator of the reciprocal: ceil(log_2(n)) */
+   k = (int)s_mp_floor_ilog2(n) + 1;
+
+   /* steps = floor(log_2(floor(log_2(a))))*/
+   ilog2a = mp_count_bits(a) - 1;
+
+   /* Cutoff at about twice the size of P[0]. */
+   if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR)) {
+      /* The lists are not allocated at this point: leave via LTM_ERR0, not LTM_ERR */
+      if ((err = s_mp_slower_to_radix(a, sptr, &part_maxlen, &part_written, radix, false)) != MP_OKAY)   goto LTM_ERR0;
+      /* part_written does not count EOS */
+      *written = part_written + 1;
+      goto LTM_ERR0;
+   }
+   /*
+      floor(log_2(floor(log_2(a)))) is a bit too much (we do not start at zero)
+      but we check for the end inside the loop and the list is just a list of pointers,
+      not much memory wasted here if we allocate too much steps.
+    */
+   steps  = (int)s_mp_floor_ilog2(ilog2a) + 1;
+   /* Allocate memory for list of reciprocals */
+   R = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int));
+   if (R == NULL) {
+      err = MP_MEM;
+      goto LTM_ERR0;
+   }
+   /* Allocate memory for list of moduli */
+   P = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int));
+   if (P == NULL) {
+      MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int));
+      err = MP_MEM;
+      goto LTM_ERR0;
+   }
+   /* Mark all entries as unused. The cleanup relies on mp_clear() skipping entries with dp == NULL,
+      just like it has to for P[t] and R[t] which the recursion releases early. */
+   for (i = 0; i < steps; i++) {
+      P[i].dp = NULL;
+      R[i].dp = NULL;
+   }
+
+   /*
+      The approximation for the reciprocal used in Barrett's method is
+          R_t = ceil(2^((2^t)*k)/n^(2^t))
+      with R_0 = (2^(2*k))/b^y and k = ceil(log_2(n)) as computed above.
+    */
+
+   /* To get the tree a bit flatter. Alternative: do it iteratively instead of recursively */
+   k = k * MP_RADIX_BARRETT_START_MULTIPLICATOR;
+
+   /* Compute initial reciprocal R[0] and expand it (R[0]^(2^k) */
+   if ((err = mp_init_i32(&P[0], n)) != MP_OKAY)                                                         goto LTM_ERR;
+   if ((err = mp_expt_n(&P[0], MP_RADIX_BARRETT_START_MULTIPLICATOR, &P[0])) != MP_OKAY)                 goto LTM_ERR;
+   if ((err = mp_init(&R[0])) != MP_OKAY)                                                                goto LTM_ERR;
+   if ((err = mp_2expt(&R[0], 2*k)) != MP_OKAY)                                                          goto LTM_ERR;
+   if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY)                                             goto LTM_ERR;
+   if ((err = mp_incr(&R[0])) != MP_OKAY)                                                                goto LTM_ERR;
+
+   /* Compute the rest of the reciprocals as needed */
+   for (t = 1; t < steps; t++) {
+      /* P_t = (b^y)^(2^t) = n^(2^t) */
+      /*
+         We cannot just square because it can
+            a) overflow MP_MAX_DIGIT_COUNT
+            b) it can get bigger than "a" which it shouldn't
+               which also means that
+            c) if it gets bigger than "a" we have all necessary
+               reciprocals and can break out of the loop
+      */
+
+      /* P[t-1]^2 > a is most likely more than just a bit or two, so check if we
+         can bail out early without actually computing the square. */
+      if ((2 * mp_count_bits(&P[t-1])) > ilog2a) {
+         /* Correct index */
+         t--;
+         break;
+      }
+
+      /* Compute denominator */
+      if ((err = mp_init(&P[t])) != MP_OKAY)                                                             goto LTM_ERR;
+      /* P[t] = P[t-1]^2 */
+      if ((err = mp_sqr(&P[t-1], &P[t])) != MP_OKAY)                                                     goto LTM_ERR;
+      /* Check if P[t] > a */
+      if (mp_cmp(&P[t],a) == MP_GT) {
+         /* We don't need P[t] anymore */
+         mp_clear(&P[t]);
+         /* Correct index */
+         t--;
+         break;
+      }
+
+      /* Compute numerator */
+      if ((err = mp_init(&R[t])) != MP_OKAY)                                                            goto LTM_ERR;
+      s = s_mp_compute_s(t + 1, k);
+      /* Overflow, we have enough divisors */
+      if (s == 0) {
+         break;
+      }
+      if ((err = mp_2expt(&(R[t]), s)) != MP_OKAY)                                                    goto LTM_ERR;
+      /* Compute reciprocal */
+      /* R[t] = floor(2^(2^t * k) / P[t] */
+      if (MP_IS_POWER_OF_TWO(&P[t])) {
+         if ((err = mp_div_2d(&R[t], mp_count_bits(&P[t]) - 1, &R[t], NULL)) != MP_OKAY)              goto LTM_ERR;
+      } else {
+         if ((radix == 10) && ((2 * mp_count_bits(&P[t])) > ilog2a)) {
+            break;
+         }
+         if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY)                                    goto LTM_ERR;
+      }
+      if ((err = mp_incr(&R[t])) != MP_OKAY)                                                          goto LTM_ERR;
+   }
+   /* The loop is expected to leave early; if it ever runs to the end, t is one past the lists */
+   if (t >= steps) {
+      t = steps - 1;
+   }
+
+   /* And finally: start the recursion. */
+   if ((err = s_mp_to_radix_recursive(a, sptr, &part_maxlen, &part_written, radix,
+                                      k, t, false, true, P, R)) != MP_OKAY)                              goto LTM_ERR;
+   /* part_written does not account for EOS */
+   *written = part_written + 1;
+
+LTM_ERR:
+   /* Every entry is either initialized or marked unused, so all of them can be cleared */
+   for (i = 0; i < steps; i++) {
+      mp_clear(&P[i]);
+      mp_clear(&R[i]);
+   }
+   MP_FREE_BUF(P, (size_t) steps * sizeof(mp_int));
+   MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int));
+LTM_ERR0:
+   mp_clear(&T);
+   return err;
+}
+
+#endif
diff --git a/s_mp_floor_ilog2.c b/s_mp_floor_ilog2.c
new file mode 100644
index 00000000..50d44c3b
--- /dev/null
+++ b/s_mp_floor_ilog2.c
@@ -0,0 +1,28 @@
+#include "tommath_private.h"
+#ifdef S_MP_FLOOR_ILOG2_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/*
+ * Portable integer log of two with small footprint.
+ *
+ * Returns floor(log_2(value)) for value > 0 and 0 for value <= 0.
+ */
+unsigned int s_mp_floor_ilog2(int value)
+{
+   unsigned int r = 0u;
+   unsigned int v;
+
+   /* Shifting a negative int to the right is implementation-defined and with an
+      arithmetic shift it never reaches zero (-1 >> 1 == -1), so bail out early. */
+   if (value <= 0) {
+      return 0u;
+   }
+   v = (unsigned int)value;
+   while ((v >>= 1) != 0u) {
+      r++;
+   }
+   return r;
+}
+
+#endif
diff --git a/s_mp_radix_map.c b/s_mp_radix_map.c
index 68e21f32..34d77c43 100644
--- a/s_mp_radix_map.c
+++ b/s_mp_radix_map.c
@@ -16,4 +16,46 @@ const uint8_t s_mp_radix_map_reverse[] = {
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d  /* qrstuvwxyz */
 };
 MP_STATIC_ASSERT(correct_radix_map_reverse_size, sizeof(s_mp_radix_map_reverse) == MP_RADIX_MAP_REVERSE_SIZE)
+
+/* TODO: Branch out (preproc) if not used */
+/* Exponents y, indexed by the radix b, chosen such that b^(y) < 2^15 (index 0 and 1 are fillers) */
+#if ((INT_MAX < 1048576) || (MP_DIGIT_BIT <= 20) )
+const uint8_t s_mp_radix_exponent_y[] = {  0, 0,                      /*  0 .. 1*/
+                                           14,  9,  7, 6, 5, 5, 4, 4, /*  2 .. 9 */
+                                           4, 4, 4, 3, 3, 3, 3, 3,    /* 10 .. 17 */
+                                           3, 3, 3, 3, 3, 3, 3, 3,    /* 18 .. 25 */
+                                           3, 3, 3, 3, 3, 3, 2, 2,    /* 26 .. 33 */
+                                           2, 2, 2, 2, 2, 2, 2, 2,    /* 34 .. 41 */
+                                           2, 2, 2, 2, 2, 2, 2, 2,    /* 42 .. 49 */
+                                           2, 2, 2, 2, 2, 2, 2, 2,    /* 50 .. 57 */
+                                           2, 2, 2, 2, 2, 2, 2        /* 58 .. 64 */
+                                        };
+#else
+/* Exponents y, indexed by the radix b, chosen such that b^(y) <= 2^20 (index 0 and 1 are fillers) */
+const uint8_t s_mp_radix_exponent_y[] = {  0, 0,                      /*  0 .. 1*/
+                                           20, 12, 10, 8, 7, 7, 6, 6, /*  2 .. 9 */
+                                           6, 5, 5, 5, 5, 5, 5, 4,    /* 10 .. 17 */
+                                           4, 4, 4, 4, 4, 4, 4, 4,    /* 18 .. 25 */
+                                           4, 4, 4, 4, 4, 4, 4, 3,    /* 26 .. 33 */
+                                           3, 3, 3, 3, 3, 3, 3, 3,    /* 34 .. 41 */
+                                           3, 3, 3, 3, 3, 3, 3, 3,    /* 42 .. 49 */
+                                           3, 3, 3, 3, 3, 3, 3, 3,    /* 50 .. 57 */
+                                           3, 3, 3, 3, 3, 3, 3        /* 58 .. 64 */
+                                        };
+#endif
+MP_STATIC_ASSERT(correct_radix_exponent_y, sizeof(s_mp_radix_exponent_y) == MP_RADIX_EXPONENT_Y_SIZE)
+
+/* floor(log_2(radix)), indexed by the radix itself (index 0 and 1 are fillers) */
+const uint8_t s_mp_log2_radix[] = { 0, 0,                   /*  0 .. 1*/
+                                    1, 1, 2, 2, 2, 2, 3, 3, /*  2 .. 9 */
+                                    3, 3, 3, 3, 3, 3, 4, 4, /* 10 .. 17 */
+                                    4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 25 */
+                                    4, 4, 4, 4, 4, 4, 5, 5, /* 26 .. 33 */
+                                    5, 5, 5, 5, 5, 5, 5, 5, /* 34 .. 41 */
+                                    5, 5, 5, 5, 5, 5, 5, 5, /* 42 .. 49 */
+                                    5, 5, 5, 5, 5, 5, 5, 5, /* 50 .. 57 */
+                                    5, 5, 5, 5, 5, 5, 6     /* 58 .. 64 */
+                                  };
+MP_STATIC_ASSERT(correct_log2_radix, sizeof(s_mp_log2_radix) == MP_LOG2_RADIX_SIZE)
+
 #endif
diff --git a/s_mp_slower_read_radix.c b/s_mp_slower_read_radix.c
new file mode 100644
index 00000000..7e0e74d6
--- /dev/null
+++ b/s_mp_slower_read_radix.c
@@ -0,0 +1,48 @@
+#include "tommath_private.h"
+#ifdef S_MP_SLOWER_READ_RADIX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/*
+ * Simple digit-by-digit string to mp_int conversion: a = a * radix + digit.
+ *
+ * Reads str[start] up to, but not including, str[end] and stops early at a NUL byte.
+ * The digits are accumulated on top of the current value of "a", it is not reset here.
+ * Radix and sign are not looked at, these checks are done by the caller.
+ *
+ * Returns MP_OKAY, MP_VAL if a character is not a digit in the given radix, or the
+ * error of mp_mul_d()/mp_add_d(). "a" is set to zero on every error.
+ */
+mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix)
+{
+   mp_err err;
+   size_t i;
+
+   for (i = start; (i < end) && (str[i] != '\0'); i++) {
+      uint8_t y;
+
+      /* if the radix <= 36 the conversion is case insensitive */
+      char ch = (radix <= 36) ? (char)MP_TOUPPER((int)str[i]) : str[i];
+      /* a character below '+' wraps around to a large value and fails the size check */
+      unsigned int pos = (unsigned int)(ch - '+');
+
+      if (MP_RADIX_MAP_REVERSE_SIZE <= pos) {
+         err = MP_VAL;
+         goto LBL_ERR;
+      }
+      y = s_mp_radix_map_reverse[pos];
+      if (y >= radix) {
+         err = MP_VAL;
+         goto LBL_ERR;
+      }
+      if ((err = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY)                                          goto LBL_ERR;
+      if ((err = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY)                                              goto LBL_ERR;
+   }
+
+   return MP_OKAY;
+LBL_ERR:
+   mp_zero(a);
+   return err;
+}
+
+#endif
diff --git a/s_mp_slower_to_radix.c b/s_mp_slower_to_radix.c
new file mode 100644
index 00000000..a21e6c08
--- /dev/null
+++ b/s_mp_slower_to_radix.c
@@ -0,0 +1,110 @@
+#include "tommath_private.h"
+#ifdef S_MP_SLOWER_TO_RADIX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/* reverse an array, used for radix code */
+static void s_reverse(char *s, size_t len)
+{
+   size_t ix = 0u, iy;
+
+   /* "len - 1u" wraps around for an empty array which would send the loop out of bounds */
+   if (len < 2u) {
+      return;
+   }
+   iy = len - 1u;
+   while (ix < iy) {
+      MP_EXCH(char, s[ix], s[iy]);
+      ++ix;
+      --iy;
+   }
+}
+
+/*
+ * Simple digit-by-digit conversion of "a" into a string in the given radix.
+ *
+ * The digits are written to *str and *str is moved forward to the terminating NUL
+ * written behind them, so consecutive calls append chunk after chunk.
+ *
+ *   part_maxlen   room left in the buffer including the NUL, reduced by the number of
+ *                 characters written
+ *   part_written  if not NULL: incremented by the number of characters written, the NUL
+ *                 is not counted
+ *   pad           fill up with leading zeros to
+ *                 s_mp_radix_exponent_y[radix] * MP_RADIX_BARRETT_START_MULTIPLICATOR
+ *                 characters; meant for every chunk but the most significant one
+ *
+ * a == 0 without padding results in an empty string.
+ * Returns MP_BUF if the buffer is too small for the digits, the padding and the NUL.
+ */
+mp_err s_mp_slower_to_radix(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written, int radix, bool pad)
+{
+   size_t digs = 0u;
+   mp_int t;
+   mp_digit d;
+   mp_err err = MP_OKAY;
+   int ybar = 0;
+
+   /* A temporary pointer to the output string to make reversal simpler */
+   char *s = *str;
+
+   /* The number of digits of "radix" to be filled if this chunk is not the most significant one. */
+   if (pad) {
+      ybar = s_mp_radix_exponent_y[radix] * MP_RADIX_BARRETT_START_MULTIPLICATOR;
+   }
+
+   /* Nothing to clean up yet, "t" must not reach mp_clear() if it could not be set up */
+   if ((err = mp_init_copy(&t, a)) != MP_OKAY) {
+      return err;
+   }
+
+   while (!mp_iszero(&t)) {
+      /* Every character needs one place for itself and one for the NUL behind it.
+         Checked before decrementing, a *part_maxlen of zero must not wrap around. */
+      if (*part_maxlen < 2u) {
+         err = MP_BUF;
+         goto LTM_ERR;
+      }
+      --(*part_maxlen);
+      if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY)                                    goto LTM_ERR;
+      *s++ = s_mp_radix_map[d];
+      ++digs;
+      if (pad) {
+         ybar--;
+      }
+   }
+
+   /* Fill in leading zeros if this chunk does not contain the most significant digits. */
+   if (pad) {
+      while (ybar-- > 0) {
+         /* Same rule as above; silently dropping zeros would corrupt the number */
+         if (*part_maxlen < 2u) {
+            err = MP_BUF;
+            goto LTM_ERR;
+         }
+         --(*part_maxlen);
+         *s++ = '0';
+         digs++;
+      }
+   }
+
+   /* "rewind" */
+   s = *str;
+   /* reverse */
+   s_reverse(s, digs);
+   /* step forward */
+   *str += digs;
+   /* Add EOS at the end of every chunk to allow this function to be used stand-alone */
+   **str = '\0';
+
+   if (part_written != NULL) {
+      *part_written = *part_written + digs;
+   }
+
+   err = MP_OKAY;
+LTM_ERR:
+   mp_clear(&t);
+   return err;
+}
+
+#endif
diff --git a/sources.cmake b/sources.cmake
index 103e9c09..14262202 100644
--- a/sources.cmake
+++ b/sources.cmake
@@ -133,6 +133,9 @@ s_mp_div_school.c
 s_mp_div_small.c
 s_mp_exptmod.c
 s_mp_exptmod_fast.c
+s_mp_faster_read_radix.c
+s_mp_faster_to_radix.c
+s_mp_floor_ilog2.c
 s_mp_fp_log.c
 s_mp_fp_log_d.c
 s_mp_get_bit.c
@@ -152,6 +155,8 @@ s_mp_prime_tab.c
 s_mp_radix_map.c
 s_mp_radix_size_overestimate.c
 s_mp_rand_platform.c
+s_mp_slower_read_radix.c
+s_mp_slower_to_radix.c
 s_mp_sqr.c
 s_mp_sqr_comba.c
 s_mp_sqr_karatsuba.c
diff --git a/tommath.def b/tommath.def
index ed5aa8b0..2628095c 100644
--- a/tommath.def
+++ b/tommath.def
@@ -132,3 +132,5 @@ EXPORTS
     MP_SQR_KARATSUBA_CUTOFF
     MP_MUL_TOOM_CUTOFF
     MP_SQR_TOOM_CUTOFF
+    MP_RADIX_READ_CUTOFF
+    MP_RADIX_WRITE_CUTOFF
diff --git a/tommath.h b/tommath.h
index 1820d243..36111da4 100644
--- a/tommath.h
+++ b/tommath.h
@@ -121,7 +121,9 @@ extern int
 MP_MUL_KARATSUBA_CUTOFF,
 MP_SQR_KARATSUBA_CUTOFF,
 MP_MUL_TOOM_CUTOFF,
-MP_SQR_TOOM_CUTOFF;
+MP_SQR_TOOM_CUTOFF,
+MP_RADIX_READ_CUTOFF,
+MP_RADIX_WRITE_CUTOFF;
 #endif
 
 /* define this to use lower memory usage routines (exptmods mostly) */
diff --git a/tommath_class.h b/tommath_class.h
index 09bb3ea6..038748a2 100644
--- a/tommath_class.h
+++ b/tommath_class.h
@@ -142,6 +142,9 @@
 #   define S_MP_DIV_SMALL_C
 #   define S_MP_EXPTMOD_C
 #   define S_MP_EXPTMOD_FAST_C
+#   define S_MP_FASTER_READ_RADIX_C
+#   define S_MP_FASTER_TO_RADIX_C
+#   define S_MP_FLOOR_ILOG2_C
 #   define S_MP_FP_LOG_C
 #   define S_MP_FP_LOG_D_C
 #   define S_MP_GET_BIT_C
@@ -161,6 +164,8 @@
 #   define S_MP_RADIX_MAP_C
 #   define S_MP_RADIX_SIZE_OVERESTIMATE_C
 #   define S_MP_RAND_PLATFORM_C
+#   define S_MP_SLOWER_READ_RADIX_C
+#   define S_MP_SLOWER_TO_RADIX_C
 #   define S_MP_SQR_C
 #   define S_MP_SQR_COMBA_C
 #   define S_MP_SQR_KARATSUBA_C
@@ -651,6 +656,7 @@
 #   define MP_RAND_C
 #   define MP_READ_RADIX_C
 #   define MP_SET_C
+#   define S_MP_FLOOR_ILOG2_C
 #   define S_MP_PRIME_IS_DIVISIBLE_C
 #endif
 
@@ -738,9 +744,10 @@
 #endif
 
 #if defined(MP_READ_RADIX_C)
-#   define MP_ADD_D_C
-#   define MP_MUL_D_C
 #   define MP_ZERO_C
+#   define S_MP_FASTER_READ_RADIX_C
+#   define S_MP_SLOWER_READ_RADIX_C
+#   define S_MP_STRLEN_C
 #endif
 
 #if defined(MP_REDUCE_C)
@@ -935,9 +942,8 @@
 #endif
 
 #if defined(MP_TO_RADIX_C)
-#   define MP_CLEAR_C
-#   define MP_DIV_D_C
-#   define MP_INIT_COPY_C
+#   define S_MP_FASTER_TO_RADIX_C
+#   define S_MP_SLOWER_TO_RADIX_C
 #endif
 
 #if defined(MP_TO_SBIN_C)
@@ -1077,6 +1083,47 @@
 #   define S_MP_MONTGOMERY_REDUCE_COMBA_C
 #endif
 
+#if defined(S_MP_FASTER_READ_RADIX_C)
+#   define MP_ADD_C
+#   define MP_CLEAR_C
+#   define MP_CLEAR_MULTI_C
+#   define MP_EXPT_N_C
+#   define MP_INIT_MULTI_C
+#   define MP_INIT_SET_C
+#   define MP_MUL_2D_C
+#   define MP_MUL_C
+#   define S_MP_SLOWER_READ_RADIX_C
+#endif
+
+#if defined(S_MP_FASTER_TO_RADIX_C)
+#   define MP_2EXPT_C
+#   define MP_ADD_C
+#   define MP_ADD_D_C
+#   define MP_CLEAR_C
+#   define MP_CLEAR_MULTI_C
+#   define MP_CMP_C
+#   define MP_CNT_LSB_C
+#   define MP_COUNT_BITS_C
+#   define MP_DIV_2D_C
+#   define MP_DIV_C
+#   define MP_EXPT_N_C
+#   define MP_INIT_C
+#   define MP_INIT_I32_C
+#   define MP_INIT_MULTI_C
+#   define MP_INIT_SET_C
+#   define MP_MUL_C
+#   define MP_SUB_C
+#   define MP_SUB_D_C
+#   define S_MP_COMPUTE_S_C
+#   define S_MP_FLOOR_ILOG2_C
+#   define S_MP_SLOWER_TO_RADIX_C
+#   define S_MP_TO_RADIX_RECURSIVE_C
+#   define S_MP_ZERO_BUF_C
+#endif
+
+#if defined(S_MP_FLOOR_ILOG2_C)
+#endif
+
 #if defined(S_MP_FP_LOG_C)
 #   define MP_2EXPT_C
 #   define MP_ADD_C
@@ -1248,6 +1295,18 @@
 #if defined(S_MP_RAND_PLATFORM_C)
 #endif
 
+#if defined(S_MP_SLOWER_READ_RADIX_C)
+#   define MP_ADD_D_C
+#   define MP_MUL_D_C
+#   define MP_ZERO_C
+#endif
+
+#if defined(S_MP_SLOWER_TO_RADIX_C)
+#   define MP_CLEAR_C
+#   define MP_DIV_D_C
+#   define MP_INIT_COPY_C
+#endif
+
 #if defined(S_MP_SQR_C)
 #   define MP_CLAMP_C
 #   define MP_CLEAR_C
diff --git a/tommath_cutoffs.h b/tommath_cutoffs.h
index fb841601..20f4d4ec 100644
--- a/tommath_cutoffs.h
+++ b/tommath_cutoffs.h
@@ -11,3 +11,5 @@
 #define MP_DEFAULT_SQR_KARATSUBA_CUTOFF 120
 #define MP_DEFAULT_MUL_TOOM_CUTOFF      350
 #define MP_DEFAULT_SQR_TOOM_CUTOFF      400
+#define MP_DEFAULT_RADIX_READ_CUTOFF    600
+#define MP_DEFAULT_RADIX_WRITE_CUTOFF   600
diff --git a/tommath_private.h b/tommath_private.h
index be620dbc..a2c8dd7d 100644
--- a/tommath_private.h
+++ b/tommath_private.h
@@ -86,8 +86,27 @@ do {                                                    \
 #  define MP_SQR_KARATSUBA_CUTOFF MP_DEFAULT_SQR_KARATSUBA_CUTOFF
 #  define MP_MUL_TOOM_CUTOFF      MP_DEFAULT_MUL_TOOM_CUTOFF
 #  define MP_SQR_TOOM_CUTOFF      MP_DEFAULT_SQR_TOOM_CUTOFF
+#  define MP_RADIX_READ_CUTOFF    MP_DEFAULT_RADIX_READ_CUTOFF
+#  define MP_RADIX_WRITE_CUTOFF   MP_DEFAULT_RADIX_WRITE_CUTOFF
 #endif
 
+/* Default for MP_RADIX_BARRETT_START_MULTIPLICATOR; may be overridden at compile time. */
+#ifndef MP_RADIX_BARRETT_START_MULTIPLICATOR
+#  define MP_RADIX_BARRETT_START_MULTIPLICATOR  10
+#endif
+/* Better safe than sorry: the check sits outside of the #ifndef so that a user-supplied
+   value is validated, too. The #undef avoids an incompatible macro redefinition. */
+#if (MP_RADIX_BARRETT_START_MULTIPLICATOR <= 0)
+#  ifdef _MSC_VER
+#     pragma message("MP_RADIX_BARRETT_START_MULTIPLICATOR must be bigger than zero, setting it to one")
+#  else
+#     warning "MP_RADIX_BARRETT_START_MULTIPLICATOR must be bigger than zero, setting it to one"
+#  endif
+#  undef MP_RADIX_BARRETT_START_MULTIPLICATOR
+#  define MP_RADIX_BARRETT_START_MULTIPLICATOR  1
+#endif
+
+
 /* define heap macros */
 #ifndef MP_MALLOC
 /* default to libc stuff */
@@ -234,6 +253,14 @@ MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix,
 MP_PRIVATE mp_err s_mp_fp_log(const mp_int *a, mp_int *c) MP_WUR;
 MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR;
 
+MP_PRIVATE unsigned int s_mp_floor_ilog2(int value); /* NOTE(review): presumably floor(log2(value)); confirm behaviour for value <= 0 */
+/* "Faster"/"slower" radix conversion helpers. NOTE(review): presumably selected via MP_RADIX_READ_CUTOFF/MP_RADIX_WRITE_CUTOFF -- confirm in callers */
+MP_PRIVATE mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR;
+MP_PRIVATE mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR;
+MP_PRIVATE mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) MP_WUR;
+MP_PRIVATE mp_err s_mp_slower_to_radix(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written,
+                                       int radix, bool pad) MP_WUR;
+
 #ifdef MP_SMALL_STACK_SIZE
 
 #if defined(__GNUC__)
@@ -276,9 +303,14 @@ MP_PRIVATE void *s_mp_warray_get(void);
 MP_PRIVATE void s_mp_warray_put(void *w);
 
 #define MP_RADIX_MAP_REVERSE_SIZE 80u
+#define MP_RADIX_EXPONENT_Y_SIZE  65u
+#define MP_LOG2_RADIX_SIZE        65u
 extern MP_PRIVATE const char s_mp_radix_map[];
 extern MP_PRIVATE const uint8_t s_mp_radix_map_reverse[];
 extern MP_PRIVATE const mp_digit s_mp_prime_tab[];
+extern MP_PRIVATE const uint8_t s_mp_radix_exponent_y[];
+extern MP_PRIVATE const uint8_t s_mp_log2_radix[];
+
 
 /* number of primes */
 #define MP_PRIME_TAB_SIZE 256