Skip to content

Commit

Permalink
add MP_SMALL_STACK_SIZE option
Browse files Browse the repository at this point in the history
This adds an option to use a heap-buffer for the usually stack-based
`MP_WARRAY`-sized temporary buffers.

Per default it will reserve a single buffer, which can be modified
* at compile-time via the `MP_WARRAY_NUM` define
* at run-time by calling `mp_warray_init()`

The internal structure can only be created once. To change the maximum
number of elements, the entire structure first has to be freed by calling
`mp_warray_free()` and can then be created again.

In case one wants to use this option with multiple threads, one shall
use the `mp_warray_init()` function and pass appropriate locking functions.

Signed-off-by: Steffen Jaeckel <[email protected]>
  • Loading branch information
sjaeckel committed Mar 11, 2024
1 parent 7f39a72 commit 41a1437
Show file tree
Hide file tree
Showing 20 changed files with 394 additions and 9 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ jobs:
# RSA superclass with tests (no sanitizer, but debug info)
- { BUILDOPTIONS: '--with-cc=gcc --with-m64 --cflags=-DLTM_NOTHING --cflags=-DSC_RSA_1_WITH_TESTS --limit-valgrind', SANITIZER: '', COMPILE_DEBUG: '1', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: '' }

# Build with small stack-size
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'gcc-multilib' }
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --cflags=-DMP_NO_LOCKING', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'gcc-multilib' }
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 gcc-multilib' }
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --cflags=-DMP_TEST_LOCKING', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 gcc-multilib' }

# Test "autotuning", the automatic evaluation and setting of the Toom-Cook cut-offs.
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_16BIT --limit-valgrind --make-option=tune'
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_32BIT --limit-valgrind --make-option=tune'
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ if(COMPILE_LTO)
if(COMPILER_SUPPORTS_LTO)
set_property(TARGET ${PROJECT_NAME} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
message(SEND_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.")
message(FATAL_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.")
endif()
endif()

Expand Down
31 changes: 29 additions & 2 deletions demo/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2451,6 +2451,21 @@ static int test_mp_pack_unpack(void)
return EXIT_FAILURE;
}


#ifdef MP_TEST_LOCKING
#ifdef MP_NO_LOCKING
#error "Can't test locking when locking is disabled"
#endif
static mp_lock lock_ctx;
/* Callback installed as both the lock and the unlock function of lock_ctx in
 * the locking test. It performs no actual locking; it only checks that the
 * context pointer it receives is the address of lock_ctx.
 * Returns 0 on the straight-line path and -1 via LBL_ERR.
 * NOTE(review): presumably EXPECT jumps to LBL_ERR when its condition is
 * false -- confirm against the EXPECT macro definition. */
static int noop_lock_unlock(void *ctx)
{
EXPECT(ctx == &lock_ctx);
return 0;
LBL_ERR:
return -1;
}
#endif

#ifndef LTM_TEST_DYNAMIC
#define ONLY_PUBLIC_API_C
#endif
Expand Down Expand Up @@ -2525,14 +2540,22 @@ static int unit_tests(int argc, char **argv)
unsigned long i, ok, fail, nop;
uint64_t t;
int j;
#ifdef MP_TEST_LOCKING
lock_ctx.lock = noop_lock_unlock;
lock_ctx.unlock = noop_lock_unlock;
lock_ctx.ctx = &lock_ctx;

if (mp_warray_init(MP_WARRAY_NUM, true, &lock_ctx) != MP_OKAY)
return EXIT_FAILURE;
#endif
ok = fail = nop = 0;

t = (uint64_t)time(NULL);
printf("SEED: 0x%" PRIx64 "\n\n", t);
s_mp_rand_jenkins_init(t);
mp_rand_source(s_mp_rand_jenkins);


for (i = 0; i < (sizeof(test) / sizeof(test[0])); ++i) {
if (argc > 1) {
for (j = 1; j < argc; ++j) {
Expand All @@ -2556,8 +2579,12 @@ static int unit_tests(int argc, char **argv)
}
fprintf(fail?stderr:stdout, "Tests OK/NOP/FAIL: %lu/%lu/%lu\n", ok, nop, fail);

if (fail != 0) return EXIT_FAILURE;
else return EXIT_SUCCESS;
EXPECT(mp_warray_free() != -2);

if (fail == 0)
return EXIT_SUCCESS;
LBL_ERR:
return EXIT_FAILURE;
}

int main(int argc, char **argv)
Expand Down
64 changes: 63 additions & 1 deletion doc/bn.tex
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,20 @@ \subsubsection{Operand Size Related}
\end{center}
\end{small}

\subsection{Small-Stack option}
\label{ch:SMALL_STACK_INTRO}
The library can be compiled with the symbol \texttt{MP\_SMALL\_STACK\_SIZE} defined, which results in
the temporary \texttt{MP\_WARRAY}-sized stack buffers being put on the heap.
This comes with one drawback: the thread-safety that the library otherwise promises is no longer given.
Therefore, if the Small-Stack option is enabled in a multi-threaded program, the provided locking
mechanism shall be used.
Some use cases want the Small-Stack option but do not use threads at all; for those, locking
can be disabled by defining the symbol \texttt{MP\_NO\_LOCKING}.

If the number of threads that must be supported is known in advance, the symbol \texttt{MP\_WARRAY\_NUM} can
be useful. It can be pre-defined at compile time to the number of heap buffers created on automatic
initialisation. Cf. \ref{ch:SMALL_STACK_API} for the dynamic API and further details.

\section{Purpose of LibTomMath}
Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath
was not written with bleeding edge performance in mind. First and foremost LibTomMath was written
Expand Down Expand Up @@ -428,7 +442,9 @@ \chapter{Getting Started with LibTomMath}
\section{Building Programs}
In order to use LibTomMath you must include ``tommath.h'' and link against the appropriate library
file (typically
libtommath.a). There is no library initialization required and the entire library is thread safe.
libtommath.a). There is no library initialization required and the entire library is thread safe
if it is used in its default configuration. Locking is recommended if the small-stack option
is enabled and multiple threads are used, cf. \ref{ch:SMALL_STACK_INTRO} and \ref{ch:SMALL_STACK_API}.

\section{Return Codes}
There are five possible return codes a function may return.
Expand Down Expand Up @@ -813,6 +829,52 @@ \subsection{Adding additional digits}
\end{alltt}
\end{small}

\section{Small-Stack option}
\label{ch:SMALL_STACK_API}

In case the \texttt{MP\_SMALL\_STACK\_SIZE} symbol is defined the following functions
can be useful.

To initialize the internal structure the following function shall be called.

\index{mp\_warray\_init}
\begin{alltt}
mp_err mp_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock);
\end{alltt}

The flag \texttt{preallocate} controls whether the internal buffers --
\texttt{n\_alloc} buffers of size \texttt{MP\_WARRAY} -- will be allocated when
\texttt{mp\_warray\_init()} is called, or whether they will be allocated when required.
The \texttt{mp\_lock} struct looks as follows and shall be used to protect the
internal structure when using the library in a multi-threaded application.

\index{mp\_lock}
\begin{alltt}
typedef struct {
int (*lock)(void *ctx);
int (*unlock)(void *ctx);
void *ctx;
} mp_lock;
\end{alltt}

The \texttt{mp\_lock.lock} function will be called before, and the \texttt{mp\_lock.unlock} function
after, modifying the internal struct.
The \texttt{mp\_lock.ctx} element will be passed to both functions.

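To free the internally allocated memory the following function shall be called. It returns $0$ on success and $-2$, without freeing anything, if one of the buffers is still in use.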

\index{mp\_warray\_free}
\begin{alltt}
int mp_warray_free(void);
\end{alltt}


Those two API functions are always available, even if the \texttt{MP\_SMALL\_STACK\_SIZE} option
has been disabled at compile time.
In that case \texttt{mp\_warray\_init()} will return \texttt{MP\_ERR} and \texttt{mp\_warray\_free()}
will return $-1$.


\chapter{Basic Operations}
\section{Copying}

Expand Down
2 changes: 1 addition & 1 deletion helper.pl
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ sub update_dep
foreach my $filename (glob '*mp_*.c') {
my $content;
my $cc = $ENV{'CC'} || 'gcc';
$content = `$cc -E -x c -DLTM_ALL $filename`;
$content = `$cc -E -x c -DLTM_ALL -DMP_SMALL_STACK_SIZE $filename`;
$content =~ s/^# 1 "$filename".*?^# 2 "$filename"//ms;

# convert filename to upper case so we can use it as a define
Expand Down
36 changes: 36 additions & 0 deletions mp_warray_free.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include "tommath_private.h"
#ifdef MP_WARRAY_FREE_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */

/* static check that the multiplication won't overflow */
MP_STATIC_ASSERT(warray_free_sz_does_not_overflow, (sizeof(mp_word) * MP_WARRAY) >= MP_WARRAY)

/*
 * Release every cached warray buffer together with the bookkeeping lists.
 *
 * Returns 0 when the buffers were released, or -2 (with nothing released)
 * when at least one slot of the used list still holds a buffer.
 * The whole operation runs between S_MP_WARRAY_LOCK() and S_MP_WARRAY_UNLOCK().
 */
static int s_warray_free(void)
{
   int ret = 0;
   size_t idx;

   S_MP_WARRAY_LOCK();

   /* refuse to tear anything down while a buffer is still handed out */
   idx = 0;
   while (idx < s_mp_warray.allocated) {
      if (s_mp_warray.l_used[idx].warray != NULL) {
         ret = -2;
         goto ERR_OUT;
      }
      idx++;
   }

   /* nothing is in use: drop every buffer parked in the free list */
   idx = 0;
   while (idx < s_mp_warray.allocated) {
      MP_FREE(s_mp_warray.l_free[idx].warray, sizeof(mp_word) * MP_WARRAY);
      s_mp_warray.l_free[idx].warray = NULL;
      idx++;
   }

   /* finally hand the lists themselves to the shared teardown helper */
   s_mp_warray_free(s_mp_warray.usable);

ERR_OUT:
   S_MP_WARRAY_UNLOCK();
   return ret;
}

/*
 * Public entry point for releasing the warray buffers.
 *
 * Forwards to s_warray_free() when MP_SMALL_STACK_SIZE is available;
 * otherwise there is nothing to free and -1 is returned.
 */
int mp_warray_free(void)
{
   int result = -1;

   if (MP_HAS(MP_SMALL_STACK_SIZE)) {
      result = s_warray_free();
   }
   return result;
}

#endif
55 changes: 55 additions & 0 deletions mp_warray_init.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include "tommath_private.h"
#ifdef MP_WARRAY_INIT_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */

/*
 * Set up the global warray bookkeeping (s_mp_warray) for n_alloc buffers.
 *
 * n_alloc      number of slots reserved in the free list and the used list
 * preallocate  when true, an MP_WARRAY-sized buffer is allocated for every
 *              slot right away; otherwise only the lists are created
 * lock         optional locking callbacks; they are installed only when
 *              MP_USE_LOCKING is available and lock is non-NULL
 *
 * Returns MP_OKAY on success, MP_VAL when the lists already exist or when a
 * lock is passed with a NULL lock/unlock callback, and MP_MEM when an
 * allocation fails.
 */
static mp_err s_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock)
{
   size_t n;

   /* the structure can only be created once; it must be freed before it is set up again */
   if (s_mp_warray.l_free != NULL || s_mp_warray.l_used != NULL) {
      return MP_VAL;
   }

   if (MP_HAS(MP_USE_LOCKING) && (lock != NULL)) {
      if (lock->lock == NULL || lock->unlock == NULL) {
         return MP_VAL;
      }
      s_mp_warray.lock = *lock;
      s_mp_warray.locking_enabled = true;
   } else {
      s_mp_zero_buf(&s_mp_warray.lock, sizeof(s_mp_warray.lock));
      /* The callbacks were just wiped, so the flag must not keep a stale
       * 'true' from an earlier initialisation that did install a lock. */
      s_mp_warray.locking_enabled = false;
   }

   s_mp_warray.l_free = MP_CALLOC(n_alloc, sizeof(*(s_mp_warray.l_free)));
   s_mp_warray.l_used = MP_CALLOC(n_alloc, sizeof(*(s_mp_warray.l_used)));
   if (s_mp_warray.l_free == NULL || s_mp_warray.l_used == NULL) {
      /* one of the two lists may have been allocated; let the helper clean up */
      s_mp_warray_free(n_alloc);
      return MP_MEM;
   }

   if (preallocate) {
      for (n = 0; n < n_alloc; ++n) {
         s_mp_warray.l_free[n].warray = MP_CALLOC(MP_WARRAY, sizeof(mp_word));
         if (s_mp_warray.l_free[n].warray == NULL) {
            /* roll back the buffers obtained so far, newest first */
            while (n > 0) {
               n--;
               MP_FREE(s_mp_warray.l_free[n].warray, MP_WARRAY * sizeof(mp_word));
               s_mp_warray.l_free[n].warray = NULL;
            }
            s_mp_warray_free(n_alloc);
            return MP_MEM;
         }
      }
      s_mp_warray.allocated = n_alloc;
   }

   s_mp_warray.usable = n_alloc;
   return MP_OKAY;
}

/*
 * Public entry point for creating the warray bookkeeping.
 *
 * Without MP_SMALL_STACK_SIZE there is nothing to initialise and MP_ERR is
 * returned; otherwise the arguments are forwarded unchanged to
 * s_warray_init() and its result is returned.
 */
mp_err mp_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock)
{
   if (!MP_HAS(MP_SMALL_STACK_SIZE)) {
      return MP_ERR;
   }
   return s_warray_init(n_alloc, preallocate, lock);
}

#endif
7 changes: 6 additions & 1 deletion s_mp_montgomery_reduce_comba.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@ mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho)
{
int ix, oldused;
mp_err err;
mp_word W[MP_WARRAY];
mp_word MP_ALLOC_WARRAY(W);

MP_CHECK_WARRAY(W);

if (x->used > MP_WARRAY) {
MP_FREE_WARRAY(W);
return MP_VAL;
}

Expand All @@ -26,6 +29,7 @@ mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho)

/* grow a as required */
if ((err = mp_grow(x, n->used + 1)) != MP_OKAY) {
MP_FREE_WARRAY(W);
return err;
}

Expand Down Expand Up @@ -110,6 +114,7 @@ mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho)

mp_clamp(x);

MP_FREE_WARRAY(W);
/* if A >= m then A = A - m */
if (mp_cmp_mag(x, n) != MP_LT) {
return s_mp_sub(x, n, x);
Expand Down
6 changes: 5 additions & 1 deletion s_mp_mul_comba.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,18 @@ mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs)
{
int oldused, pa, ix;
mp_err err;
mp_digit W[MP_WARRAY];
mp_digit MP_ALLOC_WARRAY(W);
mp_word _W;

if (digs < 0) {
return MP_VAL;
}

MP_CHECK_WARRAY(W);

/* grow the destination as required */
if ((err = mp_grow(c, digs)) != MP_OKAY) {
MP_FREE_WARRAY(W);
return err;
}

Expand Down Expand Up @@ -77,6 +80,7 @@ mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs)
s_mp_zero_digs(c->dp + c->used, oldused - c->used);

mp_clamp(c);
MP_FREE_WARRAY(W);
return MP_OKAY;
}
#endif
6 changes: 5 additions & 1 deletion s_mp_mul_high_comba.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,19 @@ mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs
{
int oldused, pa, ix;
mp_err err;
mp_digit W[MP_WARRAY];
mp_digit MP_ALLOC_WARRAY(W);
mp_word _W;

if (digs < 0) {
return MP_VAL;
}

MP_CHECK_WARRAY(W);

/* grow the destination as required */
pa = a->used + b->used;
if ((err = mp_grow(c, pa)) != MP_OKAY) {
MP_FREE_WARRAY(W);
return err;
}

Expand Down Expand Up @@ -69,6 +72,7 @@ mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs
s_mp_zero_digs(c->dp + c->used, oldused - c->used);

mp_clamp(c);
MP_FREE_WARRAY(W);
return MP_OKAY;
}
#endif
6 changes: 5 additions & 1 deletion s_mp_sqr_comba.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@ After that loop you do the squares and add them in.
mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b)
{
int oldused, pa, ix;
mp_digit W[MP_WARRAY];
mp_digit MP_ALLOC_WARRAY(W);
mp_word W1;
mp_err err;

MP_CHECK_WARRAY(W);

/* grow the destination as required */
pa = a->used + a->used;
if ((err = mp_grow(b, pa)) != MP_OKAY) {
MP_FREE_WARRAY(W);
return err;
}

Expand Down Expand Up @@ -82,6 +85,7 @@ mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b)
s_mp_zero_digs(b->dp + b->used, oldused - b->used);

mp_clamp(b);
MP_FREE_WARRAY(W);
return MP_OKAY;
}
#endif
8 changes: 8 additions & 0 deletions s_mp_warray.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include "tommath_private.h"
#ifdef S_MP_WARRAY_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */

/* The single shared warray state object; mp_warray_init() and
 * mp_warray_free() read and modify its fields. */
st_warray s_mp_warray;

#endif
Loading

0 comments on commit 41a1437

Please sign in to comment.