Skip to content

Commit

Permalink
optimize non-asm code through flto and inline
Browse files Browse the repository at this point in the history
  • Loading branch information
zilong-dai committed Nov 4, 2024
1 parent 429cc78 commit af9eec1
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 66 deletions.
10 changes: 3 additions & 7 deletions src/bls12-381/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
ACLOCAL_AMFLAGS = -I build-aux/m4

AM_CXXFLAGS = -std=c++11 -fpic -O2
AM_CPPFLAGS = -std=c++11 -fpic -O2
AM_CXXFLAGS = $(CXXFLAGS)

AM_CFLAGS = --fpic -O2
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libbls.pc

lib_LTLIBRARIES = libbls.la
libbls_la_SOURCES = src/groth16.cpp src/arithmetic.cpp src/fp.cpp src/g.cpp src/pairing.cpp src/scalar.cpp src/utils.cpp

pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libbls.pc
65 changes: 29 additions & 36 deletions src/bls12-381/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -13,55 +13,47 @@ AC_INIT([libbls], m4_join([.], _PKG_VERSION_MAJOR, _PKG_VERSION_MINOR, _PKG_VERS
AC_SUBST(LIB_VERSION_CURRENT, _LIB_VERSION_CURRENT)
AC_SUBST(LIB_VERSION_REVISION, _LIB_VERSION_REVISION)
AC_SUBST(LIB_VERSION_AGE, _LIB_VERSION_AGE)
AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_MACRO_DIR([build-aux/m4])
AC_CANONICAL_HOST
AH_TOP([#ifndef LIBMCL_CONFIG_H])
AH_TOP([#define LIBMCL_CONFIG_H])
AH_BOTTOM([#endif /*LIBMCL_CONFIG_H*/])

AM_INIT_AUTOMAKE([1.11.2 foreign subdir-objects])

m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_ARG_ENABLE([blsasm], [AS_HELP_STRING([--enable-blsasm=yes|no], [Enable blsasm option])], [], [enable_blsasm=no])

AC_PROG_CXX
AC_PROG_CC
AM_PROG_AR

LT_INIT([win32-dll])

AC_CANONICAL_HOST
# Host-specific adjustments to the compiler and linker flags.
case "$host_os" in
  *mingw64*)
    # Request ANSI-conforming stdio from the MinGW runtime.
    CXXFLAGS="$CXXFLAGS -D__USE_MINGW_ANSI_STDIO=1"
    ;;
  *cygwin*)
    # Nothing to add for Cygwin.
    ;;
  *darwin*)
    AC_DEFINE([DARWIN], [1], [Define if on Darwin/Mac OS X])
    ;;
  *openbsd* | *freebsd* | *linux*)
    # These hosts share the same /usr/local search paths.
    CXXFLAGS="$CXXFLAGS -I/usr/local/include"
    LDFLAGS="$LDFLAGS -L/usr/local/lib"
    ;;
esac

CXXFLAGS="-O3 -I include"

AC_CANONICAL_TARGET
# Choose the field-arithmetic backend and decide whether to build with LTO.
#
#   x86_64 with --enable-blsasm=yes : define __x86_64_asm__ to select the
#                                     hand-written assembly code paths.
#   x86_64 without blsasm           : portable C++ code, built with -flto
#                                     except on Darwin.
#   any other CPU                   : portable C++ code, built with -flto.
#
# The patterns of a shell case statement must be left unquoted to act as
# globs. A quoted pattern is compared literally, so the wildcard branches
# below would never be taken and -flto would never be added.
case "$target_cpu" in
  x86_64)
    AC_MSG_NOTICE(["The target platform is $target_cpu"])
    if test "x$enable_blsasm" = "xyes"; then
      AC_MSG_NOTICE(["Enable blsasm"])
      CXXFLAGS="$CXXFLAGS -D__x86_64_asm__"
    else
      AC_MSG_NOTICE(["Disable blsasm"])
      case "$host_os" in
        *darwin*)
          # Leave CXXFLAGS untouched on Darwin, so no -flto there.
          ;;
        *)
          CXXFLAGS="$CXXFLAGS -flto"
          ;;
      esac
    fi
    ;;
  *)
    AC_MSG_NOTICE([The target platform is not x86_64])
    CXXFLAGS="$CXXFLAGS -flto"
    ;;
esac

CXXFLAGS_WARN="-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -Wundef"
CXXFLAGS="$CXXFLAGS $CXXFLAGS_WARN -I include -I src"

AC_SUBST(CXXFLAGS)
AC_SUBST(LDFLAGS)
AC_SUBST(CXX)
AC_SUBST(CC)
AC_SUBST(CXXFLAGS)

AC_CONFIG_FILES([Makefile libbls.pc])

Expand All @@ -76,4 +68,5 @@ echo " CPPFLAGS = $CPPFLAGS"
echo " CXX = $CXX"
echo " CXXFLAGS = $CXXFLAGS"
echo " LDFLAGS = $LDFLAGS"
echo " ENABLE_BLSASM = $enable_blsasm"
echo
24 changes: 12 additions & 12 deletions src/bls12-381/include/bls12-381/arithmetic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void _lsubtract(fp* z, const fp* x, const fp* y);
// The "smaller than 4p" here means the montgomery form itself as number is less than 4p.
// Therefore, at most ONE _ladd/_lsubtract/_ldouble is allowed before passing the result to _multiply,
// unless the algorithm makes sure the number is small.
#if defined(__x86_64__)
#if defined(__x86_64_asm__)
extern void (*_multiply)(fp*, const fp*, const fp*);
#else
void _multiply(fp*, const fp*, const fp*);
Expand All @@ -41,7 +41,7 @@ void _multiply(fp*, const fp*, const fp*);
// The carryOut output is guaranteed to be 0 or 1.
//
// This function's execution time does not depend on the inputs.
std::tuple<uint64_t, uint64_t> Add64(
inline std::tuple<uint64_t, uint64_t> Add64(
const uint64_t& x,
const uint64_t& y,
const uint64_t& carry
Expand All @@ -63,12 +63,12 @@ std::tuple<uint64_t, uint64_t> Sub64(
// half returned in lo.
//
// This function's execution time does not depend on the inputs.
std::tuple<uint64_t, uint64_t> Mul64(
inline std::tuple<uint64_t, uint64_t> Mul64(
const uint64_t& x,
const uint64_t& y
);

std::tuple<uint64_t, uint64_t, uint64_t> madd(
inline std::tuple<uint64_t, uint64_t, uint64_t> madd(
const uint64_t& a,
const uint64_t& b,
const uint64_t& t,
Expand All @@ -77,57 +77,57 @@ std::tuple<uint64_t, uint64_t, uint64_t> madd(
);

// madd0 hi = a*b + c (discards lo bits)
uint64_t madd0(
inline uint64_t madd0(
const uint64_t& a,
const uint64_t& b,
const uint64_t& c
);

// madd1 hi, lo = a*b + c
std::tuple<uint64_t, uint64_t> madd1(
inline std::tuple<uint64_t, uint64_t> madd1(
const uint64_t& a,
const uint64_t& b,
const uint64_t& c
);

// madd2 hi, lo = a*b + c + d
std::tuple<uint64_t, uint64_t> madd2(
inline std::tuple<uint64_t, uint64_t> madd2(
const uint64_t& a,
const uint64_t& b,
const uint64_t& c,
const uint64_t& d
);

// madd2s superhi, hi, lo = 2*a*b + c + d + e
std::tuple<uint64_t, uint64_t, uint64_t> madd2s(
inline std::tuple<uint64_t, uint64_t, uint64_t> madd2s(
const uint64_t& a,
const uint64_t& b,
const uint64_t& c,
const uint64_t& d,
const uint64_t& e
);

std::tuple<uint64_t, uint64_t, uint64_t> madd1s(
inline std::tuple<uint64_t, uint64_t, uint64_t> madd1s(
const uint64_t& a,
const uint64_t& b,
const uint64_t& d,
const uint64_t& e
);

std::tuple<uint64_t, uint64_t, uint64_t> madd2sb(
inline std::tuple<uint64_t, uint64_t, uint64_t> madd2sb(
const uint64_t& a,
const uint64_t& b,
const uint64_t& c,
const uint64_t& e
);

std::tuple<uint64_t, uint64_t, uint64_t> madd1sb(
inline std::tuple<uint64_t, uint64_t, uint64_t> madd1sb(
const uint64_t& a,
const uint64_t& b,
const uint64_t& e
);

std::tuple<uint64_t, uint64_t> madd3(
inline std::tuple<uint64_t, uint64_t> madd3(
const uint64_t& a,
const uint64_t& b,
const uint64_t& c,
Expand Down
2 changes: 1 addition & 1 deletion src/bls12-381/libbls.pc.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@

Name: libmcl
Name: libbls
Description: Pairing Library
URL: https://github.com/QEDProtocol/bls12-381
Version: @PACKAGE_VERSION@
Expand Down
20 changes: 10 additions & 10 deletions src/bls12-381/src/arithmetic.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <bls12-381/bls12-381.hpp>
#ifdef __x86_64__
#ifdef __x86_64_asm__
#include <cpuid.h>
#endif

Expand All @@ -8,7 +8,7 @@ using namespace std;
namespace bls12_381
{

#ifdef __x86_64__
#ifdef __x86_64_asm__
void _add(fp* z, const fp* x, const fp* y)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -96,7 +96,7 @@ void _add(fp* z, const fp* x, const fp* y)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void _ladd(fp* z, const fp* x, const fp* y)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -136,7 +136,7 @@ void _ladd(fp* z, const fp* x, const fp* y)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void _double(fp* z, const fp* x)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -223,7 +223,7 @@ void _double(fp* z, const fp* x)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void _ldouble(fp* z, const fp* x)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -263,7 +263,7 @@ void _ldouble(fp* z, const fp* x)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void _subtract(fp* z, const fp* x, const fp* y)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -342,7 +342,7 @@ void _subtract(fp* z, const fp* x, const fp* y)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void _lsubtract(fp* z, const fp* x, const fp* y)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -382,7 +382,7 @@ void _lsubtract(fp* z, const fp* x, const fp* y)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void __negate(fp* z, const fp* x)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -436,7 +436,7 @@ void _negate(fp* z, const fp* x)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void __multiply(fp* z, const fp* x, const fp* y)
{
// x86_64 calling convention (https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI):
Expand Down Expand Up @@ -1624,7 +1624,7 @@ void _multiply(fp* z, const fp* x, const fp* y)
}
#endif

#ifdef __x86_64__
#ifdef __x86_64_asm__
void _square(fp* z, const fp* x)
{
#ifdef __clang__
Expand Down

0 comments on commit af9eec1

Please sign in to comment.