v23.8

JayDDee · Nov 11, 2023 · 26b9429 · 26b9429
1 parent e043698
commit 26b9429
Show file tree

Hide file tree

Showing 44 changed files with 4,152 additions and 15,325 deletions.
diff --git a/Makefile.am b/Makefile.am
@@ -79,11 +79,6 @@ cpuminer_SOURCES = \
   algo/hamsi/hamsi-hash-4way.c \
   algo/haval/haval.c \
   algo/haval/haval-hash-4way.c \
-  algo/hodl/aes.c \
-  algo/hodl/hodl-gate.c \
-  algo/hodl/hodl-wolf.c \
-  algo/hodl/sha512_avx.c \
-  algo/hodl/sha512_avx2.c \
   algo/jh/sph_jh.c \
   algo/jh/jh-hash-4way.c \
   algo/jh/jha-gate.c \
@@ -148,6 +143,8 @@ cpuminer_SOURCES = \
   algo/scrypt/scrypt.c \
   algo/scrypt/scrypt-core-4way.c \
   algo/scrypt/neoscrypt.c \
+  algo/sha/sha1.c \
+  algo/sha/sha1-hash.c \
   algo/sha/sha256-hash.c \
   algo/sha/sph_sha2.c \
   algo/sha/sph_sha2big.c \
@@ -278,20 +275,10 @@ cpuminer_SOURCES = \
   algo/yespower/yespower-ref.c \
   algo/yespower/yespower-blake2b-ref.c
 
-
 disable_flags =
 
 if USE_ASM
    cpuminer_SOURCES += asm/neoscrypt_asm.S
-if ARCH_x86
-   cpuminer_SOURCES += asm/sha2-x86.S asm/scrypt-x86.S
-endif
-if ARCH_x86_64
-   cpuminer_SOURCES += asm/sha2-x64.S asm/scrypt-x64.S
-endif
-if ARCH_ARM
-   cpuminer_SOURCES += asm/sha2-arm.S asm/scrypt-arm.S
-endif
 else
    disable_flags += -DNOASM
 endif
@@ -301,7 +288,7 @@ if HAVE_WINDOWS
 endif
 
 cpuminer_LDFLAGS	= @LDFLAGS@
-cpuminer_LDADD	= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lssl -lcrypto -lgmp
+cpuminer_LDADD	= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@  -lgmp
 cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES)
 cpuminer_CFLAGS   = -Wno-pointer-sign -Wno-pointer-to-int-cast $(disable_flags)
 

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
@@ -73,6 +73,13 @@ If not what makes it happen or not happen?
 Change Log
 ----------
 
+v23.8
+
+Cpuminer-opt is no longer dependent on OpenSSL.
+Removed Hodl algo.
+Removed legacy Sha256 & Scrypt ASM code.
+ARM: Echo AES is working and enabled for x17.
+
 v23.7
 
 Fixed blake2s, broken in v3.23.4.

diff --git a/algo-gate-api.c b/algo-gate-api.c
@@ -310,7 +310,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
     case ALGO_GROESTL:      rc = register_groestl_algo       ( gate ); break;
     case ALGO_HEX:          rc = register_hex_algo           ( gate ); break;
     case ALGO_HMQ1725:      rc = register_hmq1725_algo       ( gate ); break;
-    case ALGO_HODL:         rc = register_hodl_algo          ( gate ); break;
     case ALGO_JHA:          rc = register_jha_algo           ( gate ); break;
     case ALGO_KECCAK:       rc = register_keccak_algo        ( gate ); break;
     case ALGO_KECCAKC:      rc = register_keccakc_algo       ( gate ); break;

diff --git a/algo/echo/aes_ni/hash.c b/algo/echo/aes_ni/hash.c
@@ -21,112 +21,92 @@
 #include "hash_api.h"
 #include "simd-utils.h"
 
-MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
-MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
-MYALIGN const unsigned int _k_opt[] = {0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121, 0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1};
-MYALIGN const unsigned int _k_inv[] = {0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309, 0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C};
-MYALIGN const unsigned int _k_sb1[] = {0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E, 0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1};
-MYALIGN const unsigned int _k_sb2[] = {0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955, 0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8};
-MYALIGN const unsigned int _k_sb3[] = {0xC0211A00, 0x53E17249, 0xA8B2DA89, 0xFB68933B, 0xF0030A00, 0x5FF35C55, 0xA6ACFAA5, 0xF956AF09};
-MYALIGN const unsigned int _k_sb4[] = {0x3FD64100, 0xE1E937A0, 0x49087E9F, 0xA876DE97, 0xC393EA00, 0x3D50AED7, 0x876D2914, 0xBA44FE79};
-MYALIGN const unsigned int _k_sb5[] = {0xF4867F00, 0x5072D62F, 0x5D228BDB, 0x0DA9A4F9, 0x3971C900, 0x0B487AC2, 0x8A43F0FB, 0x81B332B8};
-MYALIGN const unsigned int _k_sb7[] = {0xFFF75B00, 0xB20845E9, 0xE1BAA416, 0x531E4DAC, 0x3390E000, 0x62A3F282, 0x21C1D3B1, 0x43125170};
-MYALIGN const unsigned int _k_sbo[] = {0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A, 0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1};
-MYALIGN const unsigned int _k_h63[] = {0x63636363, 0x63636363, 0x63636363, 0x63636363};
-MYALIGN const unsigned int _k_hc6[] = {0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6};
-MYALIGN const unsigned int _k_h5b[] = {0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b};
-MYALIGN const unsigned int _k_h4e[] = {0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e};
-MYALIGN const unsigned int _k_h0e[] = {0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e};
-MYALIGN const unsigned int _k_h15[] = {0x15151515, 0x15151515, 0x15151515, 0x15151515};
-MYALIGN const unsigned int _k_aesmix1[] = {0x0f0a0500, 0x030e0904, 0x07020d08, 0x0b06010c};
-MYALIGN const unsigned int _k_aesmix2[] = {0x000f0a05, 0x04030e09, 0x0807020d, 0x0c0b0601};
-MYALIGN const unsigned int _k_aesmix3[] = {0x05000f0a, 0x0904030e, 0x0d080702, 0x010c0b06};
-MYALIGN const unsigned int _k_aesmix4[] = {0x0a05000f, 0x0e090403, 0x020d0807, 0x06010c0b};
-
-
-MYALIGN const unsigned int 	const1[]		= {0x00000001, 0x00000000, 0x00000000, 0x00000000};
-MYALIGN const unsigned int	mul2mask[]		= {0x00001b00, 0x00000000, 0x00000000, 0x00000000};
-MYALIGN const unsigned int	lsbmask[]		= {0x01010101, 0x01010101, 0x01010101, 0x01010101};
-MYALIGN const unsigned int	invshiftrows[]	= {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
-MYALIGN const unsigned int	zero[]			= {0x00000000, 0x00000000, 0x00000000, 0x00000000};
-MYALIGN const unsigned int	mul2ipt[]		= {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};
-
-
-#define ECHO_SUBBYTES4(state, j) \
-   state[0][j] = v128_aesenc(state[0][j], k1);\
-   k1 = v128_add32(k1, cast_v128(const1));\
-   state[1][j] = v128_aesenc(state[1][j], k1);\
-   k1 = v128_add32(k1, cast_v128(const1));\
-   state[2][j] = v128_aesenc(state[2][j], k1);\
-   k1 = v128_add32(k1, cast_v128(const1));\
-   state[3][j] = v128_aesenc(state[3][j], k1);\
-   k1 = v128_add32(k1, cast_v128(const1));\
-   state[0][j] = v128_aesenc(state[0][j], v128_zero ); \
-   state[1][j] = v128_aesenc(state[1][j], v128_zero ); \
-   state[2][j] = v128_aesenc(state[2][j], v128_zero ); \
-   state[3][j] = v128_aesenc(state[3][j], v128_zero )
-
-#define ECHO_SUBBYTES(state, i, j) \
-	state[i][j] = v128_aesenc(state[i][j], k1);\
-   k1 = v128_add32(k1, cast_v128(const1));\
-	state[i][j] = v128_aesenc(state[i][j], cast_v128(zero))
-
-#define ECHO_MIXBYTES(state1, state2, j, t1, t2, s2) \
-	s2 = v128_add8(state1[0][j], state1[0][j]);\
-	t1 = v128_sr16(state1[0][j], 7);\
-	t1 = v128_and(t1, cast_v128(lsbmask));\
-	t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
-	s2 = v128_xor(s2, t2);\
-	state2[0][j] = s2;\
-	state2[1][j] = state1[0][j];\
-	state2[2][j] = state1[0][j];\
-	state2[3][j] = v128_xor(s2, state1[0][j]);\
-	s2 = v128_add8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
-	t1 = v128_sr16(state1[1][(j + 1) & 3], 7);\
-	t1 = v128_and(t1, cast_v128(lsbmask));\
-	t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
-	s2 = v128_xor(s2, t2);\
-	state2[0][j] = v128_xor3(state2[0][j], s2, state1[1][(j + 1) & 3] );\
-	state2[1][j] = v128_xor(state2[1][j], s2);\
-	state2[2][j] = v128_xor(state2[2][j], state1[1][(j + 1) & 3]);\
-	state2[3][j] = v128_xor(state2[3][j], state1[1][(j + 1) & 3]);\
-	s2 = v128_add8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
-	t1 = v128_sr16(state1[2][(j + 2) & 3], 7);\
-	t1 = v128_and(t1, cast_v128(lsbmask));\
-	t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
-	s2 = v128_xor(s2, t2);\
-	state2[0][j] = v128_xor(state2[0][j], state1[2][(j + 2) & 3]);\
-	state2[1][j] = v128_xor3(state2[1][j], s2, state1[2][(j + 2) & 3] );\
-	state2[2][j] = v128_xor(state2[2][j], s2);\
-	state2[3][j] = v128_xor(state2[3][j], state1[2][(j + 2) & 3]);\
-	s2 = v128_add8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
-	t1 = v128_sr16(state1[3][(j + 3) & 3], 7);\
-	t1 = v128_and(t1, cast_v128(lsbmask));\
-	t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
-	s2 = v128_xor(s2, t2);\
-	state2[0][j] = v128_xor(state2[0][j], state1[3][(j + 3) & 3]);\
-	state2[1][j] = v128_xor(state2[1][j], state1[3][(j + 3) & 3]);\
-	state2[2][j] = v128_xor3(state2[2][j], s2, state1[3][(j + 3) & 3] );\
-	state2[3][j] = v128_xor(state2[3][j], s2)
+const uint32_t	const1[]	      __attribute__ ((aligned (32))) =
+   { 0x00000001, 0x00000000, 0x00000000, 0x00000000 };
+const uint32_t	mul2mask[]     __attribute__ ((aligned (16))) =
+   { 0x00001b00, 0x00000000, 0x00000000, 0x00000000 };
+const uint32_t	lsbmask[]      __attribute__ ((aligned (16))) =
+   { 0x01010101, 0x01010101, 0x01010101, 0x01010101 };
+const uint32_t	invshiftrows[]	__attribute__ ((aligned (16))) =
+   { 0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c };
+
+#define ECHO_SUBBYTES4( state, j ) \
+   state[0][j] = v128_aesenc( state[0][j], k1 ); \
+   k1 = v128_add32( k1, cast_v128(const1) ); \
+   state[1][j] = v128_aesenc( state[1][j], k1 ); \
+   k1 = v128_add32( k1, cast_v128(const1) ); \
+   state[2][j] = v128_aesenc( state[2][j], k1 ); \
+   k1 = v128_add32( k1, cast_v128(const1) ); \
+   state[3][j] = v128_aesenc( state[3][j], k1 ); \
+   k1 = v128_add32( k1, cast_v128(const1) ); \
+   state[0][j] = v128_aesenc_nokey( state[0][j] ); \
+   state[1][j] = v128_aesenc_nokey( state[1][j] ); \
+   state[2][j] = v128_aesenc_nokey( state[2][j] ); \
+   state[3][j] = v128_aesenc_nokey( state[3][j] )
+
+#define ECHO_SUBBYTES( state, i, j ) \
+	state[i][j] = v128_aesenc( state[i][j], k1 ); \
+   k1 = v128_add32( k1, cast_v128(const1) ); \
+	state[i][j] = v128_aesenc_nokey( state[i][j] )
+
+#define ECHO_MIXBYTES( state1, state2, j, t1, t2, s2 ) \
+	s2 = v128_add8( state1[0][j], state1[0][j] ); \
+	t1 = v128_sr16( state1[0][j], 7 ); \
+	t1 = v128_and( t1, cast_v128(lsbmask) ); \
+	t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
+	s2 = v128_xor( s2, t2 ); \
+	state2[0][j] = s2; \
+	state2[1][j] = state1[0][j]; \
+	state2[2][j] = state1[0][j]; \
+	state2[3][j] = v128_xor(s2, state1[0][j] ); \
+	s2 = v128_add8( state1[1][(j + 1) & 3], state1[1][(j + 1) & 3] ); \
+	t1 = v128_sr16( state1[1][(j + 1) & 3], 7 ); \
+	t1 = v128_and( t1, cast_v128(lsbmask) ); \
+	t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
+	s2 = v128_xor( s2, t2 ); \
+	state2[0][j] = v128_xor3( state2[0][j], s2, state1[1][(j + 1) & 3] );\
+	state2[1][j] = v128_xor( state2[1][j], s2 ); \
+	state2[2][j] = v128_xor( state2[2][j], state1[1][(j + 1) & 3] ); \
+	state2[3][j] = v128_xor( state2[3][j], state1[1][(j + 1) & 3] ); \
+	s2 = v128_add8( state1[2][(j + 2) & 3], state1[2][(j + 2) & 3] ); \
+	t1 = v128_sr16( state1[2][(j + 2) & 3], 7 ); \
+	t1 = v128_and( t1, cast_v128(lsbmask) ); \
+	t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
+	s2 = v128_xor( s2, t2 ); \
+	state2[0][j] = v128_xor( state2[0][j], state1[2][(j + 2) & 3] ); \
+	state2[1][j] = v128_xor3( state2[1][j], s2, state1[2][(j + 2) & 3] ); \
+	state2[2][j] = v128_xor( state2[2][j], s2 ); \
+	state2[3][j] = v128_xor( state2[3][j], state1[2][(j + 2) & 3] ); \
+	s2 = v128_add8( state1[3][(j + 3) & 3], state1[3][(j + 3) & 3] ); \
+	t1 = v128_sr16( state1[3][(j + 3) & 3], 7 ); \
+	t1 = v128_and( t1, cast_v128(lsbmask) ); \
+	t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
+	s2 = v128_xor( s2, t2 ); \
+	state2[0][j] = v128_xor( state2[0][j], state1[3][(j + 3) & 3] ); \
+	state2[1][j] = v128_xor( state2[1][j], state1[3][(j + 3) & 3] ); \
+	state2[2][j] = v128_xor3( state2[2][j], s2, state1[3][(j + 3) & 3] ); \
+	state2[3][j] = v128_xor( state2[3][j], s2 )
 
 
 #define ECHO_ROUND_UNROLL2 \
-   ECHO_SUBBYTES4(_state, 0);\
-   ECHO_SUBBYTES4(_state, 1);\
-   ECHO_SUBBYTES4(_state, 2);\
-   ECHO_SUBBYTES4(_state, 3);\
-   ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
-   ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
-   ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
-   ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
-   ECHO_SUBBYTES4(_state2, 0);\
-   ECHO_SUBBYTES4(_state2, 1);\
-   ECHO_SUBBYTES4(_state2, 2);\
-   ECHO_SUBBYTES4(_state2, 3);\
-   ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
-   ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
-   ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
-   ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
+{ \
+   ECHO_SUBBYTES4( _state, 0 ); \
+   ECHO_SUBBYTES4( _state, 1 ); \
+   ECHO_SUBBYTES4( _state, 2 ); \
+   ECHO_SUBBYTES4( _state, 3 ); \
+   ECHO_MIXBYTES( _state, _state2, 0, t1, t2, s2 ); \
+   ECHO_MIXBYTES( _state, _state2, 1, t1, t2, s2 ); \
+   ECHO_MIXBYTES( _state, _state2, 2, t1, t2, s2 ); \
+   ECHO_MIXBYTES( _state, _state2, 3, t1, t2, s2 ); \
+   ECHO_SUBBYTES4( _state2, 0 ); \
+   ECHO_SUBBYTES4( _state2, 1 ); \
+   ECHO_SUBBYTES4( _state2, 2 ); \
+   ECHO_SUBBYTES4( _state2, 3 ); \
+   ECHO_MIXBYTES( _state2, _state, 0, t1, t2, s2 ); \
+   ECHO_MIXBYTES( _state2, _state, 1, t1, t2, s2 ); \
+   ECHO_MIXBYTES( _state2, _state, 2, t1, t2, s2 ); \
+   ECHO_MIXBYTES( _state2, _state, 3, t1, t2, s2 ); \
+}
 
 /*
 #define ECHO_ROUND_UNROLL2 \

diff --git a/algo/groestl/aes_ni/groestl-intr-aes.h b/algo/groestl/aes_ni/groestl-intr-aes.h
@@ -61,9 +61,12 @@ static const v128u64_t SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 };
 #if defined(__ARM_NEON)
 
 // No fast shuffle on NEON
-static const uint32x4_t vmask_d8 = {  3, 1, 2, 0 };  
+//static const uint32x4_t vmask_d8 = {  3, 1, 2, 0 };  
+static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
 
-#define gr_shuffle32( v )       v128_shufflev32( v, vmask_d8 )
+#define gr_shuffle32( v )      v128_blendv( v128_qrev32( v ), v, BLEND_MASK )
+
+//#define gr_shuffle32( v )       v128_shufflev32( v, vmask_d8 )
 
 #else
 

diff --git a/algo/hamsi/hamsi-hash-4way.c b/algo/hamsi/hamsi-hash-4way.c
@@ -35,15 +35,16 @@
 #include <stdio.h>
 #include "hamsi-hash-4way.h"
 
-static const uint32_t HAMSI_IV512[] =
+static const uint32_t HAMSI_IV512[] __attribute__ ((aligned (32))) =
 {
 	 0x73746565, 0x6c706172, 0x6b204172, 0x656e6265,
     0x72672031, 0x302c2062, 0x75732032, 0x3434362c,
     0x20422d33, 0x30303120, 0x4c657576, 0x656e2d48,
 	 0x65766572, 0x6c65652c, 0x2042656c, 0x6769756d
 };
 
-static const uint32_t alpha_n[] = {
+static const uint32_t alpha_n[] __attribute__ ((aligned (32))) =
+{
 	0xff00f0f0, 0xccccaaaa, 0xf0f0cccc, 0xff00aaaa,
    0xccccaaaa, 0xf0f0ff00, 0xaaaacccc, 0xf0f0ff00,
    0xf0f0cccc, 0xaaaaff00, 0xccccff00, 0xaaaaf0f0,
@@ -54,7 +55,8 @@ static const uint32_t alpha_n[] = {
    0xff00cccc, 0xaaaaf0f0,	0xff00aaaa, 0xccccf0f0
 };
 
-static const uint32_t alpha_f[] = {
+static const uint32_t alpha_f[] __attribute__ ((aligned (32))) =
+{
 	0xcaf9639c, 0x0ff0f9c0, 0x639c0ff0,	0xcaf9f9c0,
    0x0ff0f9c0, 0x639ccaf9,	0xf9c00ff0, 0x639ccaf9,
    0x639c0ff0,	0xf9c0caf9, 0x0ff0caf9, 0xf9c0639c,
@@ -69,7 +71,8 @@ static const uint32_t alpha_f[] = {
 
 /* Note: this table lists bits within each byte from least
    significant to most significant. */
-static const uint32_t T512[64][16] = {
+static const uint32_t T512[64][16] __attribute__ ((aligned (32))) =
+{
 	{  0xef0b0270, 0x3afd0000, 0x5dae0000, 0x69490000,
       0x9b0f3c06, 0x4405b5f9, 0x66140a51, 0x924f5d0a,
       0xc96b0030, 0xe7250000, 0x2f840000, 0x264f0000,
@@ -2260,4 +2263,4 @@ void hamsi512_2x64( void *dst, const void *data, size_t len )
    hamsi512_2x64_close( &sc, dst );
 }   
 
-#endif   // SSE4.1 or NEON
+#endif   // SSE4.2 or NEON