Skip to content

Commit

Permalink
v23.8
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Nov 11, 2023
1 parent e043698 commit 26b9429
Show file tree
Hide file tree
Showing 44 changed files with 4,152 additions and 15,325 deletions.
19 changes: 3 additions & 16 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,6 @@ cpuminer_SOURCES = \
algo/hamsi/hamsi-hash-4way.c \
algo/haval/haval.c \
algo/haval/haval-hash-4way.c \
algo/hodl/aes.c \
algo/hodl/hodl-gate.c \
algo/hodl/hodl-wolf.c \
algo/hodl/sha512_avx.c \
algo/hodl/sha512_avx2.c \
algo/jh/sph_jh.c \
algo/jh/jh-hash-4way.c \
algo/jh/jha-gate.c \
Expand Down Expand Up @@ -148,6 +143,8 @@ cpuminer_SOURCES = \
algo/scrypt/scrypt.c \
algo/scrypt/scrypt-core-4way.c \
algo/scrypt/neoscrypt.c \
algo/sha/sha1.c \
algo/sha/sha1-hash.c \
algo/sha/sha256-hash.c \
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
Expand Down Expand Up @@ -278,20 +275,10 @@ cpuminer_SOURCES = \
algo/yespower/yespower-ref.c \
algo/yespower/yespower-blake2b-ref.c


disable_flags =

if USE_ASM
cpuminer_SOURCES += asm/neoscrypt_asm.S
if ARCH_x86
cpuminer_SOURCES += asm/sha2-x86.S asm/scrypt-x86.S
endif
if ARCH_x86_64
cpuminer_SOURCES += asm/sha2-x64.S asm/scrypt-x64.S
endif
if ARCH_ARM
cpuminer_SOURCES += asm/sha2-arm.S asm/scrypt-arm.S
endif
else
disable_flags += -DNOASM
endif
Expand All @@ -301,7 +288,7 @@ if HAVE_WINDOWS
endif

cpuminer_LDFLAGS = @LDFLAGS@
cpuminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lssl -lcrypto -lgmp
cpuminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES)
cpuminer_CFLAGS = -Wno-pointer-sign -Wno-pointer-to-int-cast $(disable_flags)

Expand Down
7 changes: 7 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ If not what makes it happen or not happen?
Change Log
----------

v23.8

Cpuminer-opt is no longer dependent on OpenSSL.
Removed Hodl algo.
Removed legacy Sha256 & Scrypt ASM code.
ARM: Echo AES is working and enabled for x17.

v23.7

Fixed blake2s, broken in v3.23.4.
Expand Down
1 change: 0 additions & 1 deletion algo-gate-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_GROESTL: rc = register_groestl_algo ( gate ); break;
case ALGO_HEX: rc = register_hex_algo ( gate ); break;
case ALGO_HMQ1725: rc = register_hmq1725_algo ( gate ); break;
case ALGO_HODL: rc = register_hodl_algo ( gate ); break;
case ALGO_JHA: rc = register_jha_algo ( gate ); break;
case ALGO_KECCAK: rc = register_keccak_algo ( gate ); break;
case ALGO_KECCAKC: rc = register_keccakc_algo ( gate ); break;
Expand Down
186 changes: 83 additions & 103 deletions algo/echo/aes_ni/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,112 +21,92 @@
#include "hash_api.h"
#include "simd-utils.h"

MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
MYALIGN const unsigned int _k_opt[] = {0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121, 0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1};
MYALIGN const unsigned int _k_inv[] = {0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309, 0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C};
MYALIGN const unsigned int _k_sb1[] = {0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E, 0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1};
MYALIGN const unsigned int _k_sb2[] = {0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955, 0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8};
MYALIGN const unsigned int _k_sb3[] = {0xC0211A00, 0x53E17249, 0xA8B2DA89, 0xFB68933B, 0xF0030A00, 0x5FF35C55, 0xA6ACFAA5, 0xF956AF09};
MYALIGN const unsigned int _k_sb4[] = {0x3FD64100, 0xE1E937A0, 0x49087E9F, 0xA876DE97, 0xC393EA00, 0x3D50AED7, 0x876D2914, 0xBA44FE79};
MYALIGN const unsigned int _k_sb5[] = {0xF4867F00, 0x5072D62F, 0x5D228BDB, 0x0DA9A4F9, 0x3971C900, 0x0B487AC2, 0x8A43F0FB, 0x81B332B8};
MYALIGN const unsigned int _k_sb7[] = {0xFFF75B00, 0xB20845E9, 0xE1BAA416, 0x531E4DAC, 0x3390E000, 0x62A3F282, 0x21C1D3B1, 0x43125170};
MYALIGN const unsigned int _k_sbo[] = {0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A, 0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1};
MYALIGN const unsigned int _k_h63[] = {0x63636363, 0x63636363, 0x63636363, 0x63636363};
MYALIGN const unsigned int _k_hc6[] = {0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6};
MYALIGN const unsigned int _k_h5b[] = {0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b};
MYALIGN const unsigned int _k_h4e[] = {0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e};
MYALIGN const unsigned int _k_h0e[] = {0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e};
MYALIGN const unsigned int _k_h15[] = {0x15151515, 0x15151515, 0x15151515, 0x15151515};
MYALIGN const unsigned int _k_aesmix1[] = {0x0f0a0500, 0x030e0904, 0x07020d08, 0x0b06010c};
MYALIGN const unsigned int _k_aesmix2[] = {0x000f0a05, 0x04030e09, 0x0807020d, 0x0c0b0601};
MYALIGN const unsigned int _k_aesmix3[] = {0x05000f0a, 0x0904030e, 0x0d080702, 0x010c0b06};
MYALIGN const unsigned int _k_aesmix4[] = {0x0a05000f, 0x0e090403, 0x020d0807, 0x06010c0b};


MYALIGN const unsigned int const1[] = {0x00000001, 0x00000000, 0x00000000, 0x00000000};
MYALIGN const unsigned int mul2mask[] = {0x00001b00, 0x00000000, 0x00000000, 0x00000000};
MYALIGN const unsigned int lsbmask[] = {0x01010101, 0x01010101, 0x01010101, 0x01010101};
MYALIGN const unsigned int invshiftrows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};


#define ECHO_SUBBYTES4(state, j) \
state[0][j] = v128_aesenc(state[0][j], k1);\
k1 = v128_add32(k1, cast_v128(const1));\
state[1][j] = v128_aesenc(state[1][j], k1);\
k1 = v128_add32(k1, cast_v128(const1));\
state[2][j] = v128_aesenc(state[2][j], k1);\
k1 = v128_add32(k1, cast_v128(const1));\
state[3][j] = v128_aesenc(state[3][j], k1);\
k1 = v128_add32(k1, cast_v128(const1));\
state[0][j] = v128_aesenc(state[0][j], v128_zero ); \
state[1][j] = v128_aesenc(state[1][j], v128_zero ); \
state[2][j] = v128_aesenc(state[2][j], v128_zero ); \
state[3][j] = v128_aesenc(state[3][j], v128_zero )

#define ECHO_SUBBYTES(state, i, j) \
state[i][j] = v128_aesenc(state[i][j], k1);\
k1 = v128_add32(k1, cast_v128(const1));\
state[i][j] = v128_aesenc(state[i][j], cast_v128(zero))

#define ECHO_MIXBYTES(state1, state2, j, t1, t2, s2) \
s2 = v128_add8(state1[0][j], state1[0][j]);\
t1 = v128_sr16(state1[0][j], 7);\
t1 = v128_and(t1, cast_v128(lsbmask));\
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
s2 = v128_xor(s2, t2);\
state2[0][j] = s2;\
state2[1][j] = state1[0][j];\
state2[2][j] = state1[0][j];\
state2[3][j] = v128_xor(s2, state1[0][j]);\
s2 = v128_add8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
t1 = v128_sr16(state1[1][(j + 1) & 3], 7);\
t1 = v128_and(t1, cast_v128(lsbmask));\
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
s2 = v128_xor(s2, t2);\
state2[0][j] = v128_xor3(state2[0][j], s2, state1[1][(j + 1) & 3] );\
state2[1][j] = v128_xor(state2[1][j], s2);\
state2[2][j] = v128_xor(state2[2][j], state1[1][(j + 1) & 3]);\
state2[3][j] = v128_xor(state2[3][j], state1[1][(j + 1) & 3]);\
s2 = v128_add8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
t1 = v128_sr16(state1[2][(j + 2) & 3], 7);\
t1 = v128_and(t1, cast_v128(lsbmask));\
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
s2 = v128_xor(s2, t2);\
state2[0][j] = v128_xor(state2[0][j], state1[2][(j + 2) & 3]);\
state2[1][j] = v128_xor3(state2[1][j], s2, state1[2][(j + 2) & 3] );\
state2[2][j] = v128_xor(state2[2][j], s2);\
state2[3][j] = v128_xor(state2[3][j], state1[2][(j + 2) & 3]);\
s2 = v128_add8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
t1 = v128_sr16(state1[3][(j + 3) & 3], 7);\
t1 = v128_and(t1, cast_v128(lsbmask));\
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
s2 = v128_xor(s2, t2);\
state2[0][j] = v128_xor(state2[0][j], state1[3][(j + 3) & 3]);\
state2[1][j] = v128_xor(state2[1][j], state1[3][(j + 3) & 3]);\
state2[2][j] = v128_xor3(state2[2][j], s2, state1[3][(j + 3) & 3] );\
state2[3][j] = v128_xor(state2[3][j], s2)
// Constant tables loaded as 128-bit vectors through cast_v128() by the ECHO
// macros in this file.

// Step added to the running key k1 after every keyed AES round in
// ECHO_SUBBYTES4 / ECHO_SUBBYTES: 1 in the first 32-bit word, 0 elsewhere.
// NOTE(review): declared with 32-byte alignment while the other tables here
// use 16 -- confirm whether the difference is intentional.
const uint32_t const1[] __attribute__ ((aligned (32))) =
{ 0x00000001, 0x00000000, 0x00000000, 0x00000000 };
// Lookup table used as the first argument of v128_shuffle8 in ECHO_MIXBYTES.
// Only the first word is non-zero; on a little-endian target that makes
// byte 0 = 0x00 and byte 1 = 0x1b. 0x1b is presumably the AES GF(2^8)
// reduction constant -- TODO confirm against the ECHO/AES specification.
const uint32_t mul2mask[] __attribute__ ((aligned (16))) =
{ 0x00001b00, 0x00000000, 0x00000000, 0x00000000 };
// 0x01 in every byte; ANDed with the shifted state in ECHO_MIXBYTES so that
// each byte keeps only its lowest bit.
const uint32_t lsbmask[] __attribute__ ((aligned (16))) =
{ 0x01010101, 0x01010101, 0x01010101, 0x01010101 };
// Not referenced in the visible part of this file. The name suggests a byte
// permutation for an inverse ShiftRows step -- verify against its users.
const uint32_t invshiftrows[] __attribute__ ((aligned (16))) =
{ 0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c };

// ECHO_SUBBYTES4( state, j ): process the four entries state[0..3][j].
//
// Pass 1: for rows 0, 1, 2, 3 in that order, replace state[row][j] with
//         v128_aesenc( state[row][j], k1 ), then advance k1 by const1.
//         Each row therefore sees a different key value, so the statement
//         order must not be changed.
// Pass 2: apply v128_aesenc_nokey to each of the same four entries.
//
// Requirements at the expansion site:
//   - a vector variable named k1 must be in scope; it is modified (advanced
//     four times per expansion).
//   - `state` and `j` are expanded many times, so they must be free of side
//     effects.
//   - the macro expands to several statements with no enclosing block and no
//     trailing semicolon; do not use it as the sole body of an unbraced
//     if/for/while.
#define ECHO_SUBBYTES4( state, j ) \
state[0][j] = v128_aesenc( state[0][j], k1 ); \
k1 = v128_add32( k1, cast_v128(const1) ); \
state[1][j] = v128_aesenc( state[1][j], k1 ); \
k1 = v128_add32( k1, cast_v128(const1) ); \
state[2][j] = v128_aesenc( state[2][j], k1 ); \
k1 = v128_add32( k1, cast_v128(const1) ); \
state[3][j] = v128_aesenc( state[3][j], k1 ); \
k1 = v128_add32( k1, cast_v128(const1) ); \
state[0][j] = v128_aesenc_nokey( state[0][j] ); \
state[1][j] = v128_aesenc_nokey( state[1][j] ); \
state[2][j] = v128_aesenc_nokey( state[2][j] ); \
state[3][j] = v128_aesenc_nokey( state[3][j] )

// ECHO_SUBBYTES( state, i, j ): single-entry form of ECHO_SUBBYTES4.
// Replaces state[i][j] with v128_aesenc( state[i][j], k1 ), advances k1 by
// const1, then applies v128_aesenc_nokey to the same entry.
// Needs a vector variable k1 in scope (it is modified). Expands to multiple
// statements without an enclosing block, and evaluates `i` and `j` more than
// once.
#define ECHO_SUBBYTES( state, i, j ) \
state[i][j] = v128_aesenc( state[i][j], k1 ); \
k1 = v128_add32( k1, cast_v128(const1) ); \
state[i][j] = v128_aesenc_nokey( state[i][j] )

// ECHO_MIXBYTES( state1, state2, j, t1, t2, s2 ): compute column j of state2
// from four entries of state1; state1 is only read.
//
// Inputs, one per row, each taken from a column rotated by the row index:
//   a = state1[0][j]
//   b = state1[1][(j + 1) & 3]
//   c = state1[2][(j + 2) & 3]
//   d = state1[3][(j + 3) & 3]
//
// For each input x the macro first builds s2 = "2*x":
//   s2 = v128_add8( x, x )                  byte-wise doubling
//   t1 = v128_sr16( x, 7 ) & lsbmask        0x01 in every byte whose top bit
//                                           was set, 0x00 otherwise
//   t2 = v128_shuffle8( mul2mask, t1 )      selects mul2mask byte 0 or 1
//   s2 ^= t2
// assuming the v128_* helpers behave as their names suggest (8-bit lane add,
// 16-bit lane logical right shift, byte-indexed table lookup) -- TODO confirm
// in simd-utils.h. With byte 1 of mul2mask being 0x1b this looks like the
// AES-style doubling in GF(2^8).
//
// The results are accumulated with XOR so that, writing 3*x for 2*x ^ x:
//   state2[0][j] = 2a ^ 3b ^  c ^  d
//   state2[1][j] =  a ^ 2b ^ 3c ^  d
//   state2[2][j] =  a ^  b ^ 2c ^ 3d
//   state2[3][j] = 3a ^  b ^  c ^ 2d
//
// t1, t2 and s2 are caller-supplied scratch vectors and are clobbered.
// `j` is expanded many times and must have no side effects. The macro expands
// to multiple statements with no enclosing block and no trailing semicolon.
#define ECHO_MIXBYTES( state1, state2, j, t1, t2, s2 ) \
s2 = v128_add8( state1[0][j], state1[0][j] ); \
t1 = v128_sr16( state1[0][j], 7 ); \
t1 = v128_and( t1, cast_v128(lsbmask) ); \
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
s2 = v128_xor( s2, t2 ); \
state2[0][j] = s2; \
state2[1][j] = state1[0][j]; \
state2[2][j] = state1[0][j]; \
state2[3][j] = v128_xor(s2, state1[0][j] ); \
s2 = v128_add8( state1[1][(j + 1) & 3], state1[1][(j + 1) & 3] ); \
t1 = v128_sr16( state1[1][(j + 1) & 3], 7 ); \
t1 = v128_and( t1, cast_v128(lsbmask) ); \
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
s2 = v128_xor( s2, t2 ); \
state2[0][j] = v128_xor3( state2[0][j], s2, state1[1][(j + 1) & 3] );\
state2[1][j] = v128_xor( state2[1][j], s2 ); \
state2[2][j] = v128_xor( state2[2][j], state1[1][(j + 1) & 3] ); \
state2[3][j] = v128_xor( state2[3][j], state1[1][(j + 1) & 3] ); \
s2 = v128_add8( state1[2][(j + 2) & 3], state1[2][(j + 2) & 3] ); \
t1 = v128_sr16( state1[2][(j + 2) & 3], 7 ); \
t1 = v128_and( t1, cast_v128(lsbmask) ); \
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
s2 = v128_xor( s2, t2 ); \
state2[0][j] = v128_xor( state2[0][j], state1[2][(j + 2) & 3] ); \
state2[1][j] = v128_xor3( state2[1][j], s2, state1[2][(j + 2) & 3] ); \
state2[2][j] = v128_xor( state2[2][j], s2 ); \
state2[3][j] = v128_xor( state2[3][j], state1[2][(j + 2) & 3] ); \
s2 = v128_add8( state1[3][(j + 3) & 3], state1[3][(j + 3) & 3] ); \
t1 = v128_sr16( state1[3][(j + 3) & 3], 7 ); \
t1 = v128_and( t1, cast_v128(lsbmask) ); \
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
s2 = v128_xor( s2, t2 ); \
state2[0][j] = v128_xor( state2[0][j], state1[3][(j + 3) & 3] ); \
state2[1][j] = v128_xor( state2[1][j], state1[3][(j + 3) & 3] ); \
state2[2][j] = v128_xor3( state2[2][j], s2, state1[3][(j + 3) & 3] ); \
state2[3][j] = v128_xor( state2[3][j], s2 )


#define ECHO_ROUND_UNROLL2 \
ECHO_SUBBYTES4(_state, 0);\
ECHO_SUBBYTES4(_state, 1);\
ECHO_SUBBYTES4(_state, 2);\
ECHO_SUBBYTES4(_state, 3);\
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
ECHO_SUBBYTES4(_state2, 0);\
ECHO_SUBBYTES4(_state2, 1);\
ECHO_SUBBYTES4(_state2, 2);\
ECHO_SUBBYTES4(_state2, 3);\
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
{ \
ECHO_SUBBYTES4( _state, 0 ); \
ECHO_SUBBYTES4( _state, 1 ); \
ECHO_SUBBYTES4( _state, 2 ); \
ECHO_SUBBYTES4( _state, 3 ); \
ECHO_MIXBYTES( _state, _state2, 0, t1, t2, s2 ); \
ECHO_MIXBYTES( _state, _state2, 1, t1, t2, s2 ); \
ECHO_MIXBYTES( _state, _state2, 2, t1, t2, s2 ); \
ECHO_MIXBYTES( _state, _state2, 3, t1, t2, s2 ); \
ECHO_SUBBYTES4( _state2, 0 ); \
ECHO_SUBBYTES4( _state2, 1 ); \
ECHO_SUBBYTES4( _state2, 2 ); \
ECHO_SUBBYTES4( _state2, 3 ); \
ECHO_MIXBYTES( _state2, _state, 0, t1, t2, s2 ); \
ECHO_MIXBYTES( _state2, _state, 1, t1, t2, s2 ); \
ECHO_MIXBYTES( _state2, _state, 2, t1, t2, s2 ); \
ECHO_MIXBYTES( _state2, _state, 3, t1, t2, s2 ); \
}

/*
#define ECHO_ROUND_UNROLL2 \
Expand Down
7 changes: 5 additions & 2 deletions algo/groestl/aes_ni/groestl-intr-aes.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,12 @@ static const v128u64_t SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 };
#if defined(__ARM_NEON)

// No fast shuffle on NEON
static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 };
//static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 };
static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };

#define gr_shuffle32( v ) v128_shufflev32( v, vmask_d8 )
#define gr_shuffle32( v ) v128_blendv( v128_qrev32( v ), v, BLEND_MASK )

//#define gr_shuffle32( v ) v128_shufflev32( v, vmask_d8 )

#else

Expand Down
13 changes: 8 additions & 5 deletions algo/hamsi/hamsi-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@
#include <stdio.h>
#include "hamsi-hash-4way.h"

static const uint32_t HAMSI_IV512[] =
static const uint32_t HAMSI_IV512[] __attribute__ ((aligned (32))) =
{
0x73746565, 0x6c706172, 0x6b204172, 0x656e6265,
0x72672031, 0x302c2062, 0x75732032, 0x3434362c,
0x20422d33, 0x30303120, 0x4c657576, 0x656e2d48,
0x65766572, 0x6c65652c, 0x2042656c, 0x6769756d
};

static const uint32_t alpha_n[] = {
static const uint32_t alpha_n[] __attribute__ ((aligned (32))) =
{
0xff00f0f0, 0xccccaaaa, 0xf0f0cccc, 0xff00aaaa,
0xccccaaaa, 0xf0f0ff00, 0xaaaacccc, 0xf0f0ff00,
0xf0f0cccc, 0xaaaaff00, 0xccccff00, 0xaaaaf0f0,
Expand All @@ -54,7 +55,8 @@ static const uint32_t alpha_n[] = {
0xff00cccc, 0xaaaaf0f0, 0xff00aaaa, 0xccccf0f0
};

static const uint32_t alpha_f[] = {
static const uint32_t alpha_f[] __attribute__ ((aligned (32))) =
{
0xcaf9639c, 0x0ff0f9c0, 0x639c0ff0, 0xcaf9f9c0,
0x0ff0f9c0, 0x639ccaf9, 0xf9c00ff0, 0x639ccaf9,
0x639c0ff0, 0xf9c0caf9, 0x0ff0caf9, 0xf9c0639c,
Expand All @@ -69,7 +71,8 @@ static const uint32_t alpha_f[] = {

/* Note: this table lists bits within each byte from least
significant to most significant. */
static const uint32_t T512[64][16] = {
static const uint32_t T512[64][16] __attribute__ ((aligned (32))) =
{
{ 0xef0b0270, 0x3afd0000, 0x5dae0000, 0x69490000,
0x9b0f3c06, 0x4405b5f9, 0x66140a51, 0x924f5d0a,
0xc96b0030, 0xe7250000, 0x2f840000, 0x264f0000,
Expand Down Expand Up @@ -2260,4 +2263,4 @@ void hamsi512_2x64( void *dst, const void *data, size_t len )
hamsi512_2x64_close( &sc, dst );
}

#endif // SSE4.1 or NEON
#endif // SSE4.2 or NEON
Loading

0 comments on commit 26b9429

Please sign in to comment.