diff --git a/cpu-miner.c b/cpu-miner.c index b42cdf8..55a0486 100755 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -55,7 +55,7 @@ extern "C" } #endif -extern void cryptonight_hash(void* output, const void* input, size_t len); +extern void cryptonight_hash(void* output, const void* input, size_t len, int variant); void parse_device_config(int device, char *config, int *blocks, int *threads); #ifdef __linux /* Linux specific policy and affinity management */ @@ -633,8 +633,9 @@ static bool submit_upstream_work(CURL *curl, struct work *work) char *noncestr; noncestr = bin2hex(((const unsigned char*)work->data) + 39, 4); + int variant = ((unsigned char*)work->data)[0] >= 7 ? ((unsigned char*)work->data)[0] - 6 : 0; char hash[32]; - cryptonight_hash((void *)hash, (const void *)work->data, 76); + cryptonight_hash((void *)hash, (const void *)work->data, 76, variant); char *hashhex = bin2hex((const unsigned char *)hash, 32); snprintf(s, sizeof(s), "{\"method\": \"submit\", \"params\": {\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"}, \"id\":1}", @@ -653,8 +654,9 @@ static bool submit_upstream_work(CURL *curl, struct work *work) { /* build JSON-RPC request */ char *noncestr = bin2hex(((const unsigned char*)work->data) + 39, 4); + int variant = ((unsigned char*)work->data)[0] >= 7 ? ((unsigned char*)work->data)[0] - 6 : 0; char hash[32]; - cryptonight_hash((void *)hash, (const void *)work->data, 76); + cryptonight_hash((void *)hash, (const void *)work->data, 76, variant); char *hashhex = bin2hex((const unsigned char *)hash, 32); snprintf(s, sizeof(s), "{\"method\": \"submit\", \"params\": {\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"}, \"id\":1}", diff --git a/cryptonight.c b/cryptonight.c index a31ebf8..66ff663 100755 --- a/cryptonight.c +++ b/cryptonight.c @@ -14,6 +14,19 @@ #include "crypto/c_skein.h" #include "cryptonight.h" +#define VARIANT1_1(p) \ + do if (variant > 0) \ + { \ + uint8_t tmp = ((const uint8_t*)p)[11]; \ + uint8_t tmp1 = (tmp>>4)&1, tmp2 = (tmp>>5)&1, tmp3 = tmp1^tmp2; \ + uint8_t tmp0 = nonce_flag ? tmp3 : tmp1 + 1; \ + ((uint8_t*)p)[11] = (tmp & 0xef) | (tmp0<<4); \ + } while(0) + +#define VARIANT1_2(p) VARIANT1_1(p) +#define VARIANT1_INIT() \ + const uint8_t nonce_flag = variant > 0 ? ((const uint8_t*)input)[39] & 0x01 : 0 + struct cryptonight_ctx { uint8_t long_state[MEMORY]; union cn_slow_hash_state state; @@ -130,12 +143,14 @@ static void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; } -void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx) { +void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx, int variant) { size_t i, j; hash_process(&ctx->state.hs, (const uint8_t*) input, len); ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + VARIANT1_INIT(); + oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE); for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { #undef RND @@ -158,14 +173,18 @@ void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cr j = e2i(ctx->a) * AES_BLOCK_SIZE; aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a); xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]); + VARIANT1_1(&ctx->long_state[j]); mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c) * AES_BLOCK_SIZE]); + VARIANT1_2(&ctx->long_state[e2i(ctx->c) * AES_BLOCK_SIZE]); j = e2i(ctx->a) * AES_BLOCK_SIZE; aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a); xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]); + VARIANT1_1(&ctx->long_state[j]); mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b) * AES_BLOCK_SIZE]); + VARIANT1_2(&ctx->long_state[e2i(ctx->b) * AES_BLOCK_SIZE]); } memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); @@ -189,8 +208,8 @@ void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cr oaes_free((OAES_CTX **) &ctx->aes_ctx); } -void cryptonight_hash(void* output, const void* input, size_t len) { +void cryptonight_hash(void* output, const void* input, size_t len, int variant) { struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx)); - cryptonight_hash_ctx(output, input, len, ctx); + cryptonight_hash_ctx(output, input, len, ctx, variant); free(ctx); } diff --git a/cryptonight.h b/cryptonight.h index 607714d..a0527ee 100755 --- a/cryptonight.h +++ b/cryptonight.h @@ -159,7 +159,7 @@ static inline void exit_if_cudaerror(int thr_id, const char *file, int line) void hash_permutation(union hash_state *state); void hash_process(union hash_state *state, const uint8_t *buf, size_t count); -void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2); +void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint8_t nonce_flag); void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *pTargetIn); void cryptonight_extra_cpu_init(int thr_id); diff --git a/cryptonight/cryptonight.cu b/cryptonight/cryptonight.cu index 04dc109..923c81b 100755 --- a/cryptonight/cryptonight.cu +++ b/cryptonight/cryptonight.cu @@ -173,13 +173,14 @@ extern bool stop_mining; extern volatile bool mining_has_stopped[MAX_GPU]; -extern "C" void cryptonight_hash(void* output, const void* input, size_t len); +extern "C" void cryptonight_hash(void* output, const void* input, size_t len, int variant); extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done, uint32_t *results) { cudaError_t err; int res; uint32_t *nonceptr = (uint32_t*)(((char*)pdata) + 39); + int variant = ((uint8_t*)pdata)[0] >= 7 ? ((uint8_t*)pdata)[0] - 6 : 0; const uint32_t first_nonce = *nonceptr; uint32_t nonce = *nonceptr; int cn_blocks = device_config[thr_id][0]; @@ -237,7 +238,7 @@ extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t uint32_t foundNonce[2]; cryptonight_extra_cpu_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]); - cryptonight_core_cpu_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]); + cryptonight_core_cpu_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id], variant, nonce); cryptonight_extra_cpu_final(thr_id, throughput, nonce, foundNonce, d_ctx_state[thr_id]); if(stop_mining) @@ -253,7 +254,7 @@ extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t memcpy(tempdata, pdata, 76); uint32_t *tempnonceptr = (uint32_t*)(((char*)tempdata) + 39); *tempnonceptr = foundNonce[0]; - cryptonight_hash(vhash64, tempdata, 76); + cryptonight_hash(vhash64, tempdata, 76, variant); if((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) { res = 1; @@ -264,7 +265,7 @@ extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t if(foundNonce[1] < 0xffffffff) { *tempnonceptr = foundNonce[1]; - cryptonight_hash(vhash64, tempdata, 76); + cryptonight_hash(vhash64, tempdata, 76, variant); if((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) { res++; diff --git a/cryptonight/cuda_cryptonight_core.cu b/cryptonight/cuda_cryptonight_core.cu index e71bf8f..e21a084 100755 --- a/cryptonight/cuda_cryptonight_core.cu +++ b/cryptonight/cuda_cryptonight_core.cu @@ -16,6 +16,21 @@ extern int device_bsleep[MAX_GPU]; #include "cuda_cryptonight_aes.cu" +#define VARIANT1_1(p) \ + do if (variant > 0 && sub == 2) \ + { \ + uint32_t tmp32 = loadGlobal32(p); \ + uint8_t tmp = tmp32 >> 24; \ + uint8_t tmp1 = (tmp>>4)&1, tmp2 = (tmp>>5)&1, tmp3 = tmp1^tmp2; \ + uint8_t tmp0 = nonce_flag ? tmp3 : tmp1 + 1; \ + tmp32 &= 0x00ffffff; tmp32 |= ((tmp & 0xef) | (tmp0<<4)) << 24; \ + storeGlobal32(p, tmp32); \ + } while(0) + +#define VARIANT1_2(p) VARIANT1_1(p) +#define VARIANT1_INIT() \ + nonce_flag ^= (thread & 1) + __device__ __forceinline__ uint64_t cuda_mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) { *product_hi = __umul64hi(multiplier, multiplicand); @@ -87,7 +102,7 @@ __device__ __forceinline__ void MUL_SUM_XOR_DST(uint64_t a, uint64_t *__restrict dst[1] = lo; } -__global__ void cryptonight_core_gpu_phase2(uint32_t threads, int bfactor, int partidx, uint32_t * __restrict__ d_long_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b) +__global__ void cryptonight_core_gpu_phase2(uint32_t threads, int bfactor, int partidx, uint32_t * __restrict__ d_long_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, int variant, uint8_t nonce_flag) { __shared__ uint32_t sharedMemory[1024]; @@ -98,6 +113,8 @@ __global__ void cryptonight_core_gpu_phase2(uint32_t threads, int bfactor, int p if (thread >= threads) return; + VARIANT1_INIT(); + const int sub = threadIdx.x & 3; const int sub2 = threadIdx.x & 2; @@ -137,6 +154,7 @@ __global__ void cryptonight_core_gpu_phase2(uint32_t threads, int bfactor, int p t1[0] = __shfl(d[x], 0, 4); //long_state[j] = d[0] ^ d[1]; storeGlobal32(long_state + j, d[0] ^ d[1]); + VARIANT1_1(long_state + j); //MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & 0x1FFFF0]); j = ((*t1 & 0x1FFFF0) >> 2) + sub; @@ -157,6 +175,7 @@ __global__ void cryptonight_core_gpu_phase2(uint32_t threads, int bfactor, int p res = *((uint64_t *)t2) >> (sub & 1 ? 32 : 0); storeGlobal32(long_state + j, res); + VARIANT1_2(long_state + j); a = (sub & 1 ? yy[1] : yy[0]) ^ res; } } @@ -198,7 +217,7 @@ __global__ void cryptonight_core_gpu_phase3(int threads, const uint32_t * __rest } } -__host__ void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2) +__host__ void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint8_t nonce_flag) { dim3 grid(blocks); dim3 block(threads); @@ -213,7 +232,7 @@ __host__ void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uin for(i = 0; i < partcount; i++) { - cryptonight_core_gpu_phase2 <<< grid, block4 >>>(blocks*threads, device_bfactor[thr_id], i, d_long_state, d_ctx_a, d_ctx_b); + cryptonight_core_gpu_phase2 <<< grid, block4 >>>(blocks*threads, device_bfactor[thr_id], i, d_long_state, d_ctx_a, d_ctx_b, variant, nonce_flag); exit_if_cudaerror(thr_id, __FILE__, __LINE__); if(partcount > 1) usleep(device_bsleep[thr_id]); } diff --git a/miner.h b/miner.h index 9d39aae..2019cf3 100755 --- a/miner.h +++ b/miner.h @@ -200,7 +200,7 @@ extern int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done, uint32_t *results); -extern void cryptonight_hash(void* output, const void* input, size_t len); +extern void cryptonight_hash(void* output, const void* input, size_t len, int variant); struct thr_info { int id;