Skip to content

Commit

Permalink
Scrypt ARM64 assembly
Browse files Browse the repository at this point in the history
20% faster than C on mobile
  • Loading branch information
hectorchu committed Feb 28, 2025
1 parent 79182db commit 62ee959
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 102 deletions.
91 changes: 0 additions & 91 deletions ltcutil/scrypt/scrypt.c

This file was deleted.

16 changes: 5 additions & 11 deletions ltcutil/scrypt/scrypt.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
package scrypt

// void scrypt_aux(unsigned char*);
import "C"
//go:build !arm64

import (
"crypto/sha256"
package scrypt

"golang.org/x/crypto/pbkdf2"
)
import scrypt2 "golang.org/x/crypto/scrypt"

// Scrypt hashes x, using x as both the password and the salt, and returns a
// 32-byte result.
//
// NOTE(review): this hunk is rendered without +/- markers, so two bodies are
// interleaved below. The first three statements appear to be the removed cgo
// version (PBKDF2 expand to 128 bytes, C.scrypt_aux, PBKDF2 compress to 32
// bytes); the last two appear to be the added version, which calls
// scrypt2.Key with N=1024, r=1, p=1 and a 32-byte key length and discards
// the returned error.
func Scrypt(x []byte) []byte {
X := pbkdf2.Key(x, x, 1, 128, sha256.New)
C.scrypt_aux((*C.uchar)(&X[0]))
return pbkdf2.Key(x, X, 1, 32, sha256.New)
x, _ = scrypt2.Key(x, x, 1024, 1, 1, 32)
return x
}
24 changes: 24 additions & 0 deletions ltcutil/scrypt/scrypt_arm64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package scrypt

import (
"crypto/sha256"
"sync"

"golang.org/x/crypto/pbkdf2"
)

// scratch is the working memory handed to the assembly routine:
// 1024 rows of 32 uint32 words (128 bytes per row, 128 KiB in total).
type scratch [1024][32]uint32

// pool hands out reusable *scratch buffers so that a fresh 128 KiB array
// does not have to be allocated on every hash. Buffers are not cleared
// between uses.
var pool = sync.Pool{
	New: func() interface{} {
		return new(scratch)
	},
}

// Scrypt returns a 32-byte digest of x.
//
// x is first expanded, salted with itself, into a 128-byte block by a single
// PBKDF2-HMAC-SHA256 iteration. The assembly routine scrypt then mixes that
// block in place using a scratch buffer borrowed from pool. Finally a second
// single-iteration PBKDF2 pass, salted with the mixed block, produces the
// 32-byte output.
func Scrypt(x []byte) []byte {
	block := pbkdf2.Key(x, x, 1, 128, sha256.New)

	// Borrow the working memory only for the duration of the mixing step.
	buf := pool.Get().(*scratch)
	scrypt(&block[0], buf)
	pool.Put(buf)

	return pbkdf2.Key(x, block, 1, 32, sha256.New)
}

// scrypt is implemented in assembly (scrypt_arm64.s). X points at a 128-byte
// block that is read and overwritten in place; V is working memory that the
// routine fills with 1024 successive 128-byte snapshots of the block and then
// reads back at indices derived from the block's contents.
func scrypt(X *byte, V *scratch)
122 changes: 122 additions & 0 deletions ltcutil/scrypt/scrypt_arm64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#include "textflag.h"

// EORP XORs one 64-bit register pair (16 bytes) of memory into another:
// it loads the pair at n(Ra1) into (Rb1, Rb2) and the pair at n(Ra2) into
// (Rc1, Rc2), XORs them, and stores the result back to n(Ra1). The result
// is also left in (Rc1, Rc2).
#define EORP(n, Ra1, Ra2, Rb1, Rb2, Rc1, Rc2) \
LDP n(Ra1), (Rb1, Rb2) \
LDP n(Ra2), (Rc1, Rc2) \
EOR Rb1, Rc1, Rc1 \
EOR Rb2, Rc2, Rc2 \
STP (Rc1, Rc2), n(Ra1)

// EORPW is the 32-bit counterpart of EORP: it loads two 32-bit words from
// n(Ra1) into (Rb1, Rb2) and two from n(Ra2) into (Rc1, Rc2), XORs them,
// and stores the result back to n(Ra1). The XORed words remain in
// (Rc1, Rc2) for the caller to use.
#define EORPW(n, Ra1, Ra2, Rb1, Rb2, Rc1, Rc2) \
LDPW n(Ra1), (Rb1, Rb2) \
LDPW n(Ra2), (Rc1, Rc2) \
EORW Rb1, Rc1, Rc1 \
EORW Rb2, Rc2, Rc2 \
STPW (Rc1, Rc2), n(Ra1)

// ADDPW loads two 32-bit words from n(Ra) into (Rb1, Rb2), adds them to
// (Rc1, Rc2), and stores the sums back to n(Ra). The third parameter is
// unused; it keeps the parameter list identical to EORP/EORPW so that ADDPW
// can be passed to BLK as OP.
#define ADDPW(n, Ra, _, Rb1, Rb2, Rc1, Rc2) \
LDPW n(Ra), (Rb1, Rb2) \
ADDW Rb1, Rc1, Rc1 \
ADDW Rb2, Rc2, Rc2 \
STPW (Rc1, Rc2), n(Ra)

// BLK applies OP eight times, at byte offsets 0*w+n through 7*w+n from the
// base pointers in R16 and R17. The Rc operands walk R0..R15 two at a time,
// while the Rb temporaries use R19..R26 and are reused for the second four
// invocations. With w=8 the eight steps cover 64 bytes; with w=16 they
// cover 128 bytes.
#define BLK(OP, w, n) \
OP(0*w+n, R16, R17, R19, R20, R0, R1) \
OP(1*w+n, R16, R17, R21, R22, R2, R3) \
OP(2*w+n, R16, R17, R23, R24, R4, R5) \
OP(3*w+n, R16, R17, R25, R26, R6, R7) \
OP(4*w+n, R16, R17, R19, R20, R8, R9) \
OP(5*w+n, R16, R17, R21, R22, R10, R11) \
OP(6*w+n, R16, R17, R23, R24, R12, R13) \
OP(7*w+n, R16, R17, R25, R26, R14, R15)

// ADDEORW computes Rc = Ra + Rb (32-bit) and then XORs Rc, rotated right by
// n bits, into Rd. Rc is clobbered as a temporary.
#define ADDEORW(Ra, Rb, Rc, n, Rd) \
ADDW Ra, Rb, Rc \
EORW Rc@>n, Rd, Rd

// QUARTRND performs four independent ADDEORW steps that share the rotation
// amount n. Each (X1, X2, X3) triple means X3 ^= rotate_right(X1 + X2, n).
// R19..R22 are used as the four temporaries.
#define QUARTRND(Ra1, Ra2, Ra3, Rb1, Rb2, Rb3, Rc1, Rc2, Rc3, Rd1, Rd2, Rd3, n) \
ADDEORW(Ra1, Ra2, R19, n, Ra3) \
ADDEORW(Rb1, Rb2, R20, n, Rb3) \
ADDEORW(Rc1, Rc2, R21, n, Rc3) \
ADDEORW(Rd1, Rd2, R22, n, Rd3)

// func scrypt(X *byte, V *scratch)
//
// Mixes the 128-byte block at X in place, using V as 1024 slots of 128 bytes.
// The 8-byte local at 8(RSP) preserves a value across the calls to
// eor_salsa8, which overwrites R0-R17 and R19-R26.
TEXT ·scrypt(SB), NOSPLIT, $8-16
// R1 walks through V one 128-byte slot at a time during loop1.
MOVD V+8(FP), R1

// loop1: store the current block into the slot at R1, then mix the block.
loop1:
MOVD X+0(FP), R0

// Copy the 128 bytes at X to the current slot, 32 bytes per load/store pair.
FLDPQ 0(R0), (F0, F1)
FSTPQ (F0, F1), 0(R1)
FLDPQ 32(R0), (F2, F3)
FSTPQ (F2, F3), 32(R1)
FLDPQ 64(R0), (F4, F5)
FSTPQ (F4, F5), 64(R1)
FLDPQ 96(R0), (F6, F7)
FSTPQ (F6, F7), 96(R1)

// Save the slot pointer, then mix: first half with the second half as
// source (R0 = X, R1 = X+64), then the second half with the first half as
// source (R0 = X+64, R1 = X).
MOVD R1, 8(RSP)
ADD $64, R0, R1
CALL eor_salsa8(SB)
MOVD X+0(FP), R1
ADD $64, R1, R0
CALL eor_salsa8(SB)

// Advance to the next slot; stop once R1 reaches V + 0x20000
// (1024 slots * 128 bytes).
MOVD 8(RSP), R1
ADD $128, R1, R1
MOVD V+8(FP), R2
ADD $0x20000, R2, R2
CMP R1, R2
BNE loop1

// R1 is now the iteration counter for loop2.
MOVD $0, R1

// loop2: 1024 iterations of "XOR a data-dependent slot into X, then mix".
loop2:
MOVD R1, 8(RSP)
MOVD X+0(FP), R16
MOVD V+8(FP), R17
// Slot index = (32-bit word at byte offset 64 of X) & 1023; each slot is
// 128 bytes, hence the shift by 7.
MOVWU 64(R16), R0
AND $1023, R0, R0
ADD R0<<7, R17, R17

// X ^= V[index], 128 bytes in eight 16-byte steps.
BLK(EORP, 16, 0)

// Same two mixing calls as in loop1.
MOVD R16, R0
ADD $64, R0, R1
CALL eor_salsa8(SB)
MOVD X+0(FP), R1
ADD $64, R1, R0
CALL eor_salsa8(SB)

// Restore and bump the counter; run 1024 times in total.
MOVD 8(RSP), R1
ADD $1, R1, R1
CMP $1024, R1
BNE loop2

RET

// eor_salsa8 takes its arguments in registers rather than on the stack:
// R0 points at a 64-byte destination and R1 at a 64-byte source. It XORs the
// source into the destination, runs the round loop below on the sixteen
// resulting 32-bit words, and adds the outcome back into the destination.
// It overwrites R0-R17 and R19-R26.
TEXT eor_salsa8(SB), NOSPLIT, $0
// BLK expects its base pointers in R16 (destination) and R17 (source).
MOVD R0, R16
MOVD R1, R17

// dst ^= src; the sixteen XORed words are stored to dst and also left in
// R0..R15 as the working state.
BLK(EORPW, 8, 0)

// The source pointer is no longer needed; R17 becomes the loop counter.
MOVD $0, R17

// Four iterations of eight QUARTRND groups. The right-rotations by
// 25/23/19/14 equal left-rotations by 7/9/13/18 on 32-bit words.
// NOTE(review): this matches the Salsa20 double round (columns, then rows),
// i.e. Salsa20/8 overall - confirm against a reference implementation.
loop:
QUARTRND(R0, R12, R4, R5, R1, R9, R10, R6, R14, R15, R11, R3, 25)
QUARTRND(R4, R0, R8, R9, R5, R13, R14, R10, R2, R3, R15, R7, 23)
QUARTRND(R8, R4, R12, R13, R9, R1, R2, R14, R6, R7, R3, R11, 19)
QUARTRND(R12, R8, R0, R1, R13, R5, R6, R2, R10, R11, R7, R15, 14)
QUARTRND(R0, R3, R1, R5, R4, R6, R10, R9, R11, R15, R14, R12, 25)
QUARTRND(R1, R0, R2, R6, R5, R7, R11, R10, R8, R12, R15, R13, 23)
QUARTRND(R2, R1, R3, R7, R6, R4, R8, R11, R9, R13, R12, R14, 19)
QUARTRND(R3, R2, R0, R4, R7, R5, R9, R8, R10, R14, R13, R15, 14)

ADD $1, R17, R17
CMP $4, R17
BNE loop

// dst[i] += state[i]: add the words stored before the rounds (still in
// memory at R16) to the final state and write the sums back.
BLK(ADDPW, 8, 0)

RET

0 comments on commit 62ee959

Please sign in to comment.