Skip to content

Commit 5657d94

Browse files
feat(sha2): add aarch64 implementation for sha256 for the M1 chip (#35)
1 parent 2c998b2 commit 5657d94

File tree

2 files changed

+300
-1
lines changed

2 files changed

+300
-1
lines changed

sha2/build.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
fn main() {
2-
let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
2+
use std::env;
3+
4+
let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
5+
let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
36

47
let mut build256 = cc::Build::new();
58
let (sha256_path, sha512_path) = if target_arch == "x86" {
69
("src/sha256_x86.S", "src/sha512_x86.S")
710
} else if target_arch == "x86_64" {
811
("src/sha256_x64.S", "src/sha512_x64.S")
12+
} else if target_arch == "aarch64" && target_os == "macos" {
13+
build256.flag("-march=armv8-a+crypto");
14+
("src/sha256_aarch64_macos.S", "")
915
} else if target_arch == "aarch64" {
1016
build256.flag("-march=armv8-a+crypto");
1117
("src/sha256_aarch64.S", "")
1218
} else {
1319
panic!("Unsupported target architecture");
1420
};
21+
1522
if target_arch != "aarch64" {
1623
cc::Build::new()
1724
.flag("-c")

sha2/src/sha256_aarch64_macos.S

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
/*
2+
* SHA-256 hash in AArch64 assembly for macOS (Apple M1)
3+
*
4+
* Based on the following C intrinsics implementation:
5+
* <https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-arm.c>
6+
*
7+
* Original C written and placed in public domain by Jeffrey Walton.
8+
* Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and
9+
* Barry O'Rourke for the mbedTLS project.
10+
*/
11+
12+
13+
/* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */
// Runs the SHA-256 compression function over one 64-byte block and writes the
// updated eight-word state back through the first argument.
//
// Arguments (per the C prototype above): x0 = state, x1 = block.
// Register use, as visible in the code below:
//   v0, v1          state[0..3] / state[4..7] as loaded; kept for the final add
//   v6, v4, v5, v3  message vectors built from block bytes 0-15, 16-31, 32-47
//                   and 48-63 after the per-word byte swap
//   v2              working copy of the second state half
//   v7, v16-v18     temporaries: message + round-constant sums and copies of
//                   the first state half
//   x8, x9          loop offset / scratch base, then constant page address
// 64 bytes of stack are used as scratch and released before returning.
// The leading underscore presumably matches Mach-O C symbol naming - confirm
// against the Rust-side extern declaration.
14+
.global _sha256_compress
15+
_sha256_compress:
// x8 = byte offset used by the byte-swap loop below
16+
mov x8, #0
// v0:v1 = current state, v2..v5 = the 64 raw block bytes
17+
ldp q0, q1, [x0]
18+
ldp q2, q3, [x1]
19+
ldp q4, q5, [x1, #32]
// Copy the block into a 64-byte stack scratch area (sp is pre-decremented)
20+
stp q2, q3, [sp, #-64]!
21+
stp q4, q5, [sp, #32]
22+
mov x9, sp
// Reverse the bytes inside every 32-bit word of the scratch copy,
// 16 bytes per iteration, until all 64 bytes are done
23+
LBB0_1:
24+
ldr q2, [x9, x8]
25+
rev32.16b v2, v2
26+
str q2, [x9, x8]
27+
add x8, x8, #16
28+
cmp x8, #64
29+
b.ne LBB0_1
// v2 = K_0 constants, v6/v4 = first two message vectors, v3 = v6 + K_0
30+
adrp x8, K_0@PAGE
31+
ldr q2, [x8, K_0@PAGEOFF]
32+
ldp q6, q4, [sp]
33+
add.4s v3, v6, v2
34+
35+
// Rounds 0-3
// Pattern repeated by every 4-round step: sha256su0/sha256su1 extend the
// message schedule in place, the add forms the next message + constant sum,
// and sha256h/sha256h2 update the two state halves from the previous sum.
36+
sha256su0.4s v6, v4
37+
adrp x8, K_1@PAGE
38+
ldr q2, [x8, K_1@PAGEOFF]
39+
add.4s v7, v4, v2
40+
mov.16b v16, v0
41+
sha256h.4s q16, q1, v3
42+
mov.16b v2, v1
43+
sha256h2.4s q2, q0, v3
// Load the remaining two message vectors; v3's previous sum has been consumed
44+
ldp q5, q3, [sp, #32]
45+
sha256su1.4s v6, v5, v3
46+
47+
// Rounds 4-7
48+
sha256su0.4s v4, v5
49+
adrp x8, K_2@PAGE
50+
ldr q17, [x8, K_2@PAGEOFF]
51+
add.4s v17, v5, v17
52+
mov.16b v18, v16
53+
sha256h.4s q18, q2, v7
54+
sha256h2.4s q2, q16, v7
55+
sha256su1.4s v4, v3, v6
56+
57+
// Rounds 8-11
58+
sha256su0.4s v5, v3
59+
adrp x8, K_3@PAGE
60+
ldr q7, [x8, K_3@PAGEOFF]
61+
add.4s v7, v3, v7
62+
mov.16b v16, v18
63+
sha256h.4s q16, q2, v17
64+
sha256h2.4s q2, q18, v17
65+
sha256su1.4s v5, v6, v4
66+
67+
// Rounds 12-15
68+
sha256su0.4s v3, v6
69+
adrp x8, K_4@PAGE
70+
ldr q17, [x8, K_4@PAGEOFF]
71+
add.4s v17, v6, v17
72+
mov.16b v18, v16
73+
sha256h.4s q18, q2, v7
74+
sha256h2.4s q2, q16, v7
75+
sha256su1.4s v3, v4, v5
76+
77+
// Rounds 16-19
78+
sha256su0.4s v6, v4
79+
adrp x8, K_5@PAGE
80+
ldr q7, [x8, K_5@PAGEOFF]
81+
add.4s v7, v4, v7
82+
mov.16b v16, v18
83+
sha256h.4s q16, q2, v17
84+
sha256h2.4s q2, q18, v17
85+
sha256su1.4s v6, v5, v3
86+
87+
// Rounds 20-23
88+
sha256su0.4s v4, v5
89+
adrp x8, K_6@PAGE
90+
ldr q17, [x8, K_6@PAGEOFF]
91+
add.4s v17, v5, v17
92+
mov.16b v18, v16
93+
sha256h.4s q18, q2, v7
94+
sha256h2.4s q2, q16, v7
95+
sha256su1.4s v4, v3, v6
96+
97+
// Rounds 24-27
98+
sha256su0.4s v5, v3
99+
adrp x8, K_7@PAGE
100+
ldr q7, [x8, K_7@PAGEOFF]
101+
add.4s v7, v3, v7
102+
mov.16b v16, v18
103+
sha256h.4s q16, q2, v17
104+
sha256h2.4s q2, q18, v17
105+
sha256su1.4s v5, v6, v4
106+
107+
// Rounds 28-31
108+
sha256su0.4s v3, v6
109+
adrp x8, K_8@PAGE
110+
ldr q17, [x8, K_8@PAGEOFF]
111+
add.4s v17, v6, v17
112+
mov.16b v18, v16
113+
sha256h.4s q18, q2, v7
114+
sha256h2.4s q2, q16, v7
115+
sha256su1.4s v3, v4, v5
116+
117+
// Rounds 32-35
118+
sha256su0.4s v6, v4
119+
adrp x8, K_9@PAGE
120+
ldr q7, [x8, K_9@PAGEOFF]
121+
add.4s v7, v4, v7
122+
mov.16b v16, v18
123+
sha256h.4s q16, q2, v17
124+
sha256h2.4s q2, q18, v17
125+
sha256su1.4s v6, v5, v3
126+
127+
// Rounds 36-39
128+
sha256su0.4s v4, v5
129+
adrp x8, K_10@PAGE
130+
ldr q17, [x8, K_10@PAGEOFF]
131+
add.4s v17, v5, v17
132+
mov.16b v18, v16
133+
sha256h.4s q18, q2, v7
134+
sha256h2.4s q2, q16, v7
135+
sha256su1.4s v4, v3, v6
136+
137+
// Rounds 40-43
138+
sha256su0.4s v5, v3
139+
adrp x8, K_11@PAGE
140+
ldr q7, [x8, K_11@PAGEOFF]
141+
add.4s v7, v3, v7
142+
mov.16b v16, v18
143+
sha256h.4s q16, q2, v17
144+
sha256h2.4s q2, q18, v17
145+
sha256su1.4s v5, v6, v4
146+
147+
// Rounds 44-47
// Last schedule extension; the sum is written over v6 itself because that
// message vector is not read again afterwards.
148+
sha256su0.4s v3, v6
149+
adrp x8, K_12@PAGE
150+
ldr q17, [x8, K_12@PAGEOFF]
151+
add.4s v6, v6, v17
152+
mov.16b v17, v16
153+
sha256h.4s q17, q2, v7
154+
sha256h2.4s q2, q16, v7
155+
sha256su1.4s v3, v4, v5
156+
157+
// Rounds 48-51
// From here on there are no more schedule updates: each step only adds the
// round constants to a message vector in place and runs the two hash updates.
158+
adrp x8, K_13@PAGE
159+
ldr q7, [x8, K_13@PAGEOFF]
160+
add.4s v4, v4, v7
161+
mov.16b v7, v17
162+
sha256h.4s q7, q2, v6
163+
sha256h2.4s q2, q17, v6
164+
165+
// Rounds 52-55
166+
adrp x8, K_14@PAGE
167+
ldr q6, [x8, K_14@PAGEOFF]
168+
add.4s v5, v5, v6
169+
mov.16b v6, v7
170+
sha256h.4s q6, q2, v4
171+
sha256h2.4s q2, q7, v4
172+
173+
// Rounds 56-59
174+
adrp x8, K_15@PAGE
175+
ldr q4, [x8, K_15@PAGEOFF]
176+
add.4s v3, v3, v4
177+
mov.16b v4, v6
178+
sha256h.4s q4, q2, v5
179+
sha256h2.4s q2, q6, v5
180+
181+
// Rounds 60-63
182+
mov.16b v5, v4
183+
sha256h.4s q5, q2, v3
184+
sha256h2.4s q2, q4, v3
185+
186+
// Update state
// Add the working halves (v5, v2) to the state values loaded at entry (v0, v1)
187+
add.4s v0, v5, v0
188+
add.4s v1, v2, v1
189+
190+
// Store the updated state through x0 and release the 64-byte stack scratch
191+
stp q0, q1, [x0]
192+
add sp, sp, #64
193+
194+
ret
195+
196+
197+
// Round-constant table: sixteen labelled groups (K_0 .. K_15) of four 32-bit
// words each. _sha256_compress fetches each group with one 16-byte `ldr q`
// through the label's @PAGE/@PAGEOFF address, and every group is preceded by
// its own `.align 4` directive.
// NOTE(review): the values look like the 64 standard SHA-256 round constants
// written in decimal (e.g. 1116352408 == 0x428a2f98) - confirm against
// FIPS 180-4 before editing any entry.
.align 4
198+
K_0:
199+
.long 1116352408
200+
.long 1899447441
201+
.long 3049323471
202+
.long 3921009573
203+
.align 4
204+
K_1:
205+
.long 961987163
206+
.long 1508970993
207+
.long 2453635748
208+
.long 2870763221
209+
.align 4
210+
K_2:
211+
.long 3624381080
212+
.long 310598401
213+
.long 607225278
214+
.long 1426881987
215+
.align 4
216+
K_3:
217+
.long 1925078388
218+
.long 2162078206
219+
.long 2614888103
220+
.long 3248222580
221+
.align 4
222+
K_4:
223+
.long 3835390401
224+
.long 4022224774
225+
.long 264347078
226+
.long 604807628
227+
.align 4
228+
K_5:
229+
.long 770255983
230+
.long 1249150122
231+
.long 1555081692
232+
.long 1996064986
233+
.align 4
234+
K_6:
235+
.long 2554220882
236+
.long 2821834349
237+
.long 2952996808
238+
.long 3210313671
239+
.align 4
240+
K_7:
241+
.long 3336571891
242+
.long 3584528711
243+
.long 113926993
244+
.long 338241895
245+
.align 4
246+
K_8:
247+
.long 666307205
248+
.long 773529912
249+
.long 1294757372
250+
.long 1396182291
251+
.align 4
252+
K_9:
253+
.long 1695183700
254+
.long 1986661051
255+
.long 2177026350
256+
.long 2456956037
257+
.align 4
258+
K_10:
259+
.long 2730485921
260+
.long 2820302411
261+
.long 3259730800
262+
.long 3345764771
263+
.align 4
264+
K_11:
265+
.long 3516065817
266+
.long 3600352804
267+
.long 4094571909
268+
.long 275423344
269+
.align 4
270+
K_12:
271+
.long 430227734
272+
.long 506948616
273+
.long 659060556
274+
.long 883997877
275+
.align 4
276+
K_13:
277+
.long 958139571
278+
.long 1322822218
279+
.long 1537002063
280+
.long 1747873779
281+
.align 4
282+
K_14:
283+
.long 1955562222
284+
.long 2024104815
285+
.long 2227730452
286+
.long 2361852424
287+
.align 4
288+
K_15:
289+
.long 2428436474
290+
.long 2756734187
291+
.long 3204031479
292+
.long 3329325298

0 commit comments

Comments
 (0)