diff --git a/sha2/build.rs b/sha2/build.rs index 820113e..8acb64a 100644 --- a/sha2/build.rs +++ b/sha2/build.rs @@ -1,17 +1,24 @@ fn main() { - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + use std::env; + + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); let mut build256 = cc::Build::new(); let (sha256_path, sha512_path) = if target_arch == "x86" { ("src/sha256_x86.S", "src/sha512_x86.S") } else if target_arch == "x86_64" { ("src/sha256_x64.S", "src/sha512_x64.S") + } else if target_arch == "aarch64" && target_os == "macos" { + build256.flag("-march=armv8-a+crypto"); + ("src/sha256_aarch64_macos.S", "") } else if target_arch == "aarch64" { build256.flag("-march=armv8-a+crypto"); ("src/sha256_aarch64.S", "") } else { panic!("Unsupported target architecture"); }; + if target_arch != "aarch64" { cc::Build::new() .flag("-c") diff --git a/sha2/src/sha256_aarch64_macos.S b/sha2/src/sha256_aarch64_macos.S new file mode 100644 index 0000000..a6d0f2d --- /dev/null +++ b/sha2/src/sha256_aarch64_macos.S @@ -0,0 +1,292 @@ +/* + * SHA-256 hash in AArch64 assembly for macos/M1 + * + * Based on the following C intrinsics implementation: + * + * + * Original C written and placed in public domain by Jeffrey Walton. + * Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and + * Barry O'Rourke for the mbedTLS project. + */ + + +/* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */ +.global _sha256_compress +_sha256_compress: + mov x8, #0 + ldp q0, q1, [x0] + ldp q2, q3, [x1] + ldp q4, q5, [x1, #32] + stp q2, q3, [sp, #-64]! + stp q4, q5, [sp, #32] + mov x9, sp +LBB0_1: + ldr q2, [x9, x8] + rev32.16b v2, v2 + str q2, [x9, x8] + add x8, x8, #16 + cmp x8, #64 + b.ne LBB0_1 + adrp x8, K_0@PAGE + ldr q2, [x8, K_0@PAGEOFF] + ldp q6, q4, [sp] + add.4s v3, v6, v2 + + // Rounds 0-3 + sha256su0.4s v6, v4 + adrp x8, K_1@PAGE + ldr q2, [x8, K_1@PAGEOFF] + add.4s v7, v4, v2 + mov.16b v16, v0 + sha256h.4s q16, q1, v3 + mov.16b v2, v1 + sha256h2.4s q2, q0, v3 + ldp q5, q3, [sp, #32] + sha256su1.4s v6, v5, v3 + + // Rounds 4-7 + sha256su0.4s v4, v5 + adrp x8, K_2@PAGE + ldr q17, [x8, K_2@PAGEOFF] + add.4s v17, v5, v17 + mov.16b v18, v16 + sha256h.4s q18, q2, v7 + sha256h2.4s q2, q16, v7 + sha256su1.4s v4, v3, v6 + + // Rounds 8-11 + sha256su0.4s v5, v3 + adrp x8, K_3@PAGE + ldr q7, [x8, K_3@PAGEOFF] + add.4s v7, v3, v7 + mov.16b v16, v18 + sha256h.4s q16, q2, v17 + sha256h2.4s q2, q18, v17 + sha256su1.4s v5, v6, v4 + + // Rounds 12-15 + sha256su0.4s v3, v6 + adrp x8, K_4@PAGE + ldr q17, [x8, K_4@PAGEOFF] + add.4s v17, v6, v17 + mov.16b v18, v16 + sha256h.4s q18, q2, v7 + sha256h2.4s q2, q16, v7 + sha256su1.4s v3, v4, v5 + + // Rounds 16-19 + sha256su0.4s v6, v4 + adrp x8, K_5@PAGE + ldr q7, [x8, K_5@PAGEOFF] + add.4s v7, v4, v7 + mov.16b v16, v18 + sha256h.4s q16, q2, v17 + sha256h2.4s q2, q18, v17 + sha256su1.4s v6, v5, v3 + + // Rounds 20-23 + sha256su0.4s v4, v5 + adrp x8, K_6@PAGE + ldr q17, [x8, K_6@PAGEOFF] + add.4s v17, v5, v17 + mov.16b v18, v16 + sha256h.4s q18, q2, v7 + sha256h2.4s q2, q16, v7 + sha256su1.4s v4, v3, v6 + + // Rounds 24-27 + sha256su0.4s v5, v3 + adrp x8, K_7@PAGE + ldr q7, [x8, K_7@PAGEOFF] + add.4s v7, v3, v7 + mov.16b v16, v18 + sha256h.4s q16, q2, v17 + sha256h2.4s q2, q18, v17 + sha256su1.4s v5, v6, v4 + + // Rounds 28-31 + sha256su0.4s v3, v6 + adrp x8, K_8@PAGE + ldr q17, [x8, K_8@PAGEOFF] + add.4s v17, v6, v17 + mov.16b v18, v16 + sha256h.4s q18, q2, v7 + sha256h2.4s q2, q16, v7 + sha256su1.4s v3, v4, v5 + + // Rounds 32-35 + sha256su0.4s v6, v4 + adrp x8, K_9@PAGE + ldr q7, [x8, K_9@PAGEOFF] + add.4s v7, v4, v7 + mov.16b v16, v18 + sha256h.4s q16, q2, v17 + sha256h2.4s q2, q18, v17 + sha256su1.4s v6, v5, v3 + + // Rounds 36-39 + sha256su0.4s v4, v5 + adrp x8, K_10@PAGE + ldr q17, [x8, K_10@PAGEOFF] + add.4s v17, v5, v17 + mov.16b v18, v16 + sha256h.4s q18, q2, v7 + sha256h2.4s q2, q16, v7 + sha256su1.4s v4, v3, v6 + + // Rounds 40-43 + sha256su0.4s v5, v3 + adrp x8, K_11@PAGE + ldr q7, [x8, K_11@PAGEOFF] + add.4s v7, v3, v7 + mov.16b v16, v18 + sha256h.4s q16, q2, v17 + sha256h2.4s q2, q18, v17 + sha256su1.4s v5, v6, v4 + + // Rounds 44-47 + sha256su0.4s v3, v6 + adrp x8, K_12@PAGE + ldr q17, [x8, K_12@PAGEOFF] + add.4s v6, v6, v17 + mov.16b v17, v16 + sha256h.4s q17, q2, v7 + sha256h2.4s q2, q16, v7 + sha256su1.4s v3, v4, v5 + + // Rounds 48-51 + adrp x8, K_13@PAGE + ldr q7, [x8, K_13@PAGEOFF] + add.4s v4, v4, v7 + mov.16b v7, v17 + sha256h.4s q7, q2, v6 + sha256h2.4s q2, q17, v6 + + // Rounds 52-55 + adrp x8, K_14@PAGE + ldr q6, [x8, K_14@PAGEOFF] + add.4s v5, v5, v6 + mov.16b v6, v7 + sha256h.4s q6, q2, v4 + sha256h2.4s q2, q7, v4 + + // Rounds 56-59 + adrp x8, K_15@PAGE + ldr q4, [x8, K_15@PAGEOFF] + add.4s v3, v3, v4 + mov.16b v4, v6 + sha256h.4s q4, q2, v5 + sha256h2.4s q2, q6, v5 + + // Rounds 60-63 + mov.16b v5, v4 + sha256h.4s q5, q2, v3 + sha256h2.4s q2, q4, v3 + + // Update state + add.4s v0, v5, v0 + add.4s v1, v2, v1 + + // restore + stp q0, q1, [x0] + add sp, sp, #64 + + ret + + +.align 4 +K_0: + .long 1116352408 + .long 1899447441 + .long 3049323471 + .long 3921009573 +.align 4 +K_1: + .long 961987163 + .long 1508970993 + .long 2453635748 + .long 2870763221 +.align 4 +K_2: + .long 3624381080 + .long 310598401 + .long 607225278 + .long 1426881987 +.align 4 +K_3: + .long 1925078388 + .long 2162078206 + .long 2614888103 + .long 3248222580 +.align 4 +K_4: + .long 3835390401 + .long 4022224774 + .long 264347078 + .long 604807628 +.align 4 +K_5: + .long 770255983 + .long 1249150122 + .long 1555081692 + .long 1996064986 +.align 4 +K_6: + .long 2554220882 + .long 2821834349 + .long 2952996808 + .long 3210313671 +.align 4 +K_7: + .long 3336571891 + .long 3584528711 + .long 113926993 + .long 338241895 +.align 4 +K_8: + .long 666307205 + .long 773529912 + .long 1294757372 + .long 1396182291 +.align 4 +K_9: + .long 1695183700 + .long 1986661051 + .long 2177026350 + .long 2456956037 +.align 4 +K_10: + .long 2730485921 + .long 2820302411 + .long 3259730800 + .long 3345764771 +.align 4 +K_11: + .long 3516065817 + .long 3600352804 + .long 4094571909 + .long 275423344 +.align 4 +K_12: + .long 430227734 + .long 506948616 + .long 659060556 + .long 883997877 +.align 4 +K_13: + .long 958139571 + .long 1322822218 + .long 1537002063 + .long 1747873779 +.align 4 +K_14: + .long 1955562222 + .long 2024104815 + .long 2227730452 + .long 2361852424 +.align 4 +K_15: + .long 2428436474 + .long 2756734187 + .long 3204031479 + .long 3329325298