|
| 1 | +/* |
| 2 | + * SHA-256 hash in AArch64 assembly for macos/M1 |
| 3 | + * |
| 4 | + * Based on the following C intrinsics implementation: |
| 5 | + * <https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-arm.c> |
| 6 | + * |
| 7 | + * Original C written and placed in public domain by Jeffrey Walton. |
| 8 | + * Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and |
| 9 | + * Barry O'Rourke for the mbedTLS project. |
| 10 | + */ |
| 11 | + |
| 12 | + |
/*
 * void sha256_compress(uint32_t state[8], const uint8_t block[64])
 *
 * Applies the SHA-256 compression function to one 64-byte block and adds the
 * result into the caller's state, in place.
 *
 * Syntax: Apple/LLVM AArch64 assembler (Mach-O, leading-underscore symbol).
 * ABI:    AAPCS64 leaf function; does not touch the stack.
 * In:     x0 = state  (eight 32-bit words, read at entry, written at exit)
 *         x1 = block  (64 bytes, read only)
 * Out:    nothing in registers; state[0..7] is updated
 * Clobb:  x8, v0-v7, v16-v18 (all caller-saved)
 * Needs:  the SHA-256 instructions sha256h / sha256h2 / sha256su0 / sha256su1
 *
 * Register roles:
 *   v0, v1            state[0..3], state[4..7] as loaded; kept for the final add
 *   v6, v4, v5, v3    message-schedule vectors MSG0, MSG1, MSG2, MSG3
 *   v7, v17           alternating "message + K_n" round inputs
 *   v16, v18          alternating copies of the working first state half
 *   v2                working second state half (target of every sha256h2)
 *
 * Each "Rounds" group also loads the K_n vector for the *next* group, so the
 * add is done before the message vector is overwritten by sha256su0.
 */
        .text
        .p2align 2
.global _sha256_compress
_sha256_compress:
        ldp     q0, q1, [x0]                    // v0 = state[0..3], v1 = state[4..7]

        // Load the block directly into the schedule registers and reverse the
        // bytes inside every 32-bit lane. Doing this in registers avoids
        // spilling a copy of the block to the stack.
        ldp     q6, q4, [x1]                    // MSG0, MSG1
        ldp     q5, q3, [x1, #32]               // MSG2, MSG3
        rev32.16b v6, v6
        rev32.16b v4, v4
        rev32.16b v5, v5
        rev32.16b v3, v3

        adrp    x8, K_0@PAGE
        ldr     q17, [x8, K_0@PAGEOFF]
        add.4s  v17, v6, v17                    // MSG0 + K_0, input to rounds 0-3

        // Rounds 0-3
        sha256su0.4s v6, v4
        adrp    x8, K_1@PAGE
        ldr     q7, [x8, K_1@PAGEOFF]
        add.4s  v7, v4, v7                      // MSG1 + K_1, input to rounds 4-7
        mov.16b v16, v0                         // work on a copy; v0 is needed at the end
        sha256h.4s q16, q1, v17
        mov.16b v2, v1                          // work on a copy; v1 is needed at the end
        sha256h2.4s q2, q0, v17                 // second half uses the pre-round first half
        sha256su1.4s v6, v5, v3

        // Rounds 4-7
        sha256su0.4s v4, v5
        adrp    x8, K_2@PAGE
        ldr     q17, [x8, K_2@PAGEOFF]
        add.4s  v17, v5, v17
        mov.16b v18, v16
        sha256h.4s q18, q2, v7
        sha256h2.4s q2, q16, v7
        sha256su1.4s v4, v3, v6

        // Rounds 8-11
        sha256su0.4s v5, v3
        adrp    x8, K_3@PAGE
        ldr     q7, [x8, K_3@PAGEOFF]
        add.4s  v7, v3, v7
        mov.16b v16, v18
        sha256h.4s q16, q2, v17
        sha256h2.4s q2, q18, v17
        sha256su1.4s v5, v6, v4

        // Rounds 12-15
        sha256su0.4s v3, v6
        adrp    x8, K_4@PAGE
        ldr     q17, [x8, K_4@PAGEOFF]
        add.4s  v17, v6, v17
        mov.16b v18, v16
        sha256h.4s q18, q2, v7
        sha256h2.4s q2, q16, v7
        sha256su1.4s v3, v4, v5

        // Rounds 16-19
        sha256su0.4s v6, v4
        adrp    x8, K_5@PAGE
        ldr     q7, [x8, K_5@PAGEOFF]
        add.4s  v7, v4, v7
        mov.16b v16, v18
        sha256h.4s q16, q2, v17
        sha256h2.4s q2, q18, v17
        sha256su1.4s v6, v5, v3

        // Rounds 20-23
        sha256su0.4s v4, v5
        adrp    x8, K_6@PAGE
        ldr     q17, [x8, K_6@PAGEOFF]
        add.4s  v17, v5, v17
        mov.16b v18, v16
        sha256h.4s q18, q2, v7
        sha256h2.4s q2, q16, v7
        sha256su1.4s v4, v3, v6

        // Rounds 24-27
        sha256su0.4s v5, v3
        adrp    x8, K_7@PAGE
        ldr     q7, [x8, K_7@PAGEOFF]
        add.4s  v7, v3, v7
        mov.16b v16, v18
        sha256h.4s q16, q2, v17
        sha256h2.4s q2, q18, v17
        sha256su1.4s v5, v6, v4

        // Rounds 28-31
        sha256su0.4s v3, v6
        adrp    x8, K_8@PAGE
        ldr     q17, [x8, K_8@PAGEOFF]
        add.4s  v17, v6, v17
        mov.16b v18, v16
        sha256h.4s q18, q2, v7
        sha256h2.4s q2, q16, v7
        sha256su1.4s v3, v4, v5

        // Rounds 32-35
        sha256su0.4s v6, v4
        adrp    x8, K_9@PAGE
        ldr     q7, [x8, K_9@PAGEOFF]
        add.4s  v7, v4, v7
        mov.16b v16, v18
        sha256h.4s q16, q2, v17
        sha256h2.4s q2, q18, v17
        sha256su1.4s v6, v5, v3

        // Rounds 36-39
        sha256su0.4s v4, v5
        adrp    x8, K_10@PAGE
        ldr     q17, [x8, K_10@PAGEOFF]
        add.4s  v17, v5, v17
        mov.16b v18, v16
        sha256h.4s q18, q2, v7
        sha256h2.4s q2, q16, v7
        sha256su1.4s v4, v3, v6

        // Rounds 40-43
        sha256su0.4s v5, v3
        adrp    x8, K_11@PAGE
        ldr     q7, [x8, K_11@PAGEOFF]
        add.4s  v7, v3, v7
        mov.16b v16, v18
        sha256h.4s q16, q2, v17
        sha256h2.4s q2, q18, v17
        sha256su1.4s v5, v6, v4

        // Rounds 44-47 (last schedule update; MSG0 is dead after sha256su0,
        // so v6 is reused for MSG0 + K_12)
        sha256su0.4s v3, v6
        adrp    x8, K_12@PAGE
        ldr     q17, [x8, K_12@PAGEOFF]
        add.4s  v6, v6, v17
        mov.16b v17, v16
        sha256h.4s q17, q2, v7
        sha256h2.4s q2, q16, v7
        sha256su1.4s v3, v4, v5

        // Rounds 48-51 (from here on the message vectors are only consumed,
        // so each one is overwritten by its own "message + K_n" sum)
        adrp    x8, K_13@PAGE
        ldr     q7, [x8, K_13@PAGEOFF]
        add.4s  v4, v4, v7
        mov.16b v7, v17
        sha256h.4s q7, q2, v6
        sha256h2.4s q2, q17, v6

        // Rounds 52-55
        adrp    x8, K_14@PAGE
        ldr     q6, [x8, K_14@PAGEOFF]
        add.4s  v5, v5, v6
        mov.16b v6, v7
        sha256h.4s q6, q2, v4
        sha256h2.4s q2, q7, v4

        // Rounds 56-59
        adrp    x8, K_15@PAGE
        ldr     q4, [x8, K_15@PAGEOFF]
        add.4s  v3, v3, v4
        mov.16b v4, v6
        sha256h.4s q4, q2, v5
        sha256h2.4s q2, q6, v5

        // Rounds 60-63
        mov.16b v5, v4
        sha256h.4s q5, q2, v3
        sha256h2.4s q2, q4, v3

        // Add the working halves into the state loaded at entry
        add.4s  v0, v5, v0
        add.4s  v1, v2, v1

        // Write the updated state back to the caller's buffer
        stp     q0, q1, [x0]

        ret
| 195 | + |
| 196 | + |
/*
 * Constant vectors K_0 .. K_15 used by _sha256_compress.
 *
 * Each label names four 32-bit words (16 bytes) that the function fetches with
 * a single 128-bit `ldr q`; K_n is added to the message vector consumed by
 * rounds 4n .. 4n+3. Every vector is aligned to 16 bytes (2^4).
 *
 * NOTE(review): the values look like the standard SHA-256 round constants
 * K[0..63] written in decimal -- confirm against FIPS 180-4 before editing.
 * No section directive is given, so the data is emitted into whichever
 * section is current at this point in the file.
 */
        .p2align 4
K_0:
        .long   1116352408, 1899447441, 3049323471, 3921009573
        .p2align 4
K_1:
        .long   961987163, 1508970993, 2453635748, 2870763221
        .p2align 4
K_2:
        .long   3624381080, 310598401, 607225278, 1426881987
        .p2align 4
K_3:
        .long   1925078388, 2162078206, 2614888103, 3248222580
        .p2align 4
K_4:
        .long   3835390401, 4022224774, 264347078, 604807628
        .p2align 4
K_5:
        .long   770255983, 1249150122, 1555081692, 1996064986
        .p2align 4
K_6:
        .long   2554220882, 2821834349, 2952996808, 3210313671
        .p2align 4
K_7:
        .long   3336571891, 3584528711, 113926993, 338241895
        .p2align 4
K_8:
        .long   666307205, 773529912, 1294757372, 1396182291
        .p2align 4
K_9:
        .long   1695183700, 1986661051, 2177026350, 2456956037
        .p2align 4
K_10:
        .long   2730485921, 2820302411, 3259730800, 3345764771
        .p2align 4
K_11:
        .long   3516065817, 3600352804, 4094571909, 275423344
        .p2align 4
K_12:
        .long   430227734, 506948616, 659060556, 883997877
        .p2align 4
K_13:
        .long   958139571, 1322822218, 1537002063, 1747873779
        .p2align 4
K_14:
        .long   1955562222, 2024104815, 2227730452, 2361852424
        .p2align 4
K_15:
        .long   2428436474, 2756734187, 3204031479, 3329325298
0 commit comments