diff --git a/README.md b/README.md
index 75223f2..6cd4a4d 100644
--- a/README.md
+++ b/README.md
@@ -21,11 +21,9 @@ For more information, see [#45].
 All crates are tested on the following platforms:
 
 - Linux (32-bit and 64-bit x86)
-- Windows (64-bit x86, GNU only)
+- Windows (64-bit x86)
 - ARM64 (except `md5`, which is x86 only)
 
-Windows MSVC builds are known to be broken. See [#17].
-
 ## Minimum Supported Rust Version
 
 All crates in this repository support **Rust 1.43** or higher.
diff --git a/md5/build.rs b/md5/build.rs
index b6376d0..e80d976 100644
--- a/md5/build.rs
+++ b/md5/build.rs
@@ -1,10 +1,14 @@
 fn main() {
     let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
+    let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
 
     let asm_path = if target_arch == "x86" {
         "src/x86.S"
+    } else if target_arch == "x86_64" && target_env == "msvc" {
+        // MASM source: only the MSVC toolchain can assemble it.
+        "src/x64_masm.asm"
     } else if target_arch == "x86_64" {
         "src/x64.S"
     } else {
         panic!("Unsupported target architecture");
     };
diff --git a/md5/src/x64_masm.asm b/md5/src/x64_masm.asm
new file mode 100644
index 0000000..9d41988
--- /dev/null
+++ b/md5/src/x64_masm.asm
@@ -0,0 +1,160 @@
+;
+; MD5 hash in x64 MASM
+;
+; Copyright (c) 2023 Chong Yeol Nah (MIT License)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy of
+; this software and associated documentation files (the "Software"), to deal in
+; the Software without restriction, including without limitation the rights to
+; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+; the Software, and to permit persons to whom the Software is furnished to do so,
+; subject to the following conditions:
+; - The above copyright notice and this permission notice shall be included in
+;   all copies or substantial portions of the Software.
+; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. +; +; +; Storage usage: +; Bytes Location Volatile Description +; 4 eax yes Temporary w-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (read-only) +; 8 rdx yes Base address of hash value array argument (read-only) +; 4 r8d yes MD5 working variable A +; 4 r9d yes MD5 working variable B +; 4 r10d yes MD5 working variable C +; 4 r11d yes MD5 working variable D + + option casemap:none + + .const +ROUND macro i, a, b, c, d, k, s, t + +if i LT 16 + + ; eax = F(b,c,d) = (b & c) | (!b & d) = d ^ (b & (c ^ d)) + mov eax, c + xor eax, d + and eax, b + xor eax, d + +elseif i LT 32 + + ; eax = G(b,c,d) = (b & d) | (c & !d) = c ^ (d & (b ^ c)) + mov eax, c + xor eax, b + and eax, d + xor eax, c + +elseif i LT 48 + + ; eax = H(b,c,d) = b ^ c ^ d + mov eax, c + xor eax, d + xor eax, b + +else + + ; eax = I(b,c,d) = c ^ (b | !d) + mov eax, d + not eax + or eax, b + xor eax, c + +endif + + lea a, [eax + a + t] + add a, [rcx + k*4] + rol a, s + add a, b + endm + + .code + ; void md5_compress(const uint8_t block[64], uint32_t state[4]) + public md5_compress +md5_compress proc + ; Initialize working variables with previous hash value + mov r8d, [rdx] ; a + mov r9d, [rdx + 4] ; b + mov r10d, [rdx + 8] ; c + mov r11d, [rdx + 12] ; d + + ; 64 rounds of hashing + ROUND 0, r8d, r9d, r10d, r11d, 0, 7, -28955B88h + ROUND 1, r11d, r8d, r9d, r10d, 1, 12, -173848AAh + ROUND 2, r10d, r11d, r8d, r9d, 2, 17, 242070DBh + ROUND 3, r9d, r10d, r11d, r8d, 3, 22, -3E423112h + 
ROUND 4, r8d, r9d, r10d, r11d, 4, 7, -0A83F051h + ROUND 5, r11d, r8d, r9d, r10d, 5, 12, 4787C62Ah + ROUND 6, r10d, r11d, r8d, r9d, 6, 17, -57CFB9EDh + ROUND 7, r9d, r10d, r11d, r8d, 7, 22, -02B96AFFh + ROUND 8, r8d, r9d, r10d, r11d, 8, 7, 698098D8h + ROUND 9, r11d, r8d, r9d, r10d, 9, 12, -74BB0851h + ROUND 10, r10d, r11d, r8d, r9d, 10, 17, -0000A44Fh + ROUND 11, r9d, r10d, r11d, r8d, 11, 22, -76A32842h + ROUND 12, r8d, r9d, r10d, r11d, 12, 7, 6B901122h + ROUND 13, r11d, r8d, r9d, r10d, 13, 12, -02678E6Dh + ROUND 14, r10d, r11d, r8d, r9d, 14, 17, -5986BC72h + ROUND 15, r9d, r10d, r11d, r8d, 15, 22, 49B40821h + ROUND 16, r8d, r9d, r10d, r11d, 1, 5, -09E1DA9Eh + ROUND 17, r11d, r8d, r9d, r10d, 6, 9, -3FBF4CC0h + ROUND 18, r10d, r11d, r8d, r9d, 11, 14, 265E5A51h + ROUND 19, r9d, r10d, r11d, r8d, 0, 20, -16493856h + ROUND 20, r8d, r9d, r10d, r11d, 5, 5, -29D0EFA3h + ROUND 21, r11d, r8d, r9d, r10d, 10, 9, 02441453h + ROUND 22, r10d, r11d, r8d, r9d, 15, 14, -275E197Fh + ROUND 23, r9d, r10d, r11d, r8d, 4, 20, -182C0438h + ROUND 24, r8d, r9d, r10d, r11d, 9, 5, 21E1CDE6h + ROUND 25, r11d, r8d, r9d, r10d, 14, 9, -3CC8F82Ah + ROUND 26, r10d, r11d, r8d, r9d, 3, 14, -0B2AF279h + ROUND 27, r9d, r10d, r11d, r8d, 8, 20, 455A14EDh + ROUND 28, r8d, r9d, r10d, r11d, 13, 5, -561C16FBh + ROUND 29, r11d, r8d, r9d, r10d, 2, 9, -03105C08h + ROUND 30, r10d, r11d, r8d, r9d, 7, 14, 676F02D9h + ROUND 31, r9d, r10d, r11d, r8d, 12, 20, -72D5B376h + ROUND 32, r8d, r9d, r10d, r11d, 5, 4, -0005C6BEh + ROUND 33, r11d, r8d, r9d, r10d, 8, 11, -788E097Fh + ROUND 34, r10d, r11d, r8d, r9d, 11, 16, 6D9D6122h + ROUND 35, r9d, r10d, r11d, r8d, 14, 23, -021AC7F4h + ROUND 36, r8d, r9d, r10d, r11d, 1, 4, -5B4115BCh + ROUND 37, r11d, r8d, r9d, r10d, 4, 11, 4BDECFA9h + ROUND 38, r10d, r11d, r8d, r9d, 7, 16, -0944B4A0h + ROUND 39, r9d, r10d, r11d, r8d, 10, 23, -41404390h + ROUND 40, r8d, r9d, r10d, r11d, 13, 4, 289B7EC6h + ROUND 41, r11d, r8d, r9d, r10d, 0, 11, -155ED806h + ROUND 42, r10d, r11d, r8d, r9d, 3, 16, 
-2B10CF7Bh
+    ROUND 43, r9d, r10d, r11d, r8d, 6, 23, 04881D05h
+    ROUND 44, r8d, r9d, r10d, r11d, 9, 4, -262B2FC7h
+    ROUND 45, r11d, r8d, r9d, r10d, 12, 11, -1924661Bh
+    ROUND 46, r10d, r11d, r8d, r9d, 15, 16, 1FA27CF8h
+    ROUND 47, r9d, r10d, r11d, r8d, 2, 23, -3B53A99Bh
+    ROUND 48, r8d, r9d, r10d, r11d, 0, 6, -0BD6DDBCh
+    ROUND 49, r11d, r8d, r9d, r10d, 7, 10, 432AFF97h
+    ROUND 50, r10d, r11d, r8d, r9d, 14, 15, -546BDC59h
+    ROUND 51, r9d, r10d, r11d, r8d, 5, 21, -036C5FC7h
+    ROUND 52, r8d, r9d, r10d, r11d, 12, 6, 655B59C3h
+    ROUND 53, r11d, r8d, r9d, r10d, 3, 10, -70F3336Eh
+    ROUND 54, r10d, r11d, r8d, r9d, 10, 15, -00100B83h
+    ROUND 55, r9d, r10d, r11d, r8d, 1, 21, -7A7BA22Fh
+    ROUND 56, r8d, r9d, r10d, r11d, 8, 6, 6FA87E4Fh
+    ROUND 57, r11d, r8d, r9d, r10d, 15, 10, -01D31920h
+    ROUND 58, r10d, r11d, r8d, r9d, 6, 15, -5CFEBCECh
+    ROUND 59, r9d, r10d, r11d, r8d, 13, 21, 4E0811A1h
+    ROUND 60, r8d, r9d, r10d, r11d, 4, 6, -08AC817Eh
+    ROUND 61, r11d, r8d, r9d, r10d, 11, 10, -42C50DCBh
+    ROUND 62, r10d, r11d, r8d, r9d, 2, 15, 2AD7D2BBh
+    ROUND 63, r9d, r10d, r11d, r8d, 9, 21, -14792C6Fh
+
+    ; Compute intermediate hash value
+    add [rdx]     , r8d
+    add [rdx +  4], r9d
+    add [rdx +  8], r10d
+    add [rdx + 12], r11d
+    ret
+md5_compress endp
+    end
diff --git a/sha1/build.rs b/sha1/build.rs
index afed737..b97e0bd 100644
--- a/sha1/build.rs
+++ b/sha1/build.rs
@@ -1,11 +1,15 @@
 fn main() {
     let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
     let target_vendor = std::env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default();
+    let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
 
     let asm_path = if target_arch == "x86" {
         "src/x86.S"
+    } else if target_arch == "x86_64" && target_env == "msvc" {
+        // MASM source: only the MSVC toolchain can assemble it.
+        "src/x64_masm.asm"
     } else if target_arch == "x86_64" {
         "src/x64.S"
     } else if target_arch == "aarch64" && target_vendor == "apple" {
         "src/aarch64_apple.S"
     } else if
target_arch == "aarch64" { diff --git a/sha1/src/x64_masm.asm b/sha1/src/x64_masm.asm new file mode 100644 index 0000000..92032da --- /dev/null +++ b/sha1/src/x64_masm.asm @@ -0,0 +1,231 @@ +; +; SHA1 hash in x64 MASM +; +; Copyright (c) 2023 Chong Yeol Nah (MIT License) +; +; Permission is hereby granted, free of charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. 
+; +; +; Storage usage: +; Bytes Location Volatile Description +; 4 eax yes Temporary w-bit word used in the hash computation +; 4 ebx no Temporary w-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (read-only) +; 8 rdx yes Base address of hash value array argument (read-only) +; 8 rsp no x64 stack pointer +; 4 r8d yes SHA1 working variable A +; 4 r9d yes SHA1 working variable B +; 4 r10d yes SHA1 working variable C +; 4 r11d yes SHA1 working variable D +; 4 r12d no SHA1 working variable E +; 64 [rsp+0] no Circular buffer of most recent 16 message schedule items, 4 bytes each + + option casemap:none + + .const +SCHED macro i + index textequ %i AND 0fh ; i mod 16 + exitm <[rsp + index*4]> + endm + +ROUNDTAIL macro a, b, e, k ; eax = f[i], e -> e + w[i] + ; (obj1) e -> a rol 5 + f[i] + e + w[i] + k[i] + ; (obj2) b -> b rol 30 + mov ebx, a + rol ebx, 5 + lea e, [ebx + e + k] ; e -> a rol 5 + e + w[i] + k[i] + add e, eax ; e -> a rol 5 + f[i] + e + w[i] + k[i] (obj1) + rol b, 30 ; b -> b rol 30 (obj2) + endm + +ROUND macro i, a, b, c, d, e + +if i LT 16 + + mov eax, [rcx + i*4] + bswap eax + +else + + mov eax, SCHED(i - 3) + xor eax, SCHED(i - 8) + xor eax, SCHED(i - 14) + xor eax, SCHED(i - 16) + rol eax, 1 + +endif + + mov SCHED(i), eax + add e, eax ; e -> e + w[i] + +if i LT 20 + + ; eax = f[i] = (b & c) ^ (~b & d) = d ^ b & (c ^ d) + ; & and ^ form the Z/2Z ring (& is *, ^ is +) + ; ~b is (1 + b) + ; bc + (1 + b)d = bc + d + bd = d + b(c + d) + mov eax, c + xor eax, d + and eax, b + xor eax, d + ROUNDTAIL a, b, e, 5A827999h + +elseif i GE 40 AND i LT 60 + + ; eax = f[i] = (b & c) ^ (b & d) ^ (c & d) = (b & (c | d)) | (c & d) + ; https://www.wolframalpha.com/input?i=simplify+%28b+%26%26+c%29+xor+%28b+%26%26+d%29+xor+%28c+%26%26+d%29 + mov eax, c + mov ebx, c + or eax, d + and eax, b + and ebx, d + or eax, ebx + ROUNDTAIL a, b, e, -70E44324h + +else + + ; eax = f[i] = b ^ c ^ d + mov eax, b + xor eax, c + xor eax, d + + 
if i LT 40 + + ROUNDTAIL a, b, e, 6ED9EBA1h + + else + + ROUNDTAIL a, b, e, -359D3E2Ah + + endif + +endif + + endm + + .code + ; void sha1_compress(const uint8_t block[64], uint32_t state[5]) + public sha1_compress +sha1_compress proc + ; Save nonvolatile registers, allocate scratch space + push rbx + push r12 + sub rsp, 64 + + ; Initialize working variables with previous hash value + mov r8d, [rdx] ; a + mov r9d, [rdx + 4] ; b + mov r10d, [rdx + 8] ; c + mov r11d, [rdx + 12] ; d + mov r12d, [rdx + 16] ; e + + ; 80 rounds of hashing + ROUND 0, r8d, r9d, r10d, r11d, r12d + ROUND 1, r12d, r8d, r9d, r10d, r11d + ROUND 2, r11d, r12d, r8d, r9d, r10d + ROUND 3, r10d, r11d, r12d, r8d, r9d + ROUND 4, r9d, r10d, r11d, r12d, r8d + ROUND 5, r8d, r9d, r10d, r11d, r12d + ROUND 6, r12d, r8d, r9d, r10d, r11d + ROUND 7, r11d, r12d, r8d, r9d, r10d + ROUND 8, r10d, r11d, r12d, r8d, r9d + ROUND 9, r9d, r10d, r11d, r12d, r8d + ROUND 10, r8d, r9d, r10d, r11d, r12d + ROUND 11, r12d, r8d, r9d, r10d, r11d + ROUND 12, r11d, r12d, r8d, r9d, r10d + ROUND 13, r10d, r11d, r12d, r8d, r9d + ROUND 14, r9d, r10d, r11d, r12d, r8d + ROUND 15, r8d, r9d, r10d, r11d, r12d + ROUND 16, r12d, r8d, r9d, r10d, r11d + ROUND 17, r11d, r12d, r8d, r9d, r10d + ROUND 18, r10d, r11d, r12d, r8d, r9d + ROUND 19, r9d, r10d, r11d, r12d, r8d + ROUND 20, r8d, r9d, r10d, r11d, r12d + ROUND 21, r12d, r8d, r9d, r10d, r11d + ROUND 22, r11d, r12d, r8d, r9d, r10d + ROUND 23, r10d, r11d, r12d, r8d, r9d + ROUND 24, r9d, r10d, r11d, r12d, r8d + ROUND 25, r8d, r9d, r10d, r11d, r12d + ROUND 26, r12d, r8d, r9d, r10d, r11d + ROUND 27, r11d, r12d, r8d, r9d, r10d + ROUND 28, r10d, r11d, r12d, r8d, r9d + ROUND 29, r9d, r10d, r11d, r12d, r8d + ROUND 30, r8d, r9d, r10d, r11d, r12d + ROUND 31, r12d, r8d, r9d, r10d, r11d + ROUND 32, r11d, r12d, r8d, r9d, r10d + ROUND 33, r10d, r11d, r12d, r8d, r9d + ROUND 34, r9d, r10d, r11d, r12d, r8d + ROUND 35, r8d, r9d, r10d, r11d, r12d + ROUND 36, r12d, r8d, r9d, r10d, r11d + ROUND 37, r11d, r12d, 
r8d, r9d, r10d + ROUND 38, r10d, r11d, r12d, r8d, r9d + ROUND 39, r9d, r10d, r11d, r12d, r8d + ROUND 40, r8d, r9d, r10d, r11d, r12d + ROUND 41, r12d, r8d, r9d, r10d, r11d + ROUND 42, r11d, r12d, r8d, r9d, r10d + ROUND 43, r10d, r11d, r12d, r8d, r9d + ROUND 44, r9d, r10d, r11d, r12d, r8d + ROUND 45, r8d, r9d, r10d, r11d, r12d + ROUND 46, r12d, r8d, r9d, r10d, r11d + ROUND 47, r11d, r12d, r8d, r9d, r10d + ROUND 48, r10d, r11d, r12d, r8d, r9d + ROUND 49, r9d, r10d, r11d, r12d, r8d + ROUND 50, r8d, r9d, r10d, r11d, r12d + ROUND 51, r12d, r8d, r9d, r10d, r11d + ROUND 52, r11d, r12d, r8d, r9d, r10d + ROUND 53, r10d, r11d, r12d, r8d, r9d + ROUND 54, r9d, r10d, r11d, r12d, r8d + ROUND 55, r8d, r9d, r10d, r11d, r12d + ROUND 56, r12d, r8d, r9d, r10d, r11d + ROUND 57, r11d, r12d, r8d, r9d, r10d + ROUND 58, r10d, r11d, r12d, r8d, r9d + ROUND 59, r9d, r10d, r11d, r12d, r8d + ROUND 60, r8d, r9d, r10d, r11d, r12d + ROUND 61, r12d, r8d, r9d, r10d, r11d + ROUND 62, r11d, r12d, r8d, r9d, r10d + ROUND 63, r10d, r11d, r12d, r8d, r9d + ROUND 64, r9d, r10d, r11d, r12d, r8d + ROUND 65, r8d, r9d, r10d, r11d, r12d + ROUND 66, r12d, r8d, r9d, r10d, r11d + ROUND 67, r11d, r12d, r8d, r9d, r10d + ROUND 68, r10d, r11d, r12d, r8d, r9d + ROUND 69, r9d, r10d, r11d, r12d, r8d + ROUND 70, r8d, r9d, r10d, r11d, r12d + ROUND 71, r12d, r8d, r9d, r10d, r11d + ROUND 72, r11d, r12d, r8d, r9d, r10d + ROUND 73, r10d, r11d, r12d, r8d, r9d + ROUND 74, r9d, r10d, r11d, r12d, r8d + ROUND 75, r8d, r9d, r10d, r11d, r12d + ROUND 76, r12d, r8d, r9d, r10d, r11d + ROUND 77, r11d, r12d, r8d, r9d, r10d + ROUND 78, r10d, r11d, r12d, r8d, r9d + ROUND 79, r9d, r10d, r11d, r12d, r8d + + ; Compute intermediate hash value + add [rdx] , r8d + add [rdx + 4], r9d + add [rdx + 8], r10d + add [rdx + 12], r11d + add [rdx + 16], r12d + + ; Restore nonvolatile registers + add rsp, 64 + pop r12 + pop rbx + ret +sha1_compress endp + end diff --git a/sha2/build.rs b/sha2/build.rs index 4fd331f..66689d3 100644 --- a/sha2/build.rs +++ 
b/sha2/build.rs
@@ -3,12 +3,16 @@
 fn main() {
     let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
     let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default();
+    let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
 
     let mut build256 = cc::Build::new();
     let (sha256_path, sha512_path) = if target_arch == "x86" {
         ("src/sha256_x86.S", "src/sha512_x86.S")
+    } else if target_arch == "x86_64" && target_env == "msvc" {
+        // MASM sources: only the MSVC toolchain can assemble them.
+        ("src/sha256_x64_masm.asm", "src/sha512_x64_masm.asm")
     } else if target_arch == "x86_64" {
         ("src/sha256_x64.S", "src/sha512_x64.S")
     } else if target_arch == "aarch64" && target_vendor == "apple" {
         build256.flag("-march=armv8-a+crypto");
         ("src/sha256_aarch64_apple.S", "")
diff --git a/sha2/src/sha256_x64_masm.asm b/sha2/src/sha256_x64_masm.asm
new file mode 100644
index 0000000..6a3d94c
--- /dev/null
+++ b/sha2/src/sha256_x64_masm.asm
@@ -0,0 +1,247 @@
+;
+; SHA256 hash in x64 MASM
+;
+; Copyright (c) 2023 Chong Yeol Nah (MIT License)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy of
+; this software and associated documentation files (the "Software"), to deal in
+; the Software without restriction, including without limitation the rights to
+; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+; the Software, and to permit persons to whom the Software is furnished to do so,
+; subject to the following conditions:
+; - The above copyright notice and this permission notice shall be included in
+;   all copies or substantial portions of the Software.
+; - The Software is provided "as is", without warranty of any kind, express or
+;   implied, including but not limited to the warranties of merchantability,
+;   fitness for a particular purpose and noninfringement.
In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. +; +; +; Storage usage: +; Bytes Location Volatile Description +; 4 eax yes Temporary w-bit word used in the hash computation +; 4 ebx no Temporary w-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (read-only) +; 8 rdx yes Base address of hash value array argument (read-only) +; 4 edi no Temporary w-bit word used in the hash computation +; 4 esi no Temporary w-bit word used in the hash computation +; 8 rsp no x64 stack pointer +; 4 r8d yes SHA256 working variable A +; 4 r9d yes SHA256 working variable B +; 4 r10d yes SHA256 working variable C +; 4 r11d yes SHA256 working variable D +; 4 r12d no SHA256 working variable E +; 4 r13d no SHA256 working variable F +; 4 r14d no SHA256 working variable G +; 4 r15d no SHA256 working variable H +; 64 [rsp+0] no Circular buffer of most recent 16 message schedule items, 4 bytes each + + option casemap:none + + .const +SCHED macro i + index textequ %i AND 0fh ; i mod 16 + exitm <[rsp + index*4]> + endm + +ROUNDTAIL macro a, b, c, d, e, f, g, h, k ; ebx = w[i] + ; temp1 = h + S1 + ch + k[i] + w[i] + ; temp2 = S0 + maj + ; (obj1) h -> temp1 + temp2 = h + S1 + ch + k[i] + w[i] + S0 + maj + ; (obj2) d -> d + temp1 + ; Part 0 + mov eax, e + mov edi, e + mov esi, e + ror eax, 6 + ror edi, 11 + ror esi, 25 + xor edi, esi + xor eax, edi ; eax = S1 + ; ch = (e & f) ^ (~e & g) = (g ^ (e & (f ^ g))) + ; & and ^ form the Z/2Z ring (& is *, ^ is +) + ; ~e is (1 + e) + ; ef + (1 + e)g = ef + g + eg = g + ef + eg = g + e(f + g) + mov edi, g + xor edi, f + and edi, e + xor edi, g ; edi = ch + lea eax, [eax + edi + k] ; eax = S1 + ch + k[i] + add h, eax ; h -> h + S1 + ch + k[i] + add h, ebx ; h -> h + S1 + ch + k[i] + w[i] = temp1 
+ ; Part 1 + add d, h ; d -> d + temp1 (obj2) + ; Part 2 + mov eax, a + mov edi, a + mov esi, a + ror eax, 2 + ror edi, 13 + ror esi, 22 + xor edi, esi + xor eax, edi ; eax = S0 + add h, eax ; h -> temp1 + S0 + ; maj = (a and b) xor (a and c) xor (b and c) = (a and (b or c)) or (b and c) + ; https://www.wolframalpha.com/input?i=simplify+%28A+%26%26+B%29+xor+%28A+%26%26+C%29+xor+%28B+%26%26+C%29 + mov edi, c + mov eax, c + or eax, b + and edi, b + and eax, a + or eax, edi ; eax = maj + add h, eax ; h -> temp1 + S0 + maj = temp1 + temp2 (obj1) + endm + +ROUND macro i, a, b, c, d, e, f, g, h, k + +if i LT 16 + + mov ebx, [rcx + i*4] + bswap ebx + mov SCHED(i), ebx + +else + + ; (obj) w[i] -> w[i-16] + s0 + w[i-7] + s1 + mov ebx, SCHED(i - 16) ; ebx = w[i-16] + mov eax, SCHED(i - 15) + mov edi, eax + mov esi, eax + ror edi, 18 + shr esi, 3 + ror eax, 7 + xor edi, esi + xor eax, edi ; s0 = eax + add ebx, eax ; ebx = w[i-16] + s0 + add ebx, SCHED(i - 7) ; ebx = w[i-16] + s0 + w[i-7] + mov eax, SCHED(i - 2) + mov edi, eax + mov esi, eax + ror edi, 19 + shr esi, 10 + ror eax, 17 + xor edi, esi + xor eax, edi ; eax = s1 + add ebx, eax ; ebx = w[i-16] + s0 + w[i-7] + s1 + mov SCHED(i), ebx ; w[i] -> w[i-16] + s0 + w[i-7] + s1 (obj) + +endif + + ROUNDTAIL a, b, c, d, e, f, g, h, k ; ebx = w[i] + endm + + .code + ; void sha256_compress(const uint8_t block[64], uint32_t state[8]) + public sha256_compress +sha256_compress proc + ; Save nonvolatile registers, allocate scratch space + push rbx + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + ; Initialize working variables with previous hash value + mov r8d, [rdx] ; a + mov r9d, [rdx + 4] ; b + mov r10d, [rdx + 8] ; c + mov r11d, [rdx + 12] ; d + mov r12d, [rdx + 16] ; e + mov r13d, [rdx + 20] ; f + mov r14d, [rdx + 24] ; g + mov r15d, [rdx + 28] ; h + + ; 64 rounds of hashing + ROUND 0, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 428A2F98h + ROUND 1, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 
r14d, 71374491h + ROUND 2, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -4A3F0431h + ROUND 3, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -164A245Bh + ROUND 4, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 3956C25Bh + ROUND 5, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 59F111F1h + ROUND 6, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -6DC07D5Ch + ROUND 7, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -54E3A12Bh + ROUND 8, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -27F85568h + ROUND 9, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 12835B01h + ROUND 10, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 243185BEh + ROUND 11, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 550C7DC3h + ROUND 12, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 72BE5D74h + ROUND 13, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -7F214E02h + ROUND 14, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -6423F959h + ROUND 15, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -3E640E8Ch + ROUND 16, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -1B64963Fh + ROUND 17, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -1041B87Ah + ROUND 18, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0FC19DC6h + ROUND 19, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 240CA1CCh + ROUND 20, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 2DE92C6Fh + ROUND 21, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 4A7484AAh + ROUND 22, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 5CB0A9DCh + ROUND 23, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 76F988DAh + ROUND 24, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -67C1AEAEh + ROUND 25, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -57CE3993h + ROUND 26, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -4FFCD838h + ROUND 27, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -40A68039h + ROUND 28, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -391FF40Dh + ROUND 29, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -2A586EB9h + ROUND 30, 
r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 06CA6351h + ROUND 31, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 14292967h + ROUND 32, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 27B70A85h + ROUND 33, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 2E1B2138h + ROUND 34, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 4D2C6DFCh + ROUND 35, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 53380D13h + ROUND 36, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 650A7354h + ROUND 37, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 766A0ABBh + ROUND 38, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -7E3D36D2h + ROUND 39, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -6D8DD37Bh + ROUND 40, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -5D40175Fh + ROUND 41, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -57E599B5h + ROUND 42, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -3DB47490h + ROUND 43, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -3893AE5Dh + ROUND 44, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -2E6D17E7h + ROUND 45, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -2966F9DCh + ROUND 46, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0BF1CA7Bh + ROUND 47, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 106AA070h + ROUND 48, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 19A4C116h + ROUND 49, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 1E376C08h + ROUND 50, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 2748774Ch + ROUND 51, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 34B0BCB5h + ROUND 52, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 391C0CB3h + ROUND 53, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 4ED8AA4Ah + ROUND 54, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 5B9CCA4Fh + ROUND 55, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 682E6FF3h + ROUND 56, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 748F82EEh + ROUND 57, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 78A5636Fh + ROUND 58, r14d, r15d, r8d , r9d , r10d, 
r11d, r12d, r13d, -7B3787ECh + ROUND 59, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -7338FDF8h + ROUND 60, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -6F410006h + ROUND 61, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -5BAF9315h + ROUND 62, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -41065C09h + ROUND 63, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -398E870Eh + + ; Compute intermediate hash value + add [rdx] , r8d + add [rdx + 4], r9d + add [rdx + 8], r10d + add [rdx + 12], r11d + add [rdx + 16], r12d + add [rdx + 20], r13d + add [rdx + 24], r14d + add [rdx + 28], r15d + + ; Restore nonvolatile registers + add rsp, 64 + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbx + ret +sha256_compress endp + end diff --git a/sha2/src/sha512_x64_masm.asm b/sha2/src/sha512_x64_masm.asm new file mode 100644 index 0000000..18c8aee --- /dev/null +++ b/sha2/src/sha512_x64_masm.asm @@ -0,0 +1,265 @@ +; +; SHA512 hash in x64 MASM +; +; Copyright (c) 2023 Chong Yeol Nah (MIT License) +; +; Permission is hereby granted, free of charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. 
In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. +; +; +; Storage usage: +; Bytes Location Volatile Description +; 8 rax yes Temporary w-bit word used in the hash computation +; 8 rbx no Temporary w-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (read-only) +; 8 rdx yes Base address of hash value array argument (read-only) +; 8 rdi no Temporary w-bit word used in the hash computation +; 8 rsi no Temporary w-bit word used in the hash computation +; 8 rsp no x64 stack pointer +; 8 r8 yes SHA512 working variable A +; 8 r9 yes SHA512 working variable B +; 8 r10 yes SHA512 working variable C +; 8 r11 yes SHA512 working variable D +; 8 r12 no SHA512 working variable E +; 8 r13 no SHA512 working variable F +; 8 r14 no SHA512 working variable G +; 8 r15 no SHA512 working variable H +; 128 [rsp+0] no Circular buffer of most recent 16 message schedule items, 8 bytes each + + option casemap:none + + .const +SCHED macro i + index textequ %i AND 0fh ; i mod 16 + exitm <[rsp + index*8]> + endm + +ROUNDTAIL macro a, b, c, d, e, f, g, h, k ; rbx = w[i] + ; temp1 = h + S1 + ch + k[i] + w[i] + ; temp2 = S0 + maj + ; (obj1) h -> temp1 + temp2 = h + S1 + ch + k[i] + w[i] + S0 + maj + ; (obj2) d -> d + temp1 + ; Part 0 + mov rax, e + mov rdi, e + mov rsi, e + ror rax, 14 + ror rdi, 18 + ror rsi, 41 + xor rdi, rsi + xor rax, rdi ; rax = S1 + ; ch = (e & f) ^ (~e & g) = (g ^ (e & (f ^ g))) + ; & and ^ form the Z/2Z ring (& is *, ^ is +) + ; ~e is (1 + e) + ; ef + (1 + e)g = ef + g + eg = g + ef + eg = g + e(f + g) + mov rdi, g + xor rdi, f + and rdi, e + xor rdi, g ; rdi = ch + add h, rax ; h -> h + S1 + add h, rdi ; h -> h + S1 + ch + mov rax, k + add h, rax ; h -> h + S1 + ch + k[i] + add h, rbx ; h -> h + S1 + ch + 
k[i] + w[i] = temp1 + ; Part 1 + add d, h ; d -> d + temp1 (obj2) + ; Part 2 + mov rax, a + mov rdi, a + mov rsi, a + ror rax, 28 + ror rdi, 34 + ror rsi, 39 + xor rdi, rsi + xor rax, rdi ; rax = S0 + add h, rax ; h -> temp1 + S0 + ; maj = (a and b) xor (a and c) xor (b and c) = (a and (b or c)) or (b and c) + ; https://www.wolframalpha.com/input?i=simplify+%28A+%26%26+B%29+xor+%28A+%26%26+C%29+xor+%28B+%26%26+C%29 + mov rdi, c + mov rax, c + or rax, b + and rdi, b + and rax, a + or rax, rdi ; rax = maj + add h, rax ; h -> temp1 + S0 + maj = temp1 + temp2 (obj1) + endm + +ROUND macro i, a, b, c, d, e, f, g, h, k + +if i LT 16 + + mov rbx, [rcx + i*8] + bswap rbx + mov SCHED(i), rbx + +else + + ; (obj) w[i] -> w[i-16] + s0 + w[i-7] + s1 + mov rbx, SCHED(i - 16) ; rbx = w[i-16] + mov rax, SCHED(i - 15) + mov rdi, rax + mov rsi, rax + ror rdi, 8 + shr rsi, 7 + ror rax, 1 + xor rdi, rsi + xor rax, rdi ; s0 = rax + add rbx, rax ; rbx = w[i-16] + s0 + add rbx, SCHED(i - 7) ; rbx = w[i-16] + s0 + w[i-7] + mov rax, SCHED(i - 2) + mov rdi, rax + mov rsi, rax + ror rdi, 61 + shr rsi, 6 + ror rax, 19 + xor rdi, rsi + xor rax, rdi ; rax = s1 + add rbx, rax ; rbx = w[i-16] + s0 + w[i-7] + s1 + mov SCHED(i), rbx ; w[i] -> w[i-16] + s0 + w[i-7] + s1 (obj) + +endif + + ROUNDTAIL a, b, c, d, e, f, g, h, k ; rbx = w[i] + endm + + .code + ; void sha512_compress(const uint8_t block[128], uint64_t state[8]) + public sha512_compress +sha512_compress proc + ; Save nonvolatile registers, allocate scratch space + push rbx + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + sub rsp, 128 + + ; Initialize working variables with previous hash value + mov r8, [rdx] ; a + mov r9, [rdx + 8] ; b + mov r10, [rdx + 16] ; c + mov r11, [rdx + 24] ; d + mov r12, [rdx + 32] ; e + mov r13, [rdx + 40] ; f + mov r14, [rdx + 48] ; g + mov r15, [rdx + 56] ; h + + ; 80 rounds of hashing + ROUND 0, r8, r9, r10, r11, r12, r13, r14, r15, 0428A2F98D728AE22h + ROUND 1, r15, r8, r9, r10, r11, r12, 
r13, r14, 07137449123EF65CDh + ROUND 2, r14, r15, r8, r9, r10, r11, r12, r13, 0B5C0FBCFEC4D3B2Fh + ROUND 3, r13, r14, r15, r8, r9, r10, r11, r12, 0E9B5DBA58189DBBCh + ROUND 4, r12, r13, r14, r15, r8, r9, r10, r11, 03956C25BF348B538h + ROUND 5, r11, r12, r13, r14, r15, r8, r9, r10, 059F111F1B605D019h + ROUND 6, r10, r11, r12, r13, r14, r15, r8, r9, 0923F82A4AF194F9Bh + ROUND 7, r9, r10, r11, r12, r13, r14, r15, r8, 0AB1C5ED5DA6D8118h + ROUND 8, r8, r9, r10, r11, r12, r13, r14, r15, 0D807AA98A3030242h + ROUND 9, r15, r8, r9, r10, r11, r12, r13, r14, 012835B0145706FBEh + ROUND 10, r14, r15, r8, r9, r10, r11, r12, r13, 0243185BE4EE4B28Ch + ROUND 11, r13, r14, r15, r8, r9, r10, r11, r12, 0550C7DC3D5FFB4E2h + ROUND 12, r12, r13, r14, r15, r8, r9, r10, r11, 072BE5D74F27B896Fh + ROUND 13, r11, r12, r13, r14, r15, r8, r9, r10, 080DEB1FE3B1696B1h + ROUND 14, r10, r11, r12, r13, r14, r15, r8, r9, 09BDC06A725C71235h + ROUND 15, r9, r10, r11, r12, r13, r14, r15, r8, 0C19BF174CF692694h + ROUND 16, r8, r9, r10, r11, r12, r13, r14, r15, 0E49B69C19EF14AD2h + ROUND 17, r15, r8, r9, r10, r11, r12, r13, r14, 0EFBE4786384F25E3h + ROUND 18, r14, r15, r8, r9, r10, r11, r12, r13, 00FC19DC68B8CD5B5h + ROUND 19, r13, r14, r15, r8, r9, r10, r11, r12, 0240CA1CC77AC9C65h + ROUND 20, r12, r13, r14, r15, r8, r9, r10, r11, 02DE92C6F592B0275h + ROUND 21, r11, r12, r13, r14, r15, r8, r9, r10, 04A7484AA6EA6E483h + ROUND 22, r10, r11, r12, r13, r14, r15, r8, r9, 05CB0A9DCBD41FBD4h + ROUND 23, r9, r10, r11, r12, r13, r14, r15, r8, 076F988DA831153B5h + ROUND 24, r8, r9, r10, r11, r12, r13, r14, r15, 0983E5152EE66DFABh + ROUND 25, r15, r8, r9, r10, r11, r12, r13, r14, 0A831C66D2DB43210h + ROUND 26, r14, r15, r8, r9, r10, r11, r12, r13, 0B00327C898FB213Fh + ROUND 27, r13, r14, r15, r8, r9, r10, r11, r12, 0BF597FC7BEEF0EE4h + ROUND 28, r12, r13, r14, r15, r8, r9, r10, r11, 0C6E00BF33DA88FC2h + ROUND 29, r11, r12, r13, r14, r15, r8, r9, r10, 0D5A79147930AA725h + ROUND 30, r10, r11, r12, r13, r14, r15, r8, 
r9, 006CA6351E003826Fh + ROUND 31, r9, r10, r11, r12, r13, r14, r15, r8, 0142929670A0E6E70h + ROUND 32, r8, r9, r10, r11, r12, r13, r14, r15, 027B70A8546D22FFCh + ROUND 33, r15, r8, r9, r10, r11, r12, r13, r14, 02E1B21385C26C926h + ROUND 34, r14, r15, r8, r9, r10, r11, r12, r13, 04D2C6DFC5AC42AEDh + ROUND 35, r13, r14, r15, r8, r9, r10, r11, r12, 053380D139D95B3DFh + ROUND 36, r12, r13, r14, r15, r8, r9, r10, r11, 0650A73548BAF63DEh + ROUND 37, r11, r12, r13, r14, r15, r8, r9, r10, 0766A0ABB3C77B2A8h + ROUND 38, r10, r11, r12, r13, r14, r15, r8, r9, 081C2C92E47EDAEE6h + ROUND 39, r9, r10, r11, r12, r13, r14, r15, r8, 092722C851482353Bh + ROUND 40, r8, r9, r10, r11, r12, r13, r14, r15, 0A2BFE8A14CF10364h + ROUND 41, r15, r8, r9, r10, r11, r12, r13, r14, 0A81A664BBC423001h + ROUND 42, r14, r15, r8, r9, r10, r11, r12, r13, 0C24B8B70D0F89791h + ROUND 43, r13, r14, r15, r8, r9, r10, r11, r12, 0C76C51A30654BE30h + ROUND 44, r12, r13, r14, r15, r8, r9, r10, r11, 0D192E819D6EF5218h + ROUND 45, r11, r12, r13, r14, r15, r8, r9, r10, 0D69906245565A910h + ROUND 46, r10, r11, r12, r13, r14, r15, r8, r9, 0F40E35855771202Ah + ROUND 47, r9, r10, r11, r12, r13, r14, r15, r8, 0106AA07032BBD1B8h + ROUND 48, r8, r9, r10, r11, r12, r13, r14, r15, 019A4C116B8D2D0C8h + ROUND 49, r15, r8, r9, r10, r11, r12, r13, r14, 01E376C085141AB53h + ROUND 50, r14, r15, r8, r9, r10, r11, r12, r13, 02748774CDF8EEB99h + ROUND 51, r13, r14, r15, r8, r9, r10, r11, r12, 034B0BCB5E19B48A8h + ROUND 52, r12, r13, r14, r15, r8, r9, r10, r11, 0391C0CB3C5C95A63h + ROUND 53, r11, r12, r13, r14, r15, r8, r9, r10, 04ED8AA4AE3418ACBh + ROUND 54, r10, r11, r12, r13, r14, r15, r8, r9, 05B9CCA4F7763E373h + ROUND 55, r9, r10, r11, r12, r13, r14, r15, r8, 0682E6FF3D6B2B8A3h + ROUND 56, r8, r9, r10, r11, r12, r13, r14, r15, 0748F82EE5DEFB2FCh + ROUND 57, r15, r8, r9, r10, r11, r12, r13, r14, 078A5636F43172F60h + ROUND 58, r14, r15, r8, r9, r10, r11, r12, r13, 084C87814A1F0AB72h + ROUND 59, r13, r14, r15, r8, r9, r10, r11, 
r12, 08CC702081A6439ECh + ROUND 60, r12, r13, r14, r15, r8, r9, r10, r11, 090BEFFFA23631E28h + ROUND 61, r11, r12, r13, r14, r15, r8, r9, r10, 0A4506CEBDE82BDE9h + ROUND 62, r10, r11, r12, r13, r14, r15, r8, r9, 0BEF9A3F7B2C67915h + ROUND 63, r9, r10, r11, r12, r13, r14, r15, r8, 0C67178F2E372532Bh + ROUND 64, r8, r9, r10, r11, r12, r13, r14, r15, 0CA273ECEEA26619Ch + ROUND 65, r15, r8, r9, r10, r11, r12, r13, r14, 0D186B8C721C0C207h + ROUND 66, r14, r15, r8, r9, r10, r11, r12, r13, 0EADA7DD6CDE0EB1Eh + ROUND 67, r13, r14, r15, r8, r9, r10, r11, r12, 0F57D4F7FEE6ED178h + ROUND 68, r12, r13, r14, r15, r8, r9, r10, r11, 006F067AA72176FBAh + ROUND 69, r11, r12, r13, r14, r15, r8, r9, r10, 00A637DC5A2C898A6h + ROUND 70, r10, r11, r12, r13, r14, r15, r8, r9, 0113F9804BEF90DAEh + ROUND 71, r9, r10, r11, r12, r13, r14, r15, r8, 01B710B35131C471Bh + ROUND 72, r8, r9, r10, r11, r12, r13, r14, r15, 028DB77F523047D84h + ROUND 73, r15, r8, r9, r10, r11, r12, r13, r14, 032CAAB7B40C72493h + ROUND 74, r14, r15, r8, r9, r10, r11, r12, r13, 03C9EBE0A15C9BEBCh + ROUND 75, r13, r14, r15, r8, r9, r10, r11, r12, 0431D67C49C100D4Ch + ROUND 76, r12, r13, r14, r15, r8, r9, r10, r11, 04CC5D4BECB3E42B6h + ROUND 77, r11, r12, r13, r14, r15, r8, r9, r10, 0597F299CFC657E2Ah + ROUND 78, r10, r11, r12, r13, r14, r15, r8, r9, 05FCB6FAB3AD6FAECh + ROUND 79, r9, r10, r11, r12, r13, r14, r15, r8, 06C44198C4A475817h + + ; Compute intermediate hash value + add [rdx] , r8 + add [rdx + 8], r9 + add [rdx + 16], r10 + add [rdx + 24], r11 + add [rdx + 32], r12 + add [rdx + 40], r13 + add [rdx + 48], r14 + add [rdx + 56], r15 + + ; Restore nonvolatile registers + add rsp, 128 + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbx + ret +sha512_compress endp + end diff --git a/whirlpool/build.rs b/whirlpool/build.rs index 88d3542..1d5d80c 100644 --- a/whirlpool/build.rs +++ b/whirlpool/build.rs @@ -1,10 +1,13 @@ fn main() { let target_arch = 
std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + let target_family = std::env::var("CARGO_CFG_TARGET_FAMILY").unwrap_or_default(); let asm_path = if target_arch == "x86" { "src/x86.S" - } else if target_arch == "x86_64" { + } else if target_arch == "x86_64" && target_family == "unix" { "src/x64.S" + } else if target_arch == "x86_64" && target_family == "windows" { + "src/x64_masm.asm" } else { panic!("Unsupported target architecture"); }; diff --git a/whirlpool/src/x64_masm.asm b/whirlpool/src/x64_masm.asm new file mode 100644 index 0000000..ff9a1c4 --- /dev/null +++ b/whirlpool/src/x64_masm.asm @@ -0,0 +1,343 @@ +; +; Whirlpool hash in x64 MASM +; +; Copyright (c) 2023 Chong Yeol Nah (MIT License) +; +; Permission is hereby granted, free of charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. 
+;
+;
+; Storage usage:
+;   Bytes  Location   Volatile   Description
+;       8  rax        yes        Scratch: extracted 16-bit word, then its low byte as a table index; also base address of roundconstants
+;       8  rbx        no         Scratch: high byte of the extracted 16-bit word, used as a table index
+;       8  rcx        yes        Base address of message block array argument (register is never modified; memory is only read)
+;       8  rdx        yes        Base address of hash value array argument (register is never modified; the array is updated in place)
+;       8  rdi        no         Scratch: base address of the lookup table currently being indexed
+;       4  esi        no         Upward loop counter for the NUM_ROUNDS (10) rounds
+;       8  rsp        no         x64 stack pointer
+;       8  r8..r11    yes        Output rows for current round being computed, in little endian (8 bytes per register)
+;       8  r12..r15   no         Output rows for current round being computed, in little endian (8 bytes per register)
+;      64  xmm0..3    yes        All contents of current state array, in little endian (16 bytes per register)
+;      64  xmm4..7    4,5: yes; 6,7: no   All contents of current block array, in little endian (16 bytes per register)
+;       8  [rsp+0]    no         Temporary storage for transferring r15 to the high half of an XMM register
+
+                option  casemap:none
+
+NUM_ROUNDS      = 10
+                .const
+; Round i (0 <= i < NUM_ROUNDS) reads the 8 bytes at offset 8*i, so only the first 80 of these 256 bytes are consumed
+roundconstants  byte    018h, 023h, 0C6h, 0E8h, 087h, 0B8h, 001h, 04Fh, 036h, 0A6h, 0D2h, 0F5h, 079h, 06Fh, 091h, 052h
+                byte    060h, 0BCh, 09Bh, 08Eh, 0A3h, 00Ch, 07Bh, 035h, 01Dh, 0E0h, 0D7h, 0C2h, 02Eh, 04Bh, 0FEh, 057h
+                byte    015h, 077h, 037h, 0E5h, 09Fh, 0F0h, 04Ah, 0DAh, 058h, 0C9h, 029h, 00Ah, 0B1h, 0A0h, 06Bh, 085h
+                byte    0BDh, 05Dh, 010h, 0F4h, 0CBh, 03Eh, 005h, 067h, 0E4h, 027h, 041h, 08Bh, 0A7h, 07Dh, 095h, 0D8h
+                byte    0FBh, 0EEh, 07Ch, 066h, 0DDh, 017h, 047h, 09Eh, 0CAh, 02Dh, 0BFh, 007h, 0ADh, 05Ah, 083h, 033h
+                byte    063h, 002h, 0AAh, 071h, 0C8h, 019h, 049h, 0D9h, 0F2h, 0E3h, 05Bh, 088h, 09Ah, 026h, 032h, 0B0h
+                byte    0E9h, 00Fh, 0D5h, 080h, 0BEh, 0CDh, 034h, 048h, 0FFh, 07Ah, 090h, 05Fh, 020h, 068h, 01Ah, 0AEh
+                byte    0B4h, 054h, 093h, 022h, 064h, 0F1h, 073h, 012h, 040h, 008h, 0C3h, 0ECh, 0DBh, 0A1h, 08Dh, 03Dh
+                byte    097h, 000h, 0CFh, 02Bh, 076h, 082h, 0D6h, 01Bh, 0B5h, 0AFh, 06Ah, 050h, 045h, 0F3h, 030h, 0EFh
+                byte    03Fh, 055h, 0A2h, 0EAh,
065h, 0BAh, 02Fh, 0C0h, 0DEh, 01Ch, 0FDh, 04Dh, 092h, 075h, 006h, 08Ah + byte 0B2h, 0E6h, 00Eh, 01Fh, 062h, 0D4h, 0A8h, 096h, 0F9h, 0C5h, 025h, 059h, 084h, 072h, 039h, 04Ch + byte 05Eh, 078h, 038h, 08Ch, 0D1h, 0A5h, 0E2h, 061h, 0B3h, 021h, 09Ch, 01Eh, 043h, 0C7h, 0FCh, 004h + byte 051h, 099h, 06Dh, 00Dh, 0FAh, 0DFh, 07Eh, 024h, 03Bh, 0ABh, 0CEh, 011h, 08Fh, 04Eh, 0B7h, 0EBh + byte 03Ch, 081h, 094h, 0F7h, 0B9h, 013h, 02Ch, 0D3h, 0E7h, 06Eh, 0C4h, 003h, 056h, 044h, 07Fh, 0A9h + byte 02Ah, 0BBh, 0C1h, 053h, 0DCh, 00Bh, 09Dh, 06Ch, 031h, 074h, 0F6h, 046h, 0ACh, 089h, 014h, 0E1h + byte 016h, 03Ah, 069h, 009h, 070h, 0B6h, 0D0h, 0EDh, 0CCh, 042h, 098h, 0A4h, 028h, 05Ch, 0F8h, 086h + +; The combined effect of gamma (SubBytes) and theta (MixRows) +magictable0 qword 0D83078C018601818h, 02646AF05238C2323h, 0B891F97EC63FC6C6h, 0FBCD6F13E887E8E8h, 0CB13A14C87268787h, 0116D62A9B8DAB8B8h, 00902050801040101h, 00D9E6E424F214F4Fh + qword 09B6CEEAD36D83636h, 0FF510459A6A2A6A6h, 00CB9BDDED26FD2D2h, 00EF706FBF5F3F5F5h, 096F280EF79F97979h, 030DECE5F6FA16F6Fh, 06D3FEFFC917E9191h, 0F8A407AA52555252h + qword 047C0FD27609D6060h, 035657689BCCABCBCh, 0372BCDAC9B569B9Bh, 08A018C048E028E8Eh, 0D25B1571A3B6A3A3h, 06C183C600C300C0Ch, 084F68AFF7BF17B7Bh, 0806AE1B535D43535h + qword 0F53A69E81D741D1Dh, 0B3DD4753E0A7E0E0h, 021B3ACF6D77BD7D7h, 09C99ED5EC22FC2C2h, 0435C966D2EB82E2Eh, 029967A624B314B4Bh, 05DE121A3FEDFFEFEh, 0D5AE168257415757h + qword 0BD2A41A815541515h, 0E8EEB69F77C17777h, 0926EEBA537DC3737h, 09ED7567BE5B3E5E5h, 01323D98C9F469F9Fh, 023FD17D3F0E7F0F0h, 020947F6A4A354A4Ah, 044A9959EDA4FDADAh + qword 0A2B025FA587D5858h, 0CF8FCA06C903C9C9h, 07C528D5529A42929h, 05A1422500A280A0Ah, 0507F4FE1B1FEB1B1h, 0C95D1A69A0BAA0A0h, 014D6DA7F6BB16B6Bh, 0D917AB5C852E8585h + qword 03C677381BDCEBDBDh, 08FBA34D25D695D5Dh, 09020508010401010h, 007F503F3F4F7F4F4h, 0DD8BC016CB0BCBCBh, 0D37CC6ED3EF83E3Eh, 02D0A112805140505h, 078CEE61F67816767h + qword 097D55373E4B7E4E4h, 0024EBB25279C2727h, 07382583241194141h, 
0A70B9D2C8B168B8Bh, 0F6530151A7A6A7A7h, 0B2FA94CF7DE97D7Dh, 04937FBDC956E9595h, 056AD9F8ED847D8D8h + qword 070EB308BFBCBFBFBh, 0CDC17123EE9FEEEEh, 0BBF891C77CED7C7Ch, 071CCE31766856666h, 07BA78EA6DD53DDDDh, 0AF2E4BB8175C1717h, 0458E460247014747h, 01A21DC849E429E9Eh + qword 0D489C51ECA0FCACAh, 0585A99752DB42D2Dh, 02E637991BFC6BFBFh, 03F0E1B38071C0707h, 0AC472301AD8EADADh, 0B0B42FEA5A755A5Ah, 0EF1BB56C83368383h, 0B666FF8533CC3333h + qword 05CC6F23F63916363h, 012040A1002080202h, 093493839AA92AAAAh, 0DEE2A8AF71D97171h, 0C68DCF0EC807C8C8h, 0D1327DC819641919h, 03B92707249394949h, 05FAF9A86D943D9D9h + qword 031F91DC3F2EFF2F2h, 0A8DB484BE3ABE3E3h, 0B9B62AE25B715B5Bh, 0BC0D9234881A8888h, 03E29C8A49A529A9Ah, 00B4CBE2D26982626h, 0BF64FA8D32C83232h, 0597D4AE9B0FAB0B0h + qword 0F2CF6A1BE983E9E9h, 0771E33780F3C0F0Fh, 033B7A6E6D573D5D5h, 0F41DBA74803A8080h, 027617C99BEC2BEBEh, 0EB87DE26CD13CDCDh, 08968E4BD34D03434h, 03290757A483D4848h + qword 054E324ABFFDBFFFFh, 08DF48FF77AF57A7Ah, 0643DEAF4907A9090h, 09DBE3EC25F615F5Fh, 03D40A01D20802020h, 00FD0D56768BD6868h, 0CA3472D01A681A1Ah, 0B7412C19AE82AEAEh + qword 07D755EC9B4EAB4B4h, 0CEA8199A544D5454h, 07F3BE5EC93769393h, 02F44AA0D22882222h, 063C8E907648D6464h, 02AFF12DBF1E3F1F1h, 0CCE6A2BF73D17373h, 082245A9012481212h + qword 07A805D3A401D4040h, 04810284008200808h, 0959BE856C32BC3C3h, 0DFC57B33EC97ECECh, 04DAB9096DB4BDBDBh, 0C05F1F61A1BEA1A1h, 09107831C8D0E8D8Dh, 0C87AC9F53DF43D3Dh + qword 05B33F1CC97669797h, 00000000000000000h, 0F983D436CF1BCFCFh, 06E5687452BAC2B2Bh, 0E1ECB39776C57676h, 0E619B06482328282h, 028B1A9FED67FD6D6h, 0C33677D81B6C1B1Bh + qword 074775BC1B5EEB5B5h, 0BE432911AF86AFAFh, 01DD4DF776AB56A6Ah, 0EAA00DBA505D5050h, 0578A4C1245094545h, 038FB18CBF3EBF3F3h, 0AD60F09D30C03030h, 0C4C3742BEF9BEFEFh + qword 0DA7EC3E53FFC3F3Fh, 0C7AA1C9255495555h, 0DB591079A2B2A2A2h, 0E9C96503EA8FEAEAh, 06ACAEC0F65896565h, 0036968B9BAD2BABAh, 04A5E93652FBC2F2Fh, 08E9DE74EC027C0C0h + qword 060A181BEDE5FDEDEh, 0FC386CE01C701C1Ch, 
046E72EBBFDD3FDFDh, 01F9A64524D294D4Dh, 07639E0E492729292h, 0FAEABC8F75C97575h, 0360C1E3006180606h, 0AE0998248A128A8Ah + qword 04B7940F9B2F2B2B2h, 085D15963E6BFE6E6h, 07E1C36700E380E0Eh, 0E73E63F81F7C1F1Fh, 055C4F73762956262h, 03AB5A3EED477D4D4h, 0814D3229A89AA8A8h, 05231F4C496629696h + qword 062EF3A9BF9C3F9F9h, 0A397F666C533C5C5h, 0104AB13525942525h, 0ABB220F259795959h, 0D015AE54842A8484h, 0C5E4A7B772D57272h, 0EC72DDD539E43939h, 01698615A4C2D4C4Ch + qword 094BC3BCA5E655E5Eh, 09FF085E778FD7878h, 0E570D8DD38E03838h, 0980586148C0A8C8Ch, 017BFB2C6D163D1D1h, 0E4570B41A5AEA5A5h, 0A1D94D43E2AFE2E2h, 04EC2F82F61996161h + qword 0427B45F1B3F6B3B3h, 03442A51521842121h, 00825D6949C4A9C9Ch, 0EE3C66F01E781E1Eh, 06186522243114343h, 0B193FC76C73BC7C7h, 04FE52BB3FCD7FCFCh, 02408142004100404h + qword 0E3A208B251595151h, 0252FC7BC995E9999h, 022DAC44F6DA96D6Dh, 0651A39680D340D0Dh, 079E93583FACFFAFAh, 069A384B6DF5BDFDFh, 0A9FC9BD77EE57E7Eh, 01948B43D24902424h + qword 0FE76D7C53BEC3B3Bh, 09A4B3D31AB96ABABh, 0F081D13ECE1FCECEh, 09922558811441111h, 08303890C8F068F8Fh, 0049C6B4A4E254E4Eh, 0667351D1B7E6B7B7h, 0E0CB600BEB8BEBEBh + qword 0C178CCFD3CF03C3Ch, 0FD1FBF7C813E8181h, 04035FED4946A9494h, 01CF30CEBF7FBF7F7h, 0186F67A1B9DEB9B9h, 08B265F98134C1313h, 051589C7D2CB02C2Ch, 005BBB8D6D36BD3D3h + qword 08CD35C6BE7BBE7E7h, 039DCCB576EA56E6Eh, 0AA95F36EC437C4C4h, 01B060F18030C0303h, 0DCAC138A56455656h, 05E88491A440D4444h, 0A0FE9EDF7FE17F7Fh, 0884F3721A99EA9A9h + qword 06754824D2AA82A2Ah, 00A6B6DB1BBD6BBBBh, 0879FE246C123C1C1h, 0F1A602A253515353h, 072A58BAEDC57DCDCh, 0531627580B2C0B0Bh, 00127D39C9D4E9D9Dh, 02BD8C1476CAD6C6Ch + qword 0A462F59531C43131h, 0F3E8B98774CD7474h, 015F109E3F6FFF6F6h, 04C8C430A46054646h, 0A5452609AC8AACACh, 0B50F973C891E8989h, 0B42844A014501414h, 0BADF425BE1A3E1E1h + qword 0A62C4EB016581616h, 0F774D2CD3AE83A3Ah, 006D2D06F69B96969h, 041122D4809240909h, 0D7E0ADA770DD7070h, 06F7154D9B6E2B6B6h, 01EBDB7CED067D0D0h, 0D6C77E3BED93EDEDh + qword 0E285DB2ECC17CCCCh, 
06884572A42154242h, 02C2DC2B4985A9898h, 0ED550E49A4AAA4A4h, 07550885D28A02828h, 086B831DA5C6D5C5Ch, 06BED3F93F8C7F8F8h, 0C211A44486228686h + +; Same table but rotated by 1 byte +magictable1 qword 03078C018601818D8h, 046AF05238C232326h, 091F97EC63FC6C6B8h, 0CD6F13E887E8E8FBh, 013A14C87268787CBh, 06D62A9B8DAB8B811h, 00205080104010109h, 09E6E424F214F4F0Dh + qword 06CEEAD36D836369Bh, 0510459A6A2A6A6FFh, 0B9BDDED26FD2D20Ch, 0F706FBF5F3F5F50Eh, 0F280EF79F9797996h, 0DECE5F6FA16F6F30h, 03FEFFC917E91916Dh, 0A407AA52555252F8h + qword 0C0FD27609D606047h, 0657689BCCABCBC35h, 02BCDAC9B569B9B37h, 0018C048E028E8E8Ah, 05B1571A3B6A3A3D2h, 0183C600C300C0C6Ch, 0F68AFF7BF17B7B84h, 06AE1B535D4353580h + qword 03A69E81D741D1DF5h, 0DD4753E0A7E0E0B3h, 0B3ACF6D77BD7D721h, 099ED5EC22FC2C29Ch, 05C966D2EB82E2E43h, 0967A624B314B4B29h, 0E121A3FEDFFEFE5Dh, 0AE168257415757D5h + qword 02A41A815541515BDh, 0EEB69F77C17777E8h, 06EEBA537DC373792h, 0D7567BE5B3E5E59Eh, 023D98C9F469F9F13h, 0FD17D3F0E7F0F023h, 0947F6A4A354A4A20h, 0A9959EDA4FDADA44h + qword 0B025FA587D5858A2h, 08FCA06C903C9C9CFh, 0528D5529A429297Ch, 01422500A280A0A5Ah, 07F4FE1B1FEB1B150h, 05D1A69A0BAA0A0C9h, 0D6DA7F6BB16B6B14h, 017AB5C852E8585D9h + qword 0677381BDCEBDBD3Ch, 0BA34D25D695D5D8Fh, 02050801040101090h, 0F503F3F4F7F4F407h, 08BC016CB0BCBCBDDh, 07CC6ED3EF83E3ED3h, 00A1128051405052Dh, 0CEE61F6781676778h + qword 0D55373E4B7E4E497h, 04EBB25279C272702h, 08258324119414173h, 00B9D2C8B168B8BA7h, 0530151A7A6A7A7F6h, 0FA94CF7DE97D7DB2h, 037FBDC956E959549h, 0AD9F8ED847D8D856h + qword 0EB308BFBCBFBFB70h, 0C17123EE9FEEEECDh, 0F891C77CED7C7CBBh, 0CCE3176685666671h, 0A78EA6DD53DDDD7Bh, 02E4BB8175C1717AFh, 08E46024701474745h, 021DC849E429E9E1Ah + qword 089C51ECA0FCACAD4h, 05A99752DB42D2D58h, 0637991BFC6BFBF2Eh, 00E1B38071C07073Fh, 0472301AD8EADADACh, 0B42FEA5A755A5AB0h, 01BB56C83368383EFh, 066FF8533CC3333B6h + qword 0C6F23F639163635Ch, 0040A100208020212h, 0493839AA92AAAA93h, 0E2A8AF71D97171DEh, 08DCF0EC807C8C8C6h, 0327DC819641919D1h, 
0927072493949493Bh, 0AF9A86D943D9D95Fh + qword 0F91DC3F2EFF2F231h, 0DB484BE3ABE3E3A8h, 0B62AE25B715B5BB9h, 00D9234881A8888BCh, 029C8A49A529A9A3Eh, 04CBE2D269826260Bh, 064FA8D32C83232BFh, 07D4AE9B0FAB0B059h + qword 0CF6A1BE983E9E9F2h, 01E33780F3C0F0F77h, 0B7A6E6D573D5D533h, 01DBA74803A8080F4h, 0617C99BEC2BEBE27h, 087DE26CD13CDCDEBh, 068E4BD34D0343489h, 090757A483D484832h + qword 0E324ABFFDBFFFF54h, 0F48FF77AF57A7A8Dh, 03DEAF4907A909064h, 0BE3EC25F615F5F9Dh, 040A01D208020203Dh, 0D0D56768BD68680Fh, 03472D01A681A1ACAh, 0412C19AE82AEAEB7h + qword 0755EC9B4EAB4B47Dh, 0A8199A544D5454CEh, 03BE5EC937693937Fh, 044AA0D228822222Fh, 0C8E907648D646463h, 0FF12DBF1E3F1F12Ah, 0E6A2BF73D17373CCh, 0245A901248121282h + qword 0805D3A401D40407Ah, 01028400820080848h, 09BE856C32BC3C395h, 0C57B33EC97ECECDFh, 0AB9096DB4BDBDB4Dh, 05F1F61A1BEA1A1C0h, 007831C8D0E8D8D91h, 07AC9F53DF43D3DC8h + qword 033F1CC976697975Bh, 00000000000000000h, 083D436CF1BCFCFF9h, 05687452BAC2B2B6Eh, 0ECB39776C57676E1h, 019B06482328282E6h, 0B1A9FED67FD6D628h, 03677D81B6C1B1BC3h + qword 0775BC1B5EEB5B574h, 0432911AF86AFAFBEh, 0D4DF776AB56A6A1Dh, 0A00DBA505D5050EAh, 08A4C124509454557h, 0FB18CBF3EBF3F338h, 060F09D30C03030ADh, 0C3742BEF9BEFEFC4h + qword 07EC3E53FFC3F3FDAh, 0AA1C9255495555C7h, 0591079A2B2A2A2DBh, 0C96503EA8FEAEAE9h, 0CAEC0F658965656Ah, 06968B9BAD2BABA03h, 05E93652FBC2F2F4Ah, 09DE74EC027C0C08Eh + qword 0A181BEDE5FDEDE60h, 0386CE01C701C1CFCh, 0E72EBBFDD3FDFD46h, 09A64524D294D4D1Fh, 039E0E49272929276h, 0EABC8F75C97575FAh, 00C1E300618060636h, 00998248A128A8AAEh + qword 07940F9B2F2B2B24Bh, 0D15963E6BFE6E685h, 01C36700E380E0E7Eh, 03E63F81F7C1F1FE7h, 0C4F7376295626255h, 0B5A3EED477D4D43Ah, 04D3229A89AA8A881h, 031F4C49662969652h + qword 0EF3A9BF9C3F9F962h, 097F666C533C5C5A3h, 04AB1352594252510h, 0B220F259795959ABh, 015AE54842A8484D0h, 0E4A7B772D57272C5h, 072DDD539E43939ECh, 098615A4C2D4C4C16h + qword 0BC3BCA5E655E5E94h, 0F085E778FD78789Fh, 070D8DD38E03838E5h, 00586148C0A8C8C98h, 0BFB2C6D163D1D117h, 
0570B41A5AEA5A5E4h, 0D94D43E2AFE2E2A1h, 0C2F82F619961614Eh
+                qword   07B45F1B3F6B3B342h, 042A5152184212134h, 025D6949C4A9C9C08h, 03C66F01E781E1EEEh, 08652224311434361h, 093FC76C73BC7C7B1h, 0E52BB3FCD7FCFC4Fh, 00814200410040424h
+                qword   0A208B251595151E3h, 02FC7BC995E999925h, 0DAC44F6DA96D6D22h, 01A39680D340D0D65h, 0E93583FACFFAFA79h, 0A384B6DF5BDFDF69h, 0FC9BD77EE57E7EA9h, 048B43D2490242419h
+                qword   076D7C53BEC3B3BFEh, 04B3D31AB96ABAB9Ah, 081D13ECE1FCECEF0h, 02255881144111199h, 003890C8F068F8F83h, 09C6B4A4E254E4E04h, 07351D1B7E6B7B766h, 0CB600BEB8BEBEBE0h
+                qword   078CCFD3CF03C3CC1h, 01FBF7C813E8181FDh, 035FED4946A949440h, 0F30CEBF7FBF7F71Ch, 06F67A1B9DEB9B918h, 0265F98134C13138Bh, 0589C7D2CB02C2C51h, 0BBB8D6D36BD3D305h
+                qword   0D35C6BE7BBE7E78Ch, 0DCCB576EA56E6E39h, 095F36EC437C4C4AAh, 0060F18030C03031Bh, 0AC138A56455656DCh, 088491A440D44445Eh, 0FE9EDF7FE17F7FA0h, 04F3721A99EA9A988h
+                qword   054824D2AA82A2A67h, 06B6DB1BBD6BBBB0Ah, 09FE246C123C1C187h, 0A602A253515353F1h, 0A58BAEDC57DCDC72h, 01627580B2C0B0B53h, 027D39C9D4E9D9D01h, 0D8C1476CAD6C6C2Bh
+                qword   062F59531C43131A4h, 0E8B98774CD7474F3h, 0F109E3F6FFF6F615h, 08C430A460546464Ch, 0452609AC8AACACA5h, 00F973C891E8989B5h, 02844A014501414B4h, 0DF425BE1A3E1E1BAh
+                qword   02C4EB016581616A6h, 074D2CD3AE83A3AF7h, 0D2D06F69B9696906h, 0122D480924090941h, 0E0ADA770DD7070D7h, 07154D9B6E2B6B66Fh, 0BDB7CED067D0D01Eh, 0C77E3BED93EDEDD6h
+                qword   085DB2ECC17CCCCE2h, 084572A4215424268h, 02DC2B4985A98982Ch, 0550E49A4AAA4A4EDh, 050885D28A0282875h, 0B831DA5C6D5C5C86h, 0ED3F93F8C7F8F86Bh, 011A44486228686C2h
+
+; DOBYTEPAIRFIRST inreg, offset, outreg0, outreg1
+;   Extracts 16-bit word number `offset` from XMM register `inreg`, splits it
+;   into its low and high byte, and initializes two accumulators:
+;     outreg0 = magictable0[low byte]
+;     outreg1 = magictable1[high byte]
+;   Clobbers rax, rbx and rdi.
+DOBYTEPAIRFIRST macro   inreg, offset, outreg0, outreg1
+                pextrw  eax, inreg, offset          ; eax = selected word, zero-extended
+                movzx   ebx, ah                     ; ebx = high byte of the word
+                and     eax, 0FFh                   ; eax = low byte of the word
+                lea     rdi, magictable0            ; Table base goes through rdi so it can take a scaled index
+                mov     outreg0, [rdi + rax*8]
+                lea     rdi, magictable1
+                mov     outreg1, [rdi + rbx*8]
+                endm
+
+; DOBYTEPAIR inreg, offset, outreg0, outreg1
+;   Same lookups as DOBYTEPAIRFIRST, but the table entries are XORed into
+;   outreg0 / outreg1 instead of overwriting them.
+;   Clobbers rax, rbx and rdi.
+DOBYTEPAIR      macro   inreg, offset, outreg0, outreg1
+                pextrw  eax, inreg, offset          ; eax = selected word, zero-extended
+                movzx   ebx, ah                     ; ebx = high byte of the word
+                and     eax, 0FFh                   ; eax = low byte of the word
+                lea     rdi, magictable0
+                xor     outreg0, [rdi + rax*8]
+                lea     rdi, magictable1
+                xor     outreg1, [rdi + rbx*8]
+                endm
+
+; ROTATERIGHT
+;   Rotates each of the eight row accumulators r8..r15 right by 16 bits.
+;   Invoked after every group of eight byte-pair lookups.
+ROTATERIGHT     macro
+                ror     r8, 16
+                ror     r9, 16
+                ror     r10, 16
+                ror     r11, 16
+                ror     r12, 16
+                ror     r13, 16
+                ror     r14, 16
+                ror     r15, 16
+                endm
+
+; XORSTATETOBLOCK
+;   xmm4..xmm7 ^= xmm0..xmm3 (register-wise). xmm0..xmm3 are left unchanged.
+XORSTATETOBLOCK macro   ; Used for sigma (AddRoundKey)
+                xorpd   xmm4, xmm0
+                xorpd   xmm5, xmm1
+                xorpd   xmm6, xmm2
+                xorpd   xmm7, xmm3
+                endm
+
+                .code
+                ; void whirlpool_compress(const uint64_t block[8], uint8_t state[64])
+                ;
+                ; Arguments (Microsoft x64 calling convention):
+                ;   rcx = address of the 64-byte message block (only read)
+                ;   rdx = address of the 64-byte hash state (read, then overwritten with the new state)
+                ;
+                ; Stack frame after the prologue:
+                ;   [rsp + 00h]  8 bytes of scratch used to move r15 into the high half of an XMM register
+                ;   [rsp + 10h]  saved xmm6
+                ;   [rsp + 20h]  saved xmm7
+                ;
+                ; The procedure is declared with FRAME and every prologue step is
+                ; annotated so that ml64 emits unwind data for it. The function changes
+                ; rsp and nonvolatile registers, so without unwind data the Windows
+                ; unwinder cannot walk the stack through it.
+                public  whirlpool_compress
+whirlpool_compress proc frame
+                ; Save nonvolatile registers, allocate scratch space
+                push    rbx
+                .pushreg rbx
+                push    rdi
+                .pushreg rdi
+                push    rsi
+                .pushreg rsi
+                push    r12
+                .pushreg r12
+                push    r13
+                .pushreg r13
+                push    r14
+                .pushreg r14
+                push    r15
+                .pushreg r15
+                sub     rsp, 30h
+                .allocstack 30h
+                movdqu  [rsp + 10h], xmm6
+                .savexmm128 xmm6, 10h
+                movdqu  [rsp + 20h], xmm7
+                .savexmm128 xmm7, 20h
+                .endprolog
+
+                ; Load state into XMM
+                movdqu  xmm0, [rdx]
+                movdqu  xmm1, [rdx + 16]
+                movdqu  xmm2, [rdx + 32]
+                movdqu  xmm3, [rdx + 48]
+
+                ; Load block into XMM
+                movdqu  xmm4, [rcx]
+                movdqu  xmm5, [rcx + 16]
+                movdqu  xmm6, [rcx + 32]
+                movdqu  xmm7, [rcx + 48]
+
+                ; XOR block with state
+                XORSTATETOBLOCK
+
+                ; 10 rounds of hashing
+                mov     esi, 0
+
+                ; Process all 64 state bytes
+looptop:        DOBYTEPAIRFIRST xmm0, 0, r8 , r9
+                DOBYTEPAIRFIRST xmm1, 0, r10, r11
+                DOBYTEPAIRFIRST xmm2, 0, r12, r13
+                DOBYTEPAIRFIRST xmm3, 0, r14, r15
+                DOBYTEPAIR      xmm0, 4, r9 , r10
+                DOBYTEPAIR      xmm1, 4, r11, r12
+                DOBYTEPAIR      xmm2, 4, r13, r14
+                DOBYTEPAIR      xmm3, 4, r15, r8
+                ROTATERIGHT
+                DOBYTEPAIR      xmm3, 1, r8 , r9
+                DOBYTEPAIR      xmm0, 1, r10, r11
+                DOBYTEPAIR      xmm1, 1, r12, r13
+                DOBYTEPAIR      xmm2, 1, r14, r15
+                DOBYTEPAIR      xmm3, 5, r9 , r10
+                DOBYTEPAIR      xmm0, 5, r11, r12
+                DOBYTEPAIR      xmm1, 5, r13, r14
+                DOBYTEPAIR      xmm2, 5, r15, r8
+                ROTATERIGHT
+                DOBYTEPAIR      xmm2, 2, r8 , r9
+                DOBYTEPAIR      xmm3, 2, r10, r11
+                DOBYTEPAIR      xmm0, 2, r12, r13
+                DOBYTEPAIR      xmm1, 2, r14, r15
+                DOBYTEPAIR      xmm2, 6, r9 , r10
+                DOBYTEPAIR      xmm3, 6, r11, r12
+                DOBYTEPAIR      xmm0, 6, r13, r14
+                DOBYTEPAIR      xmm1, 6, r15, r8
+                ROTATERIGHT
+                DOBYTEPAIR      xmm1, 3, r8 , r9
+                DOBYTEPAIR      xmm2, 3, r10, r11
+                DOBYTEPAIR      xmm3, 3, r12, r13
+                DOBYTEPAIR      xmm0, 3, r14, r15
+                DOBYTEPAIR      xmm1, 7, r9 , r10
+                DOBYTEPAIR      xmm2, 7, r11, r12
+                DOBYTEPAIR      xmm3, 7, r13, r14
+                DOBYTEPAIR      xmm0, 7, r15, r8
+                ROTATERIGHT
+                lea     rax, roundconstants
+                xor     r8, [rax + rsi*8]  ; Add round constant
+
+                ; Copy state back to XMM
+                ; (r15 goes through [rsp] because it must land in the high half of xmm3)
+                mov     [rsp], r15
+                movq    xmm0, r8
+                movq    xmm1, r9
+                shufpd  xmm0, xmm1, 0
+                movq    xmm1, r10
+                movq    xmm2, r11
+                shufpd  xmm1, xmm2, 0
+                movq    xmm2, r12
+                movq    xmm3, r13
+                shufpd  xmm2, xmm3, 0
+                movq    xmm3, r14
+                movhps  xmm3, qword ptr [rsp]
+
+                ; Process all 64 block bytes
+                DOBYTEPAIRFIRST xmm4, 0, r8 , r9
+                DOBYTEPAIRFIRST xmm5, 0, r10, r11
+                DOBYTEPAIRFIRST xmm6, 0, r12, r13
+                DOBYTEPAIRFIRST xmm7, 0, r14, r15
+                DOBYTEPAIR      xmm4, 4, r9 , r10
+                DOBYTEPAIR      xmm5, 4, r11, r12
+                DOBYTEPAIR      xmm6, 4, r13, r14
+                DOBYTEPAIR      xmm7, 4, r15, r8
+                ROTATERIGHT
+                DOBYTEPAIR      xmm7, 1, r8 , r9
+                DOBYTEPAIR      xmm4, 1, r10, r11
+                DOBYTEPAIR      xmm5, 1, r12, r13
+                DOBYTEPAIR      xmm6, 1, r14, r15
+                DOBYTEPAIR      xmm7, 5, r9 , r10
+                DOBYTEPAIR      xmm4, 5, r11, r12
+                DOBYTEPAIR      xmm5, 5, r13, r14
+                DOBYTEPAIR      xmm6, 5, r15, r8
+                ROTATERIGHT
+                DOBYTEPAIR      xmm6, 2, r8 , r9
+                DOBYTEPAIR      xmm7, 2, r10, r11
+                DOBYTEPAIR      xmm4, 2, r12, r13
+                DOBYTEPAIR      xmm5, 2, r14, r15
+                DOBYTEPAIR      xmm6, 6, r9 , r10
+                DOBYTEPAIR      xmm7, 6, r11, r12
+                DOBYTEPAIR      xmm4, 6, r13, r14
+                DOBYTEPAIR      xmm5, 6, r15, r8
+                ROTATERIGHT
+                DOBYTEPAIR      xmm5, 3, r8 , r9
+                DOBYTEPAIR      xmm6, 3, r10, r11
+                DOBYTEPAIR      xmm7, 3, r12, r13
+                DOBYTEPAIR      xmm4, 3, r14, r15
+                DOBYTEPAIR      xmm5, 7, r9 , r10
+                DOBYTEPAIR      xmm6, 7, r11, r12
+                DOBYTEPAIR      xmm7, 7, r13, r14
+                DOBYTEPAIR      xmm4, 7, r15, r8
+                ROTATERIGHT
+
+                ; Copy block back to XMM
+                mov     [rsp], r15
+                movq    xmm4, r8
+                movq    xmm5, r9
+                shufpd  xmm4, xmm5, 0
+                movq    xmm5, r10
+                movq    xmm6, r11
+                shufpd  xmm5, xmm6, 0
+                movq    xmm6, r12
+                movq    xmm7, r13
+                shufpd  xmm6, xmm7, 0
+                movq    xmm7, r14
+                movhps  xmm7, qword ptr [rsp]
+
+                ; Add state to block
+                XORSTATETOBLOCK
+
+                ; Loop back until all NUM_ROUNDS rounds are done
+                inc     esi
+                cmp     esi, NUM_ROUNDS
+                jne     looptop
+
+                ; XOR old state (in memory) with old block (in memory) and new block (in XMM)
+                movdqu  xmm0, [rdx]  ; Load old state
+                movdqu  xmm1, [rdx + 16]
+                movdqu  xmm2, [rdx + 32]
+                movdqu  xmm3, [rdx + 48]
+                XORSTATETOBLOCK      ; XOR into new block
+                movdqu  xmm0, [rcx]  ; Load old block
+                movdqu  xmm1, [rcx + 16]
+                movdqu  xmm2, [rcx + 32]
+                movdqu  xmm3, [rcx + 48]
+                XORSTATETOBLOCK      ; XOR into new block
+                movdqu  [rdx]     , xmm4  ; Store new state
+                movdqu  [rdx + 16], xmm5
+                movdqu  [rdx + 32], xmm6
+                movdqu  [rdx + 48], xmm7
+
+                ; Restore nonvolatile registers
+                ; (XMM restores come first; the epilogue proper is add rsp / pop... / ret)
+                movdqu  xmm7, [rsp + 20h]
+                movdqu  xmm6, [rsp + 10h]
+                add     rsp, 30h
+                pop     r15
+                pop     r14
+                pop     r13
+                pop     r12
+                pop     rsi
+                pop     rdi
+                pop     rbx
+                ret
+whirlpool_compress endp
+                end