From 06bca112a2577f5bb6dd25234e2e20127527b5b4 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Sun, 22 Dec 2024 14:46:50 +0100 Subject: [PATCH] keccak: initial implementation of keccak256 and sha3-256 [skip ci] --- constantine/ciphers/chacha20.nim | 10 +- constantine/hashes/h_keccak.nim | 275 ++++++++++++++++ constantine/hashes/h_sha256.nim | 4 +- constantine/hashes/keccak/keccak_generic.nim | 330 +++++++++++++++++++ constantine/hashes/sha256/sha256_generic.nim | 21 +- constantine/serialization/endians.nim | 89 ++--- 6 files changed, 668 insertions(+), 61 deletions(-) create mode 100644 constantine/hashes/h_keccak.nim create mode 100644 constantine/hashes/keccak/keccak_generic.nim diff --git a/constantine/ciphers/chacha20.nim b/constantine/ciphers/chacha20.nim index 42600799b..e8d487da7 100644 --- a/constantine/ciphers/chacha20.nim +++ b/constantine/ciphers/chacha20.nim @@ -106,12 +106,14 @@ func chacha20_cipher*( var keyU{.noInit.}: array[8, uint32] var nonceU{.noInit.}: array[3, uint32] - var pos = 0'u + var pos = 0 for i in 0 ..< 8: - keyU[i].parseFromBlob(key, pos, littleEndian) - pos = 0'u + keyU[i] = uint32.fromBytes(key, pos, littleEndian) + pos += sizeof(uint32) + pos = 0 for i in 0 ..< 3: - nonceU[i].parseFromBlob(nonce, pos, littleEndian) + nonceU[i] = uint32.fromBytes(nonce, pos, littleEndian) + pos += sizeof(uint32) var counter = counter var eaten = 0 diff --git a/constantine/hashes/h_keccak.nim b/constantine/hashes/h_keccak.nim new file mode 100644 index 000000000..3c3312cc4 --- /dev/null +++ b/constantine/hashes/h_keccak.nim @@ -0,0 +1,275 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. 
This file may not be copied, modified, or distributed except according to those terms. + +import ../zoo_exports + +import + ../platforms/[abstractions, views], + ./keccak/keccak_generic + +# Keccak, the hash function underlying SHA3 +# -------------------------------------------------------------------------------- +# +# References: +# - https://keccak.team/keccak_specs_summary.html +# - https://keccak.team/files/Keccak-reference-3.0.pdf +# - https://keccak.team/files/Keccak-implementation-3.2.pdf +# - SHA3 (different padding): https://csrc.nist.gov/publications/detail/fips/202/final +# - https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf + +# Sponge API +# -------------------------------------------------------------------------------- +# +# References: +# - https://keccak.team/keccak_specs_summary.html +# - https://keccak.team/files/SpongeFunctions.pdf +# - https://keccak.team/files/CSF-0.1.pdf +# +# Keccak[r,c](Mbytes || Mbits) { +# # Padding +# d = 2^|Mbits| + sum for i=0..|Mbits|-1 of 2^i*Mbits[i] +# P = Mbytes || d || 0x00 || … || 0x00 +# P = P xor (0x00 || … || 0x00 || 0x80) +# +# # Initialization +# S[x,y] = 0, for (x,y) in (0…4,0…4) +# +# # Absorbing phase +# for each block Pi in P +# S[x,y] = S[x,y] xor Pi[x+5*y], for (x,y) such that x+5*y < r/w +# S = Keccak-f[r+c](S) +# +# # Squeezing phase +# Z = empty string +# while output is requested +# Z = Z || S[x,y], for (x,y) such that x+5*y < r/w +# S = Keccak-f[r+c](S) +# +# return Z +# } + +# Duplex construction +# -------------------------------------------------------- +# - https://keccak.team/sponge_duplex.html +# - https://keccak.team/files/SpongeDuplex.pdf +# - https://eprint.iacr.org/2011/499.pdf: Duplexing the Sponge +# - https://eprint.iacr.org/2023/522.pdf: SAFE - Sponge API for Field Element +# - https://hackmd.io/@7dpNYqjKQGeYC7wMlPxHtQ/ByIbpfX9c +# +# The original duplex construction described by the Keccak team +# is "absorb-permute-squeeze" +# Paper https://eprint.iacr.org/2022/1340.pdf 
+# goes over other approaches. +# +# We follow the original intent: +# - permute required when transitioning between absorb->squeeze +# - no permute required when transitioning between squeeze->absorb +# This may change depending on protocol requirements. +# This is in line with the SAFE (Sponge API for Field Elements) approach + +# Types and constants +# ---------------------------------------------------------------- + +type + KeccakContext*[bits: static int, delimiter: static byte] = object + + # Context description + # - `H` is the permutation state, it is updated only + # prior to a permutation + # - `buf` is a message buffer to store partial state updates + # - `absorb_offset` tracks how filled the message buffer is + # - `squeeze_offset` tracks the write position in the output buffer + # + # Subtleties: + # Duplex construction requires a state permutation when + # transitioning between the absorbing and squeezing phases. + # After an absorb, squeeze_offset is incremented by the sponge `rate` + # This signals the need for a permutation before squeeze. + # Similarly after a squeeze, absorb_offset is incremented by the sponge rate. + # The real offset can be recovered with a subtraction + # to properly update the state.
+ + H {.align: 64.}: KeccakState + buf {.align: 64.}: array[200 - 2*(bits div 8), byte] + absorb_offset: int32 + squeeze_offset: int32 + + keccak256* = KeccakContext[256, 0x01] + sha3_256* = KeccakContext[256, 0x06] + +template rate(ctx: KeccakContext): int = + 200 - 2*(ctx.bits div 8) + +# Internals +# ---------------------------------------------------------------- + +# No exceptions allowed in core cryptographic operations +{.push raises: [].} +{.push checks: off.} + +func absorbBuffer(ctx: var KeccakContext) {.inline.} = + ctx.H.hashMessageBlocks_generic(ctx.buf.asUnchecked(), numBlocks = 1) + ctx.buf.setZero() + # Note: in certain cases like authenticated encryption + # we might want to absorb at the same position that has been squeezed + # hence we don't reset the absorb_offset to 0 + # The buf is zeroed which is the neutral element for xor. + +# Public API +# ---------------------------------------------------------------- + +template digestSize*(H: type KeccakContext): int = + ## Returns the output size in bytes + KeccakContext.bits shr 3 + +template internalBlockSize*(H: type KeccakContext): int = + ## Returns the byte size of the hash function ingested blocks (the sponge rate) + 200 - 2*(KeccakContext.bits shr 3) + +func init*(ctx: var KeccakContext) {.inline.} = + ## Initialize or reinitialize a Keccak context (the first squeeze will pad and permute) + ctx.reset(); ctx.squeeze_offset = int32 ctx.rate() + +func absorb*(ctx: var KeccakContext, message: openArray[byte]) = + ## Absorb a message in the Keccak sponge state + ## + ## Security note: the tail of your message might be stored + ## in an internal buffer. + ## if sensitive content is used, ensure that + ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible. + ## Additionally ensure that the message(s) passed were stored + ## in memory considered secure for your threat model. + + if message.len == 0: + return + + var pos = int ctx.absorb_offset + var cur = 0 + var bytesLeft = message.len + + # We follow the "absorb-permute-squeeze" approach + # originally defined by the Keccak team.
+ # It is compatible with SHA-3 hash spec. + # See https://eprint.iacr.org/2022/1340.pdf + # + # There are no transition/permutation between squeezing -> absorbing + # And within this `absorb` function + # the state pos == ctx.rate() + # is always followed by a permute and setting `pos = 0` + + if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate(): + # Previous partial update, fill the state and do one permutation + let free = ctx.rate() - pos + ctx.buf.rawCopy(dStart = pos, message, sStart = 0, len = free) + ctx.absorbBuffer() + pos = 0 + cur = free + bytesLeft -= free + + if bytesLeft >= ctx.rate(): + # Process multiple blocks + let numBlocks = bytesLeft div ctx.rate() + ctx.H.hashMessageBlocks_generic(message.asUnchecked() +% cur, numBlocks) + cur += numBlocks * ctx.rate() + bytesLeft -= numBlocks * ctx.rate() + + if bytesLeft != 0: + # Store the tail in buffer, after any bytes already pending from a previous call + ctx.buf.rawCopy(dStart = pos, message, sStart = cur, len = bytesLeft) + + # Epilogue: the buffer now holds the `pos` pending bytes plus the new tail + ctx.absorb_offset = int32(pos + bytesLeft) + # Signal that the next squeeze transition needs a permute + ctx.squeeze_offset = int32 ctx.rate() + +func squeeze*(ctx: var KeccakContext, digest: var openArray[byte]) = + if digest.len == 0: + return + + var pos = int ctx.squeeze_offset + var cur = 0 + var bytesLeft = digest.len + + if pos == ctx.rate(): + # Transition from absorbing to squeezing + # This state can only come from `absorb` function + # as within `squeeze`, pos == ctx.rate() is always followed + # by a permute and pos = 0 + ctx.H.xorInPartial(ctx.buf.toOpenArray(0, ctx.absorb_offset-1)) + ctx.H.pad(ctx.absorb_offset, ctx.delimiter, ctx.rate()) + ctx.H.permute_generic(NumRounds = 24) + pos = 0 + ctx.absorb_offset = 0 + + if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate(): + # Previous partial squeeze, fill up to rate and do one permutation + let free = ctx.rate() - pos + ctx.H.copyOutPartial(hByteOffset = pos, digest.toOpenArray(0, free-1)) + ctx.H.permute_generic(NumRounds = 24) + pos = 0
+ ctx.absorb_offset = 0 + cur = free + bytesLeft -= free + + if bytesLeft >= ctx.rate(): + # Process multiple blocks + let numBlocks = bytesLeft div ctx.rate() + ctx.H.squeezeDigestBlocks_generic(digest.asUnchecked() +% cur, numBlocks) + ctx.absorb_offset = 0 + cur += numBlocks * ctx.rate() + bytesLeft -= numBlocks * ctx.rate() + + if bytesLeft != 0: + # Output the tail: `bytesLeft` bytes written at digest[cur ..< cur+bytesLeft] + ctx.H.copyOutPartial(hByteOffset = pos, digest.toOpenArray(cur, cur+bytesLeft-1)) + + # Epilogue: next squeeze resumes after the state bytes already output + ctx.squeeze_offset = int32(int(pos) + bytesLeft) + # We don't signal absorb_offset to permute the state if called next + # as per https://eprint.iacr.org/2023/522.pdf + # https://hackmd.io/@7dpNYqjKQGeYC7wMlPxHtQ/ByIbpfX9c#2-SAFE-definition + +func update*(ctx: var KeccakContext, message: openArray[byte]) = + ## Append a message to a Keccak context + ## for incremental Keccak computation + ## + ## Security note: the tail of your message might be stored + ## in an internal buffer. + ## if sensitive content is used, ensure that + ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible. + ## Additionally ensure that the message(s) passed was(were) stored + ## in memory considered secure for your threat model. + ctx.absorb(message) + +func finish*[N: static int](ctx: var KeccakContext, digest: var array[N, byte]) = + ## Finalize a Keccak computation and output the + ## message digest to the `digest` buffer + ## + ## Security note: this does not clear the internal buffer. + ## if sensitive content is used, use "ctx.clear()" + ## and also make sure that the message(s) passed were stored + ## in memory considered secure for your threat model.
+ ctx.squeeze(digest) + +func clear*(ctx: var KeccakContext) = + ## Clear the context internal buffers + # TODO: ensure compiler cannot optimize the code away + ctx.reset() + +when isMainModule: + import constantine/serialization/codecs + + var msg: array[32, byte] + var digest: array[32, byte] + var ctx: keccak256 + + ctx.init() + ctx.update(msg) + ctx.finish(digest) + + echo digest.toHex() \ No newline at end of file diff --git a/constantine/hashes/h_sha256.nim b/constantine/hashes/h_sha256.nim index 0f8b85807..ccd46da86 100644 --- a/constantine/hashes/h_sha256.nim +++ b/constantine/hashes/h_sha256.nim @@ -33,7 +33,7 @@ when UseASM_X86_32: type Sha256Context* = object - ## Align to 64 for cache line and SIMD friendliness + # Align to 64 for cache line and SIMD friendliness s{.align: 64}: Sha256_state buf{.align: 64}: array[BlockSize, byte] msgLen: uint64 @@ -130,7 +130,7 @@ func update*(ctx: var Sha256Context, message: openarray[byte]) {.libPrefix: pref ## in an internal buffer. ## if sensitive content is used, ensure that ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible. - ## Additionally ensure that the message(s) passed were stored + ## Additionally ensure that the message(s) passed was(were) stored ## in memory considered secure for your threat model. ## ## For passwords and secret keys, you MUST NOT use raw SHA-256 diff --git a/constantine/hashes/keccak/keccak_generic.nim b/constantine/hashes/keccak/keccak_generic.nim new file mode 100644 index 000000000..1835d5bd3 --- /dev/null +++ b/constantine/hashes/keccak/keccak_generic.nim @@ -0,0 +1,330 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). 
+# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + constantine/platforms/primitives, + constantine/serialization/endians + +# Keccak +# -------------------------------------------------------------------------------- +# +# References: +# - https://keccak.team/keccak_specs_summary.html +# - https://keccak.team/files/Keccak-reference-3.0.pdf +# - https://keccak.team/files/Keccak-implementation-3.2.pdf +# - SHA3 (different padding): https://csrc.nist.gov/publications/detail/fips/202/final +# +# Pseudo-code +# ~~~~~~~~~~~ +# Keccak-f[b](A) { +# for i in 0…n-1 +# A = Round[b](A, RC[i]) +# return A +# } +# +# Round[b](A,RC) { +# # θ step +# C[x] = A[x,0] xor A[x,1] xor A[x,2] xor A[x,3] xor A[x,4], for x in 0…4 +# D[x] = C[x-1] xor rot(C[x+1],1), for x in 0…4 +# A[x,y] = A[x,y] xor D[x], for (x,y) in (0…4,0…4) +# +# # ρ and π steps +# B[y,2*x+3*y] = rot(A[x,y], r[x,y]), for (x,y) in (0…4,0…4) +# +# # χ step +# A[x,y] = B[x,y] xor ((not B[x+1,y]) and B[x+2,y]), for (x,y) in (0…4,0…4) +# +# # ι step +# A[0,0] = A[0,0] xor RC +# +# return A +# } + +# No exceptions allowed in core cryptographic operations +{.push raises: [].} +{.push checks: off.} + +# Hardware acceleration considerations +# ------------------------------------------------ +# +# 1. The χ step uses "and not", the Keccak implementation guide suggest a "lane-complementing technique" +# to reduce the number of `not` from 5 to 1. 
+# However, the BMI1 CPU extension introduced `andn` in AMD Piledriver (2012) and Intel Haswell (2013) +# ARM has the BIC instruction (Bit Clear) for ANDNOT + +# Types & Constants +# ------------------------------------------------ + +type KeccakState* = object + ## A Keccak state matrix: 5*5*uint64 = 1600 bits, in column major order + ## ┌─┬─┬─┬─┬─┐ + ## ┌─┬─┬─┬─┬─┐┤ + ## ┌─┬─┬─┬─┬─┐┤┤ + ## ┌─┬─┬─┬─┬─┐┤┤┤ + ## ┌─┬─┬─┬─┬─┐┤┤┤┤ + ## ┌─┬─┬─┬─┬─┐┤┤┤┤┘ + ## ┌─┬─┬─┬─┬─┐┤┤┤┤┘ + ## ┌─┬─┬─┬─┬─┐┤┤┤┤┘ + ## ├─┼─┼─┼─┼─┤┤┤┤┘ + ## ├─┼─┼─┼─┼─┤┤┤┘ + ## ├─┼─┼─┼─┼─┤┤┘ ┌─┐ bit + ## ├─┼─┼─┼─┼─┤┘ └─┘ ┌─┐ + ## └─┴─┴─┴─┴─┘ ┌─┐┘ + ## state ┌─┐┘ + ## ┌─┐┘ + ## ┌─┐ ┌─┐┘ + ## ├─┤ column ┌─┐┘ + ## row ├─┤ ┌─┐┘ + ## ┌─┬─┬─┬─┬─┐ ├─┤ ┌─┐┘ lane + ## └─┴─┴─┴─┴─┘ ├─┤ └─┘ + ## └─┘ + ## + ## plane = row * lane + ## slice = row * column + ## sheet = column * lane + ## + ## Credit: https://github.com/tecosaur/KangarooTwelve.jl + state {.align: 64.}: array[5*5, uint64] + +func lin_idx(x, y: int): int {.inline.} = + 5*y+x + +func `[]`(A: KeccakState, x, y: int): uint64 {.inline.} = + A.state[lin_idx(x, y)] + +func `[]=`(A: var KeccakState, x, y: int, val: uint64) {.inline.} = + A.state[lin_idx(x, y)] = val + +func N(exponent: static int, x, y: int): int {.inline.} = + # We use algorithm 4 in https://keccak.team/files/Keccak-implementation-3.2.pdf + # We have a coordinate displacement matrix N = [1 0] + # [1 2] + # to store data without overwriting it + const exponent = exponent and 3 # exponent mod 4 as N has order 4 + when exponent == 0: + # N⁰ = [1 0] + # [0 1] + lin_idx(x, y) + elif exponent == 1: + # N¹ = [1 0] + # [1 2] + lin_idx(x, (x+2*y) mod 5) + elif exponent == 2: + # N² = [1 0] + # [3 4] + lin_idx(x, (3*x+4*y) mod 5) + elif exponent == 3: + # N³ = [1 0] + # [2 3] + lin_idx(x, (2*x+3*y) mod 5) + else: + {.error: "unreachable".} + +func N(A: KeccakState, i: static int, x, y: int): uint64 {.inline.} = + A.state[N(i, x, y)] + +func N(A: var KeccakState, i: static int, x, y: int): var uint64
{.inline.} = + A.state[N(i, x, y)] + +# Keccak round constants +# are iteratively computed via a linear feedback shift register +# rc[t] = (xᵗ mod x⁸ + x⁶ + x⁵ + x⁴ + 1) mod x in GF(2)[x] +const KRC: array[24, uint64] = [ + 0x0000000000000001'u64, + 0x0000000000008082'u64, + 0x800000000000808a'u64, + 0x8000000080008000'u64, + 0x000000000000808b'u64, + 0x0000000080000001'u64, + 0x8000000080008081'u64, + 0x8000000000008009'u64, + 0x000000000000008a'u64, + 0x0000000000000088'u64, + 0x0000000080008009'u64, + 0x000000008000000a'u64, + 0x000000008000808b'u64, + 0x800000000000008b'u64, + 0x8000000000008089'u64, + 0x8000000000008003'u64, + 0x8000000000008002'u64, + 0x8000000000000080'u64, + 0x000000000000800a'u64, + 0x800000008000000a'u64, + 0x8000000080008081'u64, + 0x8000000000008080'u64, + 0x0000000080000001'u64, + 0x8000000080008008'u64, +] + +func genRho(): array[5*5, int] = + result[lin_idx(0, 0)] = 0 + var (x, y) = (1, 0) + + for t in 0 ..< result.len-1: # skip 0 + # rotation constant r = i(i+1)/2, skipping (0, 0) hence (t+1)(t+2)/2 + result[lin_idx(x, y)] = + (((t+1) * (t+2)) shr 1) and (64-1) + + let Y = (2*x + 3*y) mod 5 + let X = y + x = X + y = Y + +func rotl(x: uint64, k: static int): uint64 {.inline.} = + return (x shl k) or (x shr ((64 - k) and 63)) # mask keeps the k = 0 case (Rho[0]) a valid shift + +func permute_generic*(A: var KeccakState, NumRounds: static int) = + # We use algorithm 4 in https://keccak.team/files/Keccak-implementation-3.2.pdf + const Rho = genRho() + + var C {.noinit.}: array[5, uint64] + var D {.noinit.}: array[5, uint64] + template B: array[5, uint64] = C # Reuse C buffer for B + + # We unroll the loop by 4 to: + # - reuse memory locations as N is cyclic of order 4 + # - minimize code size vs unrolling by 24 + static: doAssert((NumRounds and 3) == 0, "The number of rounds must be a multiple of 4") + for j in countup(0, NumRounds-1, 4): + staticFor i, 0, 4: + # θ₁: Column-parity via sum reduction in GF(2) (i.e.
addition is xor) + staticFor x, 0, 5: + C[x] = A.N(i, x, 0) xor + A.N(i, x, 1) xor + A.N(i, x, 2) xor + A.N(i, x, 3) xor + A.N(i, x, 4) + + # θ₂: Sum adjacent column parities + staticFor x, 0, 5: + D[x] = C[(x+4) mod 5] xor rotl(C[(x+1) mod 5], 1) + + # Keccak state matrix is column major + # so y should be the outer loop for cache-friendliness + staticFor y, 0, 5: + staticFor x, 0, 5: + # θ₃: Diffusion + # ρ: inter-slice diffusion + # π: long-term diffusion + B[(x + 2*y) mod 5] = rotl(A.N(i+1, x, y) xor D[x], Rho[N(1, x, y)]) + staticFor x, 0, 5: + # χ: non-linearity + A.N(i+1, x, y) = B[x] xor (not(B[(x+1) mod 5]) and B[(x+2) mod 5]) + + # ι step: break symmetries + A[0, 0] = A[0, 0] xor KRC[i+j] + +template `^=`(accum: var SomeInteger, b: SomeInteger) = + accum = accum xor b + +func xorInSingle(H: var KeccakState, val: byte, offset: int) {.inline.} = + ## Add a single byte in the Keccak state + + # Shift of 3 = log2(sizeof(byte) * 8) - Find the word to read/write + # WordMask of 7 = sizeof(byte) * 8 - 1 - In the word, shift to the offset to read/write + let slot = (offset and 7) shl 3 + let lane = uint64(val) shl slot # All bits but the ones set in `val` are 0, and 0 is neutral element of xor + H.state[offset shr 3] ^= lane + +func xorInBlock_generic(H: var KeccakState, msg: array[136, byte]) {.inline.} = + ## Add one rate-sized block (136 bytes = 17 lanes) of new data into the Keccak state + # This can benefit from vectorized instructions + for i in 0 ..< (msg.len div 8): + H.state[i] ^= uint64.fromBytes(msg, i*8, littleEndian) + +func xorInPartial*(H: var KeccakState, msg: openArray[byte]) = + ## Add multiple bytes to the state + ## The length MUST be at most the rate (136 bytes). + debug: doAssert msg.len <= 136 + + # Implementation detail: + # We could avoid an intermediate variable but + # dealing with non-multiple of size(T) length + # would be verbose, and require less than size(T) + # endianness handling.
+ # Furthermore 2 copies without the "multiple-of" + # tracking overhead might be faster, especially + # if the compiler vectorizes the second one + # or is able to fuse the 2 together. + # Lastly, this is only called when transitioning + # between absorbing and squeezing, for hashing + # this means once, however long a message to hash is. + var blck: array[136, byte] # zero-init + rawCopy(blck, 0, msg, 0, msg.len) + H.xorInBlock_generic(blck) + +func copyOutWords[W: static int]( + H: KeccakState, + dst: var array[W*8, byte]) {.inline.} = + ## Read data from the Keccak state + ## and write it into `dst` + debug: doAssert dst.len <= sizeof(H.state) + + for w in 0 ..< W: + let word = H.state[w] + for i in 0 ..< 8: + dst[w*8+i] = toByte(word shr (i*8)) + +func copyOutPartial*( + H: KeccakState, + hByteOffset: int, + dst: var openArray[byte]) {.inline.} = + ## Read data from the Keccak state + ## and write it into `dst` + ## starting from the state byte offset `hByteOffset` + ## hByteOffset + dst length MUST be at most the Keccak rate (136 bytes) + debug: doAssert dst.len + hByteOffset <= 136 + + # Implementation details: + # we could avoid a temporary block + # see `xorInPartial` for rationale + var blck {.noInit.}: array[136, byte] + H.copyOutWords(blck) + rawCopy(dst, 0, blck, hByteOffset, dst.len) + +func pad*(H: var KeccakState, hByteOffset: int, delim: static byte, rate: static int) {.inline.} = + debug: doAssert hByteOffset < rate + H.xorInSingle(delim, hByteOffset) + H.xorInSingle(0x80, rate-1) + +func hashMessageBlocks_generic*( + H: var KeccakState, + message: ptr UncheckedArray[byte], + numBlocks: int) = + ## Hash a message block by block + ## Keccak block size is the rate: 136 bytes (1088-bit rate, 512-bit capacity) + ## The state MUST be absorb ready + ## i.e.
previous operation cannot be a squeeze + ## a permutation is needed in-between + + var message = message + const rate = 136 # TODO: make a generic Keccak state with auto-derived rate + const numRounds = 24 # TODO: auto derive number of rounds + for _ in 0 ..< numBlocks: + let msg = cast[ptr array[rate, byte]](message) + H.xorInBlock_generic(msg[]) + H.permute_generic(numRounds) + message +%= rate + +func squeezeDigestBlocks_generic*( + H: var KeccakState, + digest: ptr UncheckedArray[byte], + numBlocks: int) = + ## Squeeze a digest block by block + ## Keccak block digest is the rate: 136 bytes (1088-bit rate, 512-bit capacity) + ## The state MUST be squeeze ready + ## i.e. previous operation cannot be an absorb + ## a permutation is needed in-between + var digest = digest + const rate = 136 # TODO: make a generic Keccak state with auto-derived rate + const numRounds = 24 # TODO: auto derive number of rounds + for _ in 0 ..< numBlocks: + let msg = cast[ptr array[rate, byte]](digest) + H.copyOutWords(msg[]) + H.permute_generic(numRounds) + digest +%= rate \ No newline at end of file diff --git a/constantine/hashes/sha256/sha256_generic.nim b/constantine/hashes/sha256/sha256_generic.nim index b89291077..fd52b6013 100644 --- a/constantine/hashes/sha256/sha256_generic.nim +++ b/constantine/hashes/sha256/sha256_generic.nim @@ -7,7 +7,8 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms.
import - constantine/platforms/primitives + constantine/platforms/primitives, + constantine/serialization/endians # SHA256, a hash function from the SHA2 family # -------------------------------------------------------------------------------- @@ -90,22 +91,6 @@ template s1(x: uint32): uint32 = # σ₁ rotr(x, 17) xor rotr(x, 19) xor (x shr 10) -# Message schedule -# ------------------------------------------------ - -template u32BE(blob: array[4, byte]): uint32 = - ## Interpret a data blob as a big-endian uint32 - when nimvm: - (blob[0].uint32 shl 24) or (blob[1].uint32 shl 16) or (blob[2].uint32 shl 8) or blob[3].uint32 - else: - when cpuEndian == littleEndian: - (blob[0].uint32 shl 24) or (blob[1].uint32 shl 16) or (blob[2].uint32 shl 8) or blob[3].uint32 - else: - cast[uint32](blob) - -template getU32at(msg: ptr UncheckedArray[byte], pos: SomeInteger): uint32 = - u32BE(cast[ptr array[4, byte]](msg[pos].addr)[]) - # State updates # ------------------------------------------------ @@ -147,7 +132,7 @@ func sha256_rounds_0_15( ms: var Sha256_MessageSchedule, message: ptr UncheckedArray[byte]) {.inline.} = staticFor t, 0, 16: - ms.w[t] = message.getU32at(t * sizeof(Word)) + ms.w[t] = uint32.fromBytes(message, t * sizeof(Word), bigEndian) sha256_round(s, ms.w[t], K256[t]) func sha256_rounds_16_63( diff --git a/constantine/serialization/endians.nim b/constantine/serialization/endians.nim index fd7f0f0c6..25ef9e41a 100644 --- a/constantine/serialization/endians.nim +++ b/constantine/serialization/endians.nim @@ -22,8 +22,9 @@ template toByte*(x: SomeUnsignedInt): byte = else: byte(x) -template blobFrom*(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int, endian: static Endianness) = +func blobFrom*(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int, endian: static Endianness) {.inline.} = ## Write an integer into a raw binary blob + ## The whole binary blob is interpreted as big-endian/little-endian ## Swapping endianness if needed ## startidx is the 
first written array item if littleEndian is requested ## or the last if bigEndian is requested @@ -34,42 +35,12 @@ template blobFrom*(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int for i in 0 ..< sizeof(src): dst[startIdx+sizeof(src)-1-i] = toByte(src shr (i * 8)) -func parseFromBlob*( - dst: var SomeUnsignedInt, - src: openArray[byte], - cursor: var uint, endian: static Endianness) {.inline.} = - ## Read an unsigned integer from a raw binary blob. - ## The `cursor` represents the current index in the array and is updated - ## by N bytes where N is the size of `dst` type in bytes. - ## The binary blob is interpreted as: - ## - an array of words traversed from 0 ..< len (little-endian), via an incremented `cursor` - ## - with each word being of `endian` ordering for deserialization purpose. - debug: - doAssert 0 <= cursor and cursor < src.len.uint - doAssert cursor + sizeof(dst).uint <= src.len.uint, - "cursor (" & $cursor & ") + sizeof(dst) (" & $sizeof(dst) & - ") <= src.len (" & $src.len & ")" - - type U = typeof(dst) - const L = sizeof(dst) - - var accum: U = 0 - when endian == littleEndian: - for i in 0'u ..< L: - accum = accum or (U(src[cursor+i]) shl (i * 8)) - else: - for i in 0'u ..< L: - accum = accum or (U(src[cursor+i]) shl ((L - 1 - i) * 8)) - dst = accum - cursor.inc(L) - func dumpRawInt*( dst: var openArray[byte], src: SomeUnsignedInt, cursor: uint, endian: static Endianness) {.inline.} = ## Dump an integer into raw binary form - ## The `cursor` represents the current index in the array and is updated - ## by N bytes where N is the size of `src` type in bytes. + ## The `cursor` represents the current index in the array ## The binary blob is interpreted as: ## - an array of words traversed from 0 ..< len (little-endian), via an incremented `cursor` ## - with each word being of `endian` ordering for deserialization purpose. 
@@ -99,15 +70,59 @@ func toBytes*(num: SomeUnsignedInt, endianness: static Endianness): array[sizeof for i in 0 ..< L: result[i] = toByte(num shr (i * 8)) -func fromBytes*(T: type SomeUnsignedInt, bytes: openArray[byte], endianness: static Endianness): T {.inline.} = +func fromBytes*(T: type SomeUnsignedInt, bytes: array[sizeof(T), byte], endianness: static Endianness): T {.inline.} = const L = sizeof(T) - debug: - doAssert bytes.len == L - # Note: result is zero-init - when endianness == cpuEndian: + when endianness == littleEndian: for i in 0 ..< L: result = result or (T(bytes[i]) shl (i*8)) else: for i in 0 ..< L: - result = result or (T(bytes[i]) shl ((L-1-i) * 8)) \ No newline at end of file + result = result or (T(bytes[i]) shl ((L-1-i) * 8)) + +template fromBytesImpl( + r: var SomeUnsignedInt, + bytes: openArray[byte] or ptr UncheckedArray[byte], + offset: int, + endianness: static Endianness) = + # With a function array[N, byte] doesn't match "openArray[byte] or something" + # https://github.com/nim-lang/Nim/issues/7432 + type T = typeof(r) + const L = sizeof(r) + r.reset() + when endianness == littleEndian: + for i in 0 ..< L: + r = r or (T(bytes[i+offset]) shl (i*8)) + else: + for i in 0 ..< L: + r = r or (T(bytes[i+offset]) shl ((L-1-i) * 8)) + +func fromBytes*( + T: type SomeUnsignedInt, + bytes: openArray[byte], + offset: int, + endianness: static Endianness): T {.inline.} = + ## Read an unsigned integer from a raw binary blob. + ## The `offset` represents the current index in the array + ## The binary blob is interpreted as: + ## - an array of words traversed from 0 ..< len (little-endian) + ## - with each word being of `endianness` ordering for deserialization purpose.
+ debug: + doAssert 0 <= offset and offset < bytes.len + doAssert offset + sizeof(T) <= bytes.len, + "offset (" & $offset & ") + sizeof(T) (" & $sizeof(T) & + ") <= bytes.len (" & $bytes.len & ")" + + result.fromBytesImpl(bytes, offset, endianness) + +func fromBytes*( + T: type SomeUnsignedInt, + bytes: ptr UncheckedArray[byte], + offset: int, + endianness: static Endianness): T {.inline.} = + ## Read an unsigned integer from a raw binary blob. + ## The `offset` represents the current index in the array + ## The binary blob is interpreted as: + ## - an array of words traversed from 0 ..< len (little-endian) + ## - with each word being of `endianness` ordering for deserialization purpose. + result.fromBytesImpl(bytes, offset, endianness)