mratsim · mratsim · Dec 25, 2024 · Dec 25, 2024 · Dec 25, 2024 · Dec 25, 2024
diff --git a/benchmarks/bench_h_keccak.nim b/benchmarks/bench_h_keccak.nim
@@ -84,5 +84,6 @@ when isMainModule:
       let iters = int(target_cycles div (s.int64 * worst_cycles_per_bytes))
       benchKeccak256_constantine(msg, $s & "B", iters)
       benchSHA3_256_openssl(msg, $s & "B", iters)
+      echo "----"
 
   main()
diff --git a/constantine/ciphers/chacha20.nim b/constantine/ciphers/chacha20.nim
@@ -80,12 +80,12 @@ func chacha20_block(
     state.inner_block()
 
   # uint32 are 4 bytes so multiply destination by 4
-  for i in 0'u ..< 4:
+  for i in 0 ..< 4:
     key_stream.dumpRawInt(state[i] + cccc[i], i shl 2, littleEndian)
-  for i in 4'u ..< 12:
+  for i in 4 ..< 12:
     key_stream.dumpRawInt(state[i] + key[i-4], i shl 2, littleEndian)
   key_stream.dumpRawInt(state[12] + block_counter, 12 shl 2, littleEndian)
-  for i in 13'u ..< 16:
+  for i in 13 ..< 16:
     key_stream.dumpRawInt(state[i] + nonce[i-13], i shl 2, littleEndian)
 
 func chacha20_cipher*(

diff --git a/constantine/hashes/h_keccak.nim b/constantine/hashes/h_keccak.nim
@@ -10,6 +10,9 @@ import
   constantine/platforms/[abstractions, views],
   ./keccak/keccak_generic
 
+when UseASM_X86_32:
+  import ./keccak/keccak_x86_bmi1
+
 # Keccak, the hash function underlying SHA3
 # --------------------------------------------------------------------------------
 #
@@ -68,7 +71,7 @@ import
 # - permute required when transitioning between absorb->squeeze
 # - no permute required when transitioning between squeeze->absorb
 # This may change depending on protocol requirement.
-# This is inline with the SAFE (Sponge API for FIeld Element) approach
+# This is in line with the SAFE (Sponge API for Field Elements) approach
 
 # Types and constants
 # ----------------------------------------------------------------
@@ -92,7 +95,6 @@ type
     #   The real offset can be recovered with a substraction
     #   to properly update the state.
     H {.align: 64.}: KeccakState
-    buf {.align: 64.}: array[200 - 2*(bits div 8), byte]
     absorb_offset: int32
     squeeze_offset: int32
 
@@ -109,14 +111,6 @@ template rate(ctx: KeccakContext): int =
 {.push raises: [].}
 {.push checks: off.}
 
-func absorbBuffer(ctx: var KeccakContext) {.inline.} =
-  ctx.H.hashMessageBlocks_generic(ctx.buf.asUnchecked(), numBlocks = 1)
-  ctx.buf.setZero()
-  # Note: in certain case like authenticated encryption
-  # we might want to absorb at the same position that have been squeezed
-  # hence we don't reset the absorb_offset to 0
-  # The buf is zeroed which is the neutral element for xor.
-
 # Public API
 # ----------------------------------------------------------------
 
@@ -134,6 +128,111 @@ func init*(ctx: var KeccakContext) {.inline.} =
   ## Initialize or reinitialize a Keccak context
   ctx.reset()
 
+template genAbsorb(isaFeatures: untyped) =
+  func `absorb _ isaFeatures`*(ctx: var KeccakContext, message: openArray[byte]) =
+    ## Absorb a message in the Keccak sponge state
+    ##
+    ## Security note: the tail of your message is absorbed directly
+    ## into the internal sponge state `ctx.H` (there is no separate buffer).
+    ## If sensitive content is used, ensure that
+    ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
+    ## Additionally ensure that the message(s) passed were stored
+    ## in memory considered secure for your threat model.
+
+    var pos = int ctx.absorb_offset # offset in Keccak state
+    var cur = 0                     # offset in message
+    var bytesLeft = message.len
+
+    # We follow the "absorb-permute-squeeze" approach
+    # originally defined by the Keccak team.
+    # It is compatible with SHA-3 hash spec.
+    # See https://eprint.iacr.org/2022/1340.pdf
+    #
+    # There is no transition/permutation between squeezing -> absorbing
+    # And within this `absorb` function
+    #    the state pos == ctx.rate()
+    # is always followed by a permute and setting `pos = 0`
+
+    if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate():
+      # Previous partial update, fill the state up to the rate and do one permutation
+      let free = ctx.rate() - pos
+      ctx.H.`xorInPartial _ isaFeatures`(pos, message.toOpenArray(0, free-1))
+      ctx.H.`permute _ isaFeatures`(NumRounds = 24)
+      pos = 0
+      cur = free
+      bytesLeft -= free
+
+    if bytesLeft >= ctx.rate():
+      # Absorb every remaining full rate-sized block straight from the message
+      let numBlocks = bytesLeft div ctx.rate()
+      ctx.H.`hashMessageBlocks _ isaFeatures`(message.asUnchecked() +% cur, numBlocks)
+      cur += numBlocks * ctx.rate()
+      bytesLeft -= numBlocks * ctx.rate()
+
+    if bytesLeft != 0:
+      # Absorb the tail into the state at offset `pos`, no permutation is done here
+      ctx.H.`xorInPartial _ isaFeatures`(pos, message.toOpenArray(cur, cur+bytesLeft-1))
+
+    # Epilogue: remember where the next absorb continues in the state
+    ctx.absorb_offset = int32(pos+bytesLeft)
+    # Signal that the next squeeze transition needs a permute
+    ctx.squeeze_offset = int32 ctx.rate()
+
+genAbsorb(generic) # instantiates `absorb_generic`
+when UseASM_X86_32:
+  genAbsorb(x86_bmi1) # instantiates `absorb_x86_bmi1`
+
+template genSqueeze(isaFeatures: untyped) =
+  func `squeeze _ isaFeatures`*(ctx: var KeccakContext, digest: var openArray[byte]) =
+    ## Squeeze `digest.len` bytes out of the Keccak sponge state into `digest`
+    var pos = ctx.squeeze_offset # offset in Keccak state
+    var cur = 0                  # offset in digest
+    var bytesLeft = digest.len
+
+    if pos == ctx.rate():
+      # Transition from absorbing to squeezing
+      #   This state can only come from `absorb` function
+      #   as within `squeeze`, pos == ctx.rate() is always followed
+      #   by a permute and pos = 0
+      ctx.H.pad(ctx.absorb_offset, ctx.delimiter, ctx.rate())
+      ctx.H.`permute _ isaFeatures`(NumRounds = 24)
+      pos = 0
+      ctx.absorb_offset = 0
+
+    if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate():
+      # Previous partial squeeze, fill up to rate and do one permutation
+      let free = ctx.rate() - pos
+      ctx.H.`copyOutPartial _ isaFeatures`(hByteOffset = pos, digest.toOpenArray(0, free-1))
+      ctx.H.`permute _ isaFeatures`(NumRounds = 24)
+      pos = 0
+      ctx.absorb_offset = 0
+      cur = free
+      bytesLeft -= free
+
+    if bytesLeft >= ctx.rate():
+      # Squeeze every remaining full rate-sized block straight into the digest
+      let numBlocks = bytesLeft div ctx.rate()
+      ctx.H.`squeezeDigestBlocks _ isaFeatures`(digest.asUnchecked() +% cur, numBlocks)
+      ctx.absorb_offset = 0
+      cur += numBlocks * ctx.rate()
+      bytesLeft -= numBlocks * ctx.rate()
+
+    if bytesLeft != 0:
+      # Output the tail into digest[cur .. cur+bytesLeft-1] (end index, not a length)
+      ctx.H.`copyOutPartial _ isaFeatures`(hByteOffset = pos, digest.toOpenArray(cur, cur+bytesLeft-1))
+
+    # Epilogue: resume after the bytes already output, `pos` is nonzero after a short squeeze
+    ctx.squeeze_offset = int32(pos + bytesLeft)
+    # We don't signal absorb_offset to permute the state if called next, as per
+    #   - original keccak spec that uses "absorb-permute-squeeze" protocol
+    #   - https://eprint.iacr.org/2022/1340.pdf
+    #   - https://eprint.iacr.org/2023/522.pdf
+    #     https://hackmd.io/@7dpNYqjKQGeYC7wMlPxHtQ/ByIbpfX9c#2-SAFE-definition
+
+genSqueeze(generic) # instantiates `squeeze_generic`
+when UseASM_X86_32:
+  genSqueeze(x86_bmi1) # instantiates `squeeze_x86_bmi1`
+
 func absorb*(ctx: var KeccakContext, message: openArray[byte]) =
   ## Absorb a message in the Keccak sponge state
   ##
@@ -143,90 +242,22 @@ func absorb*(ctx: var KeccakContext, message: openArray[byte]) =
   ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
   ## Additionally ensure that the message(s) passed were stored
   ## in memory considered secure for your threat model.
-
-  var pos = int ctx.absorb_offset
-  var cur = 0
-  var bytesLeft = message.len
-
-  # We follow the "absorb-permute-squeeze" approach
-  # originally defined by the Keccak team.
-  # It is compatible with SHA-3 hash spec.
-  # See https://eprint.iacr.org/2022/1340.pdf
-  #
-  # There are no transition/permutation between squeezing -> absorbing
-  # And within this `absorb` function
-  #    the state pos == ctx.rate()
-  # is always followed by a permute and setting `pos = 0`
-
-  if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate():
-    # Previous partial update, fill the state and do one permutation
-    let free = ctx.rate() - pos
-    ctx.buf.rawCopy(dStart = pos, message, sStart = 0, len = free)
-    ctx.absorbBuffer()
-    pos = 0
-    cur = free
-    bytesLeft -= free
-
-  if bytesLeft >= ctx.rate():
-    # Process multiple blocks
-    let numBlocks = bytesLeft div ctx.rate()
-    ctx.H.hashMessageBlocks_generic(message.asUnchecked() +% cur, numBlocks)
-    cur += numBlocks * ctx.rate()
-    bytesLeft -= numBlocks * ctx.rate()
-
-  if bytesLeft != 0:
-    # Store the tail in buffer
-    ctx.buf.rawCopy(dStart = pos, message, sStart = cur, len = bytesLeft)
-
-  # Epilogue
-  ctx.absorb_offset = int32(pos+bytesLeft)
-  # Signal that the next squeeze transition needs a permute
-  ctx.squeeze_offset = int32 ctx.rate()
-
-func squeeze*(ctx: var KeccakContext, digest: var openArray[byte]) =
-
-  var pos = ctx.squeeze_offset
-  var cur = 0
-  var bytesLeft = digest.len
-
-  if pos == ctx.rate():
-    # Transition from absorbing to squeezing
-    #   This state can only come from `absorb` function
-    #   as within `squeeze`, pos == ctx.rate() is always followed
-    #   by a permute and pos = 0
-    ctx.H.xorInPartial(ctx.buf.toOpenArray(0, ctx.absorb_offset-1))
-    ctx.H.pad(ctx.absorb_offset, ctx.delimiter, ctx.rate())
-    ctx.H.permute_generic(NumRounds = 24)
-    pos = 0
-    ctx.absorb_offset = 0
-
-  if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate():
-    # Previous partial squeeze, fill up to rate and do one permutation
-    let free = ctx.rate() - pos
-    ctx.H.copyOutPartial(hByteOffset = pos, digest.toOpenArray(0, free-1))
-    ctx.H.permute_generic(NumRounds = 24)
-    pos = 0
-    ctx.absorb_offset = 0
-    cur = free
-    bytesLeft -= free
-
-  if bytesLeft >= ctx.rate():
-    # Process multiple blocks
-    let numBlocks = bytesLeft div ctx.rate()
-    ctx.H.squeezeDigestBlocks_generic(digest.asUnchecked() +% cur, numBlocks)
-    ctx.absorb_offset = 0
-    cur += numBlocks * ctx.rate()
-    bytesLeft -= numBlocks * ctx.rate()
-
-  if bytesLeft != 0:
-    # Output the tail
-    ctx.H.copyOutPartial(hByteOffset = pos, digest.toOpenArray(cur, bytesLeft-1))
-
-  # Epilogue
-  ctx.squeeze_offset = int32 bytesLeft
-  # We don't signal absorb_offset to permute the state if called next
-  # as per https://eprint.iacr.org/2023/522.pdf
-  #   https://hackmd.io/@7dpNYqjKQGeYC7wMlPxHtQ/ByIbpfX9c#2-SAFE-definition
+  when UseASM_X86_32:
+    if ({.noSideEffect.}: hasBmi1()):
+      ctx.absorb_x86_bmi1(message)
+    else:
+      ctx.absorb_generic(message)
+  else:
+    ctx.absorb_generic(message)
+
+func squeeze*(ctx: var KeccakContext, message: var openArray[byte]) =
+  when UseASM_X86_32: # compile-time switch: the x86_bmi1 path only exists under this flag
+    if ({.noSideEffect.}: hasBmi1()): # feature check wrapped so this `func` stays side-effect free
+      ctx.squeeze_x86_bmi1(message) # `message` is the output buffer (backends name it `digest`)
+    else:
+      ctx.squeeze_generic(message) # taken when hasBmi1() is false
+  else:
+    ctx.squeeze_generic(message) # only the generic implementation is compiled in
 
 func update*(ctx: var KeccakContext, message: openArray[byte]) =
   ## Append a message to a Keccak context

diff --git a/constantine/hashes/h_sha256.nim b/constantine/hashes/h_sha256.nim
@@ -65,10 +65,10 @@ func dumpHash(
        digest: var array[DigestSize, byte],
        s: Sha256_state) {.inline.} =
   ## Convert the internal hash into a message digest
-  var dstIdx = 0'u
+  var dstIdx = 0
   for i in 0 ..< s.H.len:
     digest.dumpRawInt(s.H[i], dstIdx, bigEndian)
-    dstIdx += uint sizeof(uint32)
+    dstIdx += sizeof(uint32)
 
 func hashBuffer(ctx: var Sha256Context) {.inline.} =
   ctx.s.hashMessageBlocks(ctx.buf.asUnchecked(), numBlocks = 1)