Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keccak optimizations #498

Merged
merged 6 commits into from
Dec 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/bench_h_keccak.nim
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,6 @@ when isMainModule:
let iters = int(target_cycles div (s.int64 * worst_cycles_per_bytes))
benchKeccak256_constantine(msg, $s & "B", iters)
benchSHA3_256_openssl(msg, $s & "B", iters)
echo "----"

main()
6 changes: 3 additions & 3 deletions constantine/ciphers/chacha20.nim
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ func chacha20_block(
state.inner_block()

# uint32 are 4 bytes so multiply destination by 4
for i in 0'u ..< 4:
for i in 0 ..< 4:
key_stream.dumpRawInt(state[i] + cccc[i], i shl 2, littleEndian)
for i in 4'u ..< 12:
for i in 4 ..< 12:
key_stream.dumpRawInt(state[i] + key[i-4], i shl 2, littleEndian)
key_stream.dumpRawInt(state[12] + block_counter, 12 shl 2, littleEndian)
for i in 13'u ..< 16:
for i in 13 ..< 16:
key_stream.dumpRawInt(state[i] + nonce[i-13], i shl 2, littleEndian)

func chacha20_cipher*(
Expand Down
219 changes: 125 additions & 94 deletions constantine/hashes/h_keccak.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import
constantine/platforms/[abstractions, views],
./keccak/keccak_generic

when UseASM_X86_32:
import ./keccak/keccak_x86_bmi1

# Keccak, the hash function underlying SHA3
# --------------------------------------------------------------------------------
#
Expand Down Expand Up @@ -68,7 +71,7 @@ import
# - permute required when transitioning between absorb->squeeze
# - no permute required when transitioning between squeeze->absorb
# This may change depending on protocol requirement.
# This is inline with the SAFE (Sponge API for FIeld Element) approach
# This is in line with the SAFE (Sponge API for Field Elements) approach

# Types and constants
# ----------------------------------------------------------------
Expand All @@ -92,7 +95,6 @@ type
# The real offset can be recovered with a subtraction
# to properly update the state.
H {.align: 64.}: KeccakState
buf {.align: 64.}: array[200 - 2*(bits div 8), byte]
absorb_offset: int32
squeeze_offset: int32

Expand All @@ -109,14 +111,6 @@ template rate(ctx: KeccakContext): int =
{.push raises: [].}
{.push checks: off.}

func absorbBuffer(ctx: var KeccakContext) {.inline.} =
ctx.H.hashMessageBlocks_generic(ctx.buf.asUnchecked(), numBlocks = 1)
ctx.buf.setZero()
# Note: in certain case like authenticated encryption
# we might want to absorb at the same position that have been squeezed
# hence we don't reset the absorb_offset to 0
# The buf is zeroed which is the neutral element for xor.

# Public API
# ----------------------------------------------------------------

Expand All @@ -134,6 +128,111 @@ func init*(ctx: var KeccakContext) {.inline.} =
## Initialize or reinitialize a Keccak context
ctx.reset()

template genAbsorb(isaFeatures: untyped) =
  func `absorb _ isaFeatures`*(ctx: var KeccakContext, message: openArray[byte]) =
    ## Absorb `message` into the Keccak sponge state, using the
    ## `isaFeatures` flavour of the state helpers.
    ##
    ## Security note: a trailing partial block of the message is handed to
    ## `xorInPartial` on the sponge state and stays there until a later call
    ## completes the block.
    ## If sensitive content is used, ensure that
    ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
    ## Additionally ensure that the message(s) passed were stored
    ## in memory considered secure for your threat model.

    # "absorb-permute-squeeze" protocol, as originally defined by the
    # Keccak team and compatible with the SHA-3 hash spec.
    # See https://eprint.iacr.org/2022/1340.pdf
    #
    # No transition/permutation happens when going squeezing -> absorbing.
    # Inside this function, reaching the end of the rate is always
    # followed by a permutation and a reset of the state offset to 0.

    let rateBytes = ctx.rate()
    var statePos = int ctx.absorb_offset   # write position inside the Keccak state
    var msgPos = 0                         # read position inside `message`
    var remaining = message.len            # message bytes not yet consumed

    if (statePos mod rateBytes) != 0 and statePos + remaining >= rateBytes:
      # A previous call left a partial block: complete it, then permute once.
      let gap = rateBytes - statePos
      ctx.H.`xorInPartial _ isaFeatures`(statePos, message.toOpenArray(0, gap-1))
      ctx.H.`permute _ isaFeatures`(NumRounds = 24)
      statePos = 0
      msgPos = gap
      remaining -= gap

    if remaining >= rateBytes:
      # Hand every whole block to the block-hashing routine in one call.
      let numBlocks = remaining div rateBytes
      let consumed = numBlocks * rateBytes
      ctx.H.`hashMessageBlocks _ isaFeatures`(message.asUnchecked() +% msgPos, numBlocks)
      msgPos += consumed
      remaining -= consumed

    if remaining != 0:
      # Trailing partial block: pass it to xorInPartial at the current offset.
      ctx.H.`xorInPartial _ isaFeatures`(statePos, message.toOpenArray(msgPos, msgPos+remaining-1))

    # Epilogue: remember where the next absorb continues ...
    ctx.absorb_offset = int32(statePos + remaining)
    # ... and signal that the next squeeze transition needs a permute.
    ctx.squeeze_offset = int32 rateBytes

genAbsorb(generic)
when UseASM_X86_32:
  genAbsorb(x86_bmi1)

template genSqueeze(isaFeatures: untyped) =
  func `squeeze _ isaFeatures`*(ctx: var KeccakContext, digest: var openArray[byte]) =
    ## Squeeze `digest.len` bytes out of the Keccak sponge state into `digest`,
    ## using the `isaFeatures` flavour of the state helpers.
    ##
    ## The function resumes from `ctx.squeeze_offset`. When that offset equals
    ## the rate (the value `absorb` stores), the state is first padded and
    ## permuted to transition from absorbing to squeezing.
    var pos = ctx.squeeze_offset  # offset in Keccak state
    var cur = 0                   # offset in digest
    var bytesLeft = digest.len    # digest bytes still to produce

    if pos == ctx.rate():
      # Transition from absorbing to squeezing
      # This state can only come from `absorb` function
      # as within `squeeze`, pos == ctx.rate() is always followed
      # by a permute and pos = 0
      ctx.H.pad(ctx.absorb_offset, ctx.delimiter, ctx.rate())
      ctx.H.`permute _ isaFeatures`(NumRounds = 24)
      pos = 0
      ctx.absorb_offset = 0

    if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate():
      # Previous partial squeeze, fill up to rate and do one permutation
      let free = ctx.rate() - pos
      ctx.H.`copyOutPartial _ isaFeatures`(hByteOffset = pos, digest.toOpenArray(0, free-1))
      ctx.H.`permute _ isaFeatures`(NumRounds = 24)
      pos = 0
      ctx.absorb_offset = 0
      cur = free
      bytesLeft -= free

    if bytesLeft >= ctx.rate():
      # Process multiple blocks
      let numBlocks = bytesLeft div ctx.rate()
      ctx.H.`squeezeDigestBlocks _ isaFeatures`(digest.asUnchecked() +% cur, numBlocks)
      ctx.absorb_offset = 0
      cur += numBlocks * ctx.rate()
      bytesLeft -= numBlocks * ctx.rate()

    if bytesLeft != 0:
      # Output the tail.
      # The slice is [cur, cur+bytesLeft-1]: `cur` bytes of `digest` were
      # already written by the branches above, so the upper bound must be
      # relative to `cur` (same convention as the tail handling in absorb).
      ctx.H.`copyOutPartial _ isaFeatures`(hByteOffset = pos, digest.toOpenArray(cur, cur+bytesLeft-1))

    # Epilogue
    # The next squeeze resumes after the bytes just emitted, i.e. at
    # pos+bytesLeft and not at bytesLeft: a short squeeze that starts at a
    # non-zero state offset must not rewind the offset.
    # At this point pos+bytesLeft < ctx.rate() always holds (any crossing of
    # the rate boundary was handled above and reset pos to 0), so the stored
    # value can never be mistaken for the "needs transition" marker.
    ctx.squeeze_offset = int32(pos + bytesLeft)
    # We don't signal absorb_offset to permute the state if called next
    # as per
    # - original keccak spec that uses "absorb-permute-squeeze" protocol
    # - https://eprint.iacr.org/2022/1340.pdf
    # - https://eprint.iacr.org/2023/522.pdf
    #   https://hackmd.io/@7dpNYqjKQGeYC7wMlPxHtQ/ByIbpfX9c#2-SAFE-definition

genSqueeze(generic)
when UseASM_X86_32:
  genSqueeze(x86_bmi1)

func absorb*(ctx: var KeccakContext, message: openArray[byte]) =
## Absorb a message in the Keccak sponge state
##
Expand All @@ -143,90 +242,22 @@ func absorb*(ctx: var KeccakContext, message: openArray[byte]) =
## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
## Additionally ensure that the message(s) passed were stored
## in memory considered secure for your threat model.

var pos = int ctx.absorb_offset
var cur = 0
var bytesLeft = message.len

# We follow the "absorb-permute-squeeze" approach
# originally defined by the Keccak team.
# It is compatible with SHA-3 hash spec.
# See https://eprint.iacr.org/2022/1340.pdf
#
# There are no transition/permutation between squeezing -> absorbing
# And within this `absorb` function
# the state pos == ctx.rate()
# is always followed by a permute and setting `pos = 0`

if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate():
# Previous partial update, fill the state and do one permutation
let free = ctx.rate() - pos
ctx.buf.rawCopy(dStart = pos, message, sStart = 0, len = free)
ctx.absorbBuffer()
pos = 0
cur = free
bytesLeft -= free

if bytesLeft >= ctx.rate():
# Process multiple blocks
let numBlocks = bytesLeft div ctx.rate()
ctx.H.hashMessageBlocks_generic(message.asUnchecked() +% cur, numBlocks)
cur += numBlocks * ctx.rate()
bytesLeft -= numBlocks * ctx.rate()

if bytesLeft != 0:
# Store the tail in buffer
ctx.buf.rawCopy(dStart = pos, message, sStart = cur, len = bytesLeft)

# Epilogue
ctx.absorb_offset = int32(pos+bytesLeft)
# Signal that the next squeeze transition needs a permute
ctx.squeeze_offset = int32 ctx.rate()

func squeeze*(ctx: var KeccakContext, digest: var openArray[byte]) =

var pos = ctx.squeeze_offset
var cur = 0
var bytesLeft = digest.len

if pos == ctx.rate():
# Transition from absorbing to squeezing
# This state can only come from `absorb` function
# as within `squeeze`, pos == ctx.rate() is always followed
# by a permute and pos = 0
ctx.H.xorInPartial(ctx.buf.toOpenArray(0, ctx.absorb_offset-1))
ctx.H.pad(ctx.absorb_offset, ctx.delimiter, ctx.rate())
ctx.H.permute_generic(NumRounds = 24)
pos = 0
ctx.absorb_offset = 0

if (pos mod ctx.rate()) != 0 and pos+bytesLeft >= ctx.rate():
# Previous partial squeeze, fill up to rate and do one permutation
let free = ctx.rate() - pos
ctx.H.copyOutPartial(hByteOffset = pos, digest.toOpenArray(0, free-1))
ctx.H.permute_generic(NumRounds = 24)
pos = 0
ctx.absorb_offset = 0
cur = free
bytesLeft -= free

if bytesLeft >= ctx.rate():
# Process multiple blocks
let numBlocks = bytesLeft div ctx.rate()
ctx.H.squeezeDigestBlocks_generic(digest.asUnchecked() +% cur, numBlocks)
ctx.absorb_offset = 0
cur += numBlocks * ctx.rate()
bytesLeft -= numBlocks * ctx.rate()

if bytesLeft != 0:
# Output the tail
ctx.H.copyOutPartial(hByteOffset = pos, digest.toOpenArray(cur, bytesLeft-1))

# Epilogue
ctx.squeeze_offset = int32 bytesLeft
# We don't signal absorb_offset to permute the state if called next
# as per https://eprint.iacr.org/2023/522.pdf
# https://hackmd.io/@7dpNYqjKQGeYC7wMlPxHtQ/ByIbpfX9c#2-SAFE-definition
when UseASM_X86_32:
if ({.noSideEffect.}: hasBmi1()):
ctx.absorb_x86_bmi1(message)
else:
ctx.absorb_generic(message)
else:
ctx.absorb_generic(message)

func squeeze*(ctx: var KeccakContext, message: var openArray[byte]) =
  ## Fill `message` with output squeezed from the Keccak sponge state.
  ##
  ## When `UseASM_X86_32` is enabled and the `hasBmi1()` check succeeds,
  ## the call is forwarded to `squeeze_x86_bmi1`; in every other case
  ## `squeeze_generic` is used.
  when UseASM_X86_32:
    # hasBmi1() is wrapped in a noSideEffect block so it can be called from a `func`.
    if ({.noSideEffect.}: hasBmi1()):
      ctx.squeeze_x86_bmi1(message)
      return
  ctx.squeeze_generic(message)

func update*(ctx: var KeccakContext, message: openArray[byte]) =
## Append a message to a Keccak context
Expand Down
4 changes: 2 additions & 2 deletions constantine/hashes/h_sha256.nim
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ func dumpHash(
digest: var array[DigestSize, byte],
s: Sha256_state) {.inline.} =
## Convert the internal hash into a message digest
var dstIdx = 0'u
var dstIdx = 0
for i in 0 ..< s.H.len:
digest.dumpRawInt(s.H[i], dstIdx, bigEndian)
dstIdx += uint sizeof(uint32)
dstIdx += sizeof(uint32)

func hashBuffer(ctx: var Sha256Context) {.inline.} =
ctx.s.hashMessageBlocks(ctx.buf.asUnchecked(), numBlocks = 1)
Expand Down
Loading
Loading