Remove dead tail code from (non-SHA3) AES-GCM AArch64 kernel #1639

Merged · 4 commits · Jul 8, 2024
Changes from 3 commits
35 changes: 0 additions & 35 deletions crypto/fipsmodule/modes/asm/aesv8-gcm-armv8.pl
@@ -849,23 +849,8 @@
eor $acc_mb, $acc_mb, $rk4v.16b // GHASH final-1 block - mid
eor $acc_lb, $acc_lb, $rk3 // GHASH final-1 block - low
.Lenc_blocks_less_than_1: // blocks left <= 1
and $bit_length, $bit_length, #127 // bit_length %= 128
mvn $rkN_l, xzr // rkN_l = 0xffffffffffffffff
sub $bit_length, $bit_length, #128 // bit_length -= 128
neg $bit_length, $bit_length // bit_length = 128 - #bits in input (in range [1,128])
ld1 { $rk0}, [$output_ptr] // load existing bytes where the possibly partial last block is to be stored
mvn $rkN_h, xzr // rkN_h = 0xffffffffffffffff
and $bit_length, $bit_length, #127 // bit_length %= 128
lsr $rkN_h, $rkN_h, $bit_length // rkN_h is mask for top 64b of last block
cmp $bit_length, #64
csel $input_l0, $rkN_l, $rkN_h, lt
csel $input_h0, $rkN_h, xzr, lt
fmov $ctr0d, $input_l0 // ctr0b is mask for last block
fmov $ctr0.d[1], $input_h0
and $res1b, $res1b, $ctr0b // possibly partial last block has zeroes in highest bits
rev64 $res0b, $res1b // GHASH final block
eor $res0b, $res0b, $t0.16b // feed in partial tag
bif $res1b, $rk0, $ctr0b // insert existing bytes in top end of result before storing
pmull2 $rk2q1, $res0.2d, $h1.2d // GHASH final block - high
mov $t0d, $res0.d[1] // GHASH final block - mid
rev $ctr32w, $rctr32w
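
Note on the block deleted above: it built the 128-bit mask selecting the valid low bits of a possibly partial final block. Two subtleties make the edge cases come out right: bit_length is reduced mod 128 twice, and an AArch64 register shift (lsr by register) uses the shift amount modulo 64. A minimal C sketch of the same mask computation, with exhaustive spot checks (helper name and harness are mine, not from the source):

    #include <stdint.h>
    #include <assert.h>

    /* r = number of bits in the last block, 1..128 (a full block behaves as 128). */
    static void last_block_mask(unsigned r, uint64_t *lo, uint64_t *hi) {
        unsigned shift = (128u - (r & 127u)) & 127u; /* 0 for a full block       */
        uint64_t part = ~0ULL >> (shift & 63u);      /* lsr shifts modulo 64     */
        if (shift < 64) { *lo = ~0ULL; *hi = part; } /* 65..128 valid bits       */
        else            { *lo = part;  *hi = 0;    } /* 1..64 valid bits         */
    }

    int main(void) {
        for (unsigned r = 1; r <= 128; r++) {
            uint64_t lo, hi;
            last_block_mask(r, &lo, &hi);
            unsigned __int128 m = ((unsigned __int128)hi << 64) | lo;
            unsigned __int128 want =
                (r == 128) ? ~(unsigned __int128)0
                           : (((unsigned __int128)1 << r) - 1);
            assert(m == want); /* exactly the r lowest bits are selected */
        }
        return 0;
    }
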
@@ -1477,27 +1462,7 @@
eor $acc_mb, $acc_mb, $rk4v.16b // GHASH final-1 block - mid
eor $output_h0, $output_h0, $rkN_h // AES final block - round N high
.Ldec_blocks_less_than_1: // blocks left <= 1
and $bit_length, $bit_length, #127 // bit_length %= 128
mvn $rkN_h, xzr // rkN_h = 0xffffffffffffffff
sub $bit_length, $bit_length, #128 // bit_length -= 128
mvn $rkN_l, xzr // rkN_l = 0xffffffffffffffff
ldp $end_input_ptr, $main_end_input_ptr, [$output_ptr] // load existing bytes we need to not overwrite
neg $bit_length, $bit_length // bit_length = 128 - #bits in input (in range [1,128])
and $bit_length, $bit_length, #127 // bit_length %= 128
lsr $rkN_h, $rkN_h, $bit_length // rkN_h is mask for top 64b of last block
cmp $bit_length, #64
csel $ctr32x, $rkN_l, $rkN_h, lt
csel $ctr96_b64x, $rkN_h, xzr, lt
fmov $ctr0d, $ctr32x // ctr0b is mask for last block
and $output_l0, $output_l0, $ctr32x
mov $ctr0.d[1], $ctr96_b64x
bic $end_input_ptr, $end_input_ptr, $ctr32x // mask out low existing bytes
rev $ctr32w, $rctr32w
bic $main_end_input_ptr, $main_end_input_ptr, $ctr96_b64x // mask out high existing bytes
orr $output_l0, $output_l0, $end_input_ptr
and $output_h0, $output_h0, $ctr96_b64x
orr $output_h0, $output_h0, $main_end_input_ptr
and $res1b, $res1b, $ctr0b // possibly partial last block has zeroes in highest bits
rev64 $res0b, $res1b // GHASH final block
eor $res0b, $res0b, $t0.16b // feed in partial tag
pmull $rk3q1, $res0.1d, $h1.1d // GHASH final block - low
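
In the decrypt path the deleted blend runs on general-purpose registers: ldp loads the bytes already at the output, bic clears the bits the new block will occupy, and orr merges in the decrypted prefix; the encrypt path above did the same in a single vector bif. A rough C equivalent, under the same hypothetical mask helper as in the earlier sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical illustration (not the kernel's interface): blend a possibly
     * partial decrypted block over the bytes already present at the output.
     * mask[0]/mask[1] select the valid bits, as built in the earlier sketch. */
    static void blend_tail(const uint64_t dec[2], const uint64_t old[2],
                           const uint64_t mask[2], uint64_t out[2]) {
        out[0] = (dec[0] & mask[0]) | (old[0] & ~mask[0]); /* and / bic / orr */
        out[1] = (dec[1] & mask[1]) | (old[1] & ~mask[1]);
    }

    int main(void) {
        uint64_t dec[2] = {0x1122334455667788ULL, 0x99aabbccddeeff00ULL};
        uint64_t old[2] = {0, 0}, mask[2] = {~0ULL, 0}, out[2]; /* 64-bit tail */
        blend_tail(dec, old, mask, out);
        printf("%016llx %016llx\n", (unsigned long long)out[1],
               (unsigned long long)out[0]); /* high half keeps the old bytes */
        return 0;
    }

Removing this machinery is safe precisely when a partial final block can never reach the kernel, which is presumably what makes it dead code here, per the PR title.
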
35 changes: 0 additions & 35 deletions generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S
@@ -729,23 +729,8 @@ Lenc_blocks_more_than_1: // blocks left > 1
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
Lenc_blocks_less_than_1: // blocks left <= 1
and x1, x1, #127 // bit_length %= 128
mvn x13, xzr // rkN_l = 0xffffffffffffffff
sub x1, x1, #128 // bit_length -= 128
neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
ld1 { v18.16b}, [x2] // load existing bytes where the possibly partial last block is to be stored
mvn x14, xzr // rkN_h = 0xffffffffffffffff
and x1, x1, #127 // bit_length %= 128
lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
cmp x1, #64
csel x6, x13, x14, lt
csel x7, x14, xzr, lt
fmov d0, x6 // ctr0b is mask for last block
fmov v0.d[1], x7
and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
bif v5.16b, v18.16b, v0.16b // insert existing bytes in top end of result before storing
pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
mov d8, v4.d[1] // GHASH final block - mid
rev w9, w12
@@ -1498,27 +1483,7 @@ Ldec_blocks_more_than_1: // blocks left > 1
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor x7, x7, x14 // AES final block - round N high
Ldec_blocks_less_than_1: // blocks left <= 1
and x1, x1, #127 // bit_length %= 128
mvn x14, xzr // rkN_h = 0xffffffffffffffff
sub x1, x1, #128 // bit_length -= 128
mvn x13, xzr // rkN_l = 0xffffffffffffffff
ldp x4, x5, [x2] // load existing bytes we need to not overwrite
neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
and x1, x1, #127 // bit_length %= 128
lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
cmp x1, #64
csel x9, x13, x14, lt
csel x10, x14, xzr, lt
fmov d0, x9 // ctr0b is mask for last block
and x6, x6, x9
mov v0.d[1], x10
bic x4, x4, x9 // mask out low existing bytes
rev w9, w12
bic x5, x5, x10 // mask out high existing bytes
orr x6, x6, x4
and x7, x7, x10
orr x7, x7, x5
and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
pmull v21.1q, v4.1d, v12.1d // GHASH final block - low
35 changes: 0 additions & 35 deletions generated-src/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S
@@ -729,23 +729,8 @@ aes_gcm_enc_kernel:
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
.Lenc_blocks_less_than_1: // blocks left <= 1
and x1, x1, #127 // bit_length %= 128
mvn x13, xzr // rkN_l = 0xffffffffffffffff
sub x1, x1, #128 // bit_length -= 128
neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
ld1 { v18.16b}, [x2] // load existing bytes where the possibly partial last block is to be stored
mvn x14, xzr // rkN_h = 0xffffffffffffffff
and x1, x1, #127 // bit_length %= 128
lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
cmp x1, #64
csel x6, x13, x14, lt
csel x7, x14, xzr, lt
fmov d0, x6 // ctr0b is mask for last block
fmov v0.d[1], x7
and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
bif v5.16b, v18.16b, v0.16b // insert existing bytes in top end of result before storing
pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
mov d8, v4.d[1] // GHASH final block - mid
rev w9, w12
@@ -1498,27 +1483,7 @@ aes_gcm_dec_kernel:
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor x7, x7, x14 // AES final block - round N high
.Ldec_blocks_less_than_1: // blocks left <= 1
and x1, x1, #127 // bit_length %= 128
mvn x14, xzr // rkN_h = 0xffffffffffffffff
sub x1, x1, #128 // bit_length -= 128
mvn x13, xzr // rkN_l = 0xffffffffffffffff
ldp x4, x5, [x2] // load existing bytes we need to not overwrite
neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
and x1, x1, #127 // bit_length %= 128
lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
cmp x1, #64
csel x9, x13, x14, lt
csel x10, x14, xzr, lt
fmov d0, x9 // ctr0b is mask for last block
and x6, x6, x9
mov v0.d[1], x10
bic x4, x4, x9 // mask out low existing bytes
rev w9, w12
bic x5, x5, x10 // mask out high existing bytes
orr x6, x6, x4
and x7, x7, x10
orr x7, x7, x5
and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
pmull v21.1q, v4.1d, v12.1d // GHASH final block - low
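
The context lines kept here (rev64, the eor that feeds in the partial tag, and the pmull/pmull2 multiplies against the hash key) are the standard GHASH absorption of the final ciphertext block; in the usual notation (mine, not from the diff):

    X_i = (X_{i-1} \oplus C_i) \cdot H \qquad \text{in } \mathrm{GF}(2^{128})

The rev64 byte-reverses each 64-bit lane because GHASH treats the block in big-endian byte order, while the data sits in the register in memory order.
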
35 changes: 0 additions & 35 deletions generated-src/win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S
@@ -731,23 +731,8 @@ Lenc_blocks_more_than_1: // blocks left > 1
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
Lenc_blocks_less_than_1: // blocks left <= 1
and x1, x1, #127 // bit_length %= 128
mvn x13, xzr // rkN_l = 0xffffffffffffffff
sub x1, x1, #128 // bit_length -= 128
neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
ld1 { v18.16b}, [x2] // load existing bytes where the possibly partial last block is to be stored
mvn x14, xzr // rkN_h = 0xffffffffffffffff
and x1, x1, #127 // bit_length %= 128
lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
cmp x1, #64
csel x6, x13, x14, lt
csel x7, x14, xzr, lt
fmov d0, x6 // ctr0b is mask for last block
fmov v0.d[1], x7
and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
bif v5.16b, v18.16b, v0.16b // insert existing bytes in top end of result before storing
pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
mov d8, v4.d[1] // GHASH final block - mid
rev w9, w12
@@ -1502,27 +1487,7 @@ Ldec_blocks_more_than_1: // blocks left > 1
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor x7, x7, x14 // AES final block - round N high
Ldec_blocks_less_than_1: // blocks left <= 1
and x1, x1, #127 // bit_length %= 128
mvn x14, xzr // rkN_h = 0xffffffffffffffff
sub x1, x1, #128 // bit_length -= 128
mvn x13, xzr // rkN_l = 0xffffffffffffffff
ldp x4, x5, [x2] // load existing bytes we need to not overwrite
neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
and x1, x1, #127 // bit_length %= 128
lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
cmp x1, #64
csel x9, x13, x14, lt
csel x10, x14, xzr, lt
fmov d0, x9 // ctr0b is mask for last block
and x6, x6, x9
mov v0.d[1], x10
bic x4, x4, x9 // mask out low existing bytes
rev w9, w12
bic x5, x5, x10 // mask out high existing bytes
orr x6, x6, x4
and x7, x7, x10
orr x7, x7, x5
and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
pmull v21.1q, v4.1d, v12.1d // GHASH final block - low