Skip to content

Commit

Permalink
Adjust mulmod{q} macros for forward NTTs as well; add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
dop-amin committed Apr 7, 2024
1 parent 8cb796e commit c7c10a9
Show file tree
Hide file tree
Showing 20 changed files with 74 additions and 140 deletions.
3 changes: 3 additions & 0 deletions examples/naive/aarch64/intt_dilithium_1234_5678.s
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,9 @@ layer1234_start:
str_vo data14, in, (14*(512/8))
str_vo data15, in, (15*(512/8))

// Scale half the coeffs by 1/n; for the other half, the scaling has
// been merged into the multiplication with the twiddle factor on the
// last layer.
mul_ninv data0, data1, data2, data3, data4, data5, data6, data7, data0, data1, data2, data3, data4, data5, data6, data7

canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
Expand Down
3 changes: 3 additions & 0 deletions examples/naive/aarch64/intt_dilithium_1234_5678_manual_ld4.s
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,9 @@ layer1234_start:
str_vo data14, in, (14*(512/8))
str_vo data15, in, (15*(512/8))

// Scale half the coeffs by 1/n; for the other half, the scaling has
// been merged into the multiplication with the twiddle factor on the
// last layer.
mul_ninv data0, data1, data2, data3, data4, data5, data6, data7, data0, data1, data2, data3, data4, data5, data6, data7

canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
Expand Down
3 changes: 3 additions & 0 deletions examples/naive/aarch64/intt_dilithium_123_45678.s
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@ layer123_start:
str_vo data6, in, (6*(1024/8))
str_vo data7, in, (7*(1024/8))

// Scale half the coeffs by 1/n; for the other half, the scaling has
// been merged into the multiplication with the twiddle factor on the
// last layer.
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
Expand Down
3 changes: 3 additions & 0 deletions examples/naive/aarch64/intt_dilithium_123_45678_manual_ld4.s
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,9 @@ layer123_start:
str_vo data6, in, (6*(1024/8))
str_vo data7, in, (7*(1024/8))

// Scale half the coeffs by 1/n; for the other half, the scaling has
// been merged into the multiplication with the twiddle factor on the
// last layer.
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
Expand Down
3 changes: 3 additions & 0 deletions examples/naive/aarch64/intt_kyber_123_4567.s
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,9 @@ layer123_start:
str_vo data6, in, (6*(512/8))
str_vo data7, in, (7*(512/8))

// Scale half the coeffs by 1/n; for the other half, the scaling has
// been merged into the multiplication with the twiddle factor on the
// last layer.
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

str_vi data0, in, (16)
Expand Down
3 changes: 3 additions & 0 deletions examples/naive/aarch64/intt_kyber_123_4567_manual_ld4.s
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,9 @@ layer123_start:
str_vo data6, in, (6*(512/8))
str_vo data7, in, (7*(512/8))

// Scale half the coeffs by 1/n; for the other half, the scaling has
// been merged into the multiplication with the twiddle factor on the
// last layer.
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

str_vi data0, in, (16)
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_dilithium_1234_5678.s
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@


.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmla \dst, \src, modulus
vmla \dst, t2, modulus
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmla \dst, \src, modulus
vmla \dst, t2, modulus
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -85,12 +85,6 @@
add \a\().4s, \a\().4s, tmp.4s
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmla \dst, \src, modulus
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().4s, \a\().4s, tmp.4s
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_dilithium_1234_5678_manual_st4.s
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmla \dst, \src, modulus
vmla \dst, t2, modulus
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlaq \dst, \src, modulus, 0
vmla \dst, t2, modulus
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -84,12 +84,6 @@
add \a\().4s, \a\().4s, tmp.4s
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlaq \dst, \src, modulus, 0
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().4s, \a\().4s, tmp.4s
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_dilithium_123_45678.s
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ xtmp1 .req x11
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -64,12 +64,6 @@ xtmp1 .req x11
add \a\().4s, \a\().4s, tmp.4s
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().4s, \a\().4s, tmp.4s
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_dilithium_123_45678_manual_st4.s
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ xtmp1 .req x11
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -64,12 +64,6 @@ xtmp1 .req x11
add \a\().4s, \a\().4s, tmp.4s
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().4s, \a\().4s, tmp.4s
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_dilithium_123_45678_red.s
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -65,12 +65,6 @@
add \a\().4s, \a\().4s, tmp.4s
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().4s, \a\().4s, tmp.4s
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_dilithium_123_45678_w_scalar.s
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ xtmp1 .req x11
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -74,12 +74,6 @@ xtmp1 .req x11
add \a\().4s, \a\().4s, tmp.4s
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().4s, \a\().4s, tmp.4s
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_dilithium_123_45678_w_scalar_red.s
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ xtmp1 .req x11
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -70,12 +70,6 @@ xtmp1 .req x11
add \a\().4s, \a\().4s, tmp.4s
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().4s, \src\().4s, \const\().4s
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().4s, \a\().4s, tmp.4s
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_kyber_1234_567.s
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlaq \dst, \src, consts, 0
vmlaq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().8h, \src\().8h, \const\().8h
vqrdmulh \src, \src, \const_twisted
vmlaq \dst, \src, consts, 0
vmlaq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -90,12 +90,6 @@
add \a\().8h, \a\().8h, tmp.8h
.endm

.macro mulmod_v dst, src, const, const_twisted
vmul \dst, \src, \const
vqrdmulh \src, \src, \const_twisted
vmla \dst, \src, consts
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().8h, \a\().8h, tmp.8h
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_kyber_1234_567_manual_st4.s
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlaq \dst, \src, consts, 0
vmlaq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().8h, \src\().8h, \const\().8h
vqrdmulh \src, \src, \const_twisted
vmlaq \dst, \src, consts, 0
vmlaq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -91,12 +91,6 @@
add \a\().8h, \a\().8h, tmp.8h
.endm

.macro mulmod_v dst, src, const, const_twisted
vmul \dst, \src, \const
vqrdmulh \src, \src, \const_twisted
vmla \dst, \src, consts
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().8h, \a\().8h, tmp.8h
Expand Down
14 changes: 4 additions & 10 deletions examples/naive/aarch64/ntt_kyber_123_4567.s
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@
.endm

.macro mulmodq dst, src, const, idx0, idx1
vqrdmulhq t2, \src, \const, \idx1
vmulq \dst, \src, \const, \idx0
vqrdmulhq \src, \src, \const, \idx1
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro mulmod dst, src, const, const_twisted
vqrdmulh t2, \src, \const_twisted
mul \dst\().8h, \src\().8h, \const\().8h
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
vmlsq \dst, t2, consts, 0
.endm

.macro ct_butterfly a, b, root, idx0, idx1
Expand All @@ -84,12 +84,6 @@
add \a\().8h, \a\().8h, tmp.8h
.endm

.macro mulmod_v dst, src, const, const_twisted
mul \dst\().8h, \src\().8h, \const\().8h
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro ct_butterfly_v a, b, root, root_twisted
mulmod tmp, \b, \root, \root_twisted
sub \b\().8h, \a\().8h, tmp.8h
Expand Down
Loading

0 comments on commit c7c10a9

Please sign in to comment.