From d27a995bb8964fa24edd8f9c6a43bb1895c285cc Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Sun, 7 Apr 2024 15:33:59 +0200 Subject: [PATCH] Add comment describing ninv implementation trick --- examples/naive/aarch64/intt_dilithium_1234_5678.s | 3 +++ examples/naive/aarch64/intt_dilithium_1234_5678_manual_ld4.s | 3 +++ examples/naive/aarch64/intt_dilithium_123_45678.s | 3 +++ examples/naive/aarch64/intt_dilithium_123_45678_manual_ld4.s | 3 +++ examples/naive/aarch64/intt_kyber_123_4567.s | 3 +++ examples/naive/aarch64/intt_kyber_123_4567_manual_ld4.s | 3 +++ 6 files changed, 18 insertions(+) diff --git a/examples/naive/aarch64/intt_dilithium_1234_5678.s b/examples/naive/aarch64/intt_dilithium_1234_5678.s index cdb7d40..6c6404c 100644 --- a/examples/naive/aarch64/intt_dilithium_1234_5678.s +++ b/examples/naive/aarch64/intt_dilithium_1234_5678.s @@ -480,6 +480,9 @@ layer1234_start: str_vo data14, in, (14*(512/8)) str_vo data15, in, (15*(512/8)) + // Scale half the coeffs by 1/n; for the other half, the scaling has + // been merged into the multiplication with the twiddle factor on the + // last layer. mul_ninv data0, data1, data2, data3, data4, data5, data6, data7, data0, data1, data2, data3, data4, data5, data6, data7 canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3 diff --git a/examples/naive/aarch64/intt_dilithium_1234_5678_manual_ld4.s b/examples/naive/aarch64/intt_dilithium_1234_5678_manual_ld4.s index 3e13654..3f0c3cb 100644 --- a/examples/naive/aarch64/intt_dilithium_1234_5678_manual_ld4.s +++ b/examples/naive/aarch64/intt_dilithium_1234_5678_manual_ld4.s @@ -478,6 +478,9 @@ layer1234_start: str_vo data14, in, (14*(512/8)) str_vo data15, in, (15*(512/8)) + // Scale half the coeffs by 1/n; for the other half, the scaling has + // been merged into the multiplication with the twiddle factor on the + // last layer. mul_ninv data0, data1, data2, data3, data4, data5, data6, data7, data0, data1, data2, data3, data4, data5, data6, data7 canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3 diff --git a/examples/naive/aarch64/intt_dilithium_123_45678.s b/examples/naive/aarch64/intt_dilithium_123_45678.s index 4527536..08593ce 100644 --- a/examples/naive/aarch64/intt_dilithium_123_45678.s +++ b/examples/naive/aarch64/intt_dilithium_123_45678.s @@ -514,6 +514,9 @@ layer123_start: str_vo data6, in, (6*(1024/8)) str_vo data7, in, (7*(1024/8)) + // Scale half the coeffs by 1/n; for the other half, the scaling has + // been merged into the multiplication with the twiddle factor on the + // last layer. mul_ninv data0, data1, data2, data3, data0, data1, data2, data3 canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3 diff --git a/examples/naive/aarch64/intt_dilithium_123_45678_manual_ld4.s b/examples/naive/aarch64/intt_dilithium_123_45678_manual_ld4.s index 2a62f67..32c8dc8 100644 --- a/examples/naive/aarch64/intt_dilithium_123_45678_manual_ld4.s +++ b/examples/naive/aarch64/intt_dilithium_123_45678_manual_ld4.s @@ -523,6 +523,9 @@ layer123_start: str_vo data6, in, (6*(1024/8)) str_vo data7, in, (7*(1024/8)) + // Scale half the coeffs by 1/n; for the other half, the scaling has + // been merged into the multiplication with the twiddle factor on the + // last layer. mul_ninv data0, data1, data2, data3, data0, data1, data2, data3 canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3 diff --git a/examples/naive/aarch64/intt_kyber_123_4567.s b/examples/naive/aarch64/intt_kyber_123_4567.s index ee06171..1ff1599 100644 --- a/examples/naive/aarch64/intt_kyber_123_4567.s +++ b/examples/naive/aarch64/intt_kyber_123_4567.s @@ -437,6 +437,9 @@ layer123_start: str_vo data6, in, (6*(512/8)) str_vo data7, in, (7*(512/8)) + // Scale half the coeffs by 1/n; for the other half, the scaling has + // been merged into the multiplication with the twiddle factor on the + // last layer. mul_ninv data0, data1, data2, data3, data0, data1, data2, data3 str_vi data0, in, (16) diff --git a/examples/naive/aarch64/intt_kyber_123_4567_manual_ld4.s b/examples/naive/aarch64/intt_kyber_123_4567_manual_ld4.s index cdd00ff..688740d 100644 --- a/examples/naive/aarch64/intt_kyber_123_4567_manual_ld4.s +++ b/examples/naive/aarch64/intt_kyber_123_4567_manual_ld4.s @@ -432,6 +432,9 @@ layer123_start: str_vo data6, in, (6*(512/8)) str_vo data7, in, (7*(512/8)) + // Scale half the coeffs by 1/n; for the other half, the scaling has + // been merged into the multiplication with the twiddle factor on the + // last layer. mul_ninv data0, data1, data2, data3, data0, data1, data2, data3 str_vi data0, in, (16)