Skip to content

Commit

Permalink
test: fix RoPETest.
Browse files · Browse the repository at this point in the history
fix: optimize cos_value.

Signed-off-by: Lee <[email protected]>
  • Loading branch information
lx200916 committed Oct 19, 2023
1 parent e0404e7 commit 65ace63
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 15 deletions.
12 changes: 10 additions & 2 deletions src/backends/cpu/CPURoPE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@ void sinusoidal_position_embedding(int batch_size, int nums_head, int seq_len, i
for (int n = 0; n < batch_size; ++n) {
for (int h = 0; h < nums_head; ++h) {
for (int s = 0; s < seq_len; ++s) {
for (int d = 0; d < output_dim; ++d) {
for (int d = 0; d < output_dim; d += 2) {
int i = (int)d / 2;
float sin_value = std::sin(s / std::pow(10000, 2.0 * i / output_dim));
float cos_value = std::cos(s / std::pow(10000, 2.0 * i / output_dim));
sin.setDataAt<float>(n, h, s, d, sin_value);
cos.setDataAt<float>(n, h, s, d, cos_value);
if (d + 1 < output_dim) {
sin.setDataAt<float>(n, h, s, d + 1, sin_value);
cos.setDataAt<float>(n, h, s, d + 1, cos_value);
}
}
}
}
Expand All @@ -31,7 +35,7 @@ void sinusoidal_position_embedding_hf(int batch_size, int nums_head, int seq_len
for (int n = 0; n < batch_size; ++n) {
for (int h = 0; h < nums_head; ++h) {
for (int s = 0; s < seq_len; ++s) {
for (int d = 0; d < output_dim; ++d) {
for (int d = 0; d < output_dim; d += 2) {
int i = (int)d;
if (d >= (int)output_dim / 2) {
i = (int)(d - output_dim / 2);
Expand All @@ -40,6 +44,10 @@ void sinusoidal_position_embedding_hf(int batch_size, int nums_head, int seq_len
float cos_value = std::cos(s / std::pow(10000, 2.0 * i / output_dim));
sin.setDataAt<float>(n, h, s, d, sin_value);
cos.setDataAt<float>(n, h, s, d, cos_value);
if (d + 1 < output_dim) {
sin.setDataAt<float>(n, h, s, d + 1, sin_value);
cos.setDataAt<float>(n, h, s, d + 1, cos_value);
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/cpu/CPURoPETest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ TEST_F(CPUTest, CPURoPE1) {
TEST_SETUP({input0}, {c_output});
TEST_EXCUTE({input0}, {c_output});
PRINT_TENSOR_SHAPES(input0, c_output, output);
COMPARE_TENSOR(output, c_output);
COMPARE_TENSOR(output, c_output, true);
}
20 changes: 9 additions & 11 deletions test/cpu/CPURoPETest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
Returns:
torch.Tensor: Precomputed frequency tensor with complex exponentials.
"""
freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
t = torch.arange(end, device=freqs.device) # type: ignore
Expand Down Expand Up @@ -80,15 +77,15 @@ def apply_rotary_emb(
Tuple[torch.Tensor, torch.Tensor]: Tuple of modified query tensor and key tensor with rotary embeddings.
"""
xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
# xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
print(xq_.shape)
freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
print(freqs_cis.shape)
xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
print(xk_out.shape)
# xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
# print(xk_out.shape)

return xq_out.type_as(xq), xk_out.type_as(xk)
return xq_out.type_as(xq)


class RoPE(torch.nn.Module):
Expand All @@ -98,10 +95,9 @@ def __init__(self, ):
def forward(self, out):
seq_len = out.shape[1]
dim = out.shape[-1]
start_pos = 0
freqs_cis = precompute_freqs_cis(dim, seq_len * 2)
freqs_cis = freqs_cis[start_pos: start_pos + seq_len]
out, _ = apply_rotary_emb(out, out, freqs_cis)
freqs_cis = precompute_freqs_cis(dim, seq_len)
# freqs_cis = freqs_cis[start_pos: start_pos + seq_len]
out = apply_rotary_emb(out, out, freqs_cis)
return out


Expand All @@ -111,6 +107,8 @@ def test(self):
model = RoPE()
output = model(input0)
print(output.shape)
input0 = input0.transpose(1, 2)
output = output.transpose(1, 2)
self.test_done(True)


Expand Down
3 changes: 2 additions & 1 deletion test/cpu/CPUTest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ static bool isSame(Tensor *a, Tensor *b, bool unstrict = false) {
double a_ = a->dataAt<float>({i, j, k, l});
double b_ = b->dataAt<float>({i, j, k, l});
// if ((a_ < b_) || (a_ > b_)) {
if (abs(a_ - b_) / std::max(a_, b_) > eps) {
if ((abs(a_ - b_) / std::max(a_, b_)) > eps) {
std::cout << std::setprecision(8) << setiosflags(std::ios::fixed | std::ios::showpoint) << "a[" << i << "," << j << "," << k << "," << l << "]: " << (double)a->dataAt<float>(i, j, k, l) << "!= b[" << i << "," << j << "," << k << "," << l << "]: " << (double)b->dataAt<float>(i, j, k, l) << std::endl;
// return false;
std::cout << std::setprecision(8) << setiosflags(std::ios::fixed | std::ios::showpoint) << "Diff:" << abs(a_ - b_) / std::max(a_, b_) << std::endl;
flag += 1;
if (flag > 10) {
return false;
Expand Down

0 comments on commit 65ace63

Please sign in to comment.