From 19311b4dda6d8557b0fdb2a3315bef99fdcca46c Mon Sep 17 00:00:00 2001
From: Amir Bawab <amirbawab@gmail.com>
Date: Sun, 7 Jul 2019 21:57:56 -0400
Subject: [PATCH] Fix inverted sign in SIMD right-sign-scale snippets

---
 docs/demo/nnb_js.wasm                | Bin 2660961 -> 2660961 bytes
 src/nn-builder/src/snippet/matrix.cc |  28 +++++++++++++--------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/docs/demo/nnb_js.wasm b/docs/demo/nnb_js.wasm
index a67e24e3f500b9bb63b92fbdf56d8eea6bfcbe89..3be0c55473a1f2b2ef2f27afba1f87df6f9a44ce 100644
GIT binary patch
delta 159
zcmWm1sS(0J0EW?7j&KvM<%T5O8jDL6TMpu%L2Lnd6!^@<VksknKKKw%ydry;0#_N8
zR8~b*1*)m8hMH=rt&X}1)l**s4K>Q_)0y}E<?O|L@~^Y;nFjvfTk0*wm|%(-=2&2f
W71oFlBf$n+?6AiH$2|3?DEtA1`8WLl

delta 159
zcmWm1w++HT0EST=!DLK2GT0{9v^2<5+&C1UL~#SqQ$cUZE3k$O_TZ;@#VfL>DddKs
zM41Xz0&3K0(4<A14qZZe^cgT@l-ZXv*T>u0i}~bVXXCRP_<!%Iw=$NAOl2l>S;$gW
WvX)3<Nn|5i*~wlGa?DeIiozcw#5Vf?

diff --git a/src/nn-builder/src/snippet/matrix.cc b/src/nn-builder/src/snippet/matrix.cc
index 31ff0b2..a4a7d9f 100644
--- a/src/nn-builder/src/snippet/matrix.cc
+++ b/src/nn-builder/src/snippet/matrix.cc
@@ -1217,15 +1217,15 @@ ExprList* MatrixSnippetSimd::MatrixAddRightSignScale(ds::NDArray *lhs, ds::NDArr
     auto lhs_addr = MakeBinary(Opcode::I32Add, MakeI32Const(lhs->Memory()->Begin()), MakeLocalGet(addr));
     auto rhs_addr = MakeBinary(Opcode::I32Add, MakeI32Const(rhs->Memory()->Begin()), MakeLocalGet(addr));
     // Compute right sign scale
-    // 1) [-1, 2, -3, 4]          >=        [0, 0, 0, 0]      = [0, -1, 0, -1]
-    // 2) [0, -1, 0, -1]          to-float                    = [0.0, -1.0. 0.0, -1.0]
-    // 3) [0.0, -1.0, 0.0, 1.0]   *         [2s, 2s, 2s, 2s]  = [0, -2s, 0, -2s]
-    // 4) [0, -2s, 0, -2s]        +         [s, s, s, s]      = [-s, s, -s, s]
+    // 1) [-1, 2, -3, 4]          >=        [0, 0, 0, 0]          = [0, -1, 0, -1]
+    // 2) [0, -1, 0, -1]          to-float                        = [0.0, -1.0, 0.0, -1.0]
+    // 3) [0.0, -1.0, 0.0, -1.0]  *         [-2s, -2s, -2s, -2s]  = [0, 2s, 0, 2s]
+    // 4) [0, 2s, 0, 2s]          -         [s, s, s, s]          = [-s, s, -s, s]
     auto rhs_ge = MakeBinary(Opcode::F32X4Ge, MakeV128Load(rhs_addr), MakeUnary(Opcode::F32X4Splat, MakeF32Const(0)));
     auto rhs_cnvt = MakeUnary(Opcode::F32X4ConvertI32X4S, rhs_ge);
-    auto rhs_mul = MakeBinary(Opcode::F32X4Mul, rhs_cnvt, MakeUnary(Opcode::F32X4Splat, MakeF32Const(2*scale)));
-    auto rhs_add = MakeBinary(Opcode::F32X4Add, rhs_mul, MakeUnary(Opcode::F32X4Splat, MakeF32Const(scale)));
-    b->Insert(MakeV128Store(MakeLocalGet(dst_addr), MakeBinary(Opcode::F32X4Add, MakeV128Load(lhs_addr), rhs_add)));
+    auto rhs_mul = MakeBinary(Opcode::F32X4Mul, rhs_cnvt, MakeUnary(Opcode::F32X4Splat, MakeF32Const(-2*scale)));
+    auto rhs_sub = MakeBinary(Opcode::F32X4Sub, rhs_mul, MakeUnary(Opcode::F32X4Splat, MakeF32Const(scale)));
+    b->Insert(MakeV128Store(MakeLocalGet(dst_addr), MakeBinary(Opcode::F32X4Add, MakeV128Load(lhs_addr), rhs_sub)));
     // Move to next elements
     b->Insert(GenerateCompoundAssignment(addr, Opcode::I32Add, MakeI32Const(simd_type_size)));
   }));
@@ -1280,16 +1280,16 @@ ExprList* MatrixSnippetSimd::MatrixAddRightSignScaleAddRightScale(nn::ds::NDArra
     // Cache rhs val
     b->Insert(MakeLocalSet(rhs_v128_cache, MakeV128Load(rhs_addr)));
     // Compute right sign scale
-    // 1) [-1, 2, -3, 4]          >=        [0, 0, 0, 0]      = [0, -1, 0, -1]
-    // 2) [0, -1, 0, -1]          to-float                    = [0.0, -1.0. 0.0, -1.0]
-    // 3) [0.0, -1.0, 0.0, 1.0]   *         [2s, 2s, 2s, 2s]  = [0, -2s, 0, -2s]
-    // 4) [0, -2s, 0, -2s]        +         [s, s, s, s]      = [-s, s, -s, s]
+    // 1) [-1, 2, -3, 4]          >=        [0, 0, 0, 0]          = [0, -1, 0, -1]
+    // 2) [0, -1, 0, -1]          to-float                        = [0.0, -1.0, 0.0, -1.0]
+    // 3) [0.0, -1.0, 0.0, -1.0]  *         [-2s, -2s, -2s, -2s]  = [0, 2s, 0, 2s]
+    // 4) [0, 2s, 0, 2s]          -         [s, s, s, s]          = [-s, s, -s, s]
     auto rhs_ge = MakeBinary(Opcode::F32X4Ge, MakeLocalGet(rhs_v128_cache), MakeUnary(Opcode::F32X4Splat, MakeF32Const(0)));
     auto rhs_cnvt = MakeUnary(Opcode::F32X4ConvertI32X4S, rhs_ge);
-    auto rhs_mul = MakeBinary(Opcode::F32X4Mul, rhs_cnvt, MakeUnary(Opcode::F32X4Splat, MakeF32Const(2*scale1)));
-    auto rhs_add = MakeBinary(Opcode::F32X4Add, rhs_mul, MakeUnary(Opcode::F32X4Splat, MakeF32Const(scale1)));
+    auto rhs_mul = MakeBinary(Opcode::F32X4Mul, rhs_cnvt, MakeUnary(Opcode::F32X4Splat, MakeF32Const(-2*scale1)));
+    auto rhs_sub = MakeBinary(Opcode::F32X4Sub, rhs_mul, MakeUnary(Opcode::F32X4Splat, MakeF32Const(scale1)));
     auto rhs_scale2 = MakeBinary(Opcode::F32X4Mul, MakeLocalGet(rhs_v128_cache), MakeUnary(Opcode::F32X4Splat, MakeF32Const(scale2)));
-    auto rhs_val = MakeBinary(Opcode::F32X4Add, rhs_add, rhs_scale2);
+    auto rhs_val = MakeBinary(Opcode::F32X4Add, rhs_sub, rhs_scale2);
     b->Insert(MakeV128Store(MakeLocalGet(dst_addr), MakeBinary(Opcode::F32X4Add, MakeV128Load(lhs_addr), rhs_val)));
     // Move to next elements
     b->Insert(GenerateCompoundAssignment(addr, Opcode::I32Add, MakeI32Const(simd_type_size)));