From ace8c5c8a6f06aaca68ed6928533fc5bbf0567f1 Mon Sep 17 00:00:00 2001 From: Ali Shariat Date: Sat, 7 Mar 2020 22:33:50 -0800 Subject: [PATCH] reduce memory access in linear solve loop By defining this temporary variable we use the fact that `i` is never equal to `Li[j]`. The compiler does not have this information. The binary code for the loop changes from ``` movsx rdi, DWORD PTR [rdx+rax*4] vmovss xmm0, DWORD PTR [rcx+rax*4] add rax, 1 lea rdi, [r8+rdi*4] vmovss xmm1, DWORD PTR [rdi] vfnmadd132ss xmm0, xmm1, DWORD PTR [r9] vmovss DWORD PTR [rdi], xmm0 ``` to ``` movsx rdi, DWORD PTR [rdx+rax*4] vmovss xmm0, DWORD PTR [rcx+rax*4] add rax, 1 lea rdi, [r8+rdi*4] vfnmadd213ss xmm0, xmm1, DWORD PTR [rdi] vmovss DWORD PTR [rdi], xmm0 ``` Notice that the compiler drops the `vmovss xmm1, DWORD PTR [rdi]` load. --- src/qdldl.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/qdldl.c b/src/qdldl.c index 31fd93a..6fb17d4 100644 --- a/src/qdldl.c +++ b/src/qdldl.c @@ -239,11 +239,12 @@ void QDLDL_Lsolve(const QDLDL_int n, const QDLDL_float* Lx, QDLDL_float* x){ -QDLDL_int i,j; + QDLDL_int i,j; for(i = 0; i < n; i++){ - for(j = Lp[i]; j < Lp[i+1]; j++){ - x[Li[j]] -= Lx[j]*x[i]; - } + QDLDL_float val = x[i]; + for(j = Lp[i]; j < Lp[i+1]; j++){ + x[Li[j]] -= Lx[j]*val; + } } } @@ -254,11 +255,13 @@ void QDLDL_Ltsolve(const QDLDL_int n, const QDLDL_float* Lx, QDLDL_float* x){ -QDLDL_int i,j; + QDLDL_int i,j; for(i = n-1; i>=0; i--){ - for(j = Lp[i]; j < Lp[i+1]; j++){ - x[i] -= Lx[j]*x[Li[j]]; - } + QDLDL_float val = x[i]; + for(j = Lp[i]; j < Lp[i+1]; j++){ + val -= Lx[j]*x[Li[j]]; + } + x[i] = val; } } @@ -270,10 +273,9 @@ void QDLDL_solve(const QDLDL_int n, const QDLDL_float* Dinv, QDLDL_float* x){ -QDLDL_int i; - -QDLDL_Lsolve(n,Lp,Li,Lx,x); -for(i = 0; i < n; i++) x[i] *= Dinv[i]; -QDLDL_Ltsolve(n,Lp,Li,Lx,x); + QDLDL_int i; + QDLDL_Lsolve(n,Lp,Li,Lx,x); + for(i = 0; i < n; i++) x[i] *= Dinv[i]; + QDLDL_Ltsolve(n,Lp,Li,Lx,x); }