Add GPU option and speed up even without GPU

yumorishita · Mar 5, 2021 · 647ff2f · 647ff2f
1 parent fed3bed
commit 647ff2f
Show file tree

Hide file tree

Showing 3 changed files with 141 additions and 55 deletions.
diff --git a/LiCSBAS_lib/LiCSBAS_inv_lib.py b/LiCSBAS_lib/LiCSBAS_inv_lib.py
@@ -8,6 +8,8 @@
 =========
 Changelog
 =========
+v1.5 20210305 Yu Morishita, GSI
+ - Add GPU option into invert_nsbas
 v1.4.2 20201118 Yu Morishita, GSI
  - Again Bug fix of multiprocessing
 v1.4.1 20201116 Yu Morishita, GSI
@@ -80,7 +82,7 @@ def make_sb_matrix2(ifgdates):
 
 
 #%%
-def invert_nsbas(unw, G, dt_cum, gamma, n_core):
+def invert_nsbas(unw, G, dt_cum, gamma, n_core, gpu):
     """
     Calculate increment displacement difference by NSBAS inversion. Points with all unw data are solved by simple SB inversion firstly at a time.
 
@@ -91,6 +93,7 @@ def invert_nsbas(unw, G, dt_cum, gamma, n_core):
       dt_cum : Cumulative years(or days) for each image (n_im)
       gamma  : Gamma value for NSBAS inversion, should be small enough (e.g., 0.0001)
       n_core : Number of cores for parallel processing
+      gpu  : GPU flag
 
     Returns:
       inc     : Incremental displacement (n_im-1, n_pt)
@@ -120,20 +123,30 @@ def invert_nsbas(unw, G, dt_cum, gamma, n_core):
 
     if n_pt_full!=0:
         print('  Solving {0:6}/{1:6}th points with full unw at a time...'.format(n_pt_full, n_pt), flush=True)
-    
+
         ## Sovle
         unw_tmp = np.concatenate((unw[bool_pt_full, :], np.zeros((n_pt_full, n_im), dtype=np.float32)), axis=1).transpose()
-        result[:, bool_pt_full] = np.linalg.lstsq(Gall, unw_tmp, rcond=None)[0]
 
+        if gpu:
+            print('  Using GPU')
+            import cupy as cp
+            unw_tmp_cp = cp.asarray(unw_tmp)
+            Gall_cp = cp.asarray(Gall)
+            _sol = cp.linalg.lstsq(Gall_cp, unw_tmp_cp, rcond=None)[0]
+            result[:, bool_pt_full] = cp.asnumpy(_sol)
+            del unw_tmp_cp, Gall_cp, _sol
+        else:
+            result[:, bool_pt_full] = np.linalg.lstsq(Gall, unw_tmp, rcond=None)[0]
 
     ### Solve other points with nan point by point.
+    ## Not use GPU because lstsq with small matrix is slower than CPU
     unw_tmp = np.concatenate((unw[~bool_pt_full, :], np.zeros((n_pt-n_pt_full, n_im), dtype=np.float32)), axis=1).transpose()
     mask = (~np.isnan(unw_tmp))
     unw_tmp[np.isnan(unw_tmp)] = 0
     print('  Next, solve {0} points including nan point-by-point...'.format(n_pt-n_pt_full), flush=True)
-    
+
     if n_core == 1:
-        result[:, ~bool_pt_full] = censored_lstsq_slow(Gall, unw_tmp, mask) #(n_im+1, n_pt) 
+        result[:, ~bool_pt_full] = censored_lstsq_slow(Gall, unw_tmp, mask) #(n_im+1, n_pt)
     else:
         print('  {} parallel processing'.format(n_core), flush=True)
 
@@ -206,7 +219,7 @@ def invert_nsbas_wls(unw, var, G, dt_cum, gamma, n_core):
 
     if n_core == 1:
         for i in range(n_pt):
-            result[:, i] = wls_nsbas(i) #(n_im+1, n_pt) 
+            result[:, i] = wls_nsbas(i) #(n_im+1, n_pt)
     else:
         print('  {} parallel processing'.format(n_core), flush=True)
 
@@ -227,7 +240,7 @@ def wls_nsbas(i):
     ### Use global value of Gall, unw_tmp, mask
     if np.mod(i, 1000) == 0:
         print('  Running {0:6}/{1:6}th point...'.format(i, unw_tmp.shape[1]), flush=True)
-        
+
     ## Weight unw and G
 
     Gall_w = Gall/np.sqrt(np.float64(var_tmp[:,i][:,np.newaxis]))
@@ -256,7 +269,7 @@ def calc_vel(cum, dt_cum):
     """
     n_pt, n_im = cum.shape
     result = np.zeros((2, n_pt), dtype=np.float32)*np.nan #[vconst, vel]
-   
+
     G = np.stack((np.ones_like(dt_cum), dt_cum), axis=1)
     vconst = np.zeros((n_pt), dtype=np.float32)*np.nan
     vel = np.zeros((n_pt), dtype=np.float32)*np.nan
@@ -266,7 +279,7 @@ def calc_vel(cum, dt_cum):
 
     if n_pt_full!=0:
         print('  Solving {0:6}/{1:6}th points with full cum at a time...'.format(n_pt_full, n_pt), flush=True)
-    
+
         ## Sovle
         result[:, bool_pt_full] = np.linalg.lstsq(G, cum[bool_pt_full, :].transpose(), rcond=None)[0]
 
@@ -275,9 +288,9 @@ def calc_vel(cum, dt_cum):
     mask = (~np.isnan(cum_tmp))
     cum_tmp[np.isnan(cum_tmp)] = 0
     print('  Next, solve {0} points including nan point-by-point...'.format(n_pt-n_pt_full), flush=True)
-    
-    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(n_im+1, n_pt) 
-        
+
+    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(n_im+1, n_pt)
+
     vconst = result[0, :]
     vel = result[1, :]
 
@@ -301,11 +314,11 @@ def calc_velsin(cum, dt_cum, imd0):
       dt     : Time difference of sin function wrt Jan 1 (day)
     """
     doy0 = (dt.datetime.strptime(imd0, '%Y%m%d')-dt.datetime.strptime(imd0[0:4]+'0101', '%Y%m%d')).days
-    
+
     n_pt, n_im = cum.shape
     result = np.zeros((4, n_pt), dtype=np.float32)*np.nan #[vconst, vel, coef_s, coef_c]
-   
-    
+
+
     sin = np.sin(2*np.pi*dt_cum)
     cos = np.cos(2*np.pi*dt_cum)
     G = np.stack((np.ones_like(dt_cum), dt_cum, sin, cos), axis=1)
@@ -320,7 +333,7 @@ def calc_velsin(cum, dt_cum, imd0):
 
     if n_pt_full!=0:
         print('  Solving {0:6}/{1:6}th points with full cum at a time...'.format(n_pt_full, n_pt), flush=True)
-    
+
         ## Sovle
         result[:, bool_pt_full] = np.linalg.lstsq(G, cum[bool_pt_full, :].transpose(), rcond=None)[0]
 
@@ -329,9 +342,9 @@ def calc_velsin(cum, dt_cum, imd0):
     mask = (~np.isnan(cum_tmp))
     cum_tmp[np.isnan(cum_tmp)] = 0
     print('  Next, solve {0} points including nan point-by-point...'.format(n_pt-n_pt_full), flush=True)
-    
-    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(n_im+1, n_pt) 
-        
+
+    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(n_im+1, n_pt)
+
     vconst = result[0, :]
     vel = result[1, :]
     coef_s = result[2, :]
@@ -362,21 +375,21 @@ def calc_velstd_withnan(cum, dt_cum):
     global bootcount, bootnum
     n_pt, n_im = cum.shape
     bootnum = 100
-    bootcount = 0 
+    bootcount = 0
 
     vstd = np.zeros((n_pt), dtype=np.float32)
     G = np.stack((np.ones_like(dt_cum), dt_cum), axis=1)
-    
+
     data = cum.transpose().copy()
     ixs_day = np.arange(n_im)
     mask = (~np.isnan(data))
     data[np.isnan(data)] = 0
-            
+
     velinv = lambda x : censored_lstsq2(G[x, :], data[x, :], mask[x, :])[1]
 
     with NumpyRNGContext(1):
         bootresult = bootstrap(ixs_day, bootnum, bootfunc=velinv)
-        
+
     vstd = np.nanstd(bootresult, axis=0)
 
     print('')
@@ -401,10 +414,10 @@ def censored_lstsq2(A, B, M):
         X = np.squeeze(np.linalg.solve(T, rhs)).T # transpose to get r x n
     except: ## In case Singular matrix
         X = np.zeros((B.shape[1]), dtype=np.float32)*np.nan
-    
+
     return X
 
-    
+
 #%%
 def calc_stc(cum):
     """
@@ -427,21 +440,21 @@ def calc_stc(cum):
     _stc = np.ones((length, width, 8), dtype=np.float32)*np.nan
     pixels = [[0, 0], [0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1], [2, 2]]
     ## Left Top = [0, 0], Rigth Bottmon = [2, 2], Center = [1, 1]
-    
+
     for i, pixel in enumerate(pixels):
         ### Spatial difference (surrounding pixel-center)
         d_cum = cum1[:, pixel[0]:length+pixel[0], pixel[1]:width+pixel[1]] - cum1[:, 1:length+1, 1:width+1]
-        
+
         ### Temporal difference (double difference)
         dd_cum = d_cum[:-1,:,:]-d_cum[1:,:,:]
-        
+
         ### STC (i.e., RMS of DD)
         sumsq_dd_cum = np.nansum(dd_cum**2, axis=0)
         n_dd_cum = np.float32(np.sum(~np.isnan(dd_cum), axis=0)) #nof non-nan
         n_dd_cum[n_dd_cum==0] = np.nan #to avoid 0 division
         _stc[:, :, i] = np.sqrt(sumsq_dd_cum/n_dd_cum)
 
-    ### Strange but some adjacent pixels can have identical time series, 
+    ### Strange but some adjacent pixels can have identical time series,
     ### resulting in 0 of stc. To avoid this, replace 0 with nan.
     _stc[_stc==0] = np.nan
 
@@ -516,6 +529,6 @@ def censored_lstsq_slow(A, B, M):
             X[:,i] = np.linalg.lstsq(A[m], B[m,i], rcond=None)[0]
         except:
             X[:,i] = np.nan
-    
+
     print('')
     return X
diff --git a/batch_LiCSBAS.sh b/batch_LiCSBAS.sh
@@ -41,6 +41,7 @@ p05_clip_range_geo=""	# e.g. 130.11/131.12/34.34/34.6 (in deg)
 p11_unw_thre=""	# default: 0.3
 p11_coh_thre=""	# default: 0.05
 p12_loop_thre=""	# default: 1.5 rad
+p13_gpu="n"	# y/n. default: n
 p15_coh_thre=""	# default: 0.05
 p15_n_unw_r_thre=""	# default: 1.5
 p15_vstd_thre=""	# default: 100 mm/yr
@@ -285,6 +286,7 @@ if [ $start_step -le 13 -a $end_step -ge 13 ];then
   if [ ! -z $p13_n_para ];then p13_op="$p13_op --n_para $p13_n_para"; fi
   if [ ! -z $p13_n_unw_r_thre ];then p13_op="$p13_op --n_unw_r_thre $p13_n_unw_r_thre"; fi
   if [ $p13_keep_incfile == "y" ];then p13_op="$p13_op --keep_incfile"; fi
+  if [ $p13_gpu == "y" ];then p13_op="$p13_op --gpu"; fi
 
   if [ $check_only == "y" ];then
     echo "LiCSBAS13_sb_inv.py $p13_op"