-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathESD_calculate_stats_at_burst_overlap_level.py
362 lines (291 loc) · 15.2 KB
/
ESD_calculate_stats_at_burst_overlap_level.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 4 11:37:41 2024
@author: Sofia Viotto
"""
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import os,glob, time, logging
import xarray as xr
import argparse
import matplotlib.pyplot as plt
from argparse import RawTextHelpFormatter
# Configure the root logger once for the whole script: INFO and above,
# each record prefixed with a timestamp and a left-aligned level name.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)
#---------------------------
# Usage examples shown in the --help epilog. The flag must be spelled
# ``--subswath`` (that is the option the parser defines); a trailing backslash
# inside the literal joins the following line into the same command.
EXAMPLE = """example:
python ESD_calculate_stats_at_burst_overlap_level.py \
--inDir /Home/ISCE_processing/ESD
python ESD_calculate_stats_at_burst_overlap_level.py \
--inDir /Home/ISCE_processing/ESD --subswath IW1
python ESD_calculate_stats_at_burst_overlap_level.py \
--inDir /Home/ISCE_processing/ESD --subswath IW1 IW2
"""
# Program description shown at the top of --help.
DESCRIPTION = """
Calculates statistics from Enhanced Spectral Diversity (ESD) files used to properly align the scenes (stackSentinel.py tool from ISCE).
The script processes ESD files to extract median values, standard deviations, and coherent points at the burst-overlap level.
Oct-2024, Sofia Viotto ([email protected])
"""
#-------------------------
# Command-line interface. Arguments are parsed at module level because run()
# reads the resulting module-level ``args`` object.
parser = argparse.ArgumentParser(
    description=DESCRIPTION, epilog=EXAMPLE, formatter_class=RawTextHelpFormatter
)
# --inDir is mandatory: run() passes it straight to os.path.abspath(), which
# raises TypeError on None. Let argparse report the missing option instead.
parser.add_argument('--inDir', '-i', dest='inDir', required=True,
                    help='Full path to the ESD folder')
# One or more sub-swath names; when omitted they are discovered from the
# folders found inside the ESD directory.
parser.add_argument('--subswath', '-s', dest='subswath',
                    help='Optional. Define a sub-swath to calculate statistics',
                    nargs='+', default=None)
args = parser.parse_args()
#------------------------------------------------#
def check_input_directories(inps):
    """
    Validate the input folders and resolve the sub-swaths to process.

    Parameters:
    inps (dict): Must contain 'inDir' (folder where outputs are written),
        'ESD_dir' (folder holding one sub-folder per pair, matched with the
        pattern '2*') and 'subswath' (list of sub-swath names, or None to
        discover them from the 'IW*' folders inside each pair).

    Returns:
    Tuple (inps, skip). ``skip`` is True when processing must not continue:
    a folder is missing, no sub-swath was found, or a sub-swath does not have
    one combined.off.vrt per pair folder. On success ``inps['subswath']`` is
    replaced by the list of sub-swaths to process.

    Side effects:
    Creates '<inDir>/ESD_azimuth_offsets' when 'inDir' exists and the folder
    is not there yet.
    """
    # Check if the main input directory exists
    if not os.path.exists(inps['inDir']):
        logging.info('Input Directory does not exist.')
        return inps, True

    # Create ESD_azimuth_offsets directory if not found. exist_ok guards
    # against another process creating it between the check and the call.
    out_dir = os.path.join(inps['inDir'], 'ESD_azimuth_offsets')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Check if the ESD directory exists
    if not os.path.exists(inps['ESD_dir']):
        logging.info('ESD directory not found')
        return inps, True

    # Every pair folder is expected to start with '2' (a date such as 20xx).
    ESD_pairs_folders = sorted(glob.glob(os.path.join(inps['ESD_dir'], '2*')))

    # Identify sub-swaths based on the ESD folder unless the caller chose them
    if inps['subswath'] is None:
        subswath_folders = glob.glob(os.path.join(inps['ESD_dir'], '2*', 'IW*'))
        subswath_unique = np.unique([os.path.basename(i) for i in subswath_folders])
    else:
        subswath_unique = inps['subswath']

    if len(subswath_unique) == 0:
        # Nothing to process; say so instead of silently doing nothing.
        logging.info('Skipping. No sub-swath folders found in the ESD directory.')
        return inps, True

    # Each sub-swath must provide exactly one combined.off.vrt per pair
    for iw in subswath_unique:
        ESD_offset_filename = glob.glob(
            os.path.join(inps['ESD_dir'], '2*', iw, 'combined.off.vrt'))
        if len(ESD_pairs_folders) != len(ESD_offset_filename):
            logging.info('Skipping. Number of pairs in the ESD folder differs from the number of combined.off.vrt files.')
            return inps, True

    # Store plain Python strings so later path joins do not carry numpy types
    inps['subswath'] = [str(iw) for iw in subswath_unique]
    logging.info('Sub-swath found {}'.format(subswath_unique))
    return inps, False
def MAD(x):
    """
    Return the median absolute deviation of *x*, ignoring NaN entries.

    NaNs are skipped (as IQR does with its NaN-aware percentiles) so that a
    single missing value does not turn the whole result into NaN.

    Parameters:
    x (array-like): Numeric values.

    Returns:
    Median of the absolute deviations from the median of *x*.
    """
    values = np.asarray(x, dtype=float)
    deviations = np.abs(values - np.nanmedian(values))
    return np.nanmedian(deviations)
def IQR(x):
    """Return the interquartile range (75th minus 25th percentile) of *x*, ignoring NaNs."""
    upper_quartile, lower_quartile = np.nanpercentile(x, [75, 25])
    return upper_quartile - lower_quartile
def plot_distribution_per_burst_overlap(df_stats_medians,df_coh_points,subswath,inps):
    """
    Save box plots of the per-burst-overlap medians and coherent-point counts.

    Parameters:
    df_stats_medians (DataFrame): One row per pair; the burst-overlap median
        columns come first, followed by 9 trailing summary/date columns.
    df_coh_points (DataFrame): One row per pair; the burst-overlap count
        columns come first, followed by 4 trailing columns.
    subswath (str): Sub-swath identifier used in the title and file name.
    inps (dict): Must contain 'inDir'; the figure is written to
        '<inDir>/ESD_azimuth_offsets/'.
    """
    import matplotlib.ticker as mticker

    boxprops = dict(facecolor='lightblue', color='black', linewidth=0.75)
    medianprops = dict(color='red', linewidth=1)
    whiskerprops = dict(color='black', linewidth=0.75)
    capprops = dict(color='black', linewidth=0.75)

    # Drop the 9 trailing non-burst columns (MAD, IQR, dates, months, years,
    # temporal baseline) and the 4 trailing ones of the coherent-points table
    # (total, dates, temporal baseline) to keep only per-burst-overlap data.
    median_values = df_stats_medians.iloc[:, :-9].values
    coh_values = df_coh_points.iloc[:, :-4].values
    median_max = np.nanmax(median_values)

    fig, axs = plt.subplots(nrows=2, figsize=(8, 15/2.54))
    # First boxplot (for medians)
    axs[0].boxplot(median_values, patch_artist=True, boxprops=boxprops,
                   medianprops=medianprops, whiskerprops=whiskerprops, capprops=capprops)
    axs[0].set_ylabel('Median Offset Azimuth [px]')
    # NOTE(review): the axis starts at 0 and the upper limit is rounded to two
    # decimals, so negative medians are clipped and a maximum below 0.005
    # yields a (0, 0) range -- confirm this is the intended view.
    axs[0].set_ylim(0, np.round(median_max, 2))
    axs[0].set_title('Medians')
    # Second boxplot (for coherent points)
    axs[1].boxplot(coh_values, patch_artist=True, boxprops=boxprops,
                   medianprops=medianprops, whiskerprops=whiskerprops, capprops=capprops)
    axs[1].set_ylabel('#Points Coh >0.85')
    axs[1].set_title('Coherent Points')
    # Rotate x-axis labels
    for ax in axs:
        ax.set_xlabel('Burst Overlapping Area')
        ax.tick_params(axis='x', labelrotation=90)
    axs[1].yaxis.set_major_formatter(mticker.ScalarFormatter(useMathText=True))
    axs[1].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    # Set the main title and format the figure
    fig.suptitle('Statistics on Each Burst Overlap (Sub-swath {})'.format(subswath), fontsize=12)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    fig.savefig(os.path.join(inps['inDir'],'ESD_azimuth_offsets/','boxplot_stats_at_burst_overlapping_area_{}.png'.format(subswath)),dpi=300)
    # Release the figure; otherwise one figure per sub-swath stays in memory.
    plt.close(fig)
def report_pairs(df_stats_medians,inps,threshold=0.0009):
    """
    Write the list of pairs whose MAD reaches the given threshold.

    Parameters:
    df_stats_medians (DataFrame): Indexed by pair name, with a 'MAD_px' column.
    inps (dict): Must contain 'inDir'; the report is written to
        '<inDir>/exclude_pairs_ESD.txt'.
    threshold (float): Pairs with MAD_px >= threshold are listed.
        Defaults to 0.0009 px, the value previously hard-coded here.

    Notes:
    Rows whose MAD_px is NaN never satisfy the comparison and are not listed.
    NOTE(review): the file name does not depend on the sub-swath and the file
    is opened in write mode, so each call replaces the previous report.
    """
    flagged = df_stats_medians['MAD_px'] >= threshold
    # Index labels are converted explicitly because str.join only takes strings.
    pairs = [str(pair) for pair in df_stats_medians.index[flagged]]
    out_report = os.path.join(inps['inDir'], 'exclude_pairs_ESD.txt')
    with open(out_report, 'w') as fl:
        fl.write('Pairs with Median Absolute Deviation MAD larger than {}\n'.format(threshold))
        fl.write("\n".join(pairs))
def plot_histograms_of_global_variables(df_stats_medians,df_coh_points,subswath,inps):
    """
    Save histograms of the per-pair MAD and of the total coherent points.

    Parameters:
    df_stats_medians (DataFrame): Must contain a 'MAD_px' column.
    df_coh_points (DataFrame): Must contain a 'TotalCohPts' column.
    subswath (str): Sub-swath identifier used in the title and file name.
    inps (dict): Must contain 'inDir'; the figure is written to
        '<inDir>/ESD_azimuth_offsets/'.
    """
    n_bins = 20
    accuracy_threshold = 0.0009
    fig, axs = plt.subplots(nrows=2, figsize=(8, 15/2.54))

    # First histogram (MAD per pair). Non-finite values are removed because
    # the histogram cannot derive a bin range from NaN.
    values = df_stats_medians['MAD_px'].values.flatten()
    values = values[np.isfinite(values)]
    axs[0].hist(values, bins=n_bins)
    axs[0].set_ylabel('Frequency (log-scale)')
    axs[0].set_yscale('log')
    axs[0].set_xlabel('MAD per pair [px]')
    axs[0].set_title('Median Absolute Deviation of Burst Overlap')
    # Reference lines: selected percentiles plus the fixed accuracy threshold
    p75 = np.nanpercentile(values, 75)
    axs[0].axvline(p75, c='orange', lw=1, label='75th %ile')
    p90 = np.nanpercentile(values, 90)
    axs[0].axvline(p90, c='red', lw=1, label='90th %ile')
    p95 = np.nanpercentile(values, 95)
    axs[0].axvline(p95, c='red', lw=1, ls='--', label='95th %ile')
    axs[0].axvline(accuracy_threshold, c='k', lw=1, label='Accuracy Thresh.')
    axs[0].legend()

    # Second histogram (total coherent points per pair)
    values = df_coh_points['TotalCohPts'].values
    axs[1].hist(values, bins=n_bins)
    axs[1].set_ylabel('Frequency (log-scale)')
    # The log scale belongs to this panel: its label announces it, and the
    # first panel has already been switched above.
    axs[1].set_yscale('log')
    axs[1].set_xlabel('Total of Coherent Points per pair')
    axs[1].set_title('Coherent Points')

    # Set the main title and format the figure
    fig.suptitle('Statistics on Each Burst Overlap (Sub-swath {})'.format(subswath), fontsize=12)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    fig.savefig(os.path.join(inps['inDir'],'ESD_azimuth_offsets/','histograms_stats_at_burst_overlapping_area_{}.png'.format(subswath)),dpi=300)
    # Release the figure; otherwise one figure per sub-swath stays in memory.
    plt.close(fig)
def calculate_median_ESD_per_burst(combined_fname):
    """
    Calculates median ESD statistics for each burst overlap.

    Parameters:
    combined_fname (str): Filename of the combined offset file
        ('...off.vrt'). The coherence and interferogram files are looked up
        next to it by replacing '.off.vrt' with '.cor.vrt' and '.int.vrt'.

    Returns:
    Tuple (medians, std, coh_points, number_brst_ovlp): three lists with one
    entry per burst overlap (NaN-aware median and standard deviation of the
    offsets, and the count of non-NaN offset pixels), plus the number of
    burst overlaps found.
    """
    # Open the three rasters as context managers so their file handles are
    # released once the statistics have been computed.
    with xr.open_dataarray(combined_fname) as ESD_off_raw, \
            xr.open_dataarray(combined_fname.replace('.off.vrt', '.cor.vrt')) as ESD_cor, \
            xr.open_dataarray(combined_fname.replace('.off.vrt', '.int.vrt')) as ESD_int:
        # Step 1: Mask combined offsets with coherence and interferogram thresholds
        ESD_off = ESD_off_raw.where(ESD_cor > 0.3)
        ESD_off = ESD_off.where(np.angle(ESD_int) > 0)
        ESD_off = ESD_off.squeeze()

        # Retrieve burst overlap coordinates: keep the y-coordinates of rows
        # that still hold at least one valid (non-null) offset after masking.
        max_per_coordinates = ESD_off.max(dim='x')
        coordinates = max_per_coordinates[(max_per_coordinates.notnull())].y.values
        # Group coordinates: a jump larger than 1 between consecutive
        # y-coordinates marks the boundary between two burst overlaps.
        coordinates_split = np.split(coordinates, np.where(np.diff(coordinates) > 1)[0] + 1)
        number_brst_ovlp = len(coordinates_split)

        # Step 2: Filter pixels with coherence >= 0.85 (ESD threshold)
        ESD_off = ESD_off.where(ESD_cor > 0.849)

        # Calculate median, std, and number of coherent points per burst overlap
        medians = []
        std = []
        coh_points = []
        for group in coordinates_split:
            # Select the rows of this burst overlap once and reuse them
            values = ESD_off.sel(y=group).data
            medians.append(np.nanmedian(values))
            std.append(np.nanstd(values))
            coh_points.append(np.count_nonzero(~np.isnan(values)))
    return medians, std, coh_points, number_brst_ovlp
def calculate_stats_by_subwath(inps,subswath):
    """
    Calculates and saves ESD statistics for each sub-swath.

    For every pair found under '<ESD_dir>/2*/<subswath>/combined.off.vrt' the
    per-burst-overlap medians, standard deviations and coherent-point counts
    are collected, written as CSV tables (plus their describe() summaries) to
    '<inDir>/ESD_azimuth_offsets/', plotted, and the pairs with a large MAD
    are reported.

    Parameters:
    inps (dict): Input directory paths ('inDir' and 'ESD_dir').
    subswath (str): Sub-swath identifier.

    Returns:
    None. The sub-swath is skipped (with a log message) when no pair is found
    or when the pairs do not all have the same number of burst overlaps.
    """
    # Find files in the ESD directory
    ESD_offset_filename = sorted(glob.glob(os.path.join(inps['ESD_dir'], '2*', subswath, 'combined.off.vrt')))
    n_pairs = len(ESD_offset_filename)
    if n_pairs == 0:
        logging.info(f'No combined.off.vrt files found ({subswath}). Skipping.')
        return

    medians, std, coh_points, n_brst_ovlp = [], [], [], []
    for fname in ESD_offset_filename:
        median_brst_ovlp, std_brst_ovlp, coh_point_brst_ovlp, number_brst_ovlp = calculate_median_ESD_per_burst(combined_fname=fname)
        medians.extend(median_brst_ovlp)
        std.extend(std_brst_ovlp)
        coh_points.extend(coh_point_brst_ovlp)
        n_brst_ovlp.append(number_brst_ovlp)

    # Check number of burst overlapping areas. The tables below need the same
    # number of columns for every pair, so an inconsistent count cannot be
    # reshaped and the sub-swath is skipped.
    if len(set(n_brst_ovlp)) != 1:
        logging.error(f'Error found during calculations. Inconsistent number of burst overlapping areas across pairs ({subswath}): {sorted(set(n_brst_ovlp))}. Skipping.')
        return
    n_overlaps = n_brst_ovlp[0]
    logging.info(f'Number of burst overlapping areas found: {n_overlaps} ({subswath})')

    # Reshape lists into (pair, burst overlap) tables
    table_shape = (n_pairs, n_overlaps)
    std = np.asarray(std).reshape(table_shape)
    medians = np.asarray(medians).reshape(table_shape)
    coh_points = np.asarray(coh_points).reshape(table_shape)

    # The pair name is the folder two levels above the file:
    # <ESD_dir>/<pair>/<subswath>/combined.off.vrt
    pairs = [os.path.basename(os.path.dirname(os.path.dirname(fname))) for fname in ESD_offset_filename]
    #Coordinates are always read from the first burst overlapping area to the last one
    burst_overlap = ['BstOvlp' + str(i) for i in range(1, n_overlaps + 1)]
    #--------------------------------------#
    #Prepare dataframes
    #Median azimuth offset per burst overlapping area
    df_stats_medians = pd.DataFrame(medians, columns=burst_overlap, index=pairs)
    df_stats_medians = df_stats_medians.add_prefix('MedianAzOff_').add_suffix('_px')
    #Standard deviation of the azimuth offset per burst overlapping area
    df_stats_std = pd.DataFrame(std, columns=burst_overlap, index=pairs)
    df_stats_std = df_stats_std.add_prefix('StdAzOff_').add_suffix('_px')
    #Number of coherent points per burst overlapping area
    df_coh_points = pd.DataFrame(coh_points, columns=burst_overlap, index=pairs)
    df_coh_points = df_coh_points.add_prefix('CohPts_')

    # Remember the per-burst columns before metadata columns are appended, so
    # the summaries below select them by name rather than by position.
    median_columns = list(df_stats_medians.columns)
    coh_columns = list(df_coh_points.columns)

    #Add MADs and IQRs of the medians across burst overlapping areas (one per pair)
    df_stats_medians['MAD_px'] = [MAD(row) for row in medians]
    df_stats_medians['IQR_px'] = [IQR(row) for row in medians]
    #Add other parameters (pair names are '<reference date>_<secondary date>')
    df_stats_medians['RefDate'] = [pd.to_datetime(i.split('_')[0], format='%Y%m%d') for i in df_stats_medians.index.tolist()]
    df_stats_medians['RefDate_month'] = df_stats_medians['RefDate'].dt.month
    df_stats_medians['SecDate'] = [pd.to_datetime(i.split('_')[1], format='%Y%m%d') for i in df_stats_medians.index.tolist()]
    df_stats_medians['SecDate_month'] = df_stats_medians['SecDate'].dt.month
    df_stats_medians['RefDate_year'] = df_stats_medians['RefDate'].dt.year
    df_stats_medians['SecDate_year'] = df_stats_medians['SecDate'].dt.year
    df_stats_medians['Bt_days'] = (df_stats_medians['SecDate'] - df_stats_medians['RefDate']).dt.days
    #----------------------------------------------#
    #Prepare the dataframe of coherent points per burst overlap
    df_coh_points['TotalCohPts'] = df_coh_points.sum(axis=1)
    df_coh_points['RefDate'] = df_stats_medians['RefDate'].copy()
    df_coh_points['SecDate'] = df_stats_medians['SecDate'].copy()
    df_coh_points['Bt_days'] = df_stats_medians['Bt_days'].copy()
    #--------------------------------------------------#
    #Save dataframes
    logging.info('Saving dataframes')
    out_dir = os.path.join(inps['inDir'], 'ESD_azimuth_offsets')
    df_stats_medians.to_csv(os.path.join(out_dir, 'ESD_azimuth_offset_medians_pairs_{}.csv'.format(subswath)),
                            float_format='%.15f')
    df_stats_std.to_csv(os.path.join(out_dir, 'ESD_azimuth_offset_std_pairs_{}.csv'.format(subswath)),
                        float_format='%.15f')
    df_coh_points.to_csv(os.path.join(out_dir, 'ESD_azimuth_offset_coh_points_pairs_{}.csv'.format(subswath)),
                         float_format='%.15f')
    #--------------------------------------------------#
    #Save summaries from dataframes (date-related columns are left out)
    df_stats_medians_describe = df_stats_medians[median_columns + ['MAD_px', 'IQR_px']].describe()
    df_stats_medians_describe.to_csv(os.path.join(out_dir, 'summary_ESD_azimuth_offset_medians_pairs_{}.csv'.format(subswath)),
                                     float_format='%.15f')
    # The coherent-points summary must be built from, and written with, the
    # coherent-points table; all burst columns plus the total are kept.
    df_coh_points_describe = df_coh_points[coh_columns + ['TotalCohPts']].describe()
    df_coh_points_describe.to_csv(os.path.join(out_dir, 'summary_ESD_azimuth_offset_coh_points_pairs_{}.csv'.format(subswath)),
                                  float_format='%.15f')
    #-----------------------------------------------------#
    #Plot
    logging.info('Plotting figures')
    plot_distribution_per_burst_overlap(df_stats_medians, df_coh_points, subswath, inps)
    plot_histograms_of_global_variables(df_stats_medians, df_coh_points, subswath, inps)
    #----------------------------------------------------#
    #Report
    logging.info('Reporting pairs with large MAD of Azimuth Offset')
    report_pairs(df_stats_medians, inps)
def run():
    """
    Script entry point: validate the inputs, then process each sub-swath.

    The folder given with --inDir is used as the ESD directory and its parent
    folder as the location where outputs are written.
    """
    esd_dir = os.path.abspath(args.inDir)
    inps = {
        'inDir': os.path.dirname(esd_dir),
        'ESD_dir': esd_dir,
        'subswath': args.subswath,
    }

    logging.info('Checking input parameters')
    inps, skip = check_input_directories(inps)
    if skip:
        # Validation already logged the reason; nothing else to do.
        return

    logging.info('Retrieving burst overlap statistics at the sub-swath level')
    for subswath in inps['subswath']:
        calculate_stats_by_subwath(inps, subswath)


run()