forked from btswan87/isosceles
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial upload of multiprocessing ISOSCELES
- Loading branch information
Showing 1 changed file with 257 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,257 @@ | ||
from __future__ import print_function | ||
import gdal | ||
import os | ||
import numpy as np | ||
from time import time | ||
import osr | ||
import argparse | ||
import shutil | ||
from scipy import ndimage as ndi | ||
from skimage.filters import gabor_kernel | ||
import multiprocessing as mp | ||
from sklearn.cluster import AffinityPropagation | ||
from sklearn import preprocessing | ||
import pandas as pd | ||
import string | ||
|
||
# Chip window dimensions (pixels) and expected band count of input rasters.
y_block_size = 500
x_block_size = 500
n_bands = 4
# Pixel value treated as nodata; chips containing it are discarded.
null_pix_value = 0


# Prepare filter bank kernels: 4 orientations x 2 sigmas x 2 frequencies
# = 16 real-valued Gabor kernels used for texture features.
kernels = []
for theta in range(4):
    theta = theta / 4. * np.pi
    for sigma in (1, 3):
        for frequency in (0.05, 0.25):
            kernel = np.real(gabor_kernel(frequency, theta=theta,
                                          sigma_x=sigma, sigma_y=sigma))
            kernels.append(kernel)
n_filters = len(kernels)
|
||
|
||
def bgr2grey(bgr):
    """Collapse a band-first BGR image stack to one greyscale plane.

    Applies the standard luminance weights (0.1140 blue, 0.5870 green,
    0.2989 red) to the first three bands of *bgr*.
    """
    weights = (0.1140, 0.5870, 0.2989)
    grey = sum(w * bgr[band, :, :] for band, w in enumerate(weights))
    return grey
|
||
|
||
def compute_filter_feats(arr, bank):
    """Compute mean/variance responses of *arr* under each kernel in *bank*.

    Parameters
    ----------
    arr : 2-D ndarray
        Single-band (greyscale) image chip.
    bank : sequence of 2-D ndarrays
        Convolution kernels (e.g. the module's Gabor filter bank).

    Returns
    -------
    1-D ndarray of length ``2 * len(bank)``: per-kernel filtered means,
    followed by per-kernel filtered variances.
    """
    # Size the output from the bank itself rather than the module-level
    # n_filters global, so the function works with any kernel bank.
    n = len(bank)
    feats = np.zeros(2 * n, dtype=np.double)
    for k, kernel in enumerate(bank):
        filtered = ndi.convolve(arr, kernel, mode='wrap')
        feats[k] = filtered.mean()
        feats[k + n] = filtered.var()
    return feats
|
||
|
||
def get_feats(io_arg):
    """Worker: extract the feature vector for one image chip.

    Parameters
    ----------
    io_arg : tuple(str, tuple(int, int))
        Path to the raster on disk and the (y, x) pixel origin of the chip.

    Returns
    -------
    (1, 2 + 2*n_bands + 2*n_filters) ndarray: chip x/y origin, per-band
    means and standard deviations, then Gabor-filter means and variances.
    """
    arr, chip_idx = io_arg
    arr_ds = gdal.Open(arr)

    # Derive the row width from the module constants instead of the
    # original magic number 42 (= 2 + 2*4 band stats + 2*16 filter stats).
    n_feats = 2 + 2 * n_bands + 2 * n_filters
    chip_feats = np.zeros((1, n_feats))

    # Origin is recorded as (x, y) to match the output column order.
    chip_feats[0][0] = chip_idx[1]
    chip_feats[0][1] = chip_idx[0]

    # Use the module-level block sizes instead of hard-coded 500s so the
    # read window always matches what get_chips() validated.
    chip = arr_ds.ReadAsArray(chip_idx[1], chip_idx[0],
                              x_block_size, y_block_size)

    for b in range(0, chip.shape[0]):
        chip_feats[0][b + 2] = chip[b, :, :].mean()
        chip_feats[0][b + 2 + chip.shape[0]] = chip[b, :, :].std()

    chip_feats[0][2 + 2 * n_bands:] = compute_filter_feats(bgr2grey(chip),
                                                           kernels)

    # Release the GDAL dataset handle held by this worker.
    arr_ds = None

    return chip_feats
|
||
|
||
def get_chips(im_ras):
    """Return (y, x) origins of full-size, nodata-free blocks in a raster.

    Walks the raster in y_block_size x x_block_size windows, skipping the
    partial windows at the right and bottom edges, and keeps only windows
    that contain no null pixels.

    Parameters
    ----------
    im_ras : str
        Path to the image raster.

    Returns
    -------
    list of (y, x) pixel-origin tuples.
    """
    samples = []
    arr = gdal.Open(im_ras)
    y_size = arr.RasterYSize
    x_size = arr.RasterXSize
    for y in range(0, y_size, y_block_size):
        # Skip the partial block at the bottom edge.
        if y + y_block_size >= y_size:
            continue
        for x in range(0, x_size, x_block_size):
            # Skip the partial block at the right edge.  The original code
            # fell through here with a stale (or undefined) `cols`, which
            # could attempt a read past the raster boundary.
            if x + x_block_size >= x_size:
                continue
            block = arr.ReadAsArray(x, y, x_block_size, y_block_size)
            if null_pix_value not in block:
                samples.append((y, x))

    return samples
|
||
|
||
def mp_feats(arr, chips, processes):
    """Extract chip features in parallel across worker processes.

    Parameters
    ----------
    arr : str
        Path to the raster each worker should open.
    chips : sequence of (y, x) tuples
        Chip origins produced by get_chips().
    processes : int
        Number of worker processes.

    Returns
    -------
    list of per-chip feature arrays, in chip order.
    """
    tasks = [(arr, chip) for chip in chips]
    # Context-manage the pool so workers are always cleaned up, even if a
    # worker raises; the original close()/join() was skipped on error.
    with mp.Pool(processes=processes) as pool:
        mpresult = pool.map(get_feats, tasks)
    return mpresult
|
||
|
||
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Ready')
    parser.add_argument('--wd', dest='wd', required=True,
                        help='Working directory to store temp raster')
    parser.add_argument('--list', dest='filelist', required=True,
                        help='Path to text file listing images to process')
    parser.add_argument('--dst', dest='dst_folder', required=True,
                        help='Top level folder for output')
    parser.add_argument('--fn', dest='out_fn', required=True,
                        help='Prefix for output statistics files')
    parser.add_argument('--n_proc', dest='n_proc', required=True, type=int,
                        help='Number of worker processes to use')
    parser.add_argument('--pref', dest='pref', required=False, type=float,
                        help='AP preference value to be used instead /'
                             'of default')

    args = parser.parse_args()
    wd = args.wd
    filelist = args.filelist
    dst_folder = args.dst_folder
    out_fn = args.out_fn
    n_proc = args.n_proc
    pref = args.pref

    # Compare against None rather than truthiness so an explicit
    # preference of 0.0 (or any negative value, typical for Affinity
    # Propagation) is honored instead of silently falling back to default.
    if pref is not None:
        print("Preference set to {}".format(pref))
    else:
        print("Default preference will be used")

    # Create output folder if it doesn't already exist
    if not os.path.exists(dst_folder):
        os.makedirs(dst_folder)

    # Close the scene-list file promptly instead of leaking the handle.
    with open(filelist) as list_file:
        scene_list = [line.rstrip('\n\r') for line in list_file]

    for i in range(0, len(scene_list)):
        scene = scene_list[i]
        start_scene = time()
        im_basename = os.path.splitext(os.path.basename(scene))[0]
        im_folder = "ex_" + im_basename
        out_path = os.path.join(dst_folder, im_folder)
        if not os.path.exists(out_path):
            os.mkdir(out_path)
        os.chdir(out_path)

        # Work from a local copy so reads don't hammer the source volume.
        temp_dst = os.path.join(wd, os.path.basename(scene))
        shutil.copy(scene, temp_dst)
        src_ds = gdal.Open(temp_dst)

        # Get raster parameters from sat image
        x_cell_size = src_ds.GetGeoTransform()[1]
        y_cell_size = src_ds.GetGeoTransform()[5]
        skew = 0
        x_min = src_ds.GetGeoTransform()[0]
        y_max = src_ds.GetGeoTransform()[3]
        wkt = src_ds.GetProjection()
        srs = osr.SpatialReference()
        srs.ImportFromWkt(wkt)

        chips = get_chips(scene)

        start_blocks = time()
        print("Beginning scene {} of {}".format((i + 1), (len(scene_list))))

        result = mp_feats(temp_dst, chips, n_proc)

        minutes = ((time() - start_blocks) / 60)
        print("Scene scan finished in {} minutes".format(minutes))

        # Create numpy array to hold samples
        result_array = np.zeros((len(result), 42))

        for n in range(0, len(result)):
            result_array[n][:] = result[n]

        # Column 15 mean was originally mislabeled 'f5_mean' (duplicate),
        # corrupting the CSV header; corrected to 'f15_mean'.
        df = pd.DataFrame(data=result_array,
                          columns=['x_origin', 'y_origin', 'blue_mean',
                                   'green_mean', 'red_mean', 'nir_mean',
                                   'blue_std', 'green_std', 'red_std',
                                   'nir_std', 'f1_mean', 'f2_mean',
                                   'f3_mean', 'f4_mean', 'f5_mean',
                                   'f6_mean', 'f7_mean', 'f8_mean',
                                   'f9_mean', 'f10_mean', 'f11_mean',
                                   'f12_mean', 'f13_mean', 'f14_mean',
                                   'f15_mean', 'f16_mean', 'f1_std',
                                   'f2_std', 'f3_std', 'f4_std', 'f5_std',
                                   'f6_std', 'f7_std', 'f8_std', 'f9_std',
                                   'f10_std', 'f11_std', 'f12_std',
                                   'f13_std', 'f14_std', 'f15_std',
                                   'f16_std'])

        df.to_csv("{}_ex_{}.csv".format(out_fn, im_basename), index=False,
                  header=True)

        samples = result_array[:, 2:]

        sample_idxs = result_array[:, :2]

        # Initialize array to hold scaled samples
        scaled_samples = np.zeros(samples.shape)

        # Scale each feature type separately
        scaled_samples[:, 0:4] = preprocessing.scale(samples[:, 0:4])
        scaled_samples[:, 4:8] = preprocessing.scale(samples[:, 4:8])
        scaled_samples[:, 8:24] = preprocessing.scale(samples[:, 8:24])
        scaled_samples[:, 24:] = preprocessing.scale(samples[:, 24:])

        # See note above: test pref against None, not truthiness.
        if pref is not None:
            ap = AffinityPropagation(max_iter=10000, convergence_iter=100,
                                     preference=pref,
                                     affinity="euclidean").fit(scaled_samples)
        else:
            ap = AffinityPropagation(max_iter=10000, convergence_iter=100,
                                     affinity="euclidean").fit(scaled_samples)

        cluster_center_indices = ap.cluster_centers_indices_

        chip_count = 1

        for tile in cluster_center_indices:
            # sample_idxs rows are (x, y) pixel origins of each chip.
            subset = sample_idxs[tile]
            path = os.path.join(out_path, im_basename + "_"
                                + str(chip_count).zfill(4) + '.tif')

            # Get all raster bands for subset
            band = src_ds.GetRasterBand(1)
            red = band.ReadAsArray(int(subset[0]), int(subset[1]),
                                   x_block_size, y_block_size)
            band = src_ds.GetRasterBand(2)
            green = band.ReadAsArray(int(subset[0]), int(subset[1]),
                                     win_xsize=500, win_ysize=500)
            band = src_ds.GetRasterBand(3)
            blue = band.ReadAsArray(int(subset[0]), int(subset[1]),
                                    win_xsize=500, win_ysize=500)
            band = src_ds.GetRasterBand(4)
            nir = band.ReadAsArray(int(subset[0]), int(subset[1]),
                                   win_xsize=500, win_ysize=500)

            # Create geotransform information for destination raster
            # (y_cell_size is negative in north-up rasters, so '+' is
            # correct for the y origin).
            x_origin = x_min + (subset[0] * x_cell_size)
            y_origin = y_max + (subset[1] * y_cell_size)
            new_transform = (x_origin, x_cell_size, skew,
                             y_origin, skew, y_cell_size)

            # Create destination dataset
            driver = gdal.GetDriverByName('GTiff')
            dst_ds = driver.Create(path, x_block_size,
                                   y_block_size, 4,
                                   gdal.GDT_UInt16)

            # Write subsetted bands to destination dataset
            dst_ds.GetRasterBand(1).WriteArray(red)
            dst_ds.GetRasterBand(2).WriteArray(green)
            dst_ds.GetRasterBand(3).WriteArray(blue)
            dst_ds.GetRasterBand(4).WriteArray(nir)

            # Set geotransform and projection info
            dst_ds.SetGeoTransform(new_transform)
            dst_ds.SetProjection(srs.ExportToWkt())

            # Close output
            dst_ds = None
            chip_count += 1

        # Release the source dataset and remove the working copy.
        src_ds = None
        os.remove(temp_dst)

        minutes = ((time() - start_scene) / 60)
        print("Scene finished in {} minutes".format(minutes))
|