capstone-coal · TinyBugBigProblem · Dec 2, 2019
diff --git a/pycoal/mineral.py b/pycoal/mineral.py
@@ -24,6 +24,7 @@
 
 import pycoal
 from pycoal.resampler import create_resampling_matrix
+from pycoal import parallel
 import spectral
 import time
 import fnmatch
@@ -86,9 +87,95 @@ def calculate_pixel_confidence_value(pixel, angles_m, resampling_matrix):
 calling function but are optionals and may vary from one classifier to another.
 """
 
-# to be merged
-def SAM_pytorch():
-    pass
+def SAM_pytorch(image_file_name, classified_file_name, library_file_name,
+                scores_file_name=None, class_names=None, threshold=0.0,
+                in_memory=False, subset_rows=None, subset_cols=None):
+
+    # load and optionally subset the spectral library
+    library = spectral.open_image(library_file_name)
+    if class_names is not None:
+        library = pycoal.mineral.MineralClassification.subset_spectral_library(
+            library, class_names)
+
+    # open the image
+    image = spectral.open_image(image_file_name)
+    if subset_rows is not None and subset_cols is not None:
+        data = SubImage(image, subset_rows, subset_cols)
+        m = subset_rows[1] 
+        n = subset_cols[1]      
+    else:
+        if in_memory:
+            data = image.load()
+        else:
+            data = image.asarray()
+        m = image.shape[0]
+        n = image.shape[1]
+
+    logging.info("Classifying a %iX%i image" % (m, n))
+
+    # define a resampler
+    # TODO detect and scale units
+    # TODO band resampler should do this
+    resampling_matrix = create_resampling_matrix(
+        [x / 1000 for x in image.bands.centers], library.bands.centers)
+
+    # turn the data object into a tensor
+    data = numpy.array(data)
+    # float 64
+    data = data.astype(numpy.dtype('f8'))
+    data = torch.from_numpy(data)
+    # allocate a zero-initialized MxN array for the classified image
+    classified = torch.zeros(shape=(m, n), dtype=numpy.uint16)
+    scored = torch.zeros(shape=(m, n), dtype=numpy.float64)
+
+    # universal calculations for angles
+    # Adapted from Spectral library
+    ang_m = numpy.array(library.spectra, dtype=numpy.float64)
+    ang_m /= numpy.sqrt(numpy.einsum('ij,ij->i', ang_m, ang_m))[:, numpy.newaxis]
+
+    # Create data loader class that will return range of data to model
+    # dataSet = ImageDataSet(m, x, data, classified, scored)
+
+    classified, scored = parallel.data_parallel(resampling_matrix, data, classified, scored)
+
+    classified = classified + 1
+
+    # Turns variables back to numpy
+    classified = classified.numpy()
+    scored = scored.numpy()
+
+    indices = numpy.where(scored[:][:] <= threshold)
+
+    classified[indices] = 0
+    scored[indices] = 0
+
+    # save the classified image to a file
+    spectral.io.envi.save_classification(classified_file_name, classified,
+                                         class_names=['No data'] +
+                                         library.names,
+                                         metadata={'data ignore value': 0,
+                                                   'description': 'COAL ' +
+                                                   pycoal.version + ' '
+                                                   'mineral classified '
+                                                   'image.',
+                                                   'map info':
+                                                       image.metadata.get(
+                                                        'map info')})
+
+    # remove unused classes from the image
+    pycoal.mineral.MineralClassification.filter_classes(classified_file_name)
+
+    if scores_file_name is not None:
+        # save the scored image to a file
+        spectral.io.envi.save_image(scores_file_name, scored,
+                                    dtype=numpy.float64,
+                                    metadata={'data ignore value': -50,
+                                              'description': 'COAL ' +
+                                              pycoal.version + ' mineral '
+                                              'scored image.',
+                                              'map info': image.metadata.get(
+                                                  'map info')})
+
 
 
 def SAM_joblib(image_file_name, classified_file_name, library_file_name,

diff --git a/pycoal/parallel.py b/pycoal/parallel.py
@@ -0,0 +1,134 @@
+'''
+    Module Dependencies
+        torch:
+        torch.nn
+        Dataset
+        Dataloader
+'''
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+
+import pycoal
+from torch.utils.data.dataset import Dataset
+import numpy
+#from torch.utils.data import Dataloader
+
+import spectral
+import time
+import fnmatch
+import shutil
+import math
+import sys
+import os
+
+'''
+    Custom function that will perform the operations on given pixel
+''' 
+class LinearFunction(Function):
+    '''
+        ctx is a context object that is typically used to save data that will be needed for the backward
+        pass, but in this application it is used to store needed data like:
+            1. Given data
+            2. Given resampling_matrix
+    '''
+    @staticmethod
+    def forward(x, y, data, resampling_matrix, classified, scored):
+
+        # read the pixel from the file
+        pixel = data[x, y]
+
+        # if it is not a no data pixel
+        if not numpy.isclose(pixel[0], -0.005) and not pixel[0] == -50:
+
+            # resample the pixel ignoring NaNs from target bands that
+            # don't overlap
+
+            # TODO fix spectral library so that bands are in order
+            resample_data = numpy.einsum('ij,j->i', resampling_matrix, pixel)
+            resample_data = numpy.nan_to_num(resample_data)
+
+            # calculate spectral angles
+            # Adapted from Spectral library
+            norms = numpy.sqrt(numpy.einsum('i,i->', resample_data, resample_data))
+            dots = numpy.einsum('i,ji->j', resample_data, ang_m)
+            dots = numpy.clip(dots / norms, -1, 1)
+            angles = numpy.arccos(dots)
+
+            # normalize confidence values from [pi,0] to [0,1]
+            angles = 1 - angles / math.pi
+
+            # get index of class with largest confidence value
+            classified[x, y] = numpy.argmax(angles)
+
+            # get confidence value of the classified pixel
+            scored[x, y] = angles[classified[x, y]]
+
+        return classified[x,y], scored[x,y]
+
+    # This is typically for the gradient formula. Since we don't need a gradient for this project, we will be returning None
+    @staticmethod
+    def backward():
+        return None
+
+
+'''
+
+'''
+def data_parallel(resampling_matrix, data, classified, scored):
+    # Get the number of available GPUs on device
+    deviceNum = torch.cuda.device_count()
+    # Create model
+    model = ImageParallelModel()
+
+    # Check if more than one GPU is available
+    if deviceNum > 1:
+        model = nn.DataParallel(mode, device_ids=range(0,deviceNum))
+
+    return model(resmapling_matrix, data, classified, scored)
+
+'''
+    Data Model
+        Purpose: To utilize torch's parallelization modules for processing images
+        Input: input size (input_size), output size (output_size)
+        Output:   
+'''
+class ImageParallelModel(nn.Module):
+    '''
+        This is where all used models are instantiated
+    '''
+    def __init__(self):
+        super(ImageParallelModel, self).__init__()
+        self.func = LinearFunction()
+
+    def forward(self, resampling_matrix, data, classified, scored):
+        for x in range(0, data.get_shape()[0].value):
+            for y in range(0, data.get_shape()[1].value):
+                classified[x,y], scored[x,y] = self.func(x,y,data,resampling_matrix, classified, scored)       
+
+        return classified, scored
+
+
+'''
+    Dataset class
+'''
+class ImageDataSet(Dataset):
+    def __init__(self, x, y, image, classified, scored):
+        self.x = x
+        self.y = y
+        self.image = image
+        self.classified = classified
+        self.scored = scored
+        self.resample = resample
+
+        return
+    def __len__(self):
+        return self.x
+    # Returns tensor column 
+    def __get__(self, x,y):
+        data = torch.narrow(self.image, 0, x, y - x)
+        classified = torch.narrow(self.classified, 0, x, y - x)
+        scored = torch.narrow(self.scored, 0, x, y - x)
+
+        return (data, classified, scored)
+