Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Investigate running Pytorch on GPU #211

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 90 additions & 3 deletions pycoal/mineral.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import pycoal
from pycoal.resampler import create_resampling_matrix
from pycoal import parallel
import spectral
import time
import fnmatch
Expand Down Expand Up @@ -86,9 +87,95 @@ def calculate_pixel_confidence_value(pixel, angles_m, resampling_matrix):
calling function but are optionals and may vary from one classifier to another.
"""

# to be merged
def SAM_pytorch():
pass
def SAM_pytorch(image_file_name, classified_file_name, library_file_name,
scores_file_name=None, class_names=None, threshold=0.0,
in_memory=False, subset_rows=None, subset_cols=None):

# load and optionally subset the spectral library
library = spectral.open_image(library_file_name)
if class_names is not None:
library = pycoal.mineral.MineralClassification.subset_spectral_library(
library, class_names)

# open the image
image = spectral.open_image(image_file_name)
if subset_rows is not None and subset_cols is not None:
data = SubImage(image, subset_rows, subset_cols)
m = subset_rows[1]
n = subset_cols[1]
else:
if in_memory:
data = image.load()
else:
data = image.asarray()
m = image.shape[0]
n = image.shape[1]

logging.info("Classifying a %iX%i image" % (m, n))

# define a resampler
# TODO detect and scale units
# TODO band resampler should do this
resampling_matrix = create_resampling_matrix(
[x / 1000 for x in image.bands.centers], library.bands.centers)

# turn the data object into a tensor
data = numpy.array(data)
# float 64
data = data.astype(numpy.dtype('f8'))
data = torch.from_numpy(data)
# allocate a zero-initialized MxN array for the classified image
classified = torch.zeros(shape=(m, n), dtype=numpy.uint16)
scored = torch.zeros(shape=(m, n), dtype=numpy.float64)

# universal calculations for angles
# Adapted from Spectral library
ang_m = numpy.array(library.spectra, dtype=numpy.float64)
ang_m /= numpy.sqrt(numpy.einsum('ij,ij->i', ang_m, ang_m))[:, numpy.newaxis]

# Create data loader class that will return range of data to model
# dataSet = ImageDataSet(m, x, data, classified, scored)

classified, scored = parallel.data_parallel(resampling_matrix, data, classified, scored)

classified = classified + 1

# Turns variables back to numpy
classified = classified.numpy()
scored = scored.numpy()

indices = numpy.where(scored[:][:] <= threshold)

classified[indices] = 0
scored[indices] = 0

# save the classified image to a file
spectral.io.envi.save_classification(classified_file_name, classified,
class_names=['No data'] +
library.names,
metadata={'data ignore value': 0,
'description': 'COAL ' +
pycoal.version + ' '
'mineral classified '
'image.',
'map info':
image.metadata.get(
'map info')})

# remove unused classes from the image
pycoal.mineral.MineralClassification.filter_classes(classified_file_name)

if scores_file_name is not None:
# save the scored image to a file
spectral.io.envi.save_image(scores_file_name, scored,
dtype=numpy.float64,
metadata={'data ignore value': -50,
'description': 'COAL ' +
pycoal.version + ' mineral '
'scored image.',
'map info': image.metadata.get(
'map info')})



def SAM_joblib(image_file_name, classified_file_name, library_file_name,
Expand Down
134 changes: 134 additions & 0 deletions pycoal/parallel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
'''
Module Dependencies
torch:
torch.nn
Dataset
Dataloader
'''
import torch
import torch.nn as nn
from torch.autograd import Function

import pycoal
from torch.utils.data.dataset import Dataset
import numpy
#from torch.utils.data import Dataloader

import spectral
import time
import fnmatch
import shutil
import math
import sys
import os

'''
Custom function that will perform the operations on given pixel
'''
class LinearFunction(Function):
'''
ctx is a context object that is typically used to save data that will be needed for the backward
pass, but in this application it is used to store needed data like:
1. Given data
2. Given resampling_matrix
'''
@staticmethod
def forward(x, y, data, resampling_matrix, classified, scored):

# read the pixel from the file
pixel = data[x, y]

# if it is not a no data pixel
if not numpy.isclose(pixel[0], -0.005) and not pixel[0] == -50:

# resample the pixel ignoring NaNs from target bands that
# don't overlap

# TODO fix spectral library so that bands are in order
resample_data = numpy.einsum('ij,j->i', resampling_matrix, pixel)
resample_data = numpy.nan_to_num(resample_data)

# calculate spectral angles
# Adapted from Spectral library
norms = numpy.sqrt(numpy.einsum('i,i->', resample_data, resample_data))
dots = numpy.einsum('i,ji->j', resample_data, ang_m)
dots = numpy.clip(dots / norms, -1, 1)
angles = numpy.arccos(dots)

# normalize confidence values from [pi,0] to [0,1]
angles = 1 - angles / math.pi

# get index of class with largest confidence value
classified[x, y] = numpy.argmax(angles)

# get confidence value of the classified pixel
scored[x, y] = angles[classified[x, y]]

return classified[x,y], scored[x,y]

# This is typically for the gradient formula. Since we don't need a gradient for this project, we will be returning None
@staticmethod
def backward():
return None


'''

'''
def data_parallel(resampling_matrix, data, classified, scored):
# Get the number of available GPUs on device
deviceNum = torch.cuda.device_count()
# Create model
model = ImageParallelModel()

# Check if more than one GPU is available
if deviceNum > 1:
model = nn.DataParallel(mode, device_ids=range(0,deviceNum))

return model(resmapling_matrix, data, classified, scored)

'''
Data Model
Purpose: To utilize torch's parallelization modules for processing images
Input: input size (input_size), output size (output_size)
Output:
'''
class ImageParallelModel(nn.Module):
'''
This is where all used models are instantiated
'''
def __init__(self):
super(ImageParallelModel, self).__init__()
self.func = LinearFunction()

def forward(self, resampling_matrix, data, classified, scored):
for x in range(0, data.get_shape()[0].value):
for y in range(0, data.get_shape()[1].value):
classified[x,y], scored[x,y] = self.func(x,y,data,resampling_matrix, classified, scored)

return classified, scored


'''
Dataset class
'''
class ImageDataSet(Dataset):
def __init__(self, x, y, image, classified, scored):
self.x = x
self.y = y
self.image = image
self.classified = classified
self.scored = scored
self.resample = resample

return
def __len__(self):
return self.x
# Returns tensor column
def __get__(self, x,y):
data = torch.narrow(self.image, 0, x, y - x)
classified = torch.narrow(self.classified, 0, x, y - x)
scored = torch.narrow(self.scored, 0, x, y - x)

return (data, classified, scored)