Skip to content

Commit

Permalink
Merge pull request #110 from SbastianGarzon/crs
Browse files Browse the repository at this point in the history
Coordinate Reference System
  • Loading branch information
nuest authored Jan 5, 2021
2 parents c41560f + b880265 commit 34f3b2a
Show file tree
Hide file tree
Showing 21 changed files with 3,700 additions and 249 deletions.
2 changes: 1 addition & 1 deletion geoextent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
name = "geoextent"

__version__ = '0.5.0'
__version__ = '0.5.1'
99 changes: 46 additions & 53 deletions geoextent/lib/extent.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,34 +11,38 @@
from . import helpfunctions as hf

logger = logging.getLogger("geoextent")
handle_modules = {'CSV': handleCSV, "raster":handleRaster, "vector":handleVector}
handle_modules = {'CSV': handleCSV, "raster": handleRaster, "vector": handleVector}


def computeBboxInWGS84(module, path):
'''
input "module": type module, module from which methods shall be used \n
input "path": type string, path to file \n
returns the spatial extent, type dict, keys: 'bbox' (type list, length = 4 , type = float, schema = [min(longs), min(lats), max(longs), max(lats)]) and 'crs' (type string, EPSG code); the bounding box is either already in WGS84 or has been transformed to WGS84.
'''
bbox_in_orig_crs = module.getBoundingBox(path)
logger.debug("computeBboxInWGS84: {}".format(path))
spatial_extent_origin = module.getBoundingBox(path)

try:
# TODO: Add function using to reproject coordinates system
if module.fileType == "application/shp":
crs = 'None'
return bbox_in_orig_crs
if spatial_extent_origin['crs'] == str(hf.WGS84_EPSG_ID):
spatial_extent = spatial_extent_origin
else:
crs = module.getCRS(path)
except:
pass
if 'crs' in locals() and crs and bbox_in_orig_crs:
bbox_transformed = hf.transformingArrayIntoWGS84(crs, bbox_in_orig_crs)
return bbox_transformed
else:
raise Exception("The bounding box could not be related to a CRS")
spatial_extent = {'bbox': hf.transformingArrayIntoWGS84(spatial_extent_origin['crs'],
spatial_extent_origin['bbox']),
'crs': str(hf.WGS84_EPSG_ID)}
except Exception as e:
raise Exception("The bounding box could not be transformed to the target CRS epsg:{}".format(hf.WGS84_EPSG_ID))

return spatial_extent


def fromDirectory(path, bbox=False, tbox=False):
''' TODO: implement
'''
""" Extracts geoextent from a directory/ZipFile
Keyword arguments:
path -- directory/ZipFile path
bbox -- True if bounding box is requested (default False)
tbox -- True if time box is requested (default False)
"""

logger.info("Extracting bbox={} tbox={} from Directory {}".format(bbox, tbox, path))

Expand All @@ -59,14 +63,14 @@ def fromDirectory(path, bbox=False, tbox=False):
path = extract_folder

for filename in os.listdir(path):
logger.info("path {}, folder/zipfile {}".format(path,filename))
logger.info("path {}, folder/zipfile {}".format(path, filename))
isZip = zipfile.is_zipfile(os.path.join(path, filename))
if isZip:
logger.info("**Inspecting folder {}, is zip ? {}**".format(filename, str(isZip)))
metadata_directory[filename] = fromDirectory(os.path.join(path,filename),bbox,tbox)
metadata_directory[filename] = fromDirectory(os.path.join(path, filename), bbox, tbox)
else:
logger.info("Inspecting folder {}, is zip ? {}".format(filename, str(isZip)))
if os.path.isdir(os.path.join(path,filename)):
if os.path.isdir(os.path.join(path, filename)):
metadata_directory[filename] = fromDirectory(os.path.join(path, filename), bbox, tbox)
else:
metadata_file = fromFile(os.path.join(path, filename), bbox, tbox)
Expand All @@ -76,35 +80,36 @@ def fromDirectory(path, bbox=False, tbox=False):
metadata['format'] = file_format

if bbox:
bbox_ext = hf.bbox_merge(metadata_directory,path)
bbox_ext = hf.bbox_merge(metadata_directory, path)
if bbox_ext is not None:
metadata['crs'] = "4326"
metadata['bbox'] = bbox_ext
if len(bbox_ext) != 0:
metadata['crs'] = bbox_ext['crs']
metadata['bbox'] = bbox_ext['bbox']
else:
logger.warning("The {} {} has no identifiable bbox - Coordinate reference system (CRS) may be missing".format(file_format,path))
logger.warning(
"The {} {} has no identifiable bbox - Coordinate reference system (CRS) may be missing".format(
file_format, path))

if tbox:
tbox_ext = hf.tbox_merge(metadata_directory,path)
tbox_ext = hf.tbox_merge(metadata_directory, path)
if tbox_ext is not None:
metadata['tbox'] = tbox_ext
else:
logger.warning("The {} {} has no identifiable time extent".format(file_format,path))
logger.warning("The {} {} has no identifiable time extent".format(file_format, path))

#metadata['details'] = metadata_directory
# metadata['details'] = metadata_directory

return metadata


def fromFile(filePath, bbox=True, tbox=True, num_sample=None):
''' TODO: update these docs
function is called when filePath is included in commandline (with tag 'b')
how this is done depends on the file format - the function calls the handler for each supported format \n
extracted data are bounding box, temporal extent and crs, a separate thread is dedicated to each extraction process \n
input "filePath": type string, path to file from which the metadata shall be extracted \n
input "whatMetadata": type string, specifies which metadata should be extracted \n
returns None if the format is not supported, else returns the metadata of the file as a dict
(possible) keys of the dict: 'temporal_extent', 'bbox', 'vector_reps', 'crs'
'''
""" Extracts geoextent from a file
Keyword arguments:
filePath -- path to the file
bbox -- True if bounding box is requested (default True)
tbox -- True if time box is requested (default True)
num_sample -- sample size to determine time format (Only required for csv files)
"""
logger.info("Extracting bbox={} tbox={} from file {}".format(bbox, tbox, filePath))

if bbox == False and tbox == False:
Expand All @@ -124,7 +129,7 @@ def fromFile(filePath, bbox=True, tbox=True, num_sample=None):
valid = handle_modules[i].checkFileSupported(filePath)
if valid:
usedModule = handle_modules[i]
logger.info("{} is being used to inspect {} file".format(usedModule.get_handler_name(),filePath))
logger.info("{} is being used to inspect {} file".format(usedModule.get_handler_name(), filePath))
break

# If file format is not supported
Expand All @@ -148,7 +153,9 @@ def run(self):
if self.task == "bbox":
try:
if bbox:
metadata["bbox"] = computeBboxInWGS84(usedModule, filePath)
spatial_extent = computeBboxInWGS84(usedModule, filePath)
metadata["bbox"] = spatial_extent['bbox']
metadata["crs"] = spatial_extent['crs']
except Exception as e:
logger.warning("Error for {} extracting bbox:\n{}".format(filePath, str(e)))
elif self.task == "tbox":
Expand All @@ -163,35 +170,21 @@ def run(self):
metadata["tbox"] = extract_tbox
except Exception as e:
logger.warning("Error extracting tbox, time format not found \n {}:".format(str(e)))
elif self.task == "crs":
try:
# the CRS is not necessarily required
if bbox and hasattr(usedModule, 'getCRS'):
metadata["crs"] = usedModule.getCRS(filePath)
elif tbox and hasattr(usedModule, 'getCRS'):
metadata["crs"] = usedModule.getCRS(filePath)
else:
logger.debug("The CRS cannot be extracted from the file {}".format(filePath))
except Exception as e:
logger.warning("Error for {} extracting CRS:\n{}".format(filePath, str(e)))
else:
raise Exception("Unsupported thread task {}".format(self.task))

logger.debug("Completed thread {} on file {}".format(self.task, filePath))

thread_bbox_except = thread("bbox")
thread_temp_except = thread("tbox")
thread_crs_except = thread("crs")

logger.debug("Starting 3 threads for extraction.")
logger.debug("Starting 2 threads for extraction.")

thread_bbox_except.start()
thread_temp_except.start()
thread_crs_except.start()

thread_bbox_except.join()
thread_temp_except.join()
thread_crs_except.join()

logger.debug("Extraction finished: {}".format(str(metadata)))
return metadata
55 changes: 30 additions & 25 deletions geoextent/lib/handleCSV.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@

logger = logging.getLogger("geoextent")

search = { "longitude" : ["(.)*longitude","(.)*long(.)*", "^lon","lon$","(.)*lng(.)*", "^x","x$"],
"latitude" : ["(.)*latitude(.)*", "^lat","lat$", "^y","y$"],
"time":["(.)*timestamp(.)*", "(.)*datetime(.)*", "(.)*time(.)*", "date$","^date"]}
search = {"longitude": ["(.)*longitude", "(.)*long(.)*", "^lon", "lon$", "(.)*lng(.)*", "^x", "x$"],
"latitude": ["(.)*latitude(.)*", "^lat", "lat$", "^y", "y$"],
"time": ["(.)*timestamp(.)*", "(.)*datetime(.)*", "(.)*time(.)*", "date$", "^date"]}


def get_handler_name():
return "handleCSV"


def checkFileSupported(filepath):
'''Checks whether it is valid CSV or not. \n
input "path": type string, path to file which shall be extracted \n
raise exception if not valid
'''

logger.info(filepath)
try:
file = gdal.OpenEx(filepath)
driver = file.GetDriver().ShortName
Expand All @@ -45,58 +47,59 @@ def getBoundingBox(filePath):
input "filepath": type string, file path to csv file \n
returns spatialExtent: type dict, keys: "bbox" (type list, length = 4 , type = float, schema = [min(longs), min(lats), max(longs), max(lats)]) and "crs" (coordinate reference system as returned by getCRS)
'''

with open(filePath) as csv_file:
# To get delimiter either comma or semicolon
daten = hf.getDelimiter(csv_file)

elements = []
for x in daten:
elements.append(x)

spatialExtent= []
spatialLatExtent=[]
spatialLonExtent=[]

spatialLatExtent= hf.searchForParameters(elements, search['latitude'], exp_data= 'numeric')
spatialLatExtent = hf.searchForParameters(elements, search['latitude'], exp_data='numeric')

minlat = None
maxlat = None

minlat= None
maxlat= None
if spatialLatExtent is None:
pass
else:
minlat= (min(spatialLatExtent))
maxlat= (max(spatialLatExtent))
minlat = (min(spatialLatExtent))
maxlat = (max(spatialLatExtent))

spatialLonExtent= hf.searchForParameters(elements, search['longitude'], exp_data= 'numeric')
spatialLonExtent = hf.searchForParameters(elements, search['longitude'], exp_data='numeric')

if spatialLonExtent is None:
raise Exception('The csv file from ' + filePath + ' has no BoundingBox')
else:
minlon= (min(spatialLonExtent))
maxlon= (max(spatialLonExtent))

spatialExtent= [minlon,minlat,maxlon,maxlat]
if not spatialExtent:
minlon = (min(spatialLonExtent))
maxlon = (max(spatialLonExtent))

bbox = [minlon, minlat, maxlon, maxlat]
logger.debug("Extracted Bounding box (without projection): {}".format(bbox))
crs = getCRS(filePath)
logger.debug("Extracted CRS: {}".format(crs))
spatialExtent = {"bbox": bbox, "crs": crs}
if not bbox or not crs:
raise Exception("Bounding box could not be extracted")
return spatialExtent

return spatialExtent

def getTemporalExtent(filePath, num_sample):
''' extract time extent from csv string \n
input "filePath": type string, file path to csv File \n
returns temporal extent of the file: type list, length = 2, both entries have the type str, temporalExtent[0] <= temporalExtent[1]
'''


with open(filePath) as csv_file:
# To get delimiter either comma or semicolon
daten = hf.getDelimiter(csv_file)

elements = []
for x in daten:
elements.append(x)
logger.info("Elements {}".format(elements))

all_temporal_extent = hf.searchForParameters(elements, search['time'], exp_data = "time" )
all_temporal_extent = hf.searchForParameters(elements, search['time'], exp_data="time")
if all_temporal_extent is None:
raise Exception('The csv file from ' + filePath + ' has no TemporalExtent')
else:
Expand All @@ -115,13 +118,15 @@ def getCRS(filePath):
'''extracts coordinatesystem from csv File \n
input "filepath": type string, file path to csv file \n
returns the epsg code of the used coordinate reference system, type list, contains extracted coordinate system of content from csv file
'''
'''

with open(filePath) as csv_file:
daten = csv.reader(csv_file.readlines())
elements = []
for x in daten:
elements.append(x)
if hf.searchForParameters(elements,search['latitude']+search['longitude']) is None:

if hf.searchForParameters(elements, search['latitude'] + search['longitude']) is None:
if hf.searchForParameters(elements, ["crs","srsID"]) is None:
raise Exception('The csv file from ' + filePath + ' has no CRS')
if hf.searchForParameters(elements, ["crs","srsID"]) == "WGS84":
Expand Down
25 changes: 10 additions & 15 deletions geoextent/lib/handleRaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,20 @@ def getBoundingBox(filePath):
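returns spatial extent of the file: type dict, keys: "bbox" (type list, length = 4 , type = float, schema = [min(longs), min(lats), max(longs), max(lats)]) and "crs" (type string, EPSG code of the output coordinate reference system)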
'''
# Enable exceptions

crs_output = hf.WGS84_EPSG_ID
gdal.UseExceptions()

geotiffContent = gdal.Open(filePath)

# get the existing coordinate system
old_cs = osr.SpatialReference()
old_cs.ImportFromWkt(geotiffContent.GetProjectionRef())
old_crs = osr.SpatialReference()
old_crs.ImportFromWkt(geotiffContent.GetProjectionRef())

# create the new coordinate system

new_cs = osr.SpatialReference()
new_cs.ImportFromEPSG(hf.WGS84_EPSG_ID)
new_crs = osr.SpatialReference()
new_crs.ImportFromEPSG(crs_output)

# get the point to transform, pixel (0,0) in this case
width = geotiffContent.RasterXSize
Expand All @@ -64,27 +66,20 @@ def getBoundingBox(filePath):
maxx = gt[0] + width * gt[1] + height * gt[2]
maxy = gt[3]

transform = osr.CoordinateTransformation(old_cs, new_cs)
transform = osr.CoordinateTransformation(old_crs, new_crs)
# get the coordinates in lat long
latlongmin = transform.TransformPoint(minx, miny)
latlongmax = transform.TransformPoint(maxx, maxy)

bbox = [latlongmin[0], latlongmin[1], latlongmax[0], latlongmax[1]]

if int(osgeo.__version__[0]) >= 3:
if old_cs.GetAxisMappingStrategy() == 1:
if old_crs.GetAxisMappingStrategy() == 1:
bbox = [latlongmin[1], latlongmin[0], latlongmax[1], latlongmax[0]]

return bbox


def getCRS(filePath):
''' gets the coordinate reference systems from the geotiff file \n
input "filepath": type string, file path to geotiff file \n
return epsg code of the used coordinate reference system: type int
'''
spatialExtent = {"bbox": bbox, "crs": str(crs_output)}

return "4326"
return spatialExtent


def getTemporalExtent(filePath):
Expand Down
Loading

0 comments on commit 34f3b2a

Please sign in to comment.