From 51340112e9b75c4fd04cba42797e22467c5bcc07 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Thu, 22 Feb 2024 00:42:03 -0700 Subject: [PATCH 01/53] GDALVector: initial commit --- R/gdalvector.R | 1 + src/RcppExports.cpp | 2 + src/gdalvector.cpp | 271 ++++++++++++++++++++++++++++++++++++++++++++ src/gdalvector.h | 57 ++++++++++ 4 files changed, 331 insertions(+) create mode 100644 R/gdalvector.R create mode 100644 src/gdalvector.cpp create mode 100644 src/gdalvector.h diff --git a/R/gdalvector.R b/R/gdalvector.R new file mode 100644 index 000000000..4283b26c1 --- /dev/null +++ b/R/gdalvector.R @@ -0,0 +1 @@ +Rcpp::loadModule("mod_GDALVector", TRUE) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 81f149bc7..e6bbecd56 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1046,6 +1046,7 @@ END_RCPP RcppExport SEXP _rcpp_module_boot_mod_cmb_table(); RcppExport SEXP _rcpp_module_boot_mod_GDALRaster(); +RcppExport SEXP _rcpp_module_boot_mod_GDALVector(); RcppExport SEXP _rcpp_module_boot_mod_running_stats(); static const R_CallMethodDef CallEntries[] = { @@ -1134,6 +1135,7 @@ static const R_CallMethodDef CallEntries[] = { {"_gdalraster_bbox_to_wkt", (DL_FUNC) &_gdalraster_bbox_to_wkt, 3}, {"_rcpp_module_boot_mod_cmb_table", (DL_FUNC) &_rcpp_module_boot_mod_cmb_table, 0}, {"_rcpp_module_boot_mod_GDALRaster", (DL_FUNC) &_rcpp_module_boot_mod_GDALRaster, 0}, + {"_rcpp_module_boot_mod_GDALVector", (DL_FUNC) &_rcpp_module_boot_mod_GDALVector, 0}, {"_rcpp_module_boot_mod_running_stats", (DL_FUNC) &_rcpp_module_boot_mod_running_stats, 0}, {NULL, NULL, 0} }; diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp new file mode 100644 index 000000000..48ab4b890 --- /dev/null +++ b/src/gdalvector.cpp @@ -0,0 +1,271 @@ +/* Implementation of class GDALVector + Encapsulates a GDALDataset and one OGRLayer + Chris Toney */ + +#include "gdal.h" +#include "cpl_error.h" +#include "cpl_port.h" +#include "cpl_string.h" +#include "ogr_srs_api.h" + +#include "gdalraster.h" 
+#include "gdalvector.h" + + +GDALVector::GDALVector() : + dsn_in(""), + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(nullptr), + bVirtual(true) {} + +GDALVector::GDALVector(OGRLayerH lyr_obj) : + dsn_in(""), + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(lyr_obj), + bVirtual(true) {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : + GDALVector(dsn, layer, true) {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, + bool read_only) : + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(nullptr), + bVirtual(false) { + + dsn_in = Rcpp::as(_check_gdal_filename(dsn)); + if (!read_only) + eAccess = GA_Update; + + unsigned int nOpenFlags = GDAL_OF_VECTOR | GDAL_OF_SHARED; + if (read_only) + nOpenFlags |= GDAL_OF_READONLY; + else + nOpenFlags |= GDAL_OF_UPDATE; + + hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, nullptr, nullptr); + if (hDataset == nullptr) + Rcpp::stop("Open dataset failed."); + + hLayer = GDALDatasetGetLayerByName(hDataset, layer.c_str()); + if (hLayer == nullptr) + Rcpp::stop("Failed to get layer object."); + else + OGR_L_ResetReading(hLayer); + +} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, + bool read_only, Rcpp::CharacterVector open_options) : + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(nullptr), + bVirtual(false) { + + dsn_in = Rcpp::as(_check_gdal_filename(dsn)); + if (!read_only) + eAccess = GA_Update; + + std::vector dsoo(open_options.size() + 1); + for (R_xlen_t i = 0; i < open_options.size(); ++i) { + dsoo[i] = (char *) (open_options[i]); + } + dsoo.push_back(nullptr); + + unsigned int nOpenFlags = GDAL_OF_VECTOR | GDAL_OF_SHARED; + if (read_only) + nOpenFlags |= GDAL_OF_READONLY; + else + nOpenFlags |= GDAL_OF_UPDATE; + + hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, dsoo.data(), nullptr); + if (hDataset == nullptr) + Rcpp::stop("Open raster failed."); + + hLayer = GDALDatasetGetLayerByName(hDataset, layer.c_str()); + 
if (hLayer == nullptr) + Rcpp::stop("Failed to get layer object."); + else + OGR_L_ResetReading(hLayer); + +} + +std::string GDALVector::getDsn() const { + return dsn_in; +} + +bool GDALVector::isOpen() const { + if (hDataset == nullptr) + return false; + else + return true; +} + +bool GDALVector::isVirtual() const { + return bVirtual; +} + +Rcpp::CharacterVector GDALVector::getFileList() const { + _checkAccess(GA_ReadOnly); + + char **papszFiles; + papszFiles = GDALGetFileList(hDataset); + + int items = CSLCount(papszFiles); + if (items > 0) { + Rcpp::CharacterVector files(items); + for (int i=0; i < items; ++i) { + files(i) = papszFiles[i]; + } + CSLDestroy(papszFiles); + return files; + } + else { + CSLDestroy(papszFiles); + return ""; + } +} + +std::string GDALVector::getDriverShortName() const { + _checkAccess(GA_ReadOnly); + + GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); + return GDALGetDriverShortName(hDriver); +} + +std::string GDALVector::getDriverLongName() const { + _checkAccess(GA_ReadOnly); + + GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); + return GDALGetDriverLongName(hDriver); +} + +double GDALVector::getFeatureCount(bool force) const { + // OGR_L_GetFeatureCount returns GIntBig so we return as double to R + _checkAccess(GA_ReadOnly); + + return OGR_L_GetFeatureCount(hLayer, force); +} + +SEXP GDALVector::getNextFeature() const { + _checkAccess(GA_ReadOnly); + + OGRFeatureH hFeature = OGR_L_GetNextFeature(hLayer); + + if (hFeature != nullptr) { + Rcpp::List list_out = Rcpp::List::create(); + OGRFeatureDefnH hFDefn = OGR_L_GetLayerDefn(hLayer); + int iField; + + for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); + if (!OGR_F_IsFieldSet(hFeature, iField) || + OGR_F_IsFieldNull(hFeature, iField)) { + continue; + } + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { + Rcpp::IntegerVector value(1); + value[0] = 
OGR_F_GetFieldAsInteger(hFeature, iField); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTInteger64) { + // R does not have native int64 so handled as double for now + Rcpp::NumericVector value(1); + value[0] = static_cast( + OGR_F_GetFieldAsInteger64(hFeature, iField)); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTReal) { + Rcpp::NumericVector value(1); + value[0] = OGR_F_GetFieldAsDouble(hFeature, iField); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else { + Rcpp::CharacterVector value(1); + value[0] = OGR_F_GetFieldAsString(hFeature, iField); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + } + + int nGeomFieldCount = OGR_F_GetGeomFieldCount(hFeature); + for (int i = 0; i < nGeomFieldCount; ++i) { + OGRGeometryH hGeometry = OGR_F_GetGeomFieldRef(hFeature, i); + char* pszWKT; + OGR_G_ExportToWkt(hGeometry, &pszWKT); + Rcpp::CharacterVector wkt(1); + wkt[0] = pszWKT; + OGRGeomFieldDefnH hGeomFldDefn = + OGR_F_GetGeomFieldDefnRef(hFeature, i); + list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); + CPLFree(pszWKT); + } + + return list_out; + } + else { + return R_NilValue; + } +} + +void GDALVector::close() { + GDALReleaseDataset(hDataset); + hDataset = nullptr; +} + +// **************************************************************************** +// class methods for internal use not exposed in R +// **************************************************************************** + +void GDALVector::_checkAccess(GDALAccess access_needed) const { + if (!isOpen()) + Rcpp::stop("Raster dataset is not open."); + + if (access_needed == GA_Update && eAccess == GA_ReadOnly) + Rcpp::stop("Dataset is read-only."); +} + + +// **************************************************************************** + +RCPP_MODULE(mod_GDALVector) { + + Rcpp::class_("GDALVector") + + .constructor + ("Default constructor, only for allocation in 
std::vector.") + .constructor + ("Usage: new(GDALVector, lyr_obj)") + .constructor + ("Usage: new(GDALVector, dsn, layer)") + .constructor + ("Usage: new(GDALVector, dsn, layer, read_only=[TRUE|FALSE])") + .constructor + ("Usage: new(GDALVector, dsn, layer, read_only, open_options)") + + // exposed member functions + .const_method("getDsn", &GDALVector::getDsn, + "Return the DSN.") + .const_method("isOpen", &GDALVector::isOpen, + "Is the dataset open?") + .const_method("isVirtual", &GDALVector::isVirtual, + "Is this a virtual layer?") + .const_method("getFileList", &GDALVector::getFileList, + "Fetch files forming dataset.") + .const_method("getDriverShortName", &GDALVector::getDriverShortName, + "Return the short name of the format driver.") + .const_method("getDriverLongName", &GDALVector::getDriverLongName, + "Return the long name of the format driver.") + .const_method("getFeatureCount", &GDALVector::getFeatureCount, + "Fetch the feature count in this layer.") + .const_method("getNextFeature", &GDALVector::getNextFeature, + "Fetch the next available feature from this layer.") + .method("close", &GDALVector::close, + "Release the dataset for proper cleanup.") + + ; +} diff --git a/src/gdalvector.h b/src/gdalvector.h new file mode 100644 index 000000000..ba3293434 --- /dev/null +++ b/src/gdalvector.h @@ -0,0 +1,57 @@ +/* R interface to a subset of the GDAL C API for vector + https://gdal.org/api/vector_c_api.html + Chris Toney */ + +#ifndef gdalvector_H +#define gdalvector_H + +#include "rcpp_util.h" + +#include +#include + +// Predeclare some GDAL types until the public header is included +#ifndef GDAL_H_INCLUDED +typedef void *GDALDatasetH; +typedef void *OGRLayerH; +typedef enum {GA_ReadOnly = 0, GA_Update = 1} GDALAccess; +#endif + + +class GDALVector { + + private: + std::string dsn_in; + GDALDatasetH hDataset; + GDALAccess eAccess; + OGRLayerH hLayer; + bool bVirtual; + + public: + GDALVector(); + GDALVector(OGRLayerH lyr_obj); + 
GDALVector(Rcpp::CharacterVector dsn, std::string layer); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, + Rcpp::CharacterVector open_options); + + std::string getDsn() const; + bool isOpen() const; + bool isVirtual() const; + Rcpp::CharacterVector getFileList() const; + + std::string getDriverShortName() const; + std::string getDriverLongName() const; + + double getFeatureCount(bool force) const; + SEXP getNextFeature() const; + + void close(); + + // methods for internal use not exported to R + void _checkAccess(GDALAccess access_needed) const; +}; + +RCPP_EXPOSED_CLASS(GDALVector) + +#endif From 2f9de5fa50c2c2527d6717e9b892a24ee979c03a Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Thu, 22 Feb 2024 23:41:36 -0700 Subject: [PATCH 02/53] getLayerDefn(), setAttributeFilter(), resetReading() --- src/gdalvector.cpp | 115 +++++++++++++++++++++++++++++++++++++++++---- src/gdalvector.h | 8 +++- 2 files changed, 111 insertions(+), 12 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 48ab4b890..2024c3acf 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -143,14 +143,87 @@ std::string GDALVector::getDriverLongName() const { return GDALGetDriverLongName(hDriver); } -double GDALVector::getFeatureCount(bool force) const { - // OGR_L_GetFeatureCount returns GIntBig so we return as double to R +Rcpp::List GDALVector::getLayerDefn() const { + _checkAccess(GA_ReadOnly); + + OGRFeatureDefnH hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) + Rcpp::stop("Error: could not obtain layer definition."); + + Rcpp::List list_out = Rcpp::List::create(); + int iField; + + for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); + if (hFieldDefn == nullptr) + Rcpp::stop("Error: could not obtain field definition."); + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); 
+ Rcpp::CharacterVector value(1); + + // TODO: add list types, date, time, binary, etc. + if (fld_type == OFTInteger) { + value[0] = "OFTInteger"; + } + else if (fld_type == OFTInteger64) { + value[0] = "OFTInteger64"; + } + else if (fld_type == OFTReal) { + value[0] = "OFTReal"; + } + else if (fld_type == OFTString) { + value[0] = "OFTString"; + } + else { + value[0] = "default (read as OFTString)"; + } + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + + int nGeomFieldCount = OGR_FD_GetGeomFieldCount(hFDefn); + for (int i = 0; i < nGeomFieldCount; ++i) { + // TODO: get geometry type +// OGRGeometryH hGeometry = OGR_F_GetGeomFieldRef(hFeature, i); +// if (hGeomFldDefn == nullptr) +// Rcpp::stop("Error: could not obtain geometry field definition."); +// char* pszWKT; +// OGR_G_ExportToWkt(hGeometry, &pszWKT); +// Rcpp::CharacterVector wkt(1); +// wkt[0] = pszWKT; + OGRGeomFieldDefnH hGeomFldDefn = + OGR_FD_GetGeomFieldDefn(hFDefn, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("Error: could not obtain geometry field definition."); + list_out.push_back("OGRwkbGeometryType", OGR_GFld_GetNameRef(hGeomFldDefn)); + // TODO: get possible spatial ref for this field OGR_GFld_GetSpatialRef() + // where should spatial ref at the geometry field-level be stored + //CPLFree(pszWKT); + } + + return list_out; +} + +void GDALVector::setAttributeFilter(std::string query) { + _checkAccess(GA_ReadOnly); + + const char* query_in = NULL; + if (query != "") + query_in = query.c_str(); + + if (OGR_L_SetAttributeFilter(hLayer, query_in) != OGRERR_NONE) + Rcpp::stop("Error setting filter, possibly in the query expression"); +} + +double GDALVector::getFeatureCount(bool force) { + // OGR_L_GetFeatureCount() returns GIntBig so we return as double to R. + // GDAL doc: Note that some implementations of this method may alter the + // read cursor of the layer. 
_checkAccess(GA_ReadOnly); return OGR_L_GetFeatureCount(hLayer, force); } -SEXP GDALVector::getNextFeature() const { +SEXP GDALVector::getNextFeature() { _checkAccess(GA_ReadOnly); OGRFeatureH hFeature = OGR_L_GetNextFeature(hLayer); @@ -158,15 +231,20 @@ SEXP GDALVector::getNextFeature() const { if (hFeature != nullptr) { Rcpp::List list_out = Rcpp::List::create(); OGRFeatureDefnH hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) + Rcpp::stop("Error: could not obtain layer definition."); int iField; for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); + if (hFieldDefn == nullptr) + Rcpp::stop("Error: could not obtain field definition."); if (!OGR_F_IsFieldSet(hFeature, iField) || OGR_F_IsFieldNull(hFeature, iField)) { continue; } + // TODO: support date, time, binary, etc. OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); if (fld_type == OFTInteger) { Rcpp::IntegerVector value(1); @@ -186,6 +264,7 @@ SEXP GDALVector::getNextFeature() const { list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else { + // read as string for now Rcpp::CharacterVector value(1); value[0] = OGR_F_GetFieldAsString(hFeature, iField); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); @@ -195,12 +274,16 @@ SEXP GDALVector::getNextFeature() const { int nGeomFieldCount = OGR_F_GetGeomFieldCount(hFeature); for (int i = 0; i < nGeomFieldCount; ++i) { OGRGeometryH hGeometry = OGR_F_GetGeomFieldRef(hFeature, i); + if (hGeometry == nullptr) + Rcpp::stop("Error: could not obtain geometry reference."); char* pszWKT; OGR_G_ExportToWkt(hGeometry, &pszWKT); Rcpp::CharacterVector wkt(1); wkt[0] = pszWKT; OGRGeomFieldDefnH hGeomFldDefn = OGR_F_GetGeomFieldDefnRef(hFeature, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("Error: could not obtain geometry field definition."); list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); CPLFree(pszWKT); } @@ -212,6 +295,12 @@ SEXP 
GDALVector::getNextFeature() const { } } +void GDALVector::resetReading() { + _checkAccess(GA_ReadOnly); + + OGR_L_ResetReading(hLayer); +} + void GDALVector::close() { GDALReleaseDataset(hDataset); hDataset = nullptr; @@ -248,23 +337,29 @@ RCPP_MODULE(mod_GDALVector) { ("Usage: new(GDALVector, dsn, layer, read_only, open_options)") // exposed member functions - .const_method("getDsn", &GDALVector::getDsn, + .const_method("getDsn", &GDALVector::getDsn, "Return the DSN.") - .const_method("isOpen", &GDALVector::isOpen, + .const_method("isOpen", &GDALVector::isOpen, "Is the dataset open?") - .const_method("isVirtual", &GDALVector::isVirtual, + .const_method("isVirtual", &GDALVector::isVirtual, "Is this a virtual layer?") - .const_method("getFileList", &GDALVector::getFileList, + .const_method("getFileList", &GDALVector::getFileList, "Fetch files forming dataset.") .const_method("getDriverShortName", &GDALVector::getDriverShortName, "Return the short name of the format driver.") .const_method("getDriverLongName", &GDALVector::getDriverLongName, "Return the long name of the format driver.") - .const_method("getFeatureCount", &GDALVector::getFeatureCount, + .const_method("getLayerDefn", &GDALVector::getLayerDefn, + "Fetch the schema information for this layer.") + .method("setAttributeFilter", &GDALVector::setAttributeFilter, + "Set a new attribute query.") + .method("getFeatureCount", &GDALVector::getFeatureCount, "Fetch the feature count in this layer.") - .const_method("getNextFeature", &GDALVector::getNextFeature, + .method("getNextFeature", &GDALVector::getNextFeature, "Fetch the next available feature from this layer.") - .method("close", &GDALVector::close, + .method("resetReading", &GDALVector::resetReading, + "Reset feature reading to start on the first feature.") + .method("close", &GDALVector::close, "Release the dataset for proper cleanup.") ; diff --git a/src/gdalvector.h b/src/gdalvector.h index ba3293434..eb94a8f53 100644 --- a/src/gdalvector.h +++ 
b/src/gdalvector.h @@ -43,8 +43,12 @@ class GDALVector { std::string getDriverShortName() const; std::string getDriverLongName() const; - double getFeatureCount(bool force) const; - SEXP getNextFeature() const; + Rcpp::List getLayerDefn() const; + + void setAttributeFilter(std::string qry); + double getFeatureCount(bool force); + SEXP getNextFeature(); + void resetReading(); void close(); From ea745602692b66a28e39e4c2bb183d7dcf49fc4f Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Fri, 23 Feb 2024 16:25:11 -0700 Subject: [PATCH 03/53] getLayerDefn(): add full field defn, geom field defn --- src/gdalvector.cpp | 105 ++++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 2024c3acf..79fe2b2d7 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -46,7 +46,8 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, else nOpenFlags |= GDAL_OF_UPDATE; - hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, nullptr, nullptr); + hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, + nullptr, nullptr, nullptr); if (hDataset == nullptr) Rcpp::stop("Open dataset failed."); @@ -81,7 +82,8 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, else nOpenFlags |= GDAL_OF_UPDATE; - hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, dsoo.data(), nullptr); + hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, + nullptr, dsoo.data(), nullptr); if (hDataset == nullptr) Rcpp::stop("Open raster failed."); @@ -151,53 +153,99 @@ Rcpp::List GDALVector::getLayerDefn() const { Rcpp::stop("Error: could not obtain layer definition."); Rcpp::List list_out = Rcpp::List::create(); + std::string sValue; + int nValue; + bool bValue; int iField; + // attribute fields for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { + + Rcpp::List list_fld_defn = Rcpp::List::create(); OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); if 
(hFieldDefn == nullptr) Rcpp::stop("Error: could not obtain field definition."); OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - Rcpp::CharacterVector value(1); - // TODO: add list types, date, time, binary, etc. if (fld_type == OFTInteger) { - value[0] = "OFTInteger"; + sValue = "OFTInteger"; } else if (fld_type == OFTInteger64) { - value[0] = "OFTInteger64"; + sValue = "OFTInteger64"; } else if (fld_type == OFTReal) { - value[0] = "OFTReal"; + sValue = "OFTReal"; } else if (fld_type == OFTString) { - value[0] = "OFTString"; + sValue = "OFTString"; } else { - value[0] = "default (read as OFTString)"; + sValue = "default (read as OFTString)"; } - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + list_fld_defn.push_back(sValue, "type"); + + nValue = OGR_Fld_GetWidth(hFieldDefn); + list_fld_defn.push_back(nValue, "width"); + + nValue = OGR_Fld_GetPrecision(hFieldDefn); + list_fld_defn.push_back(nValue, "precision"); + + bValue = OGR_Fld_IsNullable(hFieldDefn); + list_fld_defn.push_back(bValue, "is_nullable"); + + bValue = OGR_Fld_IsUnique(hFieldDefn); + list_fld_defn.push_back(bValue, "is_unique"); + + if (OGR_Fld_GetDefault(hFieldDefn) != nullptr) + sValue = std::string(OGR_Fld_GetDefault(hFieldDefn)); + else + sValue = ""; + list_fld_defn.push_back(sValue, "default"); + + bValue = OGR_Fld_IsIgnored(hFieldDefn); + list_fld_defn.push_back(bValue, "is_ignored"); + + list_out.push_back(list_fld_defn, OGR_Fld_GetNameRef(hFieldDefn)); } - int nGeomFieldCount = OGR_FD_GetGeomFieldCount(hFDefn); - for (int i = 0; i < nGeomFieldCount; ++i) { - // TODO: get geometry type -// OGRGeometryH hGeometry = OGR_F_GetGeomFieldRef(hFeature, i); -// if (hGeomFldDefn == nullptr) -// Rcpp::stop("Error: could not obtain geometry field definition."); -// char* pszWKT; -// OGR_G_ExportToWkt(hGeometry, &pszWKT); -// Rcpp::CharacterVector wkt(1); -// wkt[0] = pszWKT; + // geometry fields + for (int i = 0; i < OGR_FD_GetGeomFieldCount(hFDefn); ++i) { + + Rcpp::List 
list_geom_fld_defn = Rcpp::List::create(); OGRGeomFieldDefnH hGeomFldDefn = OGR_FD_GetGeomFieldDefn(hFDefn, i); if (hGeomFldDefn == nullptr) Rcpp::stop("Error: could not obtain geometry field definition."); - list_out.push_back("OGRwkbGeometryType", OGR_GFld_GetNameRef(hGeomFldDefn)); - // TODO: get possible spatial ref for this field OGR_GFld_GetSpatialRef() - // where should spatial ref at the geometry field-level be stored - //CPLFree(pszWKT); + + // TODO: get geometry type name ("geometry" for now) + list_geom_fld_defn.push_back("geometry", "type"); + + // include the geom type enum value? + //nValue = OGR_GFld_GetType(hGeomFldDefn); + //list_geom_fld_defn.push_back(nValue, "OGRwkbGeometryType"); + + // TODO: make this always WKT2? + OGRSpatialReferenceH hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); + if (hSRS == nullptr) + Rcpp::stop("Error: could not obtain geometry SRS."); + char *pszSRS_WKT = nullptr; + if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { + Rcpp::stop("Error exporting geometry SRS to WKT."); + } + sValue = std::string(pszSRS_WKT); + list_geom_fld_defn.push_back(sValue, "srs"); + + bValue = OGR_GFld_IsNullable(hGeomFldDefn); + list_geom_fld_defn.push_back(bValue, "is_nullable"); + + bValue = OGR_GFld_IsIgnored(hGeomFldDefn); + list_geom_fld_defn.push_back(bValue, "is_ignored"); + + list_out.push_back(list_geom_fld_defn, + OGR_GFld_GetNameRef(hGeomFldDefn)); + + CPLFree(pszSRS_WKT); } return list_out; @@ -244,7 +292,6 @@ SEXP GDALVector::getNextFeature() { continue; } - // TODO: support date, time, binary, etc. OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); if (fld_type == OFTInteger) { Rcpp::IntegerVector value(1); @@ -264,6 +311,7 @@ SEXP GDALVector::getNextFeature() { list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else { + // TODO: support date, time, binary, etc. 
// read as string for now Rcpp::CharacterVector value(1); value[0] = OGR_F_GetFieldAsString(hFeature, iField); @@ -271,8 +319,7 @@ SEXP GDALVector::getNextFeature() { } } - int nGeomFieldCount = OGR_F_GetGeomFieldCount(hFeature); - for (int i = 0; i < nGeomFieldCount; ++i) { + for (int i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { OGRGeometryH hGeometry = OGR_F_GetGeomFieldRef(hFeature, i); if (hGeometry == nullptr) Rcpp::stop("Error: could not obtain geometry reference."); @@ -283,7 +330,7 @@ SEXP GDALVector::getNextFeature() { OGRGeomFieldDefnH hGeomFldDefn = OGR_F_GetGeomFieldDefnRef(hFeature, i); if (hGeomFldDefn == nullptr) - Rcpp::stop("Error: could not obtain geometry field definition."); + Rcpp::stop("Error: could not obtain geometry field def."); list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); CPLFree(pszWKT); } @@ -312,7 +359,7 @@ void GDALVector::close() { void GDALVector::_checkAccess(GDALAccess access_needed) const { if (!isOpen()) - Rcpp::stop("Raster dataset is not open."); + Rcpp::stop("Dataset is not open."); if (access_needed == GA_Update && eAccess == GA_ReadOnly) Rcpp::stop("Dataset is read-only."); From 25420a8c95d9ab3d56374daa8a1879cc5cc97685 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Fri, 23 Feb 2024 23:25:37 -0700 Subject: [PATCH 04/53] filling in GDALVector --- src/gdalvector.cpp | 169 ++++++++++++++++++++++++++++++++++++--------- src/gdalvector.h | 23 ++++-- 2 files changed, 155 insertions(+), 37 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 79fe2b2d7..e3b74a744 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -1,11 +1,12 @@ /* Implementation of class GDALVector - Encapsulates a GDALDataset and one OGRLayer + Encapsulates one OGRLayer and its GDALDataset Chris Toney */ #include "gdal.h" #include "cpl_error.h" #include "cpl_port.h" #include "cpl_string.h" +#include "ogrsf_frmts.h" #include "ogr_srs_api.h" #include "gdalraster.h" @@ -16,15 +17,7 @@ GDALVector::GDALVector() : 
dsn_in(""), hDataset(nullptr), eAccess(GA_ReadOnly), - hLayer(nullptr), - bVirtual(true) {} - -GDALVector::GDALVector(OGRLayerH lyr_obj) : - dsn_in(""), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(lyr_obj), - bVirtual(true) {} + hLayer(nullptr) {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : GDALVector(dsn, layer, true) {} @@ -33,14 +26,13 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only) : hDataset(nullptr), eAccess(GA_ReadOnly), - hLayer(nullptr), - bVirtual(false) { + hLayer(nullptr) { dsn_in = Rcpp::as(_check_gdal_filename(dsn)); if (!read_only) eAccess = GA_Update; - unsigned int nOpenFlags = GDAL_OF_VECTOR | GDAL_OF_SHARED; + unsigned int nOpenFlags = GDAL_OF_VECTOR; if (read_only) nOpenFlags |= GDAL_OF_READONLY; else @@ -63,8 +55,7 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options) : hDataset(nullptr), eAccess(GA_ReadOnly), - hLayer(nullptr), - bVirtual(false) { + hLayer(nullptr) { dsn_in = Rcpp::as(_check_gdal_filename(dsn)); if (!read_only) @@ -76,7 +67,7 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, } dsoo.push_back(nullptr); - unsigned int nOpenFlags = GDAL_OF_VECTOR | GDAL_OF_SHARED; + unsigned int nOpenFlags = GDAL_OF_VECTOR; if (read_only) nOpenFlags |= GDAL_OF_READONLY; else @@ -106,10 +97,6 @@ bool GDALVector::isOpen() const { return true; } -bool GDALVector::isVirtual() const { - return bVirtual; -} - Rcpp::CharacterVector GDALVector::getFileList() const { _checkAccess(GA_ReadOnly); @@ -145,6 +132,59 @@ std::string GDALVector::getDriverLongName() const { return GDALGetDriverLongName(hDriver); } +std::string GDALVector::getName() const { + _checkAccess(GA_ReadOnly); + + return OGR_L_GetName(hLayer); +} + +std::string GDALVector::getGeomType() const { + _checkAccess(GA_ReadOnly); + + OGRwkbGeometryType eType = OGR_L_GetGeomType(hLayer); + return OGRGeometryTypeToName(eType); +} + 
+std::string GDALVector::getGeometryColumn() const { + _checkAccess(GA_ReadOnly); + + return OGR_L_GetGeometryColumn(hLayer); +} + +std::string GDALVector::getSpatialRef() const { + // OGRLayer::GetSpatialRef() as WKT string + _checkAccess(GA_ReadOnly); + + OGRSpatialReferenceH hSRS = OGR_L_GetSpatialRef(hLayer); + if (hSRS == nullptr) + Rcpp::stop("Error: could not obtain SRS."); + char *pszSRS_WKT = nullptr; + if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { + Rcpp::stop("Error exporting SRS to WKT."); + } + std::string srs_wkt(pszSRS_WKT); + CPLFree(pszSRS_WKT); + + return srs_wkt; +} + +Rcpp::NumericVector GDALVector::bbox() { + // Note: bForce=true in tha call to OGR_L_GetExtent(), so the entire + // layer may be scanned to compute MBR. + // Depending on the driver, a spatial filter may/may not be taken into + // account. So it is safer to call bbox() without setting a spatial filter. + _checkAccess(GA_ReadOnly); + + OGREnvelope envelope; + if (OGR_L_GetExtent(hLayer, &envelope, true) != OGRERR_NONE) + Rcpp::stop("Error: the extent of the layer cannot be determined."); + + Rcpp::NumericVector bbox_out = { + envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY}; + + return bbox_out; +} + Rcpp::List GDALVector::getLayerDefn() const { _checkAccess(GA_ReadOnly); @@ -206,6 +246,9 @@ Rcpp::List GDALVector::getLayerDefn() const { bValue = OGR_Fld_IsIgnored(hFieldDefn); list_fld_defn.push_back(bValue, "is_ignored"); + bValue = false; + list_fld_defn.push_back(bValue, "is_geom"); + list_out.push_back(list_fld_defn, OGR_Fld_GetNameRef(hFieldDefn)); } @@ -218,14 +261,10 @@ Rcpp::List GDALVector::getLayerDefn() const { if (hGeomFldDefn == nullptr) Rcpp::stop("Error: could not obtain geometry field definition."); - // TODO: get geometry type name ("geometry" for now) - list_geom_fld_defn.push_back("geometry", "type"); + OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); + sValue = std::string(OGRGeometryTypeToName(eType)); + 
list_geom_fld_defn.push_back(sValue, "type"); - // include the geom type enum value? - //nValue = OGR_GFld_GetType(hGeomFldDefn); - //list_geom_fld_defn.push_back(nValue, "OGRwkbGeometryType"); - - // TODO: make this always WKT2? OGRSpatialReferenceH hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); if (hSRS == nullptr) Rcpp::stop("Error: could not obtain geometry SRS."); @@ -241,6 +280,9 @@ Rcpp::List GDALVector::getLayerDefn() const { bValue = OGR_GFld_IsIgnored(hGeomFldDefn); list_geom_fld_defn.push_back(bValue, "is_ignored"); + + bValue = true; + list_geom_fld_defn.push_back(bValue, "is_geom"); list_out.push_back(list_geom_fld_defn, OGR_GFld_GetNameRef(hGeomFldDefn)); @@ -262,6 +304,21 @@ void GDALVector::setAttributeFilter(std::string query) { Rcpp::stop("Error setting filter, possibly in the query expression"); } +void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { + _checkAccess(GA_ReadOnly); + + if (Rcpp::any(Rcpp::is_na(bbox))) + Rcpp::stop("Error: bbox has one or more NA values."); + + OGR_L_SetSpatialFilterRect(hLayer, bbox[0], bbox[1], bbox[2], bbox[3]); +} + +void GDALVector::clearSpatialFilter() { + _checkAccess(GA_ReadOnly); + + OGR_L_SetSpatialFilter(hLayer, nullptr); +} + double GDALVector::getFeatureCount(bool force) { // OGR_L_GetFeatureCount() returns GIntBig so we return as double to R. 
// GDAL doc: Note that some implementations of this method may alter the @@ -348,6 +405,35 @@ void GDALVector::resetReading() { OGR_L_ResetReading(hLayer); } +void GDALVector::layerIntersection( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + // cast to the underlying type + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + OGRErr err = OGR_L_Intersection( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during intersection or execution was interrupted."); + +} + void GDALVector::close() { GDALReleaseDataset(hDataset); hDataset = nullptr; @@ -365,6 +451,12 @@ void GDALVector::_checkAccess(GDALAccess access_needed) const { Rcpp::stop("Dataset is read-only."); } +OGRLayerH GDALVector::_getOGRLayerH() { + _checkAccess(GA_ReadOnly); + + return hLayer; +} + // **************************************************************************** @@ -373,9 +465,7 @@ RCPP_MODULE(mod_GDALVector) { Rcpp::class_("GDALVector") .constructor - ("Default constructor, only for allocation in std::vector.") - .constructor - ("Usage: new(GDALVector, lyr_obj)") + ("Default constructor, only for allocations in std::vector.") .constructor ("Usage: new(GDALVector, dsn, layer)") .constructor @@ -388,26 +478,41 @@ RCPP_MODULE(mod_GDALVector) { "Return the DSN.") .const_method("isOpen", &GDALVector::isOpen, "Is the dataset open?") - .const_method("isVirtual", &GDALVector::isVirtual, - "Is this a virtual layer?") .const_method("getFileList", &GDALVector::getFileList, "Fetch files forming dataset.") .const_method("getDriverShortName", &GDALVector::getDriverShortName, "Return 
the short name of the format driver.") .const_method("getDriverLongName", &GDALVector::getDriverLongName, "Return the long name of the format driver.") + .const_method("getName", &GDALVector::getName, + "Return the layer name.") + .const_method("getGeomType", &GDALVector::getGeomType, + "Return the layer geometry type.") + .const_method("getGeometryColumn", &GDALVector::getGeometryColumn, + "Return name of the underlying db column being used as geom column.") + .const_method("getSpatialRef", &GDALVector::getSpatialRef, + "Fetch the spatial reference system for this layer as WKT string.") + .method("bbox", &GDALVector::bbox, + "Return the bounding box (xmin, ymin, xmax, ymax).") .const_method("getLayerDefn", &GDALVector::getLayerDefn, "Fetch the schema information for this layer.") .method("setAttributeFilter", &GDALVector::setAttributeFilter, "Set a new attribute query.") + .method("setSpatialFilterRect", &GDALVector::setSpatialFilterRect, + "Set a new rectangular spatial filter.") + .method("clearSpatialFilter", &GDALVector::clearSpatialFilter, + "Clear the current spatial filter.") .method("getFeatureCount", &GDALVector::getFeatureCount, "Fetch the feature count in this layer.") .method("getNextFeature", &GDALVector::getNextFeature, "Fetch the next available feature from this layer.") .method("resetReading", &GDALVector::resetReading, "Reset feature reading to start on the first feature.") + .method("layerIntersection", &GDALVector::layerIntersection, + "Intersection of this layer with a method layer.") .method("close", &GDALVector::close, "Release the dataset for proper cleanup.") ; } + diff --git a/src/gdalvector.h b/src/gdalvector.h index eb94a8f53..0efb01cfd 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -1,4 +1,5 @@ -/* R interface to a subset of the GDAL C API for vector +/* R interface to a subset of the GDAL C API for vector. + An OGRLayer class, a layer of features in a GDALDataset. 
https://gdal.org/api/vector_c_api.html Chris Toney */ @@ -25,11 +26,9 @@ class GDALVector { GDALDatasetH hDataset; GDALAccess eAccess; OGRLayerH hLayer; - bool bVirtual; public: GDALVector(); - GDALVector(OGRLayerH lyr_obj); GDALVector(Rcpp::CharacterVector dsn, std::string layer); GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, @@ -37,23 +36,37 @@ class GDALVector { std::string getDsn() const; bool isOpen() const; - bool isVirtual() const; Rcpp::CharacterVector getFileList() const; std::string getDriverShortName() const; std::string getDriverLongName() const; + std::string getName() const; + std::string getGeomType() const; + std::string getGeometryColumn() const; + std::string getSpatialRef() const; + Rcpp::NumericVector bbox(); Rcpp::List getLayerDefn() const; - void setAttributeFilter(std::string qry); + void setAttributeFilter(std::string query); + void setSpatialFilterRect(Rcpp::NumericVector bbox); + void clearSpatialFilter(); + double getFeatureCount(bool force); SEXP getNextFeature(); void resetReading(); + void layerIntersection( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void close(); // methods for internal use not exported to R void _checkAccess(GDALAccess access_needed) const; + OGRLayerH _getOGRLayerH(); }; RCPP_EXPOSED_CLASS(GDALVector) From 7ae2d18c5913e8a42c6975c6fe1d1e82f3db3cde Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Fri, 23 Feb 2024 23:26:40 -0700 Subject: [PATCH 05/53] create field optionally --- src/ogr_util.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ogr_util.cpp b/src/ogr_util.cpp index b7f6117f6..dc24b1ade 100644 --- a/src/ogr_util.cpp +++ b/src/ogr_util.cpp @@ -106,13 +106,17 @@ bool _create_ogr(std::string format, std::string dst_filename, } else { layer_ok = true; - hFieldDefn = OGR_Fld_Create(fld_name.c_str(), OFTInteger); - if 
(OGR_L_CreateField(hLayer, hFieldDefn, TRUE) != OGRERR_NONE) - fld_ok = false; - else + if (fld_name != "") { + hFieldDefn = OGR_Fld_Create(fld_name.c_str(), OFTInteger); + if (OGR_L_CreateField(hLayer, hFieldDefn, TRUE) != OGRERR_NONE) + fld_ok = false; + else + fld_ok = true; + OGR_Fld_Destroy(hFieldDefn); + } + else { fld_ok = true; - - OGR_Fld_Destroy(hFieldDefn); + } } OSRDestroySpatialReference(hSRS); From 759ce5a5ea2bc5240aa4a403978235ebad1f8b37 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 24 Feb 2024 01:10:03 -0700 Subject: [PATCH 06/53] layer processing methods --- src/gdalvector.cpp | 188 ++++++++++++++++++++++++++++++++++++++++++++- src/gdalvector.h | 38 ++++++++- 2 files changed, 221 insertions(+), 5 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index e3b74a744..c44306cef 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -430,7 +430,181 @@ void GDALVector::layerIntersection( quiet ? nullptr : GDALTermProgressR, nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during intersection or execution was interrupted."); + Rcpp::stop("Error during Intersection, or execution was interrupted."); + +} + +void GDALVector::layerUnion( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + // cast to the underlying type + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + OGRErr err = OGR_L_Union( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? 
nullptr : GDALTermProgressR, nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Union, or execution was interrupted."); + +} + +void GDALVector::layerSymDifference( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + // cast to the underlying type + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + OGRErr err = OGR_L_SymDifference( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during SymDifference, or execution was interrupted."); + +} + +void GDALVector::layerIdentity( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + // cast to the underlying type + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + OGRErr err = OGR_L_Identity( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? 
nullptr : GDALTermProgressR, nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Identity, or execution was interrupted."); + +} + +void GDALVector::layerUpdate( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + // cast to the underlying type + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + OGRErr err = OGR_L_Update( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Update, or execution was interrupted."); + +} + +void GDALVector::layerClip( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + // cast to the underlying type + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + OGRErr err = OGR_L_Clip( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? 
nullptr : GDALTermProgressR, nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Clip, or execution was interrupted."); + +} + +void GDALVector::layerErase( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + // cast to the underlying type + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + OGRErr err = OGR_L_Erase( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Erase, or execution was interrupted."); } @@ -510,6 +684,18 @@ RCPP_MODULE(mod_GDALVector) { "Reset feature reading to start on the first feature.") .method("layerIntersection", &GDALVector::layerIntersection, "Intersection of this layer with a method layer.") + .method("layerUnion", &GDALVector::layerUnion, + "Union of this layer with a method layer.") + .method("layerSymDifference", &GDALVector::layerSymDifference, + "Symmetrical difference of this layer and a method layer.") + .method("layerIdentity", &GDALVector::layerIdentity, + "Identify features of this layer with the ones from the method layer.") + .method("layerUpdate", &GDALVector::layerUpdate, + "Update this layer with features from the method layer.") + .method("layerClip", &GDALVector::layerClip, + "Clip off areas that are not covered by the method layer.") + .method("layerErase", &GDALVector::layerErase, + "Remove areas that are covered by the method layer.") .method("close", &GDALVector::close, "Release the dataset for proper cleanup.") diff --git a/src/gdalvector.h b/src/gdalvector.h index 0efb01cfd..daca7b4ed 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -57,10 +57,40 @@ class 
GDALVector { void resetReading(); void layerIntersection( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUnion( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerSymDifference( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerIdentity( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUpdate( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerClip( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerErase( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); void close(); From 8c13339b65cb5503536463de28824c26621fa54d Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 24 Feb 2024 07:52:11 -0700 Subject: [PATCH 07/53] testCapability(), add FID --- src/gdalvector.cpp | 34 +++++++++++++++++++++------------- src/gdalvector.h | 4 ++-- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index c44306cef..a350e893b 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -138,6 +138,12 @@ std::string GDALVector::getName() const { return OGR_L_GetName(hLayer); } +bool GDALVector::testCapability(std::string capability) const { + _checkAccess(GA_ReadOnly); + + return OGR_L_TestCapability(hLayer, capability.c_str()); +} + std::string GDALVector::getGeomType() const { _checkAccess(GA_ReadOnly); @@ -197,6 +203,8 @@ Rcpp::List GDALVector::getLayerDefn() const { int nValue; bool bValue; int iField; + + // TODO: include FID here? 
// attribute fields for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { @@ -319,13 +327,13 @@ void GDALVector::clearSpatialFilter() { OGR_L_SetSpatialFilter(hLayer, nullptr); } -double GDALVector::getFeatureCount(bool force) { +double GDALVector::getFeatureCount() { // OGR_L_GetFeatureCount() returns GIntBig so we return as double to R. // GDAL doc: Note that some implementations of this method may alter the // read cursor of the layer. _checkAccess(GA_ReadOnly); - - return OGR_L_GetFeatureCount(hLayer, force); + + return OGR_L_GetFeatureCount(hLayer, true); } SEXP GDALVector::getNextFeature() { @@ -340,6 +348,9 @@ SEXP GDALVector::getNextFeature() { Rcpp::stop("Error: could not obtain layer definition."); int iField; + double FID = static_cast(OGR_F_GetFID(hFeature)); + list_out.push_back(FID, "FID"); + for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); if (hFieldDefn == nullptr) @@ -351,27 +362,23 @@ SEXP GDALVector::getNextFeature() { OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); if (fld_type == OFTInteger) { - Rcpp::IntegerVector value(1); - value[0] = OGR_F_GetFieldAsInteger(hFeature, iField); + int value = OGR_F_GetFieldAsInteger(hFeature, iField); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTInteger64) { // R does not have native int64 so handled as double for now - Rcpp::NumericVector value(1); - value[0] = static_cast( + double value = static_cast( OGR_F_GetFieldAsInteger64(hFeature, iField)); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTReal) { - Rcpp::NumericVector value(1); - value[0] = OGR_F_GetFieldAsDouble(hFeature, iField); + double value = OGR_F_GetFieldAsDouble(hFeature, iField); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else { // TODO: support date, time, binary, etc. 
// read as string for now - Rcpp::CharacterVector value(1); - value[0] = OGR_F_GetFieldAsString(hFeature, iField); + std::string value = OGR_F_GetFieldAsString(hFeature, iField); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } } @@ -382,8 +389,7 @@ SEXP GDALVector::getNextFeature() { Rcpp::stop("Error: could not obtain geometry reference."); char* pszWKT; OGR_G_ExportToWkt(hGeometry, &pszWKT); - Rcpp::CharacterVector wkt(1); - wkt[0] = pszWKT; + std::string wkt(pszWKT); OGRGeomFieldDefnH hGeomFldDefn = OGR_F_GetGeomFieldDefnRef(hFeature, i); if (hGeomFldDefn == nullptr) @@ -660,6 +666,8 @@ RCPP_MODULE(mod_GDALVector) { "Return the long name of the format driver.") .const_method("getName", &GDALVector::getName, "Return the layer name.") + .const_method("testCapability", &GDALVector::testCapability, + "Test if this layer supports the named capability.") .const_method("getGeomType", &GDALVector::getGeomType, "Return the layer geometry type.") .const_method("getGeometryColumn", &GDALVector::getGeometryColumn, diff --git a/src/gdalvector.h b/src/gdalvector.h index daca7b4ed..fbeacc957 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -37,11 +37,11 @@ class GDALVector { std::string getDsn() const; bool isOpen() const; Rcpp::CharacterVector getFileList() const; - std::string getDriverShortName() const; std::string getDriverLongName() const; std::string getName() const; + bool testCapability(std::string capability) const; std::string getGeomType() const; std::string getGeometryColumn() const; std::string getSpatialRef() const; @@ -52,7 +52,7 @@ class GDALVector { void setSpatialFilterRect(Rcpp::NumericVector bbox); void clearSpatialFilter(); - double getFeatureCount(bool force); + double getFeatureCount(); SEXP getNextFeature(); void resetReading(); From 31ad3991cc636266113e87ae7747d11b652528e3 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 24 Feb 2024 10:53:55 -0700 Subject: [PATCH 08/53] add open() for re-open existing dsn/layer --- 
src/gdalvector.cpp | 62 ++++++++++++++++++++++++++++++++++++++++------ src/gdalvector.h | 5 +++- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index a350e893b..747b3e15c 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -15,6 +15,7 @@ GDALVector::GDALVector() : dsn_in(""), + layer_in(""), hDataset(nullptr), eAccess(GA_ReadOnly), hLayer(nullptr) {} @@ -24,6 +25,8 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only) : + layer_in(layer), + open_options_in(Rcpp::CharacterVector::create()), hDataset(nullptr), eAccess(GA_ReadOnly), hLayer(nullptr) { @@ -43,7 +46,7 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, if (hDataset == nullptr) Rcpp::stop("Open dataset failed."); - hLayer = GDALDatasetGetLayerByName(hDataset, layer.c_str()); + hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); if (hLayer == nullptr) Rcpp::stop("Failed to get layer object."); else @@ -53,6 +56,8 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options) : + layer_in(layer), + open_options_in(open_options), hDataset(nullptr), eAccess(GA_ReadOnly), hLayer(nullptr) { @@ -61,9 +66,9 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, if (!read_only) eAccess = GA_Update; - std::vector dsoo(open_options.size() + 1); - for (R_xlen_t i = 0; i < open_options.size(); ++i) { - dsoo[i] = (char *) (open_options[i]); + std::vector dsoo(open_options_in.size() + 1); + for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { + dsoo[i] = (char *) (open_options_in[i]); } dsoo.push_back(nullptr); @@ -76,9 +81,9 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, dsoo.data(), 
nullptr); if (hDataset == nullptr) - Rcpp::stop("Open raster failed."); - - hLayer = GDALDatasetGetLayerByName(hDataset, layer.c_str()); + Rcpp::stop("Open dataset failed."); + + hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); if (hLayer == nullptr) Rcpp::stop("Failed to get layer object."); else @@ -97,6 +102,46 @@ bool GDALVector::isOpen() const { return true; } +void GDALVector::open(bool read_only) { + if (dsn_in == "") + Rcpp::stop("DSN is not set."); + + GDALClose(hDataset); + hDataset = nullptr; + hLayer = nullptr; + + if (read_only) + eAccess = GA_ReadOnly; + else + eAccess = GA_Update; + + std::vector dsoo(open_options_in.size() + 1); + if (open_options_in.size() > 0) { + std::vector dsoo(open_options_in.size() + 1); + for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { + dsoo[i] = (char *) (open_options_in[i]); + } + } + dsoo.push_back(nullptr); + + unsigned int nOpenFlags = GDAL_OF_VECTOR; + if (read_only) + nOpenFlags |= GDAL_OF_READONLY; + else + nOpenFlags |= GDAL_OF_UPDATE; + + hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, + nullptr, dsoo.data(), nullptr); + if (hDataset == nullptr) + Rcpp::stop("Open dataset failed."); + + hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); + if (hLayer == nullptr) + Rcpp::stop("Failed to get layer object."); + else + OGR_L_ResetReading(hLayer); +} + Rcpp::CharacterVector GDALVector::getFileList() const { _checkAccess(GA_ReadOnly); @@ -617,6 +662,7 @@ void GDALVector::layerErase( void GDALVector::close() { GDALReleaseDataset(hDataset); hDataset = nullptr; + hLayer = nullptr; } // **************************************************************************** @@ -658,6 +704,8 @@ RCPP_MODULE(mod_GDALVector) { "Return the DSN.") .const_method("isOpen", &GDALVector::isOpen, "Is the dataset open?") + .method("open", &GDALVector::open, + "(Re-)open the dataset on the existing DSN and layer.") .const_method("getFileList", &GDALVector::getFileList, "Fetch files forming dataset.") 
.const_method("getDriverShortName", &GDALVector::getDriverShortName, diff --git a/src/gdalvector.h b/src/gdalvector.h index fbeacc957..72b5eb5d8 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -23,6 +23,8 @@ class GDALVector { private: std::string dsn_in; + std::string layer_in; + Rcpp::CharacterVector open_options_in; GDALDatasetH hDataset; GDALAccess eAccess; OGRLayerH hLayer; @@ -33,9 +35,10 @@ class GDALVector { GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options); - + std::string getDsn() const; bool isOpen() const; + void open(bool read_only); Rcpp::CharacterVector getFileList() const; std::string getDriverShortName() const; std::string getDriverLongName() const; From 91c944a208ccb537c3ca63b79c3185e25c813d64 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 24 Feb 2024 11:05:22 -0700 Subject: [PATCH 09/53] refactor constructor --- src/gdalvector.cpp | 66 ++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 747b3e15c..eed2c594d 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -14,61 +14,45 @@ GDALVector::GDALVector() : - dsn_in(""), - layer_in(""), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(nullptr) {} + dsn_in(""), + layer_in(""), + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(nullptr) {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : - GDALVector(dsn, layer, true) {} + GDALVector( + dsn, + layer, + true, + Rcpp::CharacterVector::create()) {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only) : - layer_in(layer), - open_options_in(Rcpp::CharacterVector::create()), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(nullptr) { - - dsn_in = Rcpp::as(_check_gdal_filename(dsn)); - if (!read_only) - eAccess = GA_Update; - - unsigned int 
nOpenFlags = GDAL_OF_VECTOR; - if (read_only) - nOpenFlags |= GDAL_OF_READONLY; - else - nOpenFlags |= GDAL_OF_UPDATE; - - hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, - nullptr, nullptr, nullptr); - if (hDataset == nullptr) - Rcpp::stop("Open dataset failed."); - - hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); - if (hLayer == nullptr) - Rcpp::stop("Failed to get layer object."); - else - OGR_L_ResetReading(hLayer); - -} + GDALVector( + dsn, + layer, + read_only, + Rcpp::CharacterVector::create()) {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options) : - layer_in(layer), - open_options_in(open_options), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(nullptr) { + layer_in(layer), + open_options_in(open_options), + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(nullptr) { dsn_in = Rcpp::as(_check_gdal_filename(dsn)); if (!read_only) eAccess = GA_Update; std::vector dsoo(open_options_in.size() + 1); - for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { - dsoo[i] = (char *) (open_options_in[i]); + if (open_options_in.size() > 0) { + std::vector dsoo(open_options_in.size() + 1); + for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { + dsoo[i] = (char *) (open_options_in[i]); + } } dsoo.push_back(nullptr); From 7f65ab70456f67f1635a36d0c323ece2187b50d6 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 24 Feb 2024 13:59:24 -0700 Subject: [PATCH 10/53] fix dataset open options --- src/gdalvector.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index eed2c594d..096468f9d 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -16,6 +16,7 @@ GDALVector::GDALVector() : dsn_in(""), layer_in(""), + open_options_in(Rcpp::CharacterVector::create()), hDataset(nullptr), eAccess(GA_ReadOnly), hLayer(nullptr) {} @@ -49,7 +50,6 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, 
std::vector dsoo(open_options_in.size() + 1); if (open_options_in.size() > 0) { - std::vector dsoo(open_options_in.size() + 1); for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { dsoo[i] = (char *) (open_options_in[i]); } @@ -101,7 +101,6 @@ void GDALVector::open(bool read_only) { std::vector dsoo(open_options_in.size() + 1); if (open_options_in.size() > 0) { - std::vector dsoo(open_options_in.size() + 1); for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { dsoo[i] = (char *) (open_options_in[i]); } From 65aeb8c0a7948d344a30dcdea9f81ae34146883e Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 24 Feb 2024 18:58:13 -0700 Subject: [PATCH 11/53] refactor with open(), hFDefn member variable --- src/gdalvector.cpp | 67 ++++++++++++++++------------------------------ src/gdalvector.h | 1 + 2 files changed, 24 insertions(+), 44 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 096468f9d..cecfc9675 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -45,34 +45,7 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, hLayer(nullptr) { dsn_in = Rcpp::as(_check_gdal_filename(dsn)); - if (!read_only) - eAccess = GA_Update; - - std::vector dsoo(open_options_in.size() + 1); - if (open_options_in.size() > 0) { - for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { - dsoo[i] = (char *) (open_options_in[i]); - } - } - dsoo.push_back(nullptr); - - unsigned int nOpenFlags = GDAL_OF_VECTOR; - if (read_only) - nOpenFlags |= GDAL_OF_READONLY; - else - nOpenFlags |= GDAL_OF_UPDATE; - - hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, - nullptr, dsoo.data(), nullptr); - if (hDataset == nullptr) - Rcpp::stop("Open dataset failed."); - - hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); - if (hLayer == nullptr) - Rcpp::stop("Failed to get layer object."); - else - OGR_L_ResetReading(hLayer); - + open(read_only); } std::string GDALVector::getDsn() const { @@ -90,9 +63,11 @@ void GDALVector::open(bool read_only) { if 
(dsn_in == "") Rcpp::stop("DSN is not set."); - GDALClose(hDataset); - hDataset = nullptr; - hLayer = nullptr; + if (hDataset != nullptr) { + GDALReleaseDataset(hDataset); + hDataset = nullptr; + hLayer = nullptr; + } if (read_only) eAccess = GA_ReadOnly; @@ -119,10 +94,19 @@ void GDALVector::open(bool read_only) { Rcpp::stop("Open dataset failed."); hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); - if (hLayer == nullptr) - Rcpp::stop("Failed to get layer object."); - else + if (hLayer == nullptr) { + GDALReleaseDataset(hDataset); + Rcpp::stop("Get layer failed."); + } + else { OGR_L_ResetReading(hLayer); + } + + hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) { + GDALReleaseDataset(hDataset); + Rcpp::stop("Get layer definition failed."); + } } Rcpp::CharacterVector GDALVector::getFileList() const { @@ -222,10 +206,6 @@ Rcpp::NumericVector GDALVector::bbox() { Rcpp::List GDALVector::getLayerDefn() const { _checkAccess(GA_ReadOnly); - OGRFeatureDefnH hFDefn = OGR_L_GetLayerDefn(hLayer); - if (hFDefn == nullptr) - Rcpp::stop("Error: could not obtain layer definition."); - Rcpp::List list_out = Rcpp::List::create(); std::string sValue; int nValue; @@ -371,9 +351,6 @@ SEXP GDALVector::getNextFeature() { if (hFeature != nullptr) { Rcpp::List list_out = Rcpp::List::create(); - OGRFeatureDefnH hFDefn = OGR_L_GetLayerDefn(hLayer); - if (hFDefn == nullptr) - Rcpp::stop("Error: could not obtain layer definition."); int iField; double FID = static_cast(OGR_F_GetFID(hFeature)); @@ -643,9 +620,11 @@ void GDALVector::layerErase( } void GDALVector::close() { - GDALReleaseDataset(hDataset); - hDataset = nullptr; - hLayer = nullptr; + if (hDataset != nullptr) { + GDALReleaseDataset(hDataset); + hDataset = nullptr; + hLayer = nullptr; + } } // **************************************************************************** diff --git a/src/gdalvector.h b/src/gdalvector.h index 72b5eb5d8..a5d03710e 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ 
-28,6 +28,7 @@ class GDALVector { GDALDatasetH hDataset; GDALAccess eAccess; OGRLayerH hLayer; + OGRFeatureDefnH hFDefn; public: GDALVector(); From 27c56f38ae94d873977a40a9819167dd290e03ab Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 24 Feb 2024 21:06:38 -0700 Subject: [PATCH 12/53] layer can be layer name or sql --- src/gdalvector.cpp | 51 ++++++++++++++++++++++++++++++++-------------- src/gdalvector.h | 8 +++++--- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index cecfc9675..410d69d7b 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -3,7 +3,6 @@ Chris Toney */ #include "gdal.h" -#include "cpl_error.h" #include "cpl_port.h" #include "cpl_string.h" #include "ogrsf_frmts.h" @@ -16,6 +15,7 @@ GDALVector::GDALVector() : dsn_in(""), layer_in(""), + is_sql_in(false), open_options_in(Rcpp::CharacterVector::create()), hDataset(nullptr), eAccess(GA_ReadOnly), @@ -48,22 +48,13 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, open(read_only); } -std::string GDALVector::getDsn() const { - return dsn_in; -} - -bool GDALVector::isOpen() const { - if (hDataset == nullptr) - return false; - else - return true; -} - void GDALVector::open(bool read_only) { if (dsn_in == "") Rcpp::stop("DSN is not set."); if (hDataset != nullptr) { + if (is_sql_in) + GDALDatasetReleaseResultSet(hDataset, hLayer); GDALReleaseDataset(hDataset); hDataset = nullptr; hLayer = nullptr; @@ -93,7 +84,14 @@ void GDALVector::open(bool read_only) { if (hDataset == nullptr) Rcpp::stop("Open dataset failed."); - hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); + if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { + is_sql_in = true; + hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), NULL, NULL); + } + else { + is_sql_in = false; + hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); + } if (hLayer == nullptr) { GDALReleaseDataset(hDataset); Rcpp::stop("Get layer failed."); @@ 
-104,11 +102,24 @@ void GDALVector::open(bool read_only) { hFDefn = OGR_L_GetLayerDefn(hLayer); if (hFDefn == nullptr) { + if (is_sql_in) + GDALDatasetReleaseResultSet(hDataset, hLayer); GDALReleaseDataset(hDataset); Rcpp::stop("Get layer definition failed."); } } +bool GDALVector::isOpen() const { + if (hDataset == nullptr) + return false; + else + return true; +} + +std::string GDALVector::getDsn() const { + return dsn_in; +} + Rcpp::CharacterVector GDALVector::getFileList() const { _checkAccess(GA_ReadOnly); @@ -156,6 +167,12 @@ bool GDALVector::testCapability(std::string capability) const { return OGR_L_TestCapability(hLayer, capability.c_str()); } +std::string GDALVector::getFIDColumn() const { + _checkAccess(GA_ReadOnly); + + return OGR_L_GetFIDColumn(hLayer); +} + std::string GDALVector::getGeomType() const { _checkAccess(GA_ReadOnly); @@ -189,6 +206,7 @@ std::string GDALVector::getSpatialRef() const { Rcpp::NumericVector GDALVector::bbox() { // Note: bForce=true in tha call to OGR_L_GetExtent(), so the entire // layer may be scanned to compute MBR. + // see: testCapability("FastGetExtent") // Depending on the driver, a spatial filter may/may not be taken into // account. So it is safer to call bbox() without setting a spatial filter. _checkAccess(GA_ReadOnly); @@ -211,8 +229,6 @@ Rcpp::List GDALVector::getLayerDefn() const { int nValue; bool bValue; int iField; - - // TODO: include FID here? // attribute fields for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { @@ -339,6 +355,7 @@ double GDALVector::getFeatureCount() { // OGR_L_GetFeatureCount() returns GIntBig so we return as double to R. // GDAL doc: Note that some implementations of this method may alter the // read cursor of the layer. 
+ // see: testCapability("FastFeatureCount") _checkAccess(GA_ReadOnly); return OGR_L_GetFeatureCount(hLayer, true); @@ -621,6 +638,8 @@ void GDALVector::layerErase( void GDALVector::close() { if (hDataset != nullptr) { + if (is_sql_in) + GDALDatasetReleaseResultSet(hDataset, hLayer); GDALReleaseDataset(hDataset); hDataset = nullptr; hLayer = nullptr; @@ -678,6 +697,8 @@ RCPP_MODULE(mod_GDALVector) { "Return the layer name.") .const_method("testCapability", &GDALVector::testCapability, "Test if this layer supports the named capability.") + .const_method("getFIDColumn", &GDALVector::getFIDColumn, + "Return name of the underlying db column being used as FID column.") .const_method("getGeomType", &GDALVector::getGeomType, "Return the layer geometry type.") .const_method("getGeometryColumn", &GDALVector::getGeometryColumn, diff --git a/src/gdalvector.h b/src/gdalvector.h index a5d03710e..6af40d144 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -23,7 +23,8 @@ class GDALVector { private: std::string dsn_in; - std::string layer_in; + std::string layer_in; // layer name or sql statement + bool is_sql_in; Rcpp::CharacterVector open_options_in; GDALDatasetH hDataset; GDALAccess eAccess; @@ -37,15 +38,16 @@ class GDALVector { GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options); - std::string getDsn() const; - bool isOpen() const; void open(bool read_only); + bool isOpen() const; + std::string getDsn() const; Rcpp::CharacterVector getFileList() const; std::string getDriverShortName() const; std::string getDriverLongName() const; std::string getName() const; bool testCapability(std::string capability) const; + std::string getFIDColumn() const; std::string getGeomType() const; std::string getGeometryColumn() const; std::string getSpatialRef() const; From fabd8b55f51277f71d8e889501b6461b151df01b Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 25 Feb 2024 00:31:22 -0700 Subject: [PATCH 13/53] set field info in 
member variables --- src/gdalvector.cpp | 116 ++++++++++++++++++++++++++------------------- src/gdalvector.h | 8 ++++ 2 files changed, 74 insertions(+), 50 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 410d69d7b..6afdf7473 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -86,7 +86,8 @@ void GDALVector::open(bool read_only) { if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { is_sql_in = true; - hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), NULL, NULL); + hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), + nullptr, nullptr); } else { is_sql_in = false; @@ -328,7 +329,7 @@ Rcpp::List GDALVector::getLayerDefn() const { void GDALVector::setAttributeFilter(std::string query) { _checkAccess(GA_ReadOnly); - const char* query_in = NULL; + const char* query_in = nullptr; if (query != "") query_in = query.c_str(); @@ -368,44 +369,43 @@ SEXP GDALVector::getNextFeature() { if (hFeature != nullptr) { Rcpp::List list_out = Rcpp::List::create(); - int iField; + int i; + + if (!m_fld_info_is_set) + _setFldInfo(); double FID = static_cast(OGR_F_GetFID(hFeature)); list_out.push_back(FID, "FID"); - for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { - OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); - if (hFieldDefn == nullptr) - Rcpp::stop("Error: could not obtain field definition."); - if (!OGR_F_IsFieldSet(hFeature, iField) || - OGR_F_IsFieldNull(hFeature, iField)) { + for (i = 0; i < m_num_flds; ++i) { + if (!OGR_F_IsFieldSet(hFeature, i) || + OGR_F_IsFieldNull(hFeature, i)) { continue; } - OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - if (fld_type == OFTInteger) { - int value = OGR_F_GetFieldAsInteger(hFeature, iField); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + if (m_fld_types[i] == OFTInteger) { + int value = OGR_F_GetFieldAsInteger(hFeature, i); + list_out.push_back(value, m_fld_names[i]); } - else if (fld_type == OFTInteger64) { + else if (m_fld_types[i] == 
OFTInteger64) { // R does not have native int64 so handled as double for now double value = static_cast( - OGR_F_GetFieldAsInteger64(hFeature, iField)); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + OGR_F_GetFieldAsInteger64(hFeature, i)); + list_out.push_back(value, m_fld_names[i]); } - else if (fld_type == OFTReal) { - double value = OGR_F_GetFieldAsDouble(hFeature, iField); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + else if (m_fld_types[i] == OFTReal) { + double value = OGR_F_GetFieldAsDouble(hFeature, i); + list_out.push_back(value, m_fld_names[i]); } else { // TODO: support date, time, binary, etc. // read as string for now - std::string value = OGR_F_GetFieldAsString(hFeature, iField); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + std::string value = OGR_F_GetFieldAsString(hFeature, i); + list_out.push_back(value, m_fld_names[i]); } } - for (int i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { + for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { OGRGeometryH hGeometry = OGR_F_GetGeomFieldRef(hFeature, i); if (hGeometry == nullptr) Rcpp::stop("Error: could not obtain geometry reference."); @@ -439,15 +439,14 @@ void GDALVector::layerIntersection( bool quiet, Rcpp::Nullable options) { - std::vector opt_list = {NULL}; + std::vector opt_list = {nullptr}; if (options.isNotNull()) { - // cast to the underlying type Rcpp::CharacterVector options_in(options); opt_list.resize(options_in.size() + 1); for (R_xlen_t i = 0; i < options_in.size(); ++i) { opt_list[i] = (char *) (options_in[i]); } - opt_list[options_in.size()] = NULL; + opt_list[options_in.size()] = nullptr; } OGRErr err = OGR_L_Intersection( @@ -455,7 +454,8 @@ void GDALVector::layerIntersection( method_layer._getOGRLayerH(), result_layer._getOGRLayerH(), opt_list.data(), - quiet ? nullptr : GDALTermProgressR, nullptr); + quiet ? 
nullptr : GDALTermProgressR, + nullptr); if (err != OGRERR_NONE) Rcpp::stop("Error during Intersection, or execution was interrupted."); @@ -468,15 +468,14 @@ void GDALVector::layerUnion( bool quiet, Rcpp::Nullable options) { - std::vector opt_list = {NULL}; + std::vector opt_list = {nullptr}; if (options.isNotNull()) { - // cast to the underlying type Rcpp::CharacterVector options_in(options); opt_list.resize(options_in.size() + 1); for (R_xlen_t i = 0; i < options_in.size(); ++i) { opt_list[i] = (char *) (options_in[i]); } - opt_list[options_in.size()] = NULL; + opt_list[options_in.size()] = nullptr; } OGRErr err = OGR_L_Union( @@ -484,7 +483,8 @@ void GDALVector::layerUnion( method_layer._getOGRLayerH(), result_layer._getOGRLayerH(), opt_list.data(), - quiet ? nullptr : GDALTermProgressR, nullptr); + quiet ? nullptr : GDALTermProgressR, + nullptr); if (err != OGRERR_NONE) Rcpp::stop("Error during Union, or execution was interrupted."); @@ -497,15 +497,14 @@ void GDALVector::layerSymDifference( bool quiet, Rcpp::Nullable options) { - std::vector opt_list = {NULL}; + std::vector opt_list = {nullptr}; if (options.isNotNull()) { - // cast to the underlying type Rcpp::CharacterVector options_in(options); opt_list.resize(options_in.size() + 1); for (R_xlen_t i = 0; i < options_in.size(); ++i) { opt_list[i] = (char *) (options_in[i]); } - opt_list[options_in.size()] = NULL; + opt_list[options_in.size()] = nullptr; } OGRErr err = OGR_L_SymDifference( @@ -513,7 +512,8 @@ void GDALVector::layerSymDifference( method_layer._getOGRLayerH(), result_layer._getOGRLayerH(), opt_list.data(), - quiet ? nullptr : GDALTermProgressR, nullptr); + quiet ? 
nullptr : GDALTermProgressR, + nullptr); if (err != OGRERR_NONE) Rcpp::stop("Error during SymDifference, or execution was interrupted."); @@ -526,15 +526,14 @@ void GDALVector::layerIdentity( bool quiet, Rcpp::Nullable options) { - std::vector opt_list = {NULL}; + std::vector opt_list = {nullptr}; if (options.isNotNull()) { - // cast to the underlying type Rcpp::CharacterVector options_in(options); opt_list.resize(options_in.size() + 1); for (R_xlen_t i = 0; i < options_in.size(); ++i) { opt_list[i] = (char *) (options_in[i]); } - opt_list[options_in.size()] = NULL; + opt_list[options_in.size()] = nullptr; } OGRErr err = OGR_L_Identity( @@ -542,7 +541,8 @@ void GDALVector::layerIdentity( method_layer._getOGRLayerH(), result_layer._getOGRLayerH(), opt_list.data(), - quiet ? nullptr : GDALTermProgressR, nullptr); + quiet ? nullptr : GDALTermProgressR, + nullptr); if (err != OGRERR_NONE) Rcpp::stop("Error during Identity, or execution was interrupted."); @@ -555,15 +555,14 @@ void GDALVector::layerUpdate( bool quiet, Rcpp::Nullable options) { - std::vector opt_list = {NULL}; + std::vector opt_list = {nullptr}; if (options.isNotNull()) { - // cast to the underlying type Rcpp::CharacterVector options_in(options); opt_list.resize(options_in.size() + 1); for (R_xlen_t i = 0; i < options_in.size(); ++i) { opt_list[i] = (char *) (options_in[i]); } - opt_list[options_in.size()] = NULL; + opt_list[options_in.size()] = nullptr; } OGRErr err = OGR_L_Update( @@ -571,7 +570,8 @@ void GDALVector::layerUpdate( method_layer._getOGRLayerH(), result_layer._getOGRLayerH(), opt_list.data(), - quiet ? nullptr : GDALTermProgressR, nullptr); + quiet ? 
nullptr : GDALTermProgressR, + nullptr); if (err != OGRERR_NONE) Rcpp::stop("Error during Update, or execution was interrupted."); @@ -584,15 +584,14 @@ void GDALVector::layerClip( bool quiet, Rcpp::Nullable options) { - std::vector opt_list = {NULL}; + std::vector opt_list = {nullptr}; if (options.isNotNull()) { - // cast to the underlying type Rcpp::CharacterVector options_in(options); opt_list.resize(options_in.size() + 1); for (R_xlen_t i = 0; i < options_in.size(); ++i) { opt_list[i] = (char *) (options_in[i]); } - opt_list[options_in.size()] = NULL; + opt_list[options_in.size()] = nullptr; } OGRErr err = OGR_L_Clip( @@ -600,7 +599,8 @@ void GDALVector::layerClip( method_layer._getOGRLayerH(), result_layer._getOGRLayerH(), opt_list.data(), - quiet ? nullptr : GDALTermProgressR, nullptr); + quiet ? nullptr : GDALTermProgressR, + nullptr); if (err != OGRERR_NONE) Rcpp::stop("Error during Clip, or execution was interrupted."); @@ -613,15 +613,14 @@ void GDALVector::layerErase( bool quiet, Rcpp::Nullable options) { - std::vector opt_list = {NULL}; + std::vector opt_list = {nullptr}; if (options.isNotNull()) { - // cast to the underlying type Rcpp::CharacterVector options_in(options); opt_list.resize(options_in.size() + 1); for (R_xlen_t i = 0; i < options_in.size(); ++i) { opt_list[i] = (char *) (options_in[i]); } - opt_list[options_in.size()] = NULL; + opt_list[options_in.size()] = nullptr; } OGRErr err = OGR_L_Erase( @@ -629,7 +628,8 @@ void GDALVector::layerErase( method_layer._getOGRLayerH(), result_layer._getOGRLayerH(), opt_list.data(), - quiet ? nullptr : GDALTermProgressR, nullptr); + quiet ? 
nullptr : GDALTermProgressR, + nullptr); if (err != OGRERR_NONE) Rcpp::stop("Error during Erase, or execution was interrupted."); @@ -664,6 +664,22 @@ OGRLayerH GDALVector::_getOGRLayerH() { return hLayer; } +void GDALVector::_setFldInfo() { + m_num_flds = OGR_FD_GetFieldCount(hFDefn); + m_fld_types.clear(); + m_fld_names.clear(); + for (int i = 0; i < m_num_flds; ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if (hFieldDefn == nullptr) + Rcpp::stop("Error: could not obtain field definition."); + + m_fld_types.push_back(OGR_Fld_GetType(hFieldDefn)); + m_fld_names.push_back(OGR_Fld_GetNameRef(hFieldDefn)); + } + + m_fld_info_is_set = true; +} + // **************************************************************************** diff --git a/src/gdalvector.h b/src/gdalvector.h index 6af40d144..4d185d07d 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -30,6 +30,13 @@ class GDALVector { GDALAccess eAccess; OGRLayerH hLayer; OGRFeatureDefnH hFDefn; + int m_num_flds; + std::vector m_fld_types; + std::vector m_fld_names; + //int m_num_geom_flds; + //std::vector m_geom_fld_types; + //std::vector m_geom_fld_names; + bool m_fld_info_is_set = false; public: GDALVector(); @@ -103,6 +110,7 @@ class GDALVector { // methods for internal use not exported to R void _checkAccess(GDALAccess access_needed) const; OGRLayerH _getOGRLayerH(); + void _setFldInfo(); }; RCPP_EXPOSED_CLASS(GDALVector) From 22eb87de37427a1a38eae9346d1f2de73e3c0183 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 25 Feb 2024 20:47:17 -0700 Subject: [PATCH 14/53] revert fabd8b5 (set field info in member variables) --- src/gdalvector.cpp | 43 ++++++++++++++----------------------------- src/gdalvector.h | 8 -------- 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 6afdf7473..eef3ce1e1 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -370,38 +370,40 @@ SEXP GDALVector::getNextFeature() { if (hFeature != nullptr) 
{ Rcpp::List list_out = Rcpp::List::create(); int i; - - if (!m_fld_info_is_set) - _setFldInfo(); double FID = static_cast(OGR_F_GetFID(hFeature)); list_out.push_back(FID, "FID"); - for (i = 0; i < m_num_flds; ++i) { + for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if (hFieldDefn == nullptr) + Rcpp::stop("Error: could not obtain field definition."); + if (!OGR_F_IsFieldSet(hFeature, i) || OGR_F_IsFieldNull(hFeature, i)) { continue; } - - if (m_fld_types[i] == OFTInteger) { + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { int value = OGR_F_GetFieldAsInteger(hFeature, i); - list_out.push_back(value, m_fld_names[i]); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } - else if (m_fld_types[i] == OFTInteger64) { + else if (fld_type == OFTInteger64) { // R does not have native int64 so handled as double for now double value = static_cast( OGR_F_GetFieldAsInteger64(hFeature, i)); - list_out.push_back(value, m_fld_names[i]); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } - else if (m_fld_types[i] == OFTReal) { + else if (fld_type == OFTReal) { double value = OGR_F_GetFieldAsDouble(hFeature, i); - list_out.push_back(value, m_fld_names[i]); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else { // TODO: support date, time, binary, etc. 
// read as string for now std::string value = OGR_F_GetFieldAsString(hFeature, i); - list_out.push_back(value, m_fld_names[i]); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } } @@ -664,23 +666,6 @@ OGRLayerH GDALVector::_getOGRLayerH() { return hLayer; } -void GDALVector::_setFldInfo() { - m_num_flds = OGR_FD_GetFieldCount(hFDefn); - m_fld_types.clear(); - m_fld_names.clear(); - for (int i = 0; i < m_num_flds; ++i) { - OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); - if (hFieldDefn == nullptr) - Rcpp::stop("Error: could not obtain field definition."); - - m_fld_types.push_back(OGR_Fld_GetType(hFieldDefn)); - m_fld_names.push_back(OGR_Fld_GetNameRef(hFieldDefn)); - } - - m_fld_info_is_set = true; -} - - // **************************************************************************** RCPP_MODULE(mod_GDALVector) { diff --git a/src/gdalvector.h b/src/gdalvector.h index 4d185d07d..6af40d144 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -30,13 +30,6 @@ class GDALVector { GDALAccess eAccess; OGRLayerH hLayer; OGRFeatureDefnH hFDefn; - int m_num_flds; - std::vector m_fld_types; - std::vector m_fld_names; - //int m_num_geom_flds; - //std::vector m_geom_fld_types; - //std::vector m_geom_fld_names; - bool m_fld_info_is_set = false; public: GDALVector(); @@ -110,7 +103,6 @@ class GDALVector { // methods for internal use not exported to R void _checkAccess(GDALAccess access_needed) const; OGRLayerH _getOGRLayerH(); - void _setFldInfo(); }; RCPP_EXPOSED_CLASS(GDALVector) From 23f9ae676e987577f1943a8100e6b6a7793f1131 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Mon, 26 Feb 2024 23:51:05 -0700 Subject: [PATCH 15/53] add constructor for dsn only --- src/gdalvector.cpp | 15 ++++++++++++++- src/gdalvector.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index eef3ce1e1..e26a047c9 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -21,6 +21,13 @@ GDALVector::GDALVector() : 
eAccess(GA_ReadOnly), hLayer(nullptr) {} +GDALVector::GDALVector(Rcpp::CharacterVector dsn) : + GDALVector( + dsn, + "", + true, + Rcpp::CharacterVector::create()) {} + GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : GDALVector( dsn, @@ -84,7 +91,11 @@ void GDALVector::open(bool read_only) { if (hDataset == nullptr) Rcpp::stop("Open dataset failed."); - if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { + if (layer_in == "") { + is_sql_in = false; + hLayer = GDALDatasetGetLayer(hDataset, 0); + } + else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { is_sql_in = true; hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), nullptr, nullptr); @@ -674,6 +685,8 @@ RCPP_MODULE(mod_GDALVector) { .constructor ("Default constructor, only for allocations in std::vector.") + .constructor + ("Usage: new(GDALVector, dsn)") .constructor ("Usage: new(GDALVector, dsn, layer)") .constructor diff --git a/src/gdalvector.h b/src/gdalvector.h index 6af40d144..cc0a603f0 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -33,6 +33,7 @@ class GDALVector { public: GDALVector(); + GDALVector(Rcpp::CharacterVector dsn); GDALVector(Rcpp::CharacterVector dsn, std::string layer); GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, From c460495a27767a9d0cad115ebd6fa6f545a99ee6 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Tue, 27 Feb 2024 12:02:52 -0700 Subject: [PATCH 16/53] OGR_G_DestroyGeometry(hGeom) --- src/gdalvector.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index e26a047c9..b1363f264 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -419,11 +419,11 @@ SEXP GDALVector::getNextFeature() { } for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { - OGRGeometryH hGeometry = OGR_F_GetGeomFieldRef(hFeature, i); - if (hGeometry == nullptr) + OGRGeometryH hGeom = 
OGR_F_GetGeomFieldRef(hFeature, i); + if (hGeom == nullptr) Rcpp::stop("Error: could not obtain geometry reference."); char* pszWKT; - OGR_G_ExportToWkt(hGeometry, &pszWKT); + OGR_G_ExportToWkt(hGeom, &pszWKT); std::string wkt(pszWKT); OGRGeomFieldDefnH hGeomFldDefn = OGR_F_GetGeomFieldDefnRef(hFeature, i); @@ -431,6 +431,7 @@ SEXP GDALVector::getNextFeature() { Rcpp::stop("Error: could not obtain geometry field def."); list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); CPLFree(pszWKT); + OGR_G_DestroyGeometry(hGeom); } return list_out; From de01112260260d6300f9959cbbe4543dfae88d25 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Thu, 29 Feb 2024 00:25:39 -0700 Subject: [PATCH 17/53] gdalvector-draft initial commit --- vignettes/articles/gdalvector-draft.Rmd | 514 ++++++++++++++++++++++++ 1 file changed, 514 insertions(+) create mode 100644 vignettes/articles/gdalvector-draft.Rmd diff --git a/vignettes/articles/gdalvector-draft.Rmd b/vignettes/articles/gdalvector-draft.Rmd new file mode 100644 index 000000000..2161110be --- /dev/null +++ b/vignettes/articles/gdalvector-draft.Rmd @@ -0,0 +1,514 @@ +--- +title: "Draft Bindings to the GDAL/OGR Vector API" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +Chris Toney +2024-02-28 + +## Summary + +This document describes R bindings to the GDAL/OGR Vector API proposed for inclusion in package **gdalraster**, analogous to its existing raster support. A package providing low-level access to both the raster and vector APIs in GDAL may be useful to developers creating higher level interfaces. For example, custom workflows that are I/O intensive may benefit from direct access to GDAL's I/O capabilities. R bindings for the vector API would support persistent connections to the data store, cursors with attribute and spatial filtering, transactions, feature-level insert/delete, update of attributes and geometries, and OGR facilities for geoprocessing. 
Calling signatures of a class-based interface will resemble the native C++ and Python APIs provided by the GDAL project. It is intended that bindings in **gdalraster** should provide long-term API stability while tracking changes in GDAL. + +The proposed interface is described below in terms of the GDAL Vector Data Model, along with a draft class definition for implementation via `RCPP_EXPOSED_CLASS`. A prototype of the bindings is also provided in the `gdalvector` feature branch at https://github.com/USDAForestService/gdalraster/tree/gdalvector. The prototype bindings are currently undocumented. Code examples are also included below to demonstrate usage and proof-of-concept. + +## Description of the interface + +Bindings will be implemented with **Rcpp** modules, and in particular `RCPP_EXPOSED_CLASS`. Exposing C++ classes directly in R provides a natural interface to the underlying object model. `RCPP_EXPOSED_CLASS` makes this easy and automatically handles the external pointer. It reduces the amount of wrapper code required compared with potential alternatives, so the code is simpler, faster to develop and easier to maintain. + +A GDAL dataset for vector is a file or database containing one or more OGR layers. A vector dataset is represented in R as a data source name (DSN), a character string that may be a filename, database connection string, URL, virtual file, etc. 
Management of datasets and their vector schemas will be done with: + +* existing management functions in **gdalraster** that operate on vector datasets: `addFilesInZip()` (supporting SOZip), `copyDatasetFiles()`, `deleteDataset()`, `renameDataset()` +* existing internal utility functions to be further developed: `.ogr_ds_exists()`, `.create_ogr()`, `.ogr_ds_layer_count()`, `.ogr_layer_exists()`, `.ogr_layer_create()`, `.ogr_layer_delete()`, `.ogr_field_index()`, `.ogr_field_create()` +* wrappers for `vector_translate()` and `vector_info()` from the gdal_utils.h API (**gdalraster** 1.10) +* additional stand-alone functions TBD + +An `OGRLayer` object represents a layer of features within a data source. It will be modeled in R as class `GDALVector`, an exposed C++ class encapsulating an `OGRLayer` object and the `GDALDataset` that owns it. `GDALVector` will expose methods for retrieving layer information, attribute and spatial filtering, feature read/write, and layer geoprocessing. A draft definition for class `GDALVector` is given below. + +All features in an `OGRLayer` share a common schema (feature class) modeled in GDAL as `OGRFeatureDefn`. A feature class definition includes the set of attribute fields, their data types, geometry field(s), feature identifier (FID), and feature class name (normally used as a layer name). The feature class definition is represented in R as a list having as names the attribute/geometry field names, and each list element holding a field definition also as a list. + +A definition for an attribute field is a list with elements: + +``` +$type : OGR Field Type (e.g., "OFTString", "OFTInteger", etc.) 
+$width : optional field width as integer +$precision : optional precision as integer +$is_nullable: optional logical scalar +$is_unique : optional logical scalar +$default : optional field default value +$is_ignored : optional logical scalar +$is_geom : FALSE for attribute fields +``` + +A definition for a geometry field is a list with elements: + +``` +$type : geom type ("Point", "Polygon", etc.) +$srs : spatial reference as WKT string +$is_nullable: optional logical scalar +$is_ignored : optional logical scalar +$is_geom : TRUE for geometry fields +``` + +An `OGRFeature`, as read by `GDALVector::getNextFeature()` or as passed to write methods, is a list of FID, attribute and geometry field names, and their values. `GDALVector::getFeatureSet()` will return a set of features as a data frame. (Note: FID is int64 type in GDAL. Handling of int64 TBD, currently as R double type). + +An `OGRGeometry` is represented in R as a character string containing OGC Well Known Text (WKT). Likewise, an `OGRSpatialReference`, which encapsulates the definition of a projection and datum, is represented in R as WKT. **gdalraster** has existing functions for working with spatial reference systems as WKT, and a set of geometry convenience functions also operating on WKT (GEOS wrappers via GDAL headers). + +## GDALVector class + +A draft class definition is given below. It has been partially implemented in: [ADD URL]. This does not include definitions of several stand-alone functions that will provide schema management. The existing definitions in src/ogr_util.h are a starting point for those. Also note that an **Rcpp** `.finalizer` function will be implemented to properly release resources in case an object of class `GDALVector` is garbage collected without an explicit call to `close()`. 
+ +```cpp +class GDALVector { + + private: + std::string dsn_in; + std::string layer_in; // layer name or sql statement + bool is_sql_in; + Rcpp::CharacterVector open_options_in; + GDALDatasetH hDataset; + GDALAccess eAccess; + OGRLayerH hLayer; + OGRFeatureDefnH hFDefn; + + public: + GDALVector(); + GDALVector(Rcpp::CharacterVector dsn); + GDALVector(Rcpp::CharacterVector dsn, std::string layer); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, + Rcpp::CharacterVector open_options); + + void open(bool read_only); + bool isOpen() const; + std::string getDsn() const; + Rcpp::CharacterVector getFileList() const; + std::string getDriverShortName() const; + std::string getDriverLongName() const; + + std::string getName() const; + bool testCapability(std::string capability) const; + std::string getFIDColumn() const; + std::string getGeomType() const; + std::string getGeometryColumn() const; + std::string getSpatialRef() const; + Rcpp::NumericVector bbox(); + Rcpp::List getLayerDefn() const; + + void setAttributeFilter(std::string query); + void setSpatialFilterRect(Rcpp::NumericVector bbox); + void clearSpatialFilter(); + + double getFeatureCount(); + SEXP getNextFeature(); + void resetReading(); + SEXP getFeature(GIntBig fid); // int64 handling TBD + Rcpp::DataFrame getFeatureSet(); + + void createFeature(Rcpp::List feat); + void setFeature(Rcpp::List feat); + void upsertFeature(Rcpp::List feat); + void deleteFeature(GIntBig fid); // int64 handling TBD + + bool startTransaction(bool force); + bool commitTransaction(); + bool rollbackTransaction(); + + void layerIntersection( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUnion( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerSymDifference( + GDALVector method_layer, + GDALVector result_layer, + bool 
quiet, + Rcpp::Nullable options); + void layerIdentity( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUpdate( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerClip( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerErase( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + + void close(); + + // methods for internal use not exported to R + void _checkAccess(GDALAccess access_needed) const; + OGRLayerH _getOGRLayerH(); +}; + +RCPP_EXPOSED_CLASS(GDALVector) +``` + +## Example: Usage + +``` r +library(gdalraster) +#> GDAL 3.8.3, released 2024/01/04, PROJ 9.3.1 + +# MTBS fires in Yellowstone National Park 1984-2022 +dsn <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") +lyr <- new(GDALVector, dsn, "mtbs_perims", TRUE) + +# dataset info +lyr$getDriverShortName() +#> [1] "GPKG" +lyr$getDriverLongName() +#> [1] "GeoPackage" +lyr$getFileList() +#> [1] "/usr/local/lib/R/site-library/gdalraster/extdata/ynp_fires_1984_2022.gpkg" + +# layer info +lyr$getName() +#> [1] "mtbs_perims" +lyr$getGeomType() +#> [1] "Multi Polygon" +lyr$getGeometryColumn() +#> [1] "geom" +lyr$getFIDColumn() +#> [1] "fid" +lyr$getSpatialRef() +#> [1] "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 
1980\",6378137,298.257222101,AUTHORITY[\"EPSG\",\"7019\"]],AUTHORITY[\"EPSG\",\"6269\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4269\"]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"latitude_of_origin\",44.25],PARAMETER[\"central_meridian\",-109.5],PARAMETER[\"standard_parallel_1\",49],PARAMETER[\"standard_parallel_2\",45],PARAMETER[\"false_easting\",600000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AXIS[\"Easting\",EAST],AXIS[\"Northing\",NORTH],AUTHORITY[\"EPSG\",\"32100\"]]" +lyr$bbox() +#> [1] 469685.73 -12917.76 573531.72 96577.34 + +# get some information about layer capabilities +lyr$testCapability("RandomRead") +#> [1] TRUE + +# dataset was opened read-only: +lyr$testCapability("SequentialWrite") +#> [1] FALSE +lyr$testCapability("RandomWrite") +#> [1] FALSE + +lyr$testCapability("FastSpatialFilter") +#> [1] TRUE +lyr$testCapability("FastFeatureCount") +#> [1] TRUE +lyr$testCapability("FastGetExtent") +#> [1] TRUE +lyr$testCapability("Transactions") +#> [1] TRUE + +# re-open with write access +# lyr$open(read_only=FALSE) +# lyr$testCapability("SequentialWrite") +# lyr$testCapability("RandomWrite") + +# feature class definition +# a list of fields and their definitions +defn <- lyr$getLayerDefn() +names(defn) +#> [1] "event_id" "incid_name" "incid_type" "map_id" "burn_bnd_ac" +#> [6] "burn_bnd_lat" "burn_bnd_lon" "ig_date" "ig_year" "geom" + +# each list element holds a list containing a field definition +str(defn) +#> List of 10 +#> $ event_id :List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 254 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ incid_name :List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 254 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi 
TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ incid_type :List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 254 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ map_id :List of 8 +#> ..$ type : chr "OFTInteger64" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ burn_bnd_ac :List of 8 +#> ..$ type : chr "OFTInteger64" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ burn_bnd_lat:List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 10 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ burn_bnd_lon:List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 10 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ ig_date :List of 8 +#> ..$ type : chr "default (read as OFTString)" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ ig_year :List of 8 +#> ..$ type : chr "OFTInteger" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ geom :List of 5 +#> ..$ type : chr "Multi Polygon" +#> ..$ srs : 
chr "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 1980\",6378137,2"| __truncated__ +#> ..$ is_nullable: logi TRUE +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi TRUE + +lyr$getFeatureCount() +#> [1] 61 + +# cursor +feat <- lyr$getNextFeature() +# a list of fields and their values +str(feat) +#> List of 11 +#> $ FID : num 1 +#> $ event_id : chr "WY4413411069519870807" +#> $ incid_name : chr "POLECAT" +#> $ incid_type : chr "Wildfire" +#> $ map_id : num 1e+07 +#> $ burn_bnd_ac : num 1093 +#> $ burn_bnd_lat: chr "44.132" +#> $ burn_bnd_lon: chr "-110.696" +#> $ ig_date : chr "1987/08/07" +#> $ ig_year : int 1987 +#> $ geom : chr "MULTIPOLYGON (((503099.439579653 -12893.9672899192,503169.756694236 -12756.3721247327,502689.845907435 -12131.5"| __truncated__ + +# attribute filter +lyr$setAttributeFilter("ig_year = 2020") +lyr$getFeatureCount() +#> [1] 1 + +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID : num 61 +#> $ event_id : chr "WY4438911082120200822" +#> $ incid_name : chr "LONE STAR" +#> $ incid_type : chr "Wildfire" +#> $ map_id : num 1e+07 +#> $ burn_bnd_ac : num 3348 +#> $ burn_bnd_lat: chr "44.4" +#> $ burn_bnd_lon: chr "-110.782" +#> $ ig_date : chr "2020/08/22" +#> $ ig_year : int 2020 +#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 15388.0465"| __truncated__ + +feat <- lyr$getNextFeature() +str(feat) +#> NULL + +lyr$resetReading() +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID : num 61 +#> $ event_id : chr "WY4438911082120200822" +#> $ incid_name : chr "LONE STAR" +#> $ incid_type : chr "Wildfire" +#> $ map_id : num 1e+07 +#> $ burn_bnd_ac : num 3348 +#> $ burn_bnd_lat: chr "44.4" +#> $ burn_bnd_lon: chr "-110.782" +#> $ ig_date : chr "2020/08/22" +#> $ ig_year : int 2020 +#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 
15388.0465"| __truncated__ + +# clear attribute filter +lyr$setAttributeFilter("") +lyr$getFeatureCount() +#> [1] 61 + +# spatial filter +# find the largest 1988 fire +lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID : num 7 +#> $ event_id : chr "WY4470811082119880722" +#> $ incid_name : chr "NORTH FORK" +#> $ incid_type : chr "Wildfire" +#> $ map_id : num 1e+07 +#> $ burn_bnd_ac : num 563527 +#> $ burn_bnd_lat: chr "44.678" +#> $ burn_bnd_lon: chr "-110.716" +#> $ ig_date : chr "1988/07/22" +#> $ ig_year : int 1988 +#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ + +bbox <- bbox_from_wkt(feat$geom) +bbox +#> [1] 469685.97 11442.45 544069.63 85508.15 + +lyr$setAttributeFilter("") +lyr$getFeatureCount() +#> [1] 61 + +lyr$setSpatialFilterRect(bbox) +lyr$getFeatureCount() +#> [1] 40 + +lyr$clearSpatialFilter() + +## layer intersection + +# largest 1988 fire (FID from above) +lyr$setAttributeFilter("FID = 7") +lyr$getFeatureCount() +#> [1] 1 +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID : num 7 +#> $ event_id : chr "WY4470811082119880722" +#> $ incid_name : chr "NORTH FORK" +#> $ incid_type : chr "Wildfire" +#> $ map_id : num 1e+07 +#> $ burn_bnd_ac : num 563527 +#> $ burn_bnd_lat: chr "44.678" +#> $ burn_bnd_lon: chr "-110.716" +#> $ ig_date : chr "1988/07/22" +#> $ ig_year : int 1988 +#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ + +# 2000-2022 fires +sql <- "SELECT FID, * FROM mtbs_perims WHERE ig_year >= 2000 ORDER BY mtbs_perims.ig_year" +lyr2 <- new(GDALVector, dsn, sql) +lyr2$getFeatureCount() +#> [1] 40 + +# create the output layer in a temporary in-memory dataset +dsn_out <- "/vsimem/lyr_proc_tmp.gpkg" +srs <- lyr$getSpatialRef() +# this uses existing internal OGR 
utilities +gdalraster:::.create_ogr("GPKG", dsn_out, 0, 0, 0, "Unknown", "int_result", srs) +#> [1] TRUE +gdalraster:::.ogr_ds_exists(dsn_out, with_update=TRUE) +#> [1] TRUE +gdalraster:::.ogr_layer_exists(dsn_out, "int_result") +#> [1] TRUE + +lyr_out <- new(GDALVector, dsn_out, "int_result", read_only=FALSE) +lyr_out$getFeatureCount() +#> [1] 0 + +# intersection of lyr and lyr2, with result in lyr_out +lyr$layerIntersection(lyr2, lyr_out, quiet=FALSE, options=NULL) +#> 0...10...20...30...40...50...60...70...80...90...100 - done. + +lyr_out$getFeatureCount() +#> [1] 5 +defn <- lyr_out$getLayerDefn() +# combined attributes +names(defn) +#> [1] "input_event_id" "input_incid_name" "input_incid_type" +#> [4] "input_map_id" "input_burn_bnd_ac" "input_burn_bnd_lat" +#> [7] "input_burn_bnd_lon" "input_ig_date" "input_ig_year" +#> [10] "method_event_id" "method_incid_name" "method_incid_type" +#> [13] "method_map_id" "method_burn_bnd_ac" "method_burn_bnd_lat" +#> [16] "method_burn_bnd_lon" "method_ig_date" "method_ig_year" +#> [19] "geom" + +# we don't have vector plotting yet, so use plot_raster() +# rasterize and display the output layer +out_file <- "/vsimem/int_result.tif" +rasterize(src_dsn = dsn_out, + dstfile = out_file, + layer = "int_result", + burn_attr = "method_ig_year", + tr = c(90,90), + tap = TRUE, + dtName = "Int16", + dstnodata = -9999, + init = -9999) +#> 0...10...20...30...40...50...60...70...80...90...100 - done. 
+ +ds <- new(GDALRaster, out_file) +pal <- scales::viridis_pal(end = 0.8, direction = -1)(6) +ramp <- scales::colour_ramp(pal) +plot_raster(ds, legend = TRUE, col_map_fn = ramp, na_col = "#d9d9d9", + main="2000-2022 re-burns on the 1988 North Fork perimeter") +``` + +![](https://i.imgur.com/O1CdGd1.png) + +``` r + +ds$close() +lyr$close() +lyr2$close() +lyr_out$close() + +vsi_unlink(dsn_out) +vsi_unlink(out_file) +``` + +Created on 2024-02-25 with [reprex v2.1.0](https://reprex.tidyverse.org) + From 7181fabe88ffd66b53806a6748437e8b4fb15603 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Thu, 29 Feb 2024 17:41:22 -0700 Subject: [PATCH 18/53] update TODOs --- src/gdalvector.cpp | 7 +++++-- src/gdalvector.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index b1363f264..0320c3951 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -104,6 +104,7 @@ void GDALVector::open(bool read_only) { is_sql_in = false; hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); } + if (hLayer == nullptr) { GDALReleaseDataset(hDataset); Rcpp::stop("Get layer failed."); @@ -227,8 +228,8 @@ Rcpp::NumericVector GDALVector::bbox() { if (OGR_L_GetExtent(hLayer, &envelope, true) != OGRERR_NONE) Rcpp::stop("Error: the extent of the layer cannot be determined."); - Rcpp::NumericVector bbox_out = { - envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY}; + Rcpp::NumericVector bbox_out = + {envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY}; return bbox_out; } @@ -243,6 +244,7 @@ Rcpp::List GDALVector::getLayerDefn() const { int iField; // attribute fields + // TODO: add subtype and field domain name for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { Rcpp::List list_fld_defn = Rcpp::List::create(); @@ -401,6 +403,7 @@ SEXP GDALVector::getNextFeature() { list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTInteger64) { + // TODO: emit a warning? 
// R does not have native int64 so handled as double for now double value = static_cast<double>( OGR_F_GetFieldAsInteger64(hFeature, i)); diff --git a/src/gdalvector.h b/src/gdalvector.h index cc0a603f0..03e4e1947 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -1,5 +1,5 @@ /* R interface to a subset of the GDAL C API for vector. - An OGRLayer class, a layer of features in a GDALDataset. + A class for OGRLayer, a layer of features in a GDALDataset. https://gdal.org/api/vector_c_api.html Chris Toney */ From c6632947dfc42eac654e72119b72d9f66cd0f28f Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Thu, 29 Feb 2024 19:11:11 -0700 Subject: [PATCH 19/53] gdalvector-draft moved to main --- vignettes/articles/gdalvector-draft.Rmd | 514 ------------------------ 1 file changed, 514 deletions(-) delete mode 100644 vignettes/articles/gdalvector-draft.Rmd diff --git a/vignettes/articles/gdalvector-draft.Rmd b/vignettes/articles/gdalvector-draft.Rmd deleted file mode 100644 index 2161110be..000000000 --- a/vignettes/articles/gdalvector-draft.Rmd +++ /dev/null @@ -1,514 +0,0 @@ ---- -title: "Draft Bindings to the GDAL/OGR Vector API" ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -Chris Toney -2024-02-28 - -## Summary - -This document describes R bindings to the GDAL/OGR Vector API proposed for inclusion in package **gdalraster**, analogous to its existing raster support. A package providing low-level access to both the raster and vector APIs in GDAL may be useful to developers creating higher level interfaces. For example, custom workflows that are I/O intensive may benefit from direct access to GDAL's I/O capabilities. R bindings for the vector API would support persistent connections to the data store, cursors with attribute and spatial filtering, transactions, feature-level insert/delete, update of attributes and geometries, and OGR facilities for geoprocessing. 
Calling signatures of a class-based interface will resemble the native C++ and Python APIs provided by the GDAL project. It is intended that bindings in **gdalraster** should provide long-term API stability while tracking changes in GDAL. - -The proposed interface is described below in terms of the GDAL Vector Data Model, along with a draft class definition for implementation via `RCPP_EXPOSED_CLASS`. A prototype of the bindings is also provided in the `gdalvector` feature branch at https://github.com/USDAForestService/gdalraster/tree/gdalvector. The prototype bindings are currently undocumented. Code examples are also included below to demonstrate usage and proof-of-concept. - -## Description of the interface - -Bindings will be implemented with **Rcpp** modules, and in particular `RCPP_EXPOSED_CLASS`. Exposing C++ classes directly in R provides a natural interface to the underlying object model. `RCPP_EXPOSED_CLASS` makes this easy and automatically handles the external pointer. It reduces the amount of wrapper code required compared with potential alternatives, so the code is simpler, faster to develop and easier to maintain. - -A GDAL dataset for vector is a file or database containing one or more OGR layers. A vector dataset is represented in R as a data source name (DSN), a character string that may be a filename, database connection string, URL, virtual file, etc. 
Management of datasets and their vector schemas will be done with: - -* existing management functions in **gdalraster** that operate on vector datasets: `addFilesInZip()` (supporting SOZip), `copyDatasetFiles()`, `deleteDataset()`, `renameDataset()` -* existing internal utility functions to be further developed: `.ogr_ds_exists()`, `.create_ogr()`, `.ogr_ds_layer_count()`, `.ogr_layer_exists()`, `.ogr_layer_create()`, `.ogr_layer_delete()`, `.ogr_field_index()`, `.ogr_field_create()` -* wrappers for `vector_translate()` and `vector_info()` from the gdal_utils.h API (**gdalraster** 1.10) -* additional stand-alone functions TBD - -An `OGRLayer` object represents a layer of features within a data source. It will be modeled in R as class `GDALVector`, an exposed C++ class encapsulating an `OGRLayer` object and the `GDALDataset` that owns it. `GDALVector` will expose methods for retrieving layer information, attribute and spatial filtering, feature read/write, and layer geoprocessing. A draft definition for class `GDALVector` is given below. - -All features in an `OGRLayer` share a common schema (feature class) modeled in GDAL as `OGRFeatureDefn`. A feature class definition includes the set of attribute fields, their data types, geometry field(s), feature identifier (FID), and feature class name (normally used as a layer name). The feature class definition is represented in R as a list having as names the attribute/geometry field names, and each list element holding a field definition also as a list. - -A definition for an attribute field is a list with elements: - -``` -$type : OGR Field Type (e.g., "OFTString", "OFTInteger", etc.) 
-$width : optional field width as integer -$precision : optional precision as integer -$is_nullable: optional logical scalar -$is_unique : optional logical scalar -$default : optional field default value -$is_ignored : optional logical scalar -$is_geom : FALSE for attribute fields -``` - -A definition for a geometry field is a list with elements: - -``` -$type : geom type ("Point", "Polygon", etc.) -$srs : spatial reference as WKT string -$is_nullable: optional logical scalar -$is_ignored : optional logical scalar -$is_geom : TRUE for geometry fields -``` - -An `OGRFeature`, as read by `GDALVector::getNextFeature()` or as passed to write methods, is a list of FID, attribute and geometry field names, and their values. `GDALVector::getFeatureSet()` will return a set of features as a data frame. (Note: FID is int64 type in GDAL. Handling of int64 TBD, currently as R double type). - -An `OGRGeometry` is represented in R as a character string containing OGC Well Known Text (WKT). Likewise, an `OGRSpatialReference`, which encapsulates the definition of a projection and datum, is represented in R as WKT. **gdalraster** has existing functions for working with spatial reference systems as WKT, and a set of geometry convenience functions also operating on WKT (GEOS wrappers via GDAL headers). - -## GDALVector class - -A draft class definition is given below. It has been partially implemented in: [ADD URL]. This does not include definitions of several stand-alone functions that will provide schema management. The existing definitions in src/ogr_util.h are a starting point for those. Also note that an **Rcpp** `.finalizer` function will be implemented to properly release resources in case an object of class `GDALVector` is garbage collected without an explicit call to `close()`. 
- -```cpp -class GDALVector { - - private: - std::string dsn_in; - std::string layer_in; // layer name or sql statement - bool is_sql_in; - Rcpp::CharacterVector open_options_in; - GDALDatasetH hDataset; - GDALAccess eAccess; - OGRLayerH hLayer; - OGRFeatureDefnH hFDefn; - - public: - GDALVector(); - GDALVector(Rcpp::CharacterVector dsn); - GDALVector(Rcpp::CharacterVector dsn, std::string layer); - GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); - GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, - Rcpp::CharacterVector open_options); - - void open(bool read_only); - bool isOpen() const; - std::string getDsn() const; - Rcpp::CharacterVector getFileList() const; - std::string getDriverShortName() const; - std::string getDriverLongName() const; - - std::string getName() const; - bool testCapability(std::string capability) const; - std::string getFIDColumn() const; - std::string getGeomType() const; - std::string getGeometryColumn() const; - std::string getSpatialRef() const; - Rcpp::NumericVector bbox(); - Rcpp::List getLayerDefn() const; - - void setAttributeFilter(std::string query); - void setSpatialFilterRect(Rcpp::NumericVector bbox); - void clearSpatialFilter(); - - double getFeatureCount(); - SEXP getNextFeature(); - void resetReading(); - SEXP getFeature(GIntBig fid); // int64 handling TBD - Rcpp::DataFrame getFeatureSet(); - - void createFeature(Rcpp::List feat); - void setFeature(Rcpp::List feat); - void upsertFeature(Rcpp::List feat); - void deleteFeature(GIntBig fid); // int64 handling TBD - - bool startTransaction(bool force); - bool commitTransaction(); - bool rollbackTransaction(); - - void layerIntersection( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerUnion( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerSymDifference( - GDALVector method_layer, - GDALVector result_layer, - bool 
quiet, - Rcpp::Nullable options); - void layerIdentity( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerUpdate( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerClip( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerErase( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - - void close(); - - // methods for internal use not exported to R - void _checkAccess(GDALAccess access_needed) const; - OGRLayerH _getOGRLayerH(); -}; - -RCPP_EXPOSED_CLASS(GDALVector) -``` - -## Example: Usage - -``` r -library(gdalraster) -#> GDAL 3.8.3, released 2024/01/04, PROJ 9.3.1 - -# MTBS fires in Yellowstone National Park 1984-2022 -dsn <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") -lyr <- new(GDALVector, dsn, "mtbs_perims", TRUE) - -# dataset info -lyr$getDriverShortName() -#> [1] "GPKG" -lyr$getDriverLongName() -#> [1] "GeoPackage" -lyr$getFileList() -#> [1] "/usr/local/lib/R/site-library/gdalraster/extdata/ynp_fires_1984_2022.gpkg" - -# layer info -lyr$getName() -#> [1] "mtbs_perims" -lyr$getGeomType() -#> [1] "Multi Polygon" -lyr$getGeometryColumn() -#> [1] "geom" -lyr$getFIDColumn() -#> [1] "fid" -lyr$getSpatialRef() -#> [1] "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 
1980\",6378137,298.257222101,AUTHORITY[\"EPSG\",\"7019\"]],AUTHORITY[\"EPSG\",\"6269\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4269\"]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"latitude_of_origin\",44.25],PARAMETER[\"central_meridian\",-109.5],PARAMETER[\"standard_parallel_1\",49],PARAMETER[\"standard_parallel_2\",45],PARAMETER[\"false_easting\",600000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AXIS[\"Easting\",EAST],AXIS[\"Northing\",NORTH],AUTHORITY[\"EPSG\",\"32100\"]]" -lyr$bbox() -#> [1] 469685.73 -12917.76 573531.72 96577.34 - -# get some information about layer capabilities -lyr$testCapability("RandomRead") -#> [1] TRUE - -# dataset was opened read-only: -lyr$testCapability("SequentialWrite") -#> [1] FALSE -lyr$testCapability("RandomWrite") -#> [1] FALSE - -lyr$testCapability("FastSpatialFilter") -#> [1] TRUE -lyr$testCapability("FastFeatureCount") -#> [1] TRUE -lyr$testCapability("FastGetExtent") -#> [1] TRUE -lyr$testCapability("Transactions") -#> [1] TRUE - -# re-open with write access -# lyr$open(read_only=FALSE) -# lyr$testCapability("SequentialWrite") -# lyr$testCapability("RandomWrite") - -# feature class definition -# a list of fields and their definitions -defn <- lyr$getLayerDefn() -names(defn) -#> [1] "event_id" "incid_name" "incid_type" "map_id" "burn_bnd_ac" -#> [6] "burn_bnd_lat" "burn_bnd_lon" "ig_date" "ig_year" "geom" - -# each list element holds a list containing a field definition -str(defn) -#> List of 10 -#> $ event_id :List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 254 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ incid_name :List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 254 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi 
TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ incid_type :List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 254 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ map_id :List of 8 -#> ..$ type : chr "OFTInteger64" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ burn_bnd_ac :List of 8 -#> ..$ type : chr "OFTInteger64" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ burn_bnd_lat:List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 10 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ burn_bnd_lon:List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 10 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ ig_date :List of 8 -#> ..$ type : chr "default (read as OFTString)" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ ig_year :List of 8 -#> ..$ type : chr "OFTInteger" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ geom :List of 5 -#> ..$ type : chr "Multi Polygon" -#> ..$ srs : 
chr "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 1980\",6378137,2"| __truncated__ -#> ..$ is_nullable: logi TRUE -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi TRUE - -lyr$getFeatureCount() -#> [1] 61 - -# cursor -feat <- lyr$getNextFeature() -# a list of fields and their values -str(feat) -#> List of 11 -#> $ FID : num 1 -#> $ event_id : chr "WY4413411069519870807" -#> $ incid_name : chr "POLECAT" -#> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 1093 -#> $ burn_bnd_lat: chr "44.132" -#> $ burn_bnd_lon: chr "-110.696" -#> $ ig_date : chr "1987/08/07" -#> $ ig_year : int 1987 -#> $ geom : chr "MULTIPOLYGON (((503099.439579653 -12893.9672899192,503169.756694236 -12756.3721247327,502689.845907435 -12131.5"| __truncated__ - -# attribute filter -lyr$setAttributeFilter("ig_year = 2020") -lyr$getFeatureCount() -#> [1] 1 - -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID : num 61 -#> $ event_id : chr "WY4438911082120200822" -#> $ incid_name : chr "LONE STAR" -#> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 3348 -#> $ burn_bnd_lat: chr "44.4" -#> $ burn_bnd_lon: chr "-110.782" -#> $ ig_date : chr "2020/08/22" -#> $ ig_year : int 2020 -#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 15388.0465"| __truncated__ - -feat <- lyr$getNextFeature() -str(feat) -#> NULL - -lyr$resetReading() -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID : num 61 -#> $ event_id : chr "WY4438911082120200822" -#> $ incid_name : chr "LONE STAR" -#> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 3348 -#> $ burn_bnd_lat: chr "44.4" -#> $ burn_bnd_lon: chr "-110.782" -#> $ ig_date : chr "2020/08/22" -#> $ ig_year : int 2020 -#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 
15388.0465"| __truncated__ - -# clear attribute filter -lyr$setAttributeFilter("") -lyr$getFeatureCount() -#> [1] 61 - -# spatial filter -# find the largest 1988 fire -lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID : num 7 -#> $ event_id : chr "WY4470811082119880722" -#> $ incid_name : chr "NORTH FORK" -#> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 563527 -#> $ burn_bnd_lat: chr "44.678" -#> $ burn_bnd_lon: chr "-110.716" -#> $ ig_date : chr "1988/07/22" -#> $ ig_year : int 1988 -#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ - -bbox <- bbox_from_wkt(feat$geom) -bbox -#> [1] 469685.97 11442.45 544069.63 85508.15 - -lyr$setAttributeFilter("") -lyr$getFeatureCount() -#> [1] 61 - -lyr$setSpatialFilterRect(bbox) -lyr$getFeatureCount() -#> [1] 40 - -lyr$clearSpatialFilter() - -## layer intersection - -# largest 1988 fire (FID from above) -lyr$setAttributeFilter("FID = 7") -lyr$getFeatureCount() -#> [1] 1 -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID : num 7 -#> $ event_id : chr "WY4470811082119880722" -#> $ incid_name : chr "NORTH FORK" -#> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 563527 -#> $ burn_bnd_lat: chr "44.678" -#> $ burn_bnd_lon: chr "-110.716" -#> $ ig_date : chr "1988/07/22" -#> $ ig_year : int 1988 -#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ - -# 2000-2022 fires -sql <- "SELECT FID, * FROM mtbs_perims WHERE ig_year >= 2000 ORDER BY mtbs_perims.ig_year" -lyr2 <- new(GDALVector, dsn, sql) -lyr2$getFeatureCount() -#> [1] 40 - -# create the output layer in a temporary in-memory dataset -dsn_out <- "/vsimem/lyr_proc_tmp.gpkg" -srs <- lyr$getSpatialRef() -# this uses existing internal OGR 
utilities -gdalraster:::.create_ogr("GPKG", dsn_out, 0, 0, 0, "Unknown", "int_result", srs) -#> [1] TRUE -gdalraster:::.ogr_ds_exists(dsn_out, with_update=TRUE) -#> [1] TRUE -gdalraster:::.ogr_layer_exists(dsn_out, "int_result") -#> [1] TRUE - -lyr_out <- new(GDALVector, dsn_out, "int_result", read_only=FALSE) -lyr_out$getFeatureCount() -#> [1] 0 - -# intersection of lyr and lyr2, with result in lyr_out -lyr$layerIntersection(lyr2, lyr_out, quiet=FALSE, options=NULL) -#> 0...10...20...30...40...50...60...70...80...90...100 - done. - -lyr_out$getFeatureCount() -#> [1] 5 -defn <- lyr_out$getLayerDefn() -# combined attributes -names(defn) -#> [1] "input_event_id" "input_incid_name" "input_incid_type" -#> [4] "input_map_id" "input_burn_bnd_ac" "input_burn_bnd_lat" -#> [7] "input_burn_bnd_lon" "input_ig_date" "input_ig_year" -#> [10] "method_event_id" "method_incid_name" "method_incid_type" -#> [13] "method_map_id" "method_burn_bnd_ac" "method_burn_bnd_lat" -#> [16] "method_burn_bnd_lon" "method_ig_date" "method_ig_year" -#> [19] "geom" - -# we don't have vector plotting yet, so use plot_raster() -# rasterize and display the output layer -out_file <- "/vsimem/int_result.tif" -rasterize(src_dsn = dsn_out, - dstfile = out_file, - layer = "int_result", - burn_attr = "method_ig_year", - tr = c(90,90), - tap = TRUE, - dtName = "Int16", - dstnodata = -9999, - init = -9999) -#> 0...10...20...30...40...50...60...70...80...90...100 - done. 
- -ds <- new(GDALRaster, out_file) -pal <- scales::viridis_pal(end = 0.8, direction = -1)(6) -ramp <- scales::colour_ramp(pal) -plot_raster(ds, legend = TRUE, col_map_fn = ramp, na_col = "#d9d9d9", - main="2000-2022 re-burns on the 1988 North Fork perimeter") -``` - -![](https://i.imgur.com/O1CdGd1.png) - -``` r - -ds$close() -lyr$close() -lyr2$close() -lyr_out$close() - -vsi_unlink(dsn_out) -vsi_unlink(out_file) -``` - -Created on 2024-02-25 with [reprex v2.1.0](https://reprex.tidyverse.org) - From eb9b8b3cd8176a81b512973c61e2605b181a7b11 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 16 Mar 2024 16:09:51 -0600 Subject: [PATCH 20/53] reformat code style --- src/ogr_util.cpp | 560 +++++++++++++++++++++++------------------------ 1 file changed, 280 insertions(+), 280 deletions(-) diff --git a/src/ogr_util.cpp b/src/ogr_util.cpp index dc24b1ade..67716806e 100644 --- a/src/ogr_util.cpp +++ b/src/ogr_util.cpp @@ -10,25 +10,25 @@ #include "ogr_srs_api.h" //' Does vector dataset exist -//' +//' //' @noRd // [[Rcpp::export(name = ".ogr_ds_exists")]] bool _ogr_ds_exists(std::string dsn, bool with_update = false) { - GDALDatasetH hDS; - - CPLPushErrorHandler(CPLQuietErrorHandler); - if (with_update) - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, - NULL, NULL, NULL); - else - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); - CPLPopErrorHandler(); - if (hDS == NULL) - return false; - - GDALClose(hDS); - return true; + GDALDatasetH hDS; + + CPLPushErrorHandler(CPLQuietErrorHandler); + if (with_update) + hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, + NULL, NULL, NULL); + else + hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); + CPLPopErrorHandler(); + if (hDS == NULL) + return false; + + GDALClose(hDS); + return true; } //' Create a vector dataset with layer and field @@ -37,139 +37,139 @@ bool _ogr_ds_exists(std::string dsn, bool with_update = false) { //' @noRd // [[Rcpp::export(name = 
".create_ogr")]] bool _create_ogr(std::string format, std::string dst_filename, - int xsize, int ysize, int nbands, std::string dataType, - std::string layer, std::string srs = "", std::string fld_name = "", - Rcpp::Nullable dsco = R_NilValue, - Rcpp::Nullable lco = R_NilValue) { - - GDALDriverH hDriver = GDALGetDriverByName( format.c_str() ); - if (hDriver == NULL) - Rcpp::stop("Failed to get driver for the specified format."); - - char **papszMetadata = GDALGetMetadata(hDriver, NULL); - if (!CPLFetchBool(papszMetadata, GDAL_DCAP_CREATE, FALSE)) - Rcpp::stop("Driver does not support create."); - - GDALDataType dt = GDALGetDataTypeByName( dataType.c_str() ); - - std::vector opt_list = {NULL}; - if (dsco.isNotNull()) { - Rcpp::CharacterVector dsco_in(dsco); - opt_list.resize(dsco_in.size() + 1); - for (R_xlen_t i = 0; i < dsco_in.size(); ++i) { - opt_list[i] = (char *) (dsco_in[i]); - } - opt_list[dsco_in.size()] = NULL; - } - - GDALDatasetH hDstDS = NULL; - hDstDS = GDALCreate(hDriver, dst_filename.c_str(), - xsize, ysize, nbands, dt, - opt_list.data()); - - if (hDstDS == NULL) - Rcpp::stop("Create dataset failed."); - - if (!GDALDatasetTestCapability(hDstDS, ODsCCreateLayer)) { - GDALClose(hDstDS); - return false; - } - - OGRLayerH hLayer; - OGRFieldDefnH hFieldDefn; - bool layer_ok = false; - bool fld_ok = false; - - opt_list.clear(); - if (lco.isNotNull()) { - Rcpp::CharacterVector lco_in(lco); - opt_list.resize(lco_in.size() + 1); - for (R_xlen_t i = 0; i < lco_in.size(); ++i) { - opt_list[i] = (char *) (lco_in[i]); - } - } - opt_list.push_back(NULL); - - OGRSpatialReferenceH hSRS = OSRNewSpatialReference(NULL); - if (srs != "") { - if (OSRSetFromUserInput(hSRS, srs.c_str()) != OGRERR_NONE) { - GDALClose(hDstDS); - Rcpp::stop("Error importing SRS from user input."); - } - } - - hLayer = GDALDatasetCreateLayer(hDstDS, layer.c_str(), hSRS, wkbPolygon, - opt_list.data()); - - if (hLayer == NULL) { - layer_ok = false; - } - else { - layer_ok = true; - if (fld_name 
!= "") { - hFieldDefn = OGR_Fld_Create(fld_name.c_str(), OFTInteger); - if (OGR_L_CreateField(hLayer, hFieldDefn, TRUE) != OGRERR_NONE) - fld_ok = false; - else - fld_ok = true; - OGR_Fld_Destroy(hFieldDefn); - } - else { - fld_ok = true; - } - } - - OSRDestroySpatialReference(hSRS); - GDALClose(hDstDS); - - if (layer_ok && fld_ok) - return true; - else - return false; + int xsize, int ysize, int nbands, std::string dataType, + std::string layer, std::string srs = "", std::string fld_name = "", + Rcpp::Nullable dsco = R_NilValue, + Rcpp::Nullable lco = R_NilValue) { + + GDALDriverH hDriver = GDALGetDriverByName( format.c_str() ); + if (hDriver == NULL) + Rcpp::stop("Failed to get driver for the specified format."); + + char **papszMetadata = GDALGetMetadata(hDriver, NULL); + if (!CPLFetchBool(papszMetadata, GDAL_DCAP_CREATE, FALSE)) + Rcpp::stop("Driver does not support create."); + + GDALDataType dt = GDALGetDataTypeByName( dataType.c_str() ); + + std::vector opt_list = {NULL}; + if (dsco.isNotNull()) { + Rcpp::CharacterVector dsco_in(dsco); + opt_list.resize(dsco_in.size() + 1); + for (R_xlen_t i = 0; i < dsco_in.size(); ++i) { + opt_list[i] = (char *) (dsco_in[i]); + } + opt_list[dsco_in.size()] = NULL; + } + + GDALDatasetH hDstDS = NULL; + hDstDS = GDALCreate(hDriver, dst_filename.c_str(), + xsize, ysize, nbands, dt, + opt_list.data()); + + if (hDstDS == NULL) + Rcpp::stop("Create dataset failed."); + + if (!GDALDatasetTestCapability(hDstDS, ODsCCreateLayer)) { + GDALClose(hDstDS); + return false; + } + + OGRLayerH hLayer; + OGRFieldDefnH hFieldDefn; + bool layer_ok = false; + bool fld_ok = false; + + opt_list.clear(); + if (lco.isNotNull()) { + Rcpp::CharacterVector lco_in(lco); + opt_list.resize(lco_in.size() + 1); + for (R_xlen_t i = 0; i < lco_in.size(); ++i) { + opt_list[i] = (char *) (lco_in[i]); + } + } + opt_list.push_back(NULL); + + OGRSpatialReferenceH hSRS = OSRNewSpatialReference(NULL); + if (srs != "") { + if (OSRSetFromUserInput(hSRS, srs.c_str()) 
!= OGRERR_NONE) { + GDALClose(hDstDS); + Rcpp::stop("Error importing SRS from user input."); + } + } + + hLayer = GDALDatasetCreateLayer(hDstDS, layer.c_str(), hSRS, wkbPolygon, + opt_list.data()); + + if (hLayer == NULL) { + layer_ok = false; + } + else { + layer_ok = true; + if (fld_name != "") { + hFieldDefn = OGR_Fld_Create(fld_name.c_str(), OFTInteger); + if (OGR_L_CreateField(hLayer, hFieldDefn, TRUE) != OGRERR_NONE) + fld_ok = false; + else + fld_ok = true; + OGR_Fld_Destroy(hFieldDefn); + } + else { + fld_ok = true; + } + } + + OSRDestroySpatialReference(hSRS); + GDALClose(hDstDS); + + if (layer_ok && fld_ok) + return true; + else + return false; } //' Get number of layers in a dataset -//' +//' //' @noRd // [[Rcpp::export(name = ".ogr_ds_layer_count")]] int _ogr_ds_layer_count(std::string dsn) { - GDALDatasetH hDS; - - CPLPushErrorHandler(CPLQuietErrorHandler); - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); - if (hDS == NULL) - return -1; - CPLPopErrorHandler(); - - int cnt = GDALDatasetGetLayerCount(hDS); - GDALClose(hDS); - return cnt; + GDALDatasetH hDS; + + CPLPushErrorHandler(CPLQuietErrorHandler); + hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); + if (hDS == NULL) + return -1; + CPLPopErrorHandler(); + + int cnt = GDALDatasetGetLayerCount(hDS); + GDALClose(hDS); + return cnt; } //' Does layer exist -//' +//' //' @noRd // [[Rcpp::export(name = ".ogr_layer_exists")]] bool _ogr_layer_exists(std::string dsn, std::string layer) { - GDALDatasetH hDS; - OGRLayerH hLayer; - bool ret; - - CPLPushErrorHandler(CPLQuietErrorHandler); - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); - if (hDS == NULL) - return false; - hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); - CPLPopErrorHandler(); - if (hLayer == NULL) - ret = false; - else - ret = true; - - GDALClose(hDS); - return ret; + GDALDatasetH hDS; + OGRLayerH hLayer; + bool ret; + + CPLPushErrorHandler(CPLQuietErrorHandler); + hDS = 
GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); + if (hDS == NULL) + return false; + hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); + CPLPopErrorHandler(); + if (hLayer == NULL) + ret = false; + else + ret = true; + + GDALClose(hDS); + return ret; } //' Create a layer in a vector dataset @@ -178,51 +178,51 @@ bool _ogr_layer_exists(std::string dsn, std::string layer) { //' @noRd // [[Rcpp::export(name = ".ogr_layer_create")]] bool _ogr_layer_create(std::string dsn, std::string layer, - std::string srs = "", - Rcpp::Nullable options = R_NilValue) { - - GDALDatasetH hDS; - OGRLayerH hLayer; - bool ret; - - OGRSpatialReferenceH hSRS = OSRNewSpatialReference(NULL); - if (srs != "") { - if (OSRSetFromUserInput(hSRS, srs.c_str()) != OGRERR_NONE) - Rcpp::stop("Error importing SRS from user input."); - } - - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, - NULL, NULL, NULL); - - if (hDS == NULL) - return false; - - if (!GDALDatasetTestCapability(hDS, ODsCCreateLayer)) { - GDALClose(hDS); - return false; - } - - std::vector opt_list = {NULL}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = NULL; - } - - hLayer = GDALDatasetCreateLayer(hDS, layer.c_str(), hSRS, wkbPolygon, - opt_list.data()); - - if (hLayer == NULL) - ret = false; - else - ret = true; - - OSRDestroySpatialReference(hSRS); - GDALClose(hDS); - return ret; + std::string srs = "", + Rcpp::Nullable options = R_NilValue) { + + GDALDatasetH hDS; + OGRLayerH hLayer; + bool ret; + + OGRSpatialReferenceH hSRS = OSRNewSpatialReference(NULL); + if (srs != "") { + if (OSRSetFromUserInput(hSRS, srs.c_str()) != OGRERR_NONE) + Rcpp::stop("Error importing SRS from user input."); + } + + hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, + NULL, NULL, NULL); + + if (hDS == NULL) + 
return false; + + if (!GDALDatasetTestCapability(hDS, ODsCCreateLayer)) { + GDALClose(hDS); + return false; + } + + std::vector opt_list = {NULL}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = NULL; + } + + hLayer = GDALDatasetCreateLayer(hDS, layer.c_str(), hSRS, wkbPolygon, + opt_list.data()); + + if (hLayer == NULL) + ret = false; + else + ret = true; + + OSRDestroySpatialReference(hSRS); + GDALClose(hDS); + return ret; } //' Delete a layer in a vector dataset @@ -231,72 +231,72 @@ bool _ogr_layer_create(std::string dsn, std::string layer, // [[Rcpp::export(name = ".ogr_layer_delete")]] bool _ogr_layer_delete(std::string dsn, std::string layer) { - GDALDatasetH hDS; - OGRLayerH hLayer; - int layer_cnt, layer_idx; - bool ret; - - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, - NULL, NULL, NULL); - - if (hDS == NULL) - return false; - - if (!GDALDatasetTestCapability(hDS, ODsCDeleteLayer)) { - GDALClose(hDS); - return false; - } - - hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); - if (hLayer == NULL) { - GDALClose(hDS); - return false; - } - - layer_cnt = GDALDatasetGetLayerCount(hDS); - for (layer_idx=0; layer_idx < layer_cnt; ++layer_idx) { - hLayer = GDALDatasetGetLayer(hDS, layer_idx); - if (EQUAL(OGR_L_GetName(hLayer), layer.c_str())) - break; - } - - if (GDALDatasetDeleteLayer(hDS, layer_idx) != OGRERR_NONE) - ret = false; - else - ret = true; - - GDALClose(hDS); - return ret; + GDALDatasetH hDS; + OGRLayerH hLayer; + int layer_cnt, layer_idx; + bool ret; + + hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, + NULL, NULL, NULL); + + if (hDS == NULL) + return false; + + if (!GDALDatasetTestCapability(hDS, ODsCDeleteLayer)) { + GDALClose(hDS); + return false; + } + + hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); + 
if (hLayer == NULL) { + GDALClose(hDS); + return false; + } + + layer_cnt = GDALDatasetGetLayerCount(hDS); + for (layer_idx=0; layer_idx < layer_cnt; ++layer_idx) { + hLayer = GDALDatasetGetLayer(hDS, layer_idx); + if (EQUAL(OGR_L_GetName(hLayer), layer.c_str())) + break; + } + + if (GDALDatasetDeleteLayer(hDS, layer_idx) != OGRERR_NONE) + ret = false; + else + ret = true; + + GDALClose(hDS); + return ret; } //' Get field index or -1 if fld_name not found -//' +//' //' @noRd // [[Rcpp::export(name = ".ogr_field_index")]] int _ogr_field_index(std::string dsn, std::string layer, - std::string fld_name) { - - GDALDatasetH hDS; - OGRLayerH hLayer; - OGRFeatureDefnH hFDefn; - int iField; - - CPLPushErrorHandler(CPLQuietErrorHandler); - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); - if (hDS == NULL) - return -1; - hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); - CPLPopErrorHandler(); - - if (hLayer == NULL) { - GDALClose(hDS); - return -1; - } - - hFDefn = OGR_L_GetLayerDefn(hLayer); - iField = OGR_FD_GetFieldIndex(hFDefn, fld_name.c_str()); - GDALClose(hDS); - return iField; + std::string fld_name) { + + GDALDatasetH hDS; + OGRLayerH hLayer; + OGRFeatureDefnH hFDefn; + int iField; + + CPLPushErrorHandler(CPLQuietErrorHandler); + hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR, NULL, NULL, NULL); + if (hDS == NULL) + return -1; + hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); + CPLPopErrorHandler(); + + if (hLayer == NULL) { + GDALClose(hDS); + return -1; + } + + hFDefn = OGR_L_GetLayerDefn(hLayer); + iField = OGR_FD_GetFieldIndex(hFDefn, fld_name.c_str()); + GDALClose(hDS); + return iField; } //' Create a new field on layer @@ -305,46 +305,46 @@ int _ogr_field_index(std::string dsn, std::string layer, //' @noRd // [[Rcpp::export(name = ".ogr_field_create")]] bool _ogr_field_create(std::string dsn, std::string layer, - std::string fld_name) { - - GDALDatasetH hDS; - OGRLayerH hLayer; - OGRFeatureDefnH hFDefn; - int iField; - 
OGRFieldDefnH hFieldDefn; - bool ret; - - CPLPushErrorHandler(CPLQuietErrorHandler); - hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, - NULL, NULL, NULL); - - if (hDS == NULL) - return false; - - hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); - CPLPopErrorHandler(); - - if (hLayer == NULL) { - GDALClose(hDS); - return false; - } - - hFDefn = OGR_L_GetLayerDefn(hLayer); - iField = OGR_FD_GetFieldIndex(hFDefn, fld_name.c_str()); - if (iField >= 0) { - // fld_name already exists - GDALClose(hDS); - return false; - } - - hFieldDefn = OGR_Fld_Create(fld_name.c_str(), OFTInteger); - if (OGR_L_CreateField(hLayer, hFieldDefn, TRUE) != OGRERR_NONE) - ret = false; - else - ret = true; - - OGR_Fld_Destroy(hFieldDefn); - GDALClose(hDS); - return ret; + std::string fld_name) { + + GDALDatasetH hDS; + OGRLayerH hLayer; + OGRFeatureDefnH hFDefn; + int iField; + OGRFieldDefnH hFieldDefn; + bool ret; + + CPLPushErrorHandler(CPLQuietErrorHandler); + hDS = GDALOpenEx(dsn.c_str(), GDAL_OF_VECTOR | GDAL_OF_UPDATE, + NULL, NULL, NULL); + + if (hDS == NULL) + return false; + + hLayer = GDALDatasetGetLayerByName(hDS, layer.c_str()); + CPLPopErrorHandler(); + + if (hLayer == NULL) { + GDALClose(hDS); + return false; + } + + hFDefn = OGR_L_GetLayerDefn(hLayer); + iField = OGR_FD_GetFieldIndex(hFDefn, fld_name.c_str()); + if (iField >= 0) { + // fld_name already exists + GDALClose(hDS); + return false; + } + + hFieldDefn = OGR_Fld_Create(fld_name.c_str(), OFTInteger); + if (OGR_L_CreateField(hLayer, hFieldDefn, TRUE) != OGRERR_NONE) + ret = false; + else + ret = true; + + OGR_Fld_Destroy(hFieldDefn); + GDALClose(hDS); + return ret; } From 5146434168160d7ca1e7b73b5a4bfa1525e29ba6 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 16 Mar 2024 16:34:57 -0600 Subject: [PATCH 21/53] reformat code style --- src/gdalvector.cpp | 1180 ++++++++++++++++++++++---------------------- src/gdalvector.h | 167 ++++--- 2 files changed, 673 insertions(+), 674 deletions(-) diff --git 
a/src/gdalvector.cpp b/src/gdalvector.cpp index 0320c3951..a30991a80 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -12,655 +12,656 @@ #include "gdalvector.h" -GDALVector::GDALVector() : - dsn_in(""), - layer_in(""), - is_sql_in(false), - open_options_in(Rcpp::CharacterVector::create()), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(nullptr) {} - -GDALVector::GDALVector(Rcpp::CharacterVector dsn) : - GDALVector( - dsn, - "", - true, - Rcpp::CharacterVector::create()) {} - -GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : - GDALVector( - dsn, - layer, - true, - Rcpp::CharacterVector::create()) {} +GDALVector::GDALVector() : + dsn_in(""), + layer_in(""), + is_sql_in(false), + open_options_in(Rcpp::CharacterVector::create()), + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(nullptr) {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn) : + GDALVector( + dsn, + "", + true, + Rcpp::CharacterVector::create()) {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : + GDALVector( + dsn, + layer, + true, + Rcpp::CharacterVector::create()) {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, - bool read_only) : - GDALVector( - dsn, - layer, - read_only, - Rcpp::CharacterVector::create()) {} + bool read_only) : + + GDALVector( + dsn, + layer, + read_only, + Rcpp::CharacterVector::create()) {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, - bool read_only, Rcpp::CharacterVector open_options) : - layer_in(layer), - open_options_in(open_options), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(nullptr) { + bool read_only, Rcpp::CharacterVector open_options) : + + layer_in(layer), + open_options_in(open_options), + hDataset(nullptr), + eAccess(GA_ReadOnly), + hLayer(nullptr) { - dsn_in = Rcpp::as(_check_gdal_filename(dsn)); - open(read_only); + dsn_in = Rcpp::as(_check_gdal_filename(dsn)); + open(read_only); } void GDALVector::open(bool read_only) { - if (dsn_in 
== "") - Rcpp::stop("DSN is not set."); - - if (hDataset != nullptr) { - if (is_sql_in) - GDALDatasetReleaseResultSet(hDataset, hLayer); - GDALReleaseDataset(hDataset); - hDataset = nullptr; - hLayer = nullptr; - } - - if (read_only) - eAccess = GA_ReadOnly; - else - eAccess = GA_Update; - - std::vector dsoo(open_options_in.size() + 1); - if (open_options_in.size() > 0) { - for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { - dsoo[i] = (char *) (open_options_in[i]); - } - } - dsoo.push_back(nullptr); - - unsigned int nOpenFlags = GDAL_OF_VECTOR; - if (read_only) - nOpenFlags |= GDAL_OF_READONLY; - else - nOpenFlags |= GDAL_OF_UPDATE; - - hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, - nullptr, dsoo.data(), nullptr); - if (hDataset == nullptr) - Rcpp::stop("Open dataset failed."); - - if (layer_in == "") { - is_sql_in = false; - hLayer = GDALDatasetGetLayer(hDataset, 0); - } - else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { - is_sql_in = true; - hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), - nullptr, nullptr); - } - else { - is_sql_in = false; - hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); - } - - if (hLayer == nullptr) { - GDALReleaseDataset(hDataset); - Rcpp::stop("Get layer failed."); - } - else { - OGR_L_ResetReading(hLayer); - } - - hFDefn = OGR_L_GetLayerDefn(hLayer); - if (hFDefn == nullptr) { - if (is_sql_in) - GDALDatasetReleaseResultSet(hDataset, hLayer); - GDALReleaseDataset(hDataset); - Rcpp::stop("Get layer definition failed."); - } + if (dsn_in == "") + Rcpp::stop("DSN is not set."); + + if (hDataset != nullptr) { + if (is_sql_in) + GDALDatasetReleaseResultSet(hDataset, hLayer); + GDALReleaseDataset(hDataset); + hDataset = nullptr; + hLayer = nullptr; + } + + if (read_only) + eAccess = GA_ReadOnly; + else + eAccess = GA_Update; + + std::vector dsoo(open_options_in.size() + 1); + if (open_options_in.size() > 0) { + for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { + dsoo[i] = (char *) 
(open_options_in[i]); + } + } + dsoo.push_back(nullptr); + + unsigned int nOpenFlags = GDAL_OF_VECTOR; + if (read_only) + nOpenFlags |= GDAL_OF_READONLY; + else + nOpenFlags |= GDAL_OF_UPDATE; + + hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, + dsoo.data(), nullptr); + if (hDataset == nullptr) + Rcpp::stop("Open dataset failed."); + + if (layer_in == "") { + is_sql_in = false; + hLayer = GDALDatasetGetLayer(hDataset, 0); + } + else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { + is_sql_in = true; + hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), + nullptr, nullptr); + } + else { + is_sql_in = false; + hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); + } + + if (hLayer == nullptr) { + GDALReleaseDataset(hDataset); + Rcpp::stop("Get layer failed."); + } + else { + OGR_L_ResetReading(hLayer); + } + + hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) { + if (is_sql_in) + GDALDatasetReleaseResultSet(hDataset, hLayer); + GDALReleaseDataset(hDataset); + Rcpp::stop("Get layer definition failed."); + } } bool GDALVector::isOpen() const { - if (hDataset == nullptr) - return false; - else - return true; + if (hDataset == nullptr) + return false; + else + return true; } std::string GDALVector::getDsn() const { - return dsn_in; + return dsn_in; } Rcpp::CharacterVector GDALVector::getFileList() const { - _checkAccess(GA_ReadOnly); - - char **papszFiles; - papszFiles = GDALGetFileList(hDataset); - - int items = CSLCount(papszFiles); - if (items > 0) { - Rcpp::CharacterVector files(items); - for (int i=0; i < items; ++i) { - files(i) = papszFiles[i]; - } - CSLDestroy(papszFiles); - return files; - } - else { - CSLDestroy(papszFiles); - return ""; - } + _checkAccess(GA_ReadOnly); + + char **papszFiles; + papszFiles = GDALGetFileList(hDataset); + + int items = CSLCount(papszFiles); + if (items > 0) { + Rcpp::CharacterVector files(items); + for (int i=0; i < items; ++i) { + files(i) = papszFiles[i]; + } + CSLDestroy(papszFiles); 
+ return files; + } + else { + CSLDestroy(papszFiles); + return ""; + } } std::string GDALVector::getDriverShortName() const { - _checkAccess(GA_ReadOnly); - - GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); - return GDALGetDriverShortName(hDriver); + _checkAccess(GA_ReadOnly); + + GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); + return GDALGetDriverShortName(hDriver); } std::string GDALVector::getDriverLongName() const { - _checkAccess(GA_ReadOnly); - - GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); - return GDALGetDriverLongName(hDriver); + _checkAccess(GA_ReadOnly); + + GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); + return GDALGetDriverLongName(hDriver); } std::string GDALVector::getName() const { - _checkAccess(GA_ReadOnly); + _checkAccess(GA_ReadOnly); - return OGR_L_GetName(hLayer); + return OGR_L_GetName(hLayer); } bool GDALVector::testCapability(std::string capability) const { - _checkAccess(GA_ReadOnly); - - return OGR_L_TestCapability(hLayer, capability.c_str()); + _checkAccess(GA_ReadOnly); + + return OGR_L_TestCapability(hLayer, capability.c_str()); } std::string GDALVector::getFIDColumn() const { - _checkAccess(GA_ReadOnly); + _checkAccess(GA_ReadOnly); - return OGR_L_GetFIDColumn(hLayer); + return OGR_L_GetFIDColumn(hLayer); } std::string GDALVector::getGeomType() const { - _checkAccess(GA_ReadOnly); - - OGRwkbGeometryType eType = OGR_L_GetGeomType(hLayer); - return OGRGeometryTypeToName(eType); + _checkAccess(GA_ReadOnly); + + OGRwkbGeometryType eType = OGR_L_GetGeomType(hLayer); + return OGRGeometryTypeToName(eType); } std::string GDALVector::getGeometryColumn() const { - _checkAccess(GA_ReadOnly); + _checkAccess(GA_ReadOnly); - return OGR_L_GetGeometryColumn(hLayer); + return OGR_L_GetGeometryColumn(hLayer); } std::string GDALVector::getSpatialRef() const { - // OGRLayer::GetSpatialRef() as WKT string - _checkAccess(GA_ReadOnly); + // OGRLayer::GetSpatialRef() as WKT string + _checkAccess(GA_ReadOnly); - 
OGRSpatialReferenceH hSRS = OGR_L_GetSpatialRef(hLayer); - if (hSRS == nullptr) - Rcpp::stop("Error: could not obtain SRS."); - char *pszSRS_WKT = nullptr; - if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { - Rcpp::stop("Error exporting SRS to WKT."); - } - std::string srs_wkt(pszSRS_WKT); - CPLFree(pszSRS_WKT); + OGRSpatialReferenceH hSRS = OGR_L_GetSpatialRef(hLayer); + if (hSRS == nullptr) + Rcpp::stop("Error: could not obtain SRS."); + char *pszSRS_WKT = nullptr; + if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) + Rcpp::stop("Error exporting SRS to WKT."); + std::string srs_wkt(pszSRS_WKT); + CPLFree(pszSRS_WKT); - return srs_wkt; + return srs_wkt; } Rcpp::NumericVector GDALVector::bbox() { - // Note: bForce=true in tha call to OGR_L_GetExtent(), so the entire - // layer may be scanned to compute MBR. - // see: testCapability("FastGetExtent") - // Depending on the driver, a spatial filter may/may not be taken into - // account. So it is safer to call bbox() without setting a spatial filter. - _checkAccess(GA_ReadOnly); - - OGREnvelope envelope; - if (OGR_L_GetExtent(hLayer, &envelope, true) != OGRERR_NONE) - Rcpp::stop("Error: the extent of the layer cannot be determined."); - - Rcpp::NumericVector bbox_out = - {envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY}; - - return bbox_out; + // Note: bForce=true in tha call to OGR_L_GetExtent(), so the entire + // layer may be scanned to compute MBR. + // see: testCapability("FastGetExtent") + // Depending on the driver, a spatial filter may/may not be taken into + // account. So it is safer to call bbox() without setting a spatial filter. 
+ _checkAccess(GA_ReadOnly); + + OGREnvelope envelope; + if (OGR_L_GetExtent(hLayer, &envelope, true) != OGRERR_NONE) + Rcpp::stop("Error: the extent of the layer cannot be determined."); + + Rcpp::NumericVector bbox_out = + {envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY}; + + return bbox_out; } Rcpp::List GDALVector::getLayerDefn() const { - _checkAccess(GA_ReadOnly); - - Rcpp::List list_out = Rcpp::List::create(); - std::string sValue; - int nValue; - bool bValue; - int iField; - - // attribute fields - // TODO: add subtype and field domain name - for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { - - Rcpp::List list_fld_defn = Rcpp::List::create(); - OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); - if (hFieldDefn == nullptr) - Rcpp::stop("Error: could not obtain field definition."); - - OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - // TODO: add list types, date, time, binary, etc. - if (fld_type == OFTInteger) { - sValue = "OFTInteger"; - } - else if (fld_type == OFTInteger64) { - sValue = "OFTInteger64"; - } - else if (fld_type == OFTReal) { - sValue = "OFTReal"; - } - else if (fld_type == OFTString) { - sValue = "OFTString"; - } - else { - sValue = "default (read as OFTString)"; - } - list_fld_defn.push_back(sValue, "type"); - - nValue = OGR_Fld_GetWidth(hFieldDefn); - list_fld_defn.push_back(nValue, "width"); - - nValue = OGR_Fld_GetPrecision(hFieldDefn); - list_fld_defn.push_back(nValue, "precision"); - - bValue = OGR_Fld_IsNullable(hFieldDefn); - list_fld_defn.push_back(bValue, "is_nullable"); - - bValue = OGR_Fld_IsUnique(hFieldDefn); - list_fld_defn.push_back(bValue, "is_unique"); - - if (OGR_Fld_GetDefault(hFieldDefn) != nullptr) - sValue = std::string(OGR_Fld_GetDefault(hFieldDefn)); - else - sValue = ""; - list_fld_defn.push_back(sValue, "default"); - - bValue = OGR_Fld_IsIgnored(hFieldDefn); - list_fld_defn.push_back(bValue, "is_ignored"); - - bValue = false; - list_fld_defn.push_back(bValue, 
"is_geom"); - - list_out.push_back(list_fld_defn, OGR_Fld_GetNameRef(hFieldDefn)); - } - - // geometry fields - for (int i = 0; i < OGR_FD_GetGeomFieldCount(hFDefn); ++i) { - - Rcpp::List list_geom_fld_defn = Rcpp::List::create(); - OGRGeomFieldDefnH hGeomFldDefn = - OGR_FD_GetGeomFieldDefn(hFDefn, i); - if (hGeomFldDefn == nullptr) - Rcpp::stop("Error: could not obtain geometry field definition."); - - OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); - sValue = std::string(OGRGeometryTypeToName(eType)); - list_geom_fld_defn.push_back(sValue, "type"); - - OGRSpatialReferenceH hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); - if (hSRS == nullptr) - Rcpp::stop("Error: could not obtain geometry SRS."); - char *pszSRS_WKT = nullptr; - if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { - Rcpp::stop("Error exporting geometry SRS to WKT."); - } - sValue = std::string(pszSRS_WKT); - list_geom_fld_defn.push_back(sValue, "srs"); - - bValue = OGR_GFld_IsNullable(hGeomFldDefn); - list_geom_fld_defn.push_back(bValue, "is_nullable"); - - bValue = OGR_GFld_IsIgnored(hGeomFldDefn); - list_geom_fld_defn.push_back(bValue, "is_ignored"); - - bValue = true; - list_geom_fld_defn.push_back(bValue, "is_geom"); - - list_out.push_back(list_geom_fld_defn, - OGR_GFld_GetNameRef(hGeomFldDefn)); - - CPLFree(pszSRS_WKT); - } - - return list_out; + _checkAccess(GA_ReadOnly); + + Rcpp::List list_out = Rcpp::List::create(); + std::string sValue; + int nValue; + bool bValue; + int iField; + + // attribute fields + // TODO: add subtype and field domain name + for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { + + Rcpp::List list_fld_defn = Rcpp::List::create(); + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); + if (hFieldDefn == nullptr) + Rcpp::stop("Error: could not obtain field definition."); + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + // TODO: add list types, date, time, binary, etc. 
+ if (fld_type == OFTInteger) { + sValue = "OFTInteger"; + } + else if (fld_type == OFTInteger64) { + sValue = "OFTInteger64"; + } + else if (fld_type == OFTReal) { + sValue = "OFTReal"; + } + else if (fld_type == OFTString) { + sValue = "OFTString"; + } + else { + sValue = "default (read as OFTString)"; + } + list_fld_defn.push_back(sValue, "type"); + + nValue = OGR_Fld_GetWidth(hFieldDefn); + list_fld_defn.push_back(nValue, "width"); + + nValue = OGR_Fld_GetPrecision(hFieldDefn); + list_fld_defn.push_back(nValue, "precision"); + + bValue = OGR_Fld_IsNullable(hFieldDefn); + list_fld_defn.push_back(bValue, "is_nullable"); + + bValue = OGR_Fld_IsUnique(hFieldDefn); + list_fld_defn.push_back(bValue, "is_unique"); + + if (OGR_Fld_GetDefault(hFieldDefn) != nullptr) + sValue = std::string(OGR_Fld_GetDefault(hFieldDefn)); + else + sValue = ""; + list_fld_defn.push_back(sValue, "default"); + + bValue = OGR_Fld_IsIgnored(hFieldDefn); + list_fld_defn.push_back(bValue, "is_ignored"); + + bValue = false; + list_fld_defn.push_back(bValue, "is_geom"); + + list_out.push_back(list_fld_defn, OGR_Fld_GetNameRef(hFieldDefn)); + } + + // geometry fields + for (int i = 0; i < OGR_FD_GetGeomFieldCount(hFDefn); ++i) { + + Rcpp::List list_geom_fld_defn = Rcpp::List::create(); + OGRGeomFieldDefnH hGeomFldDefn = + OGR_FD_GetGeomFieldDefn(hFDefn, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("Error: could not obtain geometry field definition."); + + OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); + sValue = std::string(OGRGeometryTypeToName(eType)); + list_geom_fld_defn.push_back(sValue, "type"); + + OGRSpatialReferenceH hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); + if (hSRS == nullptr) + Rcpp::stop("Error: could not obtain geometry SRS."); + char *pszSRS_WKT = nullptr; + if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { + Rcpp::stop("Error exporting geometry SRS to WKT."); + } + sValue = std::string(pszSRS_WKT); + list_geom_fld_defn.push_back(sValue, "srs"); + + bValue 
= OGR_GFld_IsNullable(hGeomFldDefn); + list_geom_fld_defn.push_back(bValue, "is_nullable"); + + bValue = OGR_GFld_IsIgnored(hGeomFldDefn); + list_geom_fld_defn.push_back(bValue, "is_ignored"); + + bValue = true; + list_geom_fld_defn.push_back(bValue, "is_geom"); + + list_out.push_back(list_geom_fld_defn, + OGR_GFld_GetNameRef(hGeomFldDefn)); + + CPLFree(pszSRS_WKT); + } + + return list_out; } void GDALVector::setAttributeFilter(std::string query) { - _checkAccess(GA_ReadOnly); + _checkAccess(GA_ReadOnly); + + const char* query_in = nullptr; + if (query != "") + query_in = query.c_str(); - const char* query_in = nullptr; - if (query != "") - query_in = query.c_str(); - - if (OGR_L_SetAttributeFilter(hLayer, query_in) != OGRERR_NONE) - Rcpp::stop("Error setting filter, possibly in the query expression"); + if (OGR_L_SetAttributeFilter(hLayer, query_in) != OGRERR_NONE) + Rcpp::stop("Error setting filter, possibly in the query expression"); } void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { - _checkAccess(GA_ReadOnly); - - if (Rcpp::any(Rcpp::is_na(bbox))) - Rcpp::stop("Error: bbox has one or more NA values."); - - OGR_L_SetSpatialFilterRect(hLayer, bbox[0], bbox[1], bbox[2], bbox[3]); + _checkAccess(GA_ReadOnly); + + if (Rcpp::any(Rcpp::is_na(bbox))) + Rcpp::stop("Error: bbox has one or more NA values."); + + OGR_L_SetSpatialFilterRect(hLayer, bbox[0], bbox[1], bbox[2], bbox[3]); } void GDALVector::clearSpatialFilter() { - _checkAccess(GA_ReadOnly); + _checkAccess(GA_ReadOnly); - OGR_L_SetSpatialFilter(hLayer, nullptr); + OGR_L_SetSpatialFilter(hLayer, nullptr); } double GDALVector::getFeatureCount() { - // OGR_L_GetFeatureCount() returns GIntBig so we return as double to R. - // GDAL doc: Note that some implementations of this method may alter the - // read cursor of the layer. 
- // see: testCapability("FastFeatureCount") - _checkAccess(GA_ReadOnly); - - return OGR_L_GetFeatureCount(hLayer, true); + // OGR_L_GetFeatureCount() returns GIntBig so we return as double to R. + // GDAL doc: Note that some implementations of this method may alter the + // read cursor of the layer. + // see: testCapability("FastFeatureCount") + _checkAccess(GA_ReadOnly); + + return OGR_L_GetFeatureCount(hLayer, true); } SEXP GDALVector::getNextFeature() { - _checkAccess(GA_ReadOnly); - - OGRFeatureH hFeature = OGR_L_GetNextFeature(hLayer); - - if (hFeature != nullptr) { - Rcpp::List list_out = Rcpp::List::create(); - int i; - - double FID = static_cast(OGR_F_GetFID(hFeature)); - list_out.push_back(FID, "FID"); - - for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { - OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); - if (hFieldDefn == nullptr) - Rcpp::stop("Error: could not obtain field definition."); - - if (!OGR_F_IsFieldSet(hFeature, i) || - OGR_F_IsFieldNull(hFeature, i)) { - continue; - } - - OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - if (fld_type == OFTInteger) { - int value = OGR_F_GetFieldAsInteger(hFeature, i); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - else if (fld_type == OFTInteger64) { - // TODO: emit a warning? - // R does not have native int64 so handled as double for now - double value = static_cast( - OGR_F_GetFieldAsInteger64(hFeature, i)); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - else if (fld_type == OFTReal) { - double value = OGR_F_GetFieldAsDouble(hFeature, i); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - else { - // TODO: support date, time, binary, etc. 
- // read as string for now - std::string value = OGR_F_GetFieldAsString(hFeature, i); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - } - - for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { - OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); - if (hGeom == nullptr) - Rcpp::stop("Error: could not obtain geometry reference."); - char* pszWKT; - OGR_G_ExportToWkt(hGeom, &pszWKT); - std::string wkt(pszWKT); - OGRGeomFieldDefnH hGeomFldDefn = - OGR_F_GetGeomFieldDefnRef(hFeature, i); - if (hGeomFldDefn == nullptr) - Rcpp::stop("Error: could not obtain geometry field def."); - list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); - CPLFree(pszWKT); - OGR_G_DestroyGeometry(hGeom); - } - - return list_out; - } - else { - return R_NilValue; - } + _checkAccess(GA_ReadOnly); + + OGRFeatureH hFeature = OGR_L_GetNextFeature(hLayer); + + if (hFeature != nullptr) { + Rcpp::List list_out = Rcpp::List::create(); + int i; + + double FID = static_cast(OGR_F_GetFID(hFeature)); + list_out.push_back(FID, "FID"); + + for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if (hFieldDefn == nullptr) + Rcpp::stop("Error: could not obtain field definition."); + + if (!OGR_F_IsFieldSet(hFeature, i) || + OGR_F_IsFieldNull(hFeature, i)) { + continue; + } + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { + int value = OGR_F_GetFieldAsInteger(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTInteger64) { + // TODO: emit a warning? 
+ // R does not have native int64 so handled as double for now + double value = static_cast( + OGR_F_GetFieldAsInteger64(hFeature, i)); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTReal) { + double value = OGR_F_GetFieldAsDouble(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else { + // TODO: support date, time, binary, etc. + // read as string for now + std::string value = OGR_F_GetFieldAsString(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + } + + for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); + if (hGeom == nullptr) + Rcpp::stop("Error: could not obtain geometry reference."); + char* pszWKT; + OGR_G_ExportToWkt(hGeom, &pszWKT); + std::string wkt(pszWKT); + OGRGeomFieldDefnH hGeomFldDefn = + OGR_F_GetGeomFieldDefnRef(hFeature, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("Error: could not obtain geometry field def."); + list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); + CPLFree(pszWKT); + OGR_G_DestroyGeometry(hGeom); + } + + return list_out; + } + else { + return R_NilValue; + } } void GDALVector::resetReading() { - _checkAccess(GA_ReadOnly); - - OGR_L_ResetReading(hLayer); + _checkAccess(GA_ReadOnly); + + OGR_L_ResetReading(hLayer); } void GDALVector::layerIntersection( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options) { - - std::vector opt_list = {nullptr}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = nullptr; - } - - OGRErr err = OGR_L_Intersection( - hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), - opt_list.data(), - quiet ? 
nullptr : GDALTermProgressR, - nullptr); - - if (err != OGRERR_NONE) - Rcpp::stop("Error during Intersection, or execution was interrupted."); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Intersection( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Intersection, or execution was interrupted."); } void GDALVector::layerUnion( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options) { - - std::vector opt_list = {nullptr}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = nullptr; - } - - OGRErr err = OGR_L_Union( - hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), - opt_list.data(), - quiet ? 
nullptr : GDALTermProgressR, - nullptr); - - if (err != OGRERR_NONE) - Rcpp::stop("Error during Union, or execution was interrupted."); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Union( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Union, or execution was interrupted."); } void GDALVector::layerSymDifference( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options) { - - std::vector opt_list = {nullptr}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = nullptr; - } - - OGRErr err = OGR_L_SymDifference( - hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), - opt_list.data(), - quiet ? 
nullptr : GDALTermProgressR, - nullptr); - - if (err != OGRERR_NONE) - Rcpp::stop("Error during SymDifference, or execution was interrupted."); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_SymDifference( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during SymDifference, or execution was interrupted."); } void GDALVector::layerIdentity( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options) { - - std::vector opt_list = {nullptr}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = nullptr; - } - - OGRErr err = OGR_L_Identity( - hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), - opt_list.data(), - quiet ? 
nullptr : GDALTermProgressR, - nullptr); - - if (err != OGRERR_NONE) - Rcpp::stop("Error during Identity, or execution was interrupted."); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Identity( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Identity, or execution was interrupted."); } void GDALVector::layerUpdate( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options) { - - std::vector opt_list = {nullptr}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = nullptr; - } - - OGRErr err = OGR_L_Update( - hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), - opt_list.data(), - quiet ? 
nullptr : GDALTermProgressR, - nullptr); - - if (err != OGRERR_NONE) - Rcpp::stop("Error during Update, or execution was interrupted."); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Update( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Update, or execution was interrupted."); } void GDALVector::layerClip( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options) { - - std::vector opt_list = {nullptr}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = nullptr; - } - - OGRErr err = OGR_L_Clip( - hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), - opt_list.data(), - quiet ? 
nullptr : GDALTermProgressR, - nullptr); - - if (err != OGRERR_NONE) - Rcpp::stop("Error during Clip, or execution was interrupted."); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Clip( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Clip, or execution was interrupted."); } void GDALVector::layerErase( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options) { - - std::vector opt_list = {nullptr}; - if (options.isNotNull()) { - Rcpp::CharacterVector options_in(options); - opt_list.resize(options_in.size() + 1); - for (R_xlen_t i = 0; i < options_in.size(); ++i) { - opt_list[i] = (char *) (options_in[i]); - } - opt_list[options_in.size()] = nullptr; - } - - OGRErr err = OGR_L_Erase( - hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), - opt_list.data(), - quiet ? 
nullptr : GDALTermProgressR, - nullptr); - - if (err != OGRERR_NONE) - Rcpp::stop("Error during Erase, or execution was interrupted."); + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Erase( + hLayer, + method_layer._getOGRLayerH(), + result_layer._getOGRLayerH(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("Error during Erase, or execution was interrupted."); } void GDALVector::close() { - if (hDataset != nullptr) { - if (is_sql_in) - GDALDatasetReleaseResultSet(hDataset, hLayer); - GDALReleaseDataset(hDataset); - hDataset = nullptr; - hLayer = nullptr; - } + if (hDataset != nullptr) { + if (is_sql_in) + GDALDatasetReleaseResultSet(hDataset, hLayer); + GDALReleaseDataset(hDataset); + hDataset = nullptr; + hLayer = nullptr; + } } // **************************************************************************** @@ -668,17 +669,17 @@ void GDALVector::close() { // **************************************************************************** void GDALVector::_checkAccess(GDALAccess access_needed) const { - if (!isOpen()) - Rcpp::stop("Dataset is not open."); - - if (access_needed == GA_Update && eAccess == GA_ReadOnly) - Rcpp::stop("Dataset is read-only."); + if (!isOpen()) + Rcpp::stop("Dataset is not open."); + + if (access_needed == GA_Update && eAccess == GA_ReadOnly) + Rcpp::stop("Dataset is read-only."); } OGRLayerH GDALVector::_getOGRLayerH() { - _checkAccess(GA_ReadOnly); - - return hLayer; + _checkAccess(GA_ReadOnly); + + return hLayer; } // **************************************************************************** @@ -688,74 +689,73 
@@ RCPP_MODULE(mod_GDALVector) { Rcpp::class_("GDALVector") .constructor - ("Default constructor, only for allocations in std::vector.") + ("Default constructor, only for allocations in std::vector.") .constructor - ("Usage: new(GDALVector, dsn)") + ("Usage: new(GDALVector, dsn)") .constructor - ("Usage: new(GDALVector, dsn, layer)") + ("Usage: new(GDALVector, dsn, layer)") .constructor - ("Usage: new(GDALVector, dsn, layer, read_only=[TRUE|FALSE])") + ("Usage: new(GDALVector, dsn, layer, read_only=[TRUE|FALSE])") .constructor - ("Usage: new(GDALVector, dsn, layer, read_only, open_options)") - + ("Usage: new(GDALVector, dsn, layer, read_only, open_options)") + // exposed member functions .const_method("getDsn", &GDALVector::getDsn, - "Return the DSN.") + "Return the DSN.") .const_method("isOpen", &GDALVector::isOpen, - "Is the dataset open?") + "Is the dataset open?") .method("open", &GDALVector::open, - "(Re-)open the dataset on the existing DSN and layer.") + "(Re-)open the dataset on the existing DSN and layer.") .const_method("getFileList", &GDALVector::getFileList, - "Fetch files forming dataset.") + "Fetch files forming dataset.") .const_method("getDriverShortName", &GDALVector::getDriverShortName, - "Return the short name of the format driver.") + "Return the short name of the format driver.") .const_method("getDriverLongName", &GDALVector::getDriverLongName, - "Return the long name of the format driver.") + "Return the long name of the format driver.") .const_method("getName", &GDALVector::getName, - "Return the layer name.") + "Return the layer name.") .const_method("testCapability", &GDALVector::testCapability, - "Test if this layer supports the named capability.") + "Test if this layer supports the named capability.") .const_method("getFIDColumn", &GDALVector::getFIDColumn, - "Return name of the underlying db column being used as FID column.") + "Return name of the underlying db column being used as FID column.") .const_method("getGeomType", 
&GDALVector::getGeomType, - "Return the layer geometry type.") + "Return the layer geometry type.") .const_method("getGeometryColumn", &GDALVector::getGeometryColumn, - "Return name of the underlying db column being used as geom column.") + "Return name of the underlying db column being used as geom column.") .const_method("getSpatialRef", &GDALVector::getSpatialRef, - "Fetch the spatial reference system for this layer as WKT string.") + "Fetch the spatial reference system for this layer as WKT string.") .method("bbox", &GDALVector::bbox, - "Return the bounding box (xmin, ymin, xmax, ymax).") + "Return the bounding box (xmin, ymin, xmax, ymax).") .const_method("getLayerDefn", &GDALVector::getLayerDefn, - "Fetch the schema information for this layer.") + "Fetch the schema information for this layer.") .method("setAttributeFilter", &GDALVector::setAttributeFilter, - "Set a new attribute query.") + "Set a new attribute query.") .method("setSpatialFilterRect", &GDALVector::setSpatialFilterRect, - "Set a new rectangular spatial filter.") + "Set a new rectangular spatial filter.") .method("clearSpatialFilter", &GDALVector::clearSpatialFilter, - "Clear the current spatial filter.") + "Clear the current spatial filter.") .method("getFeatureCount", &GDALVector::getFeatureCount, - "Fetch the feature count in this layer.") + "Fetch the feature count in this layer.") .method("getNextFeature", &GDALVector::getNextFeature, - "Fetch the next available feature from this layer.") + "Fetch the next available feature from this layer.") .method("resetReading", &GDALVector::resetReading, - "Reset feature reading to start on the first feature.") + "Reset feature reading to start on the first feature.") .method("layerIntersection", &GDALVector::layerIntersection, - "Intersection of this layer with a method layer.") + "Intersection of this layer with a method layer.") .method("layerUnion", &GDALVector::layerUnion, - "Union of this layer with a method layer.") + "Union of this layer with a 
method layer.") .method("layerSymDifference", &GDALVector::layerSymDifference, - "Symmetrical difference of this layer and a method layer.") + "Symmetrical difference of this layer and a method layer.") .method("layerIdentity", &GDALVector::layerIdentity, - "Identify features of this layer with the ones from the method layer.") + "Identify features of this layer with the ones from the method layer.") .method("layerUpdate", &GDALVector::layerUpdate, - "Update this layer with features from the method layer.") + "Update this layer with features from the method layer.") .method("layerClip", &GDALVector::layerClip, - "Clip off areas that are not covered by the method layer.") + "Clip off areas that are not covered by the method layer.") .method("layerErase", &GDALVector::layerErase, - "Remove areas that are covered by the method layer.") + "Remove areas that are covered by the method layer.") .method("close", &GDALVector::close, - "Release the dataset for proper cleanup.") - + "Release the dataset for proper cleanup.") + ; } - diff --git a/src/gdalvector.h b/src/gdalvector.h index 03e4e1947..1551cbc57 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -18,92 +18,91 @@ typedef void *OGRLayerH; typedef enum {GA_ReadOnly = 0, GA_Update = 1} GDALAccess; #endif - class GDALVector { - private: - std::string dsn_in; - std::string layer_in; // layer name or sql statement - bool is_sql_in; - Rcpp::CharacterVector open_options_in; - GDALDatasetH hDataset; - GDALAccess eAccess; - OGRLayerH hLayer; - OGRFeatureDefnH hFDefn; - - public: - GDALVector(); - GDALVector(Rcpp::CharacterVector dsn); - GDALVector(Rcpp::CharacterVector dsn, std::string layer); - GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); - GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, - Rcpp::CharacterVector open_options); - - void open(bool read_only); - bool isOpen() const; - std::string getDsn() const; - Rcpp::CharacterVector getFileList() const; - std::string 
getDriverShortName() const; - std::string getDriverLongName() const; - - std::string getName() const; - bool testCapability(std::string capability) const; - std::string getFIDColumn() const; - std::string getGeomType() const; - std::string getGeometryColumn() const; - std::string getSpatialRef() const; - Rcpp::NumericVector bbox(); - Rcpp::List getLayerDefn() const; - - void setAttributeFilter(std::string query); - void setSpatialFilterRect(Rcpp::NumericVector bbox); - void clearSpatialFilter(); - - double getFeatureCount(); - SEXP getNextFeature(); - void resetReading(); - - void layerIntersection( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerUnion( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerSymDifference( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerIdentity( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerUpdate( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerClip( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerErase( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - - void close(); - - // methods for internal use not exported to R - void _checkAccess(GDALAccess access_needed) const; - OGRLayerH _getOGRLayerH(); + private: + std::string dsn_in; + std::string layer_in; // layer name or sql statement + bool is_sql_in; + Rcpp::CharacterVector open_options_in; + GDALDatasetH hDataset; + GDALAccess eAccess; + OGRLayerH hLayer; + OGRFeatureDefnH hFDefn; + + public: + GDALVector(); + GDALVector(Rcpp::CharacterVector dsn); + GDALVector(Rcpp::CharacterVector dsn, std::string layer); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); + 
GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, + Rcpp::CharacterVector open_options); + + void open(bool read_only); + bool isOpen() const; + std::string getDsn() const; + Rcpp::CharacterVector getFileList() const; + std::string getDriverShortName() const; + std::string getDriverLongName() const; + + std::string getName() const; + bool testCapability(std::string capability) const; + std::string getFIDColumn() const; + std::string getGeomType() const; + std::string getGeometryColumn() const; + std::string getSpatialRef() const; + Rcpp::NumericVector bbox(); + Rcpp::List getLayerDefn() const; + + void setAttributeFilter(std::string query); + void setSpatialFilterRect(Rcpp::NumericVector bbox); + void clearSpatialFilter(); + + double getFeatureCount(); + SEXP getNextFeature(); + void resetReading(); + + void layerIntersection( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUnion( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerSymDifference( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerIdentity( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUpdate( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerClip( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerErase( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + + void close(); + + // methods for internal use not exported to R + void _checkAccess(GDALAccess access_needed) const; + OGRLayerH _getOGRLayerH(); }; RCPP_EXPOSED_CLASS(GDALVector) From c3a233fe284debfbf96130db4cc0f093ac443e85 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 16 Mar 2024 16:37:27 -0600 Subject: [PATCH 22/53] reformat code 
style --- .git-blame-ignore-revs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 16c15b811..ecd13956a 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -2,3 +2,5 @@ # Add .editorconfig and bulk reformat codebase 44c885936e1a948990b80faffa06d6f8fb55e435 fe6c783be086d71adea69bbeacae35c91f68ba2b +eb9b8b3cd8176a81b512973c61e2605b181a7b11 +5146434168160d7ca1e7b73b5a4bfa1525e29ba6 From 06763c0b83418adf9a47199acee193587b4d5cdf Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 6 Apr 2024 00:53:55 -0600 Subject: [PATCH 23/53] support 64-bit integer in R with bit64/RcppInt64 --- DESCRIPTION | 3 +- NAMESPACE | 2 +- src/gdalvector.cpp | 126 ++++++++++++++++++++++----------------------- src/rcpp_util.h | 1 + 4 files changed, 66 insertions(+), 66 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dcd510cdf..7a9eb1628 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,6 +43,7 @@ URL: https://usdaforestservice.github.io/gdalraster/, BugReports: https://github.com/USDAForestService/gdalraster/issues Depends: R (>= 4.2.0) Imports: + bit64, graphics, grDevices, methods, @@ -51,7 +52,7 @@ Imports: tools, utils, xml2 -LinkingTo: Rcpp +LinkingTo: Rcpp, RcppInt64 Suggests: gt, knitr, diff --git a/NAMESPACE b/NAMESPACE index 4a7ce5cf5..8bc0bf444 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,5 @@ useDynLib(gdalraster, .registration=TRUE) -import(graphics, methods, Rcpp, xml2) +import(bit64, graphics, methods, Rcpp, xml2) importFrom("grDevices", "as.raster", "dev.capabilities", "gray", "rgb") importFrom("stats", "quantile") importFrom("tools", "file_path_sans_ext") diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index a30991a80..3a45302c9 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -1,5 +1,6 @@ /* Implementation of class GDALVector Encapsulates one OGRLayer and its GDALDataset + Requires bit64 on the R side for the integer64 S3 class Chris Toney */ #include "gdal.h" @@ -59,7 +60,7 
@@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, void GDALVector::open(bool read_only) { if (dsn_in == "") - Rcpp::stop("DSN is not set."); + Rcpp::stop("DSN is not set"); if (hDataset != nullptr) { if (is_sql_in) @@ -91,7 +92,7 @@ void GDALVector::open(bool read_only) { hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, dsoo.data(), nullptr); if (hDataset == nullptr) - Rcpp::stop("Open dataset failed."); + Rcpp::stop("open dataset failed"); if (layer_in == "") { is_sql_in = false; @@ -109,7 +110,7 @@ void GDALVector::open(bool read_only) { if (hLayer == nullptr) { GDALReleaseDataset(hDataset); - Rcpp::stop("Get layer failed."); + Rcpp::stop("failed to get layer"); } else { OGR_L_ResetReading(hLayer); @@ -120,7 +121,7 @@ void GDALVector::open(bool read_only) { if (is_sql_in) GDALDatasetReleaseResultSet(hDataset, hLayer); GDALReleaseDataset(hDataset); - Rcpp::stop("Get layer definition failed."); + Rcpp::stop("failed to get layer definition"); } } @@ -207,10 +208,10 @@ std::string GDALVector::getSpatialRef() const { OGRSpatialReferenceH hSRS = OGR_L_GetSpatialRef(hLayer); if (hSRS == nullptr) - Rcpp::stop("Error: could not obtain SRS."); + Rcpp::stop("could not obtain spatial reference"); char *pszSRS_WKT = nullptr; if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) - Rcpp::stop("Error exporting SRS to WKT."); + Rcpp::stop("error exporting SRS to WKT"); std::string srs_wkt(pszSRS_WKT); CPLFree(pszSRS_WKT); @@ -227,7 +228,7 @@ Rcpp::NumericVector GDALVector::bbox() { OGREnvelope envelope; if (OGR_L_GetExtent(hLayer, &envelope, true) != OGRERR_NONE) - Rcpp::stop("Error: the extent of the layer cannot be determined."); + Rcpp::stop("the extent of the layer cannot be determined"); Rcpp::NumericVector bbox_out = {envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY}; @@ -251,7 +252,7 @@ Rcpp::List GDALVector::getLayerDefn() const { Rcpp::List list_fld_defn = Rcpp::List::create(); OGRFieldDefnH hFieldDefn = 
OGR_FD_GetFieldDefn(hFDefn, iField); if (hFieldDefn == nullptr) - Rcpp::stop("Error: could not obtain field definition."); + Rcpp::stop("could not obtain field definition"); OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); // TODO: add list types, date, time, binary, etc. @@ -306,18 +307,17 @@ Rcpp::List GDALVector::getLayerDefn() const { OGRGeomFieldDefnH hGeomFldDefn = OGR_FD_GetGeomFieldDefn(hFDefn, i); if (hGeomFldDefn == nullptr) - Rcpp::stop("Error: could not obtain geometry field definition."); + Rcpp::stop("could not obtain geometry field definition"); OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); - sValue = std::string(OGRGeometryTypeToName(eType)); - list_geom_fld_defn.push_back(sValue, "type"); + list_geom_fld_defn.push_back(OGRGeometryTypeToName(eType), "type"); OGRSpatialReferenceH hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); if (hSRS == nullptr) - Rcpp::stop("Error: could not obtain geometry SRS."); + Rcpp::stop("could not obtain geometry SRS"); char *pszSRS_WKT = nullptr; if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { - Rcpp::stop("Error exporting geometry SRS to WKT."); + Rcpp::stop("error exporting geometry SRS to WKT"); } sValue = std::string(pszSRS_WKT); list_geom_fld_defn.push_back(sValue, "srs"); @@ -348,14 +348,14 @@ void GDALVector::setAttributeFilter(std::string query) { query_in = query.c_str(); if (OGR_L_SetAttributeFilter(hLayer, query_in) != OGRERR_NONE) - Rcpp::stop("Error setting filter, possibly in the query expression"); + Rcpp::stop("error setting filter, possibly in the query expression"); } void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { _checkAccess(GA_ReadOnly); if (Rcpp::any(Rcpp::is_na(bbox))) - Rcpp::stop("Error: bbox has one or more NA values."); + Rcpp::stop("'bbox' has one or more 'NA' values"); OGR_L_SetSpatialFilterRect(hLayer, bbox[0], bbox[1], bbox[2], bbox[3]); } @@ -367,13 +367,13 @@ void GDALVector::clearSpatialFilter() { } double GDALVector::getFeatureCount() { - // 
OGR_L_GetFeatureCount() returns GIntBig so we return as double to R. + // OGR_L_GetFeatureCount() returns GIntBig, return as R numeric for now // GDAL doc: Note that some implementations of this method may alter the // read cursor of the layer. // see: testCapability("FastFeatureCount") _checkAccess(GA_ReadOnly); - return OGR_L_GetFeatureCount(hLayer, true); + return static_cast(OGR_L_GetFeatureCount(hLayer, true)); } SEXP GDALVector::getNextFeature() { @@ -385,13 +385,13 @@ SEXP GDALVector::getNextFeature() { Rcpp::List list_out = Rcpp::List::create(); int i; - double FID = static_cast(OGR_F_GetFID(hFeature)); - list_out.push_back(FID, "FID"); + int64_t FID = static_cast(OGR_F_GetFID(hFeature)); + list_out.push_back(Rcpp::toInteger64(FID), "FID"); for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); if (hFieldDefn == nullptr) - Rcpp::stop("Error: could not obtain field definition."); + Rcpp::stop("could not obtain field definition"); if (!OGR_F_IsFieldSet(hFeature, i) || OGR_F_IsFieldNull(hFeature, i)) { @@ -404,11 +404,10 @@ SEXP GDALVector::getNextFeature() { list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTInteger64) { - // TODO: emit a warning? 
- // R does not have native int64 so handled as double for now - double value = static_cast( + int64_t value = static_cast( OGR_F_GetFieldAsInteger64(hFeature, i)); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + list_out.push_back(Rcpp::toInteger64(value), + OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTReal) { double value = OGR_F_GetFieldAsDouble(hFeature, i); @@ -425,17 +424,16 @@ SEXP GDALVector::getNextFeature() { for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); if (hGeom == nullptr) - Rcpp::stop("Error: could not obtain geometry reference."); + Rcpp::stop("could not obtain geometry reference"); char* pszWKT; OGR_G_ExportToWkt(hGeom, &pszWKT); std::string wkt(pszWKT); OGRGeomFieldDefnH hGeomFldDefn = OGR_F_GetGeomFieldDefnRef(hFeature, i); if (hGeomFldDefn == nullptr) - Rcpp::stop("Error: could not obtain geometry field def."); + Rcpp::stop("could not obtain geometry field def"); list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); CPLFree(pszWKT); - OGR_G_DestroyGeometry(hGeom); } return list_out; @@ -476,7 +474,7 @@ void GDALVector::layerIntersection( nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during Intersection, or execution was interrupted."); + Rcpp::stop("error during Intersection, or execution was interrupted"); } @@ -505,7 +503,7 @@ void GDALVector::layerUnion( nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during Union, or execution was interrupted."); + Rcpp::stop("error during Union, or execution was interrupted"); } @@ -534,7 +532,7 @@ void GDALVector::layerSymDifference( nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during SymDifference, or execution was interrupted."); + Rcpp::stop("error during SymDifference, or execution was interrupted"); } @@ -563,7 +561,7 @@ void GDALVector::layerIdentity( nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during Identity, or execution was interrupted."); + Rcpp::stop("error 
during Identity, or execution was interrupted"); } @@ -592,7 +590,7 @@ void GDALVector::layerUpdate( nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during Update, or execution was interrupted."); + Rcpp::stop("error during Update, or execution was interrupted"); } @@ -621,7 +619,7 @@ void GDALVector::layerClip( nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during Clip, or execution was interrupted."); + Rcpp::stop("error during Clip, or execution was interrupted"); } @@ -650,7 +648,7 @@ void GDALVector::layerErase( nullptr); if (err != OGRERR_NONE) - Rcpp::stop("Error during Erase, or execution was interrupted."); + Rcpp::stop("error during Erase, or execution was interrupted"); } @@ -670,10 +668,10 @@ void GDALVector::close() { void GDALVector::_checkAccess(GDALAccess access_needed) const { if (!isOpen()) - Rcpp::stop("Dataset is not open."); + Rcpp::stop("dataset is not open"); if (access_needed == GA_Update && eAccess == GA_ReadOnly) - Rcpp::stop("Dataset is read-only."); + Rcpp::stop("dataset is read-only"); } OGRLayerH GDALVector::_getOGRLayerH() { @@ -689,7 +687,7 @@ RCPP_MODULE(mod_GDALVector) { Rcpp::class_("GDALVector") .constructor - ("Default constructor, only for allocations in std::vector.") + ("Default constructor, only for allocations in std::vector") .constructor ("Usage: new(GDALVector, dsn)") .constructor @@ -701,61 +699,61 @@ RCPP_MODULE(mod_GDALVector) { // exposed member functions .const_method("getDsn", &GDALVector::getDsn, - "Return the DSN.") + "Return the DSN") .const_method("isOpen", &GDALVector::isOpen, "Is the dataset open?") .method("open", &GDALVector::open, - "(Re-)open the dataset on the existing DSN and layer.") + "(Re-)open the dataset on the existing DSN and layer") .const_method("getFileList", &GDALVector::getFileList, - "Fetch files forming dataset.") + "Fetch files forming dataset") .const_method("getDriverShortName", &GDALVector::getDriverShortName, - "Return the short name of the format driver.") + "Return the 
short name of the format driver") .const_method("getDriverLongName", &GDALVector::getDriverLongName, - "Return the long name of the format driver.") + "Return the long name of the format driver") .const_method("getName", &GDALVector::getName, - "Return the layer name.") + "Return the layer name") .const_method("testCapability", &GDALVector::testCapability, - "Test if this layer supports the named capability.") + "Test if this layer supports the named capability") .const_method("getFIDColumn", &GDALVector::getFIDColumn, - "Return name of the underlying db column being used as FID column.") + "Return name of the underlying db column being used as FID column") .const_method("getGeomType", &GDALVector::getGeomType, - "Return the layer geometry type.") + "Return the layer geometry type") .const_method("getGeometryColumn", &GDALVector::getGeometryColumn, - "Return name of the underlying db column being used as geom column.") + "Return name of the underlying db column being used as geom column") .const_method("getSpatialRef", &GDALVector::getSpatialRef, - "Fetch the spatial reference system for this layer as WKT string.") + "Fetch the spatial reference system for this layer as WKT string") .method("bbox", &GDALVector::bbox, - "Return the bounding box (xmin, ymin, xmax, ymax).") + "Return the bounding box (xmin, ymin, xmax, ymax)") .const_method("getLayerDefn", &GDALVector::getLayerDefn, - "Fetch the schema information for this layer.") + "Fetch the schema information for this layer") .method("setAttributeFilter", &GDALVector::setAttributeFilter, - "Set a new attribute query.") + "Set a new attribute query") .method("setSpatialFilterRect", &GDALVector::setSpatialFilterRect, - "Set a new rectangular spatial filter.") + "Set a new rectangular spatial filter") .method("clearSpatialFilter", &GDALVector::clearSpatialFilter, - "Clear the current spatial filter.") + "Clear the current spatial filter") .method("getFeatureCount", &GDALVector::getFeatureCount, - "Fetch the feature 
count in this layer.") + "Fetch the feature count in this layer") .method("getNextFeature", &GDALVector::getNextFeature, - "Fetch the next available feature from this layer.") + "Fetch the next available feature from this layer") .method("resetReading", &GDALVector::resetReading, - "Reset feature reading to start on the first feature.") + "Reset feature reading to start on the first feature") .method("layerIntersection", &GDALVector::layerIntersection, - "Intersection of this layer with a method layer.") + "Intersection of this layer with a method layer") .method("layerUnion", &GDALVector::layerUnion, - "Union of this layer with a method layer.") + "Union of this layer with a method layer") .method("layerSymDifference", &GDALVector::layerSymDifference, - "Symmetrical difference of this layer and a method layer.") + "Symmetrical difference of this layer and a method layer") .method("layerIdentity", &GDALVector::layerIdentity, - "Identify features of this layer with the ones from the method layer.") + "Identify features of this layer with the ones from the method layer") .method("layerUpdate", &GDALVector::layerUpdate, - "Update this layer with features from the method layer.") + "Update this layer with features from the method layer") .method("layerClip", &GDALVector::layerClip, - "Clip off areas that are not covered by the method layer.") + "Clip off areas that are not covered by the method layer") .method("layerErase", &GDALVector::layerErase, - "Remove areas that are covered by the method layer.") + "Remove areas that are covered by the method layer") .method("close", &GDALVector::close, - "Release the dataset for proper cleanup.") + "Release the dataset for proper cleanup") ; } diff --git a/src/rcpp_util.h b/src/rcpp_util.h index 17b4fd0f8..3354ddadc 100644 --- a/src/rcpp_util.h +++ b/src/rcpp_util.h @@ -5,6 +5,7 @@ #define rcpp_util_H #include +#include Rcpp::NumericMatrix _df_to_matrix(Rcpp::DataFrame df); Rcpp::IntegerMatrix _df_to_int_matrix(Rcpp::DataFrame 
df); From 80ea6e41fcf7033475a7e5ef85c5a64e27071f46 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 6 Apr 2024 01:33:14 -0600 Subject: [PATCH 24/53] update gdalvector-draft --- vignettes/articles/gdalvector-draft.Rmd | 158 +++++++++++++----------- 1 file changed, 87 insertions(+), 71 deletions(-) diff --git a/vignettes/articles/gdalvector-draft.Rmd b/vignettes/articles/gdalvector-draft.Rmd index 62f08cfdd..d93a52152 100644 --- a/vignettes/articles/gdalvector-draft.Rmd +++ b/vignettes/articles/gdalvector-draft.Rmd @@ -11,7 +11,7 @@ knitr::opts_chunk$set( Chris Toney (chris.toney at usda.gov) -Last modified: 2024-03-10 +Last modified: 2024-04-06 Comment/discussion: @@ -45,7 +45,7 @@ An attribute field definition is a list with named elements: ``` $type : OGR Field Type ("OFTString", "OFTInteger", ...) -$subtype : optional ("OFSTBoolean", ...) +$subtype : optional ("OFSTBoolean", ...) $width : optional max number of characters $precision : optional number of digits after the decimal point $is_nullable: optional NOT NULL constraint (logical scalar) @@ -66,7 +66,7 @@ $is_ignored : whether ignored when retrieving features (logical scalar) $is_geom : TRUE for geometry fields ``` -An OGR Feature – as read by `GDALVector::getNextFeature()` or `GDALVector::getFeature()`, or as passed to write methods – is a list with the unique feature identifier (FID), attribute and geometry field names, and their values. `GDALVector::getFeatureSet()` will return a set of features as a data frame. (Note that FID is an `int64` type in GDAL. Handling of `int64` TBD, currently as R `double` type). +An OGR Feature – as read by `GDALVector::getNextFeature()` or `GDALVector::getFeature()`, or as passed to write methods – is a list with the unique feature identifier (FID), attribute and geometry field names, and their values. `GDALVector::getFeatureSet()` will return a set of features as a data frame. 
An OGR Geometry is represented in R as a character string containing OGC Well Known Text (WKT). Likewise, an OGR Spatial Reference, which encapsulates the definition of a projection and datum, is represented in R as WKT. **gdalraster** has existing functions for working with spatial reference systems as WKT (`srs_to_wkt()`, `srs_is_projected()`, etc.), and a set of geometry convenience functions also operating on WKT (GEOS via GDAL headers). @@ -80,7 +80,7 @@ The header file can be referenced for the public class methods that have been im -This does not include definitions of several stand-alone functions that will provide schema management. The existing definitions in `src/ogr_util.h` are a starting point for those. An **Rcpp** `.finalizer` function will be implemented to properly release resources in case an object of class `GDALVector` is garbage collected without an explicit call to `close()`. Also note that `GIntBig` is currently a placeholder type in the class definition. It is the type currently defined in the OGR API. For now it would be implemented as R numeric (i.e., `double`), with `int64` handling TBD. +This does not include definitions of several stand-alone functions that will provide schema management. The existing definitions in `src/ogr_util.h` are a starting point for those. An **Rcpp** `.finalizer` function will be implemented to properly release resources in case an object of class `GDALVector` is garbage collected without an explicit call to `close()`. Support for 64-bit integers requires **bit64** on the R side, with conversions in C++ using **RcppInt64**. 
```cpp class GDALVector { @@ -94,7 +94,7 @@ class GDALVector { GDALAccess eAccess; OGRLayerH hLayer; OGRFeatureDefnH hFDefn; - + public: GDALVector(); GDALVector(Rcpp::CharacterVector dsn); @@ -118,30 +118,30 @@ class GDALVector { std::string getSpatialRef() const; Rcpp::NumericVector bbox(); Rcpp::List getLayerDefn() const; - + void setAttributeFilter(std::string query); void setSpatialFilterRect(Rcpp::NumericVector bbox); void setSpatialFilter(std::string wkt); std::string getSpatialFilter() const; void clearSpatialFilter(); - + double getFeatureCount(); SEXP getNextFeature(); void resetReading(); // int64 handling TBD - SEXP getFeature(GIntBig fid); - void setNextByIndex(GIntBig fid); + SEXP getFeature(int64_t fid); + void setNextByIndex(int64_t fid); void setIgnoredFields(Rcpp::CharacterVector fields); - + Rcpp::DataFrame getFeatureSet(Rcpp::CharacterVector fields, bool geom_column); - - // int64 handling TBD - GIntBig createFeature(Rcpp::List feat); - GIntBig setFeature(Rcpp::List feat); - GIntBig upsertFeature(Rcpp::List feat); - GIntBig deleteFeature(GIntBig fid); - + + // returning the FID of the affected feature if successful + int64_t createFeature(Rcpp::List feat); + int64_t setFeature(Rcpp::List feat); + int64_t upsertFeature(Rcpp::List feat); + int64_t deleteFeature(int64_t fid); + bool startTransaction(bool force); bool commitTransaction(); bool rollbackTransaction(); @@ -183,7 +183,7 @@ class GDALVector { Rcpp::Nullable options); void close(); - + // methods for internal use not exported to R void _checkAccess(GDALAccess access_needed) const; OGRLayerH _getOGRLayerH(); @@ -195,9 +195,10 @@ RCPP_EXPOSED_CLASS(GDALVector) ## Example: usage for class GDALVector ``` r +## usage for GDALVector class library(gdalraster) -#> GDAL 3.8.3, released 2024/01/04, PROJ 9.3.1 +#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 # MTBS fires in Yellowstone National Park 1984-2022 f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") @@ 
-207,6 +208,20 @@ vsi_copy_file(f, dsn) lyr <- new(GDALVector, dsn, "mtbs_perims") +# object of class GDALVector +lyr +#> C++ object <0x58a6a88edc80> of class 'GDALVector' <0x58a6a2fce150> +str(lyr) +#> Reference class 'Rcpp_GDALVector' [package "gdalraster"] with 0 fields +#> list() +#> and 44 methods, of which 30 are possibly relevant: +#> bbox, clearSpatialFilter, close, finalize, getDriverLongName, +#> getDriverShortName, getDsn, getFeatureCount, getFIDColumn, getFileList, +#> getGeometryColumn, getGeomType, getLayerDefn, getName, getNextFeature, +#> getSpatialRef, initialize, isOpen, layerClip, layerErase, layerIdentity, +#> layerIntersection, layerSymDifference, layerUnion, layerUpdate, open, +#> resetReading, setAttributeFilter, setSpatialFilterRect, testCapability + # dataset info lyr$getDriverShortName() #> [1] "GPKG" @@ -257,7 +272,7 @@ lyr$testCapability("RandomWrite") # feature class definition - a list of fields and their definitions defn <- lyr$getLayerDefn() names(defn) -#> [1] "event_id" "incid_name" "incid_type" "map_id" "burn_bnd_ac" +#> [1] "event_id" "incid_name" "incid_type" "map_id" "burn_bnd_ac" #> [6] "burn_bnd_lat" "burn_bnd_lon" "ig_date" "ig_year" "geom" # each list element holds a field definition list @@ -359,12 +374,12 @@ feat <- lyr$getNextFeature() # a list of field names and their values str(feat) #> List of 11 -#> $ FID : num 1 +#> $ FID :integer64 1 #> $ event_id : chr "WY4413411069519870807" #> $ incid_name : chr "POLECAT" #> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 1093 +#> $ map_id :integer64 10015934 +#> $ burn_bnd_ac :integer64 1093 #> $ burn_bnd_lat: chr "44.132" #> $ burn_bnd_lon: chr "-110.696" #> $ ig_date : chr "1987/08/07" @@ -379,12 +394,12 @@ lyr$getFeatureCount() feat <- lyr$getNextFeature() str(feat) #> List of 11 -#> $ FID : num 61 +#> $ FID :integer64 61 #> $ event_id : chr "WY4438911082120200822" #> $ incid_name : chr "LONE STAR" #> $ incid_type : chr "Wildfire" -#> $ map_id 
: num 1e+07 -#> $ burn_bnd_ac : num 3348 +#> $ map_id :integer64 10020495 +#> $ burn_bnd_ac :integer64 3348 #> $ burn_bnd_lat: chr "44.4" #> $ burn_bnd_lon: chr "-110.782" #> $ ig_date : chr "2020/08/22" @@ -401,12 +416,12 @@ lyr$resetReading() feat <- lyr$getNextFeature() str(feat) #> List of 11 -#> $ FID : num 61 +#> $ FID :integer64 61 #> $ event_id : chr "WY4438911082120200822" #> $ incid_name : chr "LONE STAR" #> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 3348 +#> $ map_id :integer64 10020495 +#> $ burn_bnd_ac :integer64 3348 #> $ burn_bnd_lat: chr "44.4" #> $ burn_bnd_lon: chr "-110.782" #> $ ig_date : chr "2020/08/22" @@ -424,12 +439,12 @@ lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") feat <- lyr$getNextFeature() str(feat) #> List of 11 -#> $ FID : num 7 +#> $ FID :integer64 7 #> $ event_id : chr "WY4470811082119880722" #> $ incid_name : chr "NORTH FORK" #> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 563527 +#> $ map_id :integer64 10014217 +#> $ burn_bnd_ac :integer64 563527 #> $ burn_bnd_lat: chr "44.678" #> $ burn_bnd_lon: chr "-110.716" #> $ ig_date : chr "1988/07/22" @@ -457,34 +472,33 @@ lyr$close() vsi_unlink(dsn) ``` +Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) + ## Example: layer geoprocessing ``` r - ## layer intersection example library(gdalraster) -#> GDAL 3.8.3, released 2024/01/04, PROJ 9.3.1 +#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 # MTBS fires in Yellowstone National Park 1984-2022 dsn <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") lyr <- new(GDALVector, dsn, "mtbs_perims") # largest 1988 fire (FID from the example above) -# Note that retrieving a feature by FID would normally be done with -# lyr$getFeature(fid), but that method is not exposed in the prototype yet. 
lyr$setAttributeFilter("FID = 7") lyr$getFeatureCount() #> [1] 1 feat <- lyr$getNextFeature() str(feat) #> List of 11 -#> $ FID : num 7 +#> $ FID :integer64 7 #> $ event_id : chr "WY4470811082119880722" #> $ incid_name : chr "NORTH FORK" #> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 563527 +#> $ map_id :integer64 10014217 +#> $ burn_bnd_ac :integer64 563527 #> $ burn_bnd_lat: chr "44.678" #> $ burn_bnd_lon: chr "-110.716" #> $ ig_date : chr "1988/07/22" @@ -522,12 +536,12 @@ lyr_out$getFeatureCount() defn <- lyr_out$getLayerDefn() # combined attributes names(defn) -#> [1] "input_event_id" "input_incid_name" "input_incid_type" -#> [4] "input_map_id" "input_burn_bnd_ac" "input_burn_bnd_lat" -#> [7] "input_burn_bnd_lon" "input_ig_date" "input_ig_year" -#> [10] "method_event_id" "method_incid_name" "method_incid_type" +#> [1] "input_event_id" "input_incid_name" "input_incid_type" +#> [4] "input_map_id" "input_burn_bnd_ac" "input_burn_bnd_lat" +#> [7] "input_burn_bnd_lon" "input_ig_date" "input_ig_year" +#> [10] "method_event_id" "method_incid_name" "method_incid_type" #> [13] "method_map_id" "method_burn_bnd_ac" "method_burn_bnd_lat" -#> [16] "method_burn_bnd_lon" "method_ig_date" "method_ig_year" +#> [16] "method_burn_bnd_lon" "method_ig_date" "method_ig_year" #> [19] "geom" # we don't have vector plotting yet, so rasterize and use plot_raster() @@ -563,6 +577,8 @@ vsi_unlink(dsn_out) vsi_unlink(out_file) ``` +Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) + ## Example: WorldCover Sentinel-2 composites ``` r @@ -571,7 +587,7 @@ vsi_unlink(out_file) ## tile index lookup on remote filesystem and obtain raster data library(gdalraster) -#> GDAL 3.8.3, released 2024/01/04, PROJ 9.3.1 +#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 fgb = "/vsicurl/https://esa-worldcover.s3.eu-central-1.amazonaws.com/esa_worldcover_grid_composites.fgb" lyr_tiles <- new(GDALVector, fgb) @@ -602,8 +618,8 @@ 
lyr_tiles$bbox() defn <- lyr_tiles$getLayerDefn() names(defn) #> [1] "tile" "s1_vvvhratio_2020" "s1_vvvhratio_2021" -#> [4] "s2_rgbnir_2020" "s2_rgbnir_2021" "s2_ndvi_2020" -#> [7] "s2_ndvi_2021" "s2_swir_2020" "s2_swir_2021" +#> [4] "s2_rgbnir_2020" "s2_rgbnir_2021" "s2_ndvi_2020" +#> [7] "s2_ndvi_2021" "s2_swir_2020" "s2_swir_2021" #> [10] "" # AOI for the Fishhawk fire @@ -616,12 +632,12 @@ lyr_ynp$getFeatureCount() feat <- lyr_ynp$getNextFeature() str(feat) #> List of 11 -#> $ FID : num 60 +#> $ FID :integer64 60 #> $ event_id : chr "WY4437710988020190902" #> $ incid_name : chr "FISHHAWK" #> $ incid_type : chr "Wildfire" -#> $ map_id : num 1e+07 -#> $ burn_bnd_ac : num 10775 +#> $ map_id :integer64 10016957 +#> $ burn_bnd_ac :integer64 10775 #> $ burn_bnd_lat: chr "44.384" #> $ burn_bnd_lon: chr "-109.85" #> $ ig_date : chr "2019/09/02" @@ -641,7 +657,7 @@ lyr_tiles$getFeatureCount() feat <- lyr_tiles$getNextFeature() str(feat) #> List of 11 -#> $ FID : num 16615 +#> $ FID :integer64 16615 #> $ tile : chr "N44W110" #> $ s1_vvvhratio_2020: chr "s3://esa-worldcover-s1/vvvhratio/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_S1VVVHratio.tif" #> $ s1_vvvhratio_2021: chr "s3://esa-worldcover-s1/vvvhratio/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_S1VVVHratio.tif" @@ -676,24 +692,24 @@ ds <- new(GDALRaster, tif_file) ds$dim() #> [1] 12000 12000 4 ds$getMetadata(band=0, domain="") -#> [1] "algorithm_version=V2.0.0" -#> [2] "AREA_OR_POINT=Area" -#> [3] "bands=Band 1: B04 (Red), Band 2: B03 (Green), Band 3: B02 (Blue), Band 4: B08 (Infrared)" -#> [4] "copyright=ESA WorldCover project 2021 / Contains modified Copernicus Sentinel data (2021) processed by ESA WorldCover consortium" -#> [5] "creation_time=2022-12-09 17:08:25.881589" +#> [1] "algorithm_version=V2.0.0" +#> [2] "AREA_OR_POINT=Area" +#> [3] "bands=Band 1: B04 (Red), Band 2: B03 (Green), Band 3: B02 (Blue), Band 4: B08 (Infrared)" +#> [4] "copyright=ESA WorldCover project 2021 / Contains modified Copernicus 
Sentinel data (2021) processed by ESA WorldCover consortium" +#> [5] "creation_time=2022-12-09 17:08:25.881589" #> [6] "description=The ESA WorldCover Sentinel-2 median L2A (RGBNIR) composite is a color image made from images in Blue (B02), Green (B03), Red (B04) and Infrared (B08). " -#> [7] "license=CC-BY 4.0 - https://creativecommons.org/licenses/by/4.0/" -#> [8] "product_crs=EPSG:4326" -#> [9] "product_grid=1x1 degree tiling grid" -#> [10] "product_tile=N44W110" -#> [11] "product_type=Sentinel-2 median L2A (RGBNIR) composite" -#> [12] "product_version=V2.0.0" -#> [13] "reference=https://esa-worldcover.org" -#> [14] "time_end=2021-12-31T23:59:59Z" -#> [15] "time_start=2021-01-01T00:00:00Z" +#> [7] "license=CC-BY 4.0 - https://creativecommons.org/licenses/by/4.0/" +#> [8] "product_crs=EPSG:4326" +#> [9] "product_grid=1x1 degree tiling grid" +#> [10] "product_tile=N44W110" +#> [11] "product_type=Sentinel-2 median L2A (RGBNIR) composite" +#> [12] "product_version=V2.0.0" +#> [13] "reference=https://esa-worldcover.org" +#> [14] "time_end=2021-12-31T23:59:59Z" +#> [15] "time_start=2021-01-01T00:00:00Z" #> [16] "title=ESA WorldCover Sentinel-2 median L2A (RGBNIR) composite at 10m resolution for year 2021" ds$getMetadata(band=0, domain="IMAGE_STRUCTURE") -#> [1] "COMPRESSION=DEFLATE" "INTERLEAVE=PIXEL" "LAYOUT=COG" +#> [1] "COMPRESSION=DEFLATE" "INTERLEAVE=PIXEL" "LAYOUT=COG" #> [4] "PREDICTOR=2" r <- read_ds(ds, bands=c(4,1,2), out_xsize = 800, out_ysize = 800) @@ -706,20 +722,18 @@ plot_raster(r, main = txt) ``` -![](img/worldcov2021_N44W110_S2RGBNIR_ex.png) - -``` r ds$close() lyr_ynp$close() lyr_tiles$close() - ``` +Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) + ## Further consideration / TBD This is a working list of potential issues and design questions that need further consideration, to be determined: -* handling of 64-bit integer: OGR FID currently is `GIntBig` (`int64_t`) and integer fields in vector data sources will commonly be 
`OFTInteger64`. These would likely be handled using **RcppInt64**, with **bit64** on the R side providing S3 class `integer64`. +* handling of 64-bit integer: OGR FID currently is `GIntBig` and integer fields in vector data sources will commonly be `OFTInteger64`. These are now handled using **RcppInt64**, with **bit64** on the R side providing S3 class `integer64`. * OGR's Arrow C interface: Implement `GDALVector::getArrowStream()` (GDAL >= 3.6) and `GDALVector::writeArrowBatch()` (GDAL >= 3.8), supported on the R side with package **nanoarrow**. * potential output vectors of GEOS or OGR pointers, WKB with support by **wk** * OGR layer geoprocessing might be stand-alone functions instead of class methods in `GDALVector`. We would have more flexibility in terms of optional/argument defaults. Either way, we should add an option to create the output layer. @@ -742,6 +756,8 @@ This is a working list of potential issues and design questions that need furthe * add link to issue 241 for discussion thread (2024-03-05) * OGR layer geoprocessing may be stand-alone functions instead of class methods TBD (2024-03-10) * add link to the header file to reference the class methods that have been implemented so far in the prototype (2024-03-10) +* `ogr2ogr()` and `ogrinfo()` are available in **gdalraster** 1.10.0 on CRAN (2024-03-26) +* initial int64 support; now linking to **RcppInt64**, and importing **bit64**; `FID` and `OFTInteger64` fields are now returned in R as `integer64`; updated the examples (2024-04-06) ## Contributors From 622b7f5419a74b0a4e78bbaa45590384a6a12561 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 6 Apr 2024 10:46:13 -0600 Subject: [PATCH 25/53] add GDALVector::getFeature(): feature from its FID --- src/gdalvector.cpp | 148 +++-- src/gdalvector.h | 6 +- vignettes/articles/gdalvector-draft.Rmd | 766 ------------------------ 3 files changed, 94 insertions(+), 826 deletions(-) delete mode 100644 vignettes/articles/gdalvector-draft.Rmd diff --git 
a/src/gdalvector.cpp b/src/gdalvector.cpp index 3a45302c9..2dedbaab2 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -1,8 +1,10 @@ /* Implementation of class GDALVector Encapsulates one OGRLayer and its GDALDataset - Requires bit64 on the R side for the integer64 S3 class + Requires bit64 on the R side for its integer64 S3 type Chris Toney */ +#include + #include "gdal.h" #include "cpl_port.h" #include "cpl_string.h" @@ -380,67 +382,35 @@ SEXP GDALVector::getNextFeature() { _checkAccess(GA_ReadOnly); OGRFeatureH hFeature = OGR_L_GetNextFeature(hLayer); + if (hFeature != nullptr) + return _featureToList(hFeature); + else + return R_NilValue; +} - if (hFeature != nullptr) { - Rcpp::List list_out = Rcpp::List::create(); - int i; - - int64_t FID = static_cast(OGR_F_GetFID(hFeature)); - list_out.push_back(Rcpp::toInteger64(FID), "FID"); - - for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { - OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); - if (hFieldDefn == nullptr) - Rcpp::stop("could not obtain field definition"); - - if (!OGR_F_IsFieldSet(hFeature, i) || - OGR_F_IsFieldNull(hFeature, i)) { - continue; - } - - OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - if (fld_type == OFTInteger) { - int value = OGR_F_GetFieldAsInteger(hFeature, i); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - else if (fld_type == OFTInteger64) { - int64_t value = static_cast( - OGR_F_GetFieldAsInteger64(hFeature, i)); - list_out.push_back(Rcpp::toInteger64(value), - OGR_Fld_GetNameRef(hFieldDefn)); - } - else if (fld_type == OFTReal) { - double value = OGR_F_GetFieldAsDouble(hFeature, i); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - else { - // TODO: support date, time, binary, etc. 
- // read as string for now - std::string value = OGR_F_GetFieldAsString(hFeature, i); - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - } +SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { + // fid must be an R numeric vector of length 1 + // i.e., a scalar but use NumericVector here since it can carry the class + // attribute for integer64 + _checkAccess(GA_ReadOnly); - for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { - OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); - if (hGeom == nullptr) - Rcpp::stop("could not obtain geometry reference"); - char* pszWKT; - OGR_G_ExportToWkt(hGeom, &pszWKT); - std::string wkt(pszWKT); - OGRGeomFieldDefnH hGeomFldDefn = - OGR_F_GetGeomFieldDefnRef(hFeature, i); - if (hGeomFldDefn == nullptr) - Rcpp::stop("could not obtain geometry field def"); - list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); - CPLFree(pszWKT); - } + if (fid.size() != 1) + Rcpp::stop("'fid' must be a length-1 numeric vector (integer64)"); - return list_out; - } - else { + int64_t fid_in; + + if (Rcpp::isInteger64(fid)) + fid_in = Rcpp::fromInteger64(fid[0]); + else + fid_in = static_cast(fid[0]); + + OGRFeatureH hFeature = OGR_L_GetFeature(hLayer, + static_cast(fid_in)); + + if (hFeature != nullptr) + return _featureToList(hFeature); + else return R_NilValue; - } } void GDALVector::resetReading() { @@ -674,12 +644,70 @@ void GDALVector::_checkAccess(GDALAccess access_needed) const { Rcpp::stop("dataset is read-only"); } -OGRLayerH GDALVector::_getOGRLayerH() { +OGRLayerH GDALVector::_getOGRLayerH() const { _checkAccess(GA_ReadOnly); return hLayer; } +Rcpp::List GDALVector::_featureToList(OGRFeatureH hFeature) const { + Rcpp::List list_out = Rcpp::List::create(); + int i; + + int64_t FID = static_cast(OGR_F_GetFID(hFeature)); + list_out.push_back(Rcpp::toInteger64(FID), "FID"); + + for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if (hFieldDefn 
== nullptr) + Rcpp::stop("could not obtain field definition"); + + if (!OGR_F_IsFieldSet(hFeature, i) || + OGR_F_IsFieldNull(hFeature, i)) { + continue; + } + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { + int value = OGR_F_GetFieldAsInteger(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTInteger64) { + int64_t value = static_cast( + OGR_F_GetFieldAsInteger64(hFeature, i)); + list_out.push_back(Rcpp::toInteger64(value), + OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTReal) { + double value = OGR_F_GetFieldAsDouble(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + else { + // TODO: support date, time, binary, etc. + // read as string for now + std::string value = OGR_F_GetFieldAsString(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); + } + } + + for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); + if (hGeom == nullptr) + Rcpp::stop("could not obtain geometry reference"); + char* pszWKT; + OGR_G_ExportToWkt(hGeom, &pszWKT); + std::string wkt(pszWKT); + OGRGeomFieldDefnH hGeomFldDefn = + OGR_F_GetGeomFieldDefnRef(hFeature, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("could not obtain geometry field def"); + list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); + CPLFree(pszWKT); + } + + return list_out; +} + // **************************************************************************** RCPP_MODULE(mod_GDALVector) { @@ -736,6 +764,8 @@ RCPP_MODULE(mod_GDALVector) { "Fetch the feature count in this layer") .method("getNextFeature", &GDALVector::getNextFeature, "Fetch the next available feature from this layer") + .method("getFeature", &GDALVector::getFeature, + "Fetch a feature by its identifier") .method("resetReading", &GDALVector::resetReading, "Reset feature reading to start on the first feature") 
.method("layerIntersection", &GDALVector::layerIntersection, diff --git a/src/gdalvector.h b/src/gdalvector.h index 1551cbc57..bffaafccd 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -60,6 +60,9 @@ class GDALVector { double getFeatureCount(); SEXP getNextFeature(); + // fid must be a length-1 numeric vector, since numeric vector can carry + // the class attribute for integer64: + SEXP getFeature(Rcpp::NumericVector fid); void resetReading(); void layerIntersection( @@ -102,7 +105,8 @@ class GDALVector { // methods for internal use not exported to R void _checkAccess(GDALAccess access_needed) const; - OGRLayerH _getOGRLayerH(); + OGRLayerH _getOGRLayerH() const; + Rcpp::List _featureToList(OGRFeatureH hFeature) const; }; RCPP_EXPOSED_CLASS(GDALVector) diff --git a/vignettes/articles/gdalvector-draft.Rmd b/vignettes/articles/gdalvector-draft.Rmd deleted file mode 100644 index d93a52152..000000000 --- a/vignettes/articles/gdalvector-draft.Rmd +++ /dev/null @@ -1,766 +0,0 @@ ---- -title: "Draft Bindings to the GDAL/OGR Vector API" ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -Chris Toney (chris.toney at usda.gov) - -Last modified: 2024-04-06 - -Comment/discussion: - -## Summary - -This document describes R bindings to the GDAL/OGR Vector API proposed for inclusion in package **gdalraster**, analogous to its existing raster support. A package providing low-level access to both the raster and vector APIs in GDAL should be of interest to developers creating higher level interfaces. For example, custom workflows that are I/O intensive may benefit from direct access to GDAL's I/O capabilities. R bindings to the vector API would support persistent connections to the data store, cursors with attribute and spatial filtering, transactions, feature-level insert/delete, update of attributes and geometries, and OGR facilities for geoprocessing. 
Calling signatures of a class-based interface will resemble the C++ and Python APIs provided by the GDAL project. It is intended that bindings in **gdalraster** should provide long-term API stability while tracking changes in GDAL. - -A proposed interface is described in terms of the [GDAL Vector Data Model](https://gdal.org/user/vector_data_model.html), along with a draft class definition for implementation via `RCPP_EXPOSED_CLASS`. A prototype of the bindings is in the `gdalvector` branch at . The prototype bindings are currently undocumented. Code examples are included here to demonstrate usage and proof-of-concept. Installation of the development version of **gdalraster** containing the prototype vector bindings could be done with: - -```{r, eval=FALSE} -remotes::install_github("USDAForestService/gdalraster", ref = "gdalvector") -``` - -## Description of the interface - -Bindings will be implemented with **Rcpp** modules including `RCPP_EXPOSED_CLASS`. Exposing C++ classes directly in R provides a natural interface to the underlying object model. - -A GDAL Dataset for vector is a file or database containing one or more OGR layers. A vector dataset is represented in R as a data source name (DSN), a character string that may be a filename, database connection string, URL, virtual file, etc. 
Management of datasets and their vector schemas will be done with: - -* existing management functions in **gdalraster** that operate on vector datasets: `copyDatasetFiles()`, `deleteDataset()`, `renameDataset()` and `addFilesInZip()` (supports SOZip) -* existing internal utility functions to be further developed (potentially renamed/refactored): `.ogr_ds_exists()`, `.create_ogr()`, `.ogr_ds_layer_count()`, `.ogr_layer_exists()`, `.ogr_layer_create()`, `.ogr_layer_delete()` `.ogr_field_index()`, `.ogr_field_create()` -* existing wrappers `ogr2ogr()` and `ogrinfo()` from the gdal_utils.h API (**gdalraster** 1.9.0.9080 dev) -* add `ogr_execute_sql()`: execute an SQL statement against the data store for `CREATE INDEX`, `DROP INDEX`, `ALTER TABLE`, `DROP TABLE` (a SQL `SELECT` statement can be used in the constructor for class `GDALVector` described below, to open a layer of features) -* other stand-alone functions TBD - -OGR Layer class represents a layer of features within a data source. It will be modeled in R as class `GDALVector`, an exposed C++ class encapsulating an OGR Layer and the GDAL Dataset that owns it. A `GDALVector` object will persist an open connection to the dataset and expose methods for retrieving layer information, setting attribute and spatial filters, reading/writing features, and layer geoprocessing. A draft definition for class `GDALVector` is given below. - -All features in an OGR Layer share a common schema (feature class) modeled in GDAL as OGR Feature Definition. A feature class definition includes the set of attribute fields and their data types, the geometry field(s), and a feature class name (normally used as a layer name). The feature class definition is represented as a list in R, having as names the attribute/geometry field names, with each list element holding a field definition. - -An attribute field definition is a list with named elements: - -``` -$type : OGR Field Type ("OFTString", "OFTInteger", ...) 
-$subtype : optional ("OFSTBoolean", ...) -$width : optional max number of characters -$precision : optional number of digits after the decimal point -$is_nullable: optional NOT NULL constraint (logical scalar) -$is_unique : optional UNIQUE constraint (logical scalar) -$default : optional field default value as character string -$is_ignored : whether ignored when retrieving features (logical scalar) -$domain : optional domain name -$is_geom : FALSE for attribute fields -``` - -A geometry field definition is a list with named elements: - -``` -$type : geom type ("Point", "Polygon", etc.) -$srs : optional spatial reference as WKT string -$is_nullable: optional NOT NULL constraint (logical scalar) -$is_ignored : whether ignored when retrieving features (logical scalar) -$is_geom : TRUE for geometry fields -``` - -An OGR Feature – as read by `GDALVector::getNextFeature()` or `GDALVector::getFeature()`, or as passed to write methods – is a list with the unique feature identifier (FID), attribute and geometry field names, and their values. `GDALVector::getFeatureSet()` will return a set of features as a data frame. - -An OGR Geometry is represented in R as a character string containing OGC Well Known Text (WKT). Likewise, an OGR Spatial Reference, which encapsulates the definition of a projection and datum, is represented in R as WKT. **gdalraster** has existing functions for working with spatial reference systems as WKT (`srs_to_wkt()`, `srs_is_projected()`, etc.), and a set of geometry convenience functions also operating on WKT (GEOS via GDAL headers). - -## class GDALVector - -The draft class definition below has been partially implemented in: - - - -The header file can be referenced for the public class methods that have been implemented so far in the prototype (a subset of the draft class definition below): - - - -This does not include definitions of several stand-alone functions that will provide schema management. 
The existing definitions in `src/ogr_util.h` are a starting point for those. An **Rcpp** `.finalizer` function will be implemented to properly release resources in case an object of class `GDALVector` is garbage collected without an explicit call to `close()`. Support for 64-bit integers requires **bit64** on the R side, with conversions in C++ using **RcppInt64**. - -```cpp -class GDALVector { - - private: - std::string dsn_in; - std::string layer_in; // layer name or sql statement - bool is_sql_in; - Rcpp::CharacterVector open_options_in; - GDALDatasetH hDataset; - GDALAccess eAccess; - OGRLayerH hLayer; - OGRFeatureDefnH hFDefn; - - public: - GDALVector(); - GDALVector(Rcpp::CharacterVector dsn); - GDALVector(Rcpp::CharacterVector dsn, std::string layer); - GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); - GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, - Rcpp::CharacterVector open_options); - - void open(bool read_only); - bool isOpen() const; - std::string getDsn() const; - Rcpp::CharacterVector getFileList() const; - std::string getDriverShortName() const; - std::string getDriverLongName() const; - - std::string getName() const; - bool testCapability(std::string capability) const; - std::string getFIDColumn() const; - std::string getGeomType() const; - std::string getGeometryColumn() const; - std::string getSpatialRef() const; - Rcpp::NumericVector bbox(); - Rcpp::List getLayerDefn() const; - - void setAttributeFilter(std::string query); - void setSpatialFilterRect(Rcpp::NumericVector bbox); - void setSpatialFilter(std::string wkt); - std::string getSpatialFilter() const; - void clearSpatialFilter(); - - double getFeatureCount(); - SEXP getNextFeature(); - void resetReading(); - // int64 handling TBD - SEXP getFeature(int64_t fid); - void setNextByIndex(int64_t fid); - void setIgnoredFields(Rcpp::CharacterVector fields); - - Rcpp::DataFrame getFeatureSet(Rcpp::CharacterVector fields, - bool geom_column); - - 
// returning the FID of the affected feature if successful - int64_t createFeature(Rcpp::List feat); - int64_t setFeature(Rcpp::List feat); - int64_t upsertFeature(Rcpp::List feat); - int64_t deleteFeature(int64_t fid); - - bool startTransaction(bool force); - bool commitTransaction(); - bool rollbackTransaction(); - - void layerIntersection( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerUnion( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerSymDifference( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerIdentity( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerUpdate( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerClip( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - void layerErase( - GDALVector method_layer, - GDALVector result_layer, - bool quiet, - Rcpp::Nullable options); - - void close(); - - // methods for internal use not exported to R - void _checkAccess(GDALAccess access_needed) const; - OGRLayerH _getOGRLayerH(); -}; - -RCPP_EXPOSED_CLASS(GDALVector) -``` - -## Example: usage for class GDALVector - -``` r -## usage for GDALVector class - -library(gdalraster) -#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 - -# MTBS fires in Yellowstone National Park 1984-2022 -f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") -# copy to a temporary in-memory file that is writeable -dsn <- paste0("/vsimem/", basename(f)) -vsi_copy_file(f, dsn) - -lyr <- new(GDALVector, dsn, "mtbs_perims") - -# object of class GDALVector -lyr -#> C++ object <0x58a6a88edc80> of class 'GDALVector' <0x58a6a2fce150> -str(lyr) -#> Reference class 'Rcpp_GDALVector' [package "gdalraster"] with 0 fields -#> list() -#> 
and 44 methods, of which 30 are possibly relevant: -#> bbox, clearSpatialFilter, close, finalize, getDriverLongName, -#> getDriverShortName, getDsn, getFeatureCount, getFIDColumn, getFileList, -#> getGeometryColumn, getGeomType, getLayerDefn, getName, getNextFeature, -#> getSpatialRef, initialize, isOpen, layerClip, layerErase, layerIdentity, -#> layerIntersection, layerSymDifference, layerUnion, layerUpdate, open, -#> resetReading, setAttributeFilter, setSpatialFilterRect, testCapability - -# dataset info -lyr$getDriverShortName() -#> [1] "GPKG" -lyr$getDriverLongName() -#> [1] "GeoPackage" -lyr$getFileList() -#> [1] "/vsimem/ynp_fires_1984_2022.gpkg" - -# layer info -lyr$getName() -#> [1] "mtbs_perims" -lyr$getGeomType() -#> [1] "Multi Polygon" -lyr$getGeometryColumn() -#> [1] "geom" -lyr$getFIDColumn() -#> [1] "fid" -lyr$getSpatialRef() -#> [1] "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 1980\",6378137,298.257222101,AUTHORITY[\"EPSG\",\"7019\"]],AUTHORITY[\"EPSG\",\"6269\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4269\"]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"latitude_of_origin\",44.25],PARAMETER[\"central_meridian\",-109.5],PARAMETER[\"standard_parallel_1\",49],PARAMETER[\"standard_parallel_2\",45],PARAMETER[\"false_easting\",600000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AXIS[\"Easting\",EAST],AXIS[\"Northing\",NORTH],AUTHORITY[\"EPSG\",\"32100\"]]" -lyr$bbox() -#> [1] 469685.73 -12917.76 573531.72 96577.34 - -# layer capabilities -lyr$testCapability("RandomRead") -#> [1] TRUE -lyr$testCapability("FastSpatialFilter") -#> [1] TRUE -lyr$testCapability("FastFeatureCount") -#> [1] TRUE -lyr$testCapability("FastGetExtent") -#> [1] TRUE -lyr$testCapability("Transactions") -#> [1] TRUE - -# the dataset was opened read-only: 
-lyr$testCapability("SequentialWrite") -#> [1] FALSE -lyr$testCapability("RandomWrite") -#> [1] FALSE - -# re-open with write access -lyr$open(read_only = FALSE) -lyr$testCapability("SequentialWrite") -#> [1] TRUE -lyr$testCapability("RandomWrite") -#> [1] TRUE - -# feature class definition - a list of fields and their definitions -defn <- lyr$getLayerDefn() -names(defn) -#> [1] "event_id" "incid_name" "incid_type" "map_id" "burn_bnd_ac" -#> [6] "burn_bnd_lat" "burn_bnd_lon" "ig_date" "ig_year" "geom" - -# each list element holds a field definition list -str(defn) -#> List of 10 -#> $ event_id :List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 254 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ incid_name :List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 254 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ incid_type :List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 254 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ map_id :List of 8 -#> ..$ type : chr "OFTInteger64" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ burn_bnd_ac :List of 8 -#> ..$ type : chr "OFTInteger64" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ burn_bnd_lat:List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 10 -#> ..$ precision : int 0 -#> ..$ is_nullable: 
logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ burn_bnd_lon:List of 8 -#> ..$ type : chr "OFTString" -#> ..$ width : int 10 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ ig_date :List of 8 -#> ..$ type : chr "default (read as OFTString)" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ ig_year :List of 8 -#> ..$ type : chr "OFTInteger" -#> ..$ width : int 0 -#> ..$ precision : int 0 -#> ..$ is_nullable: logi TRUE -#> ..$ is_unique : logi FALSE -#> ..$ default : chr "" -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi FALSE -#> $ geom :List of 5 -#> ..$ type : chr "Multi Polygon" -#> ..$ srs : chr "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 1980\",6378137,2"| __truncated__ -#> ..$ is_nullable: logi TRUE -#> ..$ is_ignored : logi FALSE -#> ..$ is_geom : logi TRUE - -lyr$getFeatureCount() -#> [1] 61 - -# cursor -feat <- lyr$getNextFeature() -# a list of field names and their values -str(feat) -#> List of 11 -#> $ FID :integer64 1 -#> $ event_id : chr "WY4413411069519870807" -#> $ incid_name : chr "POLECAT" -#> $ incid_type : chr "Wildfire" -#> $ map_id :integer64 10015934 -#> $ burn_bnd_ac :integer64 1093 -#> $ burn_bnd_lat: chr "44.132" -#> $ burn_bnd_lon: chr "-110.696" -#> $ ig_date : chr "1987/08/07" -#> $ ig_year : int 1987 -#> $ geom : chr "MULTIPOLYGON (((503099.439579653 -12893.9672899192,503169.756694236 -12756.3721247327,502689.845907435 -12131.5"| __truncated__ - -# attribute filter -lyr$setAttributeFilter("ig_year = 2020") -lyr$getFeatureCount() -#> [1] 1 - -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID :integer64 
61 -#> $ event_id : chr "WY4438911082120200822" -#> $ incid_name : chr "LONE STAR" -#> $ incid_type : chr "Wildfire" -#> $ map_id :integer64 10020495 -#> $ burn_bnd_ac :integer64 3348 -#> $ burn_bnd_lat: chr "44.4" -#> $ burn_bnd_lon: chr "-110.782" -#> $ ig_date : chr "2020/08/22" -#> $ ig_year : int 2020 -#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 15388.0465"| __truncated__ - -# NULL if no more features are available -feat <- lyr$getNextFeature() -str(feat) -#> NULL - -# reset reading to the start -lyr$resetReading() -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID :integer64 61 -#> $ event_id : chr "WY4438911082120200822" -#> $ incid_name : chr "LONE STAR" -#> $ incid_type : chr "Wildfire" -#> $ map_id :integer64 10020495 -#> $ burn_bnd_ac :integer64 3348 -#> $ burn_bnd_lat: chr "44.4" -#> $ burn_bnd_lon: chr "-110.782" -#> $ ig_date : chr "2020/08/22" -#> $ ig_year : int 2020 -#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 15388.0465"| __truncated__ - -# clear attribute filter -lyr$setAttributeFilter("") -lyr$getFeatureCount() -#> [1] 61 - -# spatial filter -# get the bounding box of the largest 1988 fire -lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID :integer64 7 -#> $ event_id : chr "WY4470811082119880722" -#> $ incid_name : chr "NORTH FORK" -#> $ incid_type : chr "Wildfire" -#> $ map_id :integer64 10014217 -#> $ burn_bnd_ac :integer64 563527 -#> $ burn_bnd_lat: chr "44.678" -#> $ burn_bnd_lon: chr "-110.716" -#> $ ig_date : chr "1988/07/22" -#> $ ig_year : int 1988 -#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ - -bbox <- bbox_from_wkt(feat$geom) -print(bbox) -#> [1] 469685.97 11442.45 544069.63 85508.15 - -# clear 
the attribute filter -lyr$setAttributeFilter("") -lyr$getFeatureCount() -#> [1] 61 - -lyr$setSpatialFilterRect(bbox) -lyr$getFeatureCount() -#> [1] 40 - -lyr$clearSpatialFilter() -lyr$getFeatureCount() -#> [1] 61 - -lyr$close() -vsi_unlink(dsn) -``` - -Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) - -## Example: layer geoprocessing - -``` r -## layer intersection example - -library(gdalraster) -#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 - -# MTBS fires in Yellowstone National Park 1984-2022 -dsn <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") -lyr <- new(GDALVector, dsn, "mtbs_perims") - -# largest 1988 fire (FID from the example above) -lyr$setAttributeFilter("FID = 7") -lyr$getFeatureCount() -#> [1] 1 -feat <- lyr$getNextFeature() -str(feat) -#> List of 11 -#> $ FID :integer64 7 -#> $ event_id : chr "WY4470811082119880722" -#> $ incid_name : chr "NORTH FORK" -#> $ incid_type : chr "Wildfire" -#> $ map_id :integer64 10014217 -#> $ burn_bnd_ac :integer64 563527 -#> $ burn_bnd_lat: chr "44.678" -#> $ burn_bnd_lon: chr "-110.716" -#> $ ig_date : chr "1988/07/22" -#> $ ig_year : int 1988 -#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ - -# get a second layer for 2000-2022 fires -# the layer argument can be a sql statement -sql <- "SELECT FID, * FROM mtbs_perims WHERE ig_year >= 2000 ORDER BY mtbs_perims.ig_year" -lyr2 <- new(GDALVector, dsn, sql) -lyr2$getFeatureCount() -#> [1] 40 - -# create an output layer using a temporary in-memory dataset -dsn_out <- "/vsimem/lyr_proc_tmp.gpkg" -srs <- lyr$getSpatialRef() -# this uses existing internal OGR utilities -gdalraster:::.create_ogr("GPKG", dsn_out, 0, 0, 0, "Unknown", "result_layer", srs) -#> [1] TRUE -gdalraster:::.ogr_ds_exists(dsn_out, with_update = TRUE) -#> [1] TRUE -gdalraster:::.ogr_layer_exists(dsn_out, "result_layer") -#> [1] TRUE - -lyr_out <- 
new(GDALVector, dsn_out, "result_layer", read_only = FALSE) -lyr_out$getFeatureCount() -#> [1] 0 - -# intersection of lyr and lyr2, with result in lyr_out -lyr$layerIntersection(lyr2, lyr_out, quiet = FALSE, options = NULL) -#> 0...10...20...30...40...50...60...70...80...90...100 - done. - -lyr_out$getFeatureCount() -#> [1] 5 -defn <- lyr_out$getLayerDefn() -# combined attributes -names(defn) -#> [1] "input_event_id" "input_incid_name" "input_incid_type" -#> [4] "input_map_id" "input_burn_bnd_ac" "input_burn_bnd_lat" -#> [7] "input_burn_bnd_lon" "input_ig_date" "input_ig_year" -#> [10] "method_event_id" "method_incid_name" "method_incid_type" -#> [13] "method_map_id" "method_burn_bnd_ac" "method_burn_bnd_lat" -#> [16] "method_burn_bnd_lon" "method_ig_date" "method_ig_year" -#> [19] "geom" - -# we don't have vector plotting yet, so rasterize and use plot_raster() -out_file <- "/vsimem/result_layer.tif" -rasterize(src_dsn = dsn_out, - dstfile = out_file, - layer = "result_layer", - burn_attr = "method_ig_year", - tr = c(90,90), - tap = TRUE, - dtName = "Int16", - dstnodata = -9999, - init = -9999) -#> 0...10...20...30...40...50...60...70...80...90...100 - done. 
- -ds <- new(GDALRaster, out_file) -pal <- scales::viridis_pal(end = 0.8, direction = -1)(6) -ramp <- scales::colour_ramp(pal) -plot_raster(ds, legend = TRUE, col_map_fn = ramp, na_col = "#d9d9d9", - main="2000-2022 re-burn within the 1988 North Fork perimeter") -``` - -![](img/ynp_mtbs_lyr_int_ex.png) - -``` r - -ds$close() -lyr$close() -lyr2$close() -lyr_out$close() - -vsi_unlink(dsn_out) -vsi_unlink(out_file) -``` - -Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) - -## Example: WorldCover Sentinel-2 composites - -``` r -## WorldCover Sentinel-2 annual composites -## see: https://blog.vito.be/remotesensing/worldcover-annual-composites -## tile index lookup on remote filesystem and obtain raster data - -library(gdalraster) -#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 - -fgb = "/vsicurl/https://esa-worldcover.s3.eu-central-1.amazonaws.com/esa_worldcover_grid_composites.fgb" -lyr_tiles <- new(GDALVector, fgb) - -lyr_tiles$getDriverShortName() -#> [1] "FlatGeobuf" -lyr_tiles$getName() -#> [1] "worldcover_composites_grid_aws" -lyr_tiles$getGeomType() -#> [1] "Polygon" -lyr_tiles$getSpatialRef() -#> [1] "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]" - -lyr_tiles$testCapability("RandomRead") -#> [1] TRUE -lyr_tiles$testCapability("FastSpatialFilter") -#> [1] TRUE -lyr_tiles$testCapability("FastFeatureCount") -#> [1] TRUE -lyr_tiles$testCapability("FastGetExtent") -#> [1] TRUE - -lyr_tiles$getFeatureCount() -#> [1] 19363 -lyr_tiles$bbox() -#> [1] -180 -60 180 83 - -defn <- lyr_tiles$getLayerDefn() -names(defn) -#> [1] "tile" "s1_vvvhratio_2020" "s1_vvvhratio_2021" -#> [4] "s2_rgbnir_2020" "s2_rgbnir_2021" "s2_ndvi_2020" -#> [7] "s2_ndvi_2021" 
"s2_swir_2020" "s2_swir_2021" -#> [10] "" - -# AOI for the Fishhawk fire -dsn <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") -lyr_ynp <- new(GDALVector, dsn, "mtbs_perims") - -lyr_ynp$setAttributeFilter("incid_name = 'FISHHAWK'") -lyr_ynp$getFeatureCount() -#> [1] 1 -feat <- lyr_ynp$getNextFeature() -str(feat) -#> List of 11 -#> $ FID :integer64 60 -#> $ event_id : chr "WY4437710988020190902" -#> $ incid_name : chr "FISHHAWK" -#> $ incid_type : chr "Wildfire" -#> $ map_id :integer64 10016957 -#> $ burn_bnd_ac :integer64 10775 -#> $ burn_bnd_lat: chr "44.384" -#> $ burn_bnd_lon: chr "-109.85" -#> $ ig_date : chr "2019/09/02" -#> $ ig_year : int 2019 -#> $ geom : chr "MULTIPOLYGON (((573530.585472236 9160.22088906913,573266.183153384 9415.42216938034,573137.427110327 9285.44517"| __truncated__ - -# get the feature bbox in WGS84 -bb <- g_transform(feat$geom, lyr_ynp$getSpatialRef(), epsg_to_wkt(4326)) |> - bbox_from_wkt() -print(bb) -#> [1] -109.89753 44.32714 -109.83172 44.44030 - -# tile index lookup -lyr_tiles$setSpatialFilterRect(bb) -lyr_tiles$getFeatureCount() -#> [1] 1 -feat <- lyr_tiles$getNextFeature() -str(feat) -#> List of 11 -#> $ FID :integer64 16615 -#> $ tile : chr "N44W110" -#> $ s1_vvvhratio_2020: chr "s3://esa-worldcover-s1/vvvhratio/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_S1VVVHratio.tif" -#> $ s1_vvvhratio_2021: chr "s3://esa-worldcover-s1/vvvhratio/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_S1VVVHratio.tif" -#> $ s2_rgbnir_2020 : chr "s3://esa-worldcover-s2/rgbnir/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_S2RGBNIR.tif" -#> $ s2_rgbnir_2021 : chr "s3://esa-worldcover-s2/rgbnir/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_S2RGBNIR.tif" -#> $ s2_ndvi_2020 : chr "s3://esa-worldcover-s2/ndvi/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_NDVI.tif" -#> $ s2_ndvi_2021 : chr "s3://esa-worldcover-s2/ndvi/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_NDVI.tif" -#> $ s2_swir_2020 : chr 
"s3://esa-worldcover-s2/swir/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_SWIR.tif" -#> $ s2_swir_2021 : chr "s3://esa-worldcover-s2/swir/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_SWIR.tif" -#> $ : chr "POLYGON ((-110 44,-110 45,-109 45,-109 44,-110 44))" - -# get the Sentinel-2 RGBNIR composite for this AOI -tif_file <- sub("s3://", "/vsis3/", feat$s2_rgbnir_2021, fixed = TRUE) -print(tif_file) -#> [1] "/vsis3/esa-worldcover-s2/rgbnir/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_S2RGBNIR.tif" - -# public bucket so disable request signing -set_config_option("AWS_NO_SIGN_REQUEST", "YES") - -# check if file exists and get size in MB -vsi_stat(tif_file) -#> [1] TRUE -vsi_stat(tif_file, "size") / (1000 * 1000) -#> [1] 562.039 - -# copy to a local or remote filesystem, e.g., -# tif_copy <- file.path(tempdir(), basename(tif_file)) -# vsi_copy_file(tif_file, tif_copy, show_progress = TRUE) - -# or open the dataset for reading -ds <- new(GDALRaster, tif_file) -ds$dim() -#> [1] 12000 12000 4 -ds$getMetadata(band=0, domain="") -#> [1] "algorithm_version=V2.0.0" -#> [2] "AREA_OR_POINT=Area" -#> [3] "bands=Band 1: B04 (Red), Band 2: B03 (Green), Band 3: B02 (Blue), Band 4: B08 (Infrared)" -#> [4] "copyright=ESA WorldCover project 2021 / Contains modified Copernicus Sentinel data (2021) processed by ESA WorldCover consortium" -#> [5] "creation_time=2022-12-09 17:08:25.881589" -#> [6] "description=The ESA WorldCover Sentinel-2 median L2A (RGBNIR) composite is a color image made from images in Blue (B02), Green (B03), Red (B04) and Infrared (B08). 
" -#> [7] "license=CC-BY 4.0 - https://creativecommons.org/licenses/by/4.0/" -#> [8] "product_crs=EPSG:4326" -#> [9] "product_grid=1x1 degree tiling grid" -#> [10] "product_tile=N44W110" -#> [11] "product_type=Sentinel-2 median L2A (RGBNIR) composite" -#> [12] "product_version=V2.0.0" -#> [13] "reference=https://esa-worldcover.org" -#> [14] "time_end=2021-12-31T23:59:59Z" -#> [15] "time_start=2021-01-01T00:00:00Z" -#> [16] "title=ESA WorldCover Sentinel-2 median L2A (RGBNIR) composite at 10m resolution for year 2021" -ds$getMetadata(band=0, domain="IMAGE_STRUCTURE") -#> [1] "COMPRESSION=DEFLATE" "INTERLEAVE=PIXEL" "LAYOUT=COG" -#> [4] "PREDICTOR=2" - -r <- read_ds(ds, bands=c(4,1,2), out_xsize = 800, out_ysize = 800) -txt <- paste0(tools::file_path_sans_ext(basename(tif_file)), - "\n(color infrared B8-B4-B3)") -plot_raster(r, - minmax_pct_cut = c(2, 98), - xlab = "longitude", - ylab = "latitude", - main = txt) -``` - -ds$close() -lyr_ynp$close() -lyr_tiles$close() -``` - -Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) - -## Further consideration / TBD - -This is a working list of potential issues and design questions that need further consideration, to be determined: - -* handling of 64-bit integer: OGR FID currently is `GIntBig` and integer fields in vector data sources will commonly be `OFTInteger64`. These are now handled using **RcppInt64**, with **bit64** on the R side providing S3 class `integer64`. -* OGR's Arrow C interface: Implement `GDALVector::getArrowStream()` (GDAL >= 3.6) and `GDALVector::writeArrowBatch()` (GDAL >= 3.8), supported on the R side with package **nanoarrow**. -* potential output vectors of GEOS or OGR pointers, WKB with support by **wk** -* OGR layer geoprocessing might be stand-alone functions instead of class methods in `GDALVector`. We would have more flexibility in terms of optional/argument defaults. Either way, we should add an option to create the output layer. 
- -## Document changelog - -* add `GDALVector::setSpatialFilter()` to the draft class definition - set spatial filter from WKT geometry (2024-03-02) -* add comment in the layer intersection example re: feature retrieval by FID (2024-03-02) -* add arguments `fields` and `geom_column` in `GDALVector::getFeatureSet()` - optionally retrieve a subset of attribute fields, and specify whether to include geometry column(s) in the returned data frame (2024-03-02) -* the return value of feature writing methods should be the FID of the newly created/set/deleted feature upon successful write, or `OGRNullFID` on failure (2024-03-02) -* add a note above `class GDALVector` re: handling of 64-bit integer types to be determined (2024-03-02) -* add `GDALVector::setIgnoredFields()`: set which fields can be omitted when retrieving features from the layer. If the driver supports this functionality (testable using OLCIgnoreFields capability), it will not fetch the specified fields in subsequent calls to `getFeature()` / `getNextFeature()` thus save some processing time and/or bandwidth. 
(2024-03-02) -* add `ogr_execute_sql()` for dataset/schema management (2024-03-02) -* add `GDALVector::setNextByIndex()` for cursor positioning (2024-03-03) -* add `GDALVector::getSpatialFilter()`: get the WKT geometry currently in use as the spatial filter, or `""` (2024-03-03) -* add section "Further consideration / TBD" (2024-03-03) -* `ogr2ogr()` and `ogrinfo()` are available in 1.9.0.9080 dev (2024-03-04) -* add potential output vectors of geos or ogr pointers, or wkb/wkt with support by {wk} (@mdsumner, 2024-03-04) -* add section "Contributors" (2024-03-04) -* add link to issue 241 for discussion thread (2024-03-05) -* OGR layer geoprocessing may be stand-alone functions instead of class methods TBD (2024-03-10) -* add link to the header file to reference the class methods that have been implemented so far in the prototype (2024-03-10) -* `ogr2ogr()` and `ogrinfo()` are available in **gdalraster** 1.10.0 on CRAN (2024-03-26) -* initial int64 support; now linking to **RcppInt64**, and importing **bit64**; `FID` and `OFTInteger64` fields are now returned in R as `integer64`; updated the examples (2024-04-06) - -## Contributors - -* [@goergen95](https://github.com/goergen95) ([#205](https://github.com/USDAForestService/gdalraster/issues/205)) -* [@mdsumner](https://github.com/mdsumner) - From 04c32db0b137e6150c28f1c744f6fe8f1e089a2c Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 6 Apr 2024 10:54:48 -0600 Subject: [PATCH 26/53] update gdalvector-draft --- vignettes/articles/gdalvector-draft.Rmd | 770 ++++++++++++++++++++++++ 1 file changed, 770 insertions(+) create mode 100644 vignettes/articles/gdalvector-draft.Rmd diff --git a/vignettes/articles/gdalvector-draft.Rmd b/vignettes/articles/gdalvector-draft.Rmd new file mode 100644 index 000000000..2fe134cd3 --- /dev/null +++ b/vignettes/articles/gdalvector-draft.Rmd @@ -0,0 +1,770 @@ +--- +title: "Draft Bindings to the GDAL/OGR Vector API" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = 
TRUE, + comment = "#>" +) +``` + +Chris Toney (chris.toney at usda.gov) + +Last modified: 2024-04-06 + +Comment/discussion: <https://github.com/USDAForestService/gdalraster/issues/241> + +## Summary + +This document describes R bindings to the GDAL/OGR Vector API proposed for inclusion in package **gdalraster**, analogous to its existing raster support. A package providing low-level access to both the raster and vector APIs in GDAL should be of interest to developers creating higher level interfaces. For example, custom workflows that are I/O intensive may benefit from direct access to GDAL's I/O capabilities. R bindings to the vector API would support persistent connections to the data store, cursors with attribute and spatial filtering, transactions, feature-level insert/delete, update of attributes and geometries, and OGR facilities for geoprocessing. Calling signatures of a class-based interface will resemble the C++ and Python APIs provided by the GDAL project. It is intended that bindings in **gdalraster** should provide long-term API stability while tracking changes in GDAL. + +A proposed interface is described in terms of the [GDAL Vector Data Model](https://gdal.org/user/vector_data_model.html), along with a draft class definition for implementation via `RCPP_EXPOSED_CLASS`. A prototype of the bindings is in the `gdalvector` branch at <https://github.com/USDAForestService/gdalraster/tree/gdalvector>. The prototype bindings are currently undocumented. Code examples are included here to demonstrate usage and proof-of-concept. Installation of the development version of **gdalraster** containing the prototype vector bindings could be done with: + +```{r, eval=FALSE} +remotes::install_github("USDAForestService/gdalraster", ref = "gdalvector") +``` + +## Description of the interface + +Bindings will be implemented with **Rcpp** modules including `RCPP_EXPOSED_CLASS`. Exposing C++ classes directly in R provides a natural interface to the underlying object model. + +A GDAL Dataset for vector is a file or database containing one or more OGR layers. 
A vector dataset is represented in R as a data source name (DSN), a character string that may be a filename, database connection string, URL, virtual file, etc. Management of datasets and their vector schemas will be done with: + +* existing management functions in **gdalraster** that operate on vector datasets: `copyDatasetFiles()`, `deleteDataset()`, `renameDataset()` and `addFilesInZip()` (supports SOZip) +* existing internal utility functions to be further developed (potentially renamed/refactored): `.ogr_ds_exists()`, `.create_ogr()`, `.ogr_ds_layer_count()`, `.ogr_layer_exists()`, `.ogr_layer_create()`, `.ogr_layer_delete()`, `.ogr_field_index()`, `.ogr_field_create()` +* existing wrappers `ogr2ogr()` and `ogrinfo()` from the gdal_utils.h API (**gdalraster** 1.10.0 CRAN release) +* add `ogr_execute_sql()`: execute an SQL statement against the data store for `CREATE INDEX`, `DROP INDEX`, `ALTER TABLE`, `DROP TABLE` (an SQL `SELECT` statement can be used in the constructor for class `GDALVector` described below, to open a layer of features) +* other stand-alone functions TBD + +The OGR Layer class represents a layer of features within a data source. It will be modeled in R as class `GDALVector`, an exposed C++ class encapsulating an OGR Layer and the GDAL Dataset that owns it. A `GDALVector` object will persist an open connection to the dataset and expose methods for retrieving layer information, setting attribute and spatial filters, reading/writing features, and layer geoprocessing. A draft definition for class `GDALVector` is given below. + +All features in an OGR Layer share a common schema (feature class) modeled in GDAL as OGR Feature Definition. A feature class definition includes the set of attribute fields and their data types, the geometry field(s), and a feature class name (normally used as a layer name). 
The feature class definition is represented as a list in R, having as names the attribute/geometry field names, with each list element holding a field definition. + +An attribute field definition is a list with named elements: + +``` +$type : OGR Field Type ("OFTString", "OFTInteger", ...) +$subtype : optional ("OFSTBoolean", ...) +$width : optional max number of characters +$precision : optional number of digits after the decimal point +$is_nullable: optional NOT NULL constraint (logical scalar) +$is_unique : optional UNIQUE constraint (logical scalar) +$default : optional field default value as character string +$is_ignored : whether ignored when retrieving features (logical scalar) +$domain : optional domain name +$is_geom : FALSE for attribute fields +``` + +A geometry field definition is a list with named elements: + +``` +$type : geom type ("Point", "Polygon", etc.) +$srs : optional spatial reference as WKT string +$is_nullable: optional NOT NULL constraint (logical scalar) +$is_ignored : whether ignored when retrieving features (logical scalar) +$is_geom : TRUE for geometry fields +``` + +An OGR Feature – as read by `GDALVector::getNextFeature()` or `GDALVector::getFeature()`, or as passed to write methods – is a list with the unique feature identifier (FID), attribute and geometry field names, and their values. `GDALVector::getFeatureSet()` will return a set of features as a data frame. + +An OGR Geometry is represented in R as a character string containing OGC Well Known Text (WKT). Likewise, an OGR Spatial Reference, which encapsulates the definition of a projection and datum, is represented in R as WKT. **gdalraster** has existing functions for working with spatial reference systems as WKT (`srs_to_wkt()`, `srs_is_projected()`, etc.), and a set of geometry convenience functions also operating on WKT (GEOS via GDAL headers). 
+ +## class GDALVector + +The draft class definition below has been partially implemented in: + + + +The header file can be referenced for the public class methods that have been implemented so far in the prototype (a subset of the draft class definition below): + + + +This does not include definitions of several stand-alone functions that will provide schema management. The existing definitions in `src/ogr_util.h` are a starting point for those. An **Rcpp** `.finalizer` function will be implemented to properly release resources in case an object of class `GDALVector` is garbage collected without an explicit call to `close()`. Support for 64-bit integers requires **bit64** on the R side, with conversions in C++ using **RcppInt64**. + +```cpp +class GDALVector { + + private: + std::string dsn_in; + std::string layer_in; // layer name or sql statement + bool is_sql_in; + Rcpp::CharacterVector open_options_in; + GDALDatasetH hDataset; + GDALAccess eAccess; + OGRLayerH hLayer; + OGRFeatureDefnH hFDefn; + + public: + GDALVector(); + GDALVector(Rcpp::CharacterVector dsn); + GDALVector(Rcpp::CharacterVector dsn, std::string layer); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, + Rcpp::CharacterVector open_options); + + void open(bool read_only); + bool isOpen() const; + std::string getDsn() const; + Rcpp::CharacterVector getFileList() const; + std::string getDriverShortName() const; + std::string getDriverLongName() const; + + std::string getName() const; + bool testCapability(std::string capability) const; + std::string getFIDColumn() const; + std::string getGeomType() const; + std::string getGeometryColumn() const; + std::string getSpatialRef() const; + Rcpp::NumericVector bbox(); + Rcpp::List getLayerDefn() const; + + void setAttributeFilter(std::string query); + void setSpatialFilterRect(Rcpp::NumericVector bbox); + void setSpatialFilter(std::string wkt); + 
std::string getSpatialFilter() const; + void clearSpatialFilter(); + + double getFeatureCount(); + SEXP getNextFeature(); + void resetReading(); + // int64 handling TBD + SEXP getFeature(int64_t fid); + void setNextByIndex(int64_t fid); + void setIgnoredFields(Rcpp::CharacterVector fields); + + Rcpp::DataFrame getFeatureSet(Rcpp::CharacterVector fields, + bool geom_column); + + // returning the FID of the affected feature if successful + int64_t createFeature(Rcpp::List feat); + int64_t setFeature(Rcpp::List feat); + int64_t upsertFeature(Rcpp::List feat); + int64_t deleteFeature(int64_t fid); + + bool startTransaction(bool force); + bool commitTransaction(); + bool rollbackTransaction(); + + void layerIntersection( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUnion( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerSymDifference( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerIdentity( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerUpdate( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerClip( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + void layerErase( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options); + + void close(); + + // methods for internal use not exported to R + void _checkAccess(GDALAccess access_needed) const; + OGRLayerH _getOGRLayerH(); +}; + +RCPP_EXPOSED_CLASS(GDALVector) +``` + +## Example: usage for class GDALVector + +``` r +## usage for GDALVector class + +library(gdalraster) +#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 + +# MTBS fires in Yellowstone National Park 1984-2022 +f <- system.file("extdata/ynp_fires_1984_2022.gpkg", 
package="gdalraster") +# copy to a temporary in-memory file that is writeable +dsn <- paste0("/vsimem/", basename(f)) +vsi_copy_file(f, dsn) + +lyr <- new(GDALVector, dsn, "mtbs_perims") + +# object of class GDALVector +lyr +#> C++ object <0x58a6a88edc80> of class 'GDALVector' <0x58a6a2fce150> +str(lyr) +#> Reference class 'Rcpp_GDALVector' [package "gdalraster"] with 0 fields +#> list() +#> and 44 methods, of which 30 are possibly relevant: +#> bbox, clearSpatialFilter, close, finalize, getDriverLongName, +#> getDriverShortName, getDsn, getFeatureCount, getFIDColumn, getFileList, +#> getGeometryColumn, getGeomType, getLayerDefn, getName, getNextFeature, +#> getSpatialRef, initialize, isOpen, layerClip, layerErase, layerIdentity, +#> layerIntersection, layerSymDifference, layerUnion, layerUpdate, open, +#> resetReading, setAttributeFilter, setSpatialFilterRect, testCapability + +# dataset info +lyr$getDriverShortName() +#> [1] "GPKG" +lyr$getDriverLongName() +#> [1] "GeoPackage" +lyr$getFileList() +#> [1] "/vsimem/ynp_fires_1984_2022.gpkg" + +# layer info +lyr$getName() +#> [1] "mtbs_perims" +lyr$getGeomType() +#> [1] "Multi Polygon" +lyr$getGeometryColumn() +#> [1] "geom" +lyr$getFIDColumn() +#> [1] "fid" +lyr$getSpatialRef() +#> [1] "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 1980\",6378137,298.257222101,AUTHORITY[\"EPSG\",\"7019\"]],AUTHORITY[\"EPSG\",\"6269\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4269\"]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"latitude_of_origin\",44.25],PARAMETER[\"central_meridian\",-109.5],PARAMETER[\"standard_parallel_1\",49],PARAMETER[\"standard_parallel_2\",45],PARAMETER[\"false_easting\",600000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AXIS[\"Easting\",EAST],AXIS[\"Northing\",NORTH],AUTHORITY[\"EPSG\",\"32100\"]]" +lyr$bbox() +#> 
[1] 469685.73 -12917.76 573531.72 96577.34 + +# layer capabilities +lyr$testCapability("RandomRead") +#> [1] TRUE +lyr$testCapability("FastSpatialFilter") +#> [1] TRUE +lyr$testCapability("FastFeatureCount") +#> [1] TRUE +lyr$testCapability("FastGetExtent") +#> [1] TRUE +lyr$testCapability("Transactions") +#> [1] TRUE + +# the dataset was opened read-only: +lyr$testCapability("SequentialWrite") +#> [1] FALSE +lyr$testCapability("RandomWrite") +#> [1] FALSE + +# re-open with write access +lyr$open(read_only = FALSE) +lyr$testCapability("SequentialWrite") +#> [1] TRUE +lyr$testCapability("RandomWrite") +#> [1] TRUE + +# feature class definition - a list of fields and their definitions +defn <- lyr$getLayerDefn() +names(defn) +#> [1] "event_id" "incid_name" "incid_type" "map_id" "burn_bnd_ac" +#> [6] "burn_bnd_lat" "burn_bnd_lon" "ig_date" "ig_year" "geom" + +# each list element holds a field definition list +str(defn) +#> List of 10 +#> $ event_id :List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 254 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ incid_name :List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 254 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ incid_type :List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 254 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ map_id :List of 8 +#> ..$ type : chr "OFTInteger64" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ burn_bnd_ac :List of 8 
+#> ..$ type : chr "OFTInteger64" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ burn_bnd_lat:List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 10 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ burn_bnd_lon:List of 8 +#> ..$ type : chr "OFTString" +#> ..$ width : int 10 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ ig_date :List of 8 +#> ..$ type : chr "default (read as OFTString)" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ ig_year :List of 8 +#> ..$ type : chr "OFTInteger" +#> ..$ width : int 0 +#> ..$ precision : int 0 +#> ..$ is_nullable: logi TRUE +#> ..$ is_unique : logi FALSE +#> ..$ default : chr "" +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi FALSE +#> $ geom :List of 5 +#> ..$ type : chr "Multi Polygon" +#> ..$ srs : chr "PROJCS[\"NAD83 / Montana\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 1980\",6378137,2"| __truncated__ +#> ..$ is_nullable: logi TRUE +#> ..$ is_ignored : logi FALSE +#> ..$ is_geom : logi TRUE + +lyr$getFeatureCount() +#> [1] 61 + +# cursor +feat <- lyr$getNextFeature() +# a list of field names and their values +str(feat) +#> List of 11 +#> $ FID :integer64 1 +#> $ event_id : chr "WY4413411069519870807" +#> $ incid_name : chr "POLECAT" +#> $ incid_type : chr "Wildfire" +#> $ map_id :integer64 10015934 +#> $ burn_bnd_ac :integer64 1093 +#> $ burn_bnd_lat: chr "44.132" +#> $ burn_bnd_lon: chr "-110.696" +#> $ ig_date : chr 
"1987/08/07" +#> $ ig_year : int 1987 +#> $ geom : chr "MULTIPOLYGON (((503099.439579653 -12893.9672899192,503169.756694236 -12756.3721247327,502689.845907435 -12131.5"| __truncated__ + +# attribute filter +lyr$setAttributeFilter("ig_year = 2020") +lyr$getFeatureCount() +#> [1] 1 + +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID :integer64 61 +#> $ event_id : chr "WY4438911082120200822" +#> $ incid_name : chr "LONE STAR" +#> $ incid_type : chr "Wildfire" +#> $ map_id :integer64 10020495 +#> $ burn_bnd_ac :integer64 3348 +#> $ burn_bnd_lat: chr "44.4" +#> $ burn_bnd_lon: chr "-110.782" +#> $ ig_date : chr "2020/08/22" +#> $ ig_year : int 2020 +#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 15388.0465"| __truncated__ + +# NULL if no more features are available +feat <- lyr$getNextFeature() +str(feat) +#> NULL + +# reset reading to the start +lyr$resetReading() +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID :integer64 61 +#> $ event_id : chr "WY4438911082120200822" +#> $ incid_name : chr "LONE STAR" +#> $ incid_type : chr "Wildfire" +#> $ map_id :integer64 10020495 +#> $ burn_bnd_ac :integer64 3348 +#> $ burn_bnd_lat: chr "44.4" +#> $ burn_bnd_lon: chr "-110.782" +#> $ ig_date : chr "2020/08/22" +#> $ ig_year : int 2020 +#> $ geom : chr "MULTIPOLYGON (((496593.122306971 15506.8828590633,496491.761299067 15605.3612548792,496290.812130161 15388.0465"| __truncated__ + +# clear attribute filter +lyr$setAttributeFilter("") +lyr$getFeatureCount() +#> [1] 61 + +# spatial filter +# get the bounding box of the largest 1988 fire +lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID :integer64 7 +#> $ event_id : chr "WY4470811082119880722" +#> $ incid_name : chr "NORTH FORK" +#> $ incid_type : chr "Wildfire" +#> $ map_id :integer64 10014217 +#> $ burn_bnd_ac :integer64 563527 +#> $ 
burn_bnd_lat: chr "44.678" +#> $ burn_bnd_lon: chr "-110.716" +#> $ ig_date : chr "1988/07/22" +#> $ ig_year : int 1988 +#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ + +bbox <- bbox_from_wkt(feat$geom) +print(bbox) +#> [1] 469685.97 11442.45 544069.63 85508.15 + +# clear the attribute filter +lyr$setAttributeFilter("") +lyr$getFeatureCount() +#> [1] 61 + +lyr$setSpatialFilterRect(bbox) +lyr$getFeatureCount() +#> [1] 40 + +lyr$clearSpatialFilter() +lyr$getFeatureCount() +#> [1] 61 + +lyr$close() +vsi_unlink(dsn) +``` + +Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) + +## Example: layer geoprocessing + +``` r +## layer intersection example + +library(gdalraster) +#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 + +# MTBS fires in Yellowstone National Park 1984-2022 +dsn <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") +lyr <- new(GDALVector, dsn, "mtbs_perims") + +# largest 1988 fire (FID from the example above) +lyr$setAttributeFilter("FID = 7") +lyr$getFeatureCount() +#> [1] 1 +feat <- lyr$getNextFeature() +str(feat) +#> List of 11 +#> $ FID :integer64 7 +#> $ event_id : chr "WY4470811082119880722" +#> $ incid_name : chr "NORTH FORK" +#> $ incid_type : chr "Wildfire" +#> $ map_id :integer64 10014217 +#> $ burn_bnd_ac :integer64 563527 +#> $ burn_bnd_lat: chr "44.678" +#> $ burn_bnd_lon: chr "-110.716" +#> $ ig_date : chr "1988/07/22" +#> $ ig_year : int 1988 +#> $ geom : chr "MULTIPOLYGON (((469685.969312071 29526.2354109807,469918.933844832 29654.3220754602,470030.299119989 29518.7441"| __truncated__ + +# get a second layer for 2000-2022 fires +# the layer argument can be a sql statement +sql <- "SELECT FID, * FROM mtbs_perims WHERE ig_year >= 2000 ORDER BY mtbs_perims.ig_year" +lyr2 <- new(GDALVector, dsn, sql) +lyr2$getFeatureCount() +#> [1] 40 + +# create an output layer using a temporary in-memory 
dataset +dsn_out <- "/vsimem/lyr_proc_tmp.gpkg" +srs <- lyr$getSpatialRef() +# this uses existing internal OGR utilities +gdalraster:::.create_ogr("GPKG", dsn_out, 0, 0, 0, "Unknown", "result_layer", srs) +#> [1] TRUE +gdalraster:::.ogr_ds_exists(dsn_out, with_update = TRUE) +#> [1] TRUE +gdalraster:::.ogr_layer_exists(dsn_out, "result_layer") +#> [1] TRUE + +lyr_out <- new(GDALVector, dsn_out, "result_layer", read_only = FALSE) +lyr_out$getFeatureCount() +#> [1] 0 + +# intersection of lyr and lyr2, with result in lyr_out +lyr$layerIntersection(lyr2, lyr_out, quiet = FALSE, options = NULL) +#> 0...10...20...30...40...50...60...70...80...90...100 - done. + +lyr_out$getFeatureCount() +#> [1] 5 +defn <- lyr_out$getLayerDefn() +# combined attributes +names(defn) +#> [1] "input_event_id" "input_incid_name" "input_incid_type" +#> [4] "input_map_id" "input_burn_bnd_ac" "input_burn_bnd_lat" +#> [7] "input_burn_bnd_lon" "input_ig_date" "input_ig_year" +#> [10] "method_event_id" "method_incid_name" "method_incid_type" +#> [13] "method_map_id" "method_burn_bnd_ac" "method_burn_bnd_lat" +#> [16] "method_burn_bnd_lon" "method_ig_date" "method_ig_year" +#> [19] "geom" + +# we don't have vector plotting yet, so rasterize and use plot_raster() +out_file <- "/vsimem/result_layer.tif" +rasterize(src_dsn = dsn_out, + dstfile = out_file, + layer = "result_layer", + burn_attr = "method_ig_year", + tr = c(90,90), + tap = TRUE, + dtName = "Int16", + dstnodata = -9999, + init = -9999) +#> 0...10...20...30...40...50...60...70...80...90...100 - done. 
+ +ds <- new(GDALRaster, out_file) +pal <- scales::viridis_pal(end = 0.8, direction = -1)(6) +ramp <- scales::colour_ramp(pal) +plot_raster(ds, legend = TRUE, col_map_fn = ramp, na_col = "#d9d9d9", + main="2000-2022 re-burn within the 1988 North Fork perimeter") +``` + +![](img/ynp_mtbs_lyr_int_ex.png) + +``` r + +ds$close() +lyr$close() +lyr2$close() +lyr_out$close() + +vsi_unlink(dsn_out) +vsi_unlink(out_file) +``` + +Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) + +## Example: WorldCover Sentinel-2 composites + +``` r +## WorldCover Sentinel-2 annual composites +## https://blog.vito.be/remotesensing/worldcover-annual-composites +## tile index lookup on remote filesystem and obtain raster data + +library(gdalraster) +#> GDAL 3.8.4, released 2024/02/08, GEOS 3.12.1, PROJ 9.3.1 + +fgb = "/vsicurl/https://esa-worldcover.s3.eu-central-1.amazonaws.com/esa_worldcover_grid_composites.fgb" +lyr_tiles <- new(GDALVector, fgb) + +lyr_tiles$getDriverShortName() +#> [1] "FlatGeobuf" +lyr_tiles$getName() +#> [1] "worldcover_composites_grid_aws" +lyr_tiles$getGeomType() +#> [1] "Polygon" +lyr_tiles$getSpatialRef() +#> [1] "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]" + +lyr_tiles$testCapability("RandomRead") +#> [1] TRUE +lyr_tiles$testCapability("FastSpatialFilter") +#> [1] TRUE +lyr_tiles$testCapability("FastFeatureCount") +#> [1] TRUE +lyr_tiles$testCapability("FastGetExtent") +#> [1] TRUE + +lyr_tiles$getFeatureCount() +#> [1] 19363 +lyr_tiles$bbox() +#> [1] -180 -60 180 83 + +defn <- lyr_tiles$getLayerDefn() +names(defn) +#> [1] "tile" "s1_vvvhratio_2020" "s1_vvvhratio_2021" +#> [4] "s2_rgbnir_2020" "s2_rgbnir_2021" "s2_ndvi_2020" +#> [7] "s2_ndvi_2021" 
"s2_swir_2020" "s2_swir_2021" +#> [10] "" + +# AOI for the Fishhawk fire +dsn <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") +lyr_ynp <- new(GDALVector, dsn, "mtbs_perims") + +lyr_ynp$setAttributeFilter("incid_name = 'FISHHAWK'") +lyr_ynp$getFeatureCount() +#> [1] 1 +feat <- lyr_ynp$getNextFeature() +str(feat) +#> List of 11 +#> $ FID :integer64 60 +#> $ event_id : chr "WY4437710988020190902" +#> $ incid_name : chr "FISHHAWK" +#> $ incid_type : chr "Wildfire" +#> $ map_id :integer64 10016957 +#> $ burn_bnd_ac :integer64 10775 +#> $ burn_bnd_lat: chr "44.384" +#> $ burn_bnd_lon: chr "-109.85" +#> $ ig_date : chr "2019/09/02" +#> $ ig_year : int 2019 +#> $ geom : chr "MULTIPOLYGON (((573530.585472236 9160.22088906913,573266.183153384 9415.42216938034,573137.427110327 9285.44517"| __truncated__ + +# get the feature bbox in WGS84 +bb <- g_transform(feat$geom, lyr_ynp$getSpatialRef(), epsg_to_wkt(4326)) |> + bbox_from_wkt() +print(bb) +#> [1] -109.89753 44.32714 -109.83172 44.44030 + +# tile index lookup +lyr_tiles$setSpatialFilterRect(bb) +lyr_tiles$getFeatureCount() +#> [1] 1 +feat <- lyr_tiles$getNextFeature() +str(feat) +#> List of 11 +#> $ FID :integer64 16615 +#> $ tile : chr "N44W110" +#> $ s1_vvvhratio_2020: chr "s3://esa-worldcover-s1/vvvhratio/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_S1VVVHratio.tif" +#> $ s1_vvvhratio_2021: chr "s3://esa-worldcover-s1/vvvhratio/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_S1VVVHratio.tif" +#> $ s2_rgbnir_2020 : chr "s3://esa-worldcover-s2/rgbnir/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_S2RGBNIR.tif" +#> $ s2_rgbnir_2021 : chr "s3://esa-worldcover-s2/rgbnir/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_S2RGBNIR.tif" +#> $ s2_ndvi_2020 : chr "s3://esa-worldcover-s2/ndvi/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_NDVI.tif" +#> $ s2_ndvi_2021 : chr "s3://esa-worldcover-s2/ndvi/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_NDVI.tif" +#> $ s2_swir_2020 : chr 
"s3://esa-worldcover-s2/swir/2020/N44/ESA_WorldCover_10m_2020_v100_N44W110_SWIR.tif" +#> $ s2_swir_2021 : chr "s3://esa-worldcover-s2/swir/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_SWIR.tif" +#> $ : chr "POLYGON ((-110 44,-110 45,-109 45,-109 44,-110 44))" + +# get the Sentinel-2 RGBNIR composite for this AOI +tif_file <- sub("s3://", "/vsis3/", feat$s2_rgbnir_2021, fixed = TRUE) +print(tif_file) +#> [1] "/vsis3/esa-worldcover-s2/rgbnir/2021/N44/ESA_WorldCover_10m_2021_v200_N44W110_S2RGBNIR.tif" + +# public bucket so disable request signing +set_config_option("AWS_NO_SIGN_REQUEST", "YES") + +# check if file exists and get size in MB +vsi_stat(tif_file) +#> [1] TRUE +vsi_stat(tif_file, "size") / (1000 * 1000) +#> [1] 562.039 + +# copy to a local or remote filesystem, e.g., +# tif_copy <- file.path(tempdir(), basename(tif_file)) +# vsi_copy_file(tif_file, tif_copy, show_progress = TRUE) + +# or open the dataset for reading +ds <- new(GDALRaster, tif_file) +ds$dim() +#> [1] 12000 12000 4 +ds$getMetadata(band=0, domain="") +#> [1] "algorithm_version=V2.0.0" +#> [2] "AREA_OR_POINT=Area" +#> [3] "bands=Band 1: B04 (Red), Band 2: B03 (Green), Band 3: B02 (Blue), Band 4: B08 (Infrared)" +#> [4] "copyright=ESA WorldCover project 2021 / Contains modified Copernicus Sentinel data (2021) processed by ESA WorldCover consortium" +#> [5] "creation_time=2022-12-09 17:08:25.881589" +#> [6] "description=The ESA WorldCover Sentinel-2 median L2A (RGBNIR) composite is a color image made from images in Blue (B02), Green (B03), Red (B04) and Infrared (B08). 
" +#> [7] "license=CC-BY 4.0 - https://creativecommons.org/licenses/by/4.0/" +#> [8] "product_crs=EPSG:4326" +#> [9] "product_grid=1x1 degree tiling grid" +#> [10] "product_tile=N44W110" +#> [11] "product_type=Sentinel-2 median L2A (RGBNIR) composite" +#> [12] "product_version=V2.0.0" +#> [13] "reference=https://esa-worldcover.org" +#> [14] "time_end=2021-12-31T23:59:59Z" +#> [15] "time_start=2021-01-01T00:00:00Z" +#> [16] "title=ESA WorldCover Sentinel-2 median L2A (RGBNIR) composite at 10m resolution for year 2021" +ds$getMetadata(band=0, domain="IMAGE_STRUCTURE") +#> [1] "COMPRESSION=DEFLATE" "INTERLEAVE=PIXEL" "LAYOUT=COG" +#> [4] "PREDICTOR=2" + +r <- read_ds(ds, bands=c(4,1,2), out_xsize = 800, out_ysize = 800) +txt <- paste0(tools::file_path_sans_ext(basename(tif_file)), + "\n(color infrared B8-B4-B3)") +plot_raster(r, + minmax_pct_cut = c(2, 98), + xlab = "longitude", + ylab = "latitude", + main = txt) +``` + +![](img/worldcov2021_N44W110_S2RGBNIR_ex.png) + +``` r + +ds$close() +lyr_ynp$close() +lyr_tiles$close() +``` + +Created on 2024-04-06 with [reprex v2.1.0](https://reprex.tidyverse.org) + +## Further consideration / TBD + +This is a working list of potential issues and design questions that need further consideration, to be determined: + +* handling of 64-bit integer: OGR FID currently is `GIntBig` and integer fields in vector data sources will commonly be `OFTInteger64`. These are now handled using **RcppInt64**, with **bit64** on the R side providing S3 class `integer64`. +* OGR's Arrow C interface: Implement `GDALVector::getArrowStream()` (GDAL >= 3.6) and `GDALVector::writeArrowBatch()` (GDAL >= 3.8), supported on the R side with package **nanoarrow**. +* potential output vectors of GEOS or OGR pointers, WKB with support by **wk** +* OGR layer geoprocessing might be stand-alone functions instead of class methods in `GDALVector`. We would have more flexibility in terms of optional/argument defaults. 
Either way, we should add an option to create the output layer. + +## Document changelog + +* add `GDALVector::setSpatialFilter()` to the draft class definition - set spatial filter from WKT geometry (2024-03-02) +* add comment in the layer intersection example re: feature retrieval by FID (2024-03-02) +* add arguments `fields` and `geom_column` in `GDALVector::getFeatureSet()` - optionally retrieve a subset of attribute fields, and specify whether to include geometry column(s) in the returned data frame (2024-03-02) +* the return value of feature writing methods should be the FID of the newly created/set/deleted feature upon successful write, or `OGRNullFID` on failure (2024-03-02) +* add a note above `class GDALVector` re: handling of 64-bit integer types to be determined (2024-03-02) +* add `GDALVector::setIgnoredFields()`: set which fields can be omitted when retrieving features from the layer. If the driver supports this functionality (testable using OLCIgnoreFields capability), it will not fetch the specified fields in subsequent calls to `getFeature()` / `getNextFeature()` thus save some processing time and/or bandwidth. 
(2024-03-02) +* add `ogr_execute_sql()` for dataset/schema management (2024-03-02) +* add `GDALVector::setNextByIndex()` for cursor positioning (2024-03-03) +* add `GDALVector::getSpatialFilter()`: get the WKT geometry currently in use as the spatial filter, or `""` (2024-03-03) +* add section "Further consideration / TBD" (2024-03-03) +* `ogr2ogr()` and `ogrinfo()` are available in 1.9.0.9080 dev (2024-03-04) +* add potential output vectors of geos or ogr pointers, or wkb/wkt with support by {wk} (@mdsumner, 2024-03-04) +* add section "Contributors" (2024-03-04) +* add link to issue 241 for discussion thread (2024-03-05) +* OGR layer geoprocessing may be stand-alone functions instead of class methods TBD (2024-03-10) +* add link to the header file to reference the class methods that have been implemented so far in the prototype (2024-03-10) +* `ogr2ogr()` and `ogrinfo()` are available in **gdalraster** 1.10.0 on CRAN (2024-03-26) +* initial int64 support; now linking to **RcppInt64**, and importing **bit64**; `FID` and `OFTInteger64` fields are now returned in R as `integer64`; updated the examples (2024-04-06) + +## Contributors + +* [@goergen95](https://github.com/goergen95) ([#205](https://github.com/USDAForestService/gdalraster/issues/205)) +* [@mdsumner](https://github.com/mdsumner) + From 01f23136b4ca9c62007e6f68f56ea1ecbec20f20 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 28 Apr 2024 00:47:13 -0600 Subject: [PATCH 27/53] GDALVector: add constructor with 'spatial_filter' and 'dialect' --- src/gdalvector.cpp | 69 +++++++++++++++++--------- src/gdalvector.h | 23 +++++---- tests/testthat/test-GDALVector-class.R | 40 +++++++++++++++ 3 files changed, 99 insertions(+), 33 deletions(-) create mode 100644 tests/testthat/test-GDALVector-class.R diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 2dedbaab2..865e7cb7c 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -1,6 +1,5 @@ -/* Implementation of class GDALVector - Encapsulates one 
OGRLayer and its GDALDataset - Requires bit64 on the R side for its integer64 S3 type +/* Implementation of class GDALVector. Encapsulates one OGRLayer and its + GDALDataset. Requires bit64 on the R side for its integer64 S3 type. Chris Toney */ #include @@ -14,44 +13,43 @@ #include "gdalraster.h" #include "gdalvector.h" - GDALVector::GDALVector() : dsn_in(""), layer_in(""), is_sql_in(false), open_options_in(Rcpp::CharacterVector::create()), + spatial_filter_in(""), + dialect_in(""), hDataset(nullptr), eAccess(GA_ReadOnly), hLayer(nullptr) {} GDALVector::GDALVector(Rcpp::CharacterVector dsn) : - GDALVector( - dsn, - "", - true, - Rcpp::CharacterVector::create()) {} + GDALVector(dsn, "", true, Rcpp::CharacterVector::create(), + "", "") {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : - GDALVector( - dsn, - layer, - true, - Rcpp::CharacterVector::create()) {} + GDALVector(dsn, layer, true, Rcpp::CharacterVector::create(), + "", "") {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only) : - - GDALVector( - dsn, - layer, - read_only, - Rcpp::CharacterVector::create()) {} + GDALVector(dsn, layer, read_only, Rcpp::CharacterVector::create(), + "", "") {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options) : + GDALVector(dsn, layer, read_only, open_options, "", "") {} +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, + bool read_only, + Rcpp::Nullable open_options, + std::string spatial_filter, std::string dialect) : layer_in(layer), - open_options_in(open_options), + open_options_in(open_options.isNotNull() ? 
open_options : + Rcpp::CharacterVector::create()), + spatial_filter_in(spatial_filter), + dialect_in(dialect), hDataset(nullptr), eAccess(GA_ReadOnly), hLayer(nullptr) { @@ -85,6 +83,23 @@ void GDALVector::open(bool read_only) { } dsoo.push_back(nullptr); + OGRGeometryH hGeom_filter = nullptr; + if (spatial_filter_in != "") { + char* pszWKT = (char*) spatial_filter_in.c_str(); + if (OGR_G_CreateFromWkt(&pszWKT, nullptr, &hGeom_filter) != + OGRERR_NONE) { + if (hGeom_filter != nullptr) + OGR_G_DestroyGeometry(hGeom_filter); + Rcpp::stop("failed to create geometry from 'spatial_filter'"); + } + } + + const char* pszDialect; + if (dialect_in != "") + pszDialect = dialect_in.c_str(); + else + pszDialect = nullptr; + unsigned int nOpenFlags = GDAL_OF_VECTOR; if (read_only) nOpenFlags |= GDAL_OF_READONLY; @@ -103,7 +118,7 @@ void GDALVector::open(bool read_only) { else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { is_sql_in = true; hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), - nullptr, nullptr); + hGeom_filter, pszDialect); } else { is_sql_in = false; @@ -125,6 +140,9 @@ void GDALVector::open(bool read_only) { GDALReleaseDataset(hDataset); Rcpp::stop("failed to get layer definition"); } + + if (hGeom_filter != nullptr) + OGR_G_DestroyGeometry(hGeom_filter); } bool GDALVector::isOpen() const { @@ -722,8 +740,13 @@ RCPP_MODULE(mod_GDALVector) { ("Usage: new(GDALVector, dsn, layer)") .constructor ("Usage: new(GDALVector, dsn, layer, read_only=[TRUE|FALSE])") - .constructor + .constructor ("Usage: new(GDALVector, dsn, layer, read_only, open_options)") + .constructor, std::string, + std::string> + ("Usage: new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect)") // exposed member functions .const_method("getDsn", &GDALVector::getDsn, diff --git a/src/gdalvector.h b/src/gdalvector.h index bffaafccd..a1ef94032 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -1,16 +1,15 @@ -/* R interface to a subset of the GDAL C API for 
vector. - A class for OGRLayer, a layer of features in a GDALDataset. - https://gdal.org/api/vector_c_api.html +/* R interface to a subset of the GDAL C API for vector. A class for OGRLayer, + a layer of features in a GDALDataset. https://gdal.org/api/vector_c_api.html Chris Toney */ #ifndef gdalvector_H #define gdalvector_H -#include "rcpp_util.h" - #include #include +#include "rcpp_util.h" + // Predeclare some GDAL types until the public header is included #ifndef GDAL_H_INCLUDED typedef void *GDALDatasetH; @@ -19,24 +18,28 @@ typedef enum {GA_ReadOnly = 0, GA_Update = 1} GDALAccess; #endif class GDALVector { - - private: + private: std::string dsn_in; std::string layer_in; // layer name or sql statement bool is_sql_in; Rcpp::CharacterVector open_options_in; - GDALDatasetH hDataset; + std::string spatial_filter_in; + std::string dialect_in; + GDALDatasetH hDataset; GDALAccess eAccess; OGRLayerH hLayer; OGRFeatureDefnH hFDefn; - public: + public: GDALVector(); - GDALVector(Rcpp::CharacterVector dsn); + explicit GDALVector(Rcpp::CharacterVector dsn); GDALVector(Rcpp::CharacterVector dsn, std::string layer); GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only); GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options); + GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, + Rcpp::Nullable open_options, + std::string spatial_filter, std::string dialect); void open(bool read_only); bool isOpen() const; diff --git a/tests/testthat/test-GDALVector-class.R b/tests/testthat/test-GDALVector-class.R new file mode 100644 index 000000000..2e1d33692 --- /dev/null +++ b/tests/testthat/test-GDALVector-class.R @@ -0,0 +1,40 @@ +# Tests for src/gdalvector.cpp +test_that("class constructors work", { + f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") + dsn <- paste0("/vsimem/", basename(f)) + vsi_copy_file(f, dsn) + + lyr <- new(GDALVector, dsn) + 
expect_equal(lyr$getName(), "mtbs_perims") + lyr$close() + + lyr <- new(GDALVector, dsn, "mtbs_perims") + expect_equal(lyr$bbox(), c(469685.73, -12917.76, 573531.72, 96577.34)) + lyr$close() + + lyr <- new(GDALVector, dsn, "mtbs_perims", read_only = FALSE) + expect_true(lyr$testCapability("RandomWrite")) + lyr$close() + + lyr <- new(GDALVector, dsn, "mtbs_perims", read_only = TRUE, + "LIST_ALL_TABLES=NO") + expect_false(lyr$testCapability("RandomWrite")) + lyr$close() + + bb <- c(469685.97, 11442.45, 544069.63, 85508.15) + + # spatial filter with SQL layer + sql <- "SELECT FID, * FROM mtbs_perims" + lyr <- new(GDALVector, dsn, sql, read_only = TRUE, open_options = NULL, + spatial_filter = bbox_to_wkt(bb), dialect = "") + expect_equal(lyr$getFeatureCount(), 40) + lyr$close() + + # add dialect + lyr <- new(GDALVector, dsn, sql, read_only = TRUE, open_options = NULL, + spatial_filter = bbox_to_wkt(bb), dialect = "OGRSQL") + expect_equal(lyr$getFeatureCount(), 40) + lyr$close() + + vsi_unlink(dsn) +}) From 33075b717a954deaebd2f52e5ff891644c63638a Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Wed, 8 May 2024 15:11:21 -0600 Subject: [PATCH 28/53] has_spatialite() now public (and code linting) --- src/gdalvector.cpp | 22 +++++++--------------- src/gdalvector.h | 6 +++--- src/geos_wkt.cpp | 2 +- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 865e7cb7c..7de1847b8 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -1,5 +1,5 @@ -/* Implementation of class GDALVector. Encapsulates one OGRLayer and its - GDALDataset. Requires bit64 on the R side for its integer64 S3 type. +/* Implementation of class GDALVector. Encapsulates an OGRLayer and its + GDALDataset. Requires {bit64} on the R side for its integer64 S3 type. 
Chris Toney */ #include @@ -117,6 +117,8 @@ void GDALVector::open(bool read_only) { } else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { is_sql_in = true; + if (EQUALN(pszDialect, "SQLite", 6) && !has_spatialite()) + Rcpp::warning("spatialite not available"); hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), hGeom_filter, pszDialect); } @@ -266,16 +268,15 @@ Rcpp::List GDALVector::getLayerDefn() const { int iField; // attribute fields - // TODO: add subtype and field domain name + // TODO(ctoney): add subtype and field domain name for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { - Rcpp::List list_fld_defn = Rcpp::List::create(); OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); if (hFieldDefn == nullptr) Rcpp::stop("could not obtain field definition"); OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - // TODO: add list types, date, time, binary, etc. + // TODO(ctoney): add list types, date, time, binary, etc. if (fld_type == OFTInteger) { sValue = "OFTInteger"; } @@ -322,7 +323,6 @@ Rcpp::List GDALVector::getLayerDefn() const { // geometry fields for (int i = 0; i < OGR_FD_GetGeomFieldCount(hFDefn); ++i) { - Rcpp::List list_geom_fld_defn = Rcpp::List::create(); OGRGeomFieldDefnH hGeomFldDefn = OGR_FD_GetGeomFieldDefn(hFDefn, i); @@ -463,7 +463,6 @@ void GDALVector::layerIntersection( if (err != OGRERR_NONE) Rcpp::stop("error during Intersection, or execution was interrupted"); - } void GDALVector::layerUnion( @@ -492,7 +491,6 @@ void GDALVector::layerUnion( if (err != OGRERR_NONE) Rcpp::stop("error during Union, or execution was interrupted"); - } void GDALVector::layerSymDifference( @@ -521,7 +519,6 @@ void GDALVector::layerSymDifference( if (err != OGRERR_NONE) Rcpp::stop("error during SymDifference, or execution was interrupted"); - } void GDALVector::layerIdentity( @@ -550,7 +547,6 @@ void GDALVector::layerIdentity( if (err != OGRERR_NONE) Rcpp::stop("error during Identity, or execution was interrupted"); - } 
void GDALVector::layerUpdate( @@ -579,7 +575,6 @@ void GDALVector::layerUpdate( if (err != OGRERR_NONE) Rcpp::stop("error during Update, or execution was interrupted"); - } void GDALVector::layerClip( @@ -608,7 +603,6 @@ void GDALVector::layerClip( if (err != OGRERR_NONE) Rcpp::stop("error during Clip, or execution was interrupted"); - } void GDALVector::layerErase( @@ -637,7 +631,6 @@ void GDALVector::layerErase( if (err != OGRERR_NONE) Rcpp::stop("error during Erase, or execution was interrupted"); - } void GDALVector::close() { @@ -701,7 +694,7 @@ Rcpp::List GDALVector::_featureToList(OGRFeatureH hFeature) const { list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else { - // TODO: support date, time, binary, etc. + // TODO(ctoney): support date, time, binary, etc. // read as string for now std::string value = OGR_F_GetFieldAsString(hFeature, i); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); @@ -729,7 +722,6 @@ Rcpp::List GDALVector::_featureToList(OGRFeatureH hFeature) const { // **************************************************************************** RCPP_MODULE(mod_GDALVector) { - Rcpp::class_("GDALVector") .constructor diff --git a/src/gdalvector.h b/src/gdalvector.h index a1ef94032..9c35ff16b 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -2,8 +2,8 @@ a layer of features in a GDALDataset. 
https://gdal.org/api/vector_c_api.html Chris Toney */ -#ifndef gdalvector_H -#define gdalvector_H +#ifndef SRC_GDALVECTOR_H_ +#define SRC_GDALVECTOR_H_ #include #include @@ -114,4 +114,4 @@ class GDALVector { RCPP_EXPOSED_CLASS(GDALVector) -#endif +#endif // SRC_GDALVECTOR_H_ diff --git a/src/geos_wkt.cpp b/src/geos_wkt.cpp index 016018aa7..568f46382 100644 --- a/src/geos_wkt.cpp +++ b/src/geos_wkt.cpp @@ -529,7 +529,7 @@ bool _g_overlaps(std::string this_geom, std::string other_geom) { //' @noRd // [[Rcpp::export(name = ".g_buffer")]] std::string _g_buffer(std::string geom, double dist, int quad_segs = 30) { -//Compute buffer of geometry. +// Compute buffer of geometry. // Builds a new geometry containing the buffer region around the geometry on // which it is invoked. The buffer is a polygon containing the region within From 64fd91bc11d69297f72d1da0696076d83059d270 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Wed, 8 May 2024 15:22:37 -0600 Subject: [PATCH 29/53] fix the check on dialect_in during open() --- src/gdalvector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 7de1847b8..982293745 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -117,7 +117,7 @@ void GDALVector::open(bool read_only) { } else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { is_sql_in = true; - if (EQUALN(pszDialect, "SQLite", 6) && !has_spatialite()) + if (EQUALN(dialect_in.c_str(), "SQLite", 6) && !has_spatialite()) Rcpp::warning("spatialite not available"); hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), hGeom_filter, pszDialect); From 3404fc3840b1f7b5a2da5881ea886f655320fd20 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Wed, 8 May 2024 22:09:11 -0600 Subject: [PATCH 30/53] make dialect param optional in GDALVector constructor --- src/gdalvector.cpp | 18 ++++++++++-------- tests/testthat/test-GDALVector-class.R | 4 ++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git 
a/src/gdalvector.cpp b/src/gdalvector.cpp index 982293745..1ea137dde 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -34,17 +34,20 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only) : + GDALVector(dsn, layer, read_only, Rcpp::CharacterVector::create(), "", "") {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::CharacterVector open_options) : + GDALVector(dsn, layer, read_only, open_options, "", "") {} GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only, Rcpp::Nullable open_options, - std::string spatial_filter, std::string dialect) : + std::string spatial_filter, std::string dialect = "") : + layer_in(layer), open_options_in(open_options.isNotNull() ? open_options : Rcpp::CharacterVector::create()), @@ -94,12 +97,6 @@ void GDALVector::open(bool read_only) { } } - const char* pszDialect; - if (dialect_in != "") - pszDialect = dialect_in.c_str(); - else - pszDialect = nullptr; - unsigned int nOpenFlags = GDAL_OF_VECTOR; if (read_only) nOpenFlags |= GDAL_OF_READONLY; @@ -111,13 +108,15 @@ void GDALVector::open(bool read_only) { if (hDataset == nullptr) Rcpp::stop("open dataset failed"); + const char* pszDialect = dialect_in.c_str(); + if (layer_in == "") { is_sql_in = false; hLayer = GDALDatasetGetLayer(hDataset, 0); } else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { is_sql_in = true; - if (EQUALN(dialect_in.c_str(), "SQLite", 6) && !has_spatialite()) + if (EQUALN(pszDialect, "SQLite", 6) && !has_spatialite()) Rcpp::warning("spatialite not available"); hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), hGeom_filter, pszDialect); @@ -735,6 +734,9 @@ RCPP_MODULE(mod_GDALVector) { .constructor ("Usage: new(GDALVector, dsn, layer, read_only, open_options)") + .constructor, std::string> + ("Usage: new(GDALVector, dsn, layer, read_only, open_options, spatial_filter)") 
.constructor, std::string, std::string> diff --git a/tests/testthat/test-GDALVector-class.R b/tests/testthat/test-GDALVector-class.R index 2e1d33692..af5c4cbd2 100644 --- a/tests/testthat/test-GDALVector-class.R +++ b/tests/testthat/test-GDALVector-class.R @@ -26,13 +26,13 @@ test_that("class constructors work", { # spatial filter with SQL layer sql <- "SELECT FID, * FROM mtbs_perims" lyr <- new(GDALVector, dsn, sql, read_only = TRUE, open_options = NULL, - spatial_filter = bbox_to_wkt(bb), dialect = "") + spatial_filter = bbox_to_wkt(bb)) expect_equal(lyr$getFeatureCount(), 40) lyr$close() # add dialect lyr <- new(GDALVector, dsn, sql, read_only = TRUE, open_options = NULL, - spatial_filter = bbox_to_wkt(bb), dialect = "OGRSQL") + spatial_filter = bbox_to_wkt(bb), dialect = "") expect_equal(lyr$getFeatureCount(), 40) lyr$close() From a5517fabe8a02f08170c4efb2d487677e13ecb3d Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Fri, 17 May 2024 11:33:48 -0600 Subject: [PATCH 31/53] GDALVector::getFeature(): handle fid correctly for R numeric --- src/gdalvector.cpp | 10 ++++++---- tests/testthat/test-GDALVector-class.R | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 1ea137dde..9ca453a98 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -415,11 +415,13 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { Rcpp::stop("'fid' must be a length-1 numeric vector (integer64)"); int64_t fid_in; - - if (Rcpp::isInteger64(fid)) + if (Rcpp::isInteger64(fid)) { fid_in = Rcpp::fromInteger64(fid[0]); - else - fid_in = static_cast(fid[0]); + } + else { + std::vector tmp = Rcpp::as>(fid); + fid_in = static_cast(tmp[0]); + } OGRFeatureH hFeature = OGR_L_GetFeature(hLayer, static_cast(fid_in)); diff --git a/tests/testthat/test-GDALVector-class.R b/tests/testthat/test-GDALVector-class.R index af5c4cbd2..dd7029b07 100644 --- a/tests/testthat/test-GDALVector-class.R +++ b/tests/testthat/test-GDALVector-class.R @@ 
-6,6 +6,7 @@ test_that("class constructors work", { lyr <- new(GDALVector, dsn) expect_equal(lyr$getName(), "mtbs_perims") + expect_type(lyr$getFeature(1), "list") lyr$close() lyr <- new(GDALVector, dsn, "mtbs_perims") From 509e95797818c427e66d3dc11083fd49b879cdbf Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 15 Jun 2024 23:42:53 -0600 Subject: [PATCH 32/53] hFDefn no longer a member variable, code linting --- src/gdalvector.cpp | 101 ++++++++++++++++++++++++--------------------- src/gdalvector.h | 12 +++--- 2 files changed, 60 insertions(+), 53 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 9ca453a98..634d10f75 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -1,6 +1,9 @@ /* Implementation of class GDALVector. Encapsulates an OGRLayer and its GDALDataset. Requires {bit64} on the R side for its integer64 S3 type. - Chris Toney */ + + Chris Toney + Copyright (c) 2023-2024 gdalraster authors +*/ #include @@ -57,7 +60,7 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, eAccess(GA_ReadOnly), hLayer(nullptr) { - dsn_in = Rcpp::as(_check_gdal_filename(dsn)); + dsn_in = Rcpp::as(check_gdal_filename(dsn)); open(read_only); } @@ -134,14 +137,6 @@ void GDALVector::open(bool read_only) { OGR_L_ResetReading(hLayer); } - hFDefn = OGR_L_GetLayerDefn(hLayer); - if (hFDefn == nullptr) { - if (is_sql_in) - GDALDatasetReleaseResultSet(hDataset, hLayer); - GDALReleaseDataset(hDataset); - Rcpp::stop("failed to get layer definition"); - } - if (hGeom_filter != nullptr) OGR_G_DestroyGeometry(hGeom_filter); } @@ -158,7 +153,7 @@ std::string GDALVector::getDsn() const { } Rcpp::CharacterVector GDALVector::getFileList() const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); char **papszFiles; papszFiles = GDALGetFileList(hDataset); @@ -179,53 +174,53 @@ Rcpp::CharacterVector GDALVector::getFileList() const { } std::string GDALVector::getDriverShortName() const { - _checkAccess(GA_ReadOnly); + 
checkAccess_(GA_ReadOnly); GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); return GDALGetDriverShortName(hDriver); } std::string GDALVector::getDriverLongName() const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); return GDALGetDriverLongName(hDriver); } std::string GDALVector::getName() const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); return OGR_L_GetName(hLayer); } bool GDALVector::testCapability(std::string capability) const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); return OGR_L_TestCapability(hLayer, capability.c_str()); } std::string GDALVector::getFIDColumn() const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); return OGR_L_GetFIDColumn(hLayer); } std::string GDALVector::getGeomType() const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); OGRwkbGeometryType eType = OGR_L_GetGeomType(hLayer); return OGRGeometryTypeToName(eType); } std::string GDALVector::getGeometryColumn() const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); return OGR_L_GetGeometryColumn(hLayer); } std::string GDALVector::getSpatialRef() const { // OGRLayer::GetSpatialRef() as WKT string - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); OGRSpatialReferenceH hSRS = OGR_L_GetSpatialRef(hLayer); if (hSRS == nullptr) @@ -245,7 +240,7 @@ Rcpp::NumericVector GDALVector::bbox() { // see: testCapability("FastGetExtent") // Depending on the driver, a spatial filter may/may not be taken into // account. So it is safer to call bbox() without setting a spatial filter. 
- _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); OGREnvelope envelope; if (OGR_L_GetExtent(hLayer, &envelope, true) != OGRERR_NONE) @@ -258,7 +253,12 @@ Rcpp::NumericVector GDALVector::bbox() { } Rcpp::List GDALVector::getLayerDefn() const { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); + + OGRFeatureDefnH hFDefn; + hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) + Rcpp::stop("failed to get layer definition"); Rcpp::List list_out = Rcpp::List::create(); std::string sValue; @@ -360,7 +360,7 @@ Rcpp::List GDALVector::getLayerDefn() const { } void GDALVector::setAttributeFilter(std::string query) { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); const char* query_in = nullptr; if (query != "") @@ -371,7 +371,7 @@ void GDALVector::setAttributeFilter(std::string query) { } void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); if (Rcpp::any(Rcpp::is_na(bbox))) Rcpp::stop("'bbox' has one or more 'NA' values"); @@ -380,7 +380,7 @@ void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { } void GDALVector::clearSpatialFilter() { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); OGR_L_SetSpatialFilter(hLayer, nullptr); } @@ -390,17 +390,17 @@ double GDALVector::getFeatureCount() { // GDAL doc: Note that some implementations of this method may alter the // read cursor of the layer. 
// see: testCapability("FastFeatureCount") - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); return static_cast(OGR_L_GetFeatureCount(hLayer, true)); } SEXP GDALVector::getNextFeature() { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); OGRFeatureH hFeature = OGR_L_GetNextFeature(hLayer); if (hFeature != nullptr) - return _featureToList(hFeature); + return featureToList_(hFeature); else return R_NilValue; } @@ -409,7 +409,7 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { // fid must be an R numeric vector of length 1 // i.e., a scalar but use NumericVector here since it can carry the class // attribute for integer64 - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); if (fid.size() != 1) Rcpp::stop("'fid' must be a length-1 numeric vector (integer64)"); @@ -427,13 +427,13 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { static_cast(fid_in)); if (hFeature != nullptr) - return _featureToList(hFeature); + return featureToList_(hFeature); else return R_NilValue; } void GDALVector::resetReading() { - _checkAccess(GA_ReadOnly); + checkAccess_(GA_ReadOnly); OGR_L_ResetReading(hLayer); } @@ -456,8 +456,8 @@ void GDALVector::layerIntersection( OGRErr err = OGR_L_Intersection( hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), opt_list.data(), quiet ? nullptr : GDALTermProgressR, nullptr); @@ -484,8 +484,8 @@ void GDALVector::layerUnion( OGRErr err = OGR_L_Union( hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), opt_list.data(), quiet ? nullptr : GDALTermProgressR, nullptr); @@ -512,8 +512,8 @@ void GDALVector::layerSymDifference( OGRErr err = OGR_L_SymDifference( hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), opt_list.data(), quiet ? 
nullptr : GDALTermProgressR, nullptr); @@ -540,8 +540,8 @@ void GDALVector::layerIdentity( OGRErr err = OGR_L_Identity( hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), opt_list.data(), quiet ? nullptr : GDALTermProgressR, nullptr); @@ -568,8 +568,8 @@ void GDALVector::layerUpdate( OGRErr err = OGR_L_Update( hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), opt_list.data(), quiet ? nullptr : GDALTermProgressR, nullptr); @@ -596,8 +596,8 @@ void GDALVector::layerClip( OGRErr err = OGR_L_Clip( hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), opt_list.data(), quiet ? nullptr : GDALTermProgressR, nullptr); @@ -624,8 +624,8 @@ void GDALVector::layerErase( OGRErr err = OGR_L_Erase( hLayer, - method_layer._getOGRLayerH(), - result_layer._getOGRLayerH(), + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), opt_list.data(), quiet ? 
nullptr : GDALTermProgressR, nullptr); @@ -648,7 +648,7 @@ void GDALVector::close() { // class methods for internal use not exposed in R // **************************************************************************** -void GDALVector::_checkAccess(GDALAccess access_needed) const { +void GDALVector::checkAccess_(GDALAccess access_needed) const { if (!isOpen()) Rcpp::stop("dataset is not open"); @@ -656,13 +656,18 @@ void GDALVector::_checkAccess(GDALAccess access_needed) const { Rcpp::stop("dataset is read-only"); } -OGRLayerH GDALVector::_getOGRLayerH() const { - _checkAccess(GA_ReadOnly); +OGRLayerH GDALVector::getOGRLayerH_() const { + checkAccess_(GA_ReadOnly); return hLayer; } -Rcpp::List GDALVector::_featureToList(OGRFeatureH hFeature) const { +Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { + OGRFeatureDefnH hFDefn; + hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) + Rcpp::stop("failed to get layer definition"); + Rcpp::List list_out = Rcpp::List::create(); int i; diff --git a/src/gdalvector.h b/src/gdalvector.h index 9c35ff16b..14e194609 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -1,6 +1,9 @@ /* R interface to a subset of the GDAL C API for vector. A class for OGRLayer, a layer of features in a GDALDataset. 
https://gdal.org/api/vector_c_api.html - Chris Toney */ + + Chris Toney + Copyright (c) 2023-2024 gdalraster authors +*/ #ifndef SRC_GDALVECTOR_H_ #define SRC_GDALVECTOR_H_ @@ -28,7 +31,6 @@ class GDALVector { GDALDatasetH hDataset; GDALAccess eAccess; OGRLayerH hLayer; - OGRFeatureDefnH hFDefn; public: GDALVector(); @@ -107,9 +109,9 @@ class GDALVector { void close(); // methods for internal use not exported to R - void _checkAccess(GDALAccess access_needed) const; - OGRLayerH _getOGRLayerH() const; - Rcpp::List _featureToList(OGRFeatureH hFeature) const; + void checkAccess_(GDALAccess access_needed) const; + OGRLayerH getOGRLayerH_() const; + Rcpp::List featureToList_(OGRFeatureH hFeature) const; }; RCPP_EXPOSED_CLASS(GDALVector) From 6566fe260105366df874004c57a1c0f977bb007d Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 16 Jun 2024 01:14:52 -0600 Subject: [PATCH 33/53] add GDALVector::initDF_() wip --- src/gdalvector.cpp | 54 +++++++++++++++++++++++++++++++++++++++++++++- src/gdalvector.h | 1 + 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 634d10f75..6b375fe67 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -10,7 +10,7 @@ #include "gdal.h" #include "cpl_port.h" #include "cpl_string.h" -#include "ogrsf_frmts.h" +// #include "ogrsf_frmts.h" #include "ogr_srs_api.h" #include "gdalraster.h" @@ -725,6 +725,58 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { return list_out; } +Rcpp::DataFrame GDALVector::initDF_(R_xlen_t nrow) const { + OGRFeatureDefnH hFDefn; + hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) + Rcpp::stop("failed to get layer definition"); + + Rcpp::DataFrame df_out = Rcpp::DataFrame::create(); + int i; + + std::vector fid_(nrow); + Rcpp::NumericVector fid = Rcpp::wrap(fid_); + df_out.push_back(fid, "FID"); + + for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if 
(hFieldDefn == nullptr) + Rcpp::stop("could not obtain field definition"); + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { + Rcpp::IntegerVector v(nrow); + df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTInteger64) { + std::vector v_(nrow); + Rcpp::NumericVector v = Rcpp::wrap(v_); + df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + } + else if (fld_type == OFTReal) { + Rcpp::NumericVector v(nrow); + df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + } + else { + // TODO(ctoney): support date, time, binary, etc. + // read as string for now + Rcpp::CharacterVector v(nrow); + df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + } + } + + for (i = 0; i < OGR_FD_GetGeomFieldCount(hFDefn); ++i) { + OGRGeomFieldDefnH hGeomFldDefn = OGR_FD_GetGeomFieldDefn(hFDefn, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("could not obtain geometry field def"); + + Rcpp::CharacterVector v(nrow); + df_out.push_back(v, OGR_GFld_GetNameRef(hGeomFldDefn)); + } + + return df_out; +} + // **************************************************************************** RCPP_MODULE(mod_GDALVector) { diff --git a/src/gdalvector.h b/src/gdalvector.h index 14e194609..99aea10bc 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -112,6 +112,7 @@ class GDALVector { void checkAccess_(GDALAccess access_needed) const; OGRLayerH getOGRLayerH_() const; Rcpp::List featureToList_(OGRFeatureH hFeature) const; + Rcpp::DataFrame initDF_(R_xlen_t nrow) const; }; RCPP_EXPOSED_CLASS(GDALVector) From c560d582f8f274ecaa9d56e2adcbc2f3c82fd930 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 16 Jun 2024 21:55:23 -0600 Subject: [PATCH 34/53] initDF_(), featureToList_() wip --- src/gdalvector.cpp | 68 ++++++++++++++++++++++++++++++---------------- src/gdalvector.h | 2 +- src/rcpp_util.h | 6 ++++ 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 6b375fe67..27507f603 
100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -679,62 +679,81 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { if (hFieldDefn == nullptr) Rcpp::stop("could not obtain field definition"); + bool has_value = true; if (!OGR_F_IsFieldSet(hFeature, i) || OGR_F_IsFieldNull(hFeature, i)) { - continue; + has_value = false; } OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); if (fld_type == OFTInteger) { - int value = OGR_F_GetFieldAsInteger(hFeature, i); + int value = NA_INTEGER; + if (has_value) + value = OGR_F_GetFieldAsInteger(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTInteger64) { - int64_t value = static_cast( - OGR_F_GetFieldAsInteger64(hFeature, i)); + int64_t value = NA_INTEGER64; + if (has_value) + value = static_cast( + OGR_F_GetFieldAsInteger64(hFeature, i)); + list_out.push_back(Rcpp::toInteger64(value), - OGR_Fld_GetNameRef(hFieldDefn)); + OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTReal) { - double value = OGR_F_GetFieldAsDouble(hFeature, i); + double value = NA_REAL; + if (has_value) + value = OGR_F_GetFieldAsDouble(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } else { // TODO(ctoney): support date, time, binary, etc. 
// read as string for now - std::string value = OGR_F_GetFieldAsString(hFeature, i); + std::string value = ""; + if (has_value) + OGR_F_GetFieldAsString(hFeature, i); + list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } } for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { - OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); - if (hGeom == nullptr) - Rcpp::stop("could not obtain geometry reference"); - char* pszWKT; - OGR_G_ExportToWkt(hGeom, &pszWKT); - std::string wkt(pszWKT); OGRGeomFieldDefnH hGeomFldDefn = OGR_F_GetGeomFieldDefnRef(hFeature, i); if (hGeomFldDefn == nullptr) Rcpp::stop("could not obtain geometry field def"); - list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); - CPLFree(pszWKT); + + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); + if (hGeom != nullptr) { + char* pszWKT; + OGR_G_ExportToWkt(hGeom, &pszWKT); + std::string wkt(pszWKT); + list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); + CPLFree(pszWKT); + } + else { + list_out.push_back("", OGR_GFld_GetNameRef(hGeomFldDefn)); + } } return list_out; } -Rcpp::DataFrame GDALVector::initDF_(R_xlen_t nrow) const { +SEXP GDALVector::initDF_(R_xlen_t nrow) const { OGRFeatureDefnH hFDefn; hFDefn = OGR_L_GetLayerDefn(hLayer); if (hFDefn == nullptr) Rcpp::stop("failed to get layer definition"); - Rcpp::DataFrame df_out = Rcpp::DataFrame::create(); + // construct the output data frame as list + // it will be coerced to data frame at return + Rcpp::List df_out = Rcpp::List::create(); int i; - std::vector fid_(nrow); + std::vector fid_(nrow, NA_INTEGER64); Rcpp::NumericVector fid = Rcpp::wrap(fid_); df_out.push_back(fid, "FID"); @@ -745,22 +764,23 @@ Rcpp::DataFrame GDALVector::initDF_(R_xlen_t nrow) const { OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); if (fld_type == OFTInteger) { - Rcpp::IntegerVector v(nrow); + // TODO: handle boolean subtype + Rcpp::IntegerVector v(nrow, NA_INTEGER); df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); } else if 
(fld_type == OFTInteger64) { - std::vector v_(nrow); + std::vector v_(nrow, NA_INTEGER64); Rcpp::NumericVector v = Rcpp::wrap(v_); df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); } else if (fld_type == OFTReal) { - Rcpp::NumericVector v(nrow); + Rcpp::NumericVector v(nrow, NA_REAL); df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); } else { // TODO(ctoney): support date, time, binary, etc. // read as string for now - Rcpp::CharacterVector v(nrow); + Rcpp::CharacterVector v(nrow, NA_STRING); df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); } } @@ -770,10 +790,12 @@ Rcpp::DataFrame GDALVector::initDF_(R_xlen_t nrow) const { if (hGeomFldDefn == nullptr) Rcpp::stop("could not obtain geometry field def"); - Rcpp::CharacterVector v(nrow); + Rcpp::CharacterVector v(nrow, NA_STRING); df_out.push_back(v, OGR_GFld_GetNameRef(hGeomFldDefn)); } + df_out.attr("class") = "data.frame"; + df_out.attr("row.names") = Rcpp::seq_len(nrow); return df_out; } diff --git a/src/gdalvector.h b/src/gdalvector.h index 99aea10bc..cc5918ed2 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -112,7 +112,7 @@ class GDALVector { void checkAccess_(GDALAccess access_needed) const; OGRLayerH getOGRLayerH_() const; Rcpp::List featureToList_(OGRFeatureH hFeature) const; - Rcpp::DataFrame initDF_(R_xlen_t nrow) const; + SEXP initDF_(R_xlen_t nrow) const; }; RCPP_EXPOSED_CLASS(GDALVector) diff --git a/src/rcpp_util.h b/src/rcpp_util.h index 3a424a7ab..f4042917d 100644 --- a/src/rcpp_util.h +++ b/src/rcpp_util.h @@ -6,6 +6,8 @@ #ifndef SRC_RCPP_UTIL_H_ #define SRC_RCPP_UTIL_H_ +#include + #include #include @@ -13,6 +15,10 @@ #include #include +// as defined in the bit64 package src/integer64.h +#define NA_INTEGER64 LLONG_MIN +#define ISNA_INTEGER64(X)((X) == NA_INTEGER64) + Rcpp::NumericMatrix df_to_matrix_(const Rcpp::DataFrame& df); Rcpp::IntegerMatrix df_to_int_matrix_(const Rcpp::DataFrame& df); Rcpp::CharacterVector path_expand_(Rcpp::CharacterVector path); From 
ed21845ef164c29446bde517f846fad6c84b9f15 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sat, 22 Jun 2024 23:04:31 -0600 Subject: [PATCH 35/53] GDALVector::fetch() initial commit --- src/gdalvector.cpp | 137 ++++++++++++++++++++++++++++++++++++++------- src/gdalvector.h | 2 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 27507f603..dbe9dc489 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -5,6 +5,7 @@ Copyright (c) 2023-2024 gdalraster authors */ +#include #include #include "gdal.h" @@ -398,9 +399,9 @@ double GDALVector::getFeatureCount() { SEXP GDALVector::getNextFeature() { checkAccess_(GA_ReadOnly); - OGRFeatureH hFeature = OGR_L_GetNextFeature(hLayer); - if (hFeature != nullptr) - return featureToList_(hFeature); + OGRFeatureH hFeat = OGR_L_GetNextFeature(hLayer); + if (hFeat != nullptr) + return featureToList_(hFeat); else return R_NilValue; } @@ -423,11 +424,11 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { fid_in = static_cast(tmp[0]); } - OGRFeatureH hFeature = OGR_L_GetFeature(hLayer, + OGRFeatureH hFeat = OGR_L_GetFeature(hLayer, static_cast(fid_in)); - if (hFeature != nullptr) - return featureToList_(hFeature); + if (hFeat != nullptr) + return featureToList_(hFeat); else return R_NilValue; } @@ -438,6 +439,99 @@ void GDALVector::resetReading() { OGR_L_ResetReading(hLayer); } +Rcpp::DataFrame GDALVector::fetch(double n) { + // Analog of DBI::dbFetch(), mostly following its specification: + // https://dbi.r-dbi.org/reference/dbFetch.html#specification + // n should be passed as a whole number (integer or numeric). A value of + // Inf for the n argument is supported and also returns the full result. 
+ + OGRFeatureDefnH hFDefn = nullptr; + hFDefn = OGR_L_GetLayerDefn(hLayer); + if (hFDefn == nullptr) + Rcpp::stop("failed to get layer definition"); + + bool fetch_all = true; + double fetch_num = 0; + if (n >= 0 && std::isfinite(n)) { + fetch_all = false; + fetch_num = std::trunc(n); + } + else { + fetch_num = getFeatureCount(); + } + + Rcpp::DataFrame df = initDF_(static_cast(fetch_num)); + + if (fetch_num == 0 || std::isnan(n)) + return df; + + OGRFeatureH hFeat = nullptr; + double fetch_count = 0; + + while ((hFeat = OGR_L_GetNextFeature(hLayer)) != nullptr) { + R_xlen_t row = static_cast(fetch_count); + + int64_t fid = static_cast(OGR_F_GetFID(hFeat)); + Rcpp::NumericVector fid_col = df[0]; + fid_col(row) = Rcpp::toInteger64(fid)[0]; + + for (int i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if (hFieldDefn == nullptr) + Rcpp::stop("could not obtain field definition"); + + bool has_value = true; + if (!OGR_F_IsFieldSet(hFeat, i) || + OGR_F_IsFieldNull(hFeat, i)) { + has_value = false; + } + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { + int value = NA_INTEGER; + if (has_value) + value = OGR_F_GetFieldAsInteger(hFeat, i); + + Rcpp::IntegerVector col = df[i + 1]; + col(row) = value; + } + else if (fld_type == OFTInteger64) { + int64_t value = NA_INTEGER64; + if (has_value) + value = static_cast( + OGR_F_GetFieldAsInteger64(hFeat, i)); + + Rcpp::NumericVector col = df[i + 1]; + col(row) = Rcpp::toInteger64(value)[0]; + } + else if (fld_type == OFTReal) { + double value = NA_REAL; + if (has_value) + value = OGR_F_GetFieldAsDouble(hFeat, i); + + Rcpp::NumericVector col = df[i + 1]; + col(row) = value; + } + else { + // TODO(ctoney): support date, time, binary, etc. 
+ // read as string for now + std::string value = ""; + if (has_value) + value = OGR_F_GetFieldAsString(hFeat, i); + + Rcpp::CharacterVector col = df[i + 1]; + col(row) = value; + } + } + + fetch_count += 1; + if (!fetch_all && (fetch_count == fetch_num)) + break; + } + + return df; +} + void GDALVector::layerIntersection( GDALVector method_layer, GDALVector result_layer, @@ -662,8 +756,8 @@ OGRLayerH GDALVector::getOGRLayerH_() const { return hLayer; } -Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { - OGRFeatureDefnH hFDefn; +Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeat) const { + OGRFeatureDefnH hFDefn = nullptr; hFDefn = OGR_L_GetLayerDefn(hLayer); if (hFDefn == nullptr) Rcpp::stop("failed to get layer definition"); @@ -671,7 +765,7 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { Rcpp::List list_out = Rcpp::List::create(); int i; - int64_t FID = static_cast(OGR_F_GetFID(hFeature)); + int64_t FID = static_cast(OGR_F_GetFID(hFeat)); list_out.push_back(Rcpp::toInteger64(FID), "FID"); for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { @@ -680,8 +774,8 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { Rcpp::stop("could not obtain field definition"); bool has_value = true; - if (!OGR_F_IsFieldSet(hFeature, i) || - OGR_F_IsFieldNull(hFeature, i)) { + if (!OGR_F_IsFieldSet(hFeat, i) || + OGR_F_IsFieldNull(hFeat, i)) { has_value = false; } @@ -689,7 +783,7 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { if (fld_type == OFTInteger) { int value = NA_INTEGER; if (has_value) - value = OGR_F_GetFieldAsInteger(hFeature, i); + value = OGR_F_GetFieldAsInteger(hFeat, i); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } @@ -697,7 +791,7 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { int64_t value = NA_INTEGER64; if (has_value) value = static_cast( - OGR_F_GetFieldAsInteger64(hFeature, i)); + OGR_F_GetFieldAsInteger64(hFeat, i)); 
list_out.push_back(Rcpp::toInteger64(value), OGR_Fld_GetNameRef(hFieldDefn)); @@ -705,7 +799,7 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { else if (fld_type == OFTReal) { double value = NA_REAL; if (has_value) - value = OGR_F_GetFieldAsDouble(hFeature, i); + value = OGR_F_GetFieldAsDouble(hFeat, i); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } @@ -714,19 +808,19 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { // read as string for now std::string value = ""; if (has_value) - OGR_F_GetFieldAsString(hFeature, i); + value = OGR_F_GetFieldAsString(hFeat, i); list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); } } - for (i = 0; i < OGR_F_GetGeomFieldCount(hFeature); ++i) { + for (i = 0; i < OGR_F_GetGeomFieldCount(hFeat); ++i) { OGRGeomFieldDefnH hGeomFldDefn = - OGR_F_GetGeomFieldDefnRef(hFeature, i); + OGR_F_GetGeomFieldDefnRef(hFeat, i); if (hGeomFldDefn == nullptr) Rcpp::stop("could not obtain geometry field def"); - OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeature, i); + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); if (hGeom != nullptr) { char* pszWKT; OGR_G_ExportToWkt(hGeom, &pszWKT); @@ -743,7 +837,8 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeature) const { } SEXP GDALVector::initDF_(R_xlen_t nrow) const { - OGRFeatureDefnH hFDefn; + // initialize a data frame with nrow rows for the layer definition + OGRFeatureDefnH hFDefn = nullptr; hFDefn = OGR_L_GetLayerDefn(hLayer); if (hFDefn == nullptr) Rcpp::stop("failed to get layer definition"); @@ -778,7 +873,7 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); } else { - // TODO(ctoney): support date, time, binary, etc. + // TODO: support date, time, binary, etc. 
// read as string for now Rcpp::CharacterVector v(nrow, NA_STRING); df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); @@ -866,6 +961,8 @@ RCPP_MODULE(mod_GDALVector) { "Fetch a feature by its identifier") .method("resetReading", &GDALVector::resetReading, "Reset feature reading to start on the first feature") + .method("fetch", &GDALVector::fetch, + "Fetch a set features as a data frame") .method("layerIntersection", &GDALVector::layerIntersection, "Intersection of this layer with a method layer") .method("layerUnion", &GDALVector::layerUnion, diff --git a/src/gdalvector.h b/src/gdalvector.h index cc5918ed2..6f907550d 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -70,6 +70,8 @@ class GDALVector { SEXP getFeature(Rcpp::NumericVector fid); void resetReading(); + Rcpp::DataFrame fetch(double n); + void layerIntersection( GDALVector method_layer, GDALVector result_layer, From 769a2829b8ee4cae3a750578548436dcda19f118 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 23 Jun 2024 23:38:24 -0600 Subject: [PATCH 36/53] GDALVector::fetch() wip --- src/gdalvector.cpp | 98 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 21 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index dbe9dc489..e3ff7c818 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -5,6 +5,7 @@ Copyright (c) 2023-2024 gdalraster authors */ +#include #include #include @@ -445,6 +446,8 @@ Rcpp::DataFrame GDALVector::fetch(double n) { // n should be passed as a whole number (integer or numeric). A value of // Inf for the n argument is supported and also returns the full result. 
+ checkAccess_(GA_ReadOnly); + OGRFeatureDefnH hFDefn = nullptr; hFDefn = OGR_L_GetLayerDefn(hLayer); if (hFDefn == nullptr) @@ -525,11 +528,57 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } fetch_count += 1; - if (!fetch_all && (fetch_count == fetch_num)) + if (fetch_count == fetch_num) break; } - return df; + if (fetch_all) { + if (OGR_L_GetNextFeature(hLayer) != nullptr) { + Rcpp::Rcout << "getFeatureCount() returned " << fetch_count + << std::endl; + std::string msg = "more features potentially available "; + msg += "than reported by getFeatureCount()"; + Rcpp::warning(msg); + } + } + + if (fetch_count < fetch_num) { + R_xlen_t ncopy = static_cast(fetch_count); + Rcpp::DataFrame df_trunc = initDF_(ncopy); + Rcpp::NumericVector fid_col = df[0]; + Rcpp::NumericVector fid_col_trunc = df_trunc[0]; + std::copy_n(fid_col.cbegin(), ncopy, fid_col_trunc.begin()); + + for (int i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { + Rcpp::IntegerVector col = df[i + 1]; + Rcpp::IntegerVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + } + else if (fld_type == OFTInteger64) { + Rcpp::NumericVector col = df[i + 1]; + Rcpp::NumericVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + } + else if (fld_type == OFTReal) { + Rcpp::NumericVector col = df[i + 1]; + Rcpp::NumericVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + } + else { + Rcpp::CharacterVector col = df[i + 1]; + Rcpp::CharacterVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + } + } + + return df_trunc; + } + else { + return df; + } } void GDALVector::layerIntersection( @@ -843,16 +892,18 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { if (hFDefn == nullptr) Rcpp::stop("failed to get layer definition"); 
- // construct the output data frame as list - // it will be coerced to data frame at return - Rcpp::List df_out = Rcpp::List::create(); - int i; + int nFields = OGR_FD_GetFieldCount(hFDefn); + int nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); - std::vector fid_(nrow, NA_INTEGER64); - Rcpp::NumericVector fid = Rcpp::wrap(fid_); - df_out.push_back(fid, "FID"); + // construct the data frame as list and convert at return + Rcpp::List df(1 + nFields + nGeomFields); + Rcpp::CharacterVector col_names(1 + nFields + nGeomFields); - for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + std::vector fid(nrow, NA_INTEGER64); + df[0] = Rcpp::wrap(fid); + col_names[0] = "FID"; + + for (int i = 0; i < nFields; ++i) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); if (hFieldDefn == nullptr) Rcpp::stop("could not obtain field definition"); @@ -861,37 +912,42 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { if (fld_type == OFTInteger) { // TODO: handle boolean subtype Rcpp::IntegerVector v(nrow, NA_INTEGER); - df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } else if (fld_type == OFTInteger64) { - std::vector v_(nrow, NA_INTEGER64); - Rcpp::NumericVector v = Rcpp::wrap(v_); - df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + std::vector v(nrow, NA_INTEGER64); + df[i + 1] = Rcpp::wrap(v); + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } else if (fld_type == OFTReal) { Rcpp::NumericVector v(nrow, NA_REAL); - df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } else { // TODO: support date, time, binary, etc. 
// read as string for now Rcpp::CharacterVector v(nrow, NA_STRING); - df_out.push_back(v, OGR_Fld_GetNameRef(hFieldDefn)); + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } } - for (i = 0; i < OGR_FD_GetGeomFieldCount(hFDefn); ++i) { + for (int i = 0; i < nGeomFields; ++i) { OGRGeomFieldDefnH hGeomFldDefn = OGR_FD_GetGeomFieldDefn(hFDefn, i); if (hGeomFldDefn == nullptr) Rcpp::stop("could not obtain geometry field def"); Rcpp::CharacterVector v(nrow, NA_STRING); - df_out.push_back(v, OGR_GFld_GetNameRef(hGeomFldDefn)); + df[i + 1 + nFields] = v; + col_names[i + 1 + nFields] = OGR_GFld_GetNameRef(hGeomFldDefn); } - df_out.attr("class") = "data.frame"; - df_out.attr("row.names") = Rcpp::seq_len(nrow); - return df_out; + df.names() = col_names; + df.attr("class") = "data.frame"; + df.attr("row.names") = Rcpp::seq_len(nrow); + return df; } // **************************************************************************** From 60440f77fbb80ebd63da51b5d0e0feef67a04186 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 30 Jun 2024 00:21:35 -0600 Subject: [PATCH 37/53] GDALVector::fetch() wip --- src/gdalvector.cpp | 158 ++++++++++++++++++++++++++++++++++----------- src/gdalvector.h | 4 ++ 2 files changed, 124 insertions(+), 38 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index e3ff7c818..f34f1b6d9 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -352,8 +352,11 @@ Rcpp::List GDALVector::getLayerDefn() const { bValue = true; list_geom_fld_defn.push_back(bValue, "is_geom"); - list_out.push_back(list_geom_fld_defn, - OGR_GFld_GetNameRef(hGeomFldDefn)); + std::string geomFldName(OGR_GFld_GetNameRef(hGeomFldDefn)); + if (geomFldName == "") + geomFldName = defaultGeomFldName; + + list_out.push_back(list_geom_fld_defn, geomFldName); CPLFree(pszSRS_WKT); } @@ -400,6 +403,7 @@ double GDALVector::getFeatureCount() { SEXP GDALVector::getNextFeature() { checkAccess_(GA_ReadOnly); + // TODO: OGR_F_Destroy()? 
OGRFeatureH hFeat = OGR_L_GetNextFeature(hLayer); if (hFeat != nullptr) return featureToList_(hFeat); @@ -426,8 +430,9 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { } OGRFeatureH hFeat = OGR_L_GetFeature(hLayer, - static_cast(fid_in)); + static_cast(fid_in)); + // TODO: OGR_F_Destroy()? if (hFeat != nullptr) return featureToList_(hFeat); else @@ -454,40 +459,47 @@ Rcpp::DataFrame GDALVector::fetch(double n) { Rcpp::stop("failed to get layer definition"); bool fetch_all = true; - double fetch_num = 0; - if (n >= 0 && std::isfinite(n)) { + size_t fetch_num = 0; + if (n == -1 || (n > 0 && std::isinf(n))) { + resetReading(); + fetch_num = OGR_L_GetFeatureCount(hLayer, true); + } + else if (Rcpp::NumericVector::is_na(n)) { + fetch_num = OGR_L_GetFeatureCount(hLayer, true); + } + else if (n >= 0 && std::isfinite(n)) { + if (n > 9007199254740992) + Rcpp::stop("out-of-range value given for 'n'"); + fetch_all = false; - fetch_num = std::trunc(n); + fetch_num = static_cast(std::trunc(n)); } else { - fetch_num = getFeatureCount(); + Rcpp::stop("invalid value given for 'n'"); } - Rcpp::DataFrame df = initDF_(static_cast(fetch_num)); - - if (fetch_num == 0 || std::isnan(n)) + Rcpp::DataFrame df = initDF_(fetch_num); + if (fetch_num == 0) return df; OGRFeatureH hFeat = nullptr; - double fetch_count = 0; + size_t fetch_count = 0; + int nFields = OGR_FD_GetFieldCount(hFDefn); + int nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); while ((hFeat = OGR_L_GetNextFeature(hLayer)) != nullptr) { - R_xlen_t row = static_cast(fetch_count); - int64_t fid = static_cast(OGR_F_GetFID(hFeat)); Rcpp::NumericVector fid_col = df[0]; - fid_col(row) = Rcpp::toInteger64(fid)[0]; + fid_col[fetch_count] = Rcpp::toInteger64(fid)[0]; - for (int i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + for (int i = 0; i < nFields; ++i) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); if (hFieldDefn == nullptr) Rcpp::stop("could not obtain field definition"); bool has_value = true; - if 
(!OGR_F_IsFieldSet(hFeat, i) || - OGR_F_IsFieldNull(hFeat, i)) { + if (!OGR_F_IsFieldSet(hFeat, i) || OGR_F_IsFieldNull(hFeat, i)) has_value = false; - } OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); if (fld_type == OFTInteger) { @@ -496,7 +508,7 @@ Rcpp::DataFrame GDALVector::fetch(double n) { value = OGR_F_GetFieldAsInteger(hFeat, i); Rcpp::IntegerVector col = df[i + 1]; - col(row) = value; + col[fetch_count] = value; } else if (fld_type == OFTInteger64) { int64_t value = NA_INTEGER64; @@ -505,7 +517,7 @@ Rcpp::DataFrame GDALVector::fetch(double n) { OGR_F_GetFieldAsInteger64(hFeat, i)); Rcpp::NumericVector col = df[i + 1]; - col(row) = Rcpp::toInteger64(value)[0]; + col[fetch_count] = Rcpp::toInteger64(value)[0]; } else if (fld_type == OFTReal) { double value = NA_REAL; @@ -513,7 +525,7 @@ Rcpp::DataFrame GDALVector::fetch(double n) { value = OGR_F_GetFieldAsDouble(hFeat, i); Rcpp::NumericVector col = df[i + 1]; - col(row) = value; + col[fetch_count] = value; } else { // TODO(ctoney): support date, time, binary, etc. 
@@ -523,7 +535,56 @@ Rcpp::DataFrame GDALVector::fetch(double n) { value = OGR_F_GetFieldAsString(hFeat, i); Rcpp::CharacterVector col = df[i + 1]; - col(row) = value; + col[fetch_count] = value; + } + } + + for (int i = 0; i < nGeomFields; ++i) { + OGRGeomFieldDefnH hGeomFldDefn = + OGR_F_GetGeomFieldDefnRef(hFeat, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("could not obtain geometry field def"); + + if (EQUALN(returnGeomAs.c_str(), "TYPE_ONLY", 9)) { + OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); + Rcpp::CharacterVector col = df[nFields + 1 + i]; + col[fetch_count] = OGRGeometryTypeToName(eType);; + } + else if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + if (hGeom != nullptr) { + const int nWKBSize = OGR_G_WkbSize(hGeom); + OGRwkbByteOrder eOrder = wkbNDR; + if (EQUALN(wkbByteOrder.c_str(), "MSB", 3)) + eOrder = wkbXDR; + + if (nWKBSize) { + Rcpp::RawVector wkb(nWKBSize); + if (EQUALN(returnGeomAs.c_str(), "WKB_ISO", 7)) + OGR_G_ExportToIsoWkb(hGeom, eOrder, &wkb[0]); + else + OGR_G_ExportToWkb(hGeom, eOrder, &wkb[0]); + + Rcpp::List col = df[nFields + 1 + i]; + col[fetch_count] = wkb; + } + } + } + else if (EQUALN(returnGeomAs.c_str(), "WKT", 3)) { + Rcpp::CharacterVector col = df[nFields + 1 + i]; + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + if (hGeom != nullptr) { + char* pszWKT; + OGR_G_ExportToWkt(hGeom, &pszWKT); + col[fetch_count] = pszWKT; + CPLFree(pszWKT); + } + else { + col[fetch_count] = NA_STRING; + } + } + else { + Rcpp::stop("invalid value of field 'returnGeomAs'"); } } @@ -542,12 +603,16 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } } - if (fetch_count < fetch_num) { - R_xlen_t ncopy = static_cast(fetch_count); - Rcpp::DataFrame df_trunc = initDF_(ncopy); + if (fetch_count == fetch_num) { + return df; + } + else { + // truncate the data frame by copying to a new one + // hard to avoid copy here since Rcpp vectors cannot be resized + Rcpp::DataFrame 
df_trunc = initDF_(fetch_count); Rcpp::NumericVector fid_col = df[0]; Rcpp::NumericVector fid_col_trunc = df_trunc[0]; - std::copy_n(fid_col.cbegin(), ncopy, fid_col_trunc.begin()); + std::copy_n(fid_col.cbegin(), fetch_count, fid_col_trunc.begin()); for (int i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); @@ -555,30 +620,27 @@ Rcpp::DataFrame GDALVector::fetch(double n) { if (fld_type == OFTInteger) { Rcpp::IntegerVector col = df[i + 1]; Rcpp::IntegerVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); } else if (fld_type == OFTInteger64) { Rcpp::NumericVector col = df[i + 1]; Rcpp::NumericVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); } else if (fld_type == OFTReal) { Rcpp::NumericVector col = df[i + 1]; Rcpp::NumericVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); } else { Rcpp::CharacterVector col = df[i + 1]; Rcpp::CharacterVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), ncopy, col_trunc.begin()); + std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); } } return df_trunc; } - else { - return df; - } } void GDALVector::layerIntersection( @@ -869,16 +931,20 @@ Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeat) const { if (hGeomFldDefn == nullptr) Rcpp::stop("could not obtain geometry field def"); + std::string geomFldName(OGR_GFld_GetNameRef(hGeomFldDefn)); + if (geomFldName == "") + geomFldName = defaultGeomFldName; + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); if (hGeom != nullptr) { char* pszWKT; OGR_G_ExportToWkt(hGeom, &pszWKT); std::string wkt(pszWKT); - list_out.push_back(wkt, OGR_GFld_GetNameRef(hGeomFldDefn)); + list_out.push_back(wkt, geomFldName); 
CPLFree(pszWKT); } else { - list_out.push_back("", OGR_GFld_GetNameRef(hGeomFldDefn)); + list_out.push_back("", geomFldName); } } @@ -939,9 +1005,20 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { if (hGeomFldDefn == nullptr) Rcpp::stop("could not obtain geometry field def"); - Rcpp::CharacterVector v(nrow, NA_STRING); - df[i + 1 + nFields] = v; - col_names[i + 1 + nFields] = OGR_GFld_GetNameRef(hGeomFldDefn); + if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { + Rcpp::List v(nrow); + df[i + 1 + nFields] = v; + } + else { + Rcpp::CharacterVector v(nrow, NA_STRING); + df[i + 1 + nFields] = v; + } + + std::string geomFldName(OGR_GFld_GetNameRef(hGeomFldDefn)); + if (geomFldName == "") + geomFldName = defaultGeomFldName; + + col_names[i + 1 + nFields] = geomFldName; } df.names() = col_names; @@ -974,6 +1051,11 @@ RCPP_MODULE(mod_GDALVector) { std::string> ("Usage: new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect)") + // exposed read/write fields + .field("defaultGeomFldName", &GDALVector::defaultGeomFldName) + .field("returnGeomAs", &GDALVector::returnGeomAs) + .field("wkbByteOrder", &GDALVector::wkbByteOrder) + // exposed member functions .const_method("getDsn", &GDALVector::getDsn, "Return the DSN") diff --git a/src/gdalvector.h b/src/gdalvector.h index 6f907550d..3e8db4a0a 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -43,6 +43,10 @@ class GDALVector { Rcpp::Nullable open_options, std::string spatial_filter, std::string dialect); + std::string defaultGeomFldName = "geometry"; + std::string returnGeomAs = "WKB"; + std::string wkbByteOrder = "LSB"; + void open(bool read_only); bool isOpen() const; std::string getDsn() const; From e09e519e66e0090b0c579cb4d120618d4e5af848 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Mon, 1 Jul 2024 00:02:00 -0600 Subject: [PATCH 38/53] GDALVector::fetch() wip --- src/gdalvector.cpp | 286 +++++++++++++++++++++++++++++++-------------- 1 file changed, 197 insertions(+), 89 deletions(-) diff 
--git a/src/gdalvector.cpp b/src/gdalvector.cpp index f34f1b6d9..f0ca4f554 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -12,6 +12,7 @@ #include "gdal.h" #include "cpl_port.h" #include "cpl_string.h" +#include "cpl_time.h" // #include "ogrsf_frmts.h" #include "ogr_srs_api.h" @@ -121,7 +122,7 @@ void GDALVector::open(bool read_only) { } else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { is_sql_in = true; - if (EQUALN(pszDialect, "SQLite", 6) && !has_spatialite()) + if (EQUAL(pszDialect, "SQLite") && !has_spatialite()) Rcpp::warning("spatialite not available"); hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), hGeom_filter, pszDialect); @@ -469,13 +470,13 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } else if (n >= 0 && std::isfinite(n)) { if (n > 9007199254740992) - Rcpp::stop("out-of-range value given for 'n'"); + Rcpp::stop("'n' is out of range"); fetch_all = false; fetch_num = static_cast(std::trunc(n)); } else { - Rcpp::stop("invalid value given for 'n'"); + Rcpp::stop("'n' is invalid"); } Rcpp::DataFrame df = initDF_(fetch_num); @@ -483,14 +484,33 @@ Rcpp::DataFrame GDALVector::fetch(double n) { return df; OGRFeatureH hFeat = nullptr; - size_t fetch_count = 0; + size_t row_num = 0; int nFields = OGR_FD_GetFieldCount(hFDefn); int nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); + bool include_geom = true; + if (EQUAL(returnGeomAs.c_str(), "NONE")) { + include_geom = false; + } + else if (!(EQUAL(returnGeomAs.c_str(), "WKB") || + EQUAL(returnGeomAs.c_str(), "WKB_ISO") || + EQUAL(returnGeomAs.c_str(), "WKT") || + EQUAL(returnGeomAs.c_str(), "WKT_ISO") || + EQUAL(returnGeomAs.c_str(), "TYPE_NAME"))) { + Rcpp::stop("unrecognized value of field 'returnGeomAs'"); + } + + OGRwkbByteOrder eOrder; + if (EQUAL(wkbByteOrder.c_str(), "LSB")) + eOrder = wkbNDR; + else if (EQUAL(wkbByteOrder.c_str(), "MSB")) + eOrder = wkbXDR; + else + Rcpp::stop("invalid value of field 'wkbByteOrder'"); while ((hFeat = OGR_L_GetNextFeature(hLayer)) != nullptr) 
{ - int64_t fid = static_cast(OGR_F_GetFID(hFeat)); + const int64_t fid = static_cast(OGR_F_GetFID(hFeat)); Rcpp::NumericVector fid_col = df[0]; - fid_col[fetch_count] = Rcpp::toInteger64(fid)[0]; + fid_col[row_num] = Rcpp::toInteger64(fid)[0]; for (int i = 0; i < nFields; ++i) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); @@ -502,100 +522,142 @@ Rcpp::DataFrame GDALVector::fetch(double n) { has_value = false; OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - if (fld_type == OFTInteger) { - int value = NA_INTEGER; - if (has_value) - value = OGR_F_GetFieldAsInteger(hFeat, i); + if (fld_type == OFTInteger && has_value) { Rcpp::IntegerVector col = df[i + 1]; - col[fetch_count] = value; + col[row_num] = OGR_F_GetFieldAsInteger(hFeat, i); } - else if (fld_type == OFTInteger64) { - int64_t value = NA_INTEGER64; - if (has_value) - value = static_cast( - OGR_F_GetFieldAsInteger64(hFeat, i)); + else if (fld_type == OFTInteger64 && has_value) { + const int64_t value = static_cast( + OGR_F_GetFieldAsInteger64(hFeat, i)); Rcpp::NumericVector col = df[i + 1]; - col[fetch_count] = Rcpp::toInteger64(value)[0]; + col[row_num] = Rcpp::toInteger64(value)[0]; } - else if (fld_type == OFTReal) { - double value = NA_REAL; - if (has_value) - value = OGR_F_GetFieldAsDouble(hFeat, i); - + else if (fld_type == OFTReal && has_value) { Rcpp::NumericVector col = df[i + 1]; - col[fetch_count] = value; + col[row_num] = OGR_F_GetFieldAsDouble(hFeat, i); + } + else if ((fld_type == OFTDate || fld_type == OFTDateTime) + && has_value) { + + int yr = 9999; + int mo, day = 9; + int hr, min, sec, tzflag = 0; + if (OGR_F_GetFieldAsDateTime(hFeat, i, &yr, &mo, &day, + &hr, &min, &sec, &tzflag)) { + + struct tm brokendowntime; + brokendowntime.tm_year = yr - 1900; + brokendowntime.tm_mon = mo - 1; + brokendowntime.tm_mday = day; + brokendowntime.tm_hour = hr; + brokendowntime.tm_min = min; + brokendowntime.tm_sec = sec; + int64_t nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime); + 
Rcpp::NumericVector col = df[i + 1]; + if (fld_type == OFTDate) { + const int64_t value = nUnixTime / 86400; + col[row_num] = static_cast(value); + } + else { + if (tzflag > 1 && tzflag != 100) { + // convert to GMT + const int tzoffset = std::abs(tzflag - 100) * 15; + const int tzhour = tzoffset / 60; + const int tzmin = tzoffset - tzhour * 60; + const int offset = tzhour * 3600 + tzmin * 60; + if (tzflag >= 100) + nUnixTime -= offset; + else + nUnixTime += offset; + } + col[row_num] = static_cast(nUnixTime); + } + } + } + else if (fld_type == OFTBinary && has_value) { + int nDataSize = 0; + GByte *pabyData = OGR_F_GetFieldAsBinary(hFeat, i, &nDataSize); + if (nDataSize > 0) { + Rcpp::RawVector blob(nDataSize); + std::memcpy(&blob[0], pabyData, nDataSize); + Rcpp::List col = df[i + 1]; + col[row_num] = blob; + } } else { - // TODO(ctoney): support date, time, binary, etc. - // read as string for now - std::string value = ""; - if (has_value) - value = OGR_F_GetFieldAsString(hFeat, i); - - Rcpp::CharacterVector col = df[i + 1]; - col[fetch_count] = value; + // use string + if (has_value) { + Rcpp::CharacterVector col = df[i + 1]; + col[row_num] = OGR_F_GetFieldAsString(hFeat, i); + } } } - for (int i = 0; i < nGeomFields; ++i) { - OGRGeomFieldDefnH hGeomFldDefn = - OGR_F_GetGeomFieldDefnRef(hFeat, i); - if (hGeomFldDefn == nullptr) - Rcpp::stop("could not obtain geometry field def"); - - if (EQUALN(returnGeomAs.c_str(), "TYPE_ONLY", 9)) { - OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); - Rcpp::CharacterVector col = df[nFields + 1 + i]; - col[fetch_count] = OGRGeometryTypeToName(eType);; - } - else if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { - OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); - if (hGeom != nullptr) { - const int nWKBSize = OGR_G_WkbSize(hGeom); - OGRwkbByteOrder eOrder = wkbNDR; - if (EQUALN(wkbByteOrder.c_str(), "MSB", 3)) - eOrder = wkbXDR; - - if (nWKBSize) { - Rcpp::RawVector wkb(nWKBSize); - if 
(EQUALN(returnGeomAs.c_str(), "WKB_ISO", 7)) - OGR_G_ExportToIsoWkb(hGeom, eOrder, &wkb[0]); - else - OGR_G_ExportToWkb(hGeom, eOrder, &wkb[0]); - - Rcpp::List col = df[nFields + 1 + i]; - col[fetch_count] = wkb; + if (include_geom) { + for (int i = 0; i < nGeomFields; ++i) { + OGRGeomFieldDefnH hGeomFldDefn = + OGR_F_GetGeomFieldDefnRef(hFeat, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("could not obtain geometry field def"); + + if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + if (hGeom != nullptr) { +#if GDAL_VERSION_NUM >= 3030000 + const int nWKBSize = OGR_G_WkbSizeEx(hGeom); +#else + const int nWKBSize = OGR_G_WkbSize(hGeom); +#endif + if (nWKBSize) { + Rcpp::RawVector wkb(nWKBSize); + if (EQUAL(returnGeomAs.c_str(), "WKB")) + OGR_G_ExportToWkb(hGeom, eOrder, &wkb[0]); + else if (EQUAL(returnGeomAs.c_str(), "WKB_ISO")) + OGR_G_ExportToIsoWkb(hGeom, eOrder, &wkb[0]); + + Rcpp::List col = df[nFields + 1 + i]; + col[row_num] = wkb; + } } } - } - else if (EQUALN(returnGeomAs.c_str(), "WKT", 3)) { - Rcpp::CharacterVector col = df[nFields + 1 + i]; - OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); - if (hGeom != nullptr) { - char* pszWKT; - OGR_G_ExportToWkt(hGeom, &pszWKT); - col[fetch_count] = pszWKT; - CPLFree(pszWKT); + else if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKT")) { + Rcpp::CharacterVector col = df[nFields + 1 + i]; + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + if (hGeom != nullptr) { + char* pszWKT; + if (EQUAL(returnGeomAs.c_str(), "WKT")) + OGR_G_ExportToWkt(hGeom, &pszWKT); + else if (EQUAL(returnGeomAs.c_str(), "WKT_ISO")) + OGR_G_ExportToIsoWkt(hGeom, &pszWKT); + + col[row_num] = pszWKT; + CPLFree(pszWKT); + } + else { + col[row_num] = NA_STRING; + } } - else { - col[fetch_count] = NA_STRING; + else if (EQUAL(returnGeomAs.c_str(), "TYPE_NAME")) { + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + Rcpp::CharacterVector col = df[nFields + 1 + i]; + if (hGeom != 
nullptr) + col[row_num] = OGR_G_GetGeometryName(hGeom); + else + col[row_num] = NA_STRING; } } - else { - Rcpp::stop("invalid value of field 'returnGeomAs'"); - } } - fetch_count += 1; - if (fetch_count == fetch_num) + row_num += 1; + if (row_num == fetch_num) break; } if (fetch_all) { if (OGR_L_GetNextFeature(hLayer) != nullptr) { - Rcpp::Rcout << "getFeatureCount() returned " << fetch_count + Rcpp::Rcout << "getFeatureCount() returned " << row_num << std::endl; std::string msg = "more features potentially available "; msg += "than reported by getFeatureCount()"; @@ -603,16 +665,19 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } } - if (fetch_count == fetch_num) { + if (row_num == fetch_num) { return df; } else { - // truncate the data frame by copying to a new one - // hard to avoid copy here since Rcpp vectors cannot be resized - Rcpp::DataFrame df_trunc = initDF_(fetch_count); + // Truncate the data frame by copying to a new one. Hard to avoid + // a copy here since Rcpp vectors cannot be resized. This is only + // needed for the last page when paging through features with repeated + // calls to fetch(n), so the data generally should not be large enough + // for this to be a problem. 
+ Rcpp::DataFrame df_trunc = initDF_(row_num); Rcpp::NumericVector fid_col = df[0]; Rcpp::NumericVector fid_col_trunc = df_trunc[0]; - std::copy_n(fid_col.cbegin(), fetch_count, fid_col_trunc.begin()); + std::copy_n(fid_col.cbegin(), row_num, fid_col_trunc.begin()); for (int i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); @@ -620,22 +685,45 @@ Rcpp::DataFrame GDALVector::fetch(double n) { if (fld_type == OFTInteger) { Rcpp::IntegerVector col = df[i + 1]; Rcpp::IntegerVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); } else if (fld_type == OFTInteger64) { Rcpp::NumericVector col = df[i + 1]; Rcpp::NumericVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); } - else if (fld_type == OFTReal) { + else if (fld_type == OFTReal || fld_type == OFTDate || + fld_type == OFTDateTime) { Rcpp::NumericVector col = df[i + 1]; Rcpp::NumericVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + else if (fld_type == OFTBinary) { + Rcpp::List col = df[nFields + 1 + i]; + Rcpp::List col_trunc = df_trunc[nFields + 1 + i]; + for (size_t n = 0; n < row_num; ++n) + col_trunc[n] = col[n]; } else { Rcpp::CharacterVector col = df[i + 1]; Rcpp::CharacterVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), fetch_count, col_trunc.begin()); + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + } + + if (include_geom) { + for (int i = 0; i < nGeomFields; ++i) { + if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { + Rcpp::List col = df[nFields + 1 + i]; + Rcpp::List col_trunc = df_trunc[nFields + 1 + i]; + for (size_t n = 0; n < row_num; ++n) + col_trunc[n] = col[n]; + } + else { + Rcpp::CharacterVector col = df[i + 
1]; + Rcpp::CharacterVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } } } @@ -959,7 +1047,9 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { Rcpp::stop("failed to get layer definition"); int nFields = OGR_FD_GetFieldCount(hFDefn); - int nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); + int nGeomFields = 0; + if (!EQUAL(returnGeomAs.c_str(), "NONE")) + nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); // construct the data frame as list and convert at return Rcpp::List df(1 + nFields + nGeomFields); @@ -991,9 +1081,27 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { df[i + 1] = v; col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } + else if (fld_type == OFTDate) { + Rcpp::NumericVector v(nrow, NA_REAL); + v.attr("class") = "Date"; + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else if (fld_type == OFTDateTime) { + Rcpp::NumericVector v(nrow, NA_REAL); + Rcpp::CharacterVector class_names = {"POSIXt", "POSIXct"}; + v.attr("class") = class_names; + v.attr("tzone") = "UTC"; + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else if (fld_type == OFTBinary) { + Rcpp::List v(nrow); + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } else { - // TODO: support date, time, binary, etc. 
- // read as string for now + // use string Rcpp::CharacterVector v(nrow, NA_STRING); df[i + 1] = v; col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); From d3cd965f90686080a28818075e0bd1ff16deff85 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Tue, 2 Jul 2024 23:33:33 -0600 Subject: [PATCH 39/53] fetch(): add the list field types --- src/gdalvector.cpp | 192 ++++++++++++++++++++++++++++++++++++--------- src/gdalvector.h | 2 +- 2 files changed, 156 insertions(+), 38 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index f0ca4f554..aba292c90 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -524,8 +524,15 @@ Rcpp::DataFrame GDALVector::fetch(double n) { OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); if (fld_type == OFTInteger && has_value) { - Rcpp::IntegerVector col = df[i + 1]; - col[row_num] = OGR_F_GetFieldAsInteger(hFeat, i); + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector col = df[i + 1]; + col[row_num] = OGR_F_GetFieldAsInteger(hFeat, i); + } + else { + Rcpp::IntegerVector col = df[i + 1]; + col[row_num] = OGR_F_GetFieldAsInteger(hFeat, i); + } } else if (fld_type == OFTInteger64 && has_value) { const int64_t value = static_cast( @@ -539,7 +546,7 @@ Rcpp::DataFrame GDALVector::fetch(double n) { col[row_num] = OGR_F_GetFieldAsDouble(hFeat, i); } else if ((fld_type == OFTDate || fld_type == OFTDateTime) - && has_value) { + && has_value) { int yr = 9999; int mo, day = 9; @@ -557,10 +564,11 @@ Rcpp::DataFrame GDALVector::fetch(double n) { int64_t nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime); Rcpp::NumericVector col = df[i + 1]; if (fld_type == OFTDate) { - const int64_t value = nUnixTime / 86400; - col[row_num] = static_cast(value); + const int64_t nUnixTime_days = nUnixTime / 86400; + col[row_num] = static_cast(nUnixTime_days); } else { + // OFTDateTime if (tzflag > 1 && tzflag != 100) { // convert to GMT const int tzoffset = std::abs(tzflag - 100) * 
15; @@ -576,14 +584,101 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } } } - else if (fld_type == OFTBinary && has_value) { - int nDataSize = 0; - GByte *pabyData = OGR_F_GetFieldAsBinary(hFeat, i, &nDataSize); - if (nDataSize > 0) { - Rcpp::RawVector blob(nDataSize); - std::memcpy(&blob[0], pabyData, nDataSize); - Rcpp::List col = df[i + 1]; - col[row_num] = blob; + else if (fld_type == OFTBinary) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nDataSize = 0; + GByte *pabyData = OGR_F_GetFieldAsBinary(hFeat, i, &nDataSize); + if (nDataSize > 0) { + Rcpp::RawVector blob(nDataSize); + std::memcpy(&blob[0], pabyData, nDataSize); + col[row_num] = blob; + } + else { + col[row_num] = Rcpp::RawVector::create(); + } + } + else { + col[row_num] = Rcpp::RawVector::create(); + } + } + else if (fld_type == OFTIntegerList) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nCount = 0; + const int *value = OGR_F_GetFieldAsIntegerList(hFeat, i, + &nCount); + if (nCount > 0) { + std::vector v(value, value + nCount); + col[row_num] = Rcpp::wrap(v); + } + else { + col[row_num] = Rcpp::IntegerVector::create(); + } + } + else { + col[row_num] = NA_INTEGER; + } + } + else if (fld_type == OFTInteger64List) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nCount = 0; + const int64_t *value = reinterpret_cast( + OGR_F_GetFieldAsInteger64List(hFeat, i, &nCount)); + + if (nCount > 0) { + std::vector v(value, value + nCount); + col[row_num] = Rcpp::wrap(v); + } + else { + Rcpp::NumericVector v = Rcpp::NumericVector::create(); + v.attr("class") = "integer64"; + col[row_num] = v; + } + } + else { + std::vector v(1); + v[0] = NA_INTEGER64; + col[row_num] = Rcpp::wrap(v); + } + } + else if (fld_type == OFTRealList) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nCount = 0; + const double *value = OGR_F_GetFieldAsDoubleList(hFeat, i, + &nCount); + if (nCount > 0) { + std::vector v(value, value + nCount); + col[row_num] = Rcpp::wrap(v); + } + else { + 
col[row_num] = Rcpp::NumericVector::create(); + } + } + else { + col[row_num] = NA_REAL; + } + } + else if (fld_type == OFTStringList) { + Rcpp::List col = df[i + 1]; + if (has_value) { + char **papszValue = OGR_F_GetFieldAsStringList(hFeat, i); + int nCount = 0; + nCount = CSLCount(papszValue); + if (nCount > 0) { + std::vector v(papszValue, + papszValue + nCount); + + col[row_num] = Rcpp::wrap(v); + } + else { + col[row_num] = Rcpp::CharacterVector::create(); + } + } + else { + col[row_num] = NA_STRING; } } else { @@ -620,6 +715,10 @@ Rcpp::DataFrame GDALVector::fetch(double n) { Rcpp::List col = df[nFields + 1 + i]; col[row_num] = wkb; } + else { + Rcpp::List col = df[nFields + 1 + i]; + col[row_num] = Rcpp::RawVector::create(); + } } } else if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKT")) { @@ -657,10 +756,10 @@ Rcpp::DataFrame GDALVector::fetch(double n) { if (fetch_all) { if (OGR_L_GetNextFeature(hLayer) != nullptr) { - Rcpp::Rcout << "getFeatureCount() returned " << row_num + Rcpp::Rcout << "getFeatureCount() reported " << row_num << std::endl; - std::string msg = "more features potentially available "; - msg += "than reported by getFeatureCount()"; + std::string msg = + "more features potentially available than reported by getFeatureCount()"; Rcpp::warning(msg); } } @@ -675,6 +774,7 @@ Rcpp::DataFrame GDALVector::fetch(double n) { // calls to fetch(n), so the data generally should not be large enough // for this to be a problem. 
Rcpp::DataFrame df_trunc = initDF_(row_num); + Rcpp::NumericVector fid_col = df[0]; Rcpp::NumericVector fid_col_trunc = df_trunc[0]; std::copy_n(fid_col.cbegin(), row_num, fid_col_trunc.begin()); @@ -682,25 +782,33 @@ Rcpp::DataFrame GDALVector::fetch(double n) { for (int i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { - Rcpp::IntegerVector col = df[i + 1]; - Rcpp::IntegerVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), row_num, col_trunc.begin()); - } - else if (fld_type == OFTInteger64) { - Rcpp::NumericVector col = df[i + 1]; - Rcpp::NumericVector col_trunc = df_trunc[i + 1]; - std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector col = df[i + 1]; + Rcpp::LogicalVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + else { + Rcpp::IntegerVector col = df[i + 1]; + Rcpp::IntegerVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } } - else if (fld_type == OFTReal || fld_type == OFTDate || - fld_type == OFTDateTime) { + else if (fld_type == OFTInteger64 || fld_type == OFTReal || + fld_type == OFTDate || fld_type == OFTDateTime) { + Rcpp::NumericVector col = df[i + 1]; Rcpp::NumericVector col_trunc = df_trunc[i + 1]; std::copy_n(col.cbegin(), row_num, col_trunc.begin()); } - else if (fld_type == OFTBinary) { - Rcpp::List col = df[nFields + 1 + i]; - Rcpp::List col_trunc = df_trunc[nFields + 1 + i]; + else if (fld_type == OFTBinary || fld_type == OFTIntegerList || + fld_type == OFTInteger64List || fld_type == OFTRealList || + fld_type == OFTStringList) { + + Rcpp::List col = df[i + 1]; + Rcpp::List col_trunc = df_trunc[i + 1]; for (size_t n = 0; n < row_num; ++n) col_trunc[n] = col[n]; } @@ -714,14 
+822,14 @@ Rcpp::DataFrame GDALVector::fetch(double n) { if (include_geom) { for (int i = 0; i < nGeomFields; ++i) { if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { - Rcpp::List col = df[nFields + 1 + i]; - Rcpp::List col_trunc = df_trunc[nFields + 1 + i]; + Rcpp::List col = df[nFields + i + 1]; + Rcpp::List col_trunc = df_trunc[nFields + i + 1]; for (size_t n = 0; n < row_num; ++n) col_trunc[n] = col[n]; } else { - Rcpp::CharacterVector col = df[i + 1]; - Rcpp::CharacterVector col_trunc = df_trunc[i + 1]; + Rcpp::CharacterVector col = df[nFields + i + 1]; + Rcpp::CharacterVector col_trunc = df_trunc[nFields + i + 1]; std::copy_n(col.cbegin(), row_num, col_trunc.begin()); } } @@ -1065,10 +1173,17 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { Rcpp::stop("could not obtain field definition"); OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + if (fld_type == OFTInteger) { - // TODO: handle boolean subtype - Rcpp::IntegerVector v(nrow, NA_INTEGER); - df[i + 1] = v; + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector v(nrow, NA_LOGICAL); + df[i + 1] = v; + } + else { + Rcpp::IntegerVector v(nrow, NA_INTEGER); + df[i + 1] = v; + } col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } else if (fld_type == OFTInteger64) { @@ -1095,7 +1210,10 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { df[i + 1] = v; col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } - else if (fld_type == OFTBinary) { + else if (fld_type == OFTBinary || fld_type == OFTIntegerList || + fld_type == OFTInteger64List || fld_type == OFTRealList || + fld_type == OFTStringList) { + Rcpp::List v(nrow); df[i + 1] = v; col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); diff --git a/src/gdalvector.h b/src/gdalvector.h index 3e8db4a0a..05548f427 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -44,7 +44,7 @@ class GDALVector { std::string spatial_filter, std::string dialect); std::string defaultGeomFldName = "geometry"; - 
std::string returnGeomAs = "WKB"; + std::string returnGeomAs = "NONE"; std::string wkbByteOrder = "LSB"; void open(bool read_only); From 62504831c0569638908012c6274d09998cb9d4a4 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Thu, 4 Jul 2024 23:17:44 -0600 Subject: [PATCH 40/53] use fetch() instead in getFeature() and getNextFeature() --- src/gdalvector.cpp | 225 ++++++++++++++++++--------------------------- src/gdalvector.h | 2 +- src/ogr_util.cpp | 13 +-- src/ogr_util.h | 28 ++++++ 4 files changed, 120 insertions(+), 148 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index aba292c90..a5ab5ecad 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -18,6 +18,7 @@ #include "gdalraster.h" #include "gdalvector.h" +#include "ogr_util.h" GDALVector::GDALVector() : dsn_in(""), @@ -264,38 +265,27 @@ Rcpp::List GDALVector::getLayerDefn() const { Rcpp::stop("failed to get layer definition"); Rcpp::List list_out = Rcpp::List::create(); - std::string sValue; - int nValue; + std::string sValue = ""; + int nValue = -1; bool bValue; int iField; // attribute fields - // TODO(ctoney): add subtype and field domain name - for (iField=0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { + // TODO(ctoney): add field domain name + for (iField = 0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { Rcpp::List list_fld_defn = Rcpp::List::create(); OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); if (hFieldDefn == nullptr) Rcpp::stop("could not obtain field definition"); OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - // TODO(ctoney): add list types, date, time, binary, etc. 
- if (fld_type == OFTInteger) { - sValue = "OFTInteger"; - } - else if (fld_type == OFTInteger64) { - sValue = "OFTInteger64"; - } - else if (fld_type == OFTReal) { - sValue = "OFTReal"; - } - else if (fld_type == OFTString) { - sValue = "OFTString"; - } - else { - sValue = "default (read as OFTString)"; - } + sValue = getOFTString_(fld_type); list_fld_defn.push_back(sValue, "type"); + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + sValue = getOFTSubtypeString_(fld_subtype); + list_fld_defn.push_back(sValue, "subtype"); + nValue = OGR_Fld_GetWidth(hFieldDefn); list_fld_defn.push_back(nValue, "width"); @@ -332,7 +322,7 @@ Rcpp::List GDALVector::getLayerDefn() const { Rcpp::stop("could not obtain geometry field definition"); OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); - list_geom_fld_defn.push_back(OGRGeometryTypeToName(eType), "type"); + list_geom_fld_defn.push_back(getWkbGeomString_(eType), "type"); OGRSpatialReferenceH hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); if (hSRS == nullptr) @@ -374,6 +364,8 @@ void GDALVector::setAttributeFilter(std::string query) { if (OGR_L_SetAttributeFilter(hLayer, query_in) != OGRERR_NONE) Rcpp::stop("error setting filter, possibly in the query expression"); + else + m_attr_filter = query; } void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { @@ -404,12 +396,16 @@ double GDALVector::getFeatureCount() { SEXP GDALVector::getNextFeature() { checkAccess_(GA_ReadOnly); - // TODO: OGR_F_Destroy()? 
- OGRFeatureH hFeat = OGR_L_GetNextFeature(hLayer); - if (hFeat != nullptr) - return featureToList_(hFeat); - else + Rcpp::DataFrame df = fetch(1); + if (df.nrows() == 0) { return R_NilValue; + } + else { + // return as list + df.attr("class") = R_NilValue; + df.attr("row.names") = R_NilValue; + return df; + } } SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { @@ -421,23 +417,45 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { if (fid.size() != 1) Rcpp::stop("'fid' must be a length-1 numeric vector (integer64)"); - int64_t fid_in; - if (Rcpp::isInteger64(fid)) { + int64_t fid_in = -1; + if (Rcpp::isInteger64(fid)) fid_in = Rcpp::fromInteger64(fid[0]); - } - else { - std::vector tmp = Rcpp::as>(fid); - fid_in = static_cast(tmp[0]); + else + fid_in = static_cast(fid[0]); + + // save the current attribute and spatial filters + std::string orig_filter = m_attr_filter; + OGRGeometryH hOrigFilterGeom = nullptr; + OGRGeometryH hFilterGeom = nullptr; + hFilterGeom = OGR_L_GetSpatialFilter(hLayer); + if (hFilterGeom != nullptr) { + hOrigFilterGeom = OGR_G_Clone(hFilterGeom); + hFilterGeom = nullptr; } - OGRFeatureH hFeat = OGR_L_GetFeature(hLayer, - static_cast(fid_in)); + // filter based on FID + clearSpatialFilter(); + setAttributeFilter("FID = " + std::to_string(fid_in)); - // TODO: OGR_F_Destroy()? 
- if (hFeat != nullptr) - return featureToList_(hFeat); - else + Rcpp::DataFrame df = fetch(1); + + // restore original filters + setAttributeFilter(orig_filter); + OGR_L_SetSpatialFilter(hLayer, hOrigFilterGeom); + if (hOrigFilterGeom != nullptr) { + OGR_G_DestroyGeometry(hOrigFilterGeom); + hOrigFilterGeom = nullptr; + } + + if (df.nrows() == 0) { return R_NilValue; + } + else { + // return as list + df.attr("class") = R_NilValue; + df.attr("row.names") = R_NilValue; + return df; + } } void GDALVector::resetReading() { @@ -538,8 +556,15 @@ Rcpp::DataFrame GDALVector::fetch(double n) { const int64_t value = static_cast( OGR_F_GetFieldAsInteger64(hFeat, i)); - Rcpp::NumericVector col = df[i + 1]; - col[row_num] = Rcpp::toInteger64(value)[0]; + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector col = df[i + 1]; + col[row_num] = Rcpp::toInteger64(value)[0]; + } + else { + Rcpp::NumericVector col = df[i + 1]; + col[row_num] = Rcpp::toInteger64(value)[0]; + } } else if (fld_type == OFTReal && has_value) { Rcpp::NumericVector col = df[i + 1]; @@ -606,10 +631,10 @@ Rcpp::DataFrame GDALVector::fetch(double n) { Rcpp::List col = df[i + 1]; if (has_value) { int nCount = 0; - const int *value = OGR_F_GetFieldAsIntegerList(hFeat, i, - &nCount); + const int *panValue = OGR_F_GetFieldAsIntegerList(hFeat, i, + &nCount); if (nCount > 0) { - std::vector v(value, value + nCount); + std::vector v(panValue, panValue + nCount); col[row_num] = Rcpp::wrap(v); } else { @@ -624,11 +649,11 @@ Rcpp::DataFrame GDALVector::fetch(double n) { Rcpp::List col = df[i + 1]; if (has_value) { int nCount = 0; - const int64_t *value = reinterpret_cast( + const int64_t *panValue = reinterpret_cast( OGR_F_GetFieldAsInteger64List(hFeat, i, &nCount)); if (nCount > 0) { - std::vector v(value, value + nCount); + std::vector v(panValue, panValue + nCount); col[row_num] = Rcpp::wrap(v); } else { @@ -647,10 +672,11 @@ Rcpp::DataFrame 
GDALVector::fetch(double n) { Rcpp::List col = df[i + 1]; if (has_value) { int nCount = 0; - const double *value = OGR_F_GetFieldAsDoubleList(hFeat, i, - &nCount); + const double *padfValue = + OGR_F_GetFieldAsDoubleList(hFeat, i, &nCount); + if (nCount > 0) { - std::vector v(value, value + nCount); + std::vector v(padfValue, padfValue + nCount); col[row_num] = Rcpp::wrap(v); } else { @@ -749,18 +775,24 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } } + OGR_F_Destroy(hFeat); + hFeat = nullptr; + row_num += 1; if (row_num == fetch_num) break; } if (fetch_all) { - if (OGR_L_GetNextFeature(hLayer) != nullptr) { + hFeat = OGR_L_GetNextFeature(hLayer); + if (hFeat != nullptr) { Rcpp::Rcout << "getFeatureCount() reported " << row_num << std::endl; std::string msg = "more features potentially available than reported by getFeatureCount()"; Rcpp::warning(msg); + OGR_F_Destroy(hFeat); + hFeat = nullptr; } } @@ -1063,90 +1095,6 @@ OGRLayerH GDALVector::getOGRLayerH_() const { return hLayer; } -Rcpp::List GDALVector::featureToList_(OGRFeatureH hFeat) const { - OGRFeatureDefnH hFDefn = nullptr; - hFDefn = OGR_L_GetLayerDefn(hLayer); - if (hFDefn == nullptr) - Rcpp::stop("failed to get layer definition"); - - Rcpp::List list_out = Rcpp::List::create(); - int i; - - int64_t FID = static_cast(OGR_F_GetFID(hFeat)); - list_out.push_back(Rcpp::toInteger64(FID), "FID"); - - for (i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { - OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); - if (hFieldDefn == nullptr) - Rcpp::stop("could not obtain field definition"); - - bool has_value = true; - if (!OGR_F_IsFieldSet(hFeat, i) || - OGR_F_IsFieldNull(hFeat, i)) { - has_value = false; - } - - OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - if (fld_type == OFTInteger) { - int value = NA_INTEGER; - if (has_value) - value = OGR_F_GetFieldAsInteger(hFeat, i); - - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - else if (fld_type == OFTInteger64) { - int64_t value = 
NA_INTEGER64; - if (has_value) - value = static_cast( - OGR_F_GetFieldAsInteger64(hFeat, i)); - - list_out.push_back(Rcpp::toInteger64(value), - OGR_Fld_GetNameRef(hFieldDefn)); - } - else if (fld_type == OFTReal) { - double value = NA_REAL; - if (has_value) - value = OGR_F_GetFieldAsDouble(hFeat, i); - - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - else { - // TODO(ctoney): support date, time, binary, etc. - // read as string for now - std::string value = ""; - if (has_value) - value = OGR_F_GetFieldAsString(hFeat, i); - - list_out.push_back(value, OGR_Fld_GetNameRef(hFieldDefn)); - } - } - - for (i = 0; i < OGR_F_GetGeomFieldCount(hFeat); ++i) { - OGRGeomFieldDefnH hGeomFldDefn = - OGR_F_GetGeomFieldDefnRef(hFeat, i); - if (hGeomFldDefn == nullptr) - Rcpp::stop("could not obtain geometry field def"); - - std::string geomFldName(OGR_GFld_GetNameRef(hGeomFldDefn)); - if (geomFldName == "") - geomFldName = defaultGeomFldName; - - OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); - if (hGeom != nullptr) { - char* pszWKT; - OGR_G_ExportToWkt(hGeom, &pszWKT); - std::string wkt(pszWKT); - list_out.push_back(wkt, geomFldName); - CPLFree(pszWKT); - } - else { - list_out.push_back("", geomFldName); - } - } - - return list_out; -} - SEXP GDALVector::initDF_(R_xlen_t nrow) const { // initialize a data frame with nrow rows for the layer definition OGRFeatureDefnH hFDefn = nullptr; @@ -1187,8 +1135,15 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } else if (fld_type == OFTInteger64) { - std::vector v(nrow, NA_INTEGER64); - df[i + 1] = Rcpp::wrap(v); + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector v(nrow, NA_LOGICAL); + df[i + 1] = v; + } + else { + std::vector v(nrow, NA_INTEGER64); + df[i + 1] = Rcpp::wrap(v); + } col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); } else if (fld_type == OFTReal) { diff --git 
a/src/gdalvector.h b/src/gdalvector.h index 05548f427..4b7d7df16 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -31,6 +31,7 @@ class GDALVector { GDALDatasetH hDataset; GDALAccess eAccess; OGRLayerH hLayer; + std::string m_attr_filter = ""; public: GDALVector(); @@ -117,7 +118,6 @@ class GDALVector { // methods for internal use not exported to R void checkAccess_(GDALAccess access_needed) const; OGRLayerH getOGRLayerH_() const; - Rcpp::List featureToList_(OGRFeatureH hFeature) const; SEXP initDF_(R_xlen_t nrow) const; }; diff --git a/src/ogr_util.cpp b/src/ogr_util.cpp index dd9c5a612..6a3312320 100644 --- a/src/ogr_util.cpp +++ b/src/ogr_util.cpp @@ -14,8 +14,7 @@ #include "gdalraster.h" #include "ogr_util.h" -// Internal lookup of OGRwkbGeometryType by string descriptor -// Returns wkbUnknown if no match + OGRwkbGeometryType getWkbGeomType_(std::string geom_type) { std::string geom_type_in = str_toupper_(geom_type); if (auto it = MAP_OGR_GEOM_TYPE.find(geom_type_in); @@ -28,8 +27,6 @@ OGRwkbGeometryType getWkbGeomType_(std::string geom_type) { } } -// Internal lookup of geometry type string by OGRwkbGeometryType -// Returns "UNKNOWN" if no match std::string getWkbGeomString_(OGRwkbGeometryType eType) { for (auto it = MAP_OGR_GEOM_TYPE.begin(); it != MAP_OGR_GEOM_TYPE.end(); ++it) { @@ -40,8 +37,6 @@ std::string getWkbGeomString_(OGRwkbGeometryType eType) { return "UNKNOWN"; } -// Internal lookup of OGRFieldType by string descriptor -// Error if no match OGRFieldType getOFT_(std::string fld_type) { if (auto it = MAP_OGR_FLD_TYPE.find(fld_type); it != MAP_OGR_FLD_TYPE.end()) { @@ -53,8 +48,6 @@ OGRFieldType getOFT_(std::string fld_type) { } } -// Internal lookup of OGR field type string by OGRFieldType -// Returns empty string if no match, with warning emitted std::string getOFTString_(OGRFieldType eType) { for (auto it = MAP_OGR_FLD_TYPE.begin(); it != MAP_OGR_FLD_TYPE.end(); ++it) { @@ -66,8 +59,6 @@ std::string getOFTString_(OGRFieldType eType) { return 
""; } -// Internal lookup of OGRFieldSubType by string descriptor -// Returns OFSTNone if no match OGRFieldSubType getOFTSubtype_(std::string fld_subtype) { if (auto it = MAP_OGR_FLD_SUBTYPE.find(fld_subtype); it != MAP_OGR_FLD_SUBTYPE.end()) { @@ -79,8 +70,6 @@ OGRFieldSubType getOFTSubtype_(std::string fld_subtype) { } } -// Internal lookup of OGR field subtype string by OGRFieldSubType -// Returns "OFSTNone" if no match std::string getOFTSubtypeString_(OGRFieldSubType eType) { for (auto it = MAP_OGR_FLD_SUBTYPE.begin(); it != MAP_OGR_FLD_SUBTYPE.end(); ++it) { diff --git a/src/ogr_util.h b/src/ogr_util.h index c56b89f2f..62057ebd0 100644 --- a/src/ogr_util.h +++ b/src/ogr_util.h @@ -115,6 +115,31 @@ const std::map MAP_OGR_FLD_SUBTYPE{ }; #endif +// Internal lookup of OGRwkbGeometryType by string descriptor +// Returns wkbUnknown if no match +OGRwkbGeometryType getWkbGeomType_(std::string geom_type); + +// Internal lookup of geometry type string by OGRwkbGeometryType +// Returns "UNKNOWN" if no match +std::string getWkbGeomString_(OGRwkbGeometryType eType); + +// Internal lookup of OGRFieldType by string descriptor +// Error if no match +OGRFieldType getOFT_(std::string fld_type); + +// Internal lookup of OGR field type string by OGRFieldType +// Returns empty string if no match, with warning emitted +std::string getOFTString_(OGRFieldType eType); + +// Internal lookup of OGRFieldSubType by string descriptor +// Returns OFSTNone if no match +OGRFieldSubType getOFTSubtype_(std::string fld_subtype); + +// Internal lookup of OGR field subtype string by OGRFieldSubType +// Returns "OFSTNone" if no match +std::string getOFTSubtypeString_(OGRFieldSubType eType); + + bool ogr_ds_exists(std::string dsn, bool with_update); std::string ogr_ds_format(std::string dsn); @@ -138,6 +163,7 @@ bool ogr_layer_exists(std::string dsn, std::string layer); SEXP ogr_layer_test_cap(std::string dsn, std::string layer, bool with_update); +// internal CreateLayer OGRLayerH 
CreateLayer_(GDALDatasetH hDS, std::string layer, Rcpp::Nullable layer_defn, std::string geom_type, std::string srs, @@ -154,6 +180,7 @@ SEXP ogr_layer_field_names(std::string dsn, std::string layer); int ogr_field_index(std::string dsn, std::string layer, std::string fld_name); +// internal CreateField bool CreateField_(GDALDatasetH hDS, OGRLayerH hLayer, std::string fld_name, std::string fld_type, std::string fld_subtype, int fld_width, int fld_precision, bool is_nullable, bool is_ignored, @@ -166,6 +193,7 @@ bool ogr_field_create(std::string dsn, std::string layer, bool is_ignored, bool is_unique, std::string default_value); +// internal CreateGeomField bool CreateGeomField_(GDALDatasetH hDS, OGRLayerH hLayer, std::string fld_name, OGRwkbGeometryType eGeomType, std::string srs, bool is_nullable, bool is_ignored); From 834d2acdc7e06db7222e803eaf6891b06b1bcb72 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Mon, 8 Jul 2024 22:59:31 -0600 Subject: [PATCH 41/53] code clean up and class variable names --- src/gdalvector.cpp | 274 ++++++++++++++++++++++----------------------- src/gdalvector.h | 18 +-- 2 files changed, 144 insertions(+), 148 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index a5ab5ecad..6e0a13a1a 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -13,7 +13,6 @@ #include "cpl_port.h" #include "cpl_string.h" #include "cpl_time.h" -// #include "ogrsf_frmts.h" #include "ogr_srs_api.h" #include "gdalraster.h" @@ -21,15 +20,15 @@ #include "ogr_util.h" GDALVector::GDALVector() : - dsn_in(""), - layer_in(""), - is_sql_in(false), - open_options_in(Rcpp::CharacterVector::create()), - spatial_filter_in(""), - dialect_in(""), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(nullptr) {} + m_dsn(""), + m_layer_name(""), + m_is_sql(false), + m_open_options(Rcpp::CharacterVector::create()), + m_spatial_filter(""), + m_dialect(""), + m_hDataset(nullptr), + m_eAccess(GA_ReadOnly), + m_hLayer(nullptr) {} 
GDALVector::GDALVector(Rcpp::CharacterVector dsn) : GDALVector(dsn, "", true, Rcpp::CharacterVector::create(), @@ -55,47 +54,47 @@ GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, Rcpp::Nullable open_options, std::string spatial_filter, std::string dialect = "") : - layer_in(layer), - open_options_in(open_options.isNotNull() ? open_options : - Rcpp::CharacterVector::create()), - spatial_filter_in(spatial_filter), - dialect_in(dialect), - hDataset(nullptr), - eAccess(GA_ReadOnly), - hLayer(nullptr) { + m_layer_name(layer), + m_open_options(open_options.isNotNull() ? open_options : + Rcpp::CharacterVector::create()), + m_spatial_filter(spatial_filter), + m_dialect(dialect), + m_hDataset(nullptr), + m_eAccess(GA_ReadOnly), + m_hLayer(nullptr) { - dsn_in = Rcpp::as(check_gdal_filename(dsn)); + m_dsn = Rcpp::as(check_gdal_filename(dsn)); open(read_only); } void GDALVector::open(bool read_only) { - if (dsn_in == "") + if (m_dsn == "") Rcpp::stop("DSN is not set"); - if (hDataset != nullptr) { - if (is_sql_in) - GDALDatasetReleaseResultSet(hDataset, hLayer); - GDALReleaseDataset(hDataset); - hDataset = nullptr; - hLayer = nullptr; + if (m_hDataset != nullptr) { + if (m_is_sql) + GDALDatasetReleaseResultSet(m_hDataset, m_hLayer); + GDALReleaseDataset(m_hDataset); + m_hDataset = nullptr; + m_hLayer = nullptr; } if (read_only) - eAccess = GA_ReadOnly; + m_eAccess = GA_ReadOnly; else - eAccess = GA_Update; + m_eAccess = GA_Update; - std::vector dsoo(open_options_in.size() + 1); - if (open_options_in.size() > 0) { - for (R_xlen_t i = 0; i < open_options_in.size(); ++i) { - dsoo[i] = (char *) (open_options_in[i]); + std::vector dsoo(m_open_options.size() + 1); + if (m_open_options.size() > 0) { + for (R_xlen_t i = 0; i < m_open_options.size(); ++i) { + dsoo[i] = (char *) (m_open_options[i]); } } dsoo.push_back(nullptr); OGRGeometryH hGeom_filter = nullptr; - if (spatial_filter_in != "") { - char* pszWKT = (char*) spatial_filter_in.c_str(); + if 
(m_spatial_filter != "") { + char* pszWKT = (char*) m_spatial_filter.c_str(); if (OGR_G_CreateFromWkt(&pszWKT, nullptr, &hGeom_filter) != OGRERR_NONE) { if (hGeom_filter != nullptr) @@ -110,35 +109,35 @@ void GDALVector::open(bool read_only) { else nOpenFlags |= GDAL_OF_UPDATE; - hDataset = GDALOpenEx(dsn_in.c_str(), nOpenFlags, nullptr, - dsoo.data(), nullptr); - if (hDataset == nullptr) + m_hDataset = GDALOpenEx(m_dsn.c_str(), nOpenFlags, nullptr, + dsoo.data(), nullptr); + if (m_hDataset == nullptr) Rcpp::stop("open dataset failed"); - const char* pszDialect = dialect_in.c_str(); + const char* pszDialect = m_dialect.c_str(); - if (layer_in == "") { - is_sql_in = false; - hLayer = GDALDatasetGetLayer(hDataset, 0); + if (m_layer_name == "") { + m_is_sql = false; + m_hLayer = GDALDatasetGetLayer(m_hDataset, 0); } - else if (STARTS_WITH_CI(layer_in.c_str(), "SELECT ")) { - is_sql_in = true; + else if (STARTS_WITH_CI(m_layer_name.c_str(), "SELECT ")) { + m_is_sql = true; if (EQUAL(pszDialect, "SQLite") && !has_spatialite()) - Rcpp::warning("spatialite not available"); - hLayer = GDALDatasetExecuteSQL(hDataset, layer_in.c_str(), - hGeom_filter, pszDialect); + Rcpp::warning("SpatiaLite not available"); + m_hLayer = GDALDatasetExecuteSQL(m_hDataset, m_layer_name.c_str(), + hGeom_filter, pszDialect); } else { - is_sql_in = false; - hLayer = GDALDatasetGetLayerByName(hDataset, layer_in.c_str()); + m_is_sql = false; + m_hLayer = GDALDatasetGetLayerByName(m_hDataset, m_layer_name.c_str()); } - if (hLayer == nullptr) { - GDALReleaseDataset(hDataset); + if (m_hLayer == nullptr) { + GDALReleaseDataset(m_hDataset); Rcpp::stop("failed to get layer"); } else { - OGR_L_ResetReading(hLayer); + OGR_L_ResetReading(m_hLayer); } if (hGeom_filter != nullptr) @@ -146,21 +145,21 @@ void GDALVector::open(bool read_only) { } bool GDALVector::isOpen() const { - if (hDataset == nullptr) + if (m_hDataset == nullptr) return false; else return true; } std::string GDALVector::getDsn() const { - 
return dsn_in; + return m_dsn; } Rcpp::CharacterVector GDALVector::getFileList() const { checkAccess_(GA_ReadOnly); char **papszFiles; - papszFiles = GDALGetFileList(hDataset); + papszFiles = GDALGetFileList(m_hDataset); int items = CSLCount(papszFiles); if (items > 0) { @@ -180,53 +179,53 @@ Rcpp::CharacterVector GDALVector::getFileList() const { std::string GDALVector::getDriverShortName() const { checkAccess_(GA_ReadOnly); - GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); + GDALDriverH hDriver = GDALGetDatasetDriver(m_hDataset); return GDALGetDriverShortName(hDriver); } std::string GDALVector::getDriverLongName() const { checkAccess_(GA_ReadOnly); - GDALDriverH hDriver = GDALGetDatasetDriver(hDataset); + GDALDriverH hDriver = GDALGetDatasetDriver(m_hDataset); return GDALGetDriverLongName(hDriver); } std::string GDALVector::getName() const { checkAccess_(GA_ReadOnly); - return OGR_L_GetName(hLayer); + return OGR_L_GetName(m_hLayer); } bool GDALVector::testCapability(std::string capability) const { checkAccess_(GA_ReadOnly); - return OGR_L_TestCapability(hLayer, capability.c_str()); + return OGR_L_TestCapability(m_hLayer, capability.c_str()); } std::string GDALVector::getFIDColumn() const { checkAccess_(GA_ReadOnly); - return OGR_L_GetFIDColumn(hLayer); + return OGR_L_GetFIDColumn(m_hLayer); } std::string GDALVector::getGeomType() const { checkAccess_(GA_ReadOnly); - OGRwkbGeometryType eType = OGR_L_GetGeomType(hLayer); - return OGRGeometryTypeToName(eType); + OGRwkbGeometryType eType = OGR_L_GetGeomType(m_hLayer); + return getWkbGeomString_(eType); } std::string GDALVector::getGeometryColumn() const { checkAccess_(GA_ReadOnly); - return OGR_L_GetGeometryColumn(hLayer); + return OGR_L_GetGeometryColumn(m_hLayer); } std::string GDALVector::getSpatialRef() const { // OGRLayer::GetSpatialRef() as WKT string checkAccess_(GA_ReadOnly); - OGRSpatialReferenceH hSRS = OGR_L_GetSpatialRef(hLayer); + OGRSpatialReferenceH hSRS = OGR_L_GetSpatialRef(m_hLayer); if (hSRS 
== nullptr) Rcpp::stop("could not obtain spatial reference"); char *pszSRS_WKT = nullptr; @@ -239,7 +238,7 @@ std::string GDALVector::getSpatialRef() const { } Rcpp::NumericVector GDALVector::bbox() { - // Note: bForce=true in tha call to OGR_L_GetExtent(), so the entire + // Note: bForce = true in the call to OGR_L_GetExtent(), so the entire // layer may be scanned to compute MBR. // see: testCapability("FastGetExtent") // Depending on the driver, a spatial filter may/may not be taken into @@ -247,7 +246,7 @@ Rcpp::NumericVector GDALVector::bbox() { checkAccess_(GA_ReadOnly); OGREnvelope envelope; - if (OGR_L_GetExtent(hLayer, &envelope, true) != OGRERR_NONE) + if (OGR_L_GetExtent(m_hLayer, &envelope, true) != OGRERR_NONE) Rcpp::stop("the extent of the layer cannot be determined"); Rcpp::NumericVector bbox_out = @@ -260,37 +259,30 @@ Rcpp::List GDALVector::getLayerDefn() const { checkAccess_(GA_ReadOnly); OGRFeatureDefnH hFDefn; - hFDefn = OGR_L_GetLayerDefn(hLayer); + hFDefn = OGR_L_GetLayerDefn(m_hLayer); if (hFDefn == nullptr) Rcpp::stop("failed to get layer definition"); Rcpp::List list_out = Rcpp::List::create(); - std::string sValue = ""; - int nValue = -1; bool bValue; - int iField; // attribute fields // TODO(ctoney): add field domain name - for (iField = 0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { + for (int iField = 0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { Rcpp::List list_fld_defn = Rcpp::List::create(); OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); if (hFieldDefn == nullptr) Rcpp::stop("could not obtain field definition"); OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); - sValue = getOFTString_(fld_type); - list_fld_defn.push_back(sValue, "type"); + list_fld_defn.push_back(getOFTString_(fld_type), "type"); OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); - sValue = getOFTSubtypeString_(fld_subtype); - list_fld_defn.push_back(sValue, "subtype"); + 
list_fld_defn.push_back(getOFTSubtypeString_(fld_subtype), "subtype"); - nValue = OGR_Fld_GetWidth(hFieldDefn); - list_fld_defn.push_back(nValue, "width"); + list_fld_defn.push_back(OGR_Fld_GetWidth(hFieldDefn), "width"); - nValue = OGR_Fld_GetPrecision(hFieldDefn); - list_fld_defn.push_back(nValue, "precision"); + list_fld_defn.push_back(OGR_Fld_GetPrecision(hFieldDefn), "precision"); bValue = OGR_Fld_IsNullable(hFieldDefn); list_fld_defn.push_back(bValue, "is_nullable"); @@ -298,10 +290,9 @@ Rcpp::List GDALVector::getLayerDefn() const { bValue = OGR_Fld_IsUnique(hFieldDefn); list_fld_defn.push_back(bValue, "is_unique"); + std::string sValue = ""; if (OGR_Fld_GetDefault(hFieldDefn) != nullptr) sValue = std::string(OGR_Fld_GetDefault(hFieldDefn)); - else - sValue = ""; list_fld_defn.push_back(sValue, "default"); bValue = OGR_Fld_IsIgnored(hFieldDefn); @@ -324,15 +315,23 @@ Rcpp::List GDALVector::getLayerDefn() const { OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); list_geom_fld_defn.push_back(getWkbGeomString_(eType), "type"); - OGRSpatialReferenceH hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); - if (hSRS == nullptr) - Rcpp::stop("could not obtain geometry SRS"); - char *pszSRS_WKT = nullptr; - if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { - Rcpp::stop("error exporting geometry SRS to WKT"); + OGRSpatialReferenceH hSRS = nullptr; + hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); + if (hSRS == nullptr) { + Rcpp::warning("could not obtain geometry field SRS"); + list_geom_fld_defn.push_back(NA_STRING, "srs"); + } + else { + char *pszSRS_WKT = nullptr; + if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { + Rcpp::warning("error exporting geometry SRS to WKT"); + list_geom_fld_defn.push_back(NA_STRING, "srs"); + } + else { + list_geom_fld_defn.push_back(std::string(pszSRS_WKT), "srs"); + } + CPLFree(pszSRS_WKT); } - sValue = std::string(pszSRS_WKT); - list_geom_fld_defn.push_back(sValue, "srs"); bValue = OGR_GFld_IsNullable(hGeomFldDefn); 
list_geom_fld_defn.push_back(bValue, "is_nullable"); @@ -346,10 +345,7 @@ Rcpp::List GDALVector::getLayerDefn() const { std::string geomFldName(OGR_GFld_GetNameRef(hGeomFldDefn)); if (geomFldName == "") geomFldName = defaultGeomFldName; - list_out.push_back(list_geom_fld_defn, geomFldName); - - CPLFree(pszSRS_WKT); } return list_out; @@ -362,7 +358,7 @@ void GDALVector::setAttributeFilter(std::string query) { if (query != "") query_in = query.c_str(); - if (OGR_L_SetAttributeFilter(hLayer, query_in) != OGRERR_NONE) + if (OGR_L_SetAttributeFilter(m_hLayer, query_in) != OGRERR_NONE) Rcpp::stop("error setting filter, possibly in the query expression"); else m_attr_filter = query; @@ -374,13 +370,13 @@ void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { if (Rcpp::any(Rcpp::is_na(bbox))) Rcpp::stop("'bbox' has one or more 'NA' values"); - OGR_L_SetSpatialFilterRect(hLayer, bbox[0], bbox[1], bbox[2], bbox[3]); + OGR_L_SetSpatialFilterRect(m_hLayer, bbox[0], bbox[1], bbox[2], bbox[3]); } void GDALVector::clearSpatialFilter() { checkAccess_(GA_ReadOnly); - OGR_L_SetSpatialFilter(hLayer, nullptr); + OGR_L_SetSpatialFilter(m_hLayer, nullptr); } double GDALVector::getFeatureCount() { @@ -390,7 +386,7 @@ double GDALVector::getFeatureCount() { // see: testCapability("FastFeatureCount") checkAccess_(GA_ReadOnly); - return static_cast(OGR_L_GetFeatureCount(hLayer, true)); + return static_cast(OGR_L_GetFeatureCount(m_hLayer, true)); } SEXP GDALVector::getNextFeature() { @@ -409,9 +405,11 @@ SEXP GDALVector::getNextFeature() { } SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { - // fid must be an R numeric vector of length 1 - // i.e., a scalar but use NumericVector here since it can carry the class - // attribute for integer64 + // fid must be an R numeric vector of length 1, i.e., a scalar but using + // NumericVector since it can carry the class attribute for integer64. 
+ // Instead of wrapping OGR_L_GetFeature(), we use fetch() because it + // already builds the return data structure. + checkAccess_(GA_ReadOnly); if (fid.size() != 1) @@ -427,13 +425,13 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { std::string orig_filter = m_attr_filter; OGRGeometryH hOrigFilterGeom = nullptr; OGRGeometryH hFilterGeom = nullptr; - hFilterGeom = OGR_L_GetSpatialFilter(hLayer); + hFilterGeom = OGR_L_GetSpatialFilter(m_hLayer); if (hFilterGeom != nullptr) { hOrigFilterGeom = OGR_G_Clone(hFilterGeom); hFilterGeom = nullptr; } - // filter based on FID + // filter on FID clearSpatialFilter(); setAttributeFilter("FID = " + std::to_string(fid_in)); @@ -441,7 +439,7 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { // restore original filters setAttributeFilter(orig_filter); - OGR_L_SetSpatialFilter(hLayer, hOrigFilterGeom); + OGR_L_SetSpatialFilter(m_hLayer, hOrigFilterGeom); if (hOrigFilterGeom != nullptr) { OGR_G_DestroyGeometry(hOrigFilterGeom); hOrigFilterGeom = nullptr; @@ -461,35 +459,32 @@ SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { void GDALVector::resetReading() { checkAccess_(GA_ReadOnly); - OGR_L_ResetReading(hLayer); + OGR_L_ResetReading(m_hLayer); } Rcpp::DataFrame GDALVector::fetch(double n) { - // Analog of DBI::dbFetch(), mostly following its specification: + // Analog of DBI::dbFetch(), generally following its specification: // https://dbi.r-dbi.org/reference/dbFetch.html#specification - // n should be passed as a whole number (integer or numeric). A value of - // Inf for the n argument is supported and also returns the full result. 
checkAccess_(GA_ReadOnly); OGRFeatureDefnH hFDefn = nullptr; - hFDefn = OGR_L_GetLayerDefn(hLayer); + hFDefn = OGR_L_GetLayerDefn(m_hLayer); if (hFDefn == nullptr) Rcpp::stop("failed to get layer definition"); bool fetch_all = true; size_t fetch_num = 0; - if (n == -1 || (n > 0 && std::isinf(n))) { + if (n == -1 || (std::isinf(n) && n > 0)) { resetReading(); - fetch_num = OGR_L_GetFeatureCount(hLayer, true); + fetch_num = OGR_L_GetFeatureCount(m_hLayer, true); } else if (Rcpp::NumericVector::is_na(n)) { - fetch_num = OGR_L_GetFeatureCount(hLayer, true); + fetch_num = OGR_L_GetFeatureCount(m_hLayer, true); } - else if (n >= 0 && std::isfinite(n)) { + else if (n >= 0) { if (n > 9007199254740992) Rcpp::stop("'n' is out of range"); - fetch_all = false; fetch_num = static_cast(std::trunc(n)); } @@ -501,8 +496,6 @@ Rcpp::DataFrame GDALVector::fetch(double n) { if (fetch_num == 0) return df; - OGRFeatureH hFeat = nullptr; - size_t row_num = 0; int nFields = OGR_FD_GetFieldCount(hFDefn); int nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); bool include_geom = true; @@ -514,7 +507,7 @@ Rcpp::DataFrame GDALVector::fetch(double n) { EQUAL(returnGeomAs.c_str(), "WKT") || EQUAL(returnGeomAs.c_str(), "WKT_ISO") || EQUAL(returnGeomAs.c_str(), "TYPE_NAME"))) { - Rcpp::stop("unrecognized value of field 'returnGeomAs'"); + Rcpp::stop("unsupported value of field 'returnGeomAs'"); } OGRwkbByteOrder eOrder; @@ -525,7 +518,10 @@ Rcpp::DataFrame GDALVector::fetch(double n) { else Rcpp::stop("invalid value of field 'wkbByteOrder'"); - while ((hFeat = OGR_L_GetNextFeature(hLayer)) != nullptr) { + OGRFeatureH hFeat = nullptr; + size_t row_num = 0; + + while ((hFeat = OGR_L_GetNextFeature(m_hLayer)) != nullptr) { const int64_t fid = static_cast(OGR_F_GetFID(hFeat)); Rcpp::NumericVector fid_col = df[0]; fid_col[row_num] = Rcpp::toInteger64(fid)[0]; @@ -613,7 +609,8 @@ Rcpp::DataFrame GDALVector::fetch(double n) { Rcpp::List col = df[i + 1]; if (has_value) { int nDataSize = 0; - GByte *pabyData 
= OGR_F_GetFieldAsBinary(hFeat, i, &nDataSize); + GByte *pabyData = OGR_F_GetFieldAsBinary(hFeat, i, + &nDataSize); if (nDataSize > 0) { Rcpp::RawVector blob(nDataSize); std::memcpy(&blob[0], pabyData, nDataSize); @@ -708,7 +705,6 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } } else { - // use string if (has_value) { Rcpp::CharacterVector col = df[i + 1]; col[row_num] = OGR_F_GetFieldAsString(hFeat, i); @@ -784,9 +780,9 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } if (fetch_all) { - hFeat = OGR_L_GetNextFeature(hLayer); + hFeat = OGR_L_GetNextFeature(m_hLayer); if (hFeat != nullptr) { - Rcpp::Rcout << "getFeatureCount() reported " << row_num + Rcpp::Rcout << "getFeatureCount() reported: " << row_num << std::endl; std::string msg = "more features potentially available than reported by getFeatureCount()"; @@ -888,7 +884,7 @@ void GDALVector::layerIntersection( } OGRErr err = OGR_L_Intersection( - hLayer, + m_hLayer, method_layer.getOGRLayerH_(), result_layer.getOGRLayerH_(), opt_list.data(), @@ -916,7 +912,7 @@ void GDALVector::layerUnion( } OGRErr err = OGR_L_Union( - hLayer, + m_hLayer, method_layer.getOGRLayerH_(), result_layer.getOGRLayerH_(), opt_list.data(), @@ -944,7 +940,7 @@ void GDALVector::layerSymDifference( } OGRErr err = OGR_L_SymDifference( - hLayer, + m_hLayer, method_layer.getOGRLayerH_(), result_layer.getOGRLayerH_(), opt_list.data(), @@ -972,7 +968,7 @@ void GDALVector::layerIdentity( } OGRErr err = OGR_L_Identity( - hLayer, + m_hLayer, method_layer.getOGRLayerH_(), result_layer.getOGRLayerH_(), opt_list.data(), @@ -1000,7 +996,7 @@ void GDALVector::layerUpdate( } OGRErr err = OGR_L_Update( - hLayer, + m_hLayer, method_layer.getOGRLayerH_(), result_layer.getOGRLayerH_(), opt_list.data(), @@ -1028,7 +1024,7 @@ void GDALVector::layerClip( } OGRErr err = OGR_L_Clip( - hLayer, + m_hLayer, method_layer.getOGRLayerH_(), result_layer.getOGRLayerH_(), opt_list.data(), @@ -1056,7 +1052,7 @@ void GDALVector::layerErase( } OGRErr err = OGR_L_Erase( 
- hLayer, + m_hLayer, method_layer.getOGRLayerH_(), result_layer.getOGRLayerH_(), opt_list.data(), @@ -1068,12 +1064,12 @@ void GDALVector::layerErase( } void GDALVector::close() { - if (hDataset != nullptr) { - if (is_sql_in) - GDALDatasetReleaseResultSet(hDataset, hLayer); - GDALReleaseDataset(hDataset); - hDataset = nullptr; - hLayer = nullptr; + if (m_hDataset != nullptr) { + if (m_is_sql) + GDALDatasetReleaseResultSet(m_hDataset, m_hLayer); + GDALReleaseDataset(m_hDataset); + m_hDataset = nullptr; + m_hLayer = nullptr; } } @@ -1085,20 +1081,20 @@ void GDALVector::checkAccess_(GDALAccess access_needed) const { if (!isOpen()) Rcpp::stop("dataset is not open"); - if (access_needed == GA_Update && eAccess == GA_ReadOnly) + if (access_needed == GA_Update && m_eAccess == GA_ReadOnly) Rcpp::stop("dataset is read-only"); } OGRLayerH GDALVector::getOGRLayerH_() const { checkAccess_(GA_ReadOnly); - return hLayer; + return m_hLayer; } SEXP GDALVector::initDF_(R_xlen_t nrow) const { - // initialize a data frame with nrow rows for the layer definition + // initialize a data frame based on the layer definition OGRFeatureDefnH hFDefn = nullptr; - hFDefn = OGR_L_GetLayerDefn(hLayer); + hFDefn = OGR_L_GetLayerDefn(m_hLayer); if (hFDefn == nullptr) Rcpp::stop("failed to get layer definition"); @@ -1107,7 +1103,7 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { if (!EQUAL(returnGeomAs.c_str(), "NONE")) nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); - // construct the data frame as list and convert at return + // construct as list and convert to data frame at return Rcpp::List df(1 + nFields + nGeomFields); Rcpp::CharacterVector col_names(1 + nFields + nGeomFields); @@ -1159,8 +1155,8 @@ SEXP GDALVector::initDF_(R_xlen_t nrow) const { } else if (fld_type == OFTDateTime) { Rcpp::NumericVector v(nrow, NA_REAL); - Rcpp::CharacterVector class_names = {"POSIXt", "POSIXct"}; - v.attr("class") = class_names; + Rcpp::CharacterVector classes = {"POSIXt", "POSIXct"}; + v.attr("class") = 
classes; v.attr("tzone") = "UTC"; df[i + 1] = v; col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); diff --git a/src/gdalvector.h b/src/gdalvector.h index 4b7d7df16..588b4b0e5 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -22,15 +22,15 @@ typedef enum {GA_ReadOnly = 0, GA_Update = 1} GDALAccess; class GDALVector { private: - std::string dsn_in; - std::string layer_in; // layer name or sql statement - bool is_sql_in; - Rcpp::CharacterVector open_options_in; - std::string spatial_filter_in; - std::string dialect_in; - GDALDatasetH hDataset; - GDALAccess eAccess; - OGRLayerH hLayer; + std::string m_dsn; + std::string m_layer_name; // layer name or sql statement + bool m_is_sql; + Rcpp::CharacterVector m_open_options; + std::string m_spatial_filter; + std::string m_dialect; + GDALDatasetH m_hDataset; + GDALAccess m_eAccess; + OGRLayerH m_hLayer; std::string m_attr_filter = ""; public: From 8da722105393a68f09cd178fc3f6344cce562865 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Tue, 9 Jul 2024 01:21:35 -0600 Subject: [PATCH 42/53] retain millisecond accuracy for OFTDateTime --- src/gdalvector.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 6e0a13a1a..90bb1f41b 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -567,13 +567,13 @@ Rcpp::DataFrame GDALVector::fetch(double n) { col[row_num] = OGR_F_GetFieldAsDouble(hFeat, i); } else if ((fld_type == OFTDate || fld_type == OFTDateTime) - && has_value) { + && has_value) { - int yr = 9999; - int mo, day = 9; - int hr, min, sec, tzflag = 0; - if (OGR_F_GetFieldAsDateTime(hFeat, i, &yr, &mo, &day, - &hr, &min, &sec, &tzflag)) { + Rcpp::NumericVector col = df[i + 1]; + int yr, mo, day, hr, min, tzflag = 0; + float sec = 0; + if (OGR_F_GetFieldAsDateTimeEx(hFeat, i, &yr, &mo, &day, + &hr, &min, &sec, &tzflag)) { struct tm brokendowntime; brokendowntime.tm_year = yr - 1900; @@ -581,12 +581,10 @@ Rcpp::DataFrame 
GDALVector::fetch(double n) { brokendowntime.tm_mday = day; brokendowntime.tm_hour = hr; brokendowntime.tm_min = min; - brokendowntime.tm_sec = sec; + brokendowntime.tm_sec = static_cast(sec); int64_t nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime); - Rcpp::NumericVector col = df[i + 1]; if (fld_type == OFTDate) { - const int64_t nUnixTime_days = nUnixTime / 86400; - col[row_num] = static_cast(nUnixTime_days); + col[row_num] = static_cast(nUnixTime / 86400); } else { // OFTDateTime @@ -595,13 +593,14 @@ Rcpp::DataFrame GDALVector::fetch(double n) { const int tzoffset = std::abs(tzflag - 100) * 15; const int tzhour = tzoffset / 60; const int tzmin = tzoffset - tzhour * 60; - const int offset = tzhour * 3600 + tzmin * 60; + const int offset_sec = tzhour * 3600 + tzmin * 60; if (tzflag >= 100) - nUnixTime -= offset; + nUnixTime -= offset_sec; else - nUnixTime += offset; + nUnixTime += offset_sec; } - col[row_num] = static_cast(nUnixTime); + col[row_num] = static_cast( + nUnixTime + std::fmod(sec, 1)); } } } From 30f4d14366e8022ac25880ad9b31279f2ec05c8e Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Tue, 16 Jul 2024 22:56:42 -0600 Subject: [PATCH 43/53] Documentation for class GDALVector (WIP) --- R/gdalvector.R | 114 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/R/gdalvector.R b/R/gdalvector.R index 4283b26c1..ff6355b41 100644 --- a/R/gdalvector.R +++ b/R/gdalvector.R @@ -1 +1,115 @@ +#' @name GDALVector-class +#' +#' @aliases +#' Rcpp_GDALVector Rcpp_GDALVector-class GDALVector +#' +#' @title Class encapsulating a vector layer in a GDAL dataset +#' +#' @description +#' `GDALVector` provides an interface for accessing a vector layer in a GDAL +#' dataset and calling methods on the underlying `OGRLayer` and `OGRFeature` +#' objects. See \url{https://gdal.org/api/index.html} for details of the GDAL +#' Vector API. 
+#' +#' @param dsn Character string containing the data source name (usually a +#' filename or database connection string, see GDAL vector format +#' descriptions: \url{https://gdal.org/drivers/vector/index.html}). +#' @param layer Character string containing either the name of a layer of +#' features within the data source, or an SQL SELECT statement to be executed +#' against the data source that defines a layer via its result set. +#' @param read_only Logical. `TRUE` to open the layer read-only (the default), +#' or `FALSE` to open with write access. +#' @param open_options Optional character vector of `NAME=VALUE` pairs +#' specifying layer open options. +#' @param spatial_filter Optional character string containing a geometry in +#' Well Known Text (WKT) format which represents a spatial filter. +#' @param dialect Optional character string to control the statement dialect +#' when SQL is used to define the layer. By default, the OGR SQL engine will +#' be used, except for RDBMS drivers that will use their dedicated SQL engine, +#' unless `"OGRSQL"` is explicitly passed as the dialect. The `"SQLITE"` +#' dialect can also be used +#' (see \url{https://gdal.org/user/ogr_sql_sqlite_dialect.html}). +#' @returns An object of class `GDALVector` which contains pointers to the +#' opened layer and the dataset that contains it, and methods that operate on +#' the layer as described in Details. `GDALVector` is a C++ class exposed +#' directly to R (via `RCPP_EXPOSED_CLASS`). Fields and methods of the class +#' are accessed using the `$` operator. The read/write fields are used for +#' per-object settings. 
+#' +#' @section Usage: +#' \preformatted{ +#' ## Constructors +#' # read-only by default: +#' ds <- new(GDALVector, dsn) +#' # for update access: +#' ds <- new(GDALVector, dsn, read_only = FALSE) +#' # to use dataset open options: +#' ds <- new(GDALVector, dsn, read_only = TRUE|FALSE, open_options) +#' # to open without shared mode: +#' new(GDALVector, dsn, read_only, open_options, shared = FALSE) +#' +#' ## Read/write fields (see Details) +#' +#' ## Methods (see Details) +#' ds$open(read_only) +#' ds$isOpen() +#' ds$getDsn() +#' ds$getFileList() +#' +#' ds$getDriverShortName() +#' ds$getDriverLongName() +#' +#' ds$close() +#' } +#' @section Details: +#' +#' \code{new(GDALVector, dsn)} +#' Constructor. Returns an object of class `GDALVector`. +#' `read_only` defaults to `TRUE` if not specified. +#' +#' \code{$quiet} +#' Read/write field. +#' A logical value, `FALSE` by default. This field can be set to `TRUE` which +#' will suppress various messages as well as progress reporting for potentially +#' long-running processes such as building overviews and computation of +#' statistics and histograms. +#' +#' \code{$open(read_only)} +#' (Re-)opens the raster dataset on the existing dsn. Use this method to +#' open a dataset that has been closed using \code{$close()}. May be used to +#' re-open a dataset with a different read/write access (`read_only` set to +#' `TRUE` or `FALSE`). The method will first close an open dataset, so it is +#' not required to call \code{$close()} explicitly in this case. +#' No return value, called for side effects. +#' +#' \code{$isOpen()} +#' Returns logical indicating whether the associated vector dataset is open. +#' +#' \code{$getDsn()} +#' Returns a character string containing the `dsn` associated with this +#' `GDALVector` object (`dsn` originally used to open the layer). +#' +#' \code{$getFileList()} +#' Returns a character vector of files believed to be part of this dataset. 
+#' If it returns an empty string (`""`) it means there is believed to be no +#' local file system files associated with the dataset (e.g., a virtual file +#' system). The returned filenames will normally be relative or absolute +#' paths depending on the path used to originally open the dataset. +#' +#' \code{$close()} +#' Closes the GDAL dataset (no return value, called for side effects). +#' Calling \code{$close()} results in proper cleanup, and flushing of any +#' pending writes. +#' The `GDALVector` object is still available after calling \code{$close()}. +#' The dataset can be re-opened on the existing \code{dsn} with +#' \code{$open(read_only=TRUE)} or \code{$open(read_only=FALSE)}. +#' +#' @note +#' +#' @seealso +#' +#' @examples +#' +NULL + Rcpp::loadModule("mod_GDALVector", TRUE) From b1533b178782adcebc52183e78705f0267479f35 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 21 Jul 2024 15:56:12 -0600 Subject: [PATCH 44/53] GDALVector::testCapability(): return list instead --- src/gdalvector.cpp | 48 +++++++++++++++++++++++++++++++++++++++++++--- src/gdalvector.h | 2 +- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 90bb1f41b..0a09e526f 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -90,7 +90,7 @@ void GDALVector::open(bool read_only) { dsoo[i] = (char *) (m_open_options[i]); } } - dsoo.push_back(nullptr); + dsoo[m_open_options.size()] = nullptr; OGRGeometryH hGeom_filter = nullptr; if (m_spatial_filter != "") { @@ -196,10 +196,52 @@ std::string GDALVector::getName() const { return OGR_L_GetName(m_hLayer); } -bool GDALVector::testCapability(std::string capability) const { +Rcpp::List GDALVector::testCapability() const { checkAccess_(GA_ReadOnly); - return OGR_L_TestCapability(m_hLayer, capability.c_str()); + Rcpp::List capabilities = Rcpp::List::create( + Rcpp::Named("RandomRead") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCRandomRead)), + Rcpp::Named("SequentialWrite") = 
static_cast( + OGR_L_TestCapability(m_hLayer, OLCSequentialWrite)), + Rcpp::Named("RandomWrite") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCRandomWrite)), +#if GDAL_VERSION_NUM >= 3060000 + Rcpp::Named("UpsertFeature") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCUpsertFeature)), +#endif + Rcpp::Named("FastSpatialFilter") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastSpatialFilter)), + Rcpp::Named("FastFeatureCount") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastFeatureCount)), + Rcpp::Named("FastGetExtent") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastGetExtent)), + Rcpp::Named("FastSetNextByIndex") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastSetNextByIndex)), + Rcpp::Named("CreateField") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCCreateField)), + Rcpp::Named("CreateGeomField") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCCreateGeomField)), + Rcpp::Named("DeleteField") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCDeleteField)), + Rcpp::Named("ReorderFields") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCReorderFields)), + Rcpp::Named("AlterFieldDefn") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCAlterFieldDefn)), +#if GDAL_VERSION_NUM >= 3060000 + Rcpp::Named("AlterGeomFieldDefn") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCAlterGeomFieldDefn)), +#endif + Rcpp::Named("DeleteFeature") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCDeleteFeature)), + Rcpp::Named("StringsAsUTF8") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCStringsAsUTF8)), + Rcpp::Named("Transactions") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCTransactions)), + Rcpp::Named("CurveGeometries") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCCurveGeometries))); + + return capabilities; } std::string GDALVector::getFIDColumn() const { diff --git a/src/gdalvector.h b/src/gdalvector.h index 588b4b0e5..a01d02f3b 100644 --- a/src/gdalvector.h +++ b/src/gdalvector.h @@ -56,7 +56,7 @@ class 
GDALVector { std::string getDriverLongName() const; std::string getName() const; - bool testCapability(std::string capability) const; + Rcpp::List testCapability() const; std::string getFIDColumn() const; std::string getGeomType() const; std::string getGeometryColumn() const; From 59bf4f89b76e4597e58d4e507b2bbb1c9cc36d56 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 21 Jul 2024 15:58:20 -0600 Subject: [PATCH 45/53] Documentation GDALVector (fields and methods) --- R/gdalvector.R | 276 +++++++++++++++++++++++++++++---- man/GDALVector-class.Rd | 332 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 574 insertions(+), 34 deletions(-) create mode 100644 man/GDALVector-class.Rd diff --git a/R/gdalvector.R b/R/gdalvector.R index ff6355b41..7be707d36 100644 --- a/R/gdalvector.R +++ b/R/gdalvector.R @@ -7,26 +7,29 @@ #' #' @description #' `GDALVector` provides an interface for accessing a vector layer in a GDAL -#' dataset and calling methods on the underlying `OGRLayer` and `OGRFeature` -#' objects. See \url{https://gdal.org/api/index.html} for details of the GDAL +#' dataset and calling methods on the underlying `OGRLayer` object. +#' An object of class `GDALVector` persists an open connection to the dataset, +#' and exposes methods for retrieving layer information, setting attribute and +#' spatial filters, and reading/writing feature data. +#' See \url{https://gdal.org/api/index.html} for details of the GDAL #' Vector API. #' -#' @param dsn Character string containing the data source name (usually a -#' filename or database connection string, see GDAL vector format -#' descriptions: \url{https://gdal.org/drivers/vector/index.html}). +#' @param dsn Character string containing the data source name (DSN, usually a +#' filename or database connection string). See the GDAL vector format +#' descriptions at \url{https://gdal.org/drivers/vector/index.html}. 
#' @param layer Character string containing either the name of a layer of #' features within the data source, or an SQL SELECT statement to be executed #' against the data source that defines a layer via its result set. #' @param read_only Logical. `TRUE` to open the layer read-only (the default), #' or `FALSE` to open with write access. #' @param open_options Optional character vector of `NAME=VALUE` pairs -#' specifying layer open options. +#' specifying dataset open options. #' @param spatial_filter Optional character string containing a geometry in #' Well Known Text (WKT) format which represents a spatial filter. #' @param dialect Optional character string to control the statement dialect #' when SQL is used to define the layer. By default, the OGR SQL engine will #' be used, except for RDBMS drivers that will use their dedicated SQL engine, -#' unless `"OGRSQL"` is explicitly passed as the dialect. The `"SQLITE"` +#' unless `"OGRSQL"` is explicitly passed as the dialect. The `SQLITE` #' dialect can also be used #' (see \url{https://gdal.org/user/ogr_sql_sqlite_dialect.html}). 
#' @returns An object of class `GDALVector` which contains pointers to the @@ -41,72 +44,277 @@ #' ## Constructors #' # read-only by default: #' ds <- new(GDALVector, dsn) +#' ds <- new(GDALVector, dsn, layer) #' # for update access: -#' ds <- new(GDALVector, dsn, read_only = FALSE) -#' # to use dataset open options: -#' ds <- new(GDALVector, dsn, read_only = TRUE|FALSE, open_options) -#' # to open without shared mode: -#' new(GDALVector, dsn, read_only, open_options, shared = FALSE) +#' ds <- new(GDALVector, dsn, layer, read_only = FALSE) +#' # to use dataset open options +#' ds <- new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options) +#' # to specify a spatial filter and/or dialect +#' new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) #' #' ## Read/write fields (see Details) +#' lyr$defaultGeomFldName +#' lyr$returnGeomAs +#' lyr$wkbByteOrder #' #' ## Methods (see Details) -#' ds$open(read_only) -#' ds$isOpen() -#' ds$getDsn() -#' ds$getFileList() +#' lyr$open(read_only) +#' lyr$isOpen() +#' lyr$getDsn() +#' lyr$getFileList() +#' lyr$getDriverShortName() +#' lyr$getDriverLongName() #' -#' ds$getDriverShortName() -#' ds$getDriverLongName() +#' lyr$getName() +#' lyr$testCapability() +#' lyr$getFIDColumn() +#' lyr$getGeomType() +#' lyr$getGeometryColumn() +#' lyr$getSpatialRef() +#' lyr$bbox() +#' lyr$getLayerDefn() #' -#' ds$close() +#' lyr$setAttributeFilter(query) +#' lyr$setSpatialFilterRect(bbox) +#' lyr$clearSpatialFilter() +#' +#' lyr$getFeatureCount() +#' lyr$getNextFeature() +#' lyr$getFeature(fid) +#' lyr$resetReading() +#' +#' lyr$fetch(n) +#' +#' lyr$close() #' } #' @section Details: #' #' \code{new(GDALVector, dsn)} -#' Constructor. Returns an object of class `GDALVector`. -#' `read_only` defaults to `TRUE` if not specified. +#' Constructor.
If `layer` is omitted, it defaults to the first layer in the +#' data source by index, so this form of the constructor might be used for +#' single-layer formats like shapefile. `read_only` defaults to `TRUE`. +#' +#' \code{new(GDALVector, dsn, layer)} +#' Constructor specifying the name of a layer to open. `layer` may also be given +#' as an SQL SELECT statement to define a layer as the result set (read only). +#' +#' \code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE)} +#' Constructor specifying read/write access. The `layer` argument is required in +#' this form of the constructor, but may be given as empty string (`""`), in +#' which case the first layer in the data source by index will be opened. +#' +#' \code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options)} +#' Constructor specifying dataset open options as a character vector of +#' `NAME=VALUE` pairs. +#' +#' \code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect)} +#' Constructor specifying a spatial filter and/or SQL dialect. All arguments +#' are required in this form of the constructor, but `open_options` may be +#' `NULL`, and `spatial_filter` or `dialect` may be empty string (`""`). #' -#' \code{$quiet} -#' Read/write field. -#' A logical value, `FALSE` by default. This field can be set to `TRUE` which -#' will suppress various messages as well as progress reporting for potentially -#' long-running processes such as building overviews and computation of -#' statistics and histograms. +#' \code{$defaultGeomFldName} +#' Read/write field specifying a return column name when the geometry column +#' name in the source layer is empty, like with shapefiles etc. +#' Character string, defaults to `geometry`. +#' +#' \code{$returnGeomAs} +#' Read/write field specifying the return format for feature geometries. +#' Character string, one of `WKT`, `WKT_ISO`, `WKB`, `WKB_ISO`, `TYPE_NAME` or +#' `NONE` (the default).
+#' +#' \code{$wkbByteOrder} +#' Read/write field specifying the byte order for WKB geometries. +#' Character string, one of `LSB` (Least Significant Byte First, the default) or +#' `MSB` (Most Significant Byte First). #' #' \code{$open(read_only)} -#' (Re-)opens the raster dataset on the existing dsn. Use this method to -#' open a dataset that has been closed using \code{$close()}. May be used to -#' re-open a dataset with a different read/write access (`read_only` set to +#' (Re-)opens the vector layer on the existing DSN. Use this method to +#' open a layer that has been closed using \code{$close()}. May be used to +#' re-open a layer with a different read/write access (`read_only` set to #' `TRUE` or `FALSE`). The method will first close an open dataset, so it is #' not required to call \code{$close()} explicitly in this case. #' No return value, called for side effects. #' #' \code{$isOpen()} -#' Returns logical indicating whether the associated vector dataset is open. +#' Returns a `logical` scalar indicating whether the vector dataset is open. #' #' \code{$getDsn()} #' Returns a character string containing the `dsn` associated with this #' `GDALVector` object (`dsn` originally used to open the layer). #' #' \code{$getFileList()} -#' Returns a character vector of files believed to be part of this dataset. +#' Returns a character vector of files believed to be part of the data source. #' If it returns an empty string (`""`) it means there is believed to be no #' local file system files associated with the dataset (e.g., a virtual file #' system). The returned filenames will normally be relative or absolute #' paths depending on the path used to originally open the dataset. #' +#' \code{$getDriverShortName()} +#' Returns the short name of the vector format driver. +#' +#' \code{$getDriverLongName()} +#' Returns the long name of the vector format driver. +#' +#' \code{$getName()} +#' Returns the layer name.
+#' +#' \code{$testCapability()} +#' Tests whether the layer supports named capabilities based on the current +#' read/write access. Returns a list of capabilities with values `TRUE` or +#' `FALSE`. See `ogr_layer_test_cap()` for a list of the capabilities tested. +#' +#' \code{$getFIDColumn()} +#' Returns the name of the underlying database column being used as the FID +#' column, or empty string (`""`) if not supported. +#' +#' \code{$getGeomType()} +#' Returns the well known name of the layer geometry type as character string. +#' For layers with multiple geometry fields, this method only returns the +#' geometry type of the first geometry column. For other columns, use +#' `$getLayerDefn()`. For layers without any geometry field, this method +#' returns `NONE`. +#' +#' \code{$getGeometryColumn()} +#' Returns the name of the underlying database column being used as the geometry +#' column, or an empty string (`""`) if not supported. +#' For layers with multiple geometry fields, this method only returns the +#' name of the first geometry column. For other columns, use `$getLayerDefn()`. +#' +#' \code{$getSpatialRef()} +#' Returns a WKT string containing the spatial reference system for this layer. +#' +#' \code{$bbox()} +#' Returns a numeric vector of length four containing the bounding box +#' (xmin, ymin, xmax, ymax) for this layer. Note that `bForce = true` is set in +#' the underlying API call to `OGR_L_GetExtent()`, so the entire layer may be +#' scanned to compute the minimum bounding rectangle (see `FastGetExtent` in the +#' list returned by `$testCapability()`). Depending on the driver, a spatial +#' filter may/may not be taken into account, so it is safer to call `$bbox()` +#' without setting a spatial filter. +#' +#' \code{$getLayerDefn()} +#' Returns a list containing the OGR feature class definition for this layer +#' (a.k.a. layer definition).
The list contains zero or more attribute field +#' definitions, along with one or more geometry field definitions. +#' See [ogr_define] for details of the field and feature class definitions. +#' +#' \code{$setAttributeFilter(query)} +#' Sets an attribute query string to be used when fetching features via the +#' `$getNextFeature()` or `$fetch()` methods. +#' Only features for which `query` evaluates as true will be returned. +#' The query string should be in the format of an SQL WHERE clause, e.g., +#' `"population > 1000000 and population < 5000000"` where population is an +#' attribute in the layer. The query format is normally a SQL WHERE clause as +#' described in the ["WHERE"](https://gdal.org/user/ogr_sql_dialect.html#where) +#' section of the OGR SQL dialect documentation. +#' In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native +#' capabilities of the database may be used to interpret the WHERE clause, +#' in which case the capabilities will be broader than those of OGR SQL. +#' Note that installing a query string will generally result in resetting the +#' current reading position (as with `$resetReading()` below). +#' The `query` parameter may be set to `""` (empty string) to clear the current +#' attribute filter. +#' +#' \code{$setSpatialFilterRect(bbox)} +#' Sets a new rectangular spatial filter. This method sets a rectangle to be +#' used as a spatial filter when fetching features via the `$getNextFeature()` +#' or `$fetch()` methods. Only features that geometrically intersect the given +#' rectangle will be returned. +#' The x/y values in `bbox` (a `numeric` vector of length four: xmin, ymin, +#' xmax, ymax) should be in the same coordinate system as the layer as a whole +#' (as returned by `$getSpatialRef()`). +#' +#' \code{$clearSpatialFilter()} +#' Clears a spatial filter that was set with `$setSpatialFilterRect()`. +#' No return value, called for that side effect.
+#' +#' \code{$getFeatureCount()} +#' Returns the number of features in the layer. For dynamic databases the count +#' may not be exact. This method forces a count in the underlying API call +#' (i.e., `bForce = TRUE` in the call to `OGR_L_GetFeatureCount()`). Note that +#' some vector drivers will actually scan the entire layer once to count +#' features. The list element `FastFeatureCount` returned by +#' `$testCapability()` can be checked if this might be a concern. +#' The returned count takes the spatial and/or attribute filters into account. +#' Note that some driver implementations of this method may alter the read +#' cursor of the layer. +#' +#' \code{$getNextFeature()} +#' Fetches the next available feature from this layer. Only features matching the +#' current spatial and/or attribute filter (if defined) will be returned. +#' This method implements sequential access to the features of a layer. +#' The `$resetReading()` method can be used to start at the beginning again. +#' Returns a list with the unique feature identifier (FID), the attribute and +#' geometry field names, and their values. `NULL` is returned if no more +#' features are available. +#' +#' \code{$getFeature(fid)} +#' Returns a feature by its identifier. The value of `fid` must be a numeric +#' scalar, optionally carrying the `bit64::integer64` class attribute. +#' Success or failure of this operation is unaffected by any spatial or +#' attribute filters that may be in effect. +#' The list element `RandomRead` returned by `$testCapability()` can be checked +#' to establish if this layer supports efficient random access reading; +#' however, the call should always work if the feature exists since a fallback +#' implementation just scans all the features in the layer looking for the +#' desired feature. Returns a list with the unique feature identifier (FID), +#' the attribute and geometry field names, and their values, or `NULL` on +#' failure.
Note that sequential reads (with `$getNextFeature()`) are generally +#' considered interrupted by a `$getFeature()` call. +#' +#' \code{$resetReading()} +#' Reset feature reading to start on the first feature. No return value, called +#' for that side effect. +#' +#' \code{$fetch(n)} +#' Fetches the next `n` features from the layer and returns them as a data +#' frame. This allows retrieving the entire feature set, one page of features +#' at a time, or the remaining features (potentially with an attribute and/or +#' spatial filter applied). This function is an analog of `DBI::dbFetch()`, +#' where the `GDALVector` object itself is analogous to a DBI result set. +#' The `n` parameter is the maximum number of features to retrieve per fetch +#' given as `numeric` scalar (assumed to be a whole number, will be truncated). +#' Use `n = -1` or `n = Inf` to retrieve all pending features (resets reading +#' to the first feature). +#' Otherwise, `fetch(n)` can be called multiple times to perform forward paging +#' from the current cursor position. Passing `n = NA` is supported and returns +#' the remaining features (from the current cursor position). +#' Fetching zero features is also possible to retrieve the structure of the +#' result set as a data frame. 
+#' OGR field types are returned as the following R types (`NA` for OGR NULL +#' values): +#' * `OFTInteger`: `integer` (or `logical` for subtype `OFSTBoolean`) +#' * `OFTIntegerList`: vector of `integer` (data frame list column) +#' * `OFTInteger64`: `bit64::integer64` (or `logical` for subtype `OFSTBoolean`) +#' * `OFTInteger64List`: vector of `bit64::integer64` (data frame list column) +#' * `OFTReal`: `numeric` +#' * `OFTRealList`: vector of `numeric` (data frame list column) +#' * `OFTString`: `character` string +#' * `OFTStringList`: vector of `character` strings (data frame list column) +#' * `OFTDate`: `Date` +#' * `OFTDateTime`: `POSIXct` (millisecond accuracy, adjustment for time zone +#' flag if present) +#' * `OFTBinary`: `raw` vector (data frame list column) +#' +#' Geomtries are not returned if the field `returnGeomAs` is set to `NONE` +#' (currently the default). +#' Geometries are returned as `raw` vectors in a data frame list column if the +#' field `returnGeomAs` is set to `WKB` or `WKB_ISO`. +#' Otherwise, geometries are returned as `character` (`returnGeomAs` set to one +#' of `WKT`, `WKT_ISO` or `TYPE_NAME`). +#' #' \code{$close()} -#' Closes the GDAL dataset (no return value, called for side effects). +#' Closes the vector dataset (no return value, called for side effects). #' Calling \code{$close()} results in proper cleanup, and flushing of any #' pending writes. #' The `GDALVector` object is still available after calling \code{$close()}. -#' The dataset can be re-opened on the existing \code{dsn} with +#' The layer can be re-opened on the existing \code{dsn} with #' \code{$open(read_only=TRUE)} or \code{$open(read_only=FALSE)}. 
#' #' @note #' #' @seealso +#' [ogr_define], [ogr_manage], [ogr2ogr()], [ogrinfo()] #' #' @examples #' diff --git a/man/GDALVector-class.Rd b/man/GDALVector-class.Rd new file mode 100644 index 000000000..965158ace --- /dev/null +++ b/man/GDALVector-class.Rd @@ -0,0 +1,332 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gdalvector.R +\name{GDALVector-class} +\alias{GDALVector-class} +\alias{Rcpp_GDALVector} +\alias{Rcpp_GDALVector-class} +\alias{GDALVector} +\title{Class encapsulating a vector layer in a GDAL dataset} +\arguments{ +\item{dsn}{Character string containing the data source name (DSN, usually a +filename or database connection string). See the GDAL vector format +descriptions at \url{https://gdal.org/drivers/vector/index.html}.} + +\item{layer}{Character string containing either the name of a layer of +features within the data source, or an SQL SELECT statement to be executed +against the data source that defines a layer via its result set.} + +\item{read_only}{Logical. \code{TRUE} to open the layer read-only (the default), +or \code{FALSE} to open with write access.} + +\item{open_options}{Optional character vector of \code{NAME=VALUE} pairs +specifying dataset open options.} + +\item{spatial_filter}{Optional character string containing a geometry in +Well Known Text (WKT) format which represents a spatial filter.} + +\item{dialect}{Optional character string to control the statement dialect +when SQL is used to define the layer. By default, the OGR SQL engine will +be used, except for RDBMS drivers that will use their dedicated SQL engine, +unless \code{"OGRSQL"} is explicitly passed as the dialect. The \code{SQLITE} +dialect can also be used +(see \url{https://gdal.org/user/ogr_sql_sqlite_dialect.html}).} +} +\value{ +An object of class \code{GDALVector} which contains pointers to the +opened layer and the dataset that contains it, and methods that operate on +the layer as described in Details. 
\code{GDALVector} is a C++ class exposed +directly to R (via \code{RCPP_EXPOSED_CLASS}). Fields and methods of the class +are accessed using the \code{$} operator. The read/write fields are used for +per-object settings. +} +\description{ +\code{GDALVector} provides an interface for accessing a vector layer in a GDAL +dataset and calling methods on the underlying \code{OGRLayer} object. +An object of class \code{GDALVector} persists an open connection to the dataset, +and exposes methods for retrieving layer information, setting attribute and +spatial filters, and reading/writing feature data. +See \url{https://gdal.org/api/index.html} for details of the GDAL +Vector API. +} +\section{Usage}{ + +\preformatted{ +## Constructors +# read-only by default: +ds <- new(GDALVector, dsn) +ds <- new(GDALVector, dsn, layer) +# for update access: +ds <- new(GDALVector, dsn, layer, read_only = FALSE) +# to use dataset open options +ds <- new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options) +# to specify a spatial filter and/or dialect +new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) + +## Read/write fields (see Details) +lyr$defaultGeomFldName +lyr$returnGeomAs +lyr$wkbByteOrder + +## Methods (see Details) +lyr$open(read_only) +lyr$isOpen() +lyr$getDsn() +lyr$getFileList() +lyr$getDriverShortName() +lyr$getDriverLongName() + +lyr$getName() +lyr$testCapability() +lyr$getFIDColumn() +lyr$getGeomType() +lyr$getGeometryColumn() +lyr$getSpatialRef() +lyr$bbox() +lyr$getLayerDefn() + +lyr$setAttributeFilter(query) +lyr$setSpatialFilterRect(bbox) +lyr$clearSpatialFilter() + +lyr$getFeatureCount() +lyr$getNextFeature() +lyr$getFeature(fid) +lyr$resetReading() + +lyr$fetch(n); + +lyr$close() +} +} + +\section{Details}{ + + +\code{new(GDALVector, dsn)} +Constructor. If \code{layer} is omitted, it defaults to the first layer in the +data source by index, so this form of the constructor might be used for +single-layer formats like shapefile. 
\code{read_only} defaults to \code{TRUE}. + +\code{new(GDALVector, dsn, layer)} +Constructor specifying the name of a layer to open. \code{layer} may also be given +as an SQL SELECT statement to define a layer as the result set (read only). + +\code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE)} +Constructor specifying read/write access. The \code{layer} argument is required in +this form of the constructor, but may be given as empty string (\code{""}), in +which case the first layer in the data source by index will be opened. + +\code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options)} +Constructor specifying dataset open options as a character vector of +\code{NAME=VALUE} pairs. + +\code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect))} +Constructor specifying a spatial filter and/or SQL dialect. All arguments +are required in this form of the constructor, but \code{open_options} may be +\code{NULL}, and \code{spatial_filter} or \code{dialect} may be empty string (\code{""}). + +\code{$defaultGeomFldName} +Read/write field specifying a return column name when the geometry column +name in the source layer is empty, like with shapefiles etc. +Character string, defaults to \code{geometry}. + +\code{$returnGeomAs} +Read/write field specifying the return format for feature geometries. +Character string, one of \code{WKT}, \code{WKT_ISO}, \code{WKB}, \code{WKB_ISO}, \code{TYPE_NAME} or +\code{NONE} (the default). + +\code{$wkbByteOrder} +Read/write field specifying the byte order for WKB geometries. +Character string, one \code{LSB} (Least Significant Byte First, the default) or +\code{MSB} (Most Significant Byte First). + +\code{$open(read_only)} +(Re-)opens the vector layer on the existing DSN. Use this method to +open a layer that has been closed using \code{$close()}. May be used to +re-open a layer with a different read/write access (\code{read_only} set to +\code{TRUE} or \code{FALSE}). 
The method will first close an open dataset, so it is +not required to call \code{$close()} explicitly in this case. +No return value, called for side effects. + +\code{$isOpen()} +Returns a \code{logical} scalar indicating whether the vector dataset is open. + +\code{$getDsn()} +Returns a character string containing the \code{dsn} associated with this +\code{GDALVector} object (\code{dsn} originally used to open the layer). + +\code{$getFileList()} +Returns a character vector of files believed to be part of the data source. +If it returns an empty string (\code{""}) it means there is believed to be no +local file system files associated with the dataset (e.g., a virtual file +system). The returned filenames will normally be relative or absolute +paths depending on the path used to originally open the dataset. + +\code{$getDriverShortName()} +Returns the short name of the vector format driver. + +\code{$getDriverLongName()} +Returns the long name of the vector format driver. + +\code{$getName()} +Returns the layer name. + +\code{$testCapability()} +Tests whether the layer supports named capabilities based on the current +read/write access. Returns a list of capabilities with values \code{TRUE} or +\code{FALSE}. See \code{ogr_layer_test_cap()} for a list of the capabilities tested. + +\code{$getFIDColumn()} +Returns the name of the underlying database column being used as the FID +column, or empty string (\code{""}) if not supported. + +\code{$getGeomType()} +Returns the well known name of the layer geometry type as character string. +For layers with multiple geometry fields, this method only returns the +geometry type of the first geometry column. For other columns, use +\verb{$getLayerDefn()}. For layers without any geometry field, this method +returns \code{NONE}. + +\code{$getGeometryColumn()} +Returns he name of the underlying database column being used as the geometry +column, or an empty string (\code{""}) if not supported. 
+For layers with multiple geometry fields, this method only returns the +name of the first geometry column. For other columns, use \verb{$getLayerDefn()}. + +\code{$getSpatialRef()} +Returns a WKT string containing the spatial reference system for this layer. + +\code{$bbox()} +Returns a numeric vector of length four containing the bounding box +(xmin, ymin, xmax, ymax) for this layer. Note that \code{bForce = true} is set in +the underlying API call to \code{OGR_L_GetExtent()}, so the entire layer may be +scanned to compute minimum bounding rectangle (see \code{FastGetExtent} in the +list returned by \verb{$testCapability()}). Depending on the driver, a spatial +filter may/may not be taken into account, so it is safer to call \verb{$bbox()} +without setting a spatial filter. + +\code{$getLayerDefn()} +Returns a list containing the OGR feature class definition for this layer +(a.k.a. layer definition). The list contains zero or more attribute field +definitions, along with one or more geometry field definitions. +See \link{ogr_define} for details of the field and feature class definitions. + +\code{$setAttributeFilter(query)} +Sets an attribute query string to be used when fetching features via the +\verb{$getNextFeature()} or \verb{$fetch()} methods. +Only features for which \code{query} evaluates as true will be returned. +The query string should be in the format of an SQL WHERE clause, e.g., +\code{"population > 1000000 and population < 5000000"} where population is an +attribute in the layer. The query format is normally a SQL WHERE clause as +described in the \href{https://gdal.org/user/ogr_sql_dialect.html#where}{"WHERE"} +section of the OGR SQL dialect documentation. +In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native +capabilities of the database may be used to to interpret the WHERE clause, +in which case the capabilities will be broader than those of OGR SQL. 
+Note that installing a query string will generally result in resetting the +current reading position (as with \verb{$resetReading()} below). +The \code{query} parameter may be set to \code{""} (empty string) to clear the current +attribute filter. + +\code{$setSpatialFilterRect(bbox)} +Sets a new rectangular spatial filter. This method sets a rectangle to be +used as a spatial filter when fetching features via the \verb{$getNextFeature()} +or \verb{$fetch()} methods. Only features that geometrically intersect the given +rectangle will be returned. +The x/y values in \code{bbox} (a \code{numeric} vector of length four: xmin, ymin, +xmax, ymax) should be in the same coordinate system as the layer as a whole +(as returned by \verb{$getSpatialRef()}). + +\code{$clearSpatialFilter()} +Clears a spatial filter that was set with \verb{$setSpatialFilterRect()}. +No return value, called for that side effect. + +\code{$getFeatureCount()} +Returns the number of features in the layer. For dynamic databases the count +may not be exact. This method forces a count in the underlying API call +(i.e., \code{bForce = TRUE} in the call to \code{OGR_L_GetFeatureCount()}). Note that +some vector drivers will actually scan the entire layer once to count +features. The list element \code{FastFeatureCount} returned by +\verb{$testCapability()} can be checked if this might be a concern. +The returned count takes the spatial and/or attribute filters into account. +Note that some driver implementations of this method may alter the read +cursor of the layer. + +\code{$getNextFeature()} +Fetch the next available feature from this layer. Only features matching the +current spatial and/or attribute filter (if defined) will be returned. +This method implements sequential access to the features of a layer. +The \verb{$resetReading()} method can be used to start at the beginning again. 
+Returns a list with the unique feature identifier (FID), the attribute and +geometry field names, and their values. \code{NULL} is retunred if no more +features are available. + +\code{$getFeature(fid)} +Returns a feature by its identifier. The value of \code{fid} must be a numeric +scalar, optionally carrying the \code{bit64::integer64} class attribute. +Success or failure of this operation is unaffected by any spatial or +attribute filters that may be in effect. +The list element \code{RandomRead} returned by \verb{$testCapability()} can be checked +to establish if this layer supports efficient random access reading; +however, the call should always work if the feature exists since a fallback +implementation just scans all the features in the layer looking for the +desired feature. Returns a list with the unique feature identifier (FID), +the attribute and geometry field names, and their values, or \code{NULL} on +failure. Note that sequential reads (with \verb{$getNextFeature()}) are generally +considered interrupted by a \verb{$getFeature()} call. + +\code{$resetReading()} +Reset feature reading to start on the first feature. No return value, called +for that side effect. + +\code{$fetch(n)} +Fetches the next \code{n} features from the layer and returns them as a data +frame. This allows retrieving the entire feature set, one page of features +at a time, or the remaining features (potentially with an attribute and/or +spatial filter applied). This function is an analog of \code{DBI::dbFetch()}, +where the \code{GDALVector} object itself is analogous to a DBI result set. +The \code{n} parameter is the maximum number of features to retrieve per fetch +given as \code{numeric} scalar (assumed to be a whole number, will be truncated). +Use \code{n = -1} or \code{n = Inf} to retrieve all pending features (resets reading +to the first feature). +Otherwise, \code{fetch(n)} can be called multiple times to perform forward paging +from the current cursor position. 
Passing \code{n = NA} is supported and returns +the remaining features (from the current cursor position). +Fetching zero features is also possible to retrieve the structure of the +result set as a data frame. +OGR field types are returned as the following R types (\code{NA} for OGR NULL +values): +\itemize{ +\item \code{OFTInteger}: \code{integer} (or \code{logical} for subtype \code{OFSTBoolean}) +\item \code{OFTIntegerList}: vector of \code{integer} (data frame list column) +\item \code{OFTInteger64}: \code{bit64::integer64} (or \code{logical} for subtype \code{OFSTBoolean}) +\item \code{OFTInteger64List}: vector of \code{bit64::integer64} (data frame list column) +\item \code{OFTReal}: \code{numeric} +\item \code{OFTRealList}: vector of \code{numeric} (data frame list column) +\item \code{OFTString}: \code{character} string +\item \code{OFTStringList}: vector of \code{character} strings (data frame list column) +\item \code{OFTDate}: \code{Date} +\item \code{OFTDateTime}: \code{POSIXct} (millisecond accuracy, adjustment for time zone +flag if present) +\item \code{OFTBinary}: \code{raw} vector (data frame list column) +} + +Geomtries are not returned if the field \code{returnGeomAs} is set to \code{NONE} +(currently the default). +Geometries are returned as \code{raw} vectors in a data frame list column if the +field \code{returnGeomAs} is set to \code{WKB} or \code{WKB_ISO}. +Otherwise, geometries are returned as \code{character} (\code{returnGeomAs} set to one +of \code{WKT}, \code{WKT_ISO} or \code{TYPE_NAME}). + +\code{$close()} +Closes the vector dataset (no return value, called for side effects). +Calling \code{$close()} results in proper cleanup, and flushing of any +pending writes. +The \code{GDALVector} object is still available after calling \code{$close()}. +The layer can be re-opened on the existing \code{dsn} with +\code{$open(read_only=TRUE)} or \code{$open(read_only=FALSE)}. 
+} + +\seealso{ +\link{ogr_define}, \link{ogr_manage}, \code{\link[=ogr2ogr]{ogr2ogr()}}, \code{\link[=ogrinfo]{ogrinfo()}} +} From 7ada5128a029ae2a3ea4678066c06c8401f3479a Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 21 Jul 2024 23:38:47 -0600 Subject: [PATCH 46/53] fix tests using GDALVector::testCapability() --- tests/testthat/test-GDALVector-class.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-GDALVector-class.R b/tests/testthat/test-GDALVector-class.R index dd7029b07..b0212cf7f 100644 --- a/tests/testthat/test-GDALVector-class.R +++ b/tests/testthat/test-GDALVector-class.R @@ -14,12 +14,12 @@ test_that("class constructors work", { lyr$close() lyr <- new(GDALVector, dsn, "mtbs_perims", read_only = FALSE) - expect_true(lyr$testCapability("RandomWrite")) + expect_true(lyr$testCapability()$RandomWrite) lyr$close() lyr <- new(GDALVector, dsn, "mtbs_perims", read_only = TRUE, "LIST_ALL_TABLES=NO") - expect_false(lyr$testCapability("RandomWrite")) + expect_false(lyr$testCapability()$RandomWrite) lyr$close() bb <- c(469685.97, 11442.45, 544069.63, 85508.15) From bdbd7a399807d1603e691510610f3ffb6c6a1426 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Sun, 21 Jul 2024 23:39:46 -0600 Subject: [PATCH 47/53] Documentation for GDALVector (examples) --- DESCRIPTION | 1 + R/gdalvector.R | 327 +++++++++++++++++++++++++++------------ man/GDALVector-class.Rd | 333 ++++++++++++++++++++++++++++------------ 3 files changed, 458 insertions(+), 203 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f3dfe0b68..d84c44c5d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,6 +56,7 @@ Imports: xml2 LinkingTo: Rcpp, RcppInt64 Suggests: + wk, gt, knitr, rmarkdown, diff --git a/R/gdalvector.R b/R/gdalvector.R index 7be707d36..88b2e41b3 100644 --- a/R/gdalvector.R +++ b/R/gdalvector.R @@ -17,9 +17,9 @@ #' @param dsn Character string containing the data source name (DSN, usually a #' filename or database connection string). 
See the GDAL vector format #' descriptions at \url{https://gdal.org/drivers/vector/index.html}. -#' @param layer Character string containing either the name of a layer of -#' features within the data source, or an SQL SELECT statement to be executed -#' against the data source that defines a layer via its result set. +#' @param layer Character string containing either the name of a layer within +#' the data source, or an SQL SELECT statement to be executed against the data +#' source that defines a layer via its result set. #' @param read_only Logical. `TRUE` to open the layer read-only (the default), #' or `FALSE` to open with write access. #' @param open_options Optional character vector of `NAME=VALUE` pairs @@ -36,10 +36,10 @@ #' opened layer and the dataset that contains it, and methods that operate on #' the layer as described in Details. `GDALVector` is a C++ class exposed #' directly to R (via `RCPP_EXPOSED_CLASS`). Fields and methods of the class -#' are accessed using the `$` operator. The read/write fields are used for -#' per-object settings. +#' are accessed using the `$` operator. The read/write fields are per-object +#' settings which can be changed as needed during the lifetime of the object. 
#' -#' @section Usage: +#' @section Usage (see Details): #' \preformatted{ #' ## Constructors #' # read-only by default: @@ -48,16 +48,16 @@ #' # for update access: #' ds <- new(GDALVector, dsn, layer, read_only = FALSE) #' # to use dataset open options -#' ds <- new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options) +#' ds <- new(GDALVector, dsn, layer, read_only, open_options) #' # to specify a spatial filter and/or dialect -#' new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) +#' ds <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) #' -#' ## Read/write fields (see Details) +#' ## Read/write fields #' lyr$defaultGeomFldName #' lyr$returnGeomAs #' lyr$wkbByteOrder #' -#' ## Methods (see Details) +#' ## Methods #' lyr$open(read_only) #' lyr$isOpen() #' lyr$getDsn() @@ -83,51 +83,61 @@ #' lyr$getFeature(fid) #' lyr$resetReading() #' -#' lyr$fetch(n); +#' lyr$fetch(n) #' #' lyr$close() #' } #' @section Details: +#' ## Constructors #' -#' \code{new(GDALVector, dsn)} -#' Constructor. If `layer` is omitted, it defaults to the first layer in the -#' data source by index, so this form of the constructor might be used for -#' single-layer formats like shapefile. `read_only` defaults to `TRUE`. +#' \code{new(GDALVector, dsn)}\cr +#' If `layer` is omitted, the first layer by index is assumed, so this form +#' of the constructor might be used for single-layer formats like shapefile. #' -#' \code{new(GDALVector, dsn, layer)} -#' Constructor specifying the name of a layer to open. `layer` may also be given -#' as an SQL SELECT statement to define a layer as the result set (read only). +#' \code{new(GDALVector, dsn, layer)}\cr +#' Constructor specifying the name of a layer to open. The `layer` argument +#' may also be given as an SQL SELECT statement to define a layer as the result +#' set. #' -#' \code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE)} -#' Constructor specifying read/write access. 
The `layer` argument is required in -#' this form of the constructor, but may be given as empty string (`""`), in -#' which case the first layer in the data source by index will be opened. +#' \code{new(GDALVector, dsn, layer, read_only)}\cr +#' Specifies read/write access (`read_only = {TRUE|FALSE}`). +#' The `layer` argument is required in this form of the constructor, but may be +#' given as empty string (`""`), in which case the first layer by index will be +#' assumed. #' -#' \code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options)} -#' Constructor specifying dataset open options as a character vector of +#' \code{new(GDALVector, dsn, layer, read_only, open_options)}\cr +#' Constructor to specify dataset open options as a character vector of #' `NAME=VALUE` pairs. #' -#' \code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect))} -#' Constructor specifying a spatial filter and/or SQL dialect. All arguments +#' \code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect)}\cr +#' Constructor to specify a spatial filter and/or SQL dialect. All arguments #' are required in this form of the constructor, but `open_options` may be -#' `NULL`, and `spatial_filter` or `dialect` may be empty string (`""`). +#' `NULL`, and `spatial_filter` or `dialect` may be an empty string (`""`). +#' +#' ## Read/write fields #' -#' \code{$defaultGeomFldName} -#' Read/write field specifying a return column name when the geometry column -#' name in the source layer is empty, like with shapefiles etc. -#' Character string, defaults to `geometry`. +#' \code{$defaultGeomFldName}\cr +#' Character string specifying a name to use for returned columns when the +#' geometry column name in the source layer is empty, like with shapefiles etc. +#' Defaults to `"geometry"`. #' -#' \code{$returnGeomAs} -#' Read/write field specifying the return format for feature geometries.
-#' Character string, one of `WKT`, `WKT_ISO`, `WKB`, `WKB_ISO`, `TYPE_NAME` or -#' `NONE` (the default). +#' \code{$returnGeomAs}\cr +#' Character string specifying the return format of feature geometries. +#' Must be one of `WKT`, `WKT_ISO`, `WKB`, `WKB_ISO`, `TYPE_NAME` or +#' `NONE` (the default). `WKB` is used for backward compatibility purposes. +#' It exports the old-style 99-402 extended dimension (Z) WKB types for +#' Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon and +#' GeometryCollection. For other geometry types, it is equivalent to using +#' `WKB_ISO` (see \url{https://libgeos.org/specifications/wkb/}). #' -#' \code{$wkbByteOrder} -#' Read/write field specifying the byte order for WKB geometries. -#' Character string, one `LSB` (Least Significant Byte First, the default) or -#' `MSB` (Most Significant Byte First). +#' \code{$wkbByteOrder}\cr +#' Character string specifying the byte order for WKB geometries. +#' Must be either `LSB` (Least Significant Byte first, the default) or +#' `MSB` (Most Significant Byte first). #' -#' \code{$open(read_only)} +#' ## Methods +#' +#' \code{$open(read_only)}\cr #' (Re-)opens the vector layer on the existing DSN. Use this method to #' open a layer that has been closed using \code{$close()}. May be used to #' re-open a layer with a different read/write access (`read_only` set to @@ -135,87 +145,87 @@ #' not required to call \code{$close()} explicitly in this case. #' No return value, called for side effects. #' -#' \code{$isOpen()} +#' \code{$isOpen()}\cr #' Returns a `logical` scalar indicating whether the vector dataset is open. #' -#' \code{$getDsn()} +#' \code{$getDsn()}\cr #' Returns a character string containing the `dsn` associated with this #' `GDALVector` object (`dsn` originally used to open the layer). #' -#' \code{$getFileList()} +#' \code{$getFileList()}\cr #' Returns a character vector of files believed to be part of the data source. 
#' If it returns an empty string (`""`) it means there is believed to be no #' local file system files associated with the dataset (e.g., a virtual file #' system). The returned filenames will normally be relative or absolute #' paths depending on the path used to originally open the dataset. #' -#' \code{$getDriverShortName()} +#' \code{$getDriverShortName()}\cr #' Returns the short name of the vector format driver. #' -#' \code{$getDriverLongName()} +#' \code{$getDriverLongName()}\cr #' Returns the long name of the vector format driver. #' -#' \code{$getName()} +#' \code{$getName()}\cr #' Returns the layer name. #' -#' \code{$testCapability()} +#' \code{$testCapability()}\cr #' Tests whether the layer supports named capabilities based on the current #' read/write access. Returns a list of capabilities with values `TRUE` or -#' `FALSE`. See `ogr_layer_test_cap()` for a list of the capabilities tested. +#' `FALSE`. See [`ogr_layer_test_cap()`] for a list of the capabilities tested. #' -#' \code{$getFIDColumn()} +#' \code{$getFIDColumn()}\cr #' Returns the name of the underlying database column being used as the FID #' column, or empty string (`""`) if not supported. #' -#' \code{$getGeomType()} +#' \code{$getGeomType()}\cr #' Returns the well known name of the layer geometry type as character string. #' For layers with multiple geometry fields, this method only returns the #' geometry type of the first geometry column. For other columns, use #' `$getLayerDefn()`. For layers without any geometry field, this method -#' returns `NONE`. +#' returns `"NONE"`. #' -#' \code{$getGeometryColumn()} +#' \code{$getGeometryColumn()}\cr #' Returns he name of the underlying database column being used as the geometry #' column, or an empty string (`""`) if not supported. #' For layers with multiple geometry fields, this method only returns the #' name of the first geometry column. For other columns, use `$getLayerDefn()`. 
#' -#' \code{$getSpatialRef()} +#' \code{$getSpatialRef()}\cr #' Returns a WKT string containing the spatial reference system for this layer. #' -#' \code{$bbox()} +#' \code{$bbox()}\cr #' Returns a numeric vector of length four containing the bounding box #' (xmin, ymin, xmax, ymax) for this layer. Note that `bForce = true` is set in #' the underlying API call to `OGR_L_GetExtent()`, so the entire layer may be -#' scanned to compute minimum bounding rectangle (see `FastGetExtent` in the -#' list returned by `$testCapability()`). Depending on the driver, a spatial -#' filter may/may not be taken into account, so it is safer to call `$bbox()` -#' without setting a spatial filter. +#' scanned to compute a minimum bounding rectangle (see `FastGetExtent` in the +#' list returned by `$testCapability()`). Depending on the format driver, a +#' spatial filter may/may not be taken into account, so it is safer to call +#' `$bbox()` without setting a spatial filter. #' -#' \code{$getLayerDefn()} +#' \code{$getLayerDefn()}\cr #' Returns a list containing the OGR feature class definition for this layer #' (a.k.a. layer definition). The list contains zero or more attribute field #' definitions, along with one or more geometry field definitions. #' See [ogr_define] for details of the field and feature class definitions. #' -#' \code{$setAttributeFilter(query)} +#' \code{$setAttributeFilter(query)}\cr #' Sets an attribute query string to be used when fetching features via the #' `$getNextFeature()` or `$fetch()` methods. #' Only features for which `query` evaluates as true will be returned. -#' The query string should be in the format of an SQL WHERE clause, e.g., -#' `"population > 1000000 and population < 5000000"` where population is an -#' attribute in the layer. The query format is normally a SQL WHERE clause as -#' described in the ["WHERE"](https://gdal.org/user/ogr_sql_dialect.html#where) -#' section of the OGR SQL dialect documentation. 
+#' The query string should be in the format of an SQL WHERE clause, described +#' in the ["WHERE"](https://gdal.org/user/ogr_sql_dialect.html#where) +#' section of the OGR SQL dialect documentation (e.g., +#' `"population > 1000000 and population < 5000000"`, where population is an +#' attribute in the layer). #' In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native #' capabilities of the database may be used to to interpret the WHERE clause, #' in which case the capabilities will be broader than those of OGR SQL. #' Note that installing a query string will generally result in resetting the -#' current reading position (as with `$resetReading()` below). +#' current reading position (as with `$resetReading()` described below). #' The `query` parameter may be set to `""` (empty string) to clear the current #' attribute filter. #' -#' \code{$setSpatialFilterRect(bbox)} +#' \code{$setSpatialFilterRect(bbox)}\cr #' Sets a new rectangular spatial filter. This method sets a rectangle to be #' used as a spatial filter when fetching features via the `$getNextFeature()` #' or `$fetch()` methods. Only features that geometrically intersect the given @@ -224,63 +234,63 @@ #' xmax, ymax) should be in the same coordinate system as the layer as a whole #' (as returned by `$getSpatialRef()`). #' -#' \code{$clearSpatialFilter()} +#' \code{$clearSpatialFilter()}\cr #' Clears a spatial filter that was set with `$setSpatialFilterRect()`. #' No return value, called for that side effect. #' -#' \code{$getFeatureCount()} +#' \code{$getFeatureCount()}\cr #' Returns the number of features in the layer. For dynamic databases the count #' may not be exact. This method forces a count in the underlying API call #' (i.e., `bForce = TRUE` in the call to `OGR_L_GetFeatureCount()`). Note that #' some vector drivers will actually scan the entire layer once to count -#' features. The list element `FastFeatureCount` returned by +#' features.
The `FastFeatureCount` capability in the list returned by #' `$testCapability()` can be checked if this might be a concern. #' The returned count takes the spatial and/or attribute filters into account. #' Note that some driver implementations of this method may alter the read #' cursor of the layer. #' -#' \code{$getNextFeature()} +#' \code{$getNextFeature()}\cr #' Fetch the next available feature from this layer. Only features matching the #' current spatial and/or attribute filter (if defined) will be returned. #' This method implements sequential access to the features of a layer. #' The `$resetReading()` method can be used to start at the beginning again. #' Returns a list with the unique feature identifier (FID), the attribute and -#' geometry field names, and their values. `NULL` is retunred if no more +#' geometry field names, and their values. `NULL` is returned if no more #' features are available. #' -#' \code{$getFeature(fid)} +#' \code{$getFeature(fid)}\cr #' Returns a feature by its identifier. The value of `fid` must be a numeric #' scalar, optionally carrying the `bit64::integer64` class attribute. #' Success or failure of this operation is unaffected by any spatial or #' attribute filters that may be in effect. -#' The list element `RandomRead` returned by `$testCapability()` can be checked -#' to establish if this layer supports efficient random access reading; -#' however, the call should always work if the feature exists since a fallback -#' implementation just scans all the features in the layer looking for the -#' desired feature. Returns a list with the unique feature identifier (FID), +#' The `RandomRead` capability in the list returned by `$testCapability()` can +#' be checked to establish if this layer supports efficient random access +#' reading; however, the call should always work if the feature exists since a +#' fallback implementation just scans all the features in the layer looking for +#' the desired feature. 
Returns a list with the unique feature identifier (FID), #' the attribute and geometry field names, and their values, or `NULL` on #' failure. Note that sequential reads (with `$getNextFeature()`) are generally -#' considered interrupted by a `$getFeature()` call. +#' considered interrupted by a call to `$getFeature()`. #' -#' \code{$resetReading()} +#' \code{$resetReading()}\cr #' Reset feature reading to start on the first feature. No return value, called #' for that side effect. #' -#' \code{$fetch(n)} +#' \code{$fetch(n)}\cr #' Fetches the next `n` features from the layer and returns them as a data -#' frame. This allows retrieving the entire feature set, one page of features -#' at a time, or the remaining features (potentially with an attribute and/or -#' spatial filter applied). This function is an analog of `DBI::dbFetch()`, -#' where the `GDALVector` object itself is analogous to a DBI result set. +#' frame. This allows retrieving the entire set of features, one page of +#' features at a time, or the remaining features (from the current cursor +#' position). This function is an analog of +#' [`DBI::dbFetch()`](https://dbi.r-dbi.org/reference/dbFetch.html). #' The `n` parameter is the maximum number of features to retrieve per fetch -#' given as `numeric` scalar (assumed to be a whole number, will be truncated). -#' Use `n = -1` or `n = Inf` to retrieve all pending features (resets reading -#' to the first feature). +#' given as a `numeric` scalar (assumed to be a whole number, will be +#' truncated). Use `n = -1` or `n = Inf` to retrieve all pending features +#' (resets reading to the first feature). #' Otherwise, `fetch(n)` can be called multiple times to perform forward paging -#' from the current cursor position. Passing `n = NA` is supported and returns -#' the remaining features (from the current cursor position). -#' Fetching zero features is also possible to retrieve the structure of the -#' result set as a data frame. 
+#' from the current cursor position. Passing `n = NA` is also supported and +#' returns the remaining features. +#' Fetching zero features is possible to retrieve the structure of the feature +#' set as a data frame (columns fully typed). #' OGR field types are returned as the following R types (`NA` for OGR NULL #' values): #' * `OFTInteger`: `integer` (or `logical` for subtype `OFSTBoolean`) @@ -292,18 +302,19 @@ #' * `OFTString`: `character` string #' * `OFTStringList`: vector of `character` strings (data frame list column) #' * `OFTDate`: `Date` -#' * `OFTDateTime`: `POSIXct` (millisecond accuracy, adjustment for time zone +#' * `OFTDateTime`: `POSIXct` (millisecond accuracy and adjustment for time zone #' flag if present) #' * `OFTBinary`: `raw` vector (data frame list column) #' #' Geomtries are not returned if the field `returnGeomAs` is set to `NONE` -#' (currently the default). -#' Geometries are returned as `raw` vectors in a data frame list column if the -#' field `returnGeomAs` is set to `WKB` or `WKB_ISO`. -#' Otherwise, geometries are returned as `character` (`returnGeomAs` set to one -#' of `WKT`, `WKT_ISO` or `TYPE_NAME`). -#' -#' \code{$close()} +#' (currently the default). Omitting the geometries may be beneficial for +#' performance and memory usage when access only to feature attributes is +#' needed. Geometries are returned as `raw` vectors in a data frame list column +#' when `returnGeomAs` is set to `WKB` or `WKB_ISO`. Otherwise, geometries are +#' returned as `character` when `returnGeomAs` is set to one of `WKT`, +#' `WKT_ISO` or `TYPE_NAME`. +#' +#' \code{$close()}\cr #' Closes the vector dataset (no return value, called for side effects). #' Calling \code{$close()} results in proper cleanup, and flushing of any #' pending writes. @@ -311,13 +322,125 @@ #' The layer can be re-opened on the existing \code{dsn} with #' \code{$open(read_only=TRUE)} or \code{$open(read_only=FALSE)}. 
#' -#' @note -#' #' @seealso #' [ogr_define], [ogr_manage], [ogr2ogr()], [ogrinfo()] #' #' @examples +#' # MTBS fire perimeters in Yellowstone National Park 1984-2022 +#' f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package = "gdalraster") +#' +#' # copy to a temporary in-memory file that is writeable +#' dsn <- file.path("/vsimem", basename(f)) +#' vsi_copy_file(f, dsn) +#' +#' lyr <- new(GDALVector, dsn, "mtbs_perims") +#' +#' # object of class GDALVector +#' lyr +#' str(lyr) +#' +#' # dataset info +#' lyr$getDriverShortName() +#' lyr$getDriverLongName() +#' lyr$getFileList() +#' +#' # layer info +#' lyr$getName() +#' lyr$getGeomType() +#' lyr$getGeometryColumn() +#' lyr$getFIDColumn() +#' lyr$getSpatialRef() +#' lyr$bbox() +#' +#' # layer capabilities +#' lyr$testCapability() +#' +#' # re-open with write access +#' lyr$open(read_only = FALSE) +#' lyr$testCapability()$SequentialWrite +#' lyr$testCapability()$RandomWrite +#' +#' # feature class definition - a list of fields and their definitions +#' defn <- lyr$getLayerDefn() +#' names(defn) +#' +#' # each list element holds a field definition +#' str(defn) +#' +#' lyr$getFeatureCount() #' +#' # sequential read cursor +#' feat <- lyr$getNextFeature() +#' # a list of field names and their values +#' str(feat) +#' +#' # attribute filter +#' lyr$setAttributeFilter("ig_year = 2020") +#' lyr$getFeatureCount() +#' +#' # the default value of read/write field 'returnGeomAs' +#' lyr$returnGeomAs +#' +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' # NULL if no more features are available +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' # reset reading to the start and return geometry as WKT +#' lyr$resetReading() +#' lyr$returnGeomAs <- "WKT" +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' # clear attribute filter +#' lyr$setAttributeFilter("") +#' lyr$getFeatureCount() +#' +#' # spatial filter +#' # get the bounding box of the largest 1988 fire and use as spatial filter +#' # first set a 
temporary attribute filter to do the lookup +#' lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' bbox <- bbox_from_wkt(feat$geom) +#' print(bbox) +#' +#' # set spatial filter on the full layer +#' lyr$setAttributeFilter("") +#' lyr$setSpatialFilterRect(bbox) +#' lyr$getFeatureCount() +#' +#' # fetch in chunks and return as data frame +#' # geometry can optionally be returned as NONE, WKT, WKB or TYPE_NAME +#' d <- lyr$fetch(20) +#' str(d) +#' +#' # the next chunk +#' d <- lyr$fetch(20) +#' nrow(d) +#' +#' # no features remaining +#' d <- lyr$fetch(20) +#' nrow(d) +#' str(d) # 0-row data frame with columns typed +#' +#' # fetch all features, filtered spatially, and return geometries as WKB +#' lyr$returnGeomAs <- "WKB" +#' d <- lyr$fetch(-1) # resets reading to the first feature +#' str(d) +#' +#' # parse WKB using package wk +#' wk_obj <- wk::wkb(d$geom, crs = lyr$getSpatialRef()) +#' plot(wk_obj) +#' +#' lyr$clearSpatialFilter() +#' lyr$getFeatureCount() +#' +#' lyr$close() +#' vsi_unlink(dsn) NULL Rcpp::loadModule("mod_GDALVector", TRUE) diff --git a/man/GDALVector-class.Rd b/man/GDALVector-class.Rd index 965158ace..212a7cc48 100644 --- a/man/GDALVector-class.Rd +++ b/man/GDALVector-class.Rd @@ -11,9 +11,9 @@ filename or database connection string). See the GDAL vector format descriptions at \url{https://gdal.org/drivers/vector/index.html}.} -\item{layer}{Character string containing either the name of a layer of -features within the data source, or an SQL SELECT statement to be executed -against the data source that defines a layer via its result set.} +\item{layer}{Character string containing either the name of a layer within +the data source, or an SQL SELECT statement to be executed against the data +source that defines a layer via its result set.} \item{read_only}{Logical. 
\code{TRUE} to open the layer read-only (the default), or \code{FALSE} to open with write access.} @@ -36,8 +36,8 @@ An object of class \code{GDALVector} which contains pointers to the opened layer and the dataset that contains it, and methods that operate on the layer as described in Details. \code{GDALVector} is a C++ class exposed directly to R (via \code{RCPP_EXPOSED_CLASS}). Fields and methods of the class -are accessed using the \code{$} operator. The read/write fields are used for -per-object settings. +are accessed using the \code{$} operator. The read/write fields are per-object +settings which can be changed as needed during the lifetime of the object. } \description{ \code{GDALVector} provides an interface for accessing a vector layer in a GDAL @@ -48,7 +48,7 @@ spatial filters, and reading/writing feature data. See \url{https://gdal.org/api/index.html} for details of the GDAL Vector API. } -\section{Usage}{ +\section{Usage (see Details)}{ \preformatted{ ## Constructors @@ -58,16 +58,16 @@ ds <- new(GDALVector, dsn, layer) # for update access: ds <- new(GDALVector, dsn, layer, read_only = FALSE) # to use dataset open options -ds <- new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options) +ds <- new(GDALVector, dsn, layer, read_only, open_options) # to specify a spatial filter and/or dialect -new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) +ds <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) -## Read/write fields (see Details) +## Read/write fields lyr$defaultGeomFldName lyr$returnGeomAs lyr$wkbByteOrder -## Methods (see Details) +## Methods lyr$open(read_only) lyr$isOpen() lyr$getDsn() @@ -93,7 +93,7 @@ lyr$getNextFeature() lyr$getFeature(fid) lyr$resetReading() -lyr$fetch(n); +lyr$fetch(n) lyr$close() } @@ -101,46 +101,58 @@ lyr$close() \section{Details}{ +\subsection{Constructors}{ -\code{new(GDALVector, dsn)} -Constructor. 
If \code{layer} is omitted, it defaults to the first layer in the -data source by index, so this form of the constructor might be used for -single-layer formats like shapefile. \code{read_only} defaults to \code{TRUE}. +\code{new(GDALVector, dsn)}\cr +If \code{layer} is omitted, the first layer by index is assumed, so this form +of the constructor might be used for single-layer formats like shapefile. -\code{new(GDALVector, dsn, layer)} -Constructor specifying the name of a layer to open. \code{layer} may also be given -as an SQL SELECT statement to define a layer as the result set (read only). +\code{new(GDALVector, dsn, layer)}\cr +Constructor specifying the name of a layer to open. The \code{layer} argument +may also be given as an SQL SELECT statement to define a layer as the result +set. -\code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE)} -Constructor specifying read/write access. The \code{layer} argument is required in -this form of the constructor, but may be given as empty string (\code{""}), in -which case the first layer in the data source by index will be opened. +\code{new(GDALVector, dsn, layer, read_only)}\cr +Specifies read/write access (\verb{read_only = \{TRUE|FALSE\})}. +The \code{layer} argument is required in this form of the constructor, but may be +given as empty string (\code{""}), in which case the first layer by index will be +assumed. -\code{new(GDALVector, dsn, layer, read_only = TRUE|FALSE, open_options)} -Constructor specifying dataset open options as a character vector of +\code{new(GDALVector, dsn, layer, read_only, open_options)}\cr +Constructor to specify dataset open options as a character vector of \code{NAME=VALUE} pairs. -\code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect))} -Constructor specifying a spatial filter and/or SQL dialect. 
All arguments +\code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect))}\cr +Constructor to specify a spatial filter and/or SQL dialect. All arguments are required in this form of the constructor, but \code{open_options} may be -\code{NULL}, and \code{spatial_filter} or \code{dialect} may be empty string (\code{""}). - -\code{$defaultGeomFldName} -Read/write field specifying a return column name when the geometry column -name in the source layer is empty, like with shapefiles etc. -Character string, defaults to \code{geometry}. +\code{NULL}, and \code{spatial_filter} or \code{dialect} may be an empty string (\code{""}). +} -\code{$returnGeomAs} -Read/write field specifying the return format for feature geometries. -Character string, one of \code{WKT}, \code{WKT_ISO}, \code{WKB}, \code{WKB_ISO}, \code{TYPE_NAME} or -\code{NONE} (the default). +\subsection{Read/write fields}{ + +\code{$defaultGeomFldName}\cr +Character string specifying a name to use for returned columns when the +geometry column name in the source layer is empty, like with shapefiles etc. +Defaults to \code{"geometry"}. + +\code{$returnGeomAs}\cr +Character string specifying the return format of feature geometries. +Must be one of \code{WKT}, \code{WKT_ISO}, \code{WKB}, \code{WKB_ISO}, \code{TYPE_NAME} or +\code{NONE} (the default). \code{WKB} is used for backward compatibility purposes. +It exports the old-style 99-402 extended dimension (Z) WKB types for +Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon and +GeometryCollection. For other geometry types, it is equivalent to using +\code{WKB_ISO} (see \url{https://libgeos.org/specifications/wkb/}). + +\code{$wkbByteOrder}\cr +Character string specifying the byte order for WKB geometries. +Must be either \code{LSB} (Least Significant Byte first, the default) or +\code{MSB} (Most Significant Byte first). +} -\code{$wkbByteOrder} -Read/write field specifying the byte order for WKB geometries. 
-Character string, one \code{LSB} (Least Significant Byte First, the default) or -\code{MSB} (Most Significant Byte First). +\subsection{Methods}{ -\code{$open(read_only)} +\code{$open(read_only)}\cr (Re-)opens the vector layer on the existing DSN. Use this method to open a layer that has been closed using \code{$close()}. May be used to re-open a layer with a different read/write access (\code{read_only} set to @@ -148,87 +160,87 @@ re-open a layer with a different read/write access (\code{read_only} set to not required to call \code{$close()} explicitly in this case. No return value, called for side effects. -\code{$isOpen()} +\code{$isOpen()}\cr Returns a \code{logical} scalar indicating whether the vector dataset is open. -\code{$getDsn()} +\code{$getDsn()}\cr Returns a character string containing the \code{dsn} associated with this \code{GDALVector} object (\code{dsn} originally used to open the layer). -\code{$getFileList()} +\code{$getFileList()}\cr Returns a character vector of files believed to be part of the data source. If it returns an empty string (\code{""}) it means there is believed to be no local file system files associated with the dataset (e.g., a virtual file system). The returned filenames will normally be relative or absolute paths depending on the path used to originally open the dataset. -\code{$getDriverShortName()} +\code{$getDriverShortName()}\cr Returns the short name of the vector format driver. -\code{$getDriverLongName()} +\code{$getDriverLongName()}\cr Returns the long name of the vector format driver. -\code{$getName()} +\code{$getName()}\cr Returns the layer name. -\code{$testCapability()} +\code{$testCapability()}\cr Tests whether the layer supports named capabilities based on the current read/write access. Returns a list of capabilities with values \code{TRUE} or -\code{FALSE}. See \code{ogr_layer_test_cap()} for a list of the capabilities tested. +\code{FALSE}. 
See \code{\link[=ogr_layer_test_cap]{ogr_layer_test_cap()}} for a list of the capabilities tested. -\code{$getFIDColumn()} +\code{$getFIDColumn()}\cr Returns the name of the underlying database column being used as the FID column, or empty string (\code{""}) if not supported. -\code{$getGeomType()} +\code{$getGeomType()}\cr Returns the well known name of the layer geometry type as character string. For layers with multiple geometry fields, this method only returns the geometry type of the first geometry column. For other columns, use \verb{$getLayerDefn()}. For layers without any geometry field, this method -returns \code{NONE}. +returns \code{"NONE"}. -\code{$getGeometryColumn()} +\code{$getGeometryColumn()}\cr Returns he name of the underlying database column being used as the geometry column, or an empty string (\code{""}) if not supported. For layers with multiple geometry fields, this method only returns the name of the first geometry column. For other columns, use \verb{$getLayerDefn()}. -\code{$getSpatialRef()} +\code{$getSpatialRef()}\cr Returns a WKT string containing the spatial reference system for this layer. -\code{$bbox()} +\code{$bbox()}\cr Returns a numeric vector of length four containing the bounding box (xmin, ymin, xmax, ymax) for this layer. Note that \code{bForce = true} is set in the underlying API call to \code{OGR_L_GetExtent()}, so the entire layer may be -scanned to compute minimum bounding rectangle (see \code{FastGetExtent} in the -list returned by \verb{$testCapability()}). Depending on the driver, a spatial -filter may/may not be taken into account, so it is safer to call \verb{$bbox()} -without setting a spatial filter. +scanned to compute a minimum bounding rectangle (see \code{FastGetExtent} in the +list returned by \verb{$testCapability()}). Depending on the format driver, a +spatial filter may/may not be taken into account, so it is safer to call +\verb{$bbox()} without setting a spatial filter. 
-\code{$getLayerDefn()} +\code{$getLayerDefn()}\cr Returns a list containing the OGR feature class definition for this layer (a.k.a. layer definition). The list contains zero or more attribute field definitions, along with one or more geometry field definitions. See \link{ogr_define} for details of the field and feature class definitions. -\code{$setAttributeFilter(query)} +\code{$setAttributeFilter(query)}\cr Sets an attribute query string to be used when fetching features via the \verb{$getNextFeature()} or \verb{$fetch()} methods. Only features for which \code{query} evaluates as true will be returned. -The query string should be in the format of an SQL WHERE clause, e.g., -\code{"population > 1000000 and population < 5000000"} where population is an -attribute in the layer. The query format is normally a SQL WHERE clause as -described in the \href{https://gdal.org/user/ogr_sql_dialect.html#where}{"WHERE"} -section of the OGR SQL dialect documentation. +The query string should be in the format of an SQL WHERE clause, described +in the \href{https://gdal.org/user/ogr_sql_dialect.html#where}{"WHERE"} +section of the OGR SQL dialect documentation (e.g., +\code{"population > 1000000 and population < 5000000"}, where population is an +attribute in the layer). In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native capabilities of the database may be used to to interpret the WHERE clause, in which case the capabilities will be broader than those of OGR SQL. Note that installing a query string will generally result in resetting the -current reading position (as with \verb{$resetReading()} below). +current reading position (as with \verb{$resetReading()} described below). The \code{query} parameter may be set to \code{""} (empty string) to clear the current attribute filter. -\code{$setSpatialFilterRect(bbox)} +\code{$setSpatialFilterRect(bbox)}\cr Sets a new rectangular spatial filter.
This method sets a rectangle to be used as a spatial filter when fetching features via the \verb{$getNextFeature()} or \verb{$fetch()} methods. Only features that geometrically intersect the given @@ -237,63 +249,63 @@ The x/y values in \code{bbox} (a \code{numeric} vector of length four: xmin, ymi xmax, ymax) should be in the same coordinate system as the layer as a whole (as returned by \verb{$getSpatialRef()}). -\code{$clearSpatialFilter()} +\code{$clearSpatialFilter()}\cr Clears a spatial filter that was set with \verb{$setSpatialFilterRect()}. No return value, called for that side effect. -\code{$getFeatureCount()} +\code{$getFeatureCount()}\cr Returns the number of features in the layer. For dynamic databases the count may not be exact. This method forces a count in the underlying API call (i.e., \code{bForce = TRUE} in the call to \code{OGR_L_GetFeatureCount()}). Note that some vector drivers will actually scan the entire layer once to count -features. The list element \code{FastFeatureCount} returned by +features. The \code{FastFeatureCount} capability in the list returned by \verb{$testCapability()} can be checked if this might be a concern. The returned count takes the spatial and/or attribute filters into account. Note that some driver implementations of this method may alter the read cursor of the layer. -\code{$getNextFeature()} +\code{$getNextFeature()}\cr Fetch the next available feature from this layer. Only features matching the current spatial and/or attribute filter (if defined) will be returned. This method implements sequential access to the features of a layer. The \verb{$resetReading()} method can be used to start at the beginning again. Returns a list with the unique feature identifier (FID), the attribute and -geometry field names, and their values. \code{NULL} is retunred if no more +geometry field names, and their values. \code{NULL} is returned if no more features are available. 
-\code{$getFeature(fid)} +\code{$getFeature(fid)}\cr Returns a feature by its identifier. The value of \code{fid} must be a numeric scalar, optionally carrying the \code{bit64::integer64} class attribute. Success or failure of this operation is unaffected by any spatial or attribute filters that may be in effect. -The list element \code{RandomRead} returned by \verb{$testCapability()} can be checked -to establish if this layer supports efficient random access reading; -however, the call should always work if the feature exists since a fallback -implementation just scans all the features in the layer looking for the -desired feature. Returns a list with the unique feature identifier (FID), +The \code{RandomRead} capability in the list returned by \verb{$testCapability()} can +be checked to establish if this layer supports efficient random access +reading; however, the call should always work if the feature exists since a +fallback implementation just scans all the features in the layer looking for +the desired feature. Returns a list with the unique feature identifier (FID), the attribute and geometry field names, and their values, or \code{NULL} on failure. Note that sequential reads (with \verb{$getNextFeature()}) are generally -considered interrupted by a \verb{$getFeature()} call. +considered interrupted by a call to \verb{$getFeature()}. -\code{$resetReading()} +\code{$resetReading()}\cr Reset feature reading to start on the first feature. No return value, called for that side effect. -\code{$fetch(n)} +\code{$fetch(n)}\cr Fetches the next \code{n} features from the layer and returns them as a data -frame. This allows retrieving the entire feature set, one page of features -at a time, or the remaining features (potentially with an attribute and/or -spatial filter applied). This function is an analog of \code{DBI::dbFetch()}, -where the \code{GDALVector} object itself is analogous to a DBI result set. +frame. 
This allows retrieving the entire set of features, one page of +features at a time, or the remaining features (from the current cursor +position). This function is an analog of +\href{https://dbi.r-dbi.org/reference/dbFetch.html}{\code{DBI::dbFetch()}}. The \code{n} parameter is the maximum number of features to retrieve per fetch -given as \code{numeric} scalar (assumed to be a whole number, will be truncated). -Use \code{n = -1} or \code{n = Inf} to retrieve all pending features (resets reading -to the first feature). +given as a \code{numeric} scalar (assumed to be a whole number, will be +truncated). Use \code{n = -1} or \code{n = Inf} to retrieve all pending features +(resets reading to the first feature). Otherwise, \code{fetch(n)} can be called multiple times to perform forward paging -from the current cursor position. Passing \code{n = NA} is supported and returns -the remaining features (from the current cursor position). -Fetching zero features is also possible to retrieve the structure of the -result set as a data frame. +from the current cursor position. Passing \code{n = NA} is also supported and +returns the remaining features. +Fetching zero features is possible to retrieve the structure of the feature +set as a data frame (columns fully typed). OGR field types are returned as the following R types (\code{NA} for OGR NULL values): \itemize{ @@ -306,19 +318,20 @@ values): \item \code{OFTString}: \code{character} string \item \code{OFTStringList}: vector of \code{character} strings (data frame list column) \item \code{OFTDate}: \code{Date} -\item \code{OFTDateTime}: \code{POSIXct} (millisecond accuracy, adjustment for time zone +\item \code{OFTDateTime}: \code{POSIXct} (millisecond accuracy and adjustment for time zone flag if present) \item \code{OFTBinary}: \code{raw} vector (data frame list column) } Geomtries are not returned if the field \code{returnGeomAs} is set to \code{NONE} -(currently the default). 
-Geometries are returned as \code{raw} vectors in a data frame list column if the -field \code{returnGeomAs} is set to \code{WKB} or \code{WKB_ISO}. -Otherwise, geometries are returned as \code{character} (\code{returnGeomAs} set to one -of \code{WKT}, \code{WKT_ISO} or \code{TYPE_NAME}). - -\code{$close()} +(currently the default). Omitting the geometries may be beneficial for +performance and memory usage when access only to feature attributes is +needed. Geometries are returned as \code{raw} vectors in a data frame list column +when \code{returnGeomAs} is set to \code{WKB} or \code{WKB_ISO}. Otherwise, geometries are +returned as \code{character} when \code{returnGeomAs} is set to one of \code{WKT}, +\code{WKT_ISO} or \code{TYPE_NAME}. + +\code{$close()}\cr Closes the vector dataset (no return value, called for side effects). Calling \code{$close()} results in proper cleanup, and flushing of any pending writes. @@ -326,7 +339,125 @@ The \code{GDALVector} object is still available after calling \code{$close()}. The layer can be re-opened on the existing \code{dsn} with \code{$open(read_only=TRUE)} or \code{$open(read_only=FALSE)}. 
} +} + +\examples{ +# MTBS fire perimeters in Yellowstone National Park 1984-2022 +f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package = "gdalraster") + +# copy to a temporary in-memory file that is writeable +dsn <- file.path("/vsimem", basename(f)) +vsi_copy_file(f, dsn) + +lyr <- new(GDALVector, dsn, "mtbs_perims") + +# object of class GDALVector +lyr +str(lyr) + +# dataset info +lyr$getDriverShortName() +lyr$getDriverLongName() +lyr$getFileList() + +# layer info +lyr$getName() +lyr$getGeomType() +lyr$getGeometryColumn() +lyr$getFIDColumn() +lyr$getSpatialRef() +lyr$bbox() + +# layer capabilities +lyr$testCapability() + +# re-open with write access +lyr$open(read_only = FALSE) +lyr$testCapability()$SequentialWrite +lyr$testCapability()$RandomWrite + +# feature class definition - a list of fields and their definitions +defn <- lyr$getLayerDefn() +names(defn) + +# each list element holds a field definition +str(defn) + +lyr$getFeatureCount() +# sequential read cursor +feat <- lyr$getNextFeature() +# a list of field names and their values +str(feat) + +# attribute filter +lyr$setAttributeFilter("ig_year = 2020") +lyr$getFeatureCount() + +# the default value of read/write field 'returnGeomAs' +lyr$returnGeomAs + +feat <- lyr$getNextFeature() +str(feat) + +# NULL if no more features are available +feat <- lyr$getNextFeature() +str(feat) + +# reset reading to the start and return geometry as WKT +lyr$resetReading() +lyr$returnGeomAs <- "WKT" +feat <- lyr$getNextFeature() +str(feat) + +# clear attribute filter +lyr$setAttributeFilter("") +lyr$getFeatureCount() + +# spatial filter +# get the bounding box of the largest 1988 fire and use as spatial filter +# first set a temporary attribute filter to do the lookup +lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") +feat <- lyr$getNextFeature() +str(feat) + +bbox <- bbox_from_wkt(feat$geom) +print(bbox) + +# set spatial filter on the full layer +lyr$setAttributeFilter("") 
+lyr$setSpatialFilterRect(bbox) +lyr$getFeatureCount() + +# fetch in chunks and return as data frame +# geometry can optionally be returned as NONE, WKT, WKB or TYPE_NAME +d <- lyr$fetch(20) +str(d) + +# the next chunk +d <- lyr$fetch(20) +nrow(d) + +# no features remaining +d <- lyr$fetch(20) +nrow(d) +str(d) # 0-row data frame with columns typed + +# fetch all features, filtered spatially, and return geometries as WKB +lyr$returnGeomAs <- "WKB" +d <- lyr$fetch(-1) # resets reading to the first feature +str(d) + +# parse WKB using package wk +wk_obj <- wk::wkb(d$geom, crs = lyr$getSpatialRef()) +plot(wk_obj) + +lyr$clearSpatialFilter() +lyr$getFeatureCount() + +lyr$close() +vsi_unlink(dsn) +} \seealso{ \link{ogr_define}, \link{ogr_manage}, \code{\link[=ogr2ogr]{ogr2ogr()}}, \code{\link[=ogrinfo]{ogrinfo()}} } From a6d4d7d4faf519c6ee8e0d2d7d10312ea0275afb Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Mon, 22 Jul 2024 21:52:50 -0600 Subject: [PATCH 48/53] Doc: edit class GDALVector --- R/gdalvector.R | 154 ++++++++++++++++++++++------------------ man/GDALVector-class.Rd | 154 ++++++++++++++++++++++------------------ 2 files changed, 172 insertions(+), 136 deletions(-) diff --git a/R/gdalvector.R b/R/gdalvector.R index 88b2e41b3..e421f26a7 100644 --- a/R/gdalvector.R +++ b/R/gdalvector.R @@ -14,14 +14,13 @@ #' See \url{https://gdal.org/api/index.html} for details of the GDAL #' Vector API. #' -#' @param dsn Character string containing the data source name (DSN, usually a -#' filename or database connection string). See the GDAL vector format -#' descriptions at \url{https://gdal.org/drivers/vector/index.html}. -#' @param layer Character string containing either the name of a layer within -#' the data source, or an SQL SELECT statement to be executed against the data -#' source that defines a layer via its result set. -#' @param read_only Logical. `TRUE` to open the layer read-only (the default), -#' or `FALSE` to open with write access. 
+#' @param dsn Character string containing the data source name (DSN), usually a +#' filename or database connection string. +#' @param layer Character string containing the name of a layer within the +#' data source. May also be given as an SQL SELECT statement to be executed +#' against the data source, defining a layer as the result set. +#' @param read_only Logical scalar. `TRUE` to open the layer read-only (the +#' default), or `FALSE` to open with write access. #' @param open_options Optional character vector of `NAME=VALUE` pairs #' specifying dataset open options. #' @param spatial_filter Optional character string containing a geometry in @@ -29,9 +28,8 @@ #' @param dialect Optional character string to control the statement dialect #' when SQL is used to define the layer. By default, the OGR SQL engine will #' be used, except for RDBMS drivers that will use their dedicated SQL engine, -#' unless `"OGRSQL"` is explicitly passed as the dialect. The `SQLITE` -#' dialect can also be used -#' (see \url{https://gdal.org/user/ogr_sql_sqlite_dialect.html}). +#' unless `"OGRSQL"` is explicitly passed as the dialect. The `"SQLITE"` +#' dialect can also be used. #' @returns An object of class `GDALVector` which contains pointers to the #' opened layer and the dataset that contains it, and methods that operate on #' the layer as described in Details. 
`GDALVector` is a C++ class exposed @@ -42,15 +40,16 @@ #' @section Usage (see Details): #' \preformatted{ #' ## Constructors -#' # read-only by default: -#' ds <- new(GDALVector, dsn) -#' ds <- new(GDALVector, dsn, layer) -#' # for update access: -#' ds <- new(GDALVector, dsn, layer, read_only = FALSE) -#' # to use dataset open options -#' ds <- new(GDALVector, dsn, layer, read_only, open_options) -#' # to specify a spatial filter and/or dialect -#' ds <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) +#' # for single-layer file formats such as shapefile +#' lyr <- new(GDALVector, dsn) +#' # specifying the layer name, or SQL statement defining the layer +#' lyr <- new(GDALVector, dsn, layer) +#' # for update access +#' lyr <- new(GDALVector, dsn, layer, read_only = FALSE) +#' # using dataset open options +#' lyr <- new(GDALVector, dsn, layer, read_only, open_options) +#' # setting a spatial filter and/or specifying the SQL dialect +#' lyr <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) #' #' ## Read/write fields #' lyr$defaultGeomFldName @@ -82,7 +81,6 @@ #' lyr$getNextFeature() #' lyr$getFeature(fid) #' lyr$resetReading() -#' #' lyr$fetch(n) #' #' lyr$close() @@ -91,8 +89,9 @@ #' ## Constructors #' #' \code{new(GDALVector, dsn)}\cr -#' If `layer` is omitted, the first layer by index is assumed, so this form -#' of the constructor might be used for single-layer formats like shapefile. +#' The first layer by index is assumed if the `layer` argument is omitted, so +#' this form of the constructor might be used for single-layer formats like +#' shapefile. #' #' \code{new(GDALVector, dsn, layer)}\cr #' Constructor specifying the name of a layer to open. The `layer` argument @@ -100,13 +99,13 @@ #' set. #' #' \code{new(GDALVector, dsn, layer, read_only)}\cr -#' Specifies read/write access (`read_only = {TRUE|FALSE})`. +#' Constructor specifying read/write access (`read_only = {TRUE|FALSE})`. 
#' The `layer` argument is required in this form of the constructor, but may be #' given as empty string (`""`), in which case the first layer by index will be #' assumed. #' #' \code{new(GDALVector, dsn, layer, read_only, open_options)}\cr -#' Constructor to specify dataset open options as a character vector of +#' Constructor specifying dataset open options as a character vector of #' `NAME=VALUE` pairs. #' #' \code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect))}\cr @@ -124,11 +123,11 @@ #' \code{$returnGeomAs}\cr #' Character string specifying the return format of feature geometries. #' Must be one of `WKT`, `WKT_ISO`, `WKB`, `WKB_ISO`, `TYPE_NAME` or -#' `NONE` (the default). `WKB` is used for backward compatibility purposes. -#' It exports the old-style 99-402 extended dimension (Z) WKB types for -#' Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon and -#' GeometryCollection. For other geometry types, it is equivalent to using -#' `WKB_ISO` (see \url{https://libgeos.org/specifications/wkb/}). +#' `NONE` (the default). `WKB`/`WKT` export old-style 99-402 extended +#' dimension (Z) types for Point, LineString, Polygon, MultiPoint, +#' MultiLineString, MultiPolygon and GeometryCollection. For other geometry +#' types, it is equivalent to using `WKB_ISO`/`WKT_ISO` +#' (see \url{https://libgeos.org/specifications/wkb/}). #' #' \code{$wkbByteOrder}\cr #' Character string specifying the byte order for WKB geometries. @@ -171,7 +170,14 @@ #' \code{$testCapability()}\cr #' Tests whether the layer supports named capabilities based on the current #' read/write access. Returns a list of capabilities with values `TRUE` or -#' `FALSE`. See [`ogr_layer_test_cap()`] for a list of the capabilities tested. +#' `FALSE`. 
The returned list contains the following named elements: +#' `RandomRead`, `SequentialWrite`, `RandomWrite`, `UpsertFeature`, +#' `FastSpatialFilter`, `FastFeatureCount`, `FastGetExtent`, +#' `FastSetNextByIndex`, `CreateField`, `CreateGeomField`, `DeleteField`, +#' `ReorderFields`, `AlterFieldDefn`, `AlterGeomFieldDefn`, `DeleteFeature`, +#' `StringsAsUTF8`, `Transactions`, `CurveGeometries`. +#' (See the GDAL documentation for +#' [`OGR_L_TestCapability()`](https://gdal.org/api/vector_c_api.html#_CPPv420OGR_L_TestCapability9OGRLayerHPKc).) #' #' \code{$getFIDColumn()}\cr #' Returns the name of the underlying database column being used as the FID @@ -195,11 +201,11 @@ #' #' \code{$bbox()}\cr #' Returns a numeric vector of length four containing the bounding box -#' (xmin, ymin, xmax, ymax) for this layer. Note that `bForce = true` is set in +#' for this layer (xmin, ymin, xmax, ymax). Note that `bForce = true` is set in #' the underlying API call to `OGR_L_GetExtent()`, so the entire layer may be #' scanned to compute a minimum bounding rectangle (see `FastGetExtent` in the #' list returned by `$testCapability()`). Depending on the format driver, a -#' spatial filter may/may not be taken into account, so it is safer to call +#' spatial filter may or may not be taken into account, so it is safer to call #' `$bbox()` without setting a spatial filter. #' #' \code{$getLayerDefn()}\cr @@ -215,14 +221,14 @@ #' The query string should be in the format of an SQL WHERE clause, described #' in the ["WHERE"](https://gdal.org/user/ogr_sql_dialect.html#where) #' section of the OGR SQL dialect documentation (e.g., -#' `"population > 1000000 and population < 5000000"`, where population is an +#' `"population > 1000000 and population < 5000000"`, where `population` is an #' attribute in the layer). 
#' In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native #' capabilities of the database may be used to to interpret the WHERE clause, #' in which case the capabilities will be broader than those of OGR SQL. #' Note that installing a query string will generally result in resetting the #' current reading position (as with `$resetReading()` decribed below). -#' The `query` parameter may be set to `""` (empty string) to clear the current +#' The `query` parameter may be set to empty string (`""`) to clear the current #' attribute filter. #' #' \code{$setSpatialFilterRect(bbox)}\cr @@ -230,9 +236,9 @@ #' used as a spatial filter when fetching features via the `$getNextFeature()` #' or `$fetch()` methods. Only features that geometrically intersect the given #' rectangle will be returned. -#' The x/y values in `bbox` (a `numeric` vector of length four: xmin, ymin, -#' xmax, ymax) should be in the same coordinate system as the layer as a whole -#' (as returned by `$getSpatialRef()`). +#' `bbox` is a numeric vector of length four containing xmin, ymin, xmax, ymax +#' in the same coordinate system as the layer as a whole (as returned by +#' `$getSpatialRef()`). #' #' \code{$clearSpatialFilter()}\cr #' Clears a spatial filter that was set with `$setSpatialFilterRect()`. @@ -243,11 +249,11 @@ #' may not be exact. This method forces a count in the underlying API call #' (i.e., `bForce = TRUE` in the call to `OGR_L_GetFeatureCount()`). Note that #' some vector drivers will actually scan the entire layer once to count -#' features. The `FastFeatureCount` capability in the list returned by +#' features. The `FastFeatureCount` element in the list returned by #' `$testCapability()` can be checked if this might be a concern. -#' The returned count takes the spatial and/or attribute filters into account. -#' Note that some driver implementations of this method may alter the read -#' cursor of the layer. 
+#' The number of features returned takes into account the spatial and/or +#' attribute filters. Some driver implementations of this method may alter the +#' read cursor of the layer. #' #' \code{$getNextFeature()}\cr #' Fetch the next available feature from this layer. Only features matching the @@ -263,7 +269,7 @@ #' scalar, optionally carrying the `bit64::integer64` class attribute. #' Success or failure of this operation is unaffected by any spatial or #' attribute filters that may be in effect. -#' The `RandomRead` capability in the list returned by `$testCapability()` can +#' The `RandomRead` element in the list returned by `$testCapability()` can #' be checked to establish if this layer supports efficient random access #' reading; however, the call should always work if the feature exists since a #' fallback implementation just scans all the features in the layer looking for @@ -280,51 +286,66 @@ #' Fetches the next `n` features from the layer and returns them as a data #' frame. This allows retrieving the entire set of features, one page of #' features at a time, or the remaining features (from the current cursor -#' position). This function is an analog of +#' position). +#' +#' This method is an analog of #' [`DBI::dbFetch()`](https://dbi.r-dbi.org/reference/dbFetch.html). +#' #' The `n` parameter is the maximum number of features to retrieve per fetch -#' given as a `numeric` scalar (assumed to be a whole number, will be +#' given as a `numeric` value but assumed to be a whole number (will be #' truncated). Use `n = -1` or `n = Inf` to retrieve all pending features #' (resets reading to the first feature). -#' Otherwise, `fetch(n)` can be called multiple times to perform forward paging +#' Otherwise, `$fetch()` can be called multiple times to perform forward paging #' from the current cursor position. Passing `n = NA` is also supported and #' returns the remaining features. 
#' Fetching zero features is possible to retrieve the structure of the feature #' set as a data frame (columns fully typed). +#' #' OGR field types are returned as the following R types (`NA` for OGR NULL #' values): -#' * `OFTInteger`: `integer` (or `logical` for subtype `OFSTBoolean`) -#' * `OFTIntegerList`: vector of `integer` (data frame list column) -#' * `OFTInteger64`: `bit64::integer64` (or `logical` for subtype `OFSTBoolean`) -#' * `OFTInteger64List`: vector of `bit64::integer64` (data frame list column) +#' * `OFTInteger`: `integer` +#' * `OFTInteger` subtype `OFSTBoolean`: `logical` +#' * `OFTIntegerList`: vector of `integer` (list column) +#' * `OFTInteger64`: `bit64::integer64` +#' * `OFTInteger64` subtype `OFSTBoolean`: `logical` +#' * `OFTInteger64List`: vector of `bit64::integer64` (list column) #' * `OFTReal`: `numeric` -#' * `OFTRealList`: vector of `numeric` (data frame list column) +#' * `OFTRealList`: vector of `numeric` (list column) #' * `OFTString`: `character` string -#' * `OFTStringList`: vector of `character` strings (data frame list column) +#' * `OFTStringList`: vector of `character` strings (list column) #' * `OFTDate`: `Date` #' * `OFTDateTime`: `POSIXct` (millisecond accuracy and adjustment for time zone #' flag if present) -#' * `OFTBinary`: `raw` vector (data frame list column) +#' * `OFTBinary`: `raw` vector (list column, `NULL` entries for OGR NULL values) #' #' Geomtries are not returned if the field `returnGeomAs` is set to `NONE` #' (currently the default). Omitting the geometries may be beneficial for #' performance and memory usage when access only to feature attributes is #' needed. Geometries are returned as `raw` vectors in a data frame list column #' when `returnGeomAs` is set to `WKB` or `WKB_ISO`. Otherwise, geometries are -#' returned as `character` when `returnGeomAs` is set to one of `WKT`, +#' returned as `character` strings when `returnGeomAs` is set to one of `WKT`, #' `WKT_ISO` or `TYPE_NAME`. 
#' +#' Note that `$getFeatureCount()` is called internally when fetching all +#' features or all remaining features (but not for a page of features). +#' #' \code{$close()}\cr #' Closes the vector dataset (no return value, called for side effects). #' Calling \code{$close()} results in proper cleanup, and flushing of any #' pending writes. #' The `GDALVector` object is still available after calling \code{$close()}. #' The layer can be re-opened on the existing \code{dsn} with -#' \code{$open(read_only=TRUE)} or \code{$open(read_only=FALSE)}. +#' \code{$open(read_only = {TRUE|FALSE})}. #' #' @seealso #' [ogr_define], [ogr_manage], [ogr2ogr()], [ogrinfo()] #' +#' GDAL vector format descriptions:\cr +#' \url{https://gdal.org/drivers/vector/index.html} +#' +#' GDAL-supported SQL dialects:\cr +#' \url{https://gdal.org/user/ogr_sql_sqlite_dialect.html} +#' #' @examples #' # MTBS fire perimeters in Yellowstone National Park 1984-2022 #' f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package = "gdalraster") @@ -360,13 +381,14 @@ #' lyr$testCapability()$SequentialWrite #' lyr$testCapability()$RandomWrite #' -#' # feature class definition - a list of fields and their definitions +#' # feature class definition - a list of field names and their definitions #' defn <- lyr$getLayerDefn() #' names(defn) -#' -#' # each list element holds a field definition #' str(defn) #' +#' # default value of the read/write field 'returnGeomAs' +#' print(lyr$returnGeomAs) +#' #' lyr$getFeatureCount() #' #' # sequential read cursor @@ -374,31 +396,28 @@ #' # a list of field names and their values #' str(feat) #' -#' # attribute filter +#' # set an attribute filter #' lyr$setAttributeFilter("ig_year = 2020") #' lyr$getFeatureCount() #' -#' # the default value of read/write field 'returnGeomAs' -#' lyr$returnGeomAs -#' #' feat <- lyr$getNextFeature() #' str(feat) #' -#' # NULL if no more features are available +#' # NULL when no more features are available #' feat <- lyr$getNextFeature() #'
str(feat) #' -#' # reset reading to the start and return geometry as WKT +#' # reset reading to the start and return geometries as WKT #' lyr$resetReading() #' lyr$returnGeomAs <- "WKT" #' feat <- lyr$getNextFeature() #' str(feat) #' -#' # clear attribute filter +#' # clear the attribute filter #' lyr$setAttributeFilter("") #' lyr$getFeatureCount() #' -#' # spatial filter +#' # set a spatial filter #' # get the bounding box of the largest 1988 fire and use as spatial filter #' # first set a temporary attribute filter to do the lookup #' lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") @@ -414,7 +433,6 @@ #' lyr$getFeatureCount() #' #' # fetch in chunks and return as data frame -#' # geometry can optionally be returned as NONE, WKT, WKB or TYPE_NAME #' d <- lyr$fetch(20) #' str(d) #' @@ -427,7 +445,7 @@ #' nrow(d) #' str(d) # 0-row data frame with columns typed #' -#' # fetch all features, filtered spatially, and return geometries as WKB +#' # fetch all pending features with geometries as WKB #' lyr$returnGeomAs <- "WKB" #' d <- lyr$fetch(-1) # resets reading to the first feature #' str(d) diff --git a/man/GDALVector-class.Rd b/man/GDALVector-class.Rd index 212a7cc48..8fce6ef8c 100644 --- a/man/GDALVector-class.Rd +++ b/man/GDALVector-class.Rd @@ -7,16 +7,15 @@ \alias{GDALVector} \title{Class encapsulating a vector layer in a GDAL dataset} \arguments{ -\item{dsn}{Character string containing the data source name (DSN, usually a -filename or database connection string). 
See the GDAL vector format -descriptions at \url{https://gdal.org/drivers/vector/index.html}.} +\item{dsn}{Character string containing the data source name (DSN), usually a +filename or database connection string.} -\item{layer}{Character string containing either the name of a layer within -the data source, or an SQL SELECT statement to be executed against the data -source that defines a layer via its result set.} +\item{layer}{Character string containing the name of a layer within the +data source. May also be given as an SQL SELECT statement to be executed +against the data source, defining a layer as the result set.} -\item{read_only}{Logical. \code{TRUE} to open the layer read-only (the default), -or \code{FALSE} to open with write access.} +\item{read_only}{Logical scalar. \code{TRUE} to open the layer read-only (the +default), or \code{FALSE} to open with write access.} \item{open_options}{Optional character vector of \code{NAME=VALUE} pairs specifying dataset open options.} @@ -27,9 +26,8 @@ Well Known Text (WKT) format which represents a spatial filter.} \item{dialect}{Optional character string to control the statement dialect when SQL is used to define the layer. By default, the OGR SQL engine will be used, except for RDBMS drivers that will use their dedicated SQL engine, -unless \code{"OGRSQL"} is explicitly passed as the dialect. The \code{SQLITE} -dialect can also be used -(see \url{https://gdal.org/user/ogr_sql_sqlite_dialect.html}).} +unless \code{"OGRSQL"} is explicitly passed as the dialect. The \code{"SQLITE"} +dialect can also be used.} } \value{ An object of class \code{GDALVector} which contains pointers to the @@ -52,15 +50,16 @@ Vector API. 
\preformatted{ ## Constructors -# read-only by default: -ds <- new(GDALVector, dsn) -ds <- new(GDALVector, dsn, layer) -# for update access: -ds <- new(GDALVector, dsn, layer, read_only = FALSE) -# to use dataset open options -ds <- new(GDALVector, dsn, layer, read_only, open_options) -# to specify a spatial filter and/or dialect -ds <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) +# for single-layer file formats such as shapefile +lyr <- new(GDALVector, dsn) +# specifying the layer name, or SQL statement defining the layer +lyr <- new(GDALVector, dsn, layer) +# for update access +lyr <- new(GDALVector, dsn, layer, read_only = FALSE) +# using dataset open options +lyr <- new(GDALVector, dsn, layer, read_only, open_options) +# setting a spatial filter and/or specifying the SQL dialect +lyr <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) ## Read/write fields lyr$defaultGeomFldName @@ -92,7 +91,6 @@ lyr$getFeatureCount() lyr$getNextFeature() lyr$getFeature(fid) lyr$resetReading() - lyr$fetch(n) lyr$close() @@ -104,8 +102,9 @@ lyr$close() \subsection{Constructors}{ \code{new(GDALVector, dsn)}\cr -If \code{layer} is omitted, the first layer by index is assumed, so this form -of the constructor might be used for single-layer formats like shapefile. +The first layer by index is assumed if the \code{layer} argument is omitted, so +this form of the constructor might be used for single-layer formats like +shapefile. \code{new(GDALVector, dsn, layer)}\cr Constructor specifying the name of a layer to open. The \code{layer} argument @@ -113,13 +112,13 @@ may also be given as an SQL SELECT statement to define a layer as the result set. \code{new(GDALVector, dsn, layer, read_only)}\cr -Specifies read/write access (\verb{read_only = \{TRUE|FALSE\})}. +Constructor specifying read/write access (\verb{read_only = \{TRUE|FALSE\})}. 
The \code{layer} argument is required in this form of the constructor, but may be given as empty string (\code{""}), in which case the first layer by index will be assumed. \code{new(GDALVector, dsn, layer, read_only, open_options)}\cr -Constructor to specify dataset open options as a character vector of +Constructor specifying dataset open options as a character vector of \code{NAME=VALUE} pairs. \code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect))}\cr @@ -138,11 +137,11 @@ Defaults to \code{"geometry"}. \code{$returnGeomAs}\cr Character string specifying the return format of feature geometries. Must be one of \code{WKT}, \code{WKT_ISO}, \code{WKB}, \code{WKB_ISO}, \code{TYPE_NAME} or -\code{NONE} (the default). \code{WKB} is used for backward compatibility purposes. -It exports the old-style 99-402 extended dimension (Z) WKB types for -Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon and -GeometryCollection. For other geometry types, it is equivalent to using -\code{WKB_ISO} (see \url{https://libgeos.org/specifications/wkb/}). +\code{NONE} (the default). \code{WKB}/\code{WKT} export old-style 99-402 extended +dimension (Z) types for Point, LineString, Polygon, MultiPoint, +MultiLineString, MultiPolygon and GeometryCollection. For other geometry +types, it is equivalent to using \code{WKB_ISO}/\code{WKT_ISO} +(see \url{https://libgeos.org/specifications/wkb/}). \code{$wkbByteOrder}\cr Character string specifying the byte order for WKB geometries. @@ -186,7 +185,14 @@ Returns the layer name. \code{$testCapability()}\cr Tests whether the layer supports named capabilities based on the current read/write access. Returns a list of capabilities with values \code{TRUE} or -\code{FALSE}. See \code{\link[=ogr_layer_test_cap]{ogr_layer_test_cap()}} for a list of the capabilities tested. +\code{FALSE}. 
The returned list contains the following named elements: +\code{RandomRead}, \code{SequentialWrite}, \code{RandomWrite}, \code{UpsertFeature}, +\code{FastSpatialFilter}, \code{FastFeatureCount}, \code{FastGetExtent}, +\code{FastSetNextByIndex}, \code{CreateField}, \code{CreateGeomField}, \code{DeleteField}, +\code{ReorderFields}, \code{AlterFieldDefn}, \code{AlterGeomFieldDefn}, \code{DeleteFeature}, +\code{StringsAsUTF8}, \code{Transactions}, \code{CurveGeometries}. +(See the GDAL documentation for +\href{https://gdal.org/api/vector_c_api.html#_CPPv420OGR_L_TestCapability9OGRLayerHPKc}{\code{OGR_L_TestCapability()}}.) \code{$getFIDColumn()}\cr Returns the name of the underlying database column being used as the FID @@ -210,11 +216,11 @@ Returns a WKT string containing the spatial reference system for this layer. \code{$bbox()}\cr Returns a numeric vector of length four containing the bounding box -(xmin, ymin, xmax, ymax) for this layer. Note that \code{bForce = true} is set in +for this layer (xmin, ymin, xmax, ymax). Note that \code{bForce = true} is set in the underlying API call to \code{OGR_L_GetExtent()}, so the entire layer may be scanned to compute a minimum bounding rectangle (see \code{FastGetExtent} in the list returned by \verb{$testCapability()}). Depending on the format driver, a -spatial filter may/may not be taken into account, so it is safer to call +spatial filter may or may not be taken into account, so it is safer to call \verb{$bbox()} without setting a spatial filter. \code{$getLayerDefn()}\cr @@ -230,14 +236,14 @@ Only features for which \code{query} evaluates as true will be returned. 
The query string should be in the format of an SQL WHERE clause, described in the \href{https://gdal.org/user/ogr_sql_dialect.html#where}{"WHERE"} section of the OGR SQL dialect documentation (e.g., -\code{"population > 1000000 and population < 5000000"}, where population is an +\code{"population > 1000000 and population < 5000000"}, where \code{population} is an attribute in the layer). In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native capabilities of the database may be used to to interpret the WHERE clause, in which case the capabilities will be broader than those of OGR SQL. Note that installing a query string will generally result in resetting the current reading position (as with \verb{$resetReading()} decribed below). -The \code{query} parameter may be set to \code{""} (empty string) to clear the current +The \code{query} parameter may be set to empty string (\code{""}) to clear the current attribute filter. \code{$setSpatialFilterRect(bbox)}\cr @@ -245,9 +251,9 @@ Sets a new rectangular spatial filter. This method sets a rectangle to be used as a spatial filter when fetching features via the \verb{$getNextFeature()} or \verb{$fetch()} methods. Only features that geometrically intersect the given rectangle will be returned. -The x/y values in \code{bbox} (a \code{numeric} vector of length four: xmin, ymin, -xmax, ymax) should be in the same coordinate system as the layer as a whole -(as returned by \verb{$getSpatialRef()}). +\code{bbox} is a numeric vector of length four containing xmin, ymin, xmax, ymax +in the same coordinate system as the layer as a whole (as returned by +\verb{$getSpatialRef()}). \code{$clearSpatialFilter()}\cr Clears a spatial filter that was set with \verb{$setSpatialFilterRect()}. @@ -258,11 +264,11 @@ Returns the number of features in the layer. For dynamic databases the count may not be exact. 
This method forces a count in the underlying API call (i.e., \code{bForce = TRUE} in the call to \code{OGR_L_GetFeatureCount()}). Note that some vector drivers will actually scan the entire layer once to count -features. The \code{FastFeatureCount} capability in the list returned by +features. The \code{FastFeatureCount} element in the list returned by \verb{$testCapability()} can be checked if this might be a concern. -The returned count takes the spatial and/or attribute filters into account. -Note that some driver implementations of this method may alter the read -cursor of the layer. +The number of features returned takes into account the spatial and/or +attribute filters. Some driver implementations of this method may alter the +read cursor of the layer. \code{$getNextFeature()}\cr Fetch the next available feature from this layer. Only features matching the @@ -278,7 +284,7 @@ Returns a feature by its identifier. The value of \code{fid} must be a numeric scalar, optionally carrying the \code{bit64::integer64} class attribute. Success or failure of this operation is unaffected by any spatial or attribute filters that may be in effect. -The \code{RandomRead} capability in the list returned by \verb{$testCapability()} can +The \code{RandomRead} element in the list returned by \verb{$testCapability()} can be checked to establish if this layer supports efficient random access reading; however, the call should always work if the feature exists since a fallback implementation just scans all the features in the layer looking for @@ -295,32 +301,38 @@ for that side effect. Fetches the next \code{n} features from the layer and returns them as a data frame. This allows retrieving the entire set of features, one page of features at a time, or the remaining features (from the current cursor -position). This function is an analog of +position). + +This method is an analog of \href{https://dbi.r-dbi.org/reference/dbFetch.html}{\code{DBI::dbFetch()}}. 
+ The \code{n} parameter is the maximum number of features to retrieve per fetch -given as a \code{numeric} scalar (assumed to be a whole number, will be +given as a \code{numeric} value but assumed to be a whole number (will be truncated). Use \code{n = -1} or \code{n = Inf} to retrieve all pending features (resets reading to the first feature). -Otherwise, \code{fetch(n)} can be called multiple times to perform forward paging +Otherwise, \verb{$fetch()} can be called multiple times to perform forward paging from the current cursor position. Passing \code{n = NA} is also supported and returns the remaining features. Fetching zero features is possible to retrieve the structure of the feature set as a data frame (columns fully typed). + OGR field types are returned as the following R types (\code{NA} for OGR NULL values): \itemize{ -\item \code{OFTInteger}: \code{integer} (or \code{logical} for subtype \code{OFSTBoolean}) -\item \code{OFTIntegerList}: vector of \code{integer} (data frame list column) -\item \code{OFTInteger64}: \code{bit64::integer64} (or \code{logical} for subtype \code{OFSTBoolean}) -\item \code{OFTInteger64List}: vector of \code{bit64::integer64} (data frame list column) +\item \code{OFTInteger}: \code{integer} +\item \code{OFTInteger} subtype \code{OFSTBoolean}: \code{logical} +\item \code{OFTIntegerList}: vector of \code{integer} (list column) +\item \code{OFTInteger64}: \code{bit64::integer64} +\item \code{OFTInteger64} subtype \code{OFSTBoolean}: \code{logical} +\item \code{OFTInteger64List}: vector of \code{bit64::integer64} (list column) \item \code{OFTReal}: \code{numeric} -\item \code{OFTRealList}: vector of \code{numeric} (data frame list column) +\item \code{OFTRealList}: vector of \code{numeric} (list column) \item \code{OFTString}: \code{character} string -\item \code{OFTStringList}: vector of \code{character} strings (data frame list column) +\item \code{OFTStringList}: vector of \code{character} strings (list column) \item 
\code{OFTDate}: \code{Date} \item \code{OFTDateTime}: \code{POSIXct} (millisecond accuracy and adjustment for time zone flag if present) -\item \code{OFTBinary}: \code{raw} vector (data frame list column) +\item \code{OFTBinary}: \code{raw} vector (list column, \code{NULL} entries for OGR NULL values) } Geomtries are not returned if the field \code{returnGeomAs} is set to \code{NONE} @@ -328,16 +340,19 @@ Geomtries are not returned if the field \code{returnGeomAs} is set to \code{NONE performance and memory usage when access only to feature attributes is needed. Geometries are returned as \code{raw} vectors in a data frame list column when \code{returnGeomAs} is set to \code{WKB} or \code{WKB_ISO}. Otherwise, geometries are -returned as \code{character} when \code{returnGeomAs} is set to one of \code{WKT}, +returned as \code{character} strings when \code{returnGeomAs} is set to one of \code{WKT}, \code{WKT_ISO} or \code{TYPE_NAME}. +Note that \verb{$getFeatureCount()} is called internally when fetching all +features or all remaining features (but not for a page of features). + \code{$close()}\cr Closes the vector dataset (no return value, called for side effects). Calling \code{$close()} results in proper cleanup, and flushing of any pending writes. The \code{GDALVector} object is still available after calling \code{$close()}. The layer can be re-opened on the existing \code{dsn} with -\code{$open(read_only=TRUE)} or \code{$open(read_only=FALSE)}. +\code{$open(read_only = {TRUE|FALSE})}. 
} } @@ -376,13 +391,14 @@ lyr$open(read_only = FALSE) lyr$testCapability()$SequentialWrite lyr$testCapability()$RandomWrite -# feature class definition - a list of fields and their definitions +# feature class definition - a list of field names and their definitions defn <- lyr$getLayerDefn() names(defn) - -# each list element holds a field definition str(defn) +# default value of the read/write field 'returnGeomAs' +print(lyr$returnGeomAs) + lyr$getFeatureCount() # sequential read cursor @@ -390,31 +406,28 @@ feat <- lyr$getNextFeature() # a list of field names and their values str(feat) -# attribute filter +# set an attribute filter lyr$setAttributeFilter("ig_year = 2020") lyr$getFeatureCount() -# the default value of read/write field 'returnGeomAs' -lyr$returnGeomAs - feat <- lyr$getNextFeature() str(feat) -# NULL if no more features are available +# NULL when no more features are available feat <- lyr$getNextFeature() str(feat) -# reset reading to the start and return geometry as WKT +# reset reading to the start and return geometries as WKT lyr$resetReading() lyr$returnGeomAs <- "WKT" feat <- lyr$getNextFeature() str(feat) -# clear attribute filter +# clear the attribute filter lyr$setAttributeFilter("") lyr$getFeatureCount() -# spatial filter +# set a spatial filter # get the bounding box of the largest 1988 fire and use as spatial filter # first set a temporary attribute filter to do the lookup lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") @@ -430,7 +443,6 @@ lyr$setSpatialFilterRect(bbox) lyr$getFeatureCount() # fetch in chunks and return as data frame -# geometry can optionally be returned as NONE, WKT, WKB or TYPE_NAME d <- lyr$fetch(20) str(d) @@ -443,7 +455,7 @@ d <- lyr$fetch(20) nrow(d) str(d) # 0-row data frame with columns typed -# fetch all features, filtered spatially, and return geometries as WKB +# fetch all pending features with geometries as WKB lyr$returnGeomAs <- "WKB" d <- lyr$fetch(-1) # resets reading to the first 
feature str(d) @@ -460,4 +472,10 @@ vsi_unlink(dsn) } \seealso{ \link{ogr_define}, \link{ogr_manage}, \code{\link[=ogr2ogr]{ogr2ogr()}}, \code{\link[=ogrinfo]{ogrinfo()}} + +GDAL vector format descriptions:\cr +\url{https://gdal.org/drivers/vector/index.html} + +GDAL-supported SQL dialects:\cr +\url{https://gdal.org/user/ogr_sql_sqlite_dialect.html} } From d603fc270b317bbb89fbdda513c7d56000a6c70b Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Mon, 22 Jul 2024 22:04:06 -0600 Subject: [PATCH 49/53] GDALVector::fetch(): set NULL entries for OGR NULL in an OFTBinary field --- src/gdalvector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gdalvector.cpp b/src/gdalvector.cpp index 0a09e526f..bb63f29d4 100644 --- a/src/gdalvector.cpp +++ b/src/gdalvector.cpp @@ -662,7 +662,7 @@ Rcpp::DataFrame GDALVector::fetch(double n) { } } else { - col[row_num] = Rcpp::RawVector::create(); + col[row_num] = R_NilValue; } } else if (fld_type == OFTIntegerList) { From d2e68a35b6a577d789889a19c4bff6a00e04dbb3 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Mon, 22 Jul 2024 23:50:02 -0600 Subject: [PATCH 50/53] class GDALVector initial implementation --- DESCRIPTION | 6 +++--- NEWS.md | 4 +++- R/gdalvector.R | 21 +++++++++++++++------ man/GDALVector-class.Rd | 21 +++++++++++++++------ 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d84c44c5d..36a242bdf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: gdalraster Title: Bindings to the 'Geospatial Data Abstraction Library' Raster API -Version: 1.11.1.9050 +Version: 1.11.1.9100 Authors@R: c( person("Chris", "Toney", email = "chris.toney@usda.gov", role = c("aut", "cre"), comment = "R interface/additional functionality"), @@ -56,12 +56,12 @@ Imports: xml2 LinkingTo: Rcpp, RcppInt64 Suggests: - wk, gt, knitr, rmarkdown, scales, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + wk NeedsCompilation: yes SystemRequirements: GDAL (>= 3.1.0, built against 
GEOS), PROJ, libxml2 Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index 4ca7b2494..439c036da 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# gdalraster 1.11.1.9050 (dev) +# gdalraster 1.11.1.9100 (dev) + +* add class `GDALVector`, initial implementation / WIP (2024-07-22) * `plot_raster()`: default to no stretch when the input is an RGB Byte raster, addresses #429 (2024-07-10) diff --git a/R/gdalvector.R b/R/gdalvector.R index e421f26a7..9ba8c1849 100644 --- a/R/gdalvector.R +++ b/R/gdalvector.R @@ -14,6 +14,13 @@ #' See \url{https://gdal.org/api/index.html} for details of the GDAL #' Vector API. #' +#' **Class `GDALVector` is currently under development**. An initial +#' implementation supporting read access was added in gdalraster 1.11.1.9100. +#' A working document with draft specifications is available at:\cr +#' \url{https://usdaforestservice.github.io/gdalraster/articles/gdalvector-draft.html}\cr +#' and discussion thread/status updates at:\cr +#' \url{https://github.com/USDAForestService/gdalraster/issues/241}. +#' #' @param dsn Character string containing the data source name (DSN), usually a #' filename or database connection string. #' @param layer Character string containing the name of a layer within the @@ -286,14 +293,16 @@ #' Fetches the next `n` features from the layer and returns them as a data #' frame. This allows retrieving the entire set of features, one page of #' features at a time, or the remaining features (from the current cursor -#' position). +#' position). Returns a data frame with as many rows as features were fetched, +#' and as many columns as attribute plus geometry fields in the result set, +#' even if the result is a single value or has one or zero rows. #' #' This method is an analog of #' [`DBI::dbFetch()`](https://dbi.r-dbi.org/reference/dbFetch.html). #' -#' The `n` parameter is the maximum number of features to retrieve per fetch -#' given as a `numeric` value but assumed to be a whole number (will be -#' truncated). 
Use `n = -1` or `n = Inf` to retrieve all pending features +#' The `n` argument is the maximum number of features to retrieve per fetch +#' given as `integer` or `numeric` but assumed to be a whole number (will +#' be truncated). Use `n = -1` or `n = Inf` to retrieve all pending features #' (resets reading to the first feature). #' Otherwise, `$fetch()` can be called multiple times to perform forward paging #' from the current cursor position. Passing `n = NA` is also supported and @@ -326,8 +335,8 @@ #' returned as `character` strings when `returnGeomAs` is set to one of `WKT`, #' `WKT_ISO` or `TYPE_NAME`. #' -#' Note that `$getFeatureCount()` is called internally when fetching all -#' features or all remaining features (but not for a page of features). +#' Note that `$getFeatureCount()` is called internally when fetching the full +#' feature set or all remaining features (but not for a page of features). #' #' \code{$close()}\cr #' Closes the vector dataset (no return value, called for side effects). diff --git a/man/GDALVector-class.Rd b/man/GDALVector-class.Rd index 8fce6ef8c..d81563e0a 100644 --- a/man/GDALVector-class.Rd +++ b/man/GDALVector-class.Rd @@ -45,6 +45,13 @@ and exposes methods for retrieving layer information, setting attribute and spatial filters, and reading/writing feature data. See \url{https://gdal.org/api/index.html} for details of the GDAL Vector API. + +\strong{Class \code{GDALVector} is currently under development}. An initial +implementation supporting read access was added in gdalraster 1.11.1.9100. +A working document with draft specifications is available at:\cr +\url{https://usdaforestservice.github.io/gdalraster/articles/gdalvector-draft.html}\cr +and discussion thread/status updates at:\cr +\url{https://github.com/USDAForestService/gdalraster/issues/241}. } \section{Usage (see Details)}{ @@ -301,14 +308,16 @@ for that side effect. Fetches the next \code{n} features from the layer and returns them as a data frame. 
This allows retrieving the entire set of features, one page of features at a time, or the remaining features (from the current cursor -position). +position). Returns a data frame with as many rows as features were fetched, +and as many columns as attribute plus geometry fields in the result set, +even if the result is a single value or has one or zero rows. This method is an analog of \href{https://dbi.r-dbi.org/reference/dbFetch.html}{\code{DBI::dbFetch()}}. -The \code{n} parameter is the maximum number of features to retrieve per fetch -given as a \code{numeric} value but assumed to be a whole number (will be -truncated). Use \code{n = -1} or \code{n = Inf} to retrieve all pending features +The \code{n} argument is the maximum number of features to retrieve per fetch +given as \code{integer} or \code{numeric} but assumed to be a whole number (will +be truncated). Use \code{n = -1} or \code{n = Inf} to retrieve all pending features (resets reading to the first feature). Otherwise, \verb{$fetch()} can be called multiple times to perform forward paging from the current cursor position. Passing \code{n = NA} is also supported and @@ -343,8 +352,8 @@ when \code{returnGeomAs} is set to \code{WKB} or \code{WKB_ISO}. Otherwise, geom returned as \code{character} strings when \code{returnGeomAs} is set to one of \code{WKT}, \code{WKT_ISO} or \code{TYPE_NAME}. -Note that \verb{$getFeatureCount()} is called internally when fetching all -features or all remaining features (but not for a page of features). +Note that \verb{$getFeatureCount()} is called internally when fetching the full +feature set or all remaining features (but not for a page of features). \code{$close()}\cr Closes the vector dataset (no return value, called for side effects). 
From 4ef29c447045327166850dcc5bc491774985f408 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Tue, 23 Jul 2024 00:03:15 -0600 Subject: [PATCH 51/53] add GDALVector-class --- _pkgdown.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 259e6e2b7..253b36328 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -21,8 +21,9 @@ reference: - title: Exposed C++ classes - contents: - - CmbTable-class - GDALRaster-class + - GDALVector-class + - CmbTable-class - RunningStats-class - VSIFile-class From 36f9b15cc2443b2fc297a3edf22da4ab10c4ed3b Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Tue, 23 Jul 2024 10:29:41 -0600 Subject: [PATCH 52/53] fix test for GDAL < 3.7 --- tests/testthat/test-GDALVector-class.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-GDALVector-class.R b/tests/testthat/test-GDALVector-class.R index b0212cf7f..e9345ae87 100644 --- a/tests/testthat/test-GDALVector-class.R +++ b/tests/testthat/test-GDALVector-class.R @@ -1,8 +1,8 @@ # Tests for src/gdalvector.cpp test_that("class constructors work", { f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package="gdalraster") - dsn <- paste0("/vsimem/", basename(f)) - vsi_copy_file(f, dsn) + dsn <- file.path(tempdir(), basename(f)) + file.copy(f, dsn, overwrite = TRUE) lyr <- new(GDALVector, dsn) expect_equal(lyr$getName(), "mtbs_perims") @@ -37,5 +37,5 @@ test_that("class constructors work", { expect_equal(lyr$getFeatureCount(), 40) lyr$close() - vsi_unlink(dsn) + unlink(dsn) }) From f594340ca4210b6a46d5e7ee9b4488e5ee4e28c5 Mon Sep 17 00:00:00 2001 From: Chris Toney Date: Tue, 23 Jul 2024 10:39:47 -0600 Subject: [PATCH 53/53] fix examples for GDAL < 3.7 --- R/gdalvector.R | 8 ++++---- man/GDALVector-class.Rd | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/gdalvector.R b/R/gdalvector.R index 9ba8c1849..bcc519556 100644 --- a/R/gdalvector.R +++ b/R/gdalvector.R @@ -359,9 +359,9 @@ #' # 
MTBS fire perimeters in Yellowstone National Park 1984-2022 #' f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package = "gdalraster") #' -#' # copy to a temporary in-memory file that is writeable -#' dsn <- file.path("/vsimem", basename(f)) -#' vsi_copy_file(f, dsn) +#' # copy to a temporary file that is writeable +#' dsn <- file.path(tempdir(), basename(f)) +#' file.copy(f, dsn) #' #' lyr <- new(GDALVector, dsn, "mtbs_perims") #' @@ -467,7 +467,7 @@ #' lyr$getFeatureCount() #' #' lyr$close() -#' vsi_unlink(dsn) +#' unlink(dsn) NULL Rcpp::loadModule("mod_GDALVector", TRUE) diff --git a/man/GDALVector-class.Rd b/man/GDALVector-class.Rd index d81563e0a..9b47e1629 100644 --- a/man/GDALVector-class.Rd +++ b/man/GDALVector-class.Rd @@ -369,9 +369,9 @@ The layer can be re-opened on the existing \code{dsn} with # MTBS fire perimeters in Yellowstone National Park 1984-2022 f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package = "gdalraster") -# copy to a temporary in-memory file that is writeable -dsn <- file.path("/vsimem", basename(f)) -vsi_copy_file(f, dsn) +# copy to a temporary file that is writeable +dsn <- file.path(tempdir(), basename(f)) +file.copy(f, dsn) lyr <- new(GDALVector, dsn, "mtbs_perims") @@ -477,7 +477,7 @@ lyr$clearSpatialFilter() lyr$getFeatureCount() lyr$close() -vsi_unlink(dsn) +unlink(dsn) } \seealso{ \link{ogr_define}, \link{ogr_manage}, \code{\link[=ogr2ogr]{ogr2ogr()}}, \code{\link[=ogrinfo]{ogrinfo()}}