diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 16c15b811..ecd13956a 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -2,3 +2,5 @@ # Add .editorconfig and bulk reformat codebase 44c885936e1a948990b80faffa06d6f8fb55e435 fe6c783be086d71adea69bbeacae35c91f68ba2b +eb9b8b3cd8176a81b512973c61e2605b181a7b11 +5146434168160d7ca1e7b73b5a4bfa1525e29ba6 diff --git a/DESCRIPTION b/DESCRIPTION index f3dfe0b68..36a242bdf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: gdalraster Title: Bindings to the 'Geospatial Data Abstraction Library' Raster API -Version: 1.11.1.9050 +Version: 1.11.1.9100 Authors@R: c( person("Chris", "Toney", email = "chris.toney@usda.gov", role = c("aut", "cre"), comment = "R interface/additional functionality"), @@ -60,7 +60,8 @@ Suggests: knitr, rmarkdown, scales, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + wk NeedsCompilation: yes SystemRequirements: GDAL (>= 3.1.0, built against GEOS), PROJ, libxml2 Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index 4ca7b2494..439c036da 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# gdalraster 1.11.1.9050 (dev) +# gdalraster 1.11.1.9100 (dev) + +* add class `GDALVector`, initial implementation / WIP (2024-07-22) * `plot_raster()`: default to no stretch when the input is an RGB Byte raster, addresses #429 (2024-07-10) diff --git a/R/gdalvector.R b/R/gdalvector.R new file mode 100644 index 000000000..bcc519556 --- /dev/null +++ b/R/gdalvector.R @@ -0,0 +1,473 @@ +#' @name GDALVector-class +#' +#' @aliases +#' Rcpp_GDALVector Rcpp_GDALVector-class GDALVector +#' +#' @title Class encapsulating a vector layer in a GDAL dataset +#' +#' @description +#' `GDALVector` provides an interface for accessing a vector layer in a GDAL +#' dataset and calling methods on the underlying `OGRLayer` object. 
+#' An object of class `GDALVector` persists an open connection to the dataset, +#' and exposes methods for retrieving layer information, setting attribute and +#' spatial filters, and reading/writing feature data. +#' See \url{https://gdal.org/api/index.html} for details of the GDAL +#' Vector API. +#' +#' **Class `GDALVector` is currently under development**. An initial +#' implementation supporting read access was added in gdalraster 1.11.1.9100. +#' A working document with draft specifications is available at:\cr +#' \url{https://usdaforestservice.github.io/gdalraster/articles/gdalvector-draft.html}\cr +#' and discussion thread/status updates at:\cr +#' \url{https://github.com/USDAForestService/gdalraster/issues/241}. +#' +#' @param dsn Character string containing the data source name (DSN), usually a +#' filename or database connection string. +#' @param layer Character string containing the name of a layer within the +#' data source. May also be given as an SQL SELECT statement to be executed +#' against the data source, defining a layer as the result set. +#' @param read_only Logical scalar. `TRUE` to open the layer read-only (the +#' default), or `FALSE` to open with write access. +#' @param open_options Optional character vector of `NAME=VALUE` pairs +#' specifying dataset open options. +#' @param spatial_filter Optional character string containing a geometry in +#' Well Known Text (WKT) format which represents a spatial filter. +#' @param dialect Optional character string to control the statement dialect +#' when SQL is used to define the layer. By default, the OGR SQL engine will +#' be used, except for RDBMS drivers that will use their dedicated SQL engine, +#' unless `"OGRSQL"` is explicitly passed as the dialect. The `"SQLITE"` +#' dialect can also be used. +#' @returns An object of class `GDALVector` which contains pointers to the +#' opened layer and the dataset that contains it, and methods that operate on +#' the layer as described in Details. 
`GDALVector` is a C++ class exposed +#' directly to R (via `RCPP_EXPOSED_CLASS`). Fields and methods of the class +#' are accessed using the `$` operator. The read/write fields are per-object +#' settings which can be changed as needed during the lifetime of the object. +#' +#' @section Usage (see Details): +#' \preformatted{ +#' ## Constructors +#' # for single-layer file formats such as shapefile +#' lyr <- new(GDALVector, dsn) +#' # specifying the layer name, or SQL statement defining the layer +#' lyr <- new(GDALVector, dsn, layer) +#' # for update access +#' lyr <- new(GDALVector, dsn, layer, read_only = FALSE) +#' # using dataset open options +#' lyr <- new(GDALVector, dsn, layer, read_only, open_options) +#' # setting a spatial filter and/or specifying the SQL dialect +#' lyr <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) +#' +#' ## Read/write fields +#' lyr$defaultGeomFldName +#' lyr$returnGeomAs +#' lyr$wkbByteOrder +#' +#' ## Methods +#' lyr$open(read_only) +#' lyr$isOpen() +#' lyr$getDsn() +#' lyr$getFileList() +#' lyr$getDriverShortName() +#' lyr$getDriverLongName() +#' +#' lyr$getName() +#' lyr$testCapability() +#' lyr$getFIDColumn() +#' lyr$getGeomType() +#' lyr$getGeometryColumn() +#' lyr$getSpatialRef() +#' lyr$bbox() +#' lyr$getLayerDefn() +#' +#' lyr$setAttributeFilter(query) +#' lyr$setSpatialFilterRect(bbox) +#' lyr$clearSpatialFilter() +#' +#' lyr$getFeatureCount() +#' lyr$getNextFeature() +#' lyr$getFeature(fid) +#' lyr$resetReading() +#' lyr$fetch(n) +#' +#' lyr$close() +#' } +#' @section Details: +#' ## Constructors +#' +#' \code{new(GDALVector, dsn)}\cr +#' The first layer by index is assumed if the `layer` argument is omitted, so +#' this form of the constructor might be used for single-layer formats like +#' shapefile. +#' +#' \code{new(GDALVector, dsn, layer)}\cr +#' Constructor specifying the name of a layer to open. 
The `layer` argument +#' may also be given as an SQL SELECT statement to define a layer as the result +#' set. +#' +#' \code{new(GDALVector, dsn, layer, read_only)}\cr +#' Constructor specifying read/write access (`read_only = {TRUE|FALSE}`). +#' The `layer` argument is required in this form of the constructor, but may be +#' given as empty string (`""`), in which case the first layer by index will be +#' assumed. +#' +#' \code{new(GDALVector, dsn, layer, read_only, open_options)}\cr +#' Constructor specifying dataset open options as a character vector of +#' `NAME=VALUE` pairs. +#' +#' \code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect)}\cr +#' Constructor to specify a spatial filter and/or SQL dialect. All arguments +#' are required in this form of the constructor, but `open_options` may be +#' `NULL`, and `spatial_filter` or `dialect` may be an empty string (`""`). +#' +#' ## Read/write fields +#' +#' \code{$defaultGeomFldName}\cr +#' Character string specifying a name to use for returned columns when the +#' geometry column name in the source layer is empty, like with shapefiles etc. +#' Defaults to `"geometry"`. +#' +#' \code{$returnGeomAs}\cr +#' Character string specifying the return format of feature geometries. +#' Must be one of `WKT`, `WKT_ISO`, `WKB`, `WKB_ISO`, `TYPE_NAME` or +#' `NONE` (the default). `WKB`/`WKT` export old-style 99-402 extended +#' dimension (Z) types for Point, LineString, Polygon, MultiPoint, +#' MultiLineString, MultiPolygon and GeometryCollection. For other geometry +#' types, it is equivalent to using `WKB_ISO`/`WKT_ISO` +#' (see \url{https://libgeos.org/specifications/wkb/}). +#' +#' \code{$wkbByteOrder}\cr +#' Character string specifying the byte order for WKB geometries. +#' Must be either `LSB` (Least Significant Byte first, the default) or +#' `MSB` (Most Significant Byte first). +#' +#' ## Methods +#' +#' \code{$open(read_only)}\cr +#' (Re-)opens the vector layer on the existing DSN. 
Use this method to +#' open a layer that has been closed using \code{$close()}. May be used to +#' re-open a layer with a different read/write access (`read_only` set to +#' `TRUE` or `FALSE`). The method will first close an open dataset, so it is +#' not required to call \code{$close()} explicitly in this case. +#' No return value, called for side effects. +#' +#' \code{$isOpen()}\cr +#' Returns a `logical` scalar indicating whether the vector dataset is open. +#' +#' \code{$getDsn()}\cr +#' Returns a character string containing the `dsn` associated with this +#' `GDALVector` object (`dsn` originally used to open the layer). +#' +#' \code{$getFileList()}\cr +#' Returns a character vector of files believed to be part of the data source. +#' If it returns an empty string (`""`) it means there is believed to be no +#' local file system files associated with the dataset (e.g., a virtual file +#' system). The returned filenames will normally be relative or absolute +#' paths depending on the path used to originally open the dataset. +#' +#' \code{$getDriverShortName()}\cr +#' Returns the short name of the vector format driver. +#' +#' \code{$getDriverLongName()}\cr +#' Returns the long name of the vector format driver. +#' +#' \code{$getName()}\cr +#' Returns the layer name. +#' +#' \code{$testCapability()}\cr +#' Tests whether the layer supports named capabilities based on the current +#' read/write access. Returns a list of capabilities with values `TRUE` or +#' `FALSE`. The returned list contains the following named elements: +#' `RandomRead`, `SequentialWrite`, `RandomWrite`, `UpsertFeature`, +#' `FastSpatialFilter`, `FastFeatureCount`, `FastGetExtent`, +#' `FastSetNextByIndex`, `CreateField`, `CreateGeomField`, `DeleteField`, +#' `ReorderFields`, `AlterFieldDefn`, `AlterGeomFieldDefn`, `DeleteFeature`, +#' `StringsAsUTF8`, `Transactions`, `CurveGeometries`. 
+#' (See the GDAL documentation for +#' [`OGR_L_TestCapability()`](https://gdal.org/api/vector_c_api.html#_CPPv420OGR_L_TestCapability9OGRLayerHPKc).) +#' +#' \code{$getFIDColumn()}\cr +#' Returns the name of the underlying database column being used as the FID +#' column, or empty string (`""`) if not supported. +#' +#' \code{$getGeomType()}\cr +#' Returns the well known name of the layer geometry type as character string. +#' For layers with multiple geometry fields, this method only returns the +#' geometry type of the first geometry column. For other columns, use +#' `$getLayerDefn()`. For layers without any geometry field, this method +#' returns `"NONE"`. +#' +#' \code{$getGeometryColumn()}\cr +#' Returns the name of the underlying database column being used as the geometry +#' column, or an empty string (`""`) if not supported. +#' For layers with multiple geometry fields, this method only returns the +#' name of the first geometry column. For other columns, use `$getLayerDefn()`. +#' +#' \code{$getSpatialRef()}\cr +#' Returns a WKT string containing the spatial reference system for this layer. +#' +#' \code{$bbox()}\cr +#' Returns a numeric vector of length four containing the bounding box +#' for this layer (xmin, ymin, xmax, ymax). Note that `bForce = true` is set in +#' the underlying API call to `OGR_L_GetExtent()`, so the entire layer may be +#' scanned to compute a minimum bounding rectangle (see `FastGetExtent` in the +#' list returned by `$testCapability()`). Depending on the format driver, a +#' spatial filter may or may not be taken into account, so it is safer to call +#' `$bbox()` without setting a spatial filter. +#' +#' \code{$getLayerDefn()}\cr +#' Returns a list containing the OGR feature class definition for this layer +#' (a.k.a. layer definition). The list contains zero or more attribute field +#' definitions, along with one or more geometry field definitions. +#' See [ogr_define] for details of the field and feature class definitions. 
+#' +#' \code{$setAttributeFilter(query)}\cr +#' Sets an attribute query string to be used when fetching features via the +#' `$getNextFeature()` or `$fetch()` methods. +#' Only features for which `query` evaluates as true will be returned. +#' The query string should be in the format of an SQL WHERE clause, described +#' in the ["WHERE"](https://gdal.org/user/ogr_sql_dialect.html#where) +#' section of the OGR SQL dialect documentation (e.g., +#' `"population > 1000000 and population < 5000000"`, where `population` is an +#' attribute in the layer). +#' In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native +#' capabilities of the database may be used to interpret the WHERE clause, +#' in which case the capabilities will be broader than those of OGR SQL. +#' Note that installing a query string will generally result in resetting the +#' current reading position (as with `$resetReading()` described below). +#' The `query` parameter may be set to empty string (`""`) to clear the current +#' attribute filter. +#' +#' \code{$setSpatialFilterRect(bbox)}\cr +#' Sets a new rectangular spatial filter. This method sets a rectangle to be +#' used as a spatial filter when fetching features via the `$getNextFeature()` +#' or `$fetch()` methods. Only features that geometrically intersect the given +#' rectangle will be returned. +#' `bbox` is a numeric vector of length four containing xmin, ymin, xmax, ymax +#' in the same coordinate system as the layer as a whole (as returned by +#' `$getSpatialRef()`). +#' +#' \code{$clearSpatialFilter()}\cr +#' Clears a spatial filter that was set with `$setSpatialFilterRect()`. +#' No return value, called for that side effect. +#' +#' \code{$getFeatureCount()}\cr +#' Returns the number of features in the layer. For dynamic databases the count +#' may not be exact. This method forces a count in the underlying API call +#' (i.e., `bForce = TRUE` in the call to `OGR_L_GetFeatureCount()`). 
Note that +#' some vector drivers will actually scan the entire layer once to count +#' features. The `FastFeatureCount` element in the list returned by +#' `$testCapability()` can be checked if this might be a concern. +#' The number of features returned takes into account the spatial and/or +#' attribute filters. Some driver implementations of this method may alter the +#' read cursor of the layer. +#' +#' \code{$getNextFeature()}\cr +#' Fetch the next available feature from this layer. Only features matching the +#' current spatial and/or attribute filter (if defined) will be returned. +#' This method implements sequential access to the features of a layer. +#' The `$resetReading()` method can be used to start at the beginning again. +#' Returns a list with the unique feature identifier (FID), the attribute and +#' geometry field names, and their values. `NULL` is returned if no more +#' features are available. +#' +#' \code{$getFeature(fid)}\cr +#' Returns a feature by its identifier. The value of `fid` must be a numeric +#' scalar, optionally carrying the `bit64::integer64` class attribute. +#' Success or failure of this operation is unaffected by any spatial or +#' attribute filters that may be in effect. +#' The `RandomRead` element in the list returned by `$testCapability()` can +#' be checked to establish if this layer supports efficient random access +#' reading; however, the call should always work if the feature exists since a +#' fallback implementation just scans all the features in the layer looking for +#' the desired feature. Returns a list with the unique feature identifier (FID), +#' the attribute and geometry field names, and their values, or `NULL` on +#' failure. Note that sequential reads (with `$getNextFeature()`) are generally +#' considered interrupted by a call to `$getFeature()`. +#' +#' \code{$resetReading()}\cr +#' Reset feature reading to start on the first feature. No return value, called +#' for that side effect. 
+#' +#' \code{$fetch(n)}\cr +#' Fetches the next `n` features from the layer and returns them as a data +#' frame. This allows retrieving the entire set of features, one page of +#' features at a time, or the remaining features (from the current cursor +#' position). Returns a data frame with as many rows as features were fetched, +#' and as many columns as attribute plus geometry fields in the result set, +#' even if the result is a single value or has one or zero rows. +#' +#' This method is an analog of +#' [`DBI::dbFetch()`](https://dbi.r-dbi.org/reference/dbFetch.html). +#' +#' The `n` argument is the maximum number of features to retrieve per fetch +#' given as `integer` or `numeric` but assumed to be a whole number (will +#' be truncated). Use `n = -1` or `n = Inf` to retrieve all pending features +#' (resets reading to the first feature). +#' Otherwise, `$fetch()` can be called multiple times to perform forward paging +#' from the current cursor position. Passing `n = NA` is also supported and +#' returns the remaining features. +#' Fetching zero features is possible to retrieve the structure of the feature +#' set as a data frame (columns fully typed). 
+#' +#' OGR field types are returned as the following R types (`NA` for OGR NULL +#' values): +#' * `OFTInteger`: `integer` +#' * `OFTInteger` subtype `OFSTBoolean`: `logical` +#' * `OFTIntegerList`: vector of `integer` (list column) +#' * `OFTInteger64`: `bit64::integer64` +#' * `OFTInteger64` subtype `OFSTBoolean`: `logical` +#' * `OFTInteger64List`: vector of `bit64::integer64` (list column) +#' * `OFTReal`: `numeric` +#' * `OFTRealList`: vector of `numeric` (list column) +#' * `OFTString`: `character` string +#' * `OFTStringList`: vector of `character` strings (list column) +#' * `OFTDate`: `Date` +#' * `OFTDateTime`: `POSIXct` (millisecond accuracy and adjustment for time zone +#' flag if present) +#' * `OFTBinary`: `raw` vector (list column, `NULL` entries for OGR NULL values) +#' +#' Geometries are not returned if the field `returnGeomAs` is set to `NONE` +#' (currently the default). Omitting the geometries may be beneficial for +#' performance and memory usage when access only to feature attributes is +#' needed. Geometries are returned as `raw` vectors in a data frame list column +#' when `returnGeomAs` is set to `WKB` or `WKB_ISO`. Otherwise, geometries are +#' returned as `character` strings when `returnGeomAs` is set to one of `WKT`, +#' `WKT_ISO` or `TYPE_NAME`. +#' +#' Note that `$getFeatureCount()` is called internally when fetching the full +#' feature set or all remaining features (but not for a page of features). +#' +#' \code{$close()}\cr +#' Closes the vector dataset (no return value, called for side effects). +#' Calling \code{$close()} results in proper cleanup, and flushing of any +#' pending writes. +#' The `GDALVector` object is still available after calling \code{$close()}. +#' The layer can be re-opened on the existing \code{dsn} with +#' \code{$open(read_only = {TRUE|FALSE})}. 
+#' +#' @seealso +#' [ogr_define], [ogr_manage], [ogr2ogr()], [ogrinfo()] +#' +#' GDAL vector format descriptions:\cr +#' \url{https://gdal.org/drivers/vector/index.html} +#' +#' GDAL-supported SQL dialects:\cr +#' \url{https://gdal.org/user/ogr_sql_sqlite_dialect.html} +#' +#' @examples +#' # MTBS fire perimeters in Yellowstone National Park 1984-2022 +#' f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package = "gdalraster") +#' +#' # copy to a temporary file that is writeable +#' dsn <- file.path(tempdir(), basename(f)) +#' file.copy(f, dsn) +#' +#' lyr <- new(GDALVector, dsn, "mtbs_perims") +#' +#' # object of class GDALVector +#' lyr +#' str(lyr) +#' +#' # dataset info +#' lyr$getDriverShortName() +#' lyr$getDriverLongName() +#' lyr$getFileList() +#' +#' # layer info +#' lyr$getName() +#' lyr$getGeomType() +#' lyr$getGeometryColumn() +#' lyr$getFIDColumn() +#' lyr$getSpatialRef() +#' lyr$bbox() +#' +#' # layer capabilities +#' lyr$testCapability() +#' +#' # re-open with write access +#' lyr$open(read_only = FALSE) +#' lyr$testCapability()$SequentialWrite +#' lyr$testCapability()$RandomWrite +#' +#' # feature class definition - a list of field names and their definitions +#' defn <- lyr$getLayerDefn() +#' names(defn) +#' str(defn) +#' +#' # default value of the read/write field 'returnGeomAs' +#' print(lyr$returnGeomAs) +#' +#' lyr$getFeatureCount() +#' +#' # sequential read cursor +#' feat <- lyr$getNextFeature() +#' # a list of field names and their values +#' str(feat) +#' +#' # set an attribute filter +#' lyr$setAttributeFilter("ig_year = 2020") +#' lyr$getFeatureCount() +#' +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' # NULL when no more features are available +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' # reset reading to the start and return geometries as WKT +#' lyr$resetReading() +#' lyr$returnGeomAs <- "WKT" +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' # clear the attribute filter +#' lyr$setAttributeFilter("") +#' 
lyr$getFeatureCount() +#' +#' # set a spatial filter +#' # get the bounding box of the largest 1988 fire and use as spatial filter +#' # first set a temporary attribute filter to do the lookup +#' lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") +#' feat <- lyr$getNextFeature() +#' str(feat) +#' +#' bbox <- bbox_from_wkt(feat$geom) +#' print(bbox) +#' +#' # set spatial filter on the full layer +#' lyr$setAttributeFilter("") +#' lyr$setSpatialFilterRect(bbox) +#' lyr$getFeatureCount() +#' +#' # fetch in chunks and return as data frame +#' d <- lyr$fetch(20) +#' str(d) +#' +#' # the next chunk +#' d <- lyr$fetch(20) +#' nrow(d) +#' +#' # no features remaining +#' d <- lyr$fetch(20) +#' nrow(d) +#' str(d) # 0-row data frame with columns typed +#' +#' # fetch all pending features with geometries as WKB +#' lyr$returnGeomAs <- "WKB" +#' d <- lyr$fetch(-1) # resets reading to the first feature +#' str(d) +#' +#' # parse WKB using package wk +#' wk_obj <- wk::wkb(d$geom, crs = lyr$getSpatialRef()) +#' plot(wk_obj) +#' +#' lyr$clearSpatialFilter() +#' lyr$getFeatureCount() +#' +#' lyr$close() +#' unlink(dsn) +NULL + +Rcpp::loadModule("mod_GDALVector", TRUE) diff --git a/_pkgdown.yml b/_pkgdown.yml index 259e6e2b7..253b36328 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -21,8 +21,9 @@ reference: - title: Exposed C++ classes - contents: - - CmbTable-class - GDALRaster-class + - GDALVector-class + - CmbTable-class - RunningStats-class - VSIFile-class diff --git a/man/GDALVector-class.Rd b/man/GDALVector-class.Rd new file mode 100644 index 000000000..9b47e1629 --- /dev/null +++ b/man/GDALVector-class.Rd @@ -0,0 +1,490 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gdalvector.R +\name{GDALVector-class} +\alias{GDALVector-class} +\alias{Rcpp_GDALVector} +\alias{Rcpp_GDALVector-class} +\alias{GDALVector} +\title{Class encapsulating a vector layer in a GDAL dataset} +\arguments{ +\item{dsn}{Character string containing the 
data source name (DSN), usually a +filename or database connection string.} + +\item{layer}{Character string containing the name of a layer within the +data source. May also be given as an SQL SELECT statement to be executed +against the data source, defining a layer as the result set.} + +\item{read_only}{Logical scalar. \code{TRUE} to open the layer read-only (the +default), or \code{FALSE} to open with write access.} + +\item{open_options}{Optional character vector of \code{NAME=VALUE} pairs +specifying dataset open options.} + +\item{spatial_filter}{Optional character string containing a geometry in +Well Known Text (WKT) format which represents a spatial filter.} + +\item{dialect}{Optional character string to control the statement dialect +when SQL is used to define the layer. By default, the OGR SQL engine will +be used, except for RDBMS drivers that will use their dedicated SQL engine, +unless \code{"OGRSQL"} is explicitly passed as the dialect. The \code{"SQLITE"} +dialect can also be used.} +} +\value{ +An object of class \code{GDALVector} which contains pointers to the +opened layer and the dataset that contains it, and methods that operate on +the layer as described in Details. \code{GDALVector} is a C++ class exposed +directly to R (via \code{RCPP_EXPOSED_CLASS}). Fields and methods of the class +are accessed using the \code{$} operator. The read/write fields are per-object +settings which can be changed as needed during the lifetime of the object. +} +\description{ +\code{GDALVector} provides an interface for accessing a vector layer in a GDAL +dataset and calling methods on the underlying \code{OGRLayer} object. +An object of class \code{GDALVector} persists an open connection to the dataset, +and exposes methods for retrieving layer information, setting attribute and +spatial filters, and reading/writing feature data. +See \url{https://gdal.org/api/index.html} for details of the GDAL +Vector API. 
+ +\strong{Class \code{GDALVector} is currently under development}. An initial +implementation supporting read access was added in gdalraster 1.11.1.9100. +A working document with draft specifications is available at:\cr +\url{https://usdaforestservice.github.io/gdalraster/articles/gdalvector-draft.html}\cr +and discussion thread/status updates at:\cr +\url{https://github.com/USDAForestService/gdalraster/issues/241}. +} +\section{Usage (see Details)}{ + +\preformatted{ +## Constructors +# for single-layer file formats such as shapefile +lyr <- new(GDALVector, dsn) +# specifying the layer name, or SQL statement defining the layer +lyr <- new(GDALVector, dsn, layer) +# for update access +lyr <- new(GDALVector, dsn, layer, read_only = FALSE) +# using dataset open options +lyr <- new(GDALVector, dsn, layer, read_only, open_options) +# setting a spatial filter and/or specifying the SQL dialect +lyr <- new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect) + +## Read/write fields +lyr$defaultGeomFldName +lyr$returnGeomAs +lyr$wkbByteOrder + +## Methods +lyr$open(read_only) +lyr$isOpen() +lyr$getDsn() +lyr$getFileList() +lyr$getDriverShortName() +lyr$getDriverLongName() + +lyr$getName() +lyr$testCapability() +lyr$getFIDColumn() +lyr$getGeomType() +lyr$getGeometryColumn() +lyr$getSpatialRef() +lyr$bbox() +lyr$getLayerDefn() + +lyr$setAttributeFilter(query) +lyr$setSpatialFilterRect(bbox) +lyr$clearSpatialFilter() + +lyr$getFeatureCount() +lyr$getNextFeature() +lyr$getFeature(fid) +lyr$resetReading() +lyr$fetch(n) + +lyr$close() +} +} + +\section{Details}{ + +\subsection{Constructors}{ + +\code{new(GDALVector, dsn)}\cr +The first layer by index is assumed if the \code{layer} argument is omitted, so +this form of the constructor might be used for single-layer formats like +shapefile. + +\code{new(GDALVector, dsn, layer)}\cr +Constructor specifying the name of a layer to open. 
The \code{layer} argument +may also be given as an SQL SELECT statement to define a layer as the result +set. + +\code{new(GDALVector, dsn, layer, read_only)}\cr +Constructor specifying read/write access (\verb{read_only = \{TRUE|FALSE\}}). +The \code{layer} argument is required in this form of the constructor, but may be +given as empty string (\code{""}), in which case the first layer by index will be +assumed. + +\code{new(GDALVector, dsn, layer, read_only, open_options)}\cr +Constructor specifying dataset open options as a character vector of +\code{NAME=VALUE} pairs. + +\code{new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect)}\cr +Constructor to specify a spatial filter and/or SQL dialect. All arguments +are required in this form of the constructor, but \code{open_options} may be +\code{NULL}, and \code{spatial_filter} or \code{dialect} may be an empty string (\code{""}). +} + +\subsection{Read/write fields}{ + +\code{$defaultGeomFldName}\cr +Character string specifying a name to use for returned columns when the +geometry column name in the source layer is empty, like with shapefiles etc. +Defaults to \code{"geometry"}. + +\code{$returnGeomAs}\cr +Character string specifying the return format of feature geometries. +Must be one of \code{WKT}, \code{WKT_ISO}, \code{WKB}, \code{WKB_ISO}, \code{TYPE_NAME} or +\code{NONE} (the default). \code{WKB}/\code{WKT} export old-style 99-402 extended +dimension (Z) types for Point, LineString, Polygon, MultiPoint, +MultiLineString, MultiPolygon and GeometryCollection. For other geometry +types, it is equivalent to using \code{WKB_ISO}/\code{WKT_ISO} +(see \url{https://libgeos.org/specifications/wkb/}). + +\code{$wkbByteOrder}\cr +Character string specifying the byte order for WKB geometries. +Must be either \code{LSB} (Least Significant Byte first, the default) or +\code{MSB} (Most Significant Byte first). 
+} + +\subsection{Methods}{ + +\code{$open(read_only)}\cr +(Re-)opens the vector layer on the existing DSN. Use this method to +open a layer that has been closed using \code{$close()}. May be used to +re-open a layer with a different read/write access (\code{read_only} set to +\code{TRUE} or \code{FALSE}). The method will first close an open dataset, so it is +not required to call \code{$close()} explicitly in this case. +No return value, called for side effects. + +\code{$isOpen()}\cr +Returns a \code{logical} scalar indicating whether the vector dataset is open. + +\code{$getDsn()}\cr +Returns a character string containing the \code{dsn} associated with this +\code{GDALVector} object (\code{dsn} originally used to open the layer). + +\code{$getFileList()}\cr +Returns a character vector of files believed to be part of the data source. +If it returns an empty string (\code{""}) it means there is believed to be no +local file system files associated with the dataset (e.g., a virtual file +system). The returned filenames will normally be relative or absolute +paths depending on the path used to originally open the dataset. + +\code{$getDriverShortName()}\cr +Returns the short name of the vector format driver. + +\code{$getDriverLongName()}\cr +Returns the long name of the vector format driver. + +\code{$getName()}\cr +Returns the layer name. + +\code{$testCapability()}\cr +Tests whether the layer supports named capabilities based on the current +read/write access. Returns a list of capabilities with values \code{TRUE} or +\code{FALSE}. 
The returned list contains the following named elements: +\code{RandomRead}, \code{SequentialWrite}, \code{RandomWrite}, \code{UpsertFeature}, +\code{FastSpatialFilter}, \code{FastFeatureCount}, \code{FastGetExtent}, +\code{FastSetNextByIndex}, \code{CreateField}, \code{CreateGeomField}, \code{DeleteField}, +\code{ReorderFields}, \code{AlterFieldDefn}, \code{AlterGeomFieldDefn}, \code{DeleteFeature}, +\code{StringsAsUTF8}, \code{Transactions}, \code{CurveGeometries}. +(See the GDAL documentation for +\href{https://gdal.org/api/vector_c_api.html#_CPPv420OGR_L_TestCapability9OGRLayerHPKc}{\code{OGR_L_TestCapability()}}.) + +\code{$getFIDColumn()}\cr +Returns the name of the underlying database column being used as the FID +column, or empty string (\code{""}) if not supported. + +\code{$getGeomType()}\cr +Returns the well known name of the layer geometry type as character string. +For layers with multiple geometry fields, this method only returns the +geometry type of the first geometry column. For other columns, use +\verb{$getLayerDefn()}. For layers without any geometry field, this method +returns \code{"NONE"}. + +\code{$getGeometryColumn()}\cr +Returns the name of the underlying database column being used as the geometry +column, or an empty string (\code{""}) if not supported. +For layers with multiple geometry fields, this method only returns the +name of the first geometry column. For other columns, use \verb{$getLayerDefn()}. + +\code{$getSpatialRef()}\cr +Returns a WKT string containing the spatial reference system for this layer. + +\code{$bbox()}\cr +Returns a numeric vector of length four containing the bounding box +for this layer (xmin, ymin, xmax, ymax). Note that \code{bForce = true} is set in +the underlying API call to \code{OGR_L_GetExtent()}, so the entire layer may be +scanned to compute a minimum bounding rectangle (see \code{FastGetExtent} in the +list returned by \verb{$testCapability()}). 
Depending on the format driver, a +spatial filter may or may not be taken into account, so it is safer to call +\verb{$bbox()} without setting a spatial filter. + +\code{$getLayerDefn()}\cr +Returns a list containing the OGR feature class definition for this layer +(a.k.a. layer definition). The list contains zero or more attribute field +definitions, along with one or more geometry field definitions. +See \link{ogr_define} for details of the field and feature class definitions. + +\code{$setAttributeFilter(query)}\cr +Sets an attribute query string to be used when fetching features via the +\verb{$getNextFeature()} or \verb{$fetch()} methods. +Only features for which \code{query} evaluates as true will be returned. +The query string should be in the format of an SQL WHERE clause, described +in the \href{https://gdal.org/user/ogr_sql_dialect.html#where}{"WHERE"} +section of the OGR SQL dialect documentation (e.g., +\code{"population > 1000000 and population < 5000000"}, where \code{population} is an +attribute in the layer). +In some cases (RDBMS backed drivers, SQLite, GeoPackage) the native +capabilities of the database may be used to interpret the WHERE clause, +in which case the capabilities will be broader than those of OGR SQL. +Note that installing a query string will generally result in resetting the +current reading position (as with \verb{$resetReading()} described below). +The \code{query} parameter may be set to empty string (\code{""}) to clear the current +attribute filter. + +\code{$setSpatialFilterRect(bbox)}\cr +Sets a new rectangular spatial filter. This method sets a rectangle to be +used as a spatial filter when fetching features via the \verb{$getNextFeature()} +or \verb{$fetch()} methods. Only features that geometrically intersect the given +rectangle will be returned. 
+\code{bbox} is a numeric vector of length four containing xmin, ymin, xmax, ymax +in the same coordinate system as the layer as a whole (as returned by +\verb{$getSpatialRef()}). + +\code{$clearSpatialFilter()}\cr +Clears a spatial filter that was set with \verb{$setSpatialFilterRect()}. +No return value, called for that side effect. + +\code{$getFeatureCount()}\cr +Returns the number of features in the layer. For dynamic databases the count +may not be exact. This method forces a count in the underlying API call +(i.e., \code{bForce = TRUE} in the call to \code{OGR_L_GetFeatureCount()}). Note that +some vector drivers will actually scan the entire layer once to count +features. The \code{FastFeatureCount} element in the list returned by +\verb{$testCapability()} can be checked if this might be a concern. +The number of features returned takes into account the spatial and/or +attribute filters. Some driver implementations of this method may alter the +read cursor of the layer. + +\code{$getNextFeature()}\cr +Fetch the next available feature from this layer. Only features matching the +current spatial and/or attribute filter (if defined) will be returned. +This method implements sequential access to the features of a layer. +The \verb{$resetReading()} method can be used to start at the beginning again. +Returns a list with the unique feature identifier (FID), the attribute and +geometry field names, and their values. \code{NULL} is returned if no more +features are available. + +\code{$getFeature(fid)}\cr +Returns a feature by its identifier. The value of \code{fid} must be a numeric +scalar, optionally carrying the \code{bit64::integer64} class attribute. +Success or failure of this operation is unaffected by any spatial or +attribute filters that may be in effect. 
+The \code{RandomRead} element in the list returned by \verb{$testCapability()} can +be checked to establish if this layer supports efficient random access +reading; however, the call should always work if the feature exists since a +fallback implementation just scans all the features in the layer looking for +the desired feature. Returns a list with the unique feature identifier (FID), +the attribute and geometry field names, and their values, or \code{NULL} on +failure. Note that sequential reads (with \verb{$getNextFeature()}) are generally +considered interrupted by a call to \verb{$getFeature()}. + +\code{$resetReading()}\cr +Reset feature reading to start on the first feature. No return value, called +for that side effect. + +\code{$fetch(n)}\cr +Fetches the next \code{n} features from the layer and returns them as a data +frame. This allows retrieving the entire set of features, one page of +features at a time, or the remaining features (from the current cursor +position). Returns a data frame with as many rows as features were fetched, +and as many columns as attribute plus geometry fields in the result set, +even if the result is a single value or has one or zero rows. + +This method is an analog of +\href{https://dbi.r-dbi.org/reference/dbFetch.html}{\code{DBI::dbFetch()}}. + +The \code{n} argument is the maximum number of features to retrieve per fetch +given as \code{integer} or \code{numeric} but assumed to be a whole number (will +be truncated). Use \code{n = -1} or \code{n = Inf} to retrieve all pending features +(resets reading to the first feature). +Otherwise, \verb{$fetch()} can be called multiple times to perform forward paging +from the current cursor position. Passing \code{n = NA} is also supported and +returns the remaining features. +Fetching zero features is possible to retrieve the structure of the feature +set as a data frame (columns fully typed). 
+ +OGR field types are returned as the following R types (\code{NA} for OGR NULL +values): +\itemize{ +\item \code{OFTInteger}: \code{integer} +\item \code{OFTInteger} subtype \code{OFSTBoolean}: \code{logical} +\item \code{OFTIntegerList}: vector of \code{integer} (list column) +\item \code{OFTInteger64}: \code{bit64::integer64} +\item \code{OFTInteger64} subtype \code{OFSTBoolean}: \code{logical} +\item \code{OFTInteger64List}: vector of \code{bit64::integer64} (list column) +\item \code{OFTReal}: \code{numeric} +\item \code{OFTRealList}: vector of \code{numeric} (list column) +\item \code{OFTString}: \code{character} string +\item \code{OFTStringList}: vector of \code{character} strings (list column) +\item \code{OFTDate}: \code{Date} +\item \code{OFTDateTime}: \code{POSIXct} (millisecond accuracy and adjustment for time zone +flag if present) +\item \code{OFTBinary}: \code{raw} vector (list column, \code{NULL} entries for OGR NULL values) +} + +Geometries are not returned if the field \code{returnGeomAs} is set to \code{NONE} +(currently the default). Omitting the geometries may be beneficial for +performance and memory usage when access only to feature attributes is +needed. Geometries are returned as \code{raw} vectors in a data frame list column +when \code{returnGeomAs} is set to \code{WKB} or \code{WKB_ISO}. Otherwise, geometries are +returned as \code{character} strings when \code{returnGeomAs} is set to one of \code{WKT}, +\code{WKT_ISO} or \code{TYPE_NAME}. + +Note that \verb{$getFeatureCount()} is called internally when fetching the full +feature set or all remaining features (but not for a page of features). + +\code{$close()}\cr +Closes the vector dataset (no return value, called for side effects). +Calling \code{$close()} results in proper cleanup, and flushing of any +pending writes. +The \code{GDALVector} object is still available after calling \code{$close()}. 
+The layer can be re-opened on the existing \code{dsn} with +\code{$open(read_only = {TRUE|FALSE})}. +} +} + +\examples{ +# MTBS fire perimeters in Yellowstone National Park 1984-2022 +f <- system.file("extdata/ynp_fires_1984_2022.gpkg", package = "gdalraster") + +# copy to a temporary file that is writeable +dsn <- file.path(tempdir(), basename(f)) +file.copy(f, dsn) + +lyr <- new(GDALVector, dsn, "mtbs_perims") + +# object of class GDALVector +lyr +str(lyr) + +# dataset info +lyr$getDriverShortName() +lyr$getDriverLongName() +lyr$getFileList() + +# layer info +lyr$getName() +lyr$getGeomType() +lyr$getGeometryColumn() +lyr$getFIDColumn() +lyr$getSpatialRef() +lyr$bbox() + +# layer capabilities +lyr$testCapability() + +# re-open with write access +lyr$open(read_only = FALSE) +lyr$testCapability()$SequentialWrite +lyr$testCapability()$RandomWrite + +# feature class definition - a list of field names and their definitions +defn <- lyr$getLayerDefn() +names(defn) +str(defn) + +# default value of the read/write field 'returnGeomAs' +print(lyr$returnGeomAs) + +lyr$getFeatureCount() + +# sequential read cursor +feat <- lyr$getNextFeature() +# a list of field names and their values +str(feat) + +# set an attribute filter +lyr$setAttributeFilter("ig_year = 2020") +lyr$getFeatureCount() + +feat <- lyr$getNextFeature() +str(feat) + +# NULL when no more features are available +feat <- lyr$getNextFeature() +str(feat) + +# reset reading to the start and return geometries as WKT +lyr$resetReading() +lyr$returnGeomAs <- "WKT" +feat <- lyr$getNextFeature() +str(feat) + +# clear the attribute filter +lyr$setAttributeFilter("") +lyr$getFeatureCount() + +# set a spatial filter +# get the bounding box of the largest 1988 fire and use as spatial filter +# first set a temporary attribute filter to do the lookup +lyr$setAttributeFilter("ig_year = 1988 ORDER BY burn_bnd_ac DESC") +feat <- lyr$getNextFeature() +str(feat) + +bbox <- bbox_from_wkt(feat$geom) +print(bbox) + +# set spatial 
filter on the full layer +lyr$setAttributeFilter("") +lyr$setSpatialFilterRect(bbox) +lyr$getFeatureCount() + +# fetch in chunks and return as data frame +d <- lyr$fetch(20) +str(d) + +# the next chunk +d <- lyr$fetch(20) +nrow(d) + +# no features remaining +d <- lyr$fetch(20) +nrow(d) +str(d) # 0-row data frame with columns typed + +# fetch all pending features with geometries as WKB +lyr$returnGeomAs <- "WKB" +d <- lyr$fetch(-1) # resets reading to the first feature +str(d) + +# parse WKB using package wk +wk_obj <- wk::wkb(d$geom, crs = lyr$getSpatialRef()) +plot(wk_obj) + +lyr$clearSpatialFilter() +lyr$getFeatureCount() + +lyr$close() +unlink(dsn) +} +\seealso{ +\link{ogr_define}, \link{ogr_manage}, \code{\link[=ogr2ogr]{ogr2ogr()}}, \code{\link[=ogrinfo]{ogrinfo()}} + +GDAL vector format descriptions:\cr +\url{https://gdal.org/drivers/vector/index.html} + +GDAL-supported SQL dialects:\cr +\url{https://gdal.org/user/ogr_sql_sqlite_dialect.html} +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 936a85f87..2945b6f7f 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1502,6 +1502,7 @@ END_RCPP RcppExport SEXP _rcpp_module_boot_mod_cmb_table(); RcppExport SEXP _rcpp_module_boot_mod_GDALRaster(); +RcppExport SEXP _rcpp_module_boot_mod_GDALVector(); RcppExport SEXP _rcpp_module_boot_mod_running_stats(); RcppExport SEXP _rcpp_module_boot_mod_VSIFile(); @@ -1627,6 +1628,7 @@ static const R_CallMethodDef CallEntries[] = { {"_gdalraster_bbox_to_wkt", (DL_FUNC) &_gdalraster_bbox_to_wkt, 3}, {"_rcpp_module_boot_mod_cmb_table", (DL_FUNC) &_rcpp_module_boot_mod_cmb_table, 0}, {"_rcpp_module_boot_mod_GDALRaster", (DL_FUNC) &_rcpp_module_boot_mod_GDALRaster, 0}, + {"_rcpp_module_boot_mod_GDALVector", (DL_FUNC) &_rcpp_module_boot_mod_GDALVector, 0}, {"_rcpp_module_boot_mod_running_stats", (DL_FUNC) &_rcpp_module_boot_mod_running_stats, 0}, {"_rcpp_module_boot_mod_VSIFile", (DL_FUNC) &_rcpp_module_boot_mod_VSIFile, 0}, {NULL, NULL, 0} diff --git 
a/src/gdalvector.cpp b/src/gdalvector.cpp new file mode 100644 index 000000000..bb63f29d4 --- /dev/null +++ b/src/gdalvector.cpp @@ -0,0 +1,1340 @@ +/* Implementation of class GDALVector. Encapsulates an OGRLayer and its + GDALDataset. Requires {bit64} on the R side for its integer64 S3 type. + + Chris Toney + Copyright (c) 2023-2024 gdalraster authors +*/ + +#include +#include +#include + +#include "gdal.h" +#include "cpl_port.h" +#include "cpl_string.h" +#include "cpl_time.h" +#include "ogr_srs_api.h" + +#include "gdalraster.h" +#include "gdalvector.h" +#include "ogr_util.h" + +GDALVector::GDALVector() : + m_dsn(""), + m_layer_name(""), + m_is_sql(false), + m_open_options(Rcpp::CharacterVector::create()), + m_spatial_filter(""), + m_dialect(""), + m_hDataset(nullptr), + m_eAccess(GA_ReadOnly), + m_hLayer(nullptr) {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn) : + GDALVector(dsn, "", true, Rcpp::CharacterVector::create(), + "", "") {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer) : + GDALVector(dsn, layer, true, Rcpp::CharacterVector::create(), + "", "") {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, + bool read_only) : + + GDALVector(dsn, layer, read_only, Rcpp::CharacterVector::create(), + "", "") {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, + bool read_only, Rcpp::CharacterVector open_options) : + + GDALVector(dsn, layer, read_only, open_options, "", "") {} + +GDALVector::GDALVector(Rcpp::CharacterVector dsn, std::string layer, + bool read_only, + Rcpp::Nullable open_options, + std::string spatial_filter, std::string dialect = "") : + + m_layer_name(layer), + m_open_options(open_options.isNotNull() ? 
open_options : + Rcpp::CharacterVector::create()), + m_spatial_filter(spatial_filter), + m_dialect(dialect), + m_hDataset(nullptr), + m_eAccess(GA_ReadOnly), + m_hLayer(nullptr) { + + m_dsn = Rcpp::as(check_gdal_filename(dsn)); + open(read_only); +} + +void GDALVector::open(bool read_only) { + if (m_dsn == "") + Rcpp::stop("DSN is not set"); + + if (m_hDataset != nullptr) { + if (m_is_sql) + GDALDatasetReleaseResultSet(m_hDataset, m_hLayer); + GDALReleaseDataset(m_hDataset); + m_hDataset = nullptr; + m_hLayer = nullptr; + } + + if (read_only) + m_eAccess = GA_ReadOnly; + else + m_eAccess = GA_Update; + + std::vector dsoo(m_open_options.size() + 1); + if (m_open_options.size() > 0) { + for (R_xlen_t i = 0; i < m_open_options.size(); ++i) { + dsoo[i] = (char *) (m_open_options[i]); + } + } + dsoo[m_open_options.size()] = nullptr; + + OGRGeometryH hGeom_filter = nullptr; + if (m_spatial_filter != "") { + char* pszWKT = (char*) m_spatial_filter.c_str(); + if (OGR_G_CreateFromWkt(&pszWKT, nullptr, &hGeom_filter) != + OGRERR_NONE) { + if (hGeom_filter != nullptr) + OGR_G_DestroyGeometry(hGeom_filter); + Rcpp::stop("failed to create geometry from 'spatial_filter'"); + } + } + + unsigned int nOpenFlags = GDAL_OF_VECTOR; + if (read_only) + nOpenFlags |= GDAL_OF_READONLY; + else + nOpenFlags |= GDAL_OF_UPDATE; + + m_hDataset = GDALOpenEx(m_dsn.c_str(), nOpenFlags, nullptr, + dsoo.data(), nullptr); + if (m_hDataset == nullptr) + Rcpp::stop("open dataset failed"); + + const char* pszDialect = m_dialect.c_str(); + + if (m_layer_name == "") { + m_is_sql = false; + m_hLayer = GDALDatasetGetLayer(m_hDataset, 0); + } + else if (STARTS_WITH_CI(m_layer_name.c_str(), "SELECT ")) { + m_is_sql = true; + if (EQUAL(pszDialect, "SQLite") && !has_spatialite()) + Rcpp::warning("SpatiaLite not available"); + m_hLayer = GDALDatasetExecuteSQL(m_hDataset, m_layer_name.c_str(), + hGeom_filter, pszDialect); + } + else { + m_is_sql = false; + m_hLayer = GDALDatasetGetLayerByName(m_hDataset, 
m_layer_name.c_str()); + } + + if (m_hLayer == nullptr) { + GDALReleaseDataset(m_hDataset); + Rcpp::stop("failed to get layer"); + } + else { + OGR_L_ResetReading(m_hLayer); + } + + if (hGeom_filter != nullptr) + OGR_G_DestroyGeometry(hGeom_filter); +} + +bool GDALVector::isOpen() const { + if (m_hDataset == nullptr) + return false; + else + return true; +} + +std::string GDALVector::getDsn() const { + return m_dsn; +} + +Rcpp::CharacterVector GDALVector::getFileList() const { + checkAccess_(GA_ReadOnly); + + char **papszFiles; + papszFiles = GDALGetFileList(m_hDataset); + + int items = CSLCount(papszFiles); + if (items > 0) { + Rcpp::CharacterVector files(items); + for (int i=0; i < items; ++i) { + files(i) = papszFiles[i]; + } + CSLDestroy(papszFiles); + return files; + } + else { + CSLDestroy(papszFiles); + return ""; + } +} + +std::string GDALVector::getDriverShortName() const { + checkAccess_(GA_ReadOnly); + + GDALDriverH hDriver = GDALGetDatasetDriver(m_hDataset); + return GDALGetDriverShortName(hDriver); +} + +std::string GDALVector::getDriverLongName() const { + checkAccess_(GA_ReadOnly); + + GDALDriverH hDriver = GDALGetDatasetDriver(m_hDataset); + return GDALGetDriverLongName(hDriver); +} + +std::string GDALVector::getName() const { + checkAccess_(GA_ReadOnly); + + return OGR_L_GetName(m_hLayer); +} + +Rcpp::List GDALVector::testCapability() const { + checkAccess_(GA_ReadOnly); + + Rcpp::List capabilities = Rcpp::List::create( + Rcpp::Named("RandomRead") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCRandomRead)), + Rcpp::Named("SequentialWrite") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCSequentialWrite)), + Rcpp::Named("RandomWrite") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCRandomWrite)), +#if GDAL_VERSION_NUM >= 3060000 + Rcpp::Named("UpsertFeature") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCUpsertFeature)), +#endif + Rcpp::Named("FastSpatialFilter") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastSpatialFilter)), 
+ Rcpp::Named("FastFeatureCount") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastFeatureCount)), + Rcpp::Named("FastGetExtent") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastGetExtent)), + Rcpp::Named("FastSetNextByIndex") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCFastSetNextByIndex)), + Rcpp::Named("CreateField") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCCreateField)), + Rcpp::Named("CreateGeomField") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCCreateGeomField)), + Rcpp::Named("DeleteField") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCDeleteField)), + Rcpp::Named("ReorderFields") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCReorderFields)), + Rcpp::Named("AlterFieldDefn") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCAlterFieldDefn)), +#if GDAL_VERSION_NUM >= 3060000 + Rcpp::Named("AlterGeomFieldDefn") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCAlterGeomFieldDefn)), +#endif + Rcpp::Named("DeleteFeature") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCDeleteFeature)), + Rcpp::Named("StringsAsUTF8") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCStringsAsUTF8)), + Rcpp::Named("Transactions") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCTransactions)), + Rcpp::Named("CurveGeometries") = static_cast( + OGR_L_TestCapability(m_hLayer, OLCCurveGeometries))); + + return capabilities; +} + +std::string GDALVector::getFIDColumn() const { + checkAccess_(GA_ReadOnly); + + return OGR_L_GetFIDColumn(m_hLayer); +} + +std::string GDALVector::getGeomType() const { + checkAccess_(GA_ReadOnly); + + OGRwkbGeometryType eType = OGR_L_GetGeomType(m_hLayer); + return getWkbGeomString_(eType); +} + +std::string GDALVector::getGeometryColumn() const { + checkAccess_(GA_ReadOnly); + + return OGR_L_GetGeometryColumn(m_hLayer); +} + +std::string GDALVector::getSpatialRef() const { + // OGRLayer::GetSpatialRef() as WKT string + checkAccess_(GA_ReadOnly); + + OGRSpatialReferenceH hSRS = 
OGR_L_GetSpatialRef(m_hLayer); + if (hSRS == nullptr) + Rcpp::stop("could not obtain spatial reference"); + char *pszSRS_WKT = nullptr; + if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) + Rcpp::stop("error exporting SRS to WKT"); + std::string srs_wkt(pszSRS_WKT); + CPLFree(pszSRS_WKT); + + return srs_wkt; +} + +Rcpp::NumericVector GDALVector::bbox() { + // Note: bForce = true in the call to OGR_L_GetExtent(), so the entire + // layer may be scanned to compute MBR. + // see: testCapability("FastGetExtent") + // Depending on the driver, a spatial filter may/may not be taken into + // account. So it is safer to call bbox() without setting a spatial filter. + checkAccess_(GA_ReadOnly); + + OGREnvelope envelope; + if (OGR_L_GetExtent(m_hLayer, &envelope, true) != OGRERR_NONE) + Rcpp::stop("the extent of the layer cannot be determined"); + + Rcpp::NumericVector bbox_out = + {envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY}; + + return bbox_out; +} + +Rcpp::List GDALVector::getLayerDefn() const { + checkAccess_(GA_ReadOnly); + + OGRFeatureDefnH hFDefn; + hFDefn = OGR_L_GetLayerDefn(m_hLayer); + if (hFDefn == nullptr) + Rcpp::stop("failed to get layer definition"); + + Rcpp::List list_out = Rcpp::List::create(); + bool bValue; + + // attribute fields + // TODO(ctoney): add field domain name + for (int iField = 0; iField < OGR_FD_GetFieldCount(hFDefn); ++iField) { + Rcpp::List list_fld_defn = Rcpp::List::create(); + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, iField); + if (hFieldDefn == nullptr) + Rcpp::stop("could not obtain field definition"); + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + list_fld_defn.push_back(getOFTString_(fld_type), "type"); + + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + list_fld_defn.push_back(getOFTSubtypeString_(fld_subtype), "subtype"); + + list_fld_defn.push_back(OGR_Fld_GetWidth(hFieldDefn), "width"); + + list_fld_defn.push_back(OGR_Fld_GetPrecision(hFieldDefn), "precision"); + + 
bValue = OGR_Fld_IsNullable(hFieldDefn); + list_fld_defn.push_back(bValue, "is_nullable"); + + bValue = OGR_Fld_IsUnique(hFieldDefn); + list_fld_defn.push_back(bValue, "is_unique"); + + std::string sValue = ""; + if (OGR_Fld_GetDefault(hFieldDefn) != nullptr) + sValue = std::string(OGR_Fld_GetDefault(hFieldDefn)); + list_fld_defn.push_back(sValue, "default"); + + bValue = OGR_Fld_IsIgnored(hFieldDefn); + list_fld_defn.push_back(bValue, "is_ignored"); + + bValue = false; + list_fld_defn.push_back(bValue, "is_geom"); + + list_out.push_back(list_fld_defn, OGR_Fld_GetNameRef(hFieldDefn)); + } + + // geometry fields + for (int i = 0; i < OGR_FD_GetGeomFieldCount(hFDefn); ++i) { + Rcpp::List list_geom_fld_defn = Rcpp::List::create(); + OGRGeomFieldDefnH hGeomFldDefn = + OGR_FD_GetGeomFieldDefn(hFDefn, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("could not obtain geometry field definition"); + + OGRwkbGeometryType eType = OGR_GFld_GetType(hGeomFldDefn); + list_geom_fld_defn.push_back(getWkbGeomString_(eType), "type"); + + OGRSpatialReferenceH hSRS = nullptr; + hSRS = OGR_GFld_GetSpatialRef(hGeomFldDefn); + if (hSRS == nullptr) { + Rcpp::warning("could not obtain geometry field SRS"); + list_geom_fld_defn.push_back(NA_STRING, "srs"); + } + else { + char *pszSRS_WKT = nullptr; + if (OSRExportToWkt(hSRS, &pszSRS_WKT) != OGRERR_NONE) { + Rcpp::warning("error exporting geometry SRS to WKT"); + list_geom_fld_defn.push_back(NA_STRING, "srs"); + } + else { + list_geom_fld_defn.push_back(std::string(pszSRS_WKT), "srs"); + } + CPLFree(pszSRS_WKT); + } + + bValue = OGR_GFld_IsNullable(hGeomFldDefn); + list_geom_fld_defn.push_back(bValue, "is_nullable"); + + bValue = OGR_GFld_IsIgnored(hGeomFldDefn); + list_geom_fld_defn.push_back(bValue, "is_ignored"); + + bValue = true; + list_geom_fld_defn.push_back(bValue, "is_geom"); + + std::string geomFldName(OGR_GFld_GetNameRef(hGeomFldDefn)); + if (geomFldName == "") + geomFldName = defaultGeomFldName; + 
list_out.push_back(list_geom_fld_defn, geomFldName); + } + + return list_out; +} + +void GDALVector::setAttributeFilter(std::string query) { + checkAccess_(GA_ReadOnly); + + const char* query_in = nullptr; + if (query != "") + query_in = query.c_str(); + + if (OGR_L_SetAttributeFilter(m_hLayer, query_in) != OGRERR_NONE) + Rcpp::stop("error setting filter, possibly in the query expression"); + else + m_attr_filter = query; +} + +void GDALVector::setSpatialFilterRect(Rcpp::NumericVector bbox) { + checkAccess_(GA_ReadOnly); + + if (Rcpp::any(Rcpp::is_na(bbox))) + Rcpp::stop("'bbox' has one or more 'NA' values"); + + OGR_L_SetSpatialFilterRect(m_hLayer, bbox[0], bbox[1], bbox[2], bbox[3]); +} + +void GDALVector::clearSpatialFilter() { + checkAccess_(GA_ReadOnly); + + OGR_L_SetSpatialFilter(m_hLayer, nullptr); +} + +double GDALVector::getFeatureCount() { + // OGR_L_GetFeatureCount() returns GIntBig, return as R numeric for now + // GDAL doc: Note that some implementations of this method may alter the + // read cursor of the layer. + // see: testCapability("FastFeatureCount") + checkAccess_(GA_ReadOnly); + + return static_cast(OGR_L_GetFeatureCount(m_hLayer, true)); +} + +SEXP GDALVector::getNextFeature() { + checkAccess_(GA_ReadOnly); + + Rcpp::DataFrame df = fetch(1); + if (df.nrows() == 0) { + return R_NilValue; + } + else { + // return as list + df.attr("class") = R_NilValue; + df.attr("row.names") = R_NilValue; + return df; + } +} + +SEXP GDALVector::getFeature(Rcpp::NumericVector fid) { + // fid must be an R numeric vector of length 1, i.e., a scalar but using + // NumericVector since it can carry the class attribute for integer64. + // Instead of wrapping OGR_L_GetFeature(), we use fetch() because it + // already builds the return data structure. 
+ + checkAccess_(GA_ReadOnly); + + if (fid.size() != 1) + Rcpp::stop("'fid' must be a length-1 numeric vector (integer64)"); + + int64_t fid_in = -1; + if (Rcpp::isInteger64(fid)) + fid_in = Rcpp::fromInteger64(fid[0]); + else + fid_in = static_cast(fid[0]); + + // save the current attribute and spatial filters + std::string orig_filter = m_attr_filter; + OGRGeometryH hOrigFilterGeom = nullptr; + OGRGeometryH hFilterGeom = nullptr; + hFilterGeom = OGR_L_GetSpatialFilter(m_hLayer); + if (hFilterGeom != nullptr) { + hOrigFilterGeom = OGR_G_Clone(hFilterGeom); + hFilterGeom = nullptr; + } + + // filter on FID + clearSpatialFilter(); + setAttributeFilter("FID = " + std::to_string(fid_in)); + + Rcpp::DataFrame df = fetch(1); + + // restore original filters + setAttributeFilter(orig_filter); + OGR_L_SetSpatialFilter(m_hLayer, hOrigFilterGeom); + if (hOrigFilterGeom != nullptr) { + OGR_G_DestroyGeometry(hOrigFilterGeom); + hOrigFilterGeom = nullptr; + } + + if (df.nrows() == 0) { + return R_NilValue; + } + else { + // return as list + df.attr("class") = R_NilValue; + df.attr("row.names") = R_NilValue; + return df; + } +} + +void GDALVector::resetReading() { + checkAccess_(GA_ReadOnly); + + OGR_L_ResetReading(m_hLayer); +} + +Rcpp::DataFrame GDALVector::fetch(double n) { + // Analog of DBI::dbFetch(), generally following its specification: + // https://dbi.r-dbi.org/reference/dbFetch.html#specification + + checkAccess_(GA_ReadOnly); + + OGRFeatureDefnH hFDefn = nullptr; + hFDefn = OGR_L_GetLayerDefn(m_hLayer); + if (hFDefn == nullptr) + Rcpp::stop("failed to get layer definition"); + + bool fetch_all = true; + size_t fetch_num = 0; + if (n == -1 || (std::isinf(n) && n > 0)) { + resetReading(); + fetch_num = OGR_L_GetFeatureCount(m_hLayer, true); + } + else if (Rcpp::NumericVector::is_na(n)) { + fetch_num = OGR_L_GetFeatureCount(m_hLayer, true); + } + else if (n >= 0) { + if (n > 9007199254740992) + Rcpp::stop("'n' is out of range"); + fetch_all = false; + fetch_num = 
static_cast(std::trunc(n)); + } + else { + Rcpp::stop("'n' is invalid"); + } + + Rcpp::DataFrame df = initDF_(fetch_num); + if (fetch_num == 0) + return df; + + int nFields = OGR_FD_GetFieldCount(hFDefn); + int nGeomFields = OGR_FD_GetGeomFieldCount(hFDefn); + bool include_geom = true; + if (EQUAL(returnGeomAs.c_str(), "NONE")) { + include_geom = false; + } + else if (!(EQUAL(returnGeomAs.c_str(), "WKB") || + EQUAL(returnGeomAs.c_str(), "WKB_ISO") || + EQUAL(returnGeomAs.c_str(), "WKT") || + EQUAL(returnGeomAs.c_str(), "WKT_ISO") || + EQUAL(returnGeomAs.c_str(), "TYPE_NAME"))) { + Rcpp::stop("unsupported value of field 'returnGeomAs'"); + } + + OGRwkbByteOrder eOrder; + if (EQUAL(wkbByteOrder.c_str(), "LSB")) + eOrder = wkbNDR; + else if (EQUAL(wkbByteOrder.c_str(), "MSB")) + eOrder = wkbXDR; + else + Rcpp::stop("invalid value of field 'wkbByteOrder'"); + + OGRFeatureH hFeat = nullptr; + size_t row_num = 0; + + while ((hFeat = OGR_L_GetNextFeature(m_hLayer)) != nullptr) { + const int64_t fid = static_cast(OGR_F_GetFID(hFeat)); + Rcpp::NumericVector fid_col = df[0]; + fid_col[row_num] = Rcpp::toInteger64(fid)[0]; + + for (int i = 0; i < nFields; ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if (hFieldDefn == nullptr) + Rcpp::stop("could not obtain field definition"); + + bool has_value = true; + if (!OGR_F_IsFieldSet(hFeat, i) || OGR_F_IsFieldNull(hFeat, i)) + has_value = false; + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + + if (fld_type == OFTInteger && has_value) { + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector col = df[i + 1]; + col[row_num] = OGR_F_GetFieldAsInteger(hFeat, i); + } + else { + Rcpp::IntegerVector col = df[i + 1]; + col[row_num] = OGR_F_GetFieldAsInteger(hFeat, i); + } + } + else if (fld_type == OFTInteger64 && has_value) { + const int64_t value = static_cast( + OGR_F_GetFieldAsInteger64(hFeat, i)); + + OGRFieldSubType fld_subtype = 
OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector col = df[i + 1]; + col[row_num] = Rcpp::toInteger64(value)[0]; + } + else { + Rcpp::NumericVector col = df[i + 1]; + col[row_num] = Rcpp::toInteger64(value)[0]; + } + } + else if (fld_type == OFTReal && has_value) { + Rcpp::NumericVector col = df[i + 1]; + col[row_num] = OGR_F_GetFieldAsDouble(hFeat, i); + } + else if ((fld_type == OFTDate || fld_type == OFTDateTime) + && has_value) { + + Rcpp::NumericVector col = df[i + 1]; + int yr, mo, day, hr, min, tzflag = 0; + float sec = 0; + if (OGR_F_GetFieldAsDateTimeEx(hFeat, i, &yr, &mo, &day, + &hr, &min, &sec, &tzflag)) { + + struct tm brokendowntime; + brokendowntime.tm_year = yr - 1900; + brokendowntime.tm_mon = mo - 1; + brokendowntime.tm_mday = day; + brokendowntime.tm_hour = hr; + brokendowntime.tm_min = min; + brokendowntime.tm_sec = static_cast(sec); + int64_t nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime); + if (fld_type == OFTDate) { + col[row_num] = static_cast(nUnixTime / 86400); + } + else { + // OFTDateTime + if (tzflag > 1 && tzflag != 100) { + // convert to GMT + const int tzoffset = std::abs(tzflag - 100) * 15; + const int tzhour = tzoffset / 60; + const int tzmin = tzoffset - tzhour * 60; + const int offset_sec = tzhour * 3600 + tzmin * 60; + if (tzflag >= 100) + nUnixTime -= offset_sec; + else + nUnixTime += offset_sec; + } + col[row_num] = static_cast( + nUnixTime + std::fmod(sec, 1)); + } + } + } + else if (fld_type == OFTBinary) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nDataSize = 0; + GByte *pabyData = OGR_F_GetFieldAsBinary(hFeat, i, + &nDataSize); + if (nDataSize > 0) { + Rcpp::RawVector blob(nDataSize); + std::memcpy(&blob[0], pabyData, nDataSize); + col[row_num] = blob; + } + else { + col[row_num] = Rcpp::RawVector::create(); + } + } + else { + col[row_num] = R_NilValue; + } + } + else if (fld_type == OFTIntegerList) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nCount = 0; + 
const int *panValue = OGR_F_GetFieldAsIntegerList(hFeat, i, + &nCount); + if (nCount > 0) { + std::vector v(panValue, panValue + nCount); + col[row_num] = Rcpp::wrap(v); + } + else { + col[row_num] = Rcpp::IntegerVector::create(); + } + } + else { + col[row_num] = NA_INTEGER; + } + } + else if (fld_type == OFTInteger64List) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nCount = 0; + const int64_t *panValue = reinterpret_cast( + OGR_F_GetFieldAsInteger64List(hFeat, i, &nCount)); + + if (nCount > 0) { + std::vector v(panValue, panValue + nCount); + col[row_num] = Rcpp::wrap(v); + } + else { + Rcpp::NumericVector v = Rcpp::NumericVector::create(); + v.attr("class") = "integer64"; + col[row_num] = v; + } + } + else { + std::vector v(1); + v[0] = NA_INTEGER64; + col[row_num] = Rcpp::wrap(v); + } + } + else if (fld_type == OFTRealList) { + Rcpp::List col = df[i + 1]; + if (has_value) { + int nCount = 0; + const double *padfValue = + OGR_F_GetFieldAsDoubleList(hFeat, i, &nCount); + + if (nCount > 0) { + std::vector v(padfValue, padfValue + nCount); + col[row_num] = Rcpp::wrap(v); + } + else { + col[row_num] = Rcpp::NumericVector::create(); + } + } + else { + col[row_num] = NA_REAL; + } + } + else if (fld_type == OFTStringList) { + Rcpp::List col = df[i + 1]; + if (has_value) { + char **papszValue = OGR_F_GetFieldAsStringList(hFeat, i); + int nCount = 0; + nCount = CSLCount(papszValue); + if (nCount > 0) { + std::vector v(papszValue, + papszValue + nCount); + + col[row_num] = Rcpp::wrap(v); + } + else { + col[row_num] = Rcpp::CharacterVector::create(); + } + } + else { + col[row_num] = NA_STRING; + } + } + else { + if (has_value) { + Rcpp::CharacterVector col = df[i + 1]; + col[row_num] = OGR_F_GetFieldAsString(hFeat, i); + } + } + } + + if (include_geom) { + for (int i = 0; i < nGeomFields; ++i) { + OGRGeomFieldDefnH hGeomFldDefn = + OGR_F_GetGeomFieldDefnRef(hFeat, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("could not obtain geometry field def"); + + if 
(STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + if (hGeom != nullptr) { +#if GDAL_VERSION_NUM >= 3030000 + const int nWKBSize = OGR_G_WkbSizeEx(hGeom); +#else + const int nWKBSize = OGR_G_WkbSize(hGeom); +#endif + if (nWKBSize) { + Rcpp::RawVector wkb(nWKBSize); + if (EQUAL(returnGeomAs.c_str(), "WKB")) + OGR_G_ExportToWkb(hGeom, eOrder, &wkb[0]); + else if (EQUAL(returnGeomAs.c_str(), "WKB_ISO")) + OGR_G_ExportToIsoWkb(hGeom, eOrder, &wkb[0]); + + Rcpp::List col = df[nFields + 1 + i]; + col[row_num] = wkb; + } + else { + Rcpp::List col = df[nFields + 1 + i]; + col[row_num] = Rcpp::RawVector::create(); + } + } + } + else if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKT")) { + Rcpp::CharacterVector col = df[nFields + 1 + i]; + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + if (hGeom != nullptr) { + char* pszWKT; + if (EQUAL(returnGeomAs.c_str(), "WKT")) + OGR_G_ExportToWkt(hGeom, &pszWKT); + else if (EQUAL(returnGeomAs.c_str(), "WKT_ISO")) + OGR_G_ExportToIsoWkt(hGeom, &pszWKT); + + col[row_num] = pszWKT; + CPLFree(pszWKT); + } + else { + col[row_num] = NA_STRING; + } + } + else if (EQUAL(returnGeomAs.c_str(), "TYPE_NAME")) { + OGRGeometryH hGeom = OGR_F_GetGeomFieldRef(hFeat, i); + Rcpp::CharacterVector col = df[nFields + 1 + i]; + if (hGeom != nullptr) + col[row_num] = OGR_G_GetGeometryName(hGeom); + else + col[row_num] = NA_STRING; + } + } + } + + OGR_F_Destroy(hFeat); + hFeat = nullptr; + + row_num += 1; + if (row_num == fetch_num) + break; + } + + if (fetch_all) { + hFeat = OGR_L_GetNextFeature(m_hLayer); + if (hFeat != nullptr) { + Rcpp::Rcout << "getFeatureCount() reported: " << row_num + << std::endl; + std::string msg = + "more features potentially available than reported by getFeatureCount()"; + Rcpp::warning(msg); + OGR_F_Destroy(hFeat); + hFeat = nullptr; + } + } + + if (row_num == fetch_num) { + return df; + } + else { + // Truncate the data frame by copying to a new one. 
Hard to avoid + // a copy here since Rcpp vectors cannot be resized. This is only + // needed for the last page when paging through features with repeated + // calls to fetch(n), so the data generally should not be large enough + // for this to be a problem. + Rcpp::DataFrame df_trunc = initDF_(row_num); + + Rcpp::NumericVector fid_col = df[0]; + Rcpp::NumericVector fid_col_trunc = df_trunc[0]; + std::copy_n(fid_col.cbegin(), row_num, fid_col_trunc.begin()); + + for (int i = 0; i < OGR_FD_GetFieldCount(hFDefn); ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + + if (fld_type == OFTInteger) { + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector col = df[i + 1]; + Rcpp::LogicalVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + else { + Rcpp::IntegerVector col = df[i + 1]; + Rcpp::IntegerVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + } + else if (fld_type == OFTInteger64 || fld_type == OFTReal || + fld_type == OFTDate || fld_type == OFTDateTime) { + + Rcpp::NumericVector col = df[i + 1]; + Rcpp::NumericVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + else if (fld_type == OFTBinary || fld_type == OFTIntegerList || + fld_type == OFTInteger64List || fld_type == OFTRealList || + fld_type == OFTStringList) { + + Rcpp::List col = df[i + 1]; + Rcpp::List col_trunc = df_trunc[i + 1]; + for (size_t n = 0; n < row_num; ++n) + col_trunc[n] = col[n]; + } + else { + Rcpp::CharacterVector col = df[i + 1]; + Rcpp::CharacterVector col_trunc = df_trunc[i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + } + + if (include_geom) { + for (int i = 0; i < nGeomFields; ++i) { + if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { + Rcpp::List col = df[nFields + i + 1]; + Rcpp::List 
col_trunc = df_trunc[nFields + i + 1]; + for (size_t n = 0; n < row_num; ++n) + col_trunc[n] = col[n]; + } + else { + Rcpp::CharacterVector col = df[nFields + i + 1]; + Rcpp::CharacterVector col_trunc = df_trunc[nFields + i + 1]; + std::copy_n(col.cbegin(), row_num, col_trunc.begin()); + } + } + } + + return df_trunc; + } +} + +void GDALVector::layerIntersection( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Intersection( + m_hLayer, + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("error during Intersection, or execution was interrupted"); +} + +void GDALVector::layerUnion( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Union( + m_hLayer, + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), + opt_list.data(), + quiet ? 
nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("error during Union, or execution was interrupted"); +} + +void GDALVector::layerSymDifference( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_SymDifference( + m_hLayer, + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("error during SymDifference, or execution was interrupted"); +} + +void GDALVector::layerIdentity( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Identity( + m_hLayer, + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), + opt_list.data(), + quiet ? 
nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("error during Identity, or execution was interrupted"); +} + +void GDALVector::layerUpdate( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Update( + m_hLayer, + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("error during Update, or execution was interrupted"); +} + +void GDALVector::layerClip( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Clip( + m_hLayer, + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), + opt_list.data(), + quiet ? 
nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("error during Clip, or execution was interrupted"); +} + +void GDALVector::layerErase( + GDALVector method_layer, + GDALVector result_layer, + bool quiet, + Rcpp::Nullable options) { + + std::vector opt_list = {nullptr}; + if (options.isNotNull()) { + Rcpp::CharacterVector options_in(options); + opt_list.resize(options_in.size() + 1); + for (R_xlen_t i = 0; i < options_in.size(); ++i) { + opt_list[i] = (char *) (options_in[i]); + } + opt_list[options_in.size()] = nullptr; + } + + OGRErr err = OGR_L_Erase( + m_hLayer, + method_layer.getOGRLayerH_(), + result_layer.getOGRLayerH_(), + opt_list.data(), + quiet ? nullptr : GDALTermProgressR, + nullptr); + + if (err != OGRERR_NONE) + Rcpp::stop("error during Erase, or execution was interrupted"); +} + +void GDALVector::close() { + if (m_hDataset != nullptr) { + if (m_is_sql) + GDALDatasetReleaseResultSet(m_hDataset, m_hLayer); + GDALReleaseDataset(m_hDataset); + m_hDataset = nullptr; + m_hLayer = nullptr; + } +} + +// **************************************************************************** +// class methods for internal use not exposed in R +// **************************************************************************** + +void GDALVector::checkAccess_(GDALAccess access_needed) const { + if (!isOpen()) + Rcpp::stop("dataset is not open"); + + if (access_needed == GA_Update && m_eAccess == GA_ReadOnly) + Rcpp::stop("dataset is read-only"); +} + +OGRLayerH GDALVector::getOGRLayerH_() const { + checkAccess_(GA_ReadOnly); + + return m_hLayer; +} + +SEXP GDALVector::initDF_(R_xlen_t nrow) const { + // initialize a data frame based on the layer definition + OGRFeatureDefnH hFDefn = nullptr; + hFDefn = OGR_L_GetLayerDefn(m_hLayer); + if (hFDefn == nullptr) + Rcpp::stop("failed to get layer definition"); + + int nFields = OGR_FD_GetFieldCount(hFDefn); + int nGeomFields = 0; + if (!EQUAL(returnGeomAs.c_str(), "NONE")) + nGeomFields = 
OGR_FD_GetGeomFieldCount(hFDefn); + + // construct as list and convert to data frame at return + Rcpp::List df(1 + nFields + nGeomFields); + Rcpp::CharacterVector col_names(1 + nFields + nGeomFields); + + std::vector fid(nrow, NA_INTEGER64); + df[0] = Rcpp::wrap(fid); + col_names[0] = "FID"; + + for (int i = 0; i < nFields; ++i) { + OGRFieldDefnH hFieldDefn = OGR_FD_GetFieldDefn(hFDefn, i); + if (hFieldDefn == nullptr) + Rcpp::stop("could not obtain field definition"); + + OGRFieldType fld_type = OGR_Fld_GetType(hFieldDefn); + + if (fld_type == OFTInteger) { + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector v(nrow, NA_LOGICAL); + df[i + 1] = v; + } + else { + Rcpp::IntegerVector v(nrow, NA_INTEGER); + df[i + 1] = v; + } + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else if (fld_type == OFTInteger64) { + OGRFieldSubType fld_subtype = OGR_Fld_GetSubType(hFieldDefn); + if (fld_subtype == OFSTBoolean) { + Rcpp::LogicalVector v(nrow, NA_LOGICAL); + df[i + 1] = v; + } + else { + std::vector v(nrow, NA_INTEGER64); + df[i + 1] = Rcpp::wrap(v); + } + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else if (fld_type == OFTReal) { + Rcpp::NumericVector v(nrow, NA_REAL); + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else if (fld_type == OFTDate) { + Rcpp::NumericVector v(nrow, NA_REAL); + v.attr("class") = "Date"; + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else if (fld_type == OFTDateTime) { + Rcpp::NumericVector v(nrow, NA_REAL); + Rcpp::CharacterVector classes = {"POSIXt", "POSIXct"}; + v.attr("class") = classes; + v.attr("tzone") = "UTC"; + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else if (fld_type == OFTBinary || fld_type == OFTIntegerList || + fld_type == OFTInteger64List || fld_type == OFTRealList || + fld_type == OFTStringList) { + + Rcpp::List v(nrow); + df[i + 1] = v; + col_names[i 
+ 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + else { + // use string + Rcpp::CharacterVector v(nrow, NA_STRING); + df[i + 1] = v; + col_names[i + 1] = OGR_Fld_GetNameRef(hFieldDefn); + } + } + + for (int i = 0; i < nGeomFields; ++i) { + OGRGeomFieldDefnH hGeomFldDefn = OGR_FD_GetGeomFieldDefn(hFDefn, i); + if (hGeomFldDefn == nullptr) + Rcpp::stop("could not obtain geometry field def"); + + if (STARTS_WITH_CI(returnGeomAs.c_str(), "WKB")) { + Rcpp::List v(nrow); + df[i + 1 + nFields] = v; + } + else { + Rcpp::CharacterVector v(nrow, NA_STRING); + df[i + 1 + nFields] = v; + } + + std::string geomFldName(OGR_GFld_GetNameRef(hGeomFldDefn)); + if (geomFldName == "") + geomFldName = defaultGeomFldName; + + col_names[i + 1 + nFields] = geomFldName; + } + + df.names() = col_names; + df.attr("class") = "data.frame"; + df.attr("row.names") = Rcpp::seq_len(nrow); + return df; +} + +// **************************************************************************** + +RCPP_MODULE(mod_GDALVector) { + Rcpp::class_("GDALVector") + + .constructor + ("Default constructor, only for allocations in std::vector") + .constructor + ("Usage: new(GDALVector, dsn)") + .constructor + ("Usage: new(GDALVector, dsn, layer)") + .constructor + ("Usage: new(GDALVector, dsn, layer, read_only=[TRUE|FALSE])") + .constructor + ("Usage: new(GDALVector, dsn, layer, read_only, open_options)") + .constructor, std::string> + ("Usage: new(GDALVector, dsn, layer, read_only, open_options, spatial_filter)") + .constructor, std::string, + std::string> + ("Usage: new(GDALVector, dsn, layer, read_only, open_options, spatial_filter, dialect)") + + // exposed read/write fields + .field("defaultGeomFldName", &GDALVector::defaultGeomFldName) + .field("returnGeomAs", &GDALVector::returnGeomAs) + .field("wkbByteOrder", &GDALVector::wkbByteOrder) + + // exposed member functions + .const_method("getDsn", &GDALVector::getDsn, + "Return the DSN") + .const_method("isOpen", &GDALVector::isOpen, + "Is the dataset open?") + 
.method("open", &GDALVector::open, + "(Re-)open the dataset on the existing DSN and layer") + .const_method("getFileList", &GDALVector::getFileList, + "Fetch files forming dataset") + .const_method("getDriverShortName", &GDALVector::getDriverShortName, + "Return the short name of the format driver") + .const_method("getDriverLongName", &GDALVector::getDriverLongName, + "Return the long name of the format driver") + .const_method("getName", &GDALVector::getName, + "Return the layer name") + .const_method("testCapability", &GDALVector::testCapability, + "Test if this layer supports the named capability") + .const_method("getFIDColumn", &GDALVector::getFIDColumn, + "Return name of the underlying db column being used as FID column") + .const_method("getGeomType", &GDALVector::getGeomType, + "Return the layer geometry type") + .const_method("getGeometryColumn", &GDALVector::getGeometryColumn, + "Return name of the underlying db column being used as geom column") + .const_method("getSpatialRef", &GDALVector::getSpatialRef, + "Fetch the spatial reference system for this layer as WKT string") + .method("bbox", &GDALVector::bbox, + "Return the bounding box (xmin, ymin, xmax, ymax)") + .const_method("getLayerDefn", &GDALVector::getLayerDefn, + "Fetch the schema information for this layer") + .method("setAttributeFilter", &GDALVector::setAttributeFilter, + "Set a new attribute query") + .method("setSpatialFilterRect", &GDALVector::setSpatialFilterRect, + "Set a new rectangular spatial filter") + .method("clearSpatialFilter", &GDALVector::clearSpatialFilter, + "Clear the current spatial filter") + .method("getFeatureCount", &GDALVector::getFeatureCount, + "Fetch the feature count in this layer") + .method("getNextFeature", &GDALVector::getNextFeature, + "Fetch the next available feature from this layer") + .method("getFeature", &GDALVector::getFeature, + "Fetch a feature by its identifier") + .method("resetReading", &GDALVector::resetReading, + "Reset feature reading to start 
on the first feature") + .method("fetch", &GDALVector::fetch, + "Fetch a set features as a data frame") + .method("layerIntersection", &GDALVector::layerIntersection, + "Intersection of this layer with a method layer") + .method("layerUnion", &GDALVector::layerUnion, + "Union of this layer with a method layer") + .method("layerSymDifference", &GDALVector::layerSymDifference, + "Symmetrical difference of this layer and a method layer") + .method("layerIdentity", &GDALVector::layerIdentity, + "Identify features of this layer with the ones from the method layer") + .method("layerUpdate", &GDALVector::layerUpdate, + "Update this layer with features from the method layer") + .method("layerClip", &GDALVector::layerClip, + "Clip off areas that are not covered by the method layer") + .method("layerErase", &GDALVector::layerErase, + "Remove areas that are covered by the method layer") + .method("close", &GDALVector::close, + "Release the dataset for proper cleanup") + + ; +} diff --git a/src/gdalvector.h b/src/gdalvector.h new file mode 100644 index 000000000..a01d02f3b --- /dev/null +++ b/src/gdalvector.h @@ -0,0 +1,126 @@ +/* R interface to a subset of the GDAL C API for vector. A class for OGRLayer, + a layer of features in a GDALDataset. 
// Class wrapping one vector layer (OGRLayerH) together with the GDAL dataset
// that contains it. Exposed to R through an Rcpp module.
//
// NOTE(review): `Rcpp::Nullable` appears below without a template argument.
// The method definitions construct an Rcpp::CharacterVector from these
// parameters, so the argument is presumably Rcpp::CharacterVector - confirm.
// NOTE(review): the Rcpp module registration documents a 5-argument usage
// (dsn, layer, read_only, open_options, spatial_filter), but no constructor
// with that arity is declared here - confirm against the .cpp file.
class GDALVector {
 private:
    std::string m_dsn;
    std::string m_layer_name;  // layer name or sql statement
    // when true, close() releases m_hLayer as a result set
    bool m_is_sql;
    Rcpp::CharacterVector m_open_options;
    std::string m_spatial_filter;
    std::string m_dialect;
    // released and set to nullptr by close()
    GDALDatasetH m_hDataset;
    // compared against the required access in checkAccess_()
    GDALAccess m_eAccess;
    // set to nullptr by close()
    OGRLayerH m_hLayer;
    std::string m_attr_filter = "";

 public:
    GDALVector();
    explicit GDALVector(Rcpp::CharacterVector dsn);
    GDALVector(Rcpp::CharacterVector dsn, std::string layer);
    GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only);
    GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only,
               Rcpp::CharacterVector open_options);
    GDALVector(Rcpp::CharacterVector dsn, std::string layer, bool read_only,
               Rcpp::Nullable open_options,
               std::string spatial_filter, std::string dialect);

    // read/write fields exposed to R

    // column name used for a geometry field whose own name is empty
    std::string defaultGeomFldName = "geometry";
    // the values tested in the .cpp file are "NONE", "WKB", "WKB_ISO",
    // "WKT", "WKT_ISO" and "TYPE_NAME"
    std::string returnGeomAs = "NONE";
    // NOTE(review): presumably selects the byte order for WKB export -
    // confirm where this is read
    std::string wkbByteOrder = "LSB";

    // (re-)open the dataset on the existing DSN and layer
    void open(bool read_only);
    bool isOpen() const;
    std::string getDsn() const;
    Rcpp::CharacterVector getFileList() const;
    std::string getDriverShortName() const;
    std::string getDriverLongName() const;

    // layer information
    std::string getName() const;
    Rcpp::List testCapability() const;
    std::string getFIDColumn() const;
    std::string getGeomType() const;
    std::string getGeometryColumn() const;
    // spatial reference system of the layer as a WKT string
    std::string getSpatialRef() const;
    // bounding box (xmin, ymin, xmax, ymax)
    Rcpp::NumericVector bbox();
    Rcpp::List getLayerDefn() const;

    // attribute and spatial filters
    void setAttributeFilter(std::string query);
    void setSpatialFilterRect(Rcpp::NumericVector bbox);
    void clearSpatialFilter();

    // feature reading
    double getFeatureCount();
    SEXP getNextFeature();
    // fid must be a length-1 numeric vector, since numeric vector can carry
    // the class attribute for integer64:
    SEXP getFeature(Rcpp::NumericVector fid);
    void resetReading();

    // fetch a set of features as a data frame
    Rcpp::DataFrame fetch(double n);

    // Overlay operations. Each one passes this layer, method_layer and
    // result_layer to the corresponding OGR_L_* function and raises an R
    // error if that call does not return OGRERR_NONE. quiet = true disables
    // the progress callback.
    void layerIntersection(
            GDALVector method_layer,
            GDALVector result_layer,
            bool quiet,
            Rcpp::Nullable options);
    void layerUnion(
            GDALVector method_layer,
            GDALVector result_layer,
            bool quiet,
            Rcpp::Nullable options);
    void layerSymDifference(
            GDALVector method_layer,
            GDALVector result_layer,
            bool quiet,
            Rcpp::Nullable options);
    void layerIdentity(
            GDALVector method_layer,
            GDALVector result_layer,
            bool quiet,
            Rcpp::Nullable options);
    void layerUpdate(
            GDALVector method_layer,
            GDALVector result_layer,
            bool quiet,
            Rcpp::Nullable options);
    void layerClip(
            GDALVector method_layer,
            GDALVector result_layer,
            bool quiet,
            Rcpp::Nullable options);
    void layerErase(
            GDALVector method_layer,
            GDALVector result_layer,
            bool quiet,
            Rcpp::Nullable options);

    // release the dataset and reset the dataset and layer handles
    void close();

    // methods for internal use not exported to R

    // raises an R error if the dataset is not open, or is read-only when
    // update access is needed
    void checkAccess_(GDALAccess access_needed) const;
    // returns the layer handle after checking that the dataset is open
    OGRLayerH getOGRLayerH_() const;
    // builds an NA-initialized data frame with nrow rows from the layer
    // definition
    SEXP initDF_(R_xlen_t nrow) const;
};
std::string getWkbGeomString_(OGRwkbGeometryType eType) { for (auto it = MAP_OGR_GEOM_TYPE.begin(); it != MAP_OGR_GEOM_TYPE.end(); ++it) { @@ -40,8 +37,6 @@ std::string getWkbGeomString_(OGRwkbGeometryType eType) { return "UNKNOWN"; } -// Internal lookup of OGRFieldType by string descriptor -// Error if no match OGRFieldType getOFT_(std::string fld_type) { if (auto it = MAP_OGR_FLD_TYPE.find(fld_type); it != MAP_OGR_FLD_TYPE.end()) { @@ -53,8 +48,6 @@ OGRFieldType getOFT_(std::string fld_type) { } } -// Internal lookup of OGR field type string by OGRFieldType -// Returns empty string if no match, with warning emitted std::string getOFTString_(OGRFieldType eType) { for (auto it = MAP_OGR_FLD_TYPE.begin(); it != MAP_OGR_FLD_TYPE.end(); ++it) { @@ -66,8 +59,6 @@ std::string getOFTString_(OGRFieldType eType) { return ""; } -// Internal lookup of OGRFieldSubType by string descriptor -// Returns OFSTNone if no match OGRFieldSubType getOFTSubtype_(std::string fld_subtype) { if (auto it = MAP_OGR_FLD_SUBTYPE.find(fld_subtype); it != MAP_OGR_FLD_SUBTYPE.end()) { @@ -79,8 +70,6 @@ OGRFieldSubType getOFTSubtype_(std::string fld_subtype) { } } -// Internal lookup of OGR field subtype string by OGRFieldSubType -// Returns "OFSTNone" if no match std::string getOFTSubtypeString_(OGRFieldSubType eType) { for (auto it = MAP_OGR_FLD_SUBTYPE.begin(); it != MAP_OGR_FLD_SUBTYPE.end(); ++it) { diff --git a/src/ogr_util.h b/src/ogr_util.h index c56b89f2f..62057ebd0 100644 --- a/src/ogr_util.h +++ b/src/ogr_util.h @@ -115,6 +115,31 @@ const std::map MAP_OGR_FLD_SUBTYPE{ }; #endif +// Internal lookup of OGRwkbGeometryType by string descriptor +// Returns wkbUnknown if no match +OGRwkbGeometryType getWkbGeomType_(std::string geom_type); + +// Internal lookup of geometry type string by OGRwkbGeometryType +// Returns "UNKNOWN" if no match +std::string getWkbGeomString_(OGRwkbGeometryType eType); + +// Internal lookup of OGRFieldType by string descriptor +// Error if no match +OGRFieldType 
getOFT_(std::string fld_type); + +// Internal lookup of OGR field type string by OGRFieldType +// Returns empty string if no match, with warning emitted +std::string getOFTString_(OGRFieldType eType); + +// Internal lookup of OGRFieldSubType by string descriptor +// Returns OFSTNone if no match +OGRFieldSubType getOFTSubtype_(std::string fld_subtype); + +// Internal lookup of OGR field subtype string by OGRFieldSubType +// Returns "OFSTNone" if no match +std::string getOFTSubtypeString_(OGRFieldSubType eType); + + bool ogr_ds_exists(std::string dsn, bool with_update); std::string ogr_ds_format(std::string dsn); @@ -138,6 +163,7 @@ bool ogr_layer_exists(std::string dsn, std::string layer); SEXP ogr_layer_test_cap(std::string dsn, std::string layer, bool with_update); +// internal CreateLayer OGRLayerH CreateLayer_(GDALDatasetH hDS, std::string layer, Rcpp::Nullable layer_defn, std::string geom_type, std::string srs, @@ -154,6 +180,7 @@ SEXP ogr_layer_field_names(std::string dsn, std::string layer); int ogr_field_index(std::string dsn, std::string layer, std::string fld_name); +// internal CreateField bool CreateField_(GDALDatasetH hDS, OGRLayerH hLayer, std::string fld_name, std::string fld_type, std::string fld_subtype, int fld_width, int fld_precision, bool is_nullable, bool is_ignored, @@ -166,6 +193,7 @@ bool ogr_field_create(std::string dsn, std::string layer, bool is_ignored, bool is_unique, std::string default_value); +// internal CreateGeomField bool CreateGeomField_(GDALDatasetH hDS, OGRLayerH hLayer, std::string fld_name, OGRwkbGeometryType eGeomType, std::string srs, bool is_nullable, bool is_ignored); diff --git a/src/rcpp_util.h b/src/rcpp_util.h index 3a424a7ab..f4042917d 100644 --- a/src/rcpp_util.h +++ b/src/rcpp_util.h @@ -6,6 +6,8 @@ #ifndef SRC_RCPP_UTIL_H_ #define SRC_RCPP_UTIL_H_ +#include + #include #include @@ -13,6 +15,10 @@ #include #include +// as defined in the bit64 package src/integer64.h +#define NA_INTEGER64 LLONG_MIN +#define 
# Tests for src/gdalvector.cpp
test_that("class constructors work", {
    # work on a copy of the sample GeoPackage in the session temp directory
    src_gpkg <- system.file("extdata/ynp_fires_1984_2022.gpkg",
                            package = "gdalraster")
    tmp_dsn <- file.path(tempdir(), basename(src_gpkg))
    file.copy(src_gpkg, tmp_dsn, overwrite = TRUE)

    # DSN only
    lyr <- new(GDALVector, tmp_dsn)
    expect_equal(lyr$getName(), "mtbs_perims")
    expect_type(lyr$getFeature(1), "list")
    lyr$close()

    # DSN and layer name
    lyr <- new(GDALVector, tmp_dsn, "mtbs_perims")
    expected_bbox <- c(469685.73, -12917.76, 573531.72, 96577.34)
    expect_equal(lyr$bbox(), expected_bbox)
    lyr$close()

    # opened with write access
    lyr <- new(GDALVector, tmp_dsn, "mtbs_perims", read_only = FALSE)
    expect_true(lyr$testCapability()$RandomWrite)
    lyr$close()

    # opened read-only, with an open option
    lyr <- new(GDALVector, tmp_dsn, "mtbs_perims", read_only = TRUE,
               "LIST_ALL_TABLES=NO")
    expect_false(lyr$testCapability()$RandomWrite)
    lyr$close()

    # spatial filter with SQL layer
    filter_wkt <- bbox_to_wkt(c(469685.97, 11442.45, 544069.63, 85508.15))
    sql_layer <- "SELECT FID, * FROM mtbs_perims"
    lyr <- new(GDALVector, tmp_dsn, sql_layer, read_only = TRUE,
               open_options = NULL, spatial_filter = filter_wkt)
    expect_equal(lyr$getFeatureCount(), 40)
    lyr$close()

    # add dialect
    lyr <- new(GDALVector, tmp_dsn, sql_layer, read_only = TRUE,
               open_options = NULL, spatial_filter = filter_wkt,
               dialect = "")
    expect_equal(lyr$getFeatureCount(), 40)
    lyr$close()

    unlink(tmp_dsn)
})