Skip to content

Commit

Permalink
Merge pull request #1 from JosiahParry/geoarrow
Browse files Browse the repository at this point in the history
Arrow conversions
  • Loading branch information
JosiahParry authored Nov 20, 2023
2 parents a8e2dec + b303959 commit b3cee7c
Show file tree
Hide file tree
Showing 9 changed files with 809 additions and 38 deletions.
562 changes: 562 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ readme = "README.md"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = "49.0.0"
geo-types = { version = "0.7.12", optional = true }
# geo-types = { version = "0.7.12" }
serde = { version = "1.0.192", features = ["derive"] }
serde_json = "1.0.108"
serde_with = "3.4.0"


[lib]
crate-type = ["staticlib", "lib"]

Expand Down
205 changes: 205 additions & 0 deletions src/arrow_compat.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
use std::collections::HashMap;

use arrow::array::Array;
use arrow::{
array::{
make_builder, ArrayBuilder, BooleanBuilder, Float32Builder, Float64Builder,
Int16Builder, Int32Builder, Int64Builder, Int8Builder, NullBuilder, StringBuilder,
UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder,
},
datatypes::{
DataType,
Field as AField,
// Schema,
Fields,
SchemaBuilder,
}
};
use serde_json::Value;

// use arrow::array::builder::StructBuilder;

use crate::features::FeatureSet;
use crate::{
features::{Feature, Field},
field_type::FieldType,
};

// convert a field to a new field
impl From<Field> for AField {
fn from(value: Field) -> Self {
let dtype = match value.field_type {
FieldType::EsriFieldTypeSmallInteger => DataType::Int16,
FieldType::EsriFieldTypeInteger => DataType::Int32,
FieldType::EsriFieldTypeSingle => DataType::Float32,
FieldType::EsriFieldTypeDouble => DataType::Float64,
FieldType::EsriFieldTypeString => DataType::Utf8,
FieldType::EsriFieldTypeDate => DataType::Date32,
FieldType::EsriFieldTypeOid => DataType::Int64,
FieldType::EsriFieldTypeBlob => DataType::LargeBinary,
FieldType::EsriFieldTypeGuid => DataType::Utf8,
FieldType::EsriFieldTypeGlobalId => DataType::Utf8,
FieldType::EsriFieldTypeXml => DataType::LargeUtf8,
FieldType::EsriFieldTypeRaster => unimplemented!(),
FieldType::EsriFieldTypeGeometry => unimplemented!(),
};

Self::new(value.name, dtype, true)
}
}

/// Takes a vector of Esri [`Field`]s and returns the equivalent Arrow
/// [`Fields`], preserving the input order.
///
/// Each Esri field is converted with `AField::from`.
fn field_to_schema(fields: Vec<Field>) -> Fields {
    let mut schema = SchemaBuilder::with_capacity(fields.len());

    fields
        .into_iter()
        .map(AField::from)
        .for_each(|arrow_field| schema.push(arrow_field));

    schema.finish().fields
}

fn create_array_vecs<const N: usize>(
fields: &Fields,
feats: Vec<Feature<N>>,
) -> Vec<std::sync::Arc<dyn Array>> {
let n = feats.len();
let mut map: HashMap<&String, (&AField, Box<dyn ArrayBuilder>)> = HashMap::new();

fields.iter().for_each(|f| {
let b = make_builder(f.data_type(), n);
map.insert(f.name(), (&f, b));
});

// let a1 = feats[0].attributes.clone().unwrap();

feats.into_iter().for_each(|m| {
let a1 = m.attributes.unwrap();

a1.into_iter().for_each(|(k, v)| {
let (field, builder) = map.get_mut(&k).unwrap();
append_value(v, field, builder);
});
});

let res = map
.into_iter()
.map(|mut bi| {
let arr = bi.1 .1.finish();
arr
})
.collect::<Vec<_>>();

res
}

pub fn featureset_to_arrow<const N: usize>(x: FeatureSet<N>) -> Vec<std::sync::Arc<dyn Array>> {
let fields = field_to_schema(x.fields.unwrap());
create_array_vecs(&fields, x.features)
}

// take a field and a builder
// then match on the field to use downcast mut
fn append_value(v: Value, f: &AField, builder: &mut Box<dyn ArrayBuilder>) -> () {
let bb = builder.as_any_mut();
match f.data_type() {
DataType::Null => {
bb.downcast_mut::<NullBuilder>()
.unwrap()
.append_empty_value();
}
DataType::Boolean => {
bb.downcast_mut::<BooleanBuilder>()
.unwrap()
.append_value(v.as_bool().unwrap());
}
DataType::Int8 => {
bb.downcast_mut::<Int8Builder>()
.unwrap()
.append_value(v.as_i64().unwrap() as i8);
}
DataType::Int16 => {
bb.downcast_mut::<Int16Builder>()
.unwrap()
.append_value(v.as_i64().unwrap() as i16);
}
DataType::Int32 => {
bb.downcast_mut::<Int32Builder>()
.unwrap()
.append_value(v.as_i64().unwrap() as i32);
}
DataType::Int64 => {
bb.downcast_mut::<Int64Builder>()
.unwrap()
.append_value(v.as_i64().unwrap());
}
DataType::UInt8 => {
bb.downcast_mut::<UInt8Builder>()
.unwrap()
.append_value(v.as_u64().unwrap() as u8);
}
DataType::UInt16 => {
bb.downcast_mut::<UInt16Builder>()
.unwrap()
.append_value(v.as_u64().unwrap() as u16);
}
DataType::UInt32 => {
bb.downcast_mut::<UInt32Builder>()
.unwrap()
.append_value(v.as_u64().unwrap() as u32);
}
DataType::UInt64 => {
bb.downcast_mut::<UInt64Builder>()
.unwrap()
.append_value(v.as_u64().unwrap());
}
DataType::Float16 => {
// bb.downcast_mut::<Float16Builder>()
// .unwrap()
// .append_value(v.as_f64().unwrap() as f16);
// There is no 16 bit float in rust
todo!()
}
DataType::Float32 => {
bb.downcast_mut::<Float32Builder>()
.unwrap()
.append_value(v.as_f64().unwrap() as f32);
}
DataType::Float64 => {
bb.downcast_mut::<Float64Builder>()
.unwrap()
.append_value(v.as_f64().unwrap());
}
DataType::Timestamp(_, _) => todo!(),
DataType::Date32 => todo!(),
DataType::Date64 => todo!(),
DataType::Time32(_) => todo!(),
DataType::Time64(_) => todo!(),
DataType::Duration(_) => todo!(),
DataType::Interval(_) => todo!(),
DataType::Binary => todo!(),
DataType::FixedSizeBinary(_) => todo!(),
DataType::LargeBinary => todo!(),
DataType::Utf8 => {
bb.downcast_mut::<StringBuilder>()
.unwrap()
.append_value(v.as_str().unwrap());
}
DataType::LargeUtf8 => {
bb.downcast_mut::<StringBuilder>()
.unwrap()
.append_value(v.as_str().unwrap());
}
DataType::List(_) => todo!(),
DataType::FixedSizeList(_, _) => todo!(),
DataType::LargeList(_) => todo!(),
DataType::Struct(_) => todo!(),
DataType::Union(_, _) => todo!(),
DataType::Dictionary(_, _) => todo!(),
DataType::Decimal128(_, _) => todo!(),
DataType::Decimal256(_, _) => todo!(),
DataType::Map(_, _) => todo!(),
DataType::RunEndEncoded(_, _) => todo!(),
}
}
15 changes: 7 additions & 8 deletions src/features.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
//! `Feature` and `FeatureSet` objects
//!
//! Geometry objects are often accompanied by attributes that describe the point in space.
//!
//! Geometry objects are often accompanied by attributes that describe the point in space.
//! The Esri [`Feature`](https://developers.arcgis.com/documentation/common-data-types/feature-object.htm)
//! object enables us to represent geometries and attributes alongside each other.
//!
//! object enables us to represent geometries and attributes alongside each other.
//!
//! The Esri [`FeatureSet`](https://developers.arcgis.com/documentation/common-data-types/featureset-object.htm)
//! object represents a collection of individual features. This is the most common representation that is encountered
//! when working with a Feature Service via its rest API.
//! when working with a Feature Service via its rest API.
use crate::{field_type::FieldType, geometry::EsriGeometry, spatial_reference::SpatialReference};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
Expand All @@ -15,12 +15,11 @@ use serde_with::{serde_as, DisplayFromStr};
// handy reference
// https://github.com/Esri/arcgis-rest-js/blob/0e410dc16e0dd2961affb09ff7efbfb9b6c4999a/packages/arcgis-rest-request/src/types/feature.ts#L24


/// A single geometry and its attributes
///
///
/// Note that both geometry and attributes are optional. This is because
/// we can anticipate receiving _only_ geometries, or _only_ attributes
/// or both together.
/// or both together.
#[derive(Debug, Deserialize, Serialize)]
pub struct Feature<const N: usize> {
pub geometry: Option<EsriGeometry<N>>,
Expand Down
1 change: 0 additions & 1 deletion src/field_type.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! Enumeration of valid esri field types
use serde::{Deserialize, Serialize};


#[derive(Clone, Debug, Deserialize, Serialize)]
pub enum FieldType {
EsriFieldTypeSmallInteger = 0,
Expand Down
9 changes: 4 additions & 5 deletions src/geo_types.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
//! Optional feature for converting Esri JSON objects into `geo-types` geometries.
//!
//! This feature enables conversion between `EsriGeometry` objects and geo_types objects.
//! Since geo_types presently only supports 2 dimensional coordinates,
//! This feature enables conversion between `EsriGeometry` objects and geo_types objects.
//! Since geo_types presently only supports 2 dimensional coordinates,
//! Z and M dimensions are dropped if present.
//!
//!
//! Provides conversions for:
//!
//!
//! - `EsriCoord` -> `Coord`
//! - `EsriPoint` -> `Point`
//! - `EsriMultiPoint` -> `MultiPoint`
//! - `EsriPolyline` -> `MultiLineString`
//! - `EsriPolygon` -> `Polygon`
//!
use crate::geometry::*;
use geo_types::{Coord, LineString, MultiLineString, MultiPoint, Point, Polygon};

Expand Down
28 changes: 16 additions & 12 deletions src/geometry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@ use serde_with::skip_serializing_none;
/// It requires a valid `N` of values per coordinate. Should always be one of
/// `2`, `3`, or `4` in the case of XY, XYZ, or XYZM coordinates.
#[derive(Clone, Deserialize, Serialize, Debug)]
// Design note, from a Discord discussion:
// https://discord.com/channels/273534239310479360/273541522815713281/1175836081091006484
// Motivation: converting from this type to Arrow.
// "you could go from a Box<[f64; 4]> to a Vec<f64> without copying or reallocating,
// because in that case the allocation already exists"
// NOTE(review): the coordinate is stored inline as `[f64; N]`, not as `Box<[f64; N]>`,
// so the quote presumably describes a possible future representation — confirm.
// (De)serialization of the inner array is delegated to the `arrays` module via `serde(with)`.
pub struct EsriCoord<const N: usize>(#[serde(with = "arrays")] pub [f64; N]);

/// An `esriGeometryPoint` with fields x, y, z, and m. x and y are both required.
/// An `esriGeometryPoint` with fields x, y, z, and m. x and y are both required.
#[skip_serializing_none]
#[allow(non_snake_case)]
#[derive(Clone, Deserialize, Serialize, Debug)]
Expand All @@ -27,9 +31,9 @@ pub struct EsriPoint {
pub spatialReference: Option<SpatialReference>,
}

/// An `esriGeometryMultipoint` defined by a vector of `EsriCoord`s.
///
/// `<N>` parameter should be equal to `2 + hasZ + hasM`. There are no
/// An `esriGeometryMultipoint` defined by a vector of `EsriCoord`s.
///
/// `<N>` parameter should be equal to `2 + hasZ + hasM`. There are no
/// checks on the const value. If an incorrect value is provided, expect
/// a `panic!`.
#[skip_serializing_none]
Expand All @@ -43,10 +47,10 @@ pub struct EsriMultiPoint<const N: usize> {
}

/// An `esriGeometryPolyline` defined by a vector of `Vec<EsriCoord<N>>`.
///
/// Each inner vector should be a single linestring.
///
/// `<N>` parameter should be equal to `2 + hasZ + hasM`. There are no
///
/// Each inner vector should be a single linestring.
///
/// `<N>` parameter should be equal to `2 + hasZ + hasM`. There are no
/// checks on the const value. If an incorrect value is provided, expect
/// a `panic!`.
#[skip_serializing_none]
Expand All @@ -60,12 +64,12 @@ pub struct EsriPolyline<const N: usize> {
}

/// An `esriGeometryPolygon` defined by a `Vec<Vec<EsriCoord<N>>>`
///
///
/// Each inner vector should be a single linear ring. The first `Vec<EsriCoord<N>>`
/// represents the exterior ring. Subsequent ones are interior rings. No checking
/// of winding occurs.
///
/// `<N>` parameter should be equal to `2 + hasZ + hasM`. There are no
/// of winding occurs.
///
/// `<N>` parameter should be equal to `2 + hasZ + hasM`. There are no
/// checks on the const value. If an incorrect value is provided, expect
/// a `panic!`.
#[skip_serializing_none]
Expand Down
22 changes: 12 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
//!
//!
//! Example usage:
//!
//!
//!
//! Example usage:
//!
//! ```rust
//! use serde_esri::features::FeatureSet;
//! use reqwest::Error;
//! use std::io::Read;
//!
//!
//! fn main() {
//!
//!
//! // USA counties query
//! let flayer_url = "https://services.arcgis.com/P3ePLMYs2RVChkJx/ArcGIS/rest/services/USA_Counties_Generalized_Boundaries/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=false&resultRecordCount=1&f=json";
//!
//!
//! // perform request
//! let mut res = reqwest::blocking::get(flayer_url).unwrap();
//! let mut body = String::new();
//!
//! // read body into string
//!
//! // read body into string
//! res.read_to_string(&mut body).unwrap();
//!
//! // process into FeatureSet
//! let fset: FeatureSet<2> = serde_json::from_str(&body).unwrap();
//! let fset: FeatureSet<2> = serde_json::from_str(&body).unwrap();
//! println!("{:#?}", fset);
//! }
Expand All @@ -34,3 +34,5 @@ pub mod spatial_reference;
// feature flag: geo-types
#[cfg(feature = "geo")]
pub mod geo_types;

pub mod arrow_compat;
2 changes: 1 addition & 1 deletion src/spatial_reference.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Represents a spatial reference
use serde::{Deserialize, Serialize};
use serde_with::skip_serializing_none;

/// Read more on [Esri docs site](https://developers.arcgis.com/documentation/common-data-types/geometry-objects.htm#GUID-DFF0E738-5A42-40BC-A811-ACCB5814BABC)
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Debug, Clone)]
Expand Down

0 comments on commit b3cee7c

Please sign in to comment.