Skip to content

Commit

Permalink
Enable setting coord type when creating mutable arrays (#277)
Browse files Browse the repository at this point in the history
* Public from methods on mutable arrays

* optional coord type

* Adds `new_with_capacity()` for all other geometry types (#279)

* add mutable multilinepoint array with options

* new_with_options() for MultiLineString

* new_with_options() for Polygon

* new_with_options() for MultiPolygon

* run cargo fmt

* address PR comments

* cargo fmt

* wip public first_pass and second_pass api

* Flesh out public apis

---------

Co-authored-by: Josiah Parry <[email protected]>
  • Loading branch information
kylebarron and JosiahParry authored Nov 28, 2023
1 parent 8767908 commit 774b212
Show file tree
Hide file tree
Showing 8 changed files with 627 additions and 348 deletions.
3 changes: 2 additions & 1 deletion src/array/coord/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ pub use combined::{CoordBuffer, MutableCoordBuffer};
pub use interleaved::{InterleavedCoordBuffer, MutableInterleavedCoordBuffer};
pub use separated::{MutableSeparatedCoordBuffer, SeparatedCoordBuffer};

#[derive(Debug, Clone, PartialEq)]
#[derive(Default, Debug, Clone, PartialEq)]
pub enum CoordType {
#[default]
Interleaved,
Separated,
}
144 changes: 100 additions & 44 deletions src/array/linestring/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// use super::array::check;
use crate::array::mutable_offset::OffsetsBuilder;
use crate::array::{
LineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, MutableMultiPointArray,
WKBArray,
CoordType, LineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer,
MutableMultiPointArray, MutableSeparatedCoordBuffer, WKBArray,
};
use crate::error::{GeoArrowError, Result};
use crate::geo_traits::LineStringTrait;
Expand Down Expand Up @@ -30,19 +30,52 @@ pub struct MutableLineStringArray<O: OffsetSizeTrait> {
impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
/// Creates a new empty [`MutableLineStringArray`].
pub fn new() -> Self {
Self::with_capacities(0, 0)
Self::new_with_options(Default::default())
}

/// Creates a new empty [`MutableLineStringArray`] whose coordinate buffer uses the
/// requested [`CoordType`].
///
/// Both the coordinate capacity and the geometry capacity are requested as zero.
pub fn new_with_options(coord_type: CoordType) -> Self {
    let (coord_capacity, geom_capacity) = (0, 0);
    Self::with_capacities_and_options(coord_capacity, geom_capacity, coord_type)
}

/// Creates a new [`MutableLineStringArray`] with a capacity.
pub fn with_capacities(coord_capacity: usize, geom_capacity: usize) -> Self {
let coords = MutableInterleavedCoordBuffer::with_capacity(coord_capacity);
Self::with_capacities_and_options(coord_capacity, geom_capacity, Default::default())
}

pub fn with_capacities_and_options(
coord_capacity: usize,
geom_capacity: usize,
coord_type: CoordType,
) -> Self {
let coords = match coord_type {
CoordType::Interleaved => MutableCoordBuffer::Interleaved(
MutableInterleavedCoordBuffer::with_capacity(coord_capacity),
),
CoordType::Separated => MutableCoordBuffer::Separated(
MutableSeparatedCoordBuffer::with_capacity(coord_capacity),
),
};
Self {
coords: MutableCoordBuffer::Interleaved(coords),
coords,
geom_offsets: OffsetsBuilder::with_capacity(geom_capacity),
validity: NullBufferBuilder::new(geom_capacity),
}
}

pub fn with_capacities_from_iter<'a>(
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
) -> Self {
Self::with_capacities_and_options_from_iter(geoms, Default::default())
}

/// Creates a new [`MutableLineStringArray`] with the given [`CoordType`], using the
/// capacities that [`count_from_iter`] computes from `geoms`.
///
/// `geoms` is consumed by the counting pass only; no geometries are pushed into the
/// returned array.
pub fn with_capacities_and_options_from_iter<'a>(
    geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
    coord_type: CoordType,
) -> Self {
    let capacities = count_from_iter(geoms);
    Self::with_capacities_and_options(capacities.0, capacities.1, coord_type)
}

/// Reserves capacity for at least `additional` more LineStrings to be inserted
/// in the given `Vec<T>`. The collection may reserve more space to
/// speculatively avoid frequent reallocations. After calling `reserve`,
Expand All @@ -53,6 +86,22 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
self.geom_offsets.reserve(geom_additional);
}

/// Reserves room for the geometries yielded by `geoms`.
///
/// The coordinate and geometry counts are computed with [`count_from_iter`] and then
/// forwarded to [`Self::reserve`] as the additional capacity to reserve.
pub fn reserve_from_iter<'a>(
    &mut self,
    geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
) {
    let (coord_additional, geom_additional) = count_from_iter(geoms);
    self.reserve(coord_additional, geom_additional)
}

/// Reserves room for the geometries yielded by `geoms` via [`Self::reserve_exact`].
///
/// The coordinate and geometry counts are computed with [`count_from_iter`] and then
/// forwarded to [`Self::reserve_exact`] as the additional capacity to reserve.
pub fn reserve_exact_from_iter<'a>(
    &mut self,
    geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
) {
    let (coord_additional, geom_additional) = count_from_iter(geoms);
    self.reserve_exact(coord_additional, geom_additional)
}

/// Reserves the minimum capacity for at least `additional` more LineStrings to
/// be inserted in the given `Vec<T>`. Unlike [`reserve`], this will not
/// deliberately over-allocate to speculatively avoid frequent allocations.
Expand All @@ -67,6 +116,7 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
/// [`reserve`]: Vec::reserve
pub fn reserve_exact(&mut self, coord_additional: usize, geom_additional: usize) {
self.coords.reserve_exact(coord_additional);
// TODO: implement reserve_exact on OffsetsBuilder
self.geom_offsets.reserve(geom_additional);
}

Expand Down Expand Up @@ -126,6 +176,16 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
Ok(())
}

/// Appends every item yielded by `geoms` to this array, in iteration order.
///
/// Each item (whether `Some` or `None`) is forwarded to `push_line_string`.
///
/// # Panics
///
/// Panics if `push_line_string` returns an error for any item. Items before the failing
/// one have already been pushed at that point.
pub fn extend_from_iter<'a>(
    &mut self,
    geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait<T = f64> + 'a)>>,
) {
    // `geoms` is already an iterator, so it is looped over directly.
    for maybe_line_string in geoms {
        self.push_line_string(maybe_line_string).unwrap();
    }
}

/// Push a raw coordinate to the underlying coordinate array.
///
/// # Safety
Expand Down Expand Up @@ -155,6 +215,30 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
/// Consumes this builder, converts it with `into_arrow`, and returns the result wrapped
/// in an [`Arc`] as a `dyn Array` trait object.
pub fn into_array_ref(self) -> Arc<dyn Array> {
    let array = self.into_arrow();
    Arc::new(array)
}

/// Builds a [`MutableLineStringArray`] from a slice of line strings.
///
/// The slice is traversed twice: once to size the array (through
/// [`Self::with_capacities_and_options_from_iter`]) and once to push every geometry
/// (through [`Self::extend_from_iter`]). Every element is passed along as `Some`.
///
/// When `coord_type` is `None`, the default [`CoordType`] is used.
pub fn from_line_strings(
    geoms: &[impl LineStringTrait<T = f64>],
    coord_type: Option<CoordType>,
) -> Self {
    let coord_type = coord_type.unwrap_or_default();

    // First pass: count coordinates/geometries and allocate.
    let mut builder =
        Self::with_capacities_and_options_from_iter(geoms.iter().map(Some), coord_type);

    // Second pass: push the geometries.
    builder.extend_from_iter(geoms.iter().map(Some));
    builder
}

/// Builds a [`MutableLineStringArray`] from a slice of optional line strings.
///
/// The slice is traversed twice: once to size the array (through
/// [`Self::with_capacities_and_options_from_iter`]) and once to push every element
/// (through [`Self::extend_from_iter`]). Each element is passed along as an
/// `Option<&_>` borrowed from the slice.
///
/// When `coord_type` is `None`, the default [`CoordType`] is used.
pub fn from_nullable_line_strings(
    geoms: &[Option<impl LineStringTrait<T = f64>>],
    coord_type: Option<CoordType>,
) -> Self {
    let coord_type = coord_type.unwrap_or_default();

    // First pass: count coordinates/geometries and allocate.
    let mut builder = Self::with_capacities_and_options_from_iter(
        geoms.iter().map(Option::as_ref),
        coord_type,
    );

    // Second pass: push the elements.
    builder.extend_from_iter(geoms.iter().map(Option::as_ref));
    builder
}
}

impl<O: OffsetSizeTrait> IntoArrow for MutableLineStringArray<O> {
Expand Down Expand Up @@ -185,77 +269,49 @@ impl<O: OffsetSizeTrait> From<MutableLineStringArray<O>> for GenericListArray<O>
}
}

pub(crate) fn first_pass<'a>(
pub(crate) fn count_from_iter<'a>(
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
geoms_length: usize,
) -> (usize, usize) {
let mut coord_capacity = 0;
let geom_capacity = geoms_length;
let mut geom_capacity = 0;

for line_string in geoms.into_iter().flatten() {
coord_capacity += line_string.num_coords();
for maybe_line_string in geoms.into_iter() {
geom_capacity += 1;
if let Some(line_string) = maybe_line_string {
coord_capacity += line_string.num_coords();
}
}

(coord_capacity, geom_capacity)
}

pub(crate) fn second_pass<'a, O: OffsetSizeTrait>(
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait<T = f64> + 'a)>>,
coord_capacity: usize,
geom_capacity: usize,
) -> MutableLineStringArray<O> {
let mut array = MutableLineStringArray::with_capacities(coord_capacity, geom_capacity);

geoms
.into_iter()
.try_for_each(|maybe_multi_point| array.push_line_string(maybe_multi_point))
.unwrap();

array
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<Vec<G>> for MutableLineStringArray<O> {
fn from(geoms: Vec<G>) -> Self {
let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len());
second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity)
Self::from_line_strings(&geoms, Default::default())
}
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<Vec<Option<G>>>
for MutableLineStringArray<O>
{
fn from(geoms: Vec<Option<G>>) -> Self {
let geoms_len = geoms.len();
let (coord_capacity, geom_capacity) =
first_pass(geoms.iter().map(|x| x.as_ref()), geoms_len);
second_pass(
geoms.iter().map(|x| x.as_ref()),
coord_capacity,
geom_capacity,
)
Self::from_nullable_line_strings(&geoms, Default::default())
}
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<bumpalo::collections::Vec<'_, G>>
for MutableLineStringArray<O>
{
fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self {
let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len());
second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity)
Self::from_line_strings(&geoms, Default::default())
}
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<bumpalo::collections::Vec<'_, Option<G>>>
for MutableLineStringArray<O>
{
fn from(geoms: bumpalo::collections::Vec<'_, Option<G>>) -> Self {
let (coord_capacity, geom_capacity) =
first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len());
second_pass(
geoms.iter().map(|x| x.as_ref()),
coord_capacity,
geom_capacity,
)
Self::from_nullable_line_strings(&geoms, Default::default())
}
}

Expand Down
Loading

0 comments on commit 774b212

Please sign in to comment.