Skip to content

Commit

Permalink
Simplify mixed slicing (#917)
Browse files Browse the repository at this point in the history
It turns out we actually didn't need a slice_offset
  • Loading branch information
kylebarron authored Dec 7, 2024
1 parent b661c17 commit e3e5328
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 42 deletions.
46 changes: 25 additions & 21 deletions rust/geoarrow/src/array/geometry/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,25 +82,6 @@ pub struct GeometryArray {
pub(crate) mline_string_xyz: MultiLineStringArray,
pub(crate) mpolygon_xyz: MultiPolygonArray,
pub(crate) gc_xyz: GeometryCollectionArray,

/// An offset used for slicing into this array. The offset will be 0 if the array has not been
/// sliced.
///
/// In order to slice this array efficiently (and zero-cost) we can't slice the underlying
/// fields directly. If this were always a _sparse_ union array, we could! We could then always
/// slice from offset to length of each underlying array. But we're under the assumption that
/// most or all of the time we have a dense union array, where the `offsets` buffer is defined.
/// In that case, to know how to slice each underlying array, we'd have to walk the `type_ids`
/// and `offsets` arrays (in O(N) time) to figure out how to slice the underlying arrays.
///
/// Instead, we store the slice offset.
///
/// Note that this offset is only for slicing into the **fields**, i.e. the geometry arrays.
/// The `type_ids` and `offsets` arrays are sliced as usual.
///
/// TODO: when exporting this array, export to arrow2 and then slice from scratch because we
/// can't set the `offset` in a UnionArray constructor
pub(crate) slice_offset: usize,
}

impl GeometryArray {
Expand Down Expand Up @@ -174,7 +155,6 @@ impl GeometryArray {
mline_string_xyz,
mpolygon_xyz,
gc_xyz,
slice_offset: 0,
metadata,
}
}
Expand Down Expand Up @@ -437,7 +417,6 @@ impl GeometryArray {
mpolygon_xyz: self.mpolygon_xyz.clone(),
gc_xyz: self.gc_xyz.clone(),

slice_offset: self.slice_offset + offset,
metadata: self.metadata.clone(),
}
}
Expand Down Expand Up @@ -1211,4 +1190,29 @@ mod test {
assert_eq!(round_trip_arr.value_as_geo(0), geoms[0]);
assert_eq!(round_trip_arr.value_as_geo(1), geoms[1]);
}

#[test]
fn test_slicing() {
let geoms: Vec<geo::Geometry> = vec![
geo::Geometry::Point(point::p0()),
geo::Geometry::LineString(linestring::ls0()),
geo::Geometry::Polygon(polygon::p0()),
geo::Geometry::MultiPoint(multipoint::mp0()),
geo::Geometry::MultiLineString(multilinestring::ml0()),
geo::Geometry::MultiPolygon(multipolygon::mp0()),
];

let arr: GeometryArray = GeometryBuilder::from_geometries(
geoms.as_slice(),
Default::default(),
Default::default(),
false,
)
.unwrap()
.finish();

assert_eq!(arr.slice(1, 2).value_as_geo(0), geoms[1]);
assert_eq!(arr.slice(1, 2).value_as_geo(1), geoms[2]);
assert_eq!(arr.slice(3, 3).value_as_geo(2), geoms[5]);
}
}
21 changes: 0 additions & 21 deletions rust/geoarrow/src/array/mixed/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,6 @@ pub struct MixedGeometryArray {
pub(crate) multi_points: MultiPointArray,
pub(crate) multi_line_strings: MultiLineStringArray,
pub(crate) multi_polygons: MultiPolygonArray,

/// An offset used for slicing into this array. The offset will be 0 if the array has not been
/// sliced.
///
/// In order to slice this array efficiently (and zero-cost) we can't slice the underlying
/// fields directly. If this were always a _sparse_ union array, we could! We could then always
/// slice from offset to length of each underlying array. But we're under the assumption that
/// most or all of the time we have a dense union array, where the `offsets` buffer is defined.
/// In that case, to know how to slice each underlying array, we'd have to walk the `type_ids`
/// and `offsets` arrays (in O(N) time) to figure out how to slice the underlying arrays.
///
/// Instead, we store the slice offset.
///
/// Note that this offset is only for slicing into the **fields**, i.e. the geometry arrays.
/// The `type_ids` and `offsets` arrays are sliced as usual.
///
/// TODO: when exporting this array, export to arrow2 and then slice from scratch because we
/// can't set the `offset` in a UnionArray constructor
pub(crate) slice_offset: usize,
}

impl MixedGeometryArray {
Expand Down Expand Up @@ -151,7 +132,6 @@ impl MixedGeometryArray {
multi_points,
multi_line_strings,
multi_polygons,
slice_offset: 0,
metadata,
}
}
Expand Down Expand Up @@ -276,7 +256,6 @@ impl MixedGeometryArray {
multi_points: self.multi_points.clone(),
multi_line_strings: self.multi_line_strings.clone(),
multi_polygons: self.multi_polygons.clone(),
slice_offset: self.slice_offset + offset,
metadata: self.metadata.clone(),
}
}
Expand Down

0 comments on commit e3e5328

Please sign in to comment.