From 4d3e4b69bbfa8759c8f9e1440a6b72027d6a7768 Mon Sep 17 00:00:00 2001 From: Bruce Martin Date: Fri, 10 Jan 2025 08:00:19 -0800 Subject: [PATCH] [C++] Fix `ManagedQuery` segv: ctor/dtor order fix (#3544) * ctor/dtor order fix * lint * add comments * lint * add test case * add commnet --- apis/python/tests/test_sparse_nd_array.py | 28 +++++++++++++++++++++++ libtiledbsoma/src/soma/managed_query.cc | 8 +++---- libtiledbsoma/src/soma/managed_query.h | 12 ++++++---- libtiledbsoma/src/soma/soma_array.h | 4 ++++ 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/apis/python/tests/test_sparse_nd_array.py b/apis/python/tests/test_sparse_nd_array.py index 406f3e5db6..95bd91ba57 100644 --- a/apis/python/tests/test_sparse_nd_array.py +++ b/apis/python/tests/test_sparse_nd_array.py @@ -2,6 +2,7 @@ import contextlib import datetime +import gc import itertools import json import operator @@ -1979,3 +1980,30 @@ def test_iter(tmp_path: pathlib.Path): next(a) with pytest.raises(StopIteration): next(b) + + +def test_context_cleanup(tmp_path: pathlib.Path) -> None: + arrow_tensor = create_random_tensor("table", (1,), np.float32(), density=1) + with soma.SparseNDArray.create( + tmp_path.as_uri(), type=pa.float64(), shape=(1,) + ) as write_arr: + write_arr.write(arrow_tensor) + + def test(path, tiledb_config): + context = soma.SOMATileDBContext().replace(tiledb_config=tiledb_config) + X = soma.SparseNDArray.open(path, context=context, mode="r") + mq = X.read().tables() + return mq + + for _ in range(100): + # Run test multiple times. While the C++ this tests (dtor order) + # is deterministic, it is triggered by the Python GC, which makes + # no specific guarantees about when it will sweep any given object. + _ = test( + tmp_path.as_uri(), + { + "vfs.s3.no_sign_request": "true", + "vfs.s3.region": "us-west-2", + }, + ) + gc.collect() diff --git a/libtiledbsoma/src/soma/managed_query.cc b/libtiledbsoma/src/soma/managed_query.cc index c2c38147dd..a586683b30 100644 --- a/libtiledbsoma/src/soma/managed_query.cc +++ b/libtiledbsoma/src/soma/managed_query.cc @@ -51,8 +51,8 @@ ManagedQuery::ManagedQuery( std::shared_ptr array, std::shared_ptr ctx, std::string_view name) - : array_(array) - , ctx_(ctx) + : ctx_(ctx) + , array_(array) , name_(name) , schema_(std::make_shared(array->schema())) { reset(); @@ -62,8 +62,8 @@ ManagedQuery::ManagedQuery( std::unique_ptr array, std::shared_ptr ctx, std::string_view name) - : array_(array->arr_) - , ctx_(ctx) + : ctx_(ctx) + , array_(array->arr_) , name_(name) , schema_(std::make_shared(array->arr_->schema())) { reset(); diff --git a/libtiledbsoma/src/soma/managed_query.h b/libtiledbsoma/src/soma/managed_query.h index 52d2d53bc0..608708a67d 100644 --- a/libtiledbsoma/src/soma/managed_query.h +++ b/libtiledbsoma/src/soma/managed_query.h @@ -98,8 +98,8 @@ class ManagedQuery { ManagedQuery(const ManagedQuery&) = delete; ManagedQuery(ManagedQuery&& other) - : array_(other.array_) - , ctx_(other.ctx_) + : ctx_(other.ctx_) + , array_(other.array_) , name_(other.name_) , schema_(other.schema_) , query_(std::make_unique(*other.ctx_, *other.array_)) @@ -499,12 +499,16 @@ class ManagedQuery { const CurrentDomain& current_domain, bool is_read); void _fill_in_subarrays_if_dense_without_new_shape(bool is_read); - // TileDB array being queried. - std::shared_ptr array_; + // NB: the Array dtor REQUIRES that this context be alive, so member + // declaration order is significant. Context (ctx_) MUST be declared + // BEFORE Array (array_) so that ctx_ will be destructed last. // TileDB context object std::shared_ptr ctx_; + // TileDB array being queried. + std::shared_ptr array_; + // Name displayed in log messages std::string name_; diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index d143f594db..6b290564ea 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -1544,6 +1544,10 @@ class SOMAArray : public SOMAObject { // SOMAArray name for debugging std::string name_; + // NB: the Array dtor REQUIRES that this context be alive, so member + // declaration order is significant. Context (ctx_) MUST be declared + // BEFORE Array (arr_) so that ctx_ will be destructed last. + // SOMA context std::shared_ptr ctx_;