From b709ea0195d05f54eb6810ddbe53c5e72cde5f5f Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Mon, 6 Jan 2025 17:32:14 -0500 Subject: [PATCH 1/2] Add skip/limit config for pandas scan --- tools/python_api/CMakeLists.txt | 1 + .../src_cpp/include/pandas/pandas_scan.h | 21 +++--- .../src_cpp/include/py_scan_config.h | 15 +++++ .../src_cpp/include/pyarrow/pyarrow_scan.h | 7 -- .../python_api/src_cpp/pandas/pandas_scan.cpp | 18 +++-- tools/python_api/src_cpp/py_scan_config.cpp | 38 +++++++++++ .../src_cpp/pyarrow/pyarrow_scan.cpp | 32 +-------- tools/python_api/test/test_scan_pandas.py | 65 ++++++++++++++++++- 8 files changed, 138 insertions(+), 59 deletions(-) create mode 100644 tools/python_api/src_cpp/include/py_scan_config.h create mode 100644 tools/python_api/src_cpp/py_scan_config.cpp diff --git a/tools/python_api/CMakeLists.txt b/tools/python_api/CMakeLists.txt index ed4145ffd61..7b80706ba22 100644 --- a/tools/python_api/CMakeLists.txt +++ b/tools/python_api/CMakeLists.txt @@ -16,6 +16,7 @@ pybind11_add_module(_kuzu src_cpp/py_prepared_statement.cpp src_cpp/py_query_result.cpp src_cpp/py_query_result_converter.cpp + src_cpp/py_scan_config.cpp src_cpp/py_udf.cpp src_cpp/py_conversion.cpp src_cpp/pyarrow/pyarrow_bind.cpp diff --git a/tools/python_api/src_cpp/include/pandas/pandas_scan.h b/tools/python_api/src_cpp/include/pandas/pandas_scan.h index 5e3ddb684a6..a6bf5bc1061 100644 --- a/tools/python_api/src_cpp/include/pandas/pandas_scan.h +++ b/tools/python_api/src_cpp/include/pandas/pandas_scan.h @@ -4,6 +4,7 @@ #include "function/table/scan_functions.h" #include "function/table_functions.h" #include "pandas_bind.h" +#include "py_scan_config.h" namespace kuzu { @@ -15,9 +16,10 @@ struct PandasScanLocalState final : public function::TableFuncLocalState { }; struct PandasScanSharedState final : public function::BaseScanSharedStateWithNumRows { - explicit PandasScanSharedState(uint64_t numRows) - : BaseScanSharedStateWithNumRows{numRows}, numRowsRead{0} {} + PandasScanSharedState(uint64_t startRow, uint64_t numRows) + : BaseScanSharedStateWithNumRows{numRows}, startRow(startRow), numRowsRead{0} {} + uint64_t startRow; uint64_t numRowsRead; }; @@ -31,23 +33,19 @@ struct PandasScanFunction { struct PandasScanFunctionData : public function::TableFuncBindData { py::handle df; std::vector> columnBindData; - common::FileScanInfo fileScanInfo; + PyScanConfig scanConfig; PandasScanFunctionData(binder::expression_vector columns, py::handle df, uint64_t numRows, - std::vector> columnBindData, - common::FileScanInfo fileScanInfo) + std::vector> columnBindData, PyScanConfig scanConfig) : TableFuncBindData{std::move(columns), 0 /* numWarningDataColumns */, numRows}, df{df}, - columnBindData{std::move(columnBindData)}, fileScanInfo(std::move(fileScanInfo)) {} + columnBindData{std::move(columnBindData)}, scanConfig(scanConfig) {} ~PandasScanFunctionData() override { py::gil_scoped_acquire acquire; columnBindData.clear(); } - bool getIgnoreErrorsOption() const override { - return fileScanInfo.getOption(common::CopyConstants::IGNORE_ERRORS_OPTION_NAME, - common::CopyConstants::DEFAULT_IGNORE_ERRORS); - } + bool getIgnoreErrorsOption() const override { return scanConfig.ignoreErrors; } std::vector> copyColumnBindData() const; @@ -57,11 +55,10 @@ struct PandasScanFunctionData : public function::TableFuncBindData { private: PandasScanFunctionData(const PandasScanFunctionData& other) - : TableFuncBindData{other}, df{other.df} { + : TableFuncBindData{other}, df{other.df}, scanConfig(other.scanConfig) { for (const auto& i : other.columnBindData) { columnBindData.push_back(i->copy()); } - fileScanInfo = other.fileScanInfo.copy(); } }; diff --git a/tools/python_api/src_cpp/include/py_scan_config.h b/tools/python_api/src_cpp/include/py_scan_config.h new file mode 100644 index 00000000000..0496f3229e2 --- /dev/null +++ b/tools/python_api/src_cpp/include/py_scan_config.h @@ -0,0 +1,15 @@ +#pragma once + +#include "common/case_insensitive_map.h" +#include "common/types/value/value.h" + +namespace kuzu { + +struct PyScanConfig { + uint64_t skipNum; + uint64_t limitNum; + bool ignoreErrors; + explicit PyScanConfig(const common::case_insensitive_map_t& options); +}; + +} // namespace kuzu diff --git a/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h b/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h index 111be4251ee..f8e2fa1d7e0 100644 --- a/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h +++ b/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h @@ -11,13 +11,6 @@ namespace kuzu { -struct PyArrowScanConfig { - uint64_t skipNum; - uint64_t limitNum; - bool ignoreErrors; - explicit PyArrowScanConfig(const common::case_insensitive_map_t& options); -}; - struct PyArrowTableScanLocalState final : public function::TableFuncLocalState { ArrowArrayWrapper* arrowArray; diff --git a/tools/python_api/src_cpp/pandas/pandas_scan.cpp b/tools/python_api/src_cpp/pandas/pandas_scan.cpp index 59d9577fffd..21a9ed20f0f 100644 --- a/tools/python_api/src_cpp/pandas/pandas_scan.cpp +++ b/tools/python_api/src_cpp/pandas/pandas_scan.cpp @@ -7,6 +7,7 @@ #include "numpy/numpy_scan.h" #include "processor/execution_context.h" #include "py_connection.h" +#include "py_scan_config.h" #include "pyarrow/pyarrow_scan.h" #include "pybind11/pytypes.h" @@ -32,10 +33,12 @@ std::unique_ptr bindFunc(ClientContext* /*context*/, auto getFunc = df.attr("__getitem__"); auto numRows = py::len(getFunc(columns[0])); auto returnColumns = input->binder->createVariables(names, returnTypes); - auto scanConfig = - input->extraInput->constPtrCast()->fileScanInfo.copy(); - return std::make_unique(std::move(returnColumns), df, numRows, - std::move(columnBindData), std::move(scanConfig)); + auto scanConfig = PyScanConfig{ + input->extraInput->constPtrCast()->fileScanInfo.options}; + KU_ASSERT(numRows >= scanConfig.skipNum); + return std::make_unique(std::move(returnColumns), df, + std::min(numRows - scanConfig.skipNum, scanConfig.limitNum), std::move(columnBindData), + scanConfig); } bool sharedStateNext(const TableFuncBindData* /*bindData*/, PandasScanLocalState* localState, @@ -45,11 +48,11 @@ bool sharedStateNext(const TableFuncBindData* /*bindData*/, PandasScanLocalState if (pandasSharedState->numRowsRead >= pandasSharedState->numRows) { return false; } - localState->start = pandasSharedState->numRowsRead; + localState->start = pandasSharedState->startRow + pandasSharedState->numRowsRead; pandasSharedState->numRowsRead += std::min(pandasSharedState->numRows - pandasSharedState->numRowsRead, CopyConstants::PANDAS_PARTITION_COUNT); - localState->end = pandasSharedState->numRowsRead; + localState->end = pandasSharedState->startRow + pandasSharedState->numRowsRead; return true; } @@ -67,7 +70,8 @@ std::unique_ptr initSharedState(const TableFunctionInitInp } // LCOV_EXCL_STOP auto scanBindData = ku_dynamic_cast(input.bindData); - return std::make_unique(scanBindData->cardinality); + return std::make_unique(scanBindData->scanConfig.skipNum, + scanBindData->cardinality); } void pandasBackendScanSwitch(PandasColumnBindData* bindData, uint64_t count, uint64_t offset, diff --git a/tools/python_api/src_cpp/py_scan_config.cpp b/tools/python_api/src_cpp/py_scan_config.cpp new file mode 100644 index 00000000000..9e728fd0ea6 --- /dev/null +++ b/tools/python_api/src_cpp/py_scan_config.cpp @@ -0,0 +1,38 @@ +#include "py_scan_config.h" + +#include "common/constants.h" +#include "common/exception/binder.h" +#include "function/cast/functions/numeric_limits.h" + +namespace kuzu { + +PyScanConfig::PyScanConfig(const common::case_insensitive_map_t& options) { + skipNum = 0; + limitNum = function::NumericLimits::maximum(); + ignoreErrors = common::CopyConstants::DEFAULT_IGNORE_ERRORS; + for (const auto& i : options) { + if (i.first == "SKIP") { + if (i.second.getDataType().getLogicalTypeID() != common::LogicalTypeID::INT64 || + i.second.val.int64Val < 0) { + throw common::BinderException("SKIP Option must be a positive integer literal."); + } + skipNum = i.second.val.int64Val; + } else if (i.first == "LIMIT") { + if (i.second.getDataType().getLogicalTypeID() != common::LogicalTypeID::INT64 || + i.second.val.int64Val < 0) { + throw common::BinderException("LIMIT Option must be a positive integer literal."); + } + limitNum = i.second.val.int64Val; + } else if (i.first == common::CopyConstants::IGNORE_ERRORS_OPTION_NAME) { + if (i.second.getDataType().getLogicalTypeID() != common::LogicalTypeID::BOOL) { + throw common::BinderException("IGNORE_ERRORS Option must be a boolean."); + } + ignoreErrors = i.second.val.booleanVal; + } else { + throw common::BinderException( + common::stringFormat("{} Option not recognized by pyArrow scanner.", i.first)); + } + } +} + +} // namespace kuzu diff --git a/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp b/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp index ec1cbb660af..2e88fc20497 100644 --- a/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp +++ b/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp @@ -7,6 +7,7 @@ #include "function/table/bind_input.h" #include "processor/execution_context.h" #include "py_connection.h" +#include "py_scan_config.h" #include "pyarrow/pyarrow_bind.h" #include "pybind11/pytypes.h" @@ -16,35 +17,6 @@ using namespace kuzu::catalog; namespace kuzu { -PyArrowScanConfig::PyArrowScanConfig(const case_insensitive_map_t& options) { - skipNum = 0; - limitNum = NumericLimits::maximum(); - ignoreErrors = CopyConstants::DEFAULT_IGNORE_ERRORS; - for (const auto& i : options) { - if (i.first == "SKIP") { - if (i.second.getDataType().getLogicalTypeID() != LogicalTypeID::INT64 || - i.second.val.int64Val < 0) { - throw BinderException("SKIP Option must be a positive integer literal."); - } - skipNum = i.second.val.int64Val; - } else if (i.first == "LIMIT") { - if (i.second.getDataType().getLogicalTypeID() != LogicalTypeID::INT64 || - i.second.val.int64Val < 0) { - throw BinderException("LIMIT Option must be a positive integer literal."); - } - limitNum = i.second.val.int64Val; - } else if (i.first == CopyConstants::IGNORE_ERRORS_OPTION_NAME) { - if (i.second.getDataType().getLogicalTypeID() != LogicalTypeID::BOOL) { - throw BinderException("IGNORE_ERRORS Option must be a boolean."); - } - ignoreErrors = i.second.val.booleanVal; - } else { - throw BinderException( - stringFormat("{} Option not recognized by pyArrow scanner.", i.first)); - } - } -} - template static bool moduleIsLoaded() { auto dict = pybind11::module_::import("sys").attr("modules"); @@ -73,7 +45,7 @@ static std::unique_ptr bindFunc(ClientContext*, } auto numRows = py::len(table); auto schema = Pyarrow::bind(table, returnTypes, names); - auto config = PyArrowScanConfig(scanInput->fileScanInfo.options); + auto config = PyScanConfig(scanInput->fileScanInfo.options); // The following python operations are zero copy as defined in pyarrow docs. if (config.skipNum != 0) { table = table.attr("slice")(config.skipNum); diff --git a/tools/python_api/test/test_scan_pandas.py b/tools/python_api/test/test_scan_pandas.py index f1f7edd76c6..41e1138d73c 100644 --- a/tools/python_api/test/test_scan_pandas.py +++ b/tools/python_api/test/test_scan_pandas.py @@ -140,7 +140,8 @@ def test_scan_pandas(tmp_path: Path) -> None: "INT32": np.array([-100, -200, -300, -400], dtype=np.int32), "INT64": np.array([-1000, -2000, -3000, -4000], dtype=np.int64), "FLOAT_32": np.array( - [-0.5199999809265137, float("nan"), -3.299999952316284, 4.400000095367432], dtype=np.float32 + [-0.5199999809265137, float("nan"), -3.299999952316284, 4.400000095367432], + dtype=np.float32, ), "FLOAT_64": np.array([5132.12321, 24.222, float("nan"), 4.444], dtype=np.float64), "datetime_microseconds": np.array([ @@ -312,8 +313,18 @@ def test_pandas_scan_demo(tmp_path: Path) -> None: "height_in_inch RETURN s" ).get_as_df() assert len(result) == 2 - assert result["s"][0] == {"ID": 0, "_id": {"offset": 0, "table": 0}, "_label": "student", "height": 70} - assert result["s"][1] == {"ID": 4, "_id": {"offset": 2, "table": 0}, "_label": "student", "height": 67} + assert result["s"][0] == { + "ID": 0, + "_id": {"offset": 0, "table": 0}, + "_label": "student", + "height": 70, + } + assert result["s"][1] == { + "ID": 4, + "_id": {"offset": 2, "table": 0}, + "_label": "student", + "height": 67, + } conn.execute("CREATE NODE TABLE person(ID INT64, age UINT16, height UINT32, is_student BOOLean, PRIMARY KEY(ID))") conn.execute("LOAD FROM person CREATE (p:person {ID: id, age: age, height: height, is_student: is_student})") @@ -402,6 +413,54 @@ def test_copy_from_pandas_object(tmp_path: Path) -> None: assert result.has_next() is False +def test_copy_from_pandas_object_skip(tmp_path: Path) -> None: + db = kuzu.Database(tmp_path) + conn = kuzu.Connection(db) + df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]}) + conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));") + conn.execute("COPY Person FROM df(SKIP=2);") + result = conn.execute("match (p:Person) return p.*") + assert result.get_next() == ["Zhang", "50"] + assert result.get_next() == ["Noura", "25"] + assert result.has_next() is False + df = pd.DataFrame({"f": ["Adam", "Noura"], "t": ["Zhang", "Zhang"]}) + conn.execute("CREATE REL TABLE Knows(FROM Person TO Person);") + conn.execute("COPY Knows FROM df(SKIP=1)") + result = conn.execute("match (p:Person)-[]->(:Person {name: 'Zhang'}) return p.*") + assert result.get_next() == ["Noura", "25"] + assert result.has_next() is False + + +def test_copy_from_pandas_object_limit(tmp_path: Path) -> None: + db = kuzu.Database(tmp_path) + conn = kuzu.Connection(db) + df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]}) + conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));") + conn.execute("COPY Person FROM df(LIMIT=2);") + result = conn.execute("match (p:Person) return p.*") + assert result.get_next() == ["Adam", "30"] + assert result.get_next() == ["Karissa", "40"] + assert result.has_next() is False + df = pd.DataFrame({"f": ["Adam", "Zhang"], "t": ["Karissa", "Karissa"]}) + conn.execute("CREATE REL TABLE Knows(FROM Person TO Person);") + conn.execute("COPY Knows FROM df(LIMIT=1)") + result = conn.execute("match (p:Person)-[]->(:Person {name: 'Karissa'}) return p.*") + assert result.get_next() == ["Adam", "30"] + assert result.has_next() is False + + +def test_copy_from_pandas_object_skip_and_limit(tmp_path: Path) -> None: + db = kuzu.Database(tmp_path) + conn = kuzu.Connection(db) + df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]}) + conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));") + conn.execute("COPY Person FROM df(SKIP=1, LIMIT=2);") + result = conn.execute("match (p:Person) return p.*") + assert result.get_next() == ["Karissa", "40"] + assert result.get_next() == ["Zhang", "50"] + assert result.has_next() is False + + def test_copy_from_pandas_date(tmp_path: Path) -> None: db = kuzu.Database(tmp_path) conn = kuzu.Connection(db) From 9741d8898aa3496af0567d6f04ebb183c87e68f7 Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Tue, 7 Jan 2025 09:49:19 -0500 Subject: [PATCH 2/2] Add bounds check for skip option --- .../src_cpp/include/py_scan_config.h | 3 ++- .../python_api/src_cpp/pandas/pandas_scan.cpp | 3 ++- tools/python_api/src_cpp/py_scan_config.cpp | 5 ++-- .../src_cpp/pyarrow/pyarrow_scan.cpp | 2 +- tools/python_api/test/test_scan_pandas.py | 24 +++++++++++++++++++ .../test/test_scan_pandas_pyarrow.py | 12 ++++++++++ 6 files changed, 44 insertions(+), 5 deletions(-) diff --git a/tools/python_api/src_cpp/include/py_scan_config.h b/tools/python_api/src_cpp/include/py_scan_config.h index 0496f3229e2..3c39004fb52 100644 --- a/tools/python_api/src_cpp/include/py_scan_config.h +++ b/tools/python_api/src_cpp/include/py_scan_config.h @@ -9,7 +9,8 @@ struct PyScanConfig { uint64_t skipNum; uint64_t limitNum; bool ignoreErrors; - explicit PyScanConfig(const common::case_insensitive_map_t& options); + explicit PyScanConfig(const common::case_insensitive_map_t& options, + uint64_t numRows); }; } // namespace kuzu diff --git a/tools/python_api/src_cpp/pandas/pandas_scan.cpp b/tools/python_api/src_cpp/pandas/pandas_scan.cpp index 21a9ed20f0f..8d1775bf60a 100644 --- a/tools/python_api/src_cpp/pandas/pandas_scan.cpp +++ b/tools/python_api/src_cpp/pandas/pandas_scan.cpp @@ -34,7 +34,8 @@ std::unique_ptr bindFunc(ClientContext* /*context*/, auto numRows = py::len(getFunc(columns[0])); auto returnColumns = input->binder->createVariables(names, returnTypes); auto scanConfig = PyScanConfig{ - input->extraInput->constPtrCast()->fileScanInfo.options}; + input->extraInput->constPtrCast()->fileScanInfo.options, + numRows}; KU_ASSERT(numRows >= scanConfig.skipNum); return std::make_unique(std::move(returnColumns), df, std::min(numRows - scanConfig.skipNum, scanConfig.limitNum), std::move(columnBindData), diff --git a/tools/python_api/src_cpp/py_scan_config.cpp b/tools/python_api/src_cpp/py_scan_config.cpp index 9e728fd0ea6..3bd8d94f841 100644 --- a/tools/python_api/src_cpp/py_scan_config.cpp +++ b/tools/python_api/src_cpp/py_scan_config.cpp @@ -6,7 +6,8 @@ namespace kuzu { -PyScanConfig::PyScanConfig(const common::case_insensitive_map_t& options) { +PyScanConfig::PyScanConfig(const common::case_insensitive_map_t& options, + uint64_t numRows) { skipNum = 0; limitNum = function::NumericLimits::maximum(); ignoreErrors = common::CopyConstants::DEFAULT_IGNORE_ERRORS; @@ -16,7 +17,7 @@ PyScanConfig::PyScanConfig(const common::case_insensitive_map_t& i.second.val.int64Val < 0) { throw common::BinderException("SKIP Option must be a positive integer literal."); } - skipNum = i.second.val.int64Val; + skipNum = std::min(numRows, static_cast(i.second.val.int64Val)); } else if (i.first == "LIMIT") { if (i.second.getDataType().getLogicalTypeID() != common::LogicalTypeID::INT64 || i.second.val.int64Val < 0) { diff --git a/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp b/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp index 2e88fc20497..1d151be47d9 100644 --- a/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp +++ b/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp @@ -45,7 +45,7 @@ static std::unique_ptr bindFunc(ClientContext*, } auto numRows = py::len(table); auto schema = Pyarrow::bind(table, returnTypes, names); - auto config = PyScanConfig(scanInput->fileScanInfo.options); + auto config = PyScanConfig(scanInput->fileScanInfo.options, numRows); // The following python operations are zero copy as defined in pyarrow docs. if (config.skipNum != 0) { table = table.attr("slice")(config.skipNum); diff --git a/tools/python_api/test/test_scan_pandas.py b/tools/python_api/test/test_scan_pandas.py index 41e1138d73c..ba781f6bf65 100644 --- a/tools/python_api/test/test_scan_pandas.py +++ b/tools/python_api/test/test_scan_pandas.py @@ -461,6 +461,30 @@ def test_copy_from_pandas_object_skip_and_limit(tmp_path: Path) -> None: assert result.has_next() is False +def test_copy_from_pandas_object_skip_bounds_check(tmp_path: Path) -> None: + db = kuzu.Database(tmp_path) + conn = kuzu.Connection(db) + df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]}) + conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));") + conn.execute("COPY Person FROM df(SKIP=10);") + result = conn.execute("match (p:Person) return p.*") + assert result.has_next() is False + + +def test_copy_from_pandas_object_limit_bounds_check(tmp_path: Path) -> None: + db = kuzu.Database(tmp_path) + conn = kuzu.Connection(db) + df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]}) + conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));") + conn.execute("COPY Person FROM df(LIMIT=10);") + result = conn.execute("match (p:Person) return p.*") + assert result.get_next() == ["Adam", "30"] + assert result.get_next() == ["Karissa", "40"] + assert result.get_next() == ["Zhang", "50"] + assert result.get_next() == ["Noura", "25"] + assert result.has_next() is False + + def test_copy_from_pandas_date(tmp_path: Path) -> None: db = kuzu.Database(tmp_path) conn = kuzu.Connection(db) diff --git a/tools/python_api/test/test_scan_pandas_pyarrow.py b/tools/python_api/test/test_scan_pandas_pyarrow.py index 34892ee58ae..38a9032f197 100644 --- a/tools/python_api/test/test_scan_pandas_pyarrow.py +++ b/tools/python_api/test/test_scan_pandas_pyarrow.py @@ -677,6 +677,18 @@ def test_pyarrow_skip_limit(conn_db_readonly: ConnDB) -> None: assert result["col1"].to_pylist() == expected["col1"].to_pylist() assert result["col2"].to_pylist() == expected["col2"].to_pylist() + # skip bounds check + result = conn.execute("LOAD FROM df (SKIP=500000, LIMIT=5000) RETURN * ORDER BY index").get_as_arrow() + assert len(result) == 0 + + # limit bounds check + result = conn.execute("LOAD FROM df (SKIP=0, LIMIT=500000) RETURN * ORDER BY index").get_as_arrow() + expected = pa.Table.from_pandas(df) + assert result["index"].to_pylist() == expected["index"].to_pylist() + assert result["col0"].to_pylist() == expected["col0"].to_pylist() + assert result["col1"].to_pylist() == expected["col1"].to_pylist() + assert result["col2"].to_pylist() == expected["col2"].to_pylist() + def test_pyarrow_invalid_skip_limit(conn_db_readonly: ConnDB) -> None: conn, db = conn_db_readonly