diff --git a/3rd_party/apache-arrow-adbc/CHANGELOG.md b/3rd_party/apache-arrow-adbc/CHANGELOG.md index d0edd13..186f710 100644 --- a/3rd_party/apache-arrow-adbc/CHANGELOG.md +++ b/3rd_party/apache-arrow-adbc/CHANGELOG.md @@ -173,3 +173,89 @@ - **c**: merge CMake projects (#597) - **c/driver/postgresql**: Factor out Postgres type abstraction and test it independently of the driver (#573) - **c/driver/shared**: created shared util library for drivers (#582) + +## ADBC Libraries 0.5.0 (2023-06-15) + +### Feat + +- **c/driver/postgresql**: Support INT16 Postgres Ingest (#800) +- **python/adbc_driver_manager**: add autocommit, executescript (#778) +- **c/driver/postgresql,java**: ensure time/date type support (#774) +- **c/driver/postgresql**: Implement Foreign Key information for GetObjects (#757) +- **c/driver/postgresql**: add timestamp types support (#758) +- **c/driver/postgresql**: Implement PRIMARY KEY in GetObjects ALL depth (#725) +- **csharp**: adding C# functionality (#697) +- **go/adbc/pkg**: catch panics at interface boundary (#730) +- **java/driver/jdbc**: add hooks for JDBC type system mapping (#722) +- **c/driver/postgresql**: Implement GetObjects for columns (#723) +- **c/driver/postgresql**: Implement GetObjects for tables (#712) +- **rust**: define the rust adbc api (#478) +- **c/driver/postgresql**: handle non-SELECT statements (#707) +- **c/driver/postgresql**: Implement GetObjectsDbSchemas for Postgres (#679) +- **r**: Add `read_adbc()`, `write_adbc()`, and `execute_adbc()` convenience functions (#706) +- **r**: Improve error communication (#703) +- **r**: Add scoping + lifecycle helpers (#693) +- **r**: Add driver logging utility (#694) +- **c/driver/postgresql**: implement GetObjectsSchema (#676) +- **go/adbc/driver/snowflake**: Update gosnowflake dep (#674) +- **c/driver/postgresql**: Implement Postgres Get table types (#668) +- **dev/release**: Retry on apt failure in the RC verification script (#672) +- **c/driver/postgresql**: Implement Postgres GetInfo (#658) + +### Fix + +- **go/adbc/pkg**: allow ConnectionSetOptions before Init (#789) +- **c/driver/sqlite**: support PRIMARY KEY constraint in GetObjects (#777) +- **c/driver/common**: Prevent UB in GetObjects with NULL argument (#786) +- **c**: Fix destructor mem leaks (#785) +- **java/driver/jdbc**: return timestamps as MICROSECOND always (#771) +- **go/adbc**: don't crash on duplicate column names (#766) +- **c/driver/postgresql**: Fix ASAN detected leaks (#768) +- **c/driver/sqlite**: Fix parameter binding when inferring types and when retrieving (#742) +- **python/adbc_driver_manager**: fix fetching queries with empty results (#744) +- **go/adbc/drivermgr**: go doesn't package symbolic links (#709) +- **r**: Don't save database/connection/statement options at the R level (#708) +- **go/adbc**: Update snowflake dep (#705) +- **c/driver/snowflake**: fix validation test failures (#677) +- **dev/release**: Fix BINARY_DIR prepare condition in the verify RC script (#670) +- **c/driver/postgresql**: Prevent SQL Injection in GetTableSchema (#657) + +### Refactor + +- **c/driver/postgresql**: More postgres test simplification (#784) +- **c/driver/postgresql**: Use AdbcGetInfoData structure (#769) +- **csharp**: Cleanup C API (#749) +- **go/adbc/driver/flightsql**: factor out server-based tests (#763) +- **java/driver/jdbc**: add JdbcQuirks for backend config (#748) +- **r/adbcdrivermanager**: Early exit (#740) +- **c/driver/postgresql**: Use Prepared Statement in Result Helper (#714) +- **c/driver/postgresql**: Postgres class helper for GetObjects (#711) +- **c**: Use ArrowArrayViewListChildOffset from nanoarrow (#696) +- **c/driver/postgresql**: implement InputIterator for ResultHelper (#683) +- **c**: Simplify CI testing for cpp (#610) + +### Perf + +- **go/adbc/driver/flightsql**: filter by schema in getObjectsTables (#726) + +## ADBC Libraries 0.5.1 (2023-06-22) + +### Feat + +- **r**: Add FlightSQL driver wrapper (#835) +- **python/adbc_driver_flightsql**: add cookie middleware option to DatabaseOptions (#830) +- **go/adbc/driver/flightsql**: Add cookie middleware option (#825) +- **c/driver/postgresql**: Implement GetObjects with table_types argument (#799) +- **c/driver/postgresql**: Binary ingest (#808) +- **c/driver/postgresql**: Support float type (#807) + +### Fix + +- **go/adbc/driver/snowflake**: fix potential deadlock and error handling (#828) +- **csharp**: submodule not pulling correctly (#824) +- **go/adbc/driver/snowflake**: initialize Params, add DLL build (#820) +- **dev/release**: add missing duckdb dependency (#810) + +### Refactor + +- **csharp**: cleanup load of imported drivers (#818) diff --git a/3rd_party/apache-arrow-adbc/CONTRIBUTING.md b/3rd_party/apache-arrow-adbc/CONTRIBUTING.md index e97110a..bcd2315 100644 --- a/3rd_party/apache-arrow-adbc/CONTRIBUTING.md +++ b/3rd_party/apache-arrow-adbc/CONTRIBUTING.md @@ -190,6 +190,19 @@ $ pip install -e .[test] $ pytest -vvx ``` +Type checking is done with [pyright][pyright]. There is a script to +run the type checker: + +```shell +# Build native libraries first +$ env ADBC_USE_ASAN=0 ADBC_USE_UBSAN=0 ./ci/scripts/cpp_build.sh $(pwd) $(pwd)/build +# Install Python packages +$ ./ci/scripts/python_build.sh $(pwd) $(pwd)/build +# Run type checker +$ ./ci/scripts/python_typecheck.sh $(pwd) +``` + +[pyright]: https://microsoft.github.io/pyright/ [pytest]: https://docs.pytest.org/ [setuptools]: https://setuptools.pypa.io/en/latest/index.html @@ -197,6 +210,16 @@ $ pytest -vvx The Ruby libraries are bindings around the GLib libraries. +### Rust + +The Rust components are a standard Rust project. + +```shell +$ cd rust +# Build and run tests +$ cargo test +``` + ## Opening a Pull Request Before opening a pull request, please run the static checks, which are diff --git a/3rd_party/apache-arrow-adbc/LICENSE.txt b/3rd_party/apache-arrow-adbc/LICENSE.txt index e0fc358..316c69e 100644 --- a/3rd_party/apache-arrow-adbc/LICENSE.txt +++ b/3rd_party/apache-arrow-adbc/LICENSE.txt @@ -309,6 +309,7 @@ https://www.openssl.org/source/license.html 3rdparty dependency SQLite is statically linked in certain binary distributions, like the Python wheels. SQLite is public domain. + -------------------------------------------------------------------------------- 3rdparty dependency github.com/99designs/keyring @@ -396,28 +397,27 @@ SOFTWARE 3rdparty dependency github.com/Azure/azure-sdk-for-go/sdk/storage/azblob is statically linked in certain binary distributions, like the Python wheels. github.com/Azure/azure-sdk-for-go/sdk/storage/azblob is under the MIT license. -MIT License - -Copyright (c) Microsoft Corporation. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE - + MIT License + + Copyright (c) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE -------------------------------------------------------------------------------- 3rdparty dependency github.com/JohnCGriffin/overflow @@ -445,6 +445,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + + -------------------------------------------------------------------------------- 3rdparty dependency github.com/andybalholm/brotli @@ -1214,53 +1218,347 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -3rdparty dependency google.golang.org/genproto/googleapis/rpc/status +3rdparty dependency golang.org/x/crypto/ocsp is statically linked in certain binary distributions, like the Python wheels. -google.golang.org/genproto/googleapis/rpc/status is under the Apache-2.0 license. +golang.org/x/crypto/ocsp is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. --------------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: -3rdparty dependency google.golang.org/grpc -is statically linked in certain binary distributions, like the Python wheels. -google.golang.org/grpc is under the Apache-2.0 license. + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. --------------------------------------------------------------------------------- +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -3rdparty dependency golang.org/x/crypto/ocsp -is statically linked in certain binary distributions, like the Python wheels. -golang.org/x/crypto/ocsp is under the BSD-3-Clause license. +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/exp is statically linked in certain binary distributions, like the Python wheels. golang.org/x/exp is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/mod/semver is statically linked in certain binary distributions, like the Python wheels. golang.org/x/mod/semver is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/net is statically linked in certain binary distributions, like the Python wheels. golang.org/x/net is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/sync/errgroup is statically linked in certain binary distributions, like the Python wheels. golang.org/x/sync/errgroup is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/sys is statically linked in certain binary distributions, like the Python wheels. golang.org/x/sys is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/term is statically linked in certain binary distributions, like the Python wheels. golang.org/x/term is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/text is statically linked in certain binary distributions, like the Python wheels. golang.org/x/text is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- 3rdparty dependency golang.org/x/tools is statically linked in certain binary distributions, like the Python wheels. golang.org/x/tools is under the BSD-3-Clause license. +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency golang.org/x/xerrors +is statically linked in certain binary distributions, like the Python wheels. +golang.org/x/xerrors is under the BSD-3-Clause license. +Copyright (c) 2019 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency google.golang.org/genproto/googleapis/rpc/status +is statically linked in certain binary distributions, like the Python wheels. +google.golang.org/genproto/googleapis/rpc/status is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency google.golang.org/grpc +is statically linked in certain binary distributions, like the Python wheels. +google.golang.org/grpc is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- 3rdparty dependency google.golang.org/protobuf is statically linked in certain binary distributions, like the Python wheels. diff --git a/3rd_party/apache-arrow-adbc/c/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/CMakeLists.txt index 7f417b0..4115da7 100644 --- a/3rd_party/apache-arrow-adbc/c/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/CMakeLists.txt @@ -29,6 +29,14 @@ include(CTest) add_subdirectory(vendor/nanoarrow) add_subdirectory(driver/common) +if(ADBC_BUILD_TESTS) + add_subdirectory(validation) +endif() + +if(ADBC_INTEGRATION_DUCKDB) + set(ADBC_DRIVER_MANAGER ON) +endif() + if(ADBC_DRIVER_FLIGHTSQL) add_subdirectory(driver/flightsql) endif() @@ -49,5 +57,9 @@ if(ADBC_DRIVER_SNOWFLAKE) add_subdirectory(driver/snowflake) endif() +if(ADBC_INTEGRATION_DUCKDB) + add_subdirectory(integration/duckdb) +endif() + validate_config() config_summary_message() diff --git a/3rd_party/apache-arrow-adbc/c/cmake_modules/AdbcVersion.cmake b/3rd_party/apache-arrow-adbc/c/cmake_modules/AdbcVersion.cmake index 54aa4f8..7275918 100644 --- a/3rd_party/apache-arrow-adbc/c/cmake_modules/AdbcVersion.cmake +++ b/3rd_party/apache-arrow-adbc/c/cmake_modules/AdbcVersion.cmake @@ -21,7 +21,7 @@ # ------------------------------------------------------------ # Version definitions -set(ADBC_VERSION "0.4.0") +set(ADBC_VERSION "0.5.1") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ADBC_BASE_VERSION "${ADBC_VERSION}") string(REPLACE "." ";" _adbc_version_list "${ADBC_BASE_VERSION}") list(GET _adbc_version_list 0 ADBC_VERSION_MAJOR) diff --git a/3rd_party/apache-arrow-adbc/c/cmake_modules/DefineOptions.cmake b/3rd_party/apache-arrow-adbc/c/cmake_modules/DefineOptions.cmake index f015683..42b8f4f 100644 --- a/3rd_party/apache-arrow-adbc/c/cmake_modules/DefineOptions.cmake +++ b/3rd_party/apache-arrow-adbc/c/cmake_modules/DefineOptions.cmake @@ -233,6 +233,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ADBC_DRIVER_POSTGRESQL "Build the PostgreSQL driver" OFF) define_option(ADBC_DRIVER_SQLITE "Build the SQLite driver" OFF) define_option(ADBC_DRIVER_SNOWFLAKE "Build the Snowflake driver" OFF) + + define_option(ADBC_INTEGRATION_DUCKDB "Build the test suite for DuckDB" OFF) endif() macro(validate_config) @@ -255,10 +257,11 @@ endmacro() macro(config_summary_message) message(STATUS "---------------------------------------------------------------------") - message(STATUS "Arrow version: ${ADBC_VERSION}") + message(STATUS "ADBC version: ${ADBC_VERSION}") message(STATUS) message(STATUS "Build configuration summary:") + message(STATUS " CMake version: ${CMAKE_VERSION}") message(STATUS " Generator: ${CMAKE_GENERATOR}") message(STATUS " Build type: ${CMAKE_BUILD_TYPE}") message(STATUS " Source directory: ${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/3rd_party/apache-arrow-adbc/c/driver/common/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/driver/common/CMakeLists.txt index 60516b6..33dd1c8 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/common/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/driver/common/CMakeLists.txt @@ -17,8 +17,8 @@ add_library(adbc_driver_common STATIC utils.c) set_target_properties(adbc_driver_common PROPERTIES POSITION_INDEPENDENT_CODE ON) -include_directories(SYSTEM ${REPOSITORY_ROOT}) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) +target_include_directories(adbc_driver_common PRIVATE "${REPOSITORY_ROOT}" + "${REPOSITORY_ROOT}/c/vendor") if(ADBC_BUILD_TESTS) add_test_case(driver_common_test @@ -32,5 +32,7 @@ if(ADBC_BUILD_TESTS) adbc_driver_common nanoarrow) target_compile_features(adbc-driver-common-test PRIVATE cxx_std_17) + target_include_directories(adbc-driver-common-test + PRIVATE "${REPOSITORY_ROOT}" "${REPOSITORY_ROOT}/c/vendor") adbc_configure_target(adbc-driver-common-test) endif() diff --git a/3rd_party/apache-arrow-adbc/c/driver/common/utils.c b/3rd_party/apache-arrow-adbc/c/driver/common/utils.c index 26c8b9d..dfac14f 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/common/utils.c +++ b/3rd_party/apache-arrow-adbc/c/driver/common/utils.c @@ -24,7 +24,6 @@ #include #include -#include static size_t kErrorBufferSize = 256; @@ -145,6 +144,7 @@ int StringBuilderAppend(struct StringBuilder* builder, const char* fmt, ...) { va_start(argptr, fmt); int ret = vsnprintf(builder->buffer + builder->size, n + 1, fmt, argptr); if (ret < 0) { + va_end(argptr); return errno; } @@ -160,3 +160,750 @@ void StringBuilderReset(struct StringBuilder* builder) { } memset(builder, 0, sizeof(*builder)); } + +AdbcStatusCode AdbcInitConnectionGetInfoSchema(const uint32_t* info_codes, + size_t info_codes_length, + struct ArrowSchema* schema, + struct ArrowArray* array, + struct AdbcError* error) { + // TODO: use C equivalent of UniqueSchema to avoid incomplete schema + // on error + ArrowSchemaInit(schema); + CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, /*num_columns=*/2), error); + + CHECK_NA(INTERNAL, ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_UINT32), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(schema->children[0], "info_name"), error); + schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + + struct ArrowSchema* info_value = schema->children[1]; + CHECK_NA(INTERNAL, ArrowSchemaSetTypeUnion(info_value, NANOARROW_TYPE_DENSE_UNION, 6), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value, "info_value"), error); + + CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[0], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[0], "string_value"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[1], NANOARROW_TYPE_BOOL), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[1], "bool_value"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[2], NANOARROW_TYPE_INT64), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[2], "int64_value"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[3], NANOARROW_TYPE_INT32), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[3], "int32_bitmask"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[4], NANOARROW_TYPE_LIST), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[4], "string_list"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[5], NANOARROW_TYPE_MAP), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(info_value->children[5], "int32_to_int32_list_map"), error); + + CHECK_NA( + INTERNAL, + ArrowSchemaSetType(info_value->children[4]->children[0], NANOARROW_TYPE_STRING), + error); + + CHECK_NA(INTERNAL, + ArrowSchemaSetType(info_value->children[5]->children[0]->children[0], + NANOARROW_TYPE_INT32), + error); + info_value->children[5]->children[0]->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + CHECK_NA(INTERNAL, + ArrowSchemaSetType(info_value->children[5]->children[0]->children[1], + NANOARROW_TYPE_LIST), + error); + CHECK_NA( + INTERNAL, + ArrowSchemaSetType(info_value->children[5]->children[0]->children[1]->children[0], + NANOARROW_TYPE_INT32), + error); + + struct ArrowError na_error = {0}; + CHECK_NA_DETAIL(INTERNAL, ArrowArrayInitFromSchema(array, schema, &na_error), &na_error, + error); + CHECK_NA(INTERNAL, ArrowArrayStartAppending(array), error); + + return ADBC_STATUS_OK; +} // NOLINT(whitespace/indent) + +AdbcStatusCode AdbcConnectionGetInfoAppendString(struct ArrowArray* array, + uint32_t info_code, + const char* info_value, + struct AdbcError* error) { + CHECK_NA(INTERNAL, ArrowArrayAppendUInt(array->children[0], info_code), error); + // Append to type variant + struct ArrowStringView value = ArrowCharView(info_value); + CHECK_NA(INTERNAL, ArrowArrayAppendString(array->children[1]->children[0], value), + error); + // Append type code/offset + CHECK_NA(INTERNAL, ArrowArrayFinishUnionElement(array->children[1], /*type_id=*/0), + error); + return ADBC_STATUS_OK; +} + +AdbcStatusCode AdbcInitConnectionObjectsSchema(struct ArrowSchema* schema, + struct AdbcError* error) { + ArrowSchemaInit(schema); + CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, /*num_columns=*/2), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(schema->children[0], "catalog_name"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_LIST), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(schema->children[1], "catalog_db_schemas"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema->children[1]->children[0], 2), + error); + + struct ArrowSchema* db_schema_schema = schema->children[1]->children[0]; + CHECK_NA(INTERNAL, + ArrowSchemaSetType(db_schema_schema->children[0], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(db_schema_schema->children[0], "db_schema_name"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(db_schema_schema->children[1], NANOARROW_TYPE_LIST), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(db_schema_schema->children[1], "db_schema_tables"), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetTypeStruct(db_schema_schema->children[1]->children[0], 4), + error); + + struct ArrowSchema* table_schema = db_schema_schema->children[1]->children[0]; + CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[0], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[0], "table_name"), error); + table_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[1], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[1], "table_type"), error); + table_schema->children[1]->flags &= ~ARROW_FLAG_NULLABLE; + CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[2], NANOARROW_TYPE_LIST), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[2], "table_columns"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(table_schema->children[2]->children[0], 19), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[3], NANOARROW_TYPE_LIST), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[3], "table_constraints"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(table_schema->children[3]->children[0], 4), + error); + + struct ArrowSchema* column_schema = table_schema->children[2]->children[0]; + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[0], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[0], "column_name"), + error); + column_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[1], NANOARROW_TYPE_INT32), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[1], "ordinal_position"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[2], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[2], "remarks"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[3], NANOARROW_TYPE_INT16), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[3], "xdbc_data_type"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[4], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[4], "xdbc_type_name"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[5], NANOARROW_TYPE_INT32), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[5], "xdbc_column_size"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[6], NANOARROW_TYPE_INT16), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(column_schema->children[6], "xdbc_decimal_digits"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[7], NANOARROW_TYPE_INT16), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(column_schema->children[7], "xdbc_num_prec_radix"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[8], NANOARROW_TYPE_INT16), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[8], "xdbc_nullable"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[9], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[9], "xdbc_column_def"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[10], NANOARROW_TYPE_INT16), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(column_schema->children[10], "xdbc_sql_data_type"), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[11], NANOARROW_TYPE_INT16), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[11], "xdbc_datetime_sub"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[12], NANOARROW_TYPE_INT32), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(column_schema->children[12], "xdbc_char_octet_length"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[13], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[13], "xdbc_is_nullable"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[14], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(column_schema->children[14], "xdbc_scope_catalog"), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[15], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[15], "xdbc_scope_schema"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(column_schema->children[16], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[16], "xdbc_scope_table"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[17], NANOARROW_TYPE_BOOL), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(column_schema->children[17], "xdbc_is_autoincrement"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[18], NANOARROW_TYPE_BOOL), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(column_schema->children[18], "xdbc_is_generatedcolumn"), + error); + + struct ArrowSchema* constraint_schema = table_schema->children[3]->children[0]; + CHECK_NA(INTERNAL, + ArrowSchemaSetType(constraint_schema->children[0], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(constraint_schema->children[0], "constraint_name"), error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(constraint_schema->children[1], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(constraint_schema->children[1], "constraint_type"), error); + constraint_schema->children[1]->flags &= ~ARROW_FLAG_NULLABLE; + CHECK_NA(INTERNAL, + ArrowSchemaSetType(constraint_schema->children[2], NANOARROW_TYPE_LIST), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(constraint_schema->children[2], "constraint_column_names"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(constraint_schema->children[2]->children[0], + NANOARROW_TYPE_STRING), + error); + constraint_schema->children[2]->flags &= ~ARROW_FLAG_NULLABLE; + CHECK_NA(INTERNAL, + ArrowSchemaSetType(constraint_schema->children[3], NANOARROW_TYPE_LIST), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetName(constraint_schema->children[3], "constraint_column_usage"), + error); + CHECK_NA(INTERNAL, + ArrowSchemaSetTypeStruct(constraint_schema->children[3]->children[0], 4), + error); + + struct ArrowSchema* usage_schema = constraint_schema->children[3]->children[0]; + CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[0], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[0], "fk_catalog"), error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[1], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[1], "fk_db_schema"), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[2], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[2], "fk_table"), error); + usage_schema->children[2]->flags &= ~ARROW_FLAG_NULLABLE; + CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[3], NANOARROW_TYPE_STRING), + error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[3], "fk_column_name"), + error); + usage_schema->children[3]->flags &= ~ARROW_FLAG_NULLABLE; + + return ADBC_STATUS_OK; +} + +struct AdbcGetObjectsData* AdbcGetObjectsDataInit(struct ArrowArrayView* array_view) { + struct AdbcGetObjectsData* get_objects_data = + (struct AdbcGetObjectsData*)calloc(1, sizeof(struct AdbcGetObjectsData)); + if (get_objects_data == NULL) { + return NULL; + } + + get_objects_data->catalog_name_array = array_view->children[0]; + get_objects_data->catalog_schemas_array = array_view->children[1]; + get_objects_data->n_catalogs = 0; + + struct ArrowArrayView* catalog_db_schemas_items = + get_objects_data->catalog_schemas_array->children[0]; + get_objects_data->db_schema_name_array = catalog_db_schemas_items->children[0]; + get_objects_data->db_schema_tables_array = catalog_db_schemas_items->children[1]; + + struct ArrowArrayView* schema_table_items = + get_objects_data->db_schema_tables_array->children[0]; + get_objects_data->table_name_array = schema_table_items->children[0]; + get_objects_data->table_type_array = schema_table_items->children[1]; + get_objects_data->table_columns_array = schema_table_items->children[2]; + get_objects_data->table_constraints_array = schema_table_items->children[3]; + + struct ArrowArrayView* table_columns_items = + get_objects_data->table_columns_array->children[0]; + get_objects_data->column_name_array = table_columns_items->children[0]; + get_objects_data->column_position_array = table_columns_items->children[1]; + get_objects_data->column_remarks_array = table_columns_items->children[2]; + get_objects_data->xdbc_data_type_array = table_columns_items->children[3]; + get_objects_data->xdbc_type_name_array = table_columns_items->children[4]; + get_objects_data->xdbc_column_size_array = table_columns_items->children[5]; + get_objects_data->xdbc_decimal_digits_array = table_columns_items->children[6]; + get_objects_data->xdbc_num_prec_radix_array = table_columns_items->children[7]; + get_objects_data->xdbc_nullable_array = table_columns_items->children[8]; + get_objects_data->xdbc_column_def_array = table_columns_items->children[9]; + get_objects_data->xdbc_sql_data_type_array = table_columns_items->children[10]; + get_objects_data->xdbc_datetime_sub_array = table_columns_items->children[11]; + get_objects_data->xdbc_char_octet_length_array = table_columns_items->children[12]; + get_objects_data->xdbc_is_nullable_array = table_columns_items->children[13]; + get_objects_data->xdbc_scope_catalog_array = table_columns_items->children[14]; + get_objects_data->xdbc_scope_schema_array = table_columns_items->children[15]; + get_objects_data->xdbc_scope_table_array = table_columns_items->children[16]; + get_objects_data->xdbc_is_autoincrement_array = table_columns_items->children[17]; + get_objects_data->xdbc_is_generatedcolumn_array = table_columns_items->children[18]; + + struct ArrowArrayView* table_constraints_items = + get_objects_data->table_constraints_array->children[0]; + get_objects_data->constraint_name_array = table_constraints_items->children[0]; + get_objects_data->constraint_type_array = table_constraints_items->children[1]; + get_objects_data->constraint_column_names_array = table_constraints_items->children[2]; + get_objects_data->constraint_column_name_array = + get_objects_data->constraint_column_names_array->children[0]; + get_objects_data->constraint_column_usages_array = table_constraints_items->children[3]; + + struct ArrowArrayView* constraint_column_usage_items = + get_objects_data->constraint_column_usages_array->children[0]; + get_objects_data->fk_catalog_array = constraint_column_usage_items->children[0]; + get_objects_data->fk_db_schema_array = constraint_column_usage_items->children[1]; + get_objects_data->fk_table_array = constraint_column_usage_items->children[2]; + get_objects_data->fk_column_name_array = constraint_column_usage_items->children[3]; + + get_objects_data->catalogs = (struct AdbcGetObjectsCatalog**)calloc( + array_view->array->length, sizeof(struct AdbcGetObjectsCatalog*)); + + if (get_objects_data->catalogs == NULL) { + goto error_handler; + } + + for (int64_t catalog_idx = 0; catalog_idx < array_view->array->length; catalog_idx++) { + struct AdbcGetObjectsCatalog* catalog = + (struct AdbcGetObjectsCatalog*)calloc(1, sizeof(struct AdbcGetObjectsCatalog)); + if (catalog == NULL) { + goto error_handler; + } + get_objects_data->catalogs[catalog_idx] = catalog; + get_objects_data->n_catalogs++; + + catalog->n_db_schemas = 0; + + catalog->catalog_name = + ArrowArrayViewGetStringUnsafe(get_objects_data->catalog_name_array, catalog_idx); + + int64_t db_schema_list_start = ArrowArrayViewListChildOffset( + get_objects_data->catalog_schemas_array, catalog_idx); + int64_t db_schema_list_end = ArrowArrayViewListChildOffset( + get_objects_data->catalog_schemas_array, catalog_idx + 1); + + int64_t db_schema_len = db_schema_list_end - db_schema_list_start; + + if (db_schema_len == 0) { + catalog->catalog_db_schemas = NULL; + } else { + catalog->catalog_db_schemas = (struct AdbcGetObjectsSchema**)calloc( + db_schema_len, sizeof(struct AdbcGetObjectsSchema*)); + if (catalog->catalog_db_schemas == NULL) { + goto error_handler; + } + + for (int64_t db_schema_index = db_schema_list_start; + db_schema_index < db_schema_list_end; db_schema_index++) { + struct AdbcGetObjectsSchema* schema = + (struct AdbcGetObjectsSchema*)calloc(1, sizeof(struct AdbcGetObjectsSchema)); + if (schema == NULL) { + goto error_handler; + } + catalog->catalog_db_schemas[db_schema_index - db_schema_list_start] = schema; + catalog->n_db_schemas++; + schema->n_db_schema_tables = 0; + + schema->db_schema_name = ArrowArrayViewGetStringUnsafe( + get_objects_data->db_schema_name_array, db_schema_index); + int64_t table_list_start = ArrowArrayViewListChildOffset( + get_objects_data->db_schema_tables_array, db_schema_index); + int64_t table_list_end = ArrowArrayViewListChildOffset( + get_objects_data->db_schema_tables_array, db_schema_index + 1); + int64_t table_len = table_list_end - table_list_start; + + if (table_len == 0) { + schema->db_schema_tables = NULL; + } else { + schema->db_schema_tables = (struct AdbcGetObjectsTable**)calloc( + table_len, sizeof(struct AdbcGetObjectsTable*)); + if (schema->db_schema_tables == NULL) { + goto error_handler; + } + + for (int64_t table_index = table_list_start; table_index < table_list_end; + table_index++) { + struct AdbcGetObjectsTable* table = (struct AdbcGetObjectsTable*)calloc( + 1, sizeof(struct AdbcGetObjectsTable)); + if (table == NULL) { + goto error_handler; + } + schema->db_schema_tables[table_index - table_list_start] = table; + schema->n_db_schema_tables++; + table->n_table_columns = 0; + table->n_table_constraints = 0; + + table->table_name = ArrowArrayViewGetStringUnsafe( + get_objects_data->table_name_array, table_index); + table->table_type = ArrowArrayViewGetStringUnsafe( + get_objects_data->table_type_array, table_index); + + int64_t columns_list_start = ArrowArrayViewListChildOffset( + get_objects_data->table_columns_array, table_index); + int64_t columns_list_end = ArrowArrayViewListChildOffset( + get_objects_data->table_columns_array, table_index + 1); + + int64_t columns_len = columns_list_end - columns_list_start; + + if (columns_len == 0) { + table->table_columns = NULL; + } else { + table->table_columns = (struct AdbcGetObjectsColumn**)calloc( + columns_len, sizeof(struct AdbcGetObjectsColumn*)); + if (table->table_columns == NULL) { + goto error_handler; + } + + for (int64_t column_index = columns_list_start; + column_index < columns_list_end; column_index++) { + struct AdbcGetObjectsColumn* column = + (struct AdbcGetObjectsColumn*)calloc( + 1, sizeof(struct AdbcGetObjectsColumn)); + if (column == NULL) { + goto error_handler; + } + table->table_columns[column_index - columns_list_start] = column; + table->n_table_columns++; + + column->column_name = ArrowArrayViewGetStringUnsafe( + get_objects_data->column_name_array, column_index); + column->ordinal_position = ArrowArrayViewGetIntUnsafe( + get_objects_data->column_position_array, column_index); + column->remarks = ArrowArrayViewGetStringUnsafe( + get_objects_data->column_remarks_array, column_index); + column->xdbc_data_type = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_data_type_array, column_index); + column->xdbc_type_name = ArrowArrayViewGetStringUnsafe( + get_objects_data->xdbc_type_name_array, column_index); + column->xdbc_column_size = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_column_size_array, column_index); + column->xdbc_decimal_digits = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_decimal_digits_array, column_index); + column->xdbc_num_prec_radix = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_num_prec_radix_array, column_index); + column->xdbc_nullable = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_nullable_array, column_index); + column->xdbc_column_def = ArrowArrayViewGetStringUnsafe( + get_objects_data->xdbc_column_def_array, column_index); + column->xdbc_sql_data_type = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_sql_data_type_array, column_index); + column->xdbc_datetime_sub = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_datetime_sub_array, column_index); + column->xdbc_char_octet_length = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_char_octet_length_array, column_index); + column->xdbc_scope_catalog = ArrowArrayViewGetStringUnsafe( + get_objects_data->xdbc_scope_catalog_array, column_index); + column->xdbc_scope_schema = ArrowArrayViewGetStringUnsafe( + get_objects_data->xdbc_scope_schema_array, column_index); + column->xdbc_scope_table = ArrowArrayViewGetStringUnsafe( + get_objects_data->xdbc_scope_table_array, column_index); + // TODO: implement a nanoarrow GetBool view here? + column->xdbc_is_autoincrement = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_is_autoincrement_array, column_index); + column->xdbc_is_generatedcolumn = ArrowArrayViewGetIntUnsafe( + get_objects_data->xdbc_is_generatedcolumn_array, column_index); + } + } + + int64_t constraints_list_start = ArrowArrayViewListChildOffset( + get_objects_data->table_constraints_array, table_index); + int64_t constraints_list_end = ArrowArrayViewListChildOffset( + get_objects_data->table_constraints_array, table_index + 1); + int64_t constraints_len = constraints_list_end - constraints_list_start; + + if (constraints_len == 0) { + table->table_constraints = NULL; + } else { + table->table_constraints = (struct AdbcGetObjectsConstraint**)calloc( + constraints_len, sizeof(struct AdbcGetObjectsConstraint*)); + if (table->table_constraints == NULL) { + goto error_handler; + } + + for (int64_t constraint_index = constraints_list_start; + constraint_index < constraints_list_end; constraint_index++) { + struct AdbcGetObjectsConstraint* constraint = + (struct AdbcGetObjectsConstraint*)calloc( + 1, sizeof(struct AdbcGetObjectsConstraint)); + if (constraint == NULL) { + goto error_handler; + } + table->table_constraints[constraint_index - constraints_list_start] = + constraint; + table->n_table_constraints++; + constraint->n_column_names = 0; + constraint->n_column_usages = 0; + + constraint->constraint_name = ArrowArrayViewGetStringUnsafe( + get_objects_data->constraint_name_array, constraint_index); + constraint->constraint_type = ArrowArrayViewGetStringUnsafe( + get_objects_data->constraint_type_array, constraint_index); + int64_t constraint_column_names_start = ArrowArrayViewListChildOffset( + get_objects_data->constraint_column_names_array, constraint_index); + int64_t constraint_column_names_end = ArrowArrayViewListChildOffset( + get_objects_data->constraint_column_names_array, + constraint_index + 1); + int64_t constraint_column_names_len = + constraint_column_names_end - constraint_column_names_start; + + if (constraint_column_names_len == 0) { + constraint->constraint_column_names = NULL; + } else { + constraint->constraint_column_names = (struct ArrowStringView*)calloc( + constraint_column_names_len, sizeof(struct ArrowStringView)); + if (constraint->constraint_column_names == NULL) { + goto error_handler; + } + + for (int64_t constraint_column_name_index = + constraint_column_names_start; + constraint_column_name_index < constraint_column_names_end; + constraint_column_name_index++) { + constraint->constraint_column_names[constraint_column_name_index - + constraint_column_names_start] = + ArrowArrayViewGetStringUnsafe( + get_objects_data->constraint_column_name_array, + constraint_column_name_index); + constraint->n_column_names++; + } + } + + int64_t constraint_column_usages_start = ArrowArrayViewListChildOffset( + get_objects_data->constraint_column_usages_array, constraint_index); + int64_t constraint_column_usages_end = ArrowArrayViewListChildOffset( + get_objects_data->constraint_column_usages_array, + constraint_index + 1); + int64_t constraint_column_usages_len = + constraint_column_usages_end - constraint_column_usages_start; + + if (constraint_column_usages_len == 0) { + constraint->constraint_column_usages = NULL; + } else { + constraint->constraint_column_usages = + (struct AdbcGetObjectsUsage**)calloc( + constraint_column_usages_len, + sizeof(struct AdbcGetObjectsUsage*)); + if (constraint->constraint_column_usages == NULL) { + goto error_handler; + } + + for (int64_t constraint_column_usage_index = + constraint_column_usages_start; + constraint_column_usage_index < constraint_column_usages_end; + constraint_column_usage_index++) { + struct AdbcGetObjectsUsage* usage = + (struct AdbcGetObjectsUsage*)calloc( + 1, sizeof(struct AdbcGetObjectsUsage)); + if (usage == NULL) { + goto error_handler; + } + + usage->fk_catalog = + ArrowArrayViewGetStringUnsafe(get_objects_data->fk_catalog_array, + constraint_column_usage_index); + usage->fk_db_schema = ArrowArrayViewGetStringUnsafe( + get_objects_data->fk_db_schema_array, + constraint_column_usage_index); + usage->fk_table = ArrowArrayViewGetStringUnsafe( + get_objects_data->fk_table_array, constraint_column_usage_index); + usage->fk_column_name = ArrowArrayViewGetStringUnsafe( + get_objects_data->fk_column_name_array, + constraint_column_usage_index); + + constraint->constraint_column_usages[constraint_column_usage_index - + constraint_column_usages_start] = + usage; + constraint->n_column_usages++; + } + } + } + } + } + } + } + } + } + + return get_objects_data; + +error_handler: + AdbcGetObjectsDataDelete(get_objects_data); + return NULL; +} + +void AdbcGetObjectsDataDelete(struct AdbcGetObjectsData* get_objects_data) { + for (int64_t catalog_index = 0; catalog_index < get_objects_data->n_catalogs; + catalog_index++) { + struct AdbcGetObjectsCatalog* catalog = get_objects_data->catalogs[catalog_index]; + for (int64_t db_schema_index = 0; db_schema_index < catalog->n_db_schemas; + db_schema_index++) { + struct AdbcGetObjectsSchema* schema = catalog->catalog_db_schemas[db_schema_index]; + for (int64_t table_index = 0; table_index < schema->n_db_schema_tables; + table_index++) { + struct AdbcGetObjectsTable* table = schema->db_schema_tables[table_index]; + for (int64_t column_index = 0; column_index < table->n_table_columns; + column_index++) { + free(table->table_columns[column_index]); + } + + free(table->table_columns); + + for (int64_t constraint_index = 0; constraint_index < table->n_table_constraints; + constraint_index++) { + struct AdbcGetObjectsConstraint* constraint = + table->table_constraints[constraint_index]; + free(constraint->constraint_column_names); + for (int64_t usage_index = 0; usage_index < constraint->n_column_usages; + usage_index++) { + free(constraint->constraint_column_usages[usage_index]); + } + + free(constraint->constraint_column_usages); + free(table->table_constraints[constraint_index]); + } + + free(table->table_constraints); + free(table); + } + + free(schema->db_schema_tables); + free(schema); + } + + free(catalog->catalog_db_schemas); + free(catalog); + } + + free(get_objects_data->catalogs); + free(get_objects_data); +} + +struct AdbcGetObjectsCatalog* AdbcGetObjectsDataGetCatalogByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name) { + if (catalog_name != NULL) { + for (int64_t i = 0; i < get_objects_data->n_catalogs; i++) { + struct AdbcGetObjectsCatalog* catalog = get_objects_data->catalogs[i]; + struct ArrowStringView name = catalog->catalog_name; + if (!strncmp(name.data, catalog_name, name.size_bytes)) { + return catalog; + } + } + } + + return NULL; +} + +struct AdbcGetObjectsSchema* AdbcGetObjectsDataGetSchemaByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name) { + if (schema_name != NULL) { + struct AdbcGetObjectsCatalog* catalog = + AdbcGetObjectsDataGetCatalogByName(get_objects_data, catalog_name); + if (catalog != NULL) { + for (int64_t i = 0; i < catalog->n_db_schemas; i++) { + struct AdbcGetObjectsSchema* schema = catalog->catalog_db_schemas[i]; + struct ArrowStringView name = schema->db_schema_name; + if (!strncmp(name.data, schema_name, name.size_bytes)) { + return schema; + } + } + } + } + + return NULL; +} + +struct AdbcGetObjectsTable* AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name, const char* const table_name) { + if (table_name != NULL) { + struct AdbcGetObjectsSchema* schema = + AdbcGetObjectsDataGetSchemaByName(get_objects_data, catalog_name, schema_name); + if (schema != NULL) { + for (int64_t i = 0; i < schema->n_db_schema_tables; i++) { + struct AdbcGetObjectsTable* table = schema->db_schema_tables[i]; + struct ArrowStringView name = table->table_name; + if (!strncmp(name.data, table_name, name.size_bytes)) { + return table; + } + } + } + } + + return NULL; +} + +struct AdbcGetObjectsColumn* AdbcGetObjectsDataGetColumnByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name, const char* const table_name, + const char* const column_name) { + if (column_name != NULL) { + struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + get_objects_data, catalog_name, schema_name, table_name); + if (table != NULL) { + for (int64_t i = 0; i < table->n_table_columns; i++) { + struct AdbcGetObjectsColumn* column = table->table_columns[i]; + struct ArrowStringView name = column->column_name; + if (!strncmp(name.data, column_name, name.size_bytes)) { + return column; + } + } + } + } + + return NULL; +} + +struct AdbcGetObjectsConstraint* AdbcGetObjectsDataGetConstraintByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name, const char* const table_name, + const char* const constraint_name) { + if (constraint_name != NULL) { + struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + get_objects_data, catalog_name, schema_name, table_name); + if (table != NULL) { + for (int64_t i = 0; i < table->n_table_constraints; i++) { + struct AdbcGetObjectsConstraint* constraint = table->table_constraints[i]; + struct ArrowStringView name = constraint->constraint_name; + if (!strncmp(name.data, constraint_name, name.size_bytes)) { + return constraint; + } + } + } + } + + return NULL; +} diff --git a/3rd_party/apache-arrow-adbc/c/driver/common/utils.h b/3rd_party/apache-arrow-adbc/c/driver/common/utils.h index 0223dca..f0b5fa3 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/common/utils.h +++ b/3rd_party/apache-arrow-adbc/c/driver/common/utils.h @@ -17,9 +17,11 @@ #pragma once +#include #include #include +#include "nanoarrow/nanoarrow.h" #if defined(__GNUC__) #define SET_ERROR_ATTRIBUTE __attribute__((format(printf, 2, 3))) @@ -104,6 +106,148 @@ void StringBuilderReset(struct StringBuilder* builder); if (adbc_status_code != ADBC_STATUS_OK) return adbc_status_code; \ } while (0) +/// \defgroup adbc-connection-utils Connection Utilities +/// Utilities for implementing connection-related functions for drivers +/// +/// @{ +AdbcStatusCode AdbcInitConnectionGetInfoSchema(const uint32_t* info_codes, + size_t info_codes_length, + struct ArrowSchema* schema, + struct ArrowArray* array, + struct AdbcError* error); +AdbcStatusCode AdbcConnectionGetInfoAppendString(struct ArrowArray* array, + uint32_t info_code, + const char* info_value, + struct AdbcError* error); + +AdbcStatusCode AdbcInitConnectionObjectsSchema(struct ArrowSchema* schema, + struct AdbcError* error); +/// @} + +struct AdbcGetObjectsUsage { + struct ArrowStringView fk_catalog; + struct ArrowStringView fk_db_schema; + struct ArrowStringView fk_table; + struct ArrowStringView fk_column_name; +}; + +struct AdbcGetObjectsConstraint { + struct ArrowStringView constraint_name; + struct ArrowStringView constraint_type; + struct ArrowStringView* constraint_column_names; + int n_column_names; + struct AdbcGetObjectsUsage** constraint_column_usages; + int n_column_usages; +}; + +struct AdbcGetObjectsColumn { + struct ArrowStringView column_name; + int32_t ordinal_position; + struct ArrowStringView remarks; + int16_t xdbc_data_type; + struct ArrowStringView xdbc_type_name; + int32_t xdbc_column_size; + int16_t xdbc_decimal_digits; + int16_t xdbc_num_prec_radix; + int16_t xdbc_nullable; + struct ArrowStringView xdbc_column_def; + int16_t xdbc_sql_data_type; + int16_t xdbc_datetime_sub; + int32_t xdbc_char_octet_length; + struct ArrowStringView xdbc_is_nullable; + struct ArrowStringView xdbc_scope_catalog; + struct ArrowStringView xdbc_scope_schema; + struct ArrowStringView xdbc_scope_table; + bool xdbc_is_autoincrement; + bool xdbc_is_generatedcolumn; +}; + +struct AdbcGetObjectsTable { + struct ArrowStringView table_name; + struct ArrowStringView table_type; + struct AdbcGetObjectsColumn** table_columns; + int n_table_columns; + struct AdbcGetObjectsConstraint** table_constraints; + int n_table_constraints; +}; + +struct AdbcGetObjectsSchema { + struct ArrowStringView db_schema_name; + struct AdbcGetObjectsTable** db_schema_tables; + int n_db_schema_tables; +}; + +struct AdbcGetObjectsCatalog { + struct ArrowStringView catalog_name; + struct AdbcGetObjectsSchema** catalog_db_schemas; + int n_db_schemas; +}; + +struct AdbcGetObjectsData { + struct AdbcGetObjectsCatalog** catalogs; + int n_catalogs; + struct ArrowArrayView* catalog_name_array; + struct ArrowArrayView* catalog_schemas_array; + struct ArrowArrayView* db_schema_name_array; + struct ArrowArrayView* db_schema_tables_array; + struct ArrowArrayView* table_name_array; + struct ArrowArrayView* table_type_array; + struct ArrowArrayView* table_columns_array; + struct ArrowArrayView* table_constraints_array; + struct ArrowArrayView* column_name_array; + struct ArrowArrayView* column_position_array; + struct ArrowArrayView* column_remarks_array; + struct ArrowArrayView* xdbc_data_type_array; + struct ArrowArrayView* xdbc_type_name_array; + struct ArrowArrayView* xdbc_column_size_array; + struct ArrowArrayView* xdbc_decimal_digits_array; + struct ArrowArrayView* xdbc_num_prec_radix_array; + struct ArrowArrayView* xdbc_nullable_array; + struct ArrowArrayView* xdbc_column_def_array; + struct ArrowArrayView* xdbc_sql_data_type_array; + struct ArrowArrayView* xdbc_datetime_sub_array; + struct ArrowArrayView* xdbc_char_octet_length_array; + struct ArrowArrayView* xdbc_is_nullable_array; + struct ArrowArrayView* xdbc_scope_catalog_array; + struct ArrowArrayView* xdbc_scope_schema_array; + struct ArrowArrayView* xdbc_scope_table_array; + struct ArrowArrayView* xdbc_is_autoincrement_array; + struct ArrowArrayView* xdbc_is_generatedcolumn_array; + struct ArrowArrayView* constraint_name_array; + struct ArrowArrayView* constraint_type_array; + struct ArrowArrayView* constraint_column_names_array; + struct ArrowArrayView* constraint_column_name_array; + struct ArrowArrayView* constraint_column_usages_array; + struct ArrowArrayView* fk_catalog_array; + struct ArrowArrayView* fk_db_schema_array; + struct ArrowArrayView* fk_table_array; + struct ArrowArrayView* fk_column_name_array; +}; + +// does not copy any data from array +// returns NULL on error +struct AdbcGetObjectsData* AdbcGetObjectsDataInit(struct ArrowArrayView* array_view); +void AdbcGetObjectsDataDelete(struct AdbcGetObjectsData* get_objects_data); + +// returns NULL on error +// for now all arguments are required +struct AdbcGetObjectsCatalog* AdbcGetObjectsDataGetCatalogByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name); +struct AdbcGetObjectsSchema* AdbcGetObjectsDataGetSchemaByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name); +struct AdbcGetObjectsTable* AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name, const char* const table_name); +struct AdbcGetObjectsColumn* AdbcGetObjectsDataGetColumnByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name, const char* const table_name, + const char* const column_name); +struct AdbcGetObjectsConstraint* AdbcGetObjectsDataGetConstraintByName( + struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, + const char* const schema_name, const char* const table_name, + const char* const constraint_name); + #ifdef __cplusplus } #endif diff --git a/3rd_party/apache-arrow-adbc/c/driver/flightsql/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/driver/flightsql/CMakeLists.txt index 28d477e..b20a3e7 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/flightsql/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/driver/flightsql/CMakeLists.txt @@ -33,6 +33,7 @@ add_go_lib("${REPOSITORY_ROOT}/go/adbc/pkg/flightsql/" include_directories(SYSTEM ${REPOSITORY_ROOT}) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/) +include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) if(ADBC_TEST_LINKAGE STREQUAL "shared") @@ -50,9 +51,9 @@ if(ADBC_BUILD_TESTS) SOURCES dremio_flightsql_test.cc sqlite_flightsql_test.cc - ../../validation/adbc_validation.cc - ../../validation/adbc_validation_util.cc EXTRA_LINK_LIBS + adbc_driver_common + adbc_validation nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-flightsql-test PRIVATE cxx_std_17) diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/driver/postgresql/CMakeLists.txt index 337873f..d169794 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/CMakeLists.txt @@ -53,7 +53,7 @@ include_directories(SYSTEM ${REPOSITORY_ROOT}) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/) include_directories(SYSTEM ${LIBPQ_INCLUDE_DIRS}) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver/common) +include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver) foreach(LIB_TARGET ${ADBC_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ADBC_EXPORTING) @@ -75,10 +75,9 @@ if(ADBC_BUILD_TESTS) postgres_type_test.cc postgres_copy_reader_test.cc postgresql_test.cc - ../../validation/adbc_validation.cc - ../../validation/adbc_validation_util.cc EXTRA_LINK_LIBS adbc_driver_common + adbc_validation nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-postgresql-test PRIVATE cxx_std_17) diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.cc b/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.cc index 227c7cb..611cd51 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.cc +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.cc @@ -17,37 +17,722 @@ #include "connection.h" +#include #include #include #include +#include #include +#include +#include +#include #include #include +#include "common/utils.h" #include "database.h" -#include "utils.h" namespace { + +static const uint32_t kSupportedInfoCodes[] = { + ADBC_INFO_VENDOR_NAME, ADBC_INFO_VENDOR_VERSION, ADBC_INFO_DRIVER_NAME, + ADBC_INFO_DRIVER_VERSION, ADBC_INFO_DRIVER_ARROW_VERSION, +}; + +static const std::unordered_map kPgTableTypes = { + {"table", "r"}, {"view", "v"}, {"materialized_view", "m"}, + {"toast_table", "t"}, {"foreign_table", "f"}, {"partitioned_table", "p"}}; + +struct PqRecord { + const char* data; + const int len; + const bool is_null; +}; + +// Used by PqResultHelper to provide index-based access to the records within each +// row of a pg_result +class PqResultRow { + public: + PqResultRow(pg_result* result, int row_num) : result_(result), row_num_(row_num) { + ncols_ = PQnfields(result); + } + + PqRecord operator[](const int& col_num) { + assert(col_num < ncols_); + const char* data = PQgetvalue(result_, row_num_, col_num); + const int len = PQgetlength(result_, row_num_, col_num); + const bool is_null = PQgetisnull(result_, row_num_, col_num); + + return PqRecord{data, len, is_null}; + } + + private: + pg_result* result_ = nullptr; + int row_num_; + int ncols_; +}; + +// Helper to manager the lifecycle of a PQResult. The query argument +// will be evaluated as part of the constructor, with the desctructor handling cleanup +// Caller must call Prepare then Execute, checking both for an OK AdbcStatusCode +// prior to iterating class PqResultHelper { public: - PqResultHelper(PGconn* conn, const char* query) : conn_(conn) { - query_ = std::string(query); + explicit PqResultHelper(PGconn* conn, std::string query, struct AdbcError* error) + : conn_(conn), query_(std::move(query)), error_(error) {} + + explicit PqResultHelper(PGconn* conn, std::string query, + std::vector param_values, struct AdbcError* error) + : conn_(conn), + query_(std::move(query)), + param_values_(param_values), + error_(error) {} + + AdbcStatusCode Prepare() { + // TODO: make stmtName a unique identifier? + PGresult* result = + PQprepare(conn_, /*stmtName=*/"", query_.c_str(), param_values_.size(), NULL); + if (PQresultStatus(result) != PGRES_COMMAND_OK) { + SetError(error_, "[libpq] Failed to prepare query: %s\nQuery was:%s", + PQerrorMessage(conn_), query_.c_str()); + PQclear(result); + return ADBC_STATUS_IO; + } + + PQclear(result); + return ADBC_STATUS_OK; } - pg_result* Execute() { - result_ = PQexec(conn_, query_.c_str()); - return result_; + + AdbcStatusCode Execute() { + std::vector param_c_strs; + + for (auto index = 0; index < param_values_.size(); index++) { + param_c_strs.push_back(param_values_[index].c_str()); + } + + result_ = PQexecPrepared(conn_, "", param_values_.size(), param_c_strs.data(), NULL, + NULL, 0); + + if (PQresultStatus(result_) != PGRES_TUPLES_OK) { + SetError(error_, "[libpq] Failed to execute query: %s", PQerrorMessage(conn_)); + return ADBC_STATUS_IO; + } + + return ADBC_STATUS_OK; } ~PqResultHelper() { - if (result_ != nullptr) PQclear(result_); + if (result_ != nullptr) { + PQclear(result_); + } } + int NumRows() { return PQntuples(result_); } + + int NumColumns() { return PQnfields(result_); } + + class iterator { + const PqResultHelper& outer_; + int curr_row_ = 0; + + public: + explicit iterator(const PqResultHelper& outer, int curr_row = 0) + : outer_(outer), curr_row_(curr_row) {} + iterator& operator++() { + curr_row_++; + return *this; + } + iterator operator++(int) { + iterator retval = *this; + ++(*this); + return retval; + } + bool operator==(iterator other) const { + return outer_.result_ == other.outer_.result_ && curr_row_ == other.curr_row_; + } + bool operator!=(iterator other) const { return !(*this == other); } + PqResultRow operator*() { return PqResultRow(outer_.result_, curr_row_); } + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = std::vector; + using pointer = const std::vector*; + using reference = const std::vector&; + }; + + iterator begin() { return iterator(*this); } + iterator end() { return iterator(*this, NumRows()); } + private: pg_result* result_ = nullptr; PGconn* conn_; std::string query_; + std::vector param_values_; + struct AdbcError* error_; +}; + +class PqGetObjectsHelper { + public: + PqGetObjectsHelper(PGconn* conn, int depth, const char* catalog, const char* db_schema, + const char* table_name, const char** table_types, + const char* column_name, struct ArrowSchema* schema, + struct ArrowArray* array, struct AdbcError* error) + : conn_(conn), + depth_(depth), + catalog_(catalog), + db_schema_(db_schema), + table_name_(table_name), + table_types_(table_types), + column_name_(column_name), + schema_(schema), + array_(array), + error_(error) { + na_error_ = {0}; + } + + AdbcStatusCode GetObjects() { + PqResultHelper curr_db_helper = + PqResultHelper{conn_, std::string("SELECT current_database()"), error_}; + + RAISE_ADBC(curr_db_helper.Prepare()); + RAISE_ADBC(curr_db_helper.Execute()); + + assert(curr_db_helper.NumRows() == 1); + auto curr_iter = curr_db_helper.begin(); + PqResultRow db_row = *curr_iter; + current_db_ = std::string(db_row[0].data); + + RAISE_ADBC(InitArrowArray()); + + catalog_name_col_ = array_->children[0]; + catalog_db_schemas_col_ = array_->children[1]; + catalog_db_schemas_items_ = catalog_db_schemas_col_->children[0]; + db_schema_name_col_ = catalog_db_schemas_items_->children[0]; + db_schema_tables_col_ = catalog_db_schemas_items_->children[1]; + schema_table_items_ = db_schema_tables_col_->children[0]; + table_name_col_ = schema_table_items_->children[0]; + table_type_col_ = schema_table_items_->children[1]; + + table_columns_col_ = schema_table_items_->children[2]; + table_columns_items_ = table_columns_col_->children[0]; + column_name_col_ = table_columns_items_->children[0]; + column_position_col_ = table_columns_items_->children[1]; + column_remarks_col_ = table_columns_items_->children[2]; + + table_constraints_col_ = schema_table_items_->children[3]; + table_constraints_items_ = table_constraints_col_->children[0]; + constraint_name_col_ = table_constraints_items_->children[0]; + constraint_type_col_ = table_constraints_items_->children[1]; + + constraint_column_names_col_ = table_constraints_items_->children[2]; + constraint_column_name_col_ = constraint_column_names_col_->children[0]; + + constraint_column_usages_col_ = table_constraints_items_->children[3]; + constraint_column_usage_items_ = constraint_column_usages_col_->children[0]; + fk_catalog_col_ = constraint_column_usage_items_->children[0]; + fk_db_schema_col_ = constraint_column_usage_items_->children[1]; + fk_table_col_ = constraint_column_usage_items_->children[2]; + fk_column_name_col_ = constraint_column_usage_items_->children[3]; + + RAISE_ADBC(AppendCatalogs()); + RAISE_ADBC(FinishArrowArray()); + return ADBC_STATUS_OK; + } + + private: + AdbcStatusCode InitArrowArray() { + RAISE_ADBC(AdbcInitConnectionObjectsSchema(schema_, error_)); + + CHECK_NA_DETAIL(INTERNAL, ArrowArrayInitFromSchema(array_, schema_, &na_error_), + &na_error_, error_); + + CHECK_NA(INTERNAL, ArrowArrayStartAppending(array_), error_); + return ADBC_STATUS_OK; + } + + AdbcStatusCode AppendSchemas(std::string db_name) { + // postgres only allows you to list schemas for the currently connected db + if (db_name == current_db_) { + struct StringBuilder query = {0}; + if (StringBuilderInit(&query, /*initial_size*/ 256)) { + return ADBC_STATUS_INTERNAL; + } + + const char* stmt = + "SELECT nspname FROM pg_catalog.pg_namespace WHERE " + "nspname !~ '^pg_' AND nspname <> 'information_schema'"; + + if (StringBuilderAppend(&query, "%s", stmt)) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + + std::vector params; + if (db_schema_ != NULL) { + if (StringBuilderAppend(&query, "%s", " AND nspname = $1")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + params.push_back(db_schema_); + } + + auto result_helper = + PqResultHelper{conn_, std::string(query.buffer), params, error_}; + StringBuilderReset(&query); + + RAISE_ADBC(result_helper.Prepare()); + RAISE_ADBC(result_helper.Execute()); + + for (PqResultRow row : result_helper) { + const char* schema_name = row[0].data; + CHECK_NA(INTERNAL, + ArrowArrayAppendString(db_schema_name_col_, ArrowCharView(schema_name)), + error_); + if (depth_ == ADBC_OBJECT_DEPTH_DB_SCHEMAS) { + CHECK_NA(INTERNAL, ArrowArrayAppendNull(db_schema_tables_col_, 1), error_); + } else { + RAISE_ADBC(AppendTables(std::string(schema_name))); + } + CHECK_NA(INTERNAL, ArrowArrayFinishElement(catalog_db_schemas_items_), error_); + } + } + + CHECK_NA(INTERNAL, ArrowArrayFinishElement(catalog_db_schemas_col_), error_); + return ADBC_STATUS_OK; + } + + AdbcStatusCode AppendCatalogs() { + struct StringBuilder query = {0}; + if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL; + + if (StringBuilderAppend(&query, "%s", "SELECT datname FROM pg_catalog.pg_database")) { + return ADBC_STATUS_INTERNAL; + } + + std::vector params; + if (catalog_ != NULL) { + if (StringBuilderAppend(&query, "%s", " WHERE datname = $1")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + params.push_back(catalog_); + } + + PqResultHelper result_helper = + PqResultHelper{conn_, std::string(query.buffer), params, error_}; + StringBuilderReset(&query); + + RAISE_ADBC(result_helper.Prepare()); + RAISE_ADBC(result_helper.Execute()); + + for (PqResultRow row : result_helper) { + const char* db_name = row[0].data; + CHECK_NA(INTERNAL, + ArrowArrayAppendString(catalog_name_col_, ArrowCharView(db_name)), error_); + if (depth_ == ADBC_OBJECT_DEPTH_CATALOGS) { + CHECK_NA(INTERNAL, ArrowArrayAppendNull(catalog_db_schemas_col_, 1), error_); + } else { + RAISE_ADBC(AppendSchemas(std::string(db_name))); + } + CHECK_NA(INTERNAL, ArrowArrayFinishElement(array_), error_); + } + + return ADBC_STATUS_OK; + } + + AdbcStatusCode AppendTables(std::string schema_name) { + struct StringBuilder query = {0}; + if (StringBuilderInit(&query, /*initial_size*/ 512)) { + return ADBC_STATUS_INTERNAL; + } + + std::vector params = {schema_name}; + const char* stmt = + "SELECT c.relname, CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' " + "WHEN 'm' THEN 'materialized view' WHEN 't' THEN 'TOAST table' " + "WHEN 'f' THEN 'foreign table' WHEN 'p' THEN 'partitioned table' END " + "AS reltype FROM pg_catalog.pg_class c " + "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace " + "WHERE c.relkind IN ('r','v','m','t','f','p') " + "AND pg_catalog.pg_table_is_visible(c.oid) AND n.nspname = $1"; + + if (StringBuilderAppend(&query, "%s", stmt)) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + + if (table_name_ != nullptr) { + if (StringBuilderAppend(&query, "%s", " AND c.relname LIKE $2")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + + params.push_back(std::string(table_name_)); + } + + if (table_types_ != nullptr) { + std::vector table_type_filter; + const char** table_types = table_types_; + while (*table_types != NULL) { + auto table_type_str = std::string(*table_types); + if (auto search = kPgTableTypes.find(table_type_str); + search != kPgTableTypes.end()) { + table_type_filter.push_back(search->second); + } + table_types++; + } + + if (!table_type_filter.empty()) { + std::ostringstream oss; + bool first = true; + oss << "("; + for (const auto& str : table_type_filter) { + if (!first) { + oss << ", "; + } + oss << "'" << str << "'"; + first = false; + } + oss << ")"; + + if (StringBuilderAppend(&query, "%s%s", " AND c.relkind IN ", + oss.str().c_str())) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + } else { + // no matching table type means no records should come back + if (StringBuilderAppend(&query, "%s", " AND false")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + } + } + + auto result_helper = PqResultHelper{conn_, query.buffer, params, error_}; + StringBuilderReset(&query); + + RAISE_ADBC(result_helper.Prepare()); + RAISE_ADBC(result_helper.Execute()); + for (PqResultRow row : result_helper) { + const char* table_name = row[0].data; + const char* table_type = row[1].data; + + CHECK_NA(INTERNAL, + ArrowArrayAppendString(table_name_col_, ArrowCharView(table_name)), + error_); + CHECK_NA(INTERNAL, + ArrowArrayAppendString(table_type_col_, ArrowCharView(table_type)), + error_); + if (depth_ == ADBC_OBJECT_DEPTH_TABLES) { + CHECK_NA(INTERNAL, ArrowArrayAppendNull(table_columns_col_, 1), error_); + CHECK_NA(INTERNAL, ArrowArrayAppendNull(table_constraints_col_, 1), error_); + } else { + auto table_name_s = std::string(table_name); + RAISE_ADBC(AppendColumns(schema_name, table_name_s)); + RAISE_ADBC(AppendConstraints(schema_name, table_name_s)); + } + CHECK_NA(INTERNAL, ArrowArrayFinishElement(schema_table_items_), error_); + } + + CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_tables_col_), error_); + return ADBC_STATUS_OK; + } + + AdbcStatusCode AppendColumns(std::string schema_name, std::string table_name) { + struct StringBuilder query = {0}; + if (StringBuilderInit(&query, /*initial_size*/ 512)) { + return ADBC_STATUS_INTERNAL; + } + + std::vector params = {schema_name, table_name}; + const char* stmt = + "SELECT attr.attname, attr.attnum, " + "pg_catalog.col_description(cls.oid, attr.attnum) " + "FROM pg_catalog.pg_attribute AS attr " + "INNER JOIN pg_catalog.pg_class AS cls ON attr.attrelid = cls.oid " + "INNER JOIN pg_catalog.pg_namespace AS nsp ON nsp.oid = cls.relnamespace " + "WHERE attr.attnum > 0 AND NOT attr.attisdropped " + "AND nsp.nspname LIKE $1 AND cls.relname LIKE $2"; + + if (StringBuilderAppend(&query, "%s", stmt)) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + + if (column_name_ != NULL) { + if (StringBuilderAppend(&query, "%s", " AND attr.attname LIKE $3")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + + params.push_back(std::string(column_name_)); + } + + auto result_helper = PqResultHelper{conn_, query.buffer, params, error_}; + StringBuilderReset(&query); + + RAISE_ADBC(result_helper.Prepare()); + RAISE_ADBC(result_helper.Execute()); + + for (PqResultRow row : result_helper) { + const char* column_name = row[0].data; + const char* position = row[1].data; + + CHECK_NA(INTERNAL, + ArrowArrayAppendString(column_name_col_, ArrowCharView(column_name)), + error_); + int ival = atol(position); + CHECK_NA(INTERNAL, + ArrowArrayAppendInt(column_position_col_, static_cast(ival)), + error_); + if (row[2].is_null) { + CHECK_NA(INTERNAL, ArrowArrayAppendNull(column_remarks_col_, 1), error_); + } else { + const char* remarks = row[2].data; + CHECK_NA(INTERNAL, + ArrowArrayAppendString(column_remarks_col_, ArrowCharView(remarks)), + error_); + } + + // no xdbc_ values for now + for (auto i = 3; i < 19; i++) { + CHECK_NA(INTERNAL, ArrowArrayAppendNull(table_columns_items_->children[i], 1), + error_); + } + + CHECK_NA(INTERNAL, ArrowArrayFinishElement(table_columns_items_), error_); + } + + CHECK_NA(INTERNAL, ArrowArrayFinishElement(table_columns_col_), error_); + return ADBC_STATUS_OK; + } + + // libpq PQexecParams can use either text or binary transfers + // For now we are using text transfer internally, so arrays are sent + // back like {element1, element2} within a const char* + std::vector PqTextArrayToVector(std::string text_array) { + text_array.erase(0, 1); + text_array.erase(text_array.size() - 1); + + std::vector elements; + std::stringstream ss(std::move(text_array)); + std::string tmp; + + while (getline(ss, tmp, ',')) { + elements.push_back(std::move(tmp)); + } + + return elements; + } + + AdbcStatusCode AppendConstraints(std::string schema_name, std::string table_name) { + struct StringBuilder query = {0}; + if (StringBuilderInit(&query, /*initial_size*/ 4096)) { + return ADBC_STATUS_INTERNAL; + } + + std::vector params = {schema_name, table_name}; + const char* stmt = + "WITH fk_unnest AS ( " + " SELECT " + " con.conname, " + " 'FOREIGN KEY' AS contype, " + " conrelid, " + " UNNEST(con.conkey) AS conkey, " + " confrelid, " + " UNNEST(con.confkey) AS confkey " + " FROM pg_catalog.pg_constraint AS con " + " INNER JOIN pg_catalog.pg_class AS cls ON cls.oid = conrelid " + " INNER JOIN pg_catalog.pg_namespace AS nsp ON nsp.oid = cls.relnamespace " + " WHERE con.contype = 'f' AND nsp.nspname LIKE $1 " + " AND cls.relname LIKE $2 " + "), " + "fk_names AS ( " + " SELECT " + " fk_unnest.conname, " + " fk_unnest.contype, " + " fk_unnest.conkey, " + " fk_unnest.confkey, " + " attr.attname, " + " fnsp.nspname AS fschema, " + " fcls.relname AS ftable, " + " fattr.attname AS fattname " + " FROM fk_unnest " + " INNER JOIN pg_catalog.pg_class AS cls ON cls.oid = fk_unnest.conrelid " + " INNER JOIN pg_catalog.pg_class AS fcls ON fcls.oid = fk_unnest.confrelid " + " INNER JOIN pg_catalog.pg_namespace AS fnsp ON fnsp.oid = fcls.relnamespace" + " INNER JOIN pg_catalog.pg_attribute AS attr ON attr.attnum = " + "fk_unnest.conkey " + " AND attr.attrelid = fk_unnest.conrelid " + " LEFT JOIN pg_catalog.pg_attribute AS fattr ON fattr.attnum = " + "fk_unnest.confkey " + " AND fattr.attrelid = fk_unnest.confrelid " + "), " + "fkeys AS ( " + " SELECT " + " conname, " + " contype, " + " ARRAY_AGG(attname ORDER BY conkey) AS colnames, " + " fschema, " + " ftable, " + " ARRAY_AGG(fattname ORDER BY confkey) AS fcolnames " + " FROM fk_names " + " GROUP BY " + " conname, " + " contype, " + " fschema, " + " ftable " + "), " + "other_constraints AS ( " + " SELECT con.conname, CASE con.contype WHEN 'c' THEN 'CHECK' WHEN 'u' THEN " + " 'UNIQUE' WHEN 'p' THEN 'PRIMARY KEY' END AS contype, " + " ARRAY_AGG(attr.attname) AS colnames " + " FROM pg_catalog.pg_constraint AS con " + " CROSS JOIN UNNEST(conkey) AS conkeys " + " INNER JOIN pg_catalog.pg_class AS cls ON cls.oid = con.conrelid " + " INNER JOIN pg_catalog.pg_namespace AS nsp ON nsp.oid = cls.relnamespace " + " INNER JOIN pg_catalog.pg_attribute AS attr ON attr.attnum = conkeys " + " AND cls.oid = attr.attrelid " + " WHERE con.contype IN ('c', 'u', 'p') AND nsp.nspname LIKE $1 " + " AND cls.relname LIKE $2 " + " GROUP BY conname, contype " + ") " + "SELECT " + " conname, contype, colnames, fschema, ftable, fcolnames " + "FROM fkeys " + "UNION ALL " + "SELECT " + " conname, contype, colnames, NULL, NULL, NULL " + "FROM other_constraints"; + + if (StringBuilderAppend(&query, "%s", stmt)) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + + if (column_name_ != NULL) { + if (StringBuilderAppend(&query, "%s", " WHERE conname LIKE $3")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; + } + + params.push_back(std::string(column_name_)); + } + + auto result_helper = PqResultHelper{conn_, query.buffer, params, error_}; + StringBuilderReset(&query); + + RAISE_ADBC(result_helper.Prepare()); + RAISE_ADBC(result_helper.Execute()); + + for (PqResultRow row : result_helper) { + const char* constraint_name = row[0].data; + const char* constraint_type = row[1].data; + + CHECK_NA( + INTERNAL, + ArrowArrayAppendString(constraint_name_col_, ArrowCharView(constraint_name)), + error_); + + CHECK_NA( + INTERNAL, + ArrowArrayAppendString(constraint_type_col_, ArrowCharView(constraint_type)), + error_); + + auto constraint_column_names = PqTextArrayToVector(std::string(row[2].data)); + for (const auto& constraint_column_name : constraint_column_names) { + CHECK_NA(INTERNAL, + ArrowArrayAppendString(constraint_column_name_col_, + ArrowCharView(constraint_column_name.c_str())), + error_); + } + CHECK_NA(INTERNAL, ArrowArrayFinishElement(constraint_column_names_col_), error_); + + if (!strcmp(constraint_type, "FOREIGN KEY")) { + assert(!row[3].is_null); + assert(!row[4].is_null); + assert(!row[5].is_null); + + const char* constraint_ftable_schema = row[3].data; + const char* constraint_ftable_name = row[4].data; + auto constraint_fcolumn_names = PqTextArrayToVector(std::string(row[5].data)); + for (const auto& constraint_fcolumn_name : constraint_fcolumn_names) { + CHECK_NA( + INTERNAL, + ArrowArrayAppendString(fk_catalog_col_, ArrowCharView(current_db_.c_str())), + error_); + CHECK_NA(INTERNAL, + ArrowArrayAppendString(fk_db_schema_col_, + ArrowCharView(constraint_ftable_schema)), + error_); + CHECK_NA(INTERNAL, + ArrowArrayAppendString(fk_table_col_, + ArrowCharView(constraint_ftable_name)), + error_); + CHECK_NA(INTERNAL, + ArrowArrayAppendString(fk_column_name_col_, + ArrowCharView(constraint_fcolumn_name.c_str())), + error_); + + CHECK_NA(INTERNAL, ArrowArrayFinishElement(constraint_column_usage_items_), + error_); + } + } + CHECK_NA(INTERNAL, ArrowArrayFinishElement(constraint_column_usages_col_), error_); + CHECK_NA(INTERNAL, ArrowArrayFinishElement(table_constraints_items_), error_); + } + + CHECK_NA(INTERNAL, ArrowArrayFinishElement(table_constraints_col_), error_); + return ADBC_STATUS_OK; + } + + AdbcStatusCode FinishArrowArray() { + CHECK_NA_DETAIL(INTERNAL, ArrowArrayFinishBuildingDefault(array_, &na_error_), + &na_error_, error_); + + return ADBC_STATUS_OK; + } + + PGconn* conn_; + int depth_; + const char* catalog_; + const char* db_schema_; + const char* table_name_; + const char** table_types_; + const char* column_name_; + struct ArrowSchema* schema_; + struct ArrowArray* array_; + struct AdbcError* error_; + struct ArrowError na_error_; + std::string current_db_; + struct ArrowArray* catalog_name_col_; + struct ArrowArray* catalog_db_schemas_col_; + struct ArrowArray* catalog_db_schemas_items_; + struct ArrowArray* db_schema_name_col_; + struct ArrowArray* db_schema_tables_col_; + struct ArrowArray* schema_table_items_; + struct ArrowArray* table_name_col_; + struct ArrowArray* table_type_col_; + struct ArrowArray* table_columns_col_; + struct ArrowArray* table_columns_items_; + struct ArrowArray* column_name_col_; + struct ArrowArray* column_position_col_; + struct ArrowArray* column_remarks_col_; + struct ArrowArray* table_constraints_col_; + struct ArrowArray* table_constraints_items_; + struct ArrowArray* constraint_name_col_; + struct ArrowArray* constraint_type_col_; + struct ArrowArray* constraint_column_names_col_; + struct ArrowArray* constraint_column_name_col_; + struct ArrowArray* constraint_column_usages_col_; + struct ArrowArray* constraint_column_usage_items_; + struct ArrowArray* fk_catalog_col_; + struct ArrowArray* fk_db_schema_col_; + struct ArrowArray* fk_table_col_; + struct ArrowArray* fk_column_name_col_; }; + } // namespace namespace adbcpq { @@ -68,6 +753,97 @@ AdbcStatusCode PostgresConnection::Commit(struct AdbcError* error) { return ADBC_STATUS_OK; } +AdbcStatusCode PostgresConnectionGetInfoImpl(const uint32_t* info_codes, + size_t info_codes_length, + struct ArrowSchema* schema, + struct ArrowArray* array, + struct AdbcError* error) { + RAISE_ADBC(AdbcInitConnectionGetInfoSchema(info_codes, info_codes_length, schema, array, + error)); + + for (size_t i = 0; i < info_codes_length; i++) { + switch (info_codes[i]) { + case ADBC_INFO_VENDOR_NAME: + RAISE_ADBC( + AdbcConnectionGetInfoAppendString(array, info_codes[i], "PostgreSQL", error)); + break; + case ADBC_INFO_VENDOR_VERSION: + RAISE_ADBC(AdbcConnectionGetInfoAppendString( + array, info_codes[i], std::to_string(PQlibVersion()).c_str(), error)); + break; + case ADBC_INFO_DRIVER_NAME: + RAISE_ADBC(AdbcConnectionGetInfoAppendString(array, info_codes[i], + "ADBC PostgreSQL Driver", error)); + break; + case ADBC_INFO_DRIVER_VERSION: + // TODO(lidavidm): fill in driver version + RAISE_ADBC( + AdbcConnectionGetInfoAppendString(array, info_codes[i], "(unknown)", error)); + break; + case ADBC_INFO_DRIVER_ARROW_VERSION: + RAISE_ADBC(AdbcConnectionGetInfoAppendString(array, info_codes[i], + NANOARROW_VERSION, error)); + break; + default: + // Ignore + continue; + } + CHECK_NA(INTERNAL, ArrowArrayFinishElement(array), error); + } + + struct ArrowError na_error = {0}; + CHECK_NA_DETAIL(INTERNAL, ArrowArrayFinishBuildingDefault(array, &na_error), &na_error, + error); + + return ADBC_STATUS_OK; +} + +AdbcStatusCode PostgresConnection::GetInfo(struct AdbcConnection* connection, + uint32_t* info_codes, size_t info_codes_length, + struct ArrowArrayStream* out, + struct AdbcError* error) { + // XXX: mistake in adbc.h (should have been const pointer) + const uint32_t* codes = info_codes; + if (!info_codes) { + codes = kSupportedInfoCodes; + info_codes_length = sizeof(kSupportedInfoCodes) / sizeof(kSupportedInfoCodes[0]); + } + + struct ArrowSchema schema = {0}; + struct ArrowArray array = {0}; + + AdbcStatusCode status = + PostgresConnectionGetInfoImpl(codes, info_codes_length, &schema, &array, error); + if (status != ADBC_STATUS_OK) { + if (schema.release) schema.release(&schema); + if (array.release) array.release(&array); + return status; + } + + return BatchToArrayStream(&array, &schema, out, error); +} + +AdbcStatusCode PostgresConnection::GetObjects( + struct AdbcConnection* connection, int depth, const char* catalog, + const char* db_schema, const char* table_name, const char** table_types, + const char* column_name, struct ArrowArrayStream* out, struct AdbcError* error) { + struct ArrowSchema schema = {0}; + struct ArrowArray array = {0}; + + PqGetObjectsHelper helper = + PqGetObjectsHelper(conn_, depth, catalog, db_schema, table_name, table_types, + column_name, &schema, &array, error); + AdbcStatusCode status = helper.GetObjects(); + + if (status != ADBC_STATUS_OK) { + if (schema.release) schema.release(&schema); + if (array.release) array.release(&array); + return status; + } + + return BatchToArrayStream(&array, &schema, out, error); +} + AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog, const char* db_schema, const char* table_name, @@ -75,6 +851,7 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog, struct AdbcError* error) { AdbcStatusCode final_status = ADBC_STATUS_OK; struct StringBuilder query = {0}; + std::vector params; if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL; if (StringBuilderAppend( @@ -83,71 +860,108 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog, "FROM pg_catalog.pg_class AS cls " "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid " "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid " - "WHERE attr.attnum >= 0 AND cls.oid = '") != 0) + "WHERE attr.attnum >= 0 AND cls.oid = ") != 0) return ADBC_STATUS_INTERNAL; if (db_schema != nullptr) { - char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema)); - if (schema == NULL) { - SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_)); - return ADBC_STATUS_INVALID_ARGUMENT; + if (StringBuilderAppend(&query, "%s", "$1.")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; } - - int ret = StringBuilderAppend(&query, "%s%s", schema, "."); - PQfreemem(schema); - - if (ret != 0) return ADBC_STATUS_INTERNAL; + params.push_back(db_schema); } - char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name)); - if (table == NULL) { - SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_)); - return ADBC_STATUS_INVALID_ARGUMENT; + if (StringBuilderAppend(&query, "%s%" PRIu64 "%s", "$", + static_cast(params.size() + 1), "::regclass::oid")) { + StringBuilderReset(&query); + return ADBC_STATUS_INTERNAL; } + params.push_back(table_name); - int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid"); - PQfreemem(table); - - if (ret != 0) return ADBC_STATUS_INTERNAL; - - PqResultHelper result_helper = PqResultHelper{conn_, query.buffer}; + PqResultHelper result_helper = + PqResultHelper{conn_, std::string(query.buffer), params, error}; StringBuilderReset(&query); - pg_result* result = result_helper.Execute(); - ExecStatusType pq_status = PQresultStatus(result); + RAISE_ADBC(result_helper.Prepare()); + RAISE_ADBC(result_helper.Execute()); + auto uschema = nanoarrow::UniqueSchema(); + ArrowSchemaInit(uschema.get()); + CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(uschema.get(), result_helper.NumRows()), + error); - if (pq_status == PGRES_TUPLES_OK) { - int num_rows = PQntuples(result); - ArrowSchemaInit(uschema.get()); - CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(uschema.get(), num_rows), error); - - ArrowError na_error; - for (int row = 0; row < num_rows; row++) { - const char* colname = PQgetvalue(result, row, 0); - const Oid pg_oid = static_cast( - std::strtol(PQgetvalue(result, row, 1), /*str_end=*/nullptr, /*base=*/10)); - - PostgresType pg_type; - if (type_resolver_->Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) { - SetError(error, "%s%d%s%s%s%" PRIu32, "Column #", row + 1, " (\"", colname, - "\") has unknown type code ", pg_oid); - final_status = ADBC_STATUS_NOT_IMPLEMENTED; - break; - } + ArrowError na_error; + int row_counter = 0; + for (auto row : result_helper) { + const char* colname = row[0].data; + const Oid pg_oid = + static_cast(std::strtol(row[1].data, /*str_end=*/nullptr, /*base=*/10)); - CHECK_NA(INTERNAL, pg_type.WithFieldName(colname).SetSchema(uschema->children[row]), - error); + PostgresType pg_type; + if (type_resolver_->Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) { + SetError(error, "%s%d%s%s%s%" PRIu32, "Column #", row_counter + 1, " (\"", colname, + "\") has unknown type code ", pg_oid); + final_status = ADBC_STATUS_NOT_IMPLEMENTED; + break; } - } else { - SetError(error, "%s%s", "Failed to get table schema: ", PQerrorMessage(conn_)); - final_status = ADBC_STATUS_IO; + CHECK_NA(INTERNAL, + pg_type.WithFieldName(colname).SetSchema(uschema->children[row_counter]), + error); + row_counter++; } - uschema.move(schema); + return final_status; } +AdbcStatusCode PostgresConnectionGetTableTypesImpl(struct ArrowSchema* schema, + struct ArrowArray* array, + struct AdbcError* error) { + // See 'relkind' in https://www.postgresql.org/docs/current/catalog-pg-class.html + auto uschema = nanoarrow::UniqueSchema(); + ArrowSchemaInit(uschema.get()); + + CHECK_NA(INTERNAL, ArrowSchemaSetType(uschema.get(), NANOARROW_TYPE_STRUCT), error); + CHECK_NA(INTERNAL, ArrowSchemaAllocateChildren(uschema.get(), /*num_columns=*/1), + error); + ArrowSchemaInit(uschema.get()->children[0]); + CHECK_NA(INTERNAL, + ArrowSchemaSetType(uschema.get()->children[0], NANOARROW_TYPE_STRING), error); + CHECK_NA(INTERNAL, ArrowSchemaSetName(uschema.get()->children[0], "table_type"), error); + uschema.get()->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + + CHECK_NA(INTERNAL, ArrowArrayInitFromSchema(array, uschema.get(), NULL), error); + CHECK_NA(INTERNAL, ArrowArrayStartAppending(array), error); + + for (auto const& table_type : kPgTableTypes) { + CHECK_NA(INTERNAL, + ArrowArrayAppendString(array->children[0], + ArrowCharView(table_type.first.c_str())), + error); + CHECK_NA(INTERNAL, ArrowArrayFinishElement(array), error); + } + + CHECK_NA(INTERNAL, ArrowArrayFinishBuildingDefault(array, NULL), error); + + uschema.move(schema); + return ADBC_STATUS_OK; +} + +AdbcStatusCode PostgresConnection::GetTableTypes(struct AdbcConnection* connection, + struct ArrowArrayStream* out, + struct AdbcError* error) { + struct ArrowSchema schema = {0}; + struct ArrowArray array = {0}; + + AdbcStatusCode status = PostgresConnectionGetTableTypesImpl(&schema, &array, error); + if (status != ADBC_STATUS_OK) { + if (schema.release) schema.release(&schema); + if (array.release) array.release(&array); + return status; + } + return BatchToArrayStream(&array, &schema, out, error); +} + AdbcStatusCode PostgresConnection::Init(struct AdbcDatabase* database, struct AdbcError* error) { if (!database || !database->private_data) { diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.h b/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.h index 6f63d66..99770c2 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.h +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/connection.h @@ -32,9 +32,19 @@ class PostgresConnection { PostgresConnection() : database_(nullptr), conn_(nullptr), autocommit_(true) {} AdbcStatusCode Commit(struct AdbcError* error); + AdbcStatusCode GetInfo(struct AdbcConnection* connection, uint32_t* info_codes, + size_t info_codes_length, struct ArrowArrayStream* out, + struct AdbcError* error); + AdbcStatusCode GetObjects(struct AdbcConnection* connection, int depth, + const char* catalog, const char* db_schema, + const char* table_name, const char** table_types, + const char* column_name, struct ArrowArrayStream* out, + struct AdbcError* error); AdbcStatusCode GetTableSchema(const char* catalog, const char* db_schema, const char* table_name, struct ArrowSchema* schema, struct AdbcError* error); + AdbcStatusCode GetTableTypes(struct AdbcConnection* connection, + struct ArrowArrayStream* out, struct AdbcError* error); AdbcStatusCode Init(struct AdbcDatabase* database, struct AdbcError* error); AdbcStatusCode Release(struct AdbcError* error); AdbcStatusCode Rollback(struct AdbcError* error); diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/database.cc b/3rd_party/apache-arrow-adbc/c/driver/postgresql/database.cc index 1d9bd41..3976c4b 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/database.cc +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/database.cc @@ -27,7 +27,7 @@ #include #include -#include "utils.h" +#include "common/utils.h" namespace adbcpq { diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_copy_reader.h b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_copy_reader.h index 6e8f8db..78358a9 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_copy_reader.h +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_copy_reader.h @@ -184,7 +184,7 @@ class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { // Reader for Pg->Arrow conversions whose representations are identical minus // the bswap from network endian. This includes all integral and float types. -template +template class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { public: ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, @@ -200,7 +200,7 @@ class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { return EINVAL; } - T value = ReadUnsafe(data); + T value = kOffset + ReadUnsafe(data); NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value, sizeof(T))); return AppendValid(array); } @@ -635,6 +635,32 @@ static inline ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, default: return ErrorCantConvert(error, pg_type, schema_view); } + + case NANOARROW_TYPE_DATE32: { + // 2000-01-01 + constexpr int32_t kPostgresDateEpoch = 10957; + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + } + + case NANOARROW_TYPE_TIME64: { + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + } + + case NANOARROW_TYPE_TIMESTAMP: + switch (pg_type.type_id()) { + case PostgresTypeId::kTimestamp: + case PostgresTypeId::kTimestamptz: { + // 2000-01-01 00:00:00.000000 in microseconds + constexpr int64_t kPostgresTimestampEpoch = 946684800000000; + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + } + default: + return ErrorCantConvert(error, pg_type, schema_view); + } default: return ErrorCantConvert(error, pg_type, schema_view); } diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_type.h b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_type.h index a2152a5..e234e36 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_type.h +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_type.h @@ -192,6 +192,7 @@ class PostgresType { // binary COPY representation in the output. ArrowErrorCode SetSchema(ArrowSchema* schema) const { switch (type_id_) { + // ---- Primitive types -------------------- case PostgresTypeId::kBool: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); break; @@ -212,6 +213,8 @@ class PostgresType { case PostgresTypeId::kFloat8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); break; + + // ---- Binary/string -------------------- case PostgresTypeId::kChar: case PostgresTypeId::kBpchar: case PostgresTypeId::kVarchar: @@ -223,6 +226,34 @@ class PostgresType { NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); break; + // ---- Temporal -------------------- + case PostgresTypeId::kDate: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DATE32)); + break; + + case PostgresTypeId::kTime: + // We always return microsecond precision even if the type + // specifies differently + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIME64, + NANOARROW_TIME_UNIT_MICRO, + /*timezone=*/nullptr)); + break; + + case PostgresTypeId::kTimestamp: + // We always return microsecond precision even if the type + // specifies differently + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TIME_UNIT_MICRO, /*timezone=*/nullptr)); + break; + + case PostgresTypeId::kTimestamptz: + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TIME_UNIT_MICRO, /*timezone=*/"UTC")); + break; + + // ---- Nested -------------------- case PostgresTypeId::kRecord: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); for (int64_t i = 0; i < n_children(); i++) { diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_util.h b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_util.h index df45bdd..0153c89 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_util.h +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgres_util.h @@ -36,16 +36,16 @@ namespace adbcpq { #if defined(_WIN32) && defined(_MSC_VER) -static inline uint32_t SwapNetworkToHost(uint16_t x) { return ntohs(x); } -static inline uint32_t SwapHostToNetwork(uint16_t x) { return htons(x); } +static inline uint16_t SwapNetworkToHost(uint16_t x) { return ntohs(x); } +static inline uint16_t SwapHostToNetwork(uint16_t x) { return htons(x); } static inline uint32_t SwapNetworkToHost(uint32_t x) { return ntohl(x); } static inline uint32_t SwapHostToNetwork(uint32_t x) { return htonl(x); } static inline uint64_t SwapNetworkToHost(uint64_t x) { return ntohll(x); } static inline uint64_t SwapHostToNetwork(uint64_t x) { return htonll(x); } #elif defined(_WIN32) // e.g., msys2, where ntohll is not necessarily defined -static inline uint32_t SwapNetworkToHost(uint16_t x) { return ntohs(x); } -static inline uint32_t SwapHostToNetwork(uint16_t x) { return htons(x); } +static inline uint16_t SwapNetworkToHost(uint16_t x) { return ntohs(x); } +static inline uint16_t SwapHostToNetwork(uint16_t x) { return htons(x); } static inline uint32_t SwapNetworkToHost(uint32_t x) { return ntohl(x); } static inline uint32_t SwapHostToNetwork(uint32_t x) { return htonl(x); } static inline uint64_t SwapNetworkToHost(uint64_t x) { @@ -112,12 +112,19 @@ static inline double LoadNetworkFloat8(const char* buf) { return out; } -static inline uint32_t ToNetworkInt32(int32_t v) { - return SwapHostToNetwork(static_cast(v)); -} +#define ADBC_REGISTER_TO_NETWORK_FUNC(size) \ + static inline uint##size##_t ToNetworkInt##size(int##size##_t v) { \ + return SwapHostToNetwork(static_cast(v)); \ + } + +ADBC_REGISTER_TO_NETWORK_FUNC(16) +ADBC_REGISTER_TO_NETWORK_FUNC(32) +ADBC_REGISTER_TO_NETWORK_FUNC(64) -static inline uint64_t ToNetworkInt64(int64_t v) { - return SwapHostToNetwork(static_cast(v)); +static inline uint32_t ToNetworkFloat4(float v) { + uint32_t vint; + memcpy(&vint, &v, sizeof(uint32_t)); + return SwapHostToNetwork(vint); } static inline uint64_t ToNetworkFloat8(double v) { diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql.cc b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql.cc index 1305f19..29fd04c 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql.cc +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql.cc @@ -22,10 +22,10 @@ #include +#include "common/utils.h" #include "connection.h" #include "database.h" #include "statement.h" -#include "utils.h" using adbcpq::PostgresConnection; using adbcpq::PostgresDatabase; @@ -125,14 +125,21 @@ AdbcStatusCode PostgresConnectionGetInfo(struct AdbcConnection* connection, uint32_t* info_codes, size_t info_codes_length, struct ArrowArrayStream* stream, struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; + if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; + auto ptr = + reinterpret_cast*>(connection->private_data); + return (*ptr)->GetInfo(connection, info_codes, info_codes_length, stream, error); } AdbcStatusCode PostgresConnectionGetObjects( struct AdbcConnection* connection, int depth, const char* catalog, const char* db_schema, const char* table_name, const char** table_types, const char* column_name, struct ArrowArrayStream* stream, struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; + if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; + auto ptr = + reinterpret_cast*>(connection->private_data); + return (*ptr)->GetObjects(connection, depth, catalog, db_schema, table_name, + table_types, column_name, stream, error); } AdbcStatusCode PostgresConnectionGetTableSchema( @@ -147,7 +154,10 @@ AdbcStatusCode PostgresConnectionGetTableSchema( AdbcStatusCode PostgresConnectionGetTableTypes(struct AdbcConnection* connection, struct ArrowArrayStream* stream, struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; + if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; + auto ptr = + reinterpret_cast*>(connection->private_data); + return (*ptr)->GetTableTypes(connection, stream, error); } AdbcStatusCode PostgresConnectionInit(struct AdbcConnection* connection, diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql_test.cc b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql_test.cc index 0765a9c..429d59d 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql_test.cc +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/postgresql_test.cc @@ -25,11 +25,13 @@ #include #include #include +#include "common/utils.h" #include "validation/adbc_validation.h" #include "validation/adbc_validation_util.h" using adbc_validation::IsOkStatus; +using adbc_validation::IsStatus; class PostgresQuirks : public adbc_validation::DriverQuirks { public: @@ -61,9 +63,37 @@ class PostgresQuirks : public adbc_validation::DriverQuirks { return status; } + AdbcStatusCode DropView(struct AdbcConnection* connection, const std::string& name, + struct AdbcError* error) const override { + struct AdbcStatement statement; + std::memset(&statement, 0, sizeof(statement)); + AdbcStatusCode status = AdbcStatementNew(connection, &statement, error); + if (status != ADBC_STATUS_OK) return status; + + std::string query = "DROP VIEW IF EXISTS " + name; + status = AdbcStatementSetSqlQuery(&statement, query.c_str(), error); + if (status != ADBC_STATUS_OK) { + std::ignore = AdbcStatementRelease(&statement, error); + return status; + } + status = AdbcStatementExecuteQuery(&statement, nullptr, nullptr, error); + std::ignore = AdbcStatementRelease(&statement, error); + return status; + } + std::string BindParameter(int index) const override { return "$" + std::to_string(index + 1); } + + std::optional PrimaryKeyTableDdl(std::string_view name) const override { + std::string ddl = "CREATE TABLE "; + ddl += name; + ddl += " (id SERIAL PRIMARY KEY)"; + return ddl; + } + + std::string catalog() const override { return "postgres"; } + std::string db_schema() const override { return "public"; } }; class PostgresDatabaseTest : public ::testing::Test, @@ -85,18 +115,431 @@ class PostgresConnectionTest : public ::testing::Test, void SetUp() override { ASSERT_NO_FATAL_FAILURE(SetUpTest()); } void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); } - void TestMetadataGetInfo() { GTEST_SKIP() << "Not yet implemented"; } - void TestMetadataGetTableTypes() { GTEST_SKIP() << "Not yet implemented"; } - - void TestMetadataGetObjectsCatalogs() { GTEST_SKIP() << "Not yet implemented"; } - void TestMetadataGetObjectsDbSchemas() { GTEST_SKIP() << "Not yet implemented"; } - void TestMetadataGetObjectsTables() { GTEST_SKIP() << "Not yet implemented"; } - void TestMetadataGetObjectsTablesTypes() { GTEST_SKIP() << "Not yet implemented"; } - void TestMetadataGetObjectsColumns() { GTEST_SKIP() << "Not yet implemented"; } - protected: PostgresQuirks quirks_; }; + +TEST_F(PostgresConnectionTest, GetInfoMetadata) { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + adbc_validation::StreamReader reader; + std::vector info = { + ADBC_INFO_DRIVER_NAME, + ADBC_INFO_DRIVER_VERSION, + ADBC_INFO_VENDOR_NAME, + ADBC_INFO_VENDOR_VERSION, + }; + ASSERT_THAT(AdbcConnectionGetInfo(&connection, info.data(), info.size(), + &reader.stream.value, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + + std::vector seen; + while (true) { + ASSERT_NO_FATAL_FAILURE(reader.Next()); + if (!reader.array->release) break; + + for (int64_t row = 0; row < reader.array->length; row++) { + ASSERT_FALSE(ArrowArrayViewIsNull(reader.array_view->children[0], row)); + const uint32_t code = + reader.array_view->children[0]->buffer_views[1].data.as_uint32[row]; + seen.push_back(code); + + int str_child_index = 0; + struct ArrowArrayView* str_child = + reader.array_view->children[1]->children[str_child_index]; + switch (code) { + case ADBC_INFO_DRIVER_NAME: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 0); + EXPECT_EQ("ADBC PostgreSQL Driver", std::string(val.data, val.size_bytes)); + break; + } + case ADBC_INFO_DRIVER_VERSION: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 1); + EXPECT_EQ("(unknown)", std::string(val.data, val.size_bytes)); + break; + } + case ADBC_INFO_VENDOR_NAME: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 2); + EXPECT_EQ("PostgreSQL", std::string(val.data, val.size_bytes)); + break; + } + case ADBC_INFO_VENDOR_VERSION: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 3); +#ifdef __WIN32 + const char* pater = "\\d\\d\\d\\d\\d\\d"; +#else + const char* pater = "[0-9]{6}"; +#endif + EXPECT_THAT(std::string(val.data, val.size_bytes), + ::testing::MatchesRegex(pater)); + break; + } + default: + // Ignored + break; + } + } + } + ASSERT_THAT(seen, ::testing::UnorderedElementsAreArray(info)); +} + +TEST_F(PostgresConnectionTest, GetObjectsGetCatalogs) { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + if (!quirks()->supports_get_objects()) { + GTEST_SKIP(); + } + + adbc_validation::StreamReader reader; + ASSERT_THAT( + AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_CATALOGS, nullptr, nullptr, + nullptr, nullptr, nullptr, &reader.stream.value, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_GT(reader.array->length, 0); + + auto get_objects_data = adbc_validation::GetObjectsReader{&reader.array_view.value}; + ASSERT_NE(*get_objects_data, nullptr) + << "could not initialize the AdbcGetObjectsData object"; + + auto catalogs = {"postgres", "template0", "template1"}; + for (auto catalog : catalogs) { + struct AdbcGetObjectsCatalog* cat = + AdbcGetObjectsDataGetCatalogByName(*get_objects_data, catalog); + ASSERT_NE(cat, nullptr) << "catalog " << catalog << " not found"; + } +} + +TEST_F(PostgresConnectionTest, GetObjectsGetDbSchemas) { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + if (!quirks()->supports_get_objects()) { + GTEST_SKIP(); + } + + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_DB_SCHEMAS, nullptr, + nullptr, nullptr, nullptr, nullptr, + &reader.stream.value, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_GT(reader.array->length, 0); + + auto get_objects_data = adbc_validation::GetObjectsReader{&reader.array_view.value}; + ASSERT_NE(*get_objects_data, nullptr) + << "could not initialize the AdbcGetObjectsData object"; + + struct AdbcGetObjectsSchema* schema = + AdbcGetObjectsDataGetSchemaByName(*get_objects_data, "postgres", "public"); + ASSERT_NE(schema, nullptr) << "schema public not found"; +} + +TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsPrimaryKey) { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + if (!quirks()->supports_get_objects()) { + GTEST_SKIP(); + } + + ASSERT_THAT(quirks()->DropTable(&connection, "adbc_pkey_test", &error), + IsOkStatus(&error)); + + struct AdbcStatement statement; + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + { + ASSERT_THAT( + AdbcStatementSetSqlQuery( + &statement, "CREATE TABLE adbc_pkey_test (ints INT, id SERIAL PRIMARY KEY)", + &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_EQ(reader.rows_affected, 0); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); + + adbc_validation::StreamReader reader; + ASSERT_THAT( + AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_ALL, nullptr, nullptr, + nullptr, nullptr, nullptr, &reader.stream.value, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_GT(reader.array->length, 0); + + auto get_objects_data = adbc_validation::GetObjectsReader{&reader.array_view.value}; + ASSERT_NE(*get_objects_data, nullptr) + << "could not initialize the AdbcGetObjectsData object"; + + struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + *get_objects_data, "postgres", "public", "adbc_pkey_test"); + ASSERT_NE(table, nullptr) << "could not find adbc_pkey_test table"; + + ASSERT_EQ(table->n_table_columns, 2); + struct AdbcGetObjectsColumn* column = AdbcGetObjectsDataGetColumnByName( + *get_objects_data, "postgres", "public", "adbc_pkey_test", "id"); + ASSERT_NE(column, nullptr) << "could not find id column on adbc_pkey_test table"; + + ASSERT_EQ(table->n_table_constraints, 1) + << "expected 1 constraint on adbc_pkey_test table, found: " + << table->n_table_constraints; + + struct AdbcGetObjectsConstraint* constraint = AdbcGetObjectsDataGetConstraintByName( + *get_objects_data, "postgres", "public", "adbc_pkey_test", "adbc_pkey_test_pkey"); + ASSERT_NE(constraint, nullptr) << "could not find adbc_pkey_test_pkey constraint"; + + auto constraint_type = std::string(constraint->constraint_type.data, + constraint->constraint_type.size_bytes); + ASSERT_EQ(constraint_type, "PRIMARY KEY"); + ASSERT_EQ(constraint->n_column_names, 1) + << "expected constraint adbc_pkey_test_pkey to be applied to 1 column, found: " + << constraint->n_column_names; + + auto constraint_column_name = + std::string(constraint->constraint_column_names[0].data, + constraint->constraint_column_names[0].size_bytes); + ASSERT_EQ(constraint_column_name, "id"); +} + +TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsForeignKey) { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + if (!quirks()->supports_get_objects()) { + GTEST_SKIP(); + } + + ASSERT_THAT(quirks()->DropTable(&connection, "adbc_fkey_test", &error), + IsOkStatus(&error)); + ASSERT_THAT(quirks()->DropTable(&connection, "adbc_fkey_test_base", &error), + IsOkStatus(&error)); + + struct AdbcStatement statement; + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + { + ASSERT_THAT( + AdbcStatementSetSqlQuery(&statement, + "CREATE TABLE adbc_fkey_test_base (id1 INT, id2 INT, " + "PRIMARY KEY (id1, id2))", + &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_EQ(reader.rows_affected, 0); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + { + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, + "CREATE TABLE adbc_fkey_test (fid1 INT, fid2 INT, " + "FOREIGN KEY (fid1, fid2) REFERENCES adbc_fkey_test_base(id1, id2))", + &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_EQ(reader.rows_affected, 0); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); + + adbc_validation::StreamReader reader; + ASSERT_THAT( + AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_ALL, nullptr, nullptr, + nullptr, nullptr, nullptr, &reader.stream.value, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_GT(reader.array->length, 0); + + auto get_objects_data = adbc_validation::GetObjectsReader{&reader.array_view.value}; + ASSERT_NE(*get_objects_data, nullptr) + << "could not initialize the AdbcGetInfoData object"; + + struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + *get_objects_data, "postgres", "public", "adbc_fkey_test"); + ASSERT_NE(table, nullptr) << "could not find adbc_fkey_test table"; + ASSERT_EQ(table->n_table_constraints, 1) + << "expected 1 constraint on adbc_fkey_test table, found: " + << table->n_table_constraints; + + struct AdbcGetObjectsConstraint* constraint = AdbcGetObjectsDataGetConstraintByName( + *get_objects_data, "postgres", "public", "adbc_fkey_test", + "adbc_fkey_test_fid1_fid2_fkey"); + ASSERT_NE(constraint, nullptr) + << "could not find adbc_fkey_test_fid1_fid2_fkey constraint"; + + auto constraint_type = std::string(constraint->constraint_type.data, + constraint->constraint_type.size_bytes); + ASSERT_EQ(constraint_type, "FOREIGN KEY"); + ASSERT_EQ(constraint->n_column_names, 2) + << "expected constraint adbc_fkey_test_fid1_fid2_fkey to be applied to 2 columns, " + "found: " + << constraint->n_column_names; + + for (auto i = 0; i < 2; i++) { + auto str_vw = constraint->constraint_column_names[i]; + auto str = std::string(str_vw.data, str_vw.size_bytes); + if (i == 0) { + ASSERT_EQ(str, "fid1"); + } else if (i == 1) { + ASSERT_EQ(str, "fid2"); + } + } + + ASSERT_EQ(constraint->n_column_usages, 2) + << "expected constraint adbc_fkey_test_fid1_fid2_fkey to have 2 usages, found: " + << constraint->n_column_usages; + + for (auto i = 0; i < 2; i++) { + struct AdbcGetObjectsUsage* usage = constraint->constraint_column_usages[i]; + auto catalog_str = std::string(usage->fk_catalog.data, usage->fk_catalog.size_bytes); + ASSERT_EQ(catalog_str, "postgres"); + auto schema_str = + std::string(usage->fk_db_schema.data, usage->fk_db_schema.size_bytes); + ASSERT_EQ(schema_str, "public"); + auto table_str = std::string(usage->fk_table.data, usage->fk_table.size_bytes); + ASSERT_EQ(table_str, "adbc_fkey_test_base"); + + auto column_str = + std::string(usage->fk_column_name.data, usage->fk_column_name.size_bytes); + if (i == 0) { + ASSERT_EQ(column_str, "id1"); + } else if (i == 1) { + ASSERT_EQ(column_str, "id2"); + } + } +} + +TEST_F(PostgresConnectionTest, GetObjectsTableTypesFilter) { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + if (!quirks()->supports_get_objects()) { + GTEST_SKIP(); + } + + ASSERT_THAT(quirks()->DropView(&connection, "adbc_table_types_view_test", &error), + IsOkStatus(&error)); + ASSERT_THAT(quirks()->DropTable(&connection, "adbc_table_types_table_test", &error), + IsOkStatus(&error)); + + { + adbc_validation::Handle statement; + ASSERT_THAT(AdbcStatementNew(&connection, &statement.value, &error), + IsOkStatus(&error)); + + ASSERT_THAT( + AdbcStatementSetSqlQuery( + &statement.value, + "CREATE TABLE adbc_table_types_table_test (id1 INT, id2 INT)", &error), + IsOkStatus(&error)); + + int64_t rows_affected = 0; + ASSERT_THAT( + AdbcStatementExecuteQuery(&statement.value, nullptr, &rows_affected, &error), + IsOkStatus(&error)); + } + + { + adbc_validation::Handle statement; + ASSERT_THAT(AdbcStatementNew(&connection, &statement.value, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement.value, + "CREATE VIEW adbc_table_types_view_test AS ( " + "SELECT * FROM adbc_table_types_table_test)", + &error), + IsOkStatus(&error)); + int64_t rows_affected = 0; + ASSERT_THAT( + AdbcStatementExecuteQuery(&statement.value, nullptr, &rows_affected, &error), + IsOkStatus(&error)); + } + + adbc_validation::StreamReader reader; + std::vector table_types = {"view", nullptr}; + ASSERT_THAT(AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_ALL, nullptr, + nullptr, nullptr, table_types.data(), nullptr, + &reader.stream.value, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_GT(reader.array->length, 0); + + auto get_objects_data = adbc_validation::GetObjectsReader{&reader.array_view.value}; + ASSERT_NE(*get_objects_data, nullptr) + << "could not initialize the AdbcGetInfoData object"; + + struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + *get_objects_data, "postgres", "public", "adbc_table_types_table_test"); + ASSERT_EQ(table, nullptr) << "unexpected table adbc_table_types_table_test found"; + + struct AdbcGetObjectsTable* view = AdbcGetObjectsDataGetTableByName( + *get_objects_data, "postgres", "public", "adbc_table_types_view_test"); + ASSERT_NE(view, nullptr) << "did not find view adbc_table_types_view_test"; +} + +TEST_F(PostgresConnectionTest, MetadataGetTableSchemaInjection) { + if (!quirks()->supports_bulk_ingest()) { + GTEST_SKIP(); + } + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + ASSERT_THAT(quirks()->DropTable(&connection, "bulk_ingest", &error), + IsOkStatus(&error)); + ASSERT_THAT(quirks()->EnsureSampleTable(&connection, "bulk_ingest", &error), + IsOkStatus(&error)); + + adbc_validation::Handle schema; + ASSERT_THAT(AdbcConnectionGetTableSchema(&connection, /*catalog=*/nullptr, + /*db_schema=*/nullptr, + "0'::int; DROP TABLE bulk_ingest;--", + &schema.value, &error), + IsStatus(ADBC_STATUS_IO, &error)); + + ASSERT_THAT( + AdbcConnectionGetTableSchema(&connection, /*catalog=*/nullptr, + /*db_schema=*/"0'::int; DROP TABLE bulk_ingest;--", + "DROP TABLE bulk_ingest;", &schema.value, &error), + IsStatus(ADBC_STATUS_IO, &error)); + + ASSERT_THAT(AdbcConnectionGetTableSchema(&connection, /*catalog=*/nullptr, + /*db_schema=*/nullptr, "bulk_ingest", + &schema.value, &error), + IsOkStatus(&error)); + + ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema( + &schema.value, {{"int64s", NANOARROW_TYPE_INT64, true}, + {"strings", NANOARROW_TYPE_STRING, true}})); +} + ADBCV_TEST_CONNECTION(PostgresConnectionTest) class PostgresStatementTest : public ::testing::Test, @@ -107,15 +550,10 @@ class PostgresStatementTest : public ::testing::Test, void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); } void TestSqlIngestInt8() { GTEST_SKIP() << "Not implemented"; } - void TestSqlIngestInt16() { GTEST_SKIP() << "Not implemented"; } - void TestSqlIngestInt32() { GTEST_SKIP() << "Not implemented"; } void TestSqlIngestUInt8() { GTEST_SKIP() << "Not implemented"; } void TestSqlIngestUInt16() { GTEST_SKIP() << "Not implemented"; } void TestSqlIngestUInt32() { GTEST_SKIP() << "Not implemented"; } void TestSqlIngestUInt64() { GTEST_SKIP() << "Not implemented"; } - void TestSqlIngestFloat32() { GTEST_SKIP() << "Not implemented"; } - void TestSqlIngestFloat64() { GTEST_SKIP() << "Not implemented"; } - void TestSqlIngestBinary() { GTEST_SKIP() << "Not implemented"; } void TestSqlPrepareErrorParamCountMismatch() { GTEST_SKIP() << "Not yet implemented"; } void TestSqlPrepareGetParameterSchema() { GTEST_SKIP() << "Not yet implemented"; } @@ -134,6 +572,64 @@ class PostgresStatementTest : public ::testing::Test, }; ADBCV_TEST_STATEMENT(PostgresStatementTest) +TEST_F(PostgresStatementTest, UpdateInExecuteQuery) { + ASSERT_THAT(quirks()->DropTable(&connection, "adbc_test", &error), IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + { + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, + "CREATE TABLE adbc_test (ints INT, id SERIAL PRIMARY KEY)", &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_EQ(reader.rows_affected, 0); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + + { + // Use INSERT INTO + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, "INSERT INTO adbc_test (ints) VALUES (1), (2)", &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_EQ(reader.rows_affected, 0); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + + { + // Use INSERT INTO ... RETURNING + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, + "INSERT INTO adbc_test (ints) VALUES (3), (4) RETURNING id", &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_EQ(reader.rows_affected, -1); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(reader.array->release, nullptr); + ASSERT_EQ(reader.array->n_children, 1); + ASSERT_EQ(reader.array->length, 2); + ASSERT_EQ(reader.array_view->children[0]->buffer_views[1].data.as_int32[0], 3); + ASSERT_EQ(reader.array_view->children[0]->buffer_views[1].data.as_int32[1], 4); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } +} + struct TypeTestCase { std::string name; std::string sql_type; @@ -220,6 +716,14 @@ TEST_P(PostgresTypeTest, SelectValue) { ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema( &reader.schema.value, {{std::nullopt, GetParam().arrow_type, true}})); + if (GetParam().arrow_type == NANOARROW_TYPE_TIMESTAMP) { + if (GetParam().sql_type.find("WITH TIME ZONE") == std::string::npos) { + ASSERT_STREQ(reader.schema->children[0]->format, "tsu:"); + } else { + ASSERT_STREQ(reader.schema->children[0]->format, "tsu:UTC"); + } + } + ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_NE(nullptr, reader.array->release); ASSERT_FALSE(ArrowArrayViewIsNull(&reader.array_view.value, 0)); @@ -280,6 +784,127 @@ static std::initializer_list kIntTypeCases = { {"BIGSERIAL", "BIGSERIAL", std::to_string(std::numeric_limits::max()), NANOARROW_TYPE_INT64, std::numeric_limits::max()}, }; +static std::initializer_list kDateTypeCases = { + {"DATE0", "DATE", "'1970-01-01'", NANOARROW_TYPE_DATE32, int64_t(0)}, + {"DATE1", "DATE", "'2000-01-01'", NANOARROW_TYPE_DATE32, int64_t(10957)}, + {"DATE2", "DATE", "'1950-01-01'", NANOARROW_TYPE_DATE32, int64_t(-7305)}, +}; +static std::initializer_list kTimeTypeCases = { + {"TIME_WITHOUT_TIME_ZONE", "TIME WITHOUT TIME ZONE", "'00:00'", NANOARROW_TYPE_TIME64, + int64_t(0)}, + {"TIME_WITHOUT_TIME_ZONE_VAL", "TIME WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'123'123)}, + {"TIME_6_WITHOUT_TIME_ZONE", "TIME (6) WITHOUT TIME ZONE", "'00:00'", + NANOARROW_TYPE_TIME64, int64_t(0)}, + {"TIME_6_WITHOUT_TIME_ZONE_VAL", "TIME (6) WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'123'123)}, + {"TIME_5_WITHOUT_TIME_ZONE", "TIME (5) WITHOUT TIME ZONE", "'00:00'", + NANOARROW_TYPE_TIME64, int64_t(0)}, + {"TIME_5_WITHOUT_TIME_ZONE_VAL", "TIME (5) WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'123'120)}, + {"TIME_4_WITHOUT_TIME_ZONE", "TIME (4) WITHOUT TIME ZONE", "'00:00'", + NANOARROW_TYPE_TIME64, int64_t(0)}, + {"TIME_4_WITHOUT_TIME_ZONE_VAL", "TIME (4) WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'123'100)}, + {"TIME_3_WITHOUT_TIME_ZONE", "TIME (3) WITHOUT TIME ZONE", "'00:00'", + NANOARROW_TYPE_TIME64, int64_t(0)}, + {"TIME_3_WITHOUT_TIME_ZONE_VAL", "TIME (3) WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'123'000)}, + {"TIME_2_WITHOUT_TIME_ZONE", "TIME (2) WITHOUT TIME ZONE", "'00:00'", + NANOARROW_TYPE_TIME64, int64_t(0)}, + {"TIME_2_WITHOUT_TIME_ZONE_VAL", "TIME (2) WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'120'000)}, + {"TIME_1_WITHOUT_TIME_ZONE", "TIME (1) WITHOUT TIME ZONE", "'00:00'", + NANOARROW_TYPE_TIME64, int64_t(0)}, + {"TIME_1_WITHOUT_TIME_ZONE_VAL", "TIME (1) WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'100'000)}, + {"TIME_0_WITHOUT_TIME_ZONE", "TIME (0) WITHOUT TIME ZONE", "'00:00'", + NANOARROW_TYPE_TIME64, int64_t(0)}, + {"TIME_0_WITHOUT_TIME_ZONE_VAL", "TIME (0) WITHOUT TIME ZONE", "'01:02:03.123123'", + NANOARROW_TYPE_TIME64, int64_t(3'723'000'000)}, +}; +static std::initializer_list kTimestampTypeCases = { + {"TIMESTAMP_WITHOUT_TIME_ZONE", "TIMESTAMP WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'123)}, + {"TIMESTAMP_6_WITHOUT_TIME_ZONE", "TIMESTAMP (6) WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_6_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (6) WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'123)}, + {"TIMESTAMP_5_WITHOUT_TIME_ZONE", "TIMESTAMP (5) WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_5_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (5) WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'120)}, + {"TIMESTAMP_4_WITHOUT_TIME_ZONE", "TIMESTAMP (4) WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_4_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (4) WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'100)}, + {"TIMESTAMP_3_WITHOUT_TIME_ZONE", "TIMESTAMP (3) WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_3_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (3) WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'000)}, + {"TIMESTAMP_2_WITHOUT_TIME_ZONE", "TIMESTAMP (2) WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_2_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (2) WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'120'000)}, + {"TIMESTAMP_1_WITHOUT_TIME_ZONE", "TIMESTAMP (1) WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_1_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (1) WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'100'000)}, + {"TIMESTAMP_0_WITHOUT_TIME_ZONE", "TIMESTAMP (0) WITHOUT TIME ZONE", + "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)}, + {"TIMESTAMP_0_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (0) WITHOUT TIME ZONE", + "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'000'000)}, + {"TIMESTAMP_WITH_TIME_ZONE", "TIMESTAMP WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_WITH_TIME_ZONE_VAL", "TIMESTAMP WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'123'123)}, + {"TIMESTAMP_6_WITH_TIME_ZONE", "TIMESTAMP (6) WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_6_WITH_TIME_ZONE_VAL", "TIMESTAMP (6) WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'123'123)}, + {"TIMESTAMP_5_WITH_TIME_ZONE", "TIMESTAMP (5) WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_5_WITH_TIME_ZONE_VAL", "TIMESTAMP (5) WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'123'120)}, + {"TIMESTAMP_4_WITH_TIME_ZONE", "TIMESTAMP (4) WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_4_WITH_TIME_ZONE_VAL", "TIMESTAMP (4) WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'123'100)}, + {"TIMESTAMP_3_WITH_TIME_ZONE", "TIMESTAMP (3) WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_3_WITH_TIME_ZONE_VAL", "TIMESTAMP (3) WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'123'000)}, + {"TIMESTAMP_2_WITH_TIME_ZONE", "TIMESTAMP (2) WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_2_WITH_TIME_ZONE_VAL", "TIMESTAMP (2) WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'120'000)}, + {"TIMESTAMP_1_WITH_TIME_ZONE", "TIMESTAMP (1) WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_1_WITH_TIME_ZONE_VAL", "TIMESTAMP (1) WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'100'000)}, + {"TIMESTAMP_0_WITH_TIME_ZONE", "TIMESTAMP (0) WITH TIME ZONE", + "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(-1'800'000'000)}, + {"TIMESTAMP_0_WITH_TIME_ZONE_VAL", "TIMESTAMP (0) WITH TIME ZONE", + "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP, + int64_t(95'645'000'000)}, +}; INSTANTIATE_TEST_SUITE_P(BoolType, PostgresTypeTest, testing::ValuesIn(kBoolTypeCases), TypeTestCase::FormatName); @@ -289,3 +914,10 @@ INSTANTIATE_TEST_SUITE_P(FloatTypes, PostgresTypeTest, testing::ValuesIn(kFloatT TypeTestCase::FormatName); INSTANTIATE_TEST_SUITE_P(IntTypes, PostgresTypeTest, testing::ValuesIn(kIntTypeCases), TypeTestCase::FormatName); +INSTANTIATE_TEST_SUITE_P(DateTypes, PostgresTypeTest, testing::ValuesIn(kDateTypeCases), + TypeTestCase::FormatName); +INSTANTIATE_TEST_SUITE_P(TimeTypes, PostgresTypeTest, testing::ValuesIn(kTimeTypeCases), + TypeTestCase::FormatName); +INSTANTIATE_TEST_SUITE_P(TimestampTypes, PostgresTypeTest, + testing::ValuesIn(kTimestampTypeCases), + TypeTestCase::FormatName); diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.cc b/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.cc index 494c736..3092046 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.cc +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.cc @@ -30,11 +30,11 @@ #include #include +#include "common/utils.h" #include "connection.h" #include "postgres_copy_reader.h" #include "postgres_type.h" #include "postgres_util.h" -#include "utils.h" namespace adbcpq { @@ -199,6 +199,10 @@ struct BindStream { type_id = PostgresTypeId::kInt8; param_lengths[i] = 8; break; + case ArrowType::NANOARROW_TYPE_FLOAT: + type_id = PostgresTypeId::kFloat4; + param_lengths[i] = 4; + break; case ArrowType::NANOARROW_TYPE_DOUBLE: type_id = PostgresTypeId::kFloat8; param_lengths[i] = 8; @@ -207,6 +211,10 @@ struct BindStream { type_id = PostgresTypeId::kText; param_lengths[i] = 0; break; + case ArrowType::NANOARROW_TYPE_BINARY: + type_id = PostgresTypeId::kBytea; + param_lengths[i] = 0; + break; default: SetError(error, "%s%" PRIu64 "%s%s%s%s", "[libpq] Field #", static_cast(i + 1), " ('", bind_schema->children[i]->name, @@ -239,8 +247,8 @@ struct BindStream { PGresult* result = PQprepare(conn, /*stmtName=*/"", query.c_str(), /*nParams=*/bind_schema->n_children, param_types.data()); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - SetError(error, "%s%s", "[libpq] Failed to prepare query: ", PQerrorMessage(conn)); - SetError(error, "%s%s", "[libpq] Query: ", query.c_str()); + SetError(error, "[libpq] Failed to prepare query: %s\nQuery was:%s", + PQerrorMessage(conn), query.c_str()); PQclear(result); return ADBC_STATUS_IO; } @@ -256,10 +264,10 @@ struct BindStream { Handle array; int res = bind->get_next(&bind.value, &array.value); if (res != 0) { - // TODO: include errno - SetError(error, "%s%s", - "[libpq] Failed to read next batch from stream of bind parameters: ", - bind->get_last_error(&bind.value)); + SetError(error, + "[libpq] Failed to read next batch from stream of bind parameters: " + "(%d) %s %s", + res, std::strerror(res), bind->get_last_error(&bind.value)); return ADBC_STATUS_IO; } if (!array->release) break; @@ -282,8 +290,14 @@ struct BindStream { param_values[col] = param_values_buffer.data() + param_values_offsets[col]; } switch (bind_schema_fields[col].type) { + case ArrowType::NANOARROW_TYPE_INT16: { + const uint16_t value = ToNetworkInt16( + array_view->children[col]->buffer_views[1].data.as_int16[row]); + std::memcpy(param_values[col], &value, sizeof(int16_t)); + break; + } case ArrowType::NANOARROW_TYPE_INT32: { - const int64_t value = ToNetworkInt32( + const uint32_t value = ToNetworkInt32( array_view->children[col]->buffer_views[1].data.as_int32[row]); std::memcpy(param_values[col], &value, sizeof(int32_t)); break; @@ -294,13 +308,20 @@ struct BindStream { std::memcpy(param_values[col], &value, sizeof(int64_t)); break; } + case ArrowType::NANOARROW_TYPE_FLOAT: { + const uint32_t value = ToNetworkFloat4( + array_view->children[col]->buffer_views[1].data.as_float[row]); + std::memcpy(param_values[col], &value, sizeof(uint32_t)); + break; + } case ArrowType::NANOARROW_TYPE_DOUBLE: { const uint64_t value = ToNetworkFloat8( array_view->children[col]->buffer_views[1].data.as_double[row]); std::memcpy(param_values[col], &value, sizeof(uint64_t)); break; } - case ArrowType::NANOARROW_TYPE_STRING: { + case ArrowType::NANOARROW_TYPE_STRING: + case ArrowType::NANOARROW_TYPE_BINARY: { const ArrowBufferView view = ArrowArrayViewGetBytesUnsafe(array_view->children[col], row); // TODO: overflow check? @@ -442,6 +463,8 @@ int TupleReader::GetNext(struct ArrowArray* out) { } void TupleReader::Release() { + StringBuilderReset(&error_builder_); + if (result_) { PQclear(result_); result_ = nullptr; @@ -562,12 +585,18 @@ AdbcStatusCode PostgresStatement::CreateBulkTable( case ArrowType::NANOARROW_TYPE_INT64: create += " BIGINT"; break; + case ArrowType::NANOARROW_TYPE_FLOAT: + create += " REAL"; + break; case ArrowType::NANOARROW_TYPE_DOUBLE: create += " DOUBLE PRECISION"; break; case ArrowType::NANOARROW_TYPE_STRING: create += " TEXT"; break; + case ArrowType::NANOARROW_TYPE_BINARY: + create += " BYTEA"; + break; default: // TODO: data type to string SetError(error, "%s%" PRIu64 "%s%s%s%ud", "[libpq] Field #", @@ -584,9 +613,8 @@ AdbcStatusCode PostgresStatement::CreateBulkTable( /*paramLengths=*/nullptr, /*paramFormats=*/nullptr, /*resultFormat=*/1 /*(binary)*/); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - SetError(error, "%s%s", - "[libpq] Failed to create table: ", PQerrorMessage(connection_->conn())); - SetError(error, "%s%s", "[libpq] Query: ", create.c_str()); + SetError(error, "[libpq] Failed to create table: %s\nQuery was: %s", + PQerrorMessage(connection_->conn()), create.c_str()); PQclear(result); return ADBC_STATUS_IO; } @@ -637,11 +665,8 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream, // and https://stackoverflow.com/questions/69233792 suggests that // you can't PREPARE a query containing COPY. } - if (!stream) { - if (!ingest_.target.empty()) { - return ExecuteUpdateBulk(rows_affected, error); - } - return ExecuteUpdateQuery(rows_affected, error); + if (!stream && !ingest_.target.empty()) { + return ExecuteUpdateBulk(rows_affected, error); } if (query_.empty()) { @@ -649,18 +674,26 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream, return ADBC_STATUS_INVALID_STATE; } - // 1. Execute the query with LIMIT 0 to get the schema + // 1. Prepare the query to get the schema { // TODO: we should pipeline here and assume this will succeed - std::string schema_query = "SELECT * FROM (" + query_ + ") AS ignored LIMIT 0"; - PGresult* result = - PQexecParams(connection_->conn(), query_.c_str(), /*nParams=*/0, - /*paramTypes=*/nullptr, /*paramValues=*/nullptr, - /*paramLengths=*/nullptr, /*paramFormats=*/nullptr, kPgBinaryFormat); - if (PQresultStatus(result) != PGRES_TUPLES_OK) { - SetError(error, "%s%s", "[libpq] Query was: ", schema_query.c_str()); - SetError(error, "%s%s", "[libpq] Failed to execute query: could not infer schema: ", - PQerrorMessage(connection_->conn())); + PGresult* result = PQprepare(connection_->conn(), /*stmtName=*/"", query_.c_str(), + /*nParams=*/0, nullptr); + if (PQresultStatus(result) != PGRES_COMMAND_OK) { + SetError(error, + "[libpq] Failed to execute query: could not infer schema: failed to " + "prepare query: %s\nQuery was:%s", + PQerrorMessage(connection_->conn()), query_.c_str()); + PQclear(result); + return ADBC_STATUS_IO; + } + PQclear(result); + result = PQdescribePrepared(connection_->conn(), /*stmtName=*/""); + if (PQresultStatus(result) != PGRES_COMMAND_OK) { + SetError(error, + "[libpq] Failed to execute query: could not infer schema: failed to " + "describe prepared statement: %s\nQuery was:%s", + PQerrorMessage(connection_->conn()), query_.c_str()); PQclear(result); return ADBC_STATUS_IO; } @@ -683,6 +716,20 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream, return na_res; } + // If the caller did not request a result set or if there are no + // inferred output columns (e.g. a CREATE or UPDATE), then don't + // use COPY (which would fail anyways) + if (!stream || root_type.n_children() == 0) { + RAISE_ADBC(ExecuteUpdateQuery(rows_affected, error)); + if (stream) { + struct ArrowSchema schema; + std::memset(&schema, 0, sizeof(schema)); + RAISE_NA(reader_.copy_reader_->GetSchema(&schema)); + nanoarrow::EmptyArrayStream::MakeUnique(&schema).move(stream); + } + return ADBC_STATUS_OK; + } + // This resolves the reader specific to each PostgresType -> ArrowSchema // conversion. It is unlikely that this will fail given that we have just // inferred these conversions ourselves. @@ -701,9 +748,9 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream, /*paramTypes=*/nullptr, /*paramValues=*/nullptr, /*paramLengths=*/nullptr, /*paramFormats=*/nullptr, kPgBinaryFormat); if (PQresultStatus(reader_.result_) != PGRES_COPY_OUT) { - SetError(error, "%s%s", "[libpq] Query was: ", copy_query.c_str()); - SetError(error, "%s%s", "[libpq] Failed to execute query: could not begin COPY: ", - PQerrorMessage(connection_->conn())); + SetError(error, + "[libpq] Failed to execute query: could not begin COPY: %s\nQuery was: %s", + PQerrorMessage(connection_->conn()), copy_query.c_str()); ClearResult(); return ADBC_STATUS_IO; } @@ -753,27 +800,14 @@ AdbcStatusCode PostgresStatement::ExecuteUpdateBulk(int64_t* rows_affected, AdbcStatusCode PostgresStatement::ExecuteUpdateQuery(int64_t* rows_affected, struct AdbcError* error) { - if (query_.empty()) { - SetError(error, "%s", "[libpq] Must SetSqlQuery before ExecuteQuery"); - return ADBC_STATUS_INVALID_STATE; - } - - PGresult* result = nullptr; - - if (prepared_) { - result = PQexecPrepared(connection_->conn(), /*stmtName=*/"", /*nParams=*/0, - /*paramValues=*/nullptr, /*paramLengths=*/nullptr, - /*paramFormats=*/nullptr, /*resultFormat=*/kPgBinaryFormat); - } else { - result = PQexecParams(connection_->conn(), query_.c_str(), /*nParams=*/0, - /*paramTypes=*/nullptr, /*paramValues=*/nullptr, - /*paramLengths=*/nullptr, /*paramFormats=*/nullptr, - /*resultFormat=*/kPgBinaryFormat); - } + // NOTE: must prepare first (used in ExecuteQuery) + PGresult* result = + PQexecPrepared(connection_->conn(), /*stmtName=*/"", /*nParams=*/0, + /*paramValues=*/nullptr, /*paramLengths=*/nullptr, + /*paramFormats=*/nullptr, /*resultFormat=*/kPgBinaryFormat); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - SetError(error, "%s%s", "[libpq] Query was: ", query_.c_str()); - SetError(error, "%s%s", - "[libpq] Failed to execute query: ", PQerrorMessage(connection_->conn())); + SetError(error, "[libpq] Failed to execute query: %s\nQuery was:%s", + PQerrorMessage(connection_->conn()), query_.c_str()); PQclear(result); return ADBC_STATUS_IO; } diff --git a/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.h b/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.h index 5629323..0ff6cb8 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.h +++ b/3rd_party/apache-arrow-adbc/c/driver/postgresql/statement.h @@ -26,9 +26,9 @@ #include #include +#include "common/utils.h" #include "postgres_copy_reader.h" #include "postgres_type.h" -#include "utils.h" namespace adbcpq { class PostgresConnection; diff --git a/3rd_party/apache-arrow-adbc/c/driver/snowflake/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/driver/snowflake/CMakeLists.txt index 5b4693e..ca6c121 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/snowflake/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/driver/snowflake/CMakeLists.txt @@ -33,6 +33,7 @@ add_go_lib("${REPOSITORY_ROOT}/go/adbc/pkg/snowflake/" include_directories(SYSTEM ${REPOSITORY_ROOT}) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/) +include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) if(ADBC_TEST_LINKAGE STREQUAL "shared") @@ -42,6 +43,7 @@ else() endif() if(ADBC_BUILD_TESTS) + include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver/common) add_test_case(driver_snowflake_test PREFIX adbc @@ -49,9 +51,9 @@ if(ADBC_BUILD_TESTS) driver-snowflake SOURCES snowflake_test.cc - ../../validation/adbc_validation.cc - ../../validation/adbc_validation_util.cc EXTRA_LINK_LIBS + adbc_driver_common + adbc_validation nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-snowflake-test PRIVATE cxx_std_17) diff --git a/3rd_party/apache-arrow-adbc/c/driver/snowflake/snowflake_test.cc b/3rd_party/apache-arrow-adbc/c/driver/snowflake/snowflake_test.cc index 021269c..3b2eb65 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/snowflake/snowflake_test.cc +++ b/3rd_party/apache-arrow-adbc/c/driver/snowflake/snowflake_test.cc @@ -114,6 +114,7 @@ class SnowflakeQuirks : public adbc_validation::DriverQuirks { bool supports_partitioned_data() const override { return false; } bool supports_dynamic_parameter_binding() const override { return false; } bool ddl_implicit_commit_txn() const override { return true; } + std::string db_schema() const override { return "ADBC_TESTING"; } const char* uri_; bool skip_{false}; diff --git a/3rd_party/apache-arrow-adbc/c/driver/sqlite/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/driver/sqlite/CMakeLists.txt index 9d1d400..eb5a845 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/sqlite/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/driver/sqlite/CMakeLists.txt @@ -49,7 +49,7 @@ add_arrow_lib(adbc_driver_sqlite include_directories(SYSTEM ${REPOSITORY_ROOT}) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver/common) +include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver) include_directories(SYSTEM ${SQLite3_INCLUDE_DIRS}) foreach(LIB_TARGET ${ADBC_LIBRARIES}) @@ -70,10 +70,9 @@ if(ADBC_BUILD_TESTS) driver-sqlite SOURCES sqlite_test.cc - ../../validation/adbc_validation.cc - ../../validation/adbc_validation_util.cc EXTRA_LINK_LIBS adbc_driver_common + adbc_validation nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-sqlite-test PRIVATE cxx_std_17) diff --git a/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite.c b/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite.c index 43f9d98..4124098 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite.c +++ b/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite.c @@ -28,9 +28,9 @@ #include #include +#include "common/utils.h" #include "statement_reader.h" #include "types.h" -#include "utils.h" static const char kDefaultUri[] = "file:adbc_driver_sqlite?mode=memory&cache=shared"; // The batch size for query results (and for initial type inference) @@ -237,106 +237,35 @@ AdbcStatusCode SqliteConnectionRelease(struct AdbcConnection* connection, return ADBC_STATUS_OK; } -AdbcStatusCode SqliteConnectionGetInfoAppendStringImpl(struct ArrowArray* array, - uint32_t info_code, - const char* info_value, - struct AdbcError* error) { - CHECK_NA(INTERNAL, ArrowArrayAppendUInt(array->children[0], info_code), error); - // Append to type variant - struct ArrowStringView value = ArrowCharView(info_value); - CHECK_NA(INTERNAL, ArrowArrayAppendString(array->children[1]->children[0], value), - error); - // Append type code/offset - CHECK_NA(INTERNAL, ArrowArrayFinishUnionElement(array->children[1], /*type_id=*/0), - error); - return ADBC_STATUS_OK; -} - AdbcStatusCode SqliteConnectionGetInfoImpl(const uint32_t* info_codes, size_t info_codes_length, struct ArrowSchema* schema, struct ArrowArray* array, struct AdbcError* error) { - ArrowSchemaInit(schema); - CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, /*num_columns=*/2), error); - - CHECK_NA(INTERNAL, ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_UINT32), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(schema->children[0], "info_name"), error); - schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; - - struct ArrowSchema* info_value = schema->children[1]; - CHECK_NA(INTERNAL, ArrowSchemaSetTypeUnion(info_value, NANOARROW_TYPE_DENSE_UNION, 6), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value, "info_value"), error); - - CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[0], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[0], "string_value"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[1], NANOARROW_TYPE_BOOL), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[1], "bool_value"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[2], NANOARROW_TYPE_INT64), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[2], "int64_value"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[3], NANOARROW_TYPE_INT32), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[3], "int32_bitmask"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[4], NANOARROW_TYPE_LIST), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(info_value->children[4], "string_list"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(info_value->children[5], NANOARROW_TYPE_MAP), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(info_value->children[5], "int32_to_int32_list_map"), error); - - CHECK_NA( - INTERNAL, - ArrowSchemaSetType(info_value->children[4]->children[0], NANOARROW_TYPE_STRING), - error); - - CHECK_NA(INTERNAL, - ArrowSchemaSetType(info_value->children[5]->children[0]->children[0], - NANOARROW_TYPE_INT32), - error); - info_value->children[5]->children[0]->children[0]->flags &= ~ARROW_FLAG_NULLABLE; - CHECK_NA(INTERNAL, - ArrowSchemaSetType(info_value->children[5]->children[0]->children[1], - NANOARROW_TYPE_LIST), - error); - CHECK_NA( - INTERNAL, - ArrowSchemaSetType(info_value->children[5]->children[0]->children[1]->children[0], - NANOARROW_TYPE_INT32), - error); - - struct ArrowError na_error = {0}; - CHECK_NA_DETAIL(INTERNAL, ArrowArrayInitFromSchema(array, schema, &na_error), &na_error, - error); - CHECK_NA(INTERNAL, ArrowArrayStartAppending(array), error); - + RAISE_ADBC(AdbcInitConnectionGetInfoSchema(info_codes, info_codes_length, schema, array, + error)); for (size_t i = 0; i < info_codes_length; i++) { switch (info_codes[i]) { case ADBC_INFO_VENDOR_NAME: - RAISE_ADBC(SqliteConnectionGetInfoAppendStringImpl(array, info_codes[i], "SQLite", - error)); + RAISE_ADBC( + AdbcConnectionGetInfoAppendString(array, info_codes[i], "SQLite", error)); break; case ADBC_INFO_VENDOR_VERSION: - RAISE_ADBC(SqliteConnectionGetInfoAppendStringImpl(array, info_codes[i], - sqlite3_libversion(), error)); + RAISE_ADBC(AdbcConnectionGetInfoAppendString(array, info_codes[i], + sqlite3_libversion(), error)); break; case ADBC_INFO_DRIVER_NAME: - RAISE_ADBC(SqliteConnectionGetInfoAppendStringImpl(array, info_codes[i], - "ADBC SQLite Driver", error)); + RAISE_ADBC(AdbcConnectionGetInfoAppendString(array, info_codes[i], + "ADBC SQLite Driver", error)); break; case ADBC_INFO_DRIVER_VERSION: // TODO(lidavidm): fill in driver version - RAISE_ADBC(SqliteConnectionGetInfoAppendStringImpl(array, info_codes[i], - "(unknown)", error)); + RAISE_ADBC( + AdbcConnectionGetInfoAppendString(array, info_codes[i], "(unknown)", error)); break; case ADBC_INFO_DRIVER_ARROW_VERSION: - RAISE_ADBC(SqliteConnectionGetInfoAppendStringImpl(array, info_codes[i], - NANOARROW_VERSION, error)); + RAISE_ADBC(AdbcConnectionGetInfoAppendString(array, info_codes[i], + NANOARROW_VERSION, error)); break; default: // Ignore @@ -345,6 +274,7 @@ AdbcStatusCode SqliteConnectionGetInfoImpl(const uint32_t* info_codes, CHECK_NA(INTERNAL, ArrowArrayFinishElement(array), error); } + struct ArrowError na_error = {0}; CHECK_NA_DETAIL(INTERNAL, ArrowArrayFinishBuildingDefault(array, &na_error), &na_error, error); @@ -378,190 +308,6 @@ AdbcStatusCode SqliteConnectionGetInfo(struct AdbcConnection* connection, return BatchToArrayStream(&array, &schema, out, error); } -AdbcStatusCode SqliteConnectionGetObjectsSchema(struct ArrowSchema* schema, - struct AdbcError* error) { - ArrowSchemaInit(schema); - CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, /*num_columns=*/2), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(schema->children[0], "catalog_name"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_LIST), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(schema->children[1], "catalog_db_schemas"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema->children[1]->children[0], 2), - error); - - struct ArrowSchema* db_schema_schema = schema->children[1]->children[0]; - CHECK_NA(INTERNAL, - ArrowSchemaSetType(db_schema_schema->children[0], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(db_schema_schema->children[0], "db_schema_name"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(db_schema_schema->children[1], NANOARROW_TYPE_LIST), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(db_schema_schema->children[1], "db_schema_tables"), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetTypeStruct(db_schema_schema->children[1]->children[0], 4), - error); - - struct ArrowSchema* table_schema = db_schema_schema->children[1]->children[0]; - CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[0], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[0], "table_name"), error); - table_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; - CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[1], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[1], "table_type"), error); - table_schema->children[1]->flags &= ~ARROW_FLAG_NULLABLE; - CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[2], NANOARROW_TYPE_LIST), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[2], "table_columns"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(table_schema->children[2]->children[0], 19), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(table_schema->children[3], NANOARROW_TYPE_LIST), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(table_schema->children[3], "table_constraints"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(table_schema->children[3]->children[0], 4), - error); - - struct ArrowSchema* column_schema = table_schema->children[2]->children[0]; - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[0], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[0], "column_name"), - error); - column_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[1], NANOARROW_TYPE_INT32), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[1], "ordinal_position"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[2], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[2], "remarks"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[3], NANOARROW_TYPE_INT16), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[3], "xdbc_data_type"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[4], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[4], "xdbc_type_name"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[5], NANOARROW_TYPE_INT32), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[5], "xdbc_column_size"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[6], NANOARROW_TYPE_INT16), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(column_schema->children[6], "xdbc_decimal_digits"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[7], NANOARROW_TYPE_INT16), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(column_schema->children[7], "xdbc_num_prec_radix"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[8], NANOARROW_TYPE_INT16), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[8], "xdbc_nullable"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[9], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[9], "xdbc_column_def"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[10], NANOARROW_TYPE_INT16), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(column_schema->children[10], "xdbc_sql_data_type"), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[11], NANOARROW_TYPE_INT16), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[11], "xdbc_datetime_sub"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[12], NANOARROW_TYPE_INT32), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(column_schema->children[12], "xdbc_char_octet_length"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[13], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[13], "xdbc_is_nullable"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[14], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(column_schema->children[14], "xdbc_scope_catalog"), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[15], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[15], "xdbc_scope_schema"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(column_schema->children[16], NANOARROW_TYPE_STRING), error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(column_schema->children[16], "xdbc_scope_table"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[17], NANOARROW_TYPE_BOOL), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(column_schema->children[17], "xdbc_is_autoincrement"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(column_schema->children[18], NANOARROW_TYPE_BOOL), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(column_schema->children[18], "xdbc_is_generatedcolumn"), - error); - - struct ArrowSchema* constraint_schema = table_schema->children[3]->children[0]; - CHECK_NA(INTERNAL, - ArrowSchemaSetType(constraint_schema->children[0], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(constraint_schema->children[0], "constraint_name"), error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(constraint_schema->children[1], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(constraint_schema->children[1], "constraint_type"), error); - constraint_schema->children[1]->flags &= ~ARROW_FLAG_NULLABLE; - CHECK_NA(INTERNAL, - ArrowSchemaSetType(constraint_schema->children[2], NANOARROW_TYPE_LIST), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(constraint_schema->children[2], "constraint_column_names"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetType(constraint_schema->children[2]->children[0], - NANOARROW_TYPE_STRING), - error); - constraint_schema->children[2]->flags &= ~ARROW_FLAG_NULLABLE; - CHECK_NA(INTERNAL, - ArrowSchemaSetType(constraint_schema->children[3], NANOARROW_TYPE_LIST), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetName(constraint_schema->children[3], "constraint_column_usage"), - error); - CHECK_NA(INTERNAL, - ArrowSchemaSetTypeStruct(constraint_schema->children[3]->children[0], 4), - error); - - struct ArrowSchema* usage_schema = constraint_schema->children[3]->children[0]; - CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[0], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[0], "fk_catalog"), error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[1], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[1], "fk_db_schema"), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[2], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[2], "fk_table"), error); - usage_schema->children[2]->flags &= ~ARROW_FLAG_NULLABLE; - CHECK_NA(INTERNAL, ArrowSchemaSetType(usage_schema->children[3], NANOARROW_TYPE_STRING), - error); - CHECK_NA(INTERNAL, ArrowSchemaSetName(usage_schema->children[3], "fk_column_name"), - error); - usage_schema->children[3]->flags &= ~ARROW_FLAG_NULLABLE; - - return ADBC_STATUS_OK; -} - static const char kTableQuery[] = "SELECT name, type " "FROM sqlite_master " @@ -685,6 +431,7 @@ AdbcStatusCode SqliteConnectionGetColumnsImpl( CHECK_NA(INTERNAL, ArrowArrayFinishElement(table_columns_items), error); } + RAISE(INTERNAL, rc == SQLITE_DONE, sqlite3_errmsg(conn->conn), error); return ADBC_STATUS_OK; } @@ -695,7 +442,7 @@ AdbcStatusCode SqliteConnectionGetConstraintsImpl( sqlite3_stmt* fk_stmt, struct AdbcError* error) { struct ArrowArray* table_constraints_items = table_constraints_col->children[0]; struct ArrowArray* constraint_name_col = table_constraints_items->children[0]; - // Constraints type column would be table_constraints_items->children[1]; + struct ArrowArray* constraint_type_col = table_constraints_items->children[1]; struct ArrowArray* constraint_column_names_col = table_constraints_items->children[2]; struct ArrowArray* constraint_column_names_items = constraint_column_names_col->children[0]; @@ -723,7 +470,7 @@ AdbcStatusCode SqliteConnectionGetConstraintsImpl( has_primary_key = 1; CHECK_NA(INTERNAL, ArrowArrayAppendNull(constraint_name_col, 1), error); CHECK_NA(INTERNAL, - ArrowArrayAppendString(constraint_name_col, ArrowCharView("PRIMARY KEY")), + ArrowArrayAppendString(constraint_type_col, ArrowCharView("PRIMARY KEY")), error); } CHECK_NA( @@ -734,6 +481,7 @@ AdbcStatusCode SqliteConnectionGetConstraintsImpl( .size_bytes = sqlite3_column_bytes(pk_stmt, 0)}), error); } + RAISE(INTERNAL, rc == SQLITE_DONE, sqlite3_errmsg(conn->conn), error); if (has_primary_key) { CHECK_NA(INTERNAL, ArrowArrayFinishElement(constraint_column_names_col), error); CHECK_NA(INTERNAL, ArrowArrayAppendNull(constraint_column_usage_col, 1), error); @@ -791,6 +539,7 @@ AdbcStatusCode SqliteConnectionGetConstraintsImpl( error); } } + RAISE(INTERNAL, rc == SQLITE_DONE, sqlite3_errmsg(conn->conn), error); if (prev_fk_id != -1) { CHECK_NA(INTERNAL, ArrowArrayFinishElement(constraint_column_names_col), error); CHECK_NA(INTERNAL, ArrowArrayFinishElement(constraint_column_usage_col), error); @@ -928,7 +677,7 @@ AdbcStatusCode SqliteConnectionGetObjectsImpl( struct SqliteConnection* conn, int depth, const char* catalog, const char* db_schema, const char* table_name, const char** table_type, const char* column_name, struct ArrowSchema* schema, struct ArrowArray* array, struct AdbcError* error) { - RAISE_ADBC(SqliteConnectionGetObjectsSchema(schema, error)); + RAISE_ADBC(AdbcInitConnectionObjectsSchema(schema, error)); struct ArrowError na_error = {0}; CHECK_NA_DETAIL(INTERNAL, ArrowArrayInitFromSchema(array, schema, &na_error), &na_error, diff --git a/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite_test.cc b/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite_test.cc index 7e7d840..8a580cd 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite_test.cc +++ b/3rd_party/apache-arrow-adbc/c/driver/sqlite/sqlite_test.cc @@ -64,7 +64,17 @@ class SqliteQuirks : public adbc_validation::DriverQuirks { } } + std::optional PrimaryKeyTableDdl(std::string_view name) const override { + std::string ddl = "CREATE TABLE "; + ddl += name; + ddl += " (id INTEGER PRIMARY KEY)"; + return ddl; + } + bool supports_concurrent_statements() const override { return true; } + + std::string catalog() const override { return "main"; } + std::string db_schema() const override { return ""; } }; class SqliteDatabaseTest : public ::testing::Test, public adbc_validation::DatabaseTest { @@ -90,6 +100,68 @@ class SqliteConnectionTest : public ::testing::Test, }; ADBCV_TEST_CONNECTION(SqliteConnectionTest) +TEST_F(SqliteConnectionTest, GetInfoMetadata) { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), + adbc_validation::IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), + adbc_validation::IsOkStatus(&error)); + + adbc_validation::StreamReader reader; + std::vector info = { + ADBC_INFO_DRIVER_NAME, + ADBC_INFO_DRIVER_VERSION, + ADBC_INFO_VENDOR_NAME, + ADBC_INFO_VENDOR_VERSION, + }; + ASSERT_THAT(AdbcConnectionGetInfo(&connection, info.data(), info.size(), + &reader.stream.value, &error), + adbc_validation::IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + + std::vector seen; + while (true) { + ASSERT_NO_FATAL_FAILURE(reader.Next()); + if (!reader.array->release) break; + + for (int64_t row = 0; row < reader.array->length; row++) { + ASSERT_FALSE(ArrowArrayViewIsNull(reader.array_view->children[0], row)); + const uint32_t code = + reader.array_view->children[0]->buffer_views[1].data.as_uint32[row]; + seen.push_back(code); + + int str_child_index = 0; + struct ArrowArrayView* str_child = + reader.array_view->children[1]->children[str_child_index]; + switch (code) { + case ADBC_INFO_DRIVER_NAME: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 0); + EXPECT_EQ("ADBC SQLite Driver", std::string(val.data, val.size_bytes)); + break; + } + case ADBC_INFO_DRIVER_VERSION: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 1); + EXPECT_EQ("(unknown)", std::string(val.data, val.size_bytes)); + break; + } + case ADBC_INFO_VENDOR_NAME: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 2); + EXPECT_EQ("SQLite", std::string(val.data, val.size_bytes)); + break; + } + case ADBC_INFO_VENDOR_VERSION: { + ArrowStringView val = ArrowArrayViewGetStringUnsafe(str_child, 3); + EXPECT_THAT(std::string(val.data, val.size_bytes), + ::testing::MatchesRegex("3\\..*")); + } + default: + // Ignored + break; + } + } + } + ASSERT_THAT(seen, ::testing::UnorderedElementsAreArray(info)); +} + class SqliteStatementTest : public ::testing::Test, public adbc_validation::StatementTest { public: @@ -474,6 +546,47 @@ TEST_F(SqliteReaderTest, InferTypedParams) { "[SQLite] Type mismatch in column 0: expected INT64 but got DOUBLE")); } +TEST_F(SqliteReaderTest, MultiValueParams) { + // Regression test for apache/arrow-adbc#734 + adbc_validation::StreamReader reader; + Handle schema; + Handle batch; + + ASSERT_NO_FATAL_FAILURE(Exec("CREATE TABLE foo (col)")); + ASSERT_NO_FATAL_FAILURE( + Exec("INSERT INTO foo VALUES (1), (2), (2), (3), (3), (3), (4), (4), (4), (4)")); + + ASSERT_THAT(adbc_validation::MakeSchema(&schema.value, {{"", NANOARROW_TYPE_INT64}}), + IsOkErrno()); + ASSERT_THAT(adbc_validation::MakeBatch(&schema.value, &batch.value, + /*error=*/nullptr, {4, 1, 3, 2}), + IsOkErrno()); + + ASSERT_NO_FATAL_FAILURE(Bind(&batch.value, &schema.value)); + ASSERT_NO_FATAL_FAILURE( + Exec("SELECT col FROM foo WHERE col = ?", /*infer_rows=*/3, &reader)); + ASSERT_EQ(1, reader.schema->n_children); + ASSERT_EQ(NANOARROW_TYPE_INT64, reader.fields[0].type); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NO_FATAL_FAILURE( + CompareArray(reader.array_view->children[0], {4, 4, 4})); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NO_FATAL_FAILURE( + CompareArray(reader.array_view->children[0], {4, 1, 3})); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NO_FATAL_FAILURE( + CompareArray(reader.array_view->children[0], {3, 3, 2})); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NO_FATAL_FAILURE(CompareArray(reader.array_view->children[0], {2})); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(nullptr, reader.array->release); +} + template class SqliteNumericParamTest : public SqliteReaderTest, public ::testing::WithParamInterface { diff --git a/3rd_party/apache-arrow-adbc/c/driver/sqlite/statement_reader.c b/3rd_party/apache-arrow-adbc/c/driver/sqlite/statement_reader.c index abde44a..504a4d8 100644 --- a/3rd_party/apache-arrow-adbc/c/driver/sqlite/statement_reader.c +++ b/3rd_party/apache-arrow-adbc/c/driver/sqlite/statement_reader.c @@ -25,7 +25,7 @@ #include #include -#include "utils.h" +#include "common/utils.h" AdbcStatusCode AdbcSqliteBinderSet(struct AdbcSqliteBinder* binder, struct AdbcError* error) { @@ -253,6 +253,8 @@ const char* StatementReaderGetLastError(struct ArrowArrayStream* self) { void StatementReaderSetError(struct StatementReader* reader) { const char* msg = sqlite3_errmsg(reader->db); + // Reset here so that we don't get an error again in StatementRelease + (void)sqlite3_reset(reader->stmt); strncpy(reader->error.message, msg, sizeof(reader->error.message)); reader->error.message[sizeof(reader->error.message) - 1] = '\0'; } @@ -382,35 +384,41 @@ int StatementReaderGetNext(struct ArrowArrayStream* self, struct ArrowArray* out sqlite3_mutex_enter(sqlite3_db_mutex(reader->db)); while (batch_size < reader->batch_size) { - if (reader->binder) { - char finished = 0; - struct AdbcError error = {0}; - AdbcStatusCode status = AdbcSqliteBinderBindNext(reader->binder, reader->db, - reader->stmt, &finished, &error); - if (status != ADBC_STATUS_OK) { - reader->done = 1; - status = EIO; - if (error.release) { - strncpy(reader->error.message, error.message, sizeof(reader->error.message)); - reader->error.message[sizeof(reader->error.message) - 1] = '\0'; - error.release(&error); - } - break; - } else if (finished) { + int rc = sqlite3_step(reader->stmt); + if (rc == SQLITE_DONE) { + if (!reader->binder) { reader->done = 1; break; + } else { + char finished = 0; + struct AdbcError error = {0}; + status = AdbcSqliteBinderBindNext(reader->binder, reader->db, reader->stmt, + &finished, &error); + if (status != ADBC_STATUS_OK) { + reader->done = 1; + status = EIO; + if (error.release) { + strncpy(reader->error.message, error.message, sizeof(reader->error.message)); + reader->error.message[sizeof(reader->error.message) - 1] = '\0'; + error.release(&error); + } + break; + } else if (finished) { + reader->done = 1; + break; + } + continue; } - } - - int rc = sqlite3_step(reader->stmt); - if (rc == SQLITE_DONE) { - reader->done = 1; - break; } else if (rc == SQLITE_ERROR) { reader->done = 1; status = EIO; StatementReaderSetError(reader); break; + } else if (rc != SQLITE_ROW) { + reader->done = 1; + status = ADBC_STATUS_INTERNAL; + StatementReaderSetError(reader); + break; } for (int col = 0; col < reader->schema.n_children; col++) { @@ -804,7 +812,7 @@ AdbcStatusCode StatementReaderInferOneValue( } case SQLITE_BLOB: default: { - return ADBC_STATUS_IO; + return ADBC_STATUS_NOT_IMPLEMENTED; } } return ADBC_STATUS_OK; @@ -836,25 +844,41 @@ AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, AdbcStatusCode status = StatementReaderInitializeInfer( num_columns, batch_size, validity, data, binary, current_type, error); - if (status == ADBC_STATUS_OK) { + + if (binder) { + char finished = 0; + status = AdbcSqliteBinderBindNext(binder, db, stmt, &finished, error); + if (finished) { + reader->done = 1; + } + } + + if (status == ADBC_STATUS_OK && !reader->done) { int64_t num_rows = 0; while (num_rows < batch_size) { - if (binder) { - char finished = 0; - status = AdbcSqliteBinderBindNext(binder, db, stmt, &finished, error); - if (status != ADBC_STATUS_OK) break; - if (finished) { + int rc = sqlite3_step(stmt); + if (rc == SQLITE_DONE) { + if (!binder) { reader->done = 1; break; + } else { + char finished = 0; + status = AdbcSqliteBinderBindNext(binder, db, stmt, &finished, error); + if (status != ADBC_STATUS_OK) break; + if (finished) { + reader->done = 1; + break; + } } - } - - int rc = sqlite3_step(stmt); - if (rc == SQLITE_DONE) { - reader->done = 1; - break; + continue; } else if (rc == SQLITE_ERROR) { + SetError(error, "Failed to step query: %s", sqlite3_errmsg(db)); status = ADBC_STATUS_IO; + // Reset here so that we don't get an error again in StatementRelease + (void)sqlite3_reset(stmt); + break; + } else if (rc != SQLITE_ROW) { + status = ADBC_STATUS_INTERNAL; break; } diff --git a/3rd_party/apache-arrow-adbc/c/driver_manager/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/driver_manager/CMakeLists.txt index e062b11..dd28470 100644 --- a/3rd_party/apache-arrow-adbc/c/driver_manager/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/driver_manager/CMakeLists.txt @@ -33,7 +33,7 @@ add_arrow_lib(adbc_driver_manager include_directories(SYSTEM ${REPOSITORY_ROOT}) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/) include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver/common) +include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver) foreach(LIB_TARGET ${ADBC_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ADBC_EXPORTING) @@ -58,6 +58,7 @@ if(ADBC_BUILD_TESTS) ../validation/adbc_validation.cc ../validation/adbc_validation_util.cc EXTRA_LINK_LIBS + adbc_driver_common nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-manager-test PRIVATE cxx_std_17) diff --git a/3rd_party/apache-arrow-adbc/c/integration/duckdb/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/integration/duckdb/CMakeLists.txt new file mode 100644 index 0000000..52fb9d0 --- /dev/null +++ b/3rd_party/apache-arrow-adbc/c/integration/duckdb/CMakeLists.txt @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include(FetchContent) + +if(ADBC_BUILD_TESTS) + fetchcontent_declare(duckdb + GIT_REPOSITORY https://github.com/duckdb/duckdb.git + GIT_TAG e8e4cea5ec9d1a84c1f516d0f0674f8785a3e786 # v0.8.0 + GIT_PROGRESS TRUE + USES_TERMINAL_DOWNLOAD TRUE) + set(BUILD_JEMALLOC_EXTENSION + OFF + CACHE INTERNAL "Disable") + set(BUILD_NODE + OFF + CACHE INTERNAL "Disable") + set(BUILD_PYTHON + OFF + CACHE INTERNAL "Disable") + set(BUILD_R + OFF + CACHE INTERNAL "Disable") + set(BUILD_SHELL + OFF + CACHE INTERNAL "Disable") + set(BUILD_UNITTESTS + OFF + CACHE INTERNAL "Disable") + set(ENABLE_SANITIZER + OFF + CACHE INTERNAL "Disable ASAN") + set(ENABLE_UBSAN + OFF + CACHE INTERNAL "Disable UBSAN") + # Force cmake to honor our options here in the subproject + cmake_policy(SET CMP0077 NEW) + fetchcontent_makeavailable(duckdb) + + include_directories(SYSTEM ${REPOSITORY_ROOT}) + include_directories(SYSTEM ${REPOSITORY_ROOT}/c/) + include_directories(PRIVATE ${REPOSITORY_ROOT}/c/driver) + include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) + + add_test_case(integration_duckdb_test + PREFIX + adbc + EXTRA_LABELS + integration-duckdb + SOURCES + duckdb_test.cc + EXTRA_LINK_LIBS + adbc_driver_common + adbc_driver_manager_static + adbc_validation + duckdb + nanoarrow) + add_dependencies(adbc-integration-duckdb-test duckdb) + target_compile_features(adbc-integration-duckdb-test PRIVATE cxx_std_17) + adbc_configure_target(adbc-integration-duckdb-test) +endif() diff --git a/3rd_party/apache-arrow-adbc/c/integration/duckdb/duckdb_test.cc b/3rd_party/apache-arrow-adbc/c/integration/duckdb/duckdb_test.cc new file mode 100644 index 0000000..b2a5c52 --- /dev/null +++ b/3rd_party/apache-arrow-adbc/c/integration/duckdb/duckdb_test.cc @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include +#include + +#include "validation/adbc_validation.h" +#include "validation/adbc_validation_util.h" + +// Convert between our definitions and DuckDB's +AdbcStatusCode DuckDbDriverInitFunc(int version, void* driver, struct AdbcError* error) { + return duckdb_adbc_init(static_cast(version), + reinterpret_cast(driver), + reinterpret_cast(error)); +} + +class DuckDbQuirks : public adbc_validation::DriverQuirks { + public: + AdbcStatusCode SetupDatabase(struct AdbcDatabase* database, + struct AdbcError* error) const override { + if (auto status = + AdbcDriverManagerDatabaseSetInitFunc(database, DuckDbDriverInitFunc, error); + status != ADBC_STATUS_OK) { + return status; + } + + return ADBC_STATUS_OK; + } + + std::string BindParameter(int index) const override { return "?"; } + + bool supports_bulk_ingest() const override { return false; } + bool supports_concurrent_statements() const override { return true; } + bool supports_dynamic_parameter_binding() const override { return false; } + bool supports_get_sql_info() const override { return false; } + bool supports_get_objects() const override { return false; } + bool supports_rows_affected() const override { return false; } + bool supports_transactions() const override { return false; } +}; + +class DuckDbDatabaseTest : public ::testing::Test, public adbc_validation::DatabaseTest { + public: + const adbc_validation::DriverQuirks* quirks() const override { return &quirks_; } + void SetUp() override { ASSERT_NO_FATAL_FAILURE(SetUpTest()); } + void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); } + + protected: + DuckDbQuirks quirks_; +}; +ADBCV_TEST_DATABASE(DuckDbDatabaseTest) + +class DuckDbConnectionTest : public ::testing::Test, + public adbc_validation::ConnectionTest { + public: + const adbc_validation::DriverQuirks* quirks() const override { return &quirks_; } + void SetUp() override { ASSERT_NO_FATAL_FAILURE(SetUpTest()); } + void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); } + + void TestAutocommitDefault() { GTEST_SKIP(); } + void TestMetadataGetTableSchema() { GTEST_SKIP(); } + void TestMetadataGetTableTypes() { GTEST_SKIP(); } + + protected: + DuckDbQuirks quirks_; +}; +ADBCV_TEST_CONNECTION(DuckDbConnectionTest) + +class DuckDbStatementTest : public ::testing::Test, + public adbc_validation::StatementTest { + public: + const adbc_validation::DriverQuirks* quirks() const override { return &quirks_; } + void SetUp() override { ASSERT_NO_FATAL_FAILURE(SetUpTest()); } + void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); } + + // DuckDB doesn't guard against this + void TestNewInit() { GTEST_SKIP(); } + // Accepts Prepare() without any query + void TestSqlPrepareErrorNoQuery() { GTEST_SKIP(); } + + protected: + DuckDbQuirks quirks_; +}; +ADBCV_TEST_STATEMENT(DuckDbStatementTest) diff --git a/3rd_party/apache-arrow-adbc/c/validation/AdbcValidationConfig.cmake.in b/3rd_party/apache-arrow-adbc/c/validation/AdbcValidationConfig.cmake.in deleted file mode 100644 index dc2ca25..0000000 --- a/3rd_party/apache-arrow-adbc/c/validation/AdbcValidationConfig.cmake.in +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -@PACKAGE_INIT@ - -include("${CMAKE_CURRENT_LIST_DIR}/AdbcValidationTargets.cmake") - -check_required_components(AdbcValidation) diff --git a/3rd_party/apache-arrow-adbc/c/validation/CMakeLists.txt b/3rd_party/apache-arrow-adbc/c/validation/CMakeLists.txt index d9449c2..3c83f95 100644 --- a/3rd_party/apache-arrow-adbc/c/validation/CMakeLists.txt +++ b/3rd_party/apache-arrow-adbc/c/validation/CMakeLists.txt @@ -15,49 +15,10 @@ # specific language governing permissions and limitations # under the License. -cmake_minimum_required(VERSION 3.18) -get_filename_component(REPOSITORY_ROOT "../../" ABSOLUTE) -list(APPEND CMAKE_MODULE_PATH "${REPOSITORY_ROOT}/c/cmake_modules/") -include(AdbcVersion) -include(GNUInstallDirs) -include(CMakePackageConfigHelpers) -project(adbc_validation - VERSION "${ADBC_BASE_VERSION}" - LANGUAGES CXX) - -find_package(GTest REQUIRED) -get_filename_component(REPOSITORY_ROOT "../../" ABSOLUTE) - -add_library(adbc_validation STATIC adbc_validation.cc adbc_validation_util.cc) +add_library(adbc_validation OBJECT adbc_validation.cc adbc_validation_util.cc) target_compile_features(adbc_validation PRIVATE cxx_std_17) -target_include_directories(adbc_validation SYSTEM PRIVATE "${REPOSITORY_ROOT}" - "${REPOSITORY_ROOT}/c/vendor/") -target_link_libraries(adbc_validation PUBLIC nanoarrow GTest::gtest GTest::gmock) - -set_target_properties(adbc_validation - PROPERTIES PUBLIC_HEADER "adbc_validation.h;adbc_validation_util.h") - -install(TARGETS adbc_validation - EXPORT AdbcValidationTargets - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/adbc_validation/") - -write_basic_package_version_file( - "${CMAKE_CURRENT_BINARY_DIR}/AdbcValidation/AdbcValidationConfigVersion.cmake" - COMPATIBILITY AnyNewerVersion) -export(EXPORT AdbcValidationTargets - FILE "${CMAKE_CURRENT_BINARY_DIR}/AdbcValidation/AdbcValidationTargets.cmake" - NAMESPACE AdbcValidation::) -configure_package_config_file("AdbcValidationConfig.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/AdbcValidation/AdbcValidationConfig.cmake" - INSTALL_DESTINATION "AdbcValidation/AdbcValidationConfig.cmake" -) - -install(EXPORT AdbcValidationTargets - FILE AdbcValidationTargets.cmake - NAMESPACE AdbcValidation:: - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/AdbcValidation") -install(FILES "${CMAKE_CURRENT_BINARY_DIR}/AdbcValidation/AdbcValidationConfig.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/AdbcValidation/AdbcValidationConfigVersion.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/AdbcValidation" - COMPONENT Devel) +target_include_directories(adbc_validation SYSTEM + PRIVATE "${REPOSITORY_ROOT}" "${REPOSITORY_ROOT}/c/driver/" + "${REPOSITORY_ROOT}/c/vendor/") +target_link_libraries(adbc_validation PUBLIC adbc_driver_common nanoarrow GTest::gtest + GTest::gmock) diff --git a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.cc b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.cc index 0da8068..d73b556 100644 --- a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.cc +++ b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.cc @@ -511,12 +511,18 @@ void ConnectionTest::TestMetadataGetObjectsDbSchemas() { ASSERT_FALSE(ArrowArrayViewIsNull(catalog_db_schemas_list, row)) << "Row " << row << " should have non-null catalog_db_schemas"; + ArrowStringView catalog_name = + ArrowArrayViewGetStringUnsafe(reader.array_view->children[0], row); + const int64_t start_offset = - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row); const int64_t end_offset = - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row + 1); - ASSERT_GT(end_offset, start_offset) - << "Row " << row << " should have nonempty catalog_db_schemas"; + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row + 1); + ASSERT_GE(end_offset, start_offset) + << "Row " << row << " (Catalog " + << std::string(catalog_name.data, catalog_name.size_bytes) + << ") should have nonempty catalog_db_schemas "; + ASSERT_FALSE(ArrowArrayViewIsNull(catalog_db_schemas_list, row)); for (int64_t list_index = start_offset; list_index < end_offset; list_index++) { ASSERT_TRUE(ArrowArrayViewIsNull(db_schema_tables_list, row + list_index)) << "Row " << row << " should have null db_schema_tables"; @@ -545,9 +551,9 @@ void ConnectionTest::TestMetadataGetObjectsDbSchemas() { << "Row " << row << " should have non-null catalog_db_schemas"; const int64_t start_offset = - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row); const int64_t end_offset = - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row + 1); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row + 1); ASSERT_EQ(start_offset, end_offset); } ASSERT_NO_FATAL_FAILURE(reader.Next()); @@ -600,17 +606,17 @@ void ConnectionTest::TestMetadataGetObjectsTables() { << "Row " << row << " should have non-null catalog_db_schemas"; for (int64_t db_schemas_index = - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row); db_schemas_index < - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row + 1); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row + 1); db_schemas_index++) { ASSERT_FALSE(ArrowArrayViewIsNull(db_schema_tables_list, db_schemas_index)) << "Row " << row << " should have non-null db_schema_tables"; for (int64_t tables_index = - ArrowArrayViewGetOffsetUnsafe(db_schema_tables_list, db_schemas_index); + ArrowArrayViewListChildOffset(db_schema_tables_list, db_schemas_index); tables_index < - ArrowArrayViewGetOffsetUnsafe(db_schema_tables_list, db_schemas_index + 1); + ArrowArrayViewListChildOffset(db_schema_tables_list, db_schemas_index + 1); tables_index++) { ArrowStringView table_name = ArrowArrayViewGetStringUnsafe( db_schema_tables->children[0], tables_index); @@ -674,17 +680,17 @@ void ConnectionTest::TestMetadataGetObjectsTablesTypes() { << "Row " << row << " should have non-null catalog_db_schemas"; for (int64_t db_schemas_index = - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row); db_schemas_index < - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row + 1); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row + 1); db_schemas_index++) { ASSERT_FALSE(ArrowArrayViewIsNull(db_schema_tables_list, db_schemas_index)) << "Row " << row << " should have non-null db_schema_tables"; - for (int64_t tables_index = ArrowArrayViewGetOffsetUnsafe( - db_schema_tables_list, row + db_schemas_index); + for (int64_t tables_index = + ArrowArrayViewListChildOffset(db_schema_tables_list, db_schemas_index); tables_index < - ArrowArrayViewGetOffsetUnsafe(db_schema_tables_list, db_schemas_index + 1); + ArrowArrayViewListChildOffset(db_schema_tables_list, db_schemas_index + 1); tables_index++) { ArrowStringView table_name = ArrowArrayViewGetStringUnsafe( db_schema_tables->children[0], tables_index); @@ -771,17 +777,20 @@ void ConnectionTest::TestMetadataGetObjectsColumns() { << "Row " << row << " should have non-null catalog_db_schemas"; for (int64_t db_schemas_index = - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row); db_schemas_index < - ArrowArrayViewGetOffsetUnsafe(catalog_db_schemas_list, row + 1); + ArrowArrayViewListChildOffset(catalog_db_schemas_list, row + 1); db_schemas_index++) { ASSERT_FALSE(ArrowArrayViewIsNull(db_schema_tables_list, db_schemas_index)) << "Row " << row << " should have non-null db_schema_tables"; + ArrowStringView db_schema_name = ArrowArrayViewGetStringUnsafe( + catalog_db_schemas->children[0], db_schemas_index); + for (int64_t tables_index = - ArrowArrayViewGetOffsetUnsafe(db_schema_tables_list, db_schemas_index); + ArrowArrayViewListChildOffset(db_schema_tables_list, db_schemas_index); tables_index < - ArrowArrayViewGetOffsetUnsafe(db_schema_tables_list, db_schemas_index + 1); + ArrowArrayViewListChildOffset(db_schema_tables_list, db_schemas_index + 1); tables_index++) { ArrowStringView table_name = ArrowArrayViewGetStringUnsafe( db_schema_tables->children[0], tables_index); @@ -792,13 +801,15 @@ void ConnectionTest::TestMetadataGetObjectsColumns() { << "Row " << row << " should have non-null table_constraints"; if (iequals(std::string(table_name.data, table_name.size_bytes), - "bulk_ingest")) { + "bulk_ingest") && + iequals(std::string(db_schema_name.data, db_schema_name.size_bytes), + quirks()->db_schema())) { found_expected_table = true; for (int64_t columns_index = - ArrowArrayViewGetOffsetUnsafe(table_columns_list, tables_index); + ArrowArrayViewListChildOffset(table_columns_list, tables_index); columns_index < - ArrowArrayViewGetOffsetUnsafe(table_columns_list, tables_index + 1); + ArrowArrayViewListChildOffset(table_columns_list, tables_index + 1); columns_index++) { ArrowStringView name = ArrowArrayViewGetStringUnsafe( table_columns->children[0], columns_index); @@ -827,6 +838,79 @@ void ConnectionTest::TestMetadataGetObjectsConstraints() { // TODO: can't be done portably (need to create tables with primary keys and such) } +void ConnectionTest::TestMetadataGetObjectsPrimaryKey() { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + if (!quirks()->supports_get_objects()) { + GTEST_SKIP(); + } + + std::optional maybe_ddl = quirks()->PrimaryKeyTableDdl("adbc_pkey_test"); + if (!maybe_ddl.has_value()) { + GTEST_SKIP(); + } + std::string ddl = std::move(*maybe_ddl); + + ASSERT_THAT(quirks()->DropTable(&connection, "adbc_pkey_test", &error), + IsOkStatus(&error)); + + { + Handle statement; + ASSERT_THAT(AdbcStatementNew(&connection, &statement.value, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement.value, ddl.c_str(), &error), + IsOkStatus(&error)); + int64_t rows_affected = 0; + ASSERT_THAT( + AdbcStatementExecuteQuery(&statement.value, nullptr, &rows_affected, &error), + IsOkStatus(&error)); + } + + adbc_validation::StreamReader reader; + ASSERT_THAT( + AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_ALL, nullptr, nullptr, + nullptr, nullptr, nullptr, &reader.stream.value, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_GT(reader.array->length, 0); + + auto get_objects_data = adbc_validation::GetObjectsReader{&reader.array_view.value}; + ASSERT_NE(*get_objects_data, nullptr) + << "could not initialize the AdbcGetObjectsData object"; + + struct AdbcGetObjectsTable* table = + AdbcGetObjectsDataGetTableByName(*get_objects_data, quirks()->catalog().c_str(), + quirks()->db_schema().c_str(), "adbc_pkey_test"); + ASSERT_NE(table, nullptr) << "could not find adbc_pkey_test table"; + + ASSERT_EQ(table->n_table_columns, 1); + struct AdbcGetObjectsColumn* column = AdbcGetObjectsDataGetColumnByName( + *get_objects_data, quirks()->catalog().c_str(), quirks()->db_schema().c_str(), + "adbc_pkey_test", "id"); + ASSERT_NE(column, nullptr) << "could not find id column on adbc_pkey_test table"; + + ASSERT_EQ(table->n_table_constraints, 1) + << "expected 1 constraint on adbc_pkey_test table, found: " + << table->n_table_constraints; + + struct AdbcGetObjectsConstraint* constraint = table->table_constraints[0]; + + std::string_view constraint_type(constraint->constraint_type.data, + constraint->constraint_type.size_bytes); + ASSERT_EQ(constraint_type, "PRIMARY KEY"); + ASSERT_EQ(constraint->n_column_names, 1) + << "expected constraint adbc_pkey_test_pkey to be applied to 1 column, found: " + << constraint->n_column_names; + + std::string_view constraint_column_name( + constraint->constraint_column_names[0].data, + constraint->constraint_column_names[0].size_bytes); + ASSERT_EQ(constraint_column_name, "id"); +} + //------------------------------------------------------------ // Tests of AdbcStatement @@ -1248,11 +1332,10 @@ void StatementTest::TestSqlIngestSample() { IsOkStatus(&error)); ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); - ASSERT_THAT( - AdbcStatementSetSqlQuery( - &statement, "SELECT * FROM bulk_ingest ORDER BY \"int64s\" ASC NULLS FIRST", - &error), - IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, "SELECT * FROM bulk_ingest ORDER BY int64s ASC NULLS FIRST", + &error), + IsOkStatus(&error)); StreamReader reader; ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), @@ -1377,8 +1460,13 @@ void StatementTest::TestSqlPrepareSelectNoParams() { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_THAT(reader.rows_affected, - ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + if (quirks()->supports_rows_affected()) { + ASSERT_THAT(reader.rows_affected, + ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + } else { + ASSERT_THAT(reader.rows_affected, + ::testing::Not(::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1)))); + } ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_EQ(1, reader.schema->n_children); @@ -1453,6 +1541,9 @@ void StatementTest::TestSqlPrepareSelectParams() { auto start = nrows; auto end = nrows + reader.array->length; + ASSERT_LT(start, expected_int32.size()); + ASSERT_LE(end, expected_int32.size()); + switch (reader.fields[0].type) { case NANOARROW_TYPE_INT32: ASSERT_NO_FATAL_FAILURE(CompareArray( @@ -1703,8 +1794,13 @@ void StatementTest::TestSqlQueryInts() { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_THAT(reader.rows_affected, - ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + if (quirks()->supports_rows_affected()) { + ASSERT_THAT(reader.rows_affected, + ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + } else { + ASSERT_THAT(reader.rows_affected, + ::testing::Not(::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1)))); + } ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_EQ(1, reader.schema->n_children); @@ -1744,8 +1840,13 @@ void StatementTest::TestSqlQueryFloats() { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_THAT(reader.rows_affected, - ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + if (quirks()->supports_rows_affected()) { + ASSERT_THAT(reader.rows_affected, + ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + } else { + ASSERT_THAT(reader.rows_affected, + ::testing::Not(::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1)))); + } ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_EQ(1, reader.schema->n_children); @@ -1787,8 +1888,13 @@ void StatementTest::TestSqlQueryStrings() { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_THAT(reader.rows_affected, - ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + if (quirks()->supports_rows_affected()) { + ASSERT_THAT(reader.rows_affected, + ::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1))); + } else { + ASSERT_THAT(reader.rows_affected, + ::testing::Not(::testing::AnyOf(::testing::Eq(1), ::testing::Eq(-1)))); + } ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_EQ(1, reader.schema->n_children); @@ -1801,6 +1907,7 @@ void StatementTest::TestSqlQueryStrings() { ASSERT_FALSE(ArrowArrayViewIsNull(&reader.array_view.value, 0)); ASSERT_FALSE(ArrowArrayViewIsNull(reader.array_view->children[0], 0)); switch (reader.fields[0].type) { + case NANOARROW_TYPE_LARGE_STRING: case NANOARROW_TYPE_STRING: { ASSERT_NO_FATAL_FAILURE( CompareArray(reader.array_view->children[0], {"SaShiSuSeSo"})); diff --git a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.h b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.h index 7f3f175..4e4251b 100644 --- a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.h +++ b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation.h @@ -47,6 +47,13 @@ class DriverQuirks { return ADBC_STATUS_OK; } + /// \brief Drop the given view. Used by tests to reset state. + virtual AdbcStatusCode DropView(struct AdbcConnection* connection, + const std::string& name, + struct AdbcError* error) const { + return ADBC_STATUS_NOT_IMPLEMENTED; + } + virtual AdbcStatusCode EnsureSampleTable(struct AdbcConnection* connection, const std::string& name, struct AdbcError* error) const; @@ -60,6 +67,15 @@ class DriverQuirks { const std::string& name, struct AdbcError* error) const; + /// \brief Get the statement to create a table with a primary key, or nullopt if not + /// supported. + /// + /// The table should have one column: + /// - "id" with Arrow type int64 (primary key) + virtual std::optional PrimaryKeyTableDdl(std::string_view name) const { + return std::nullopt; + } + /// \brief Return the SQL to reference the bind parameter of the given index virtual std::string BindParameter(int index) const { return "?"; } @@ -93,6 +109,15 @@ class DriverQuirks { /// \brief Whether dynamic parameter bindings are supported for prepare virtual bool supports_dynamic_parameter_binding() const { return true; } + + /// \brief Whether ExecuteQuery sets rows_affected appropriately + virtual bool supports_rows_affected() const { return true; } + + /// \brief Default catalog to use for tests + virtual std::string catalog() const { return ""; } + + /// \brief Default Schema to use for tests + virtual std::string db_schema() const { return ""; } }; class DatabaseTest { @@ -142,6 +167,7 @@ class ConnectionTest { void TestMetadataGetObjectsTablesTypes(); void TestMetadataGetObjectsColumns(); void TestMetadataGetObjectsConstraints(); + void TestMetadataGetObjectsPrimaryKey(); protected: struct AdbcError error; @@ -167,7 +193,10 @@ class ConnectionTest { TestMetadataGetObjectsTablesTypes(); \ } \ TEST_F(FIXTURE, MetadataGetObjectsColumns) { TestMetadataGetObjectsColumns(); } \ - TEST_F(FIXTURE, MetadataGetObjectsConstraints) { TestMetadataGetObjectsConstraints(); } + TEST_F(FIXTURE, MetadataGetObjectsConstraints) { \ + TestMetadataGetObjectsConstraints(); \ + } \ + TEST_F(FIXTURE, MetadataGetObjectsPrimaryKey) { TestMetadataGetObjectsPrimaryKey(); } class StatementTest { public: diff --git a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.cc b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.cc index e67be6d..7978947 100644 --- a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.cc +++ b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.cc @@ -64,20 +64,6 @@ std::string ToString(struct ArrowArrayStream* stream) { return ""; } -int64_t ArrowArrayViewGetOffsetUnsafe(struct ArrowArrayView* array_view, int64_t i) { - struct ArrowBufferView* data_view = &array_view->buffer_views[1]; - i += array_view->array->offset; - switch (array_view->storage_type) { - case NANOARROW_TYPE_LIST: - case NANOARROW_TYPE_MAP: - return data_view->data.as_int32[i]; - case NANOARROW_TYPE_LARGE_LIST: - return data_view->data.as_int64[i]; - default: - return INT64_MAX; - } -} - IsErrno::IsErrno(int expected, struct ArrowArrayStream* stream, struct ArrowError* error) : expected_(expected), stream_(stream), error_(error) {} @@ -251,7 +237,7 @@ void CompareSchema( (schema->children[i]->flags & ARROW_FLAG_NULLABLE) != 0) << "Nullability mismatch"; if (std::get<0>(fields[i]).has_value()) { - ASSERT_EQ(*std::get<0>(fields[i]), schema->children[i]->name); + ASSERT_STRCASEEQ(std::get<0>(fields[i])->c_str(), schema->children[i]->name); } } } diff --git a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.h b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.h index f64f64b..a239e76 100644 --- a/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.h +++ b/3rd_party/apache-arrow-adbc/c/validation/adbc_validation_util.h @@ -31,6 +31,7 @@ #include #include #include +#include "common/utils.h" namespace adbc_validation { @@ -42,12 +43,6 @@ std::string ToString(struct AdbcError* error); std::string ToString(struct ArrowError* error); std::string ToString(struct ArrowArrayStream* stream); -// ------------------------------------------------------------ -// Nanoarrow helpers - -/// \brief Get the array offset for a particular index -int64_t ArrowArrayViewGetOffsetUnsafe(struct ArrowArrayView* array_view, int64_t i); - // ------------------------------------------------------------ // Helper to manage C Data Interface/Nanoarrow resources with RAII @@ -203,6 +198,26 @@ struct StreamReader { } }; +/// \brief Read an AdbcGetInfoData struct with RAII safety +struct GetObjectsReader { + explicit GetObjectsReader(struct ArrowArrayView* array_view) : array_view_(array_view) { + // TODO: this swallows any construction errors + get_objects_data_ = AdbcGetObjectsDataInit(array_view); + } + ~GetObjectsReader() { AdbcGetObjectsDataDelete(get_objects_data_); } + + struct AdbcGetObjectsData* operator*() { + return get_objects_data_; + } + struct AdbcGetObjectsData* operator->() { + return get_objects_data_; + } + + private: + struct ArrowArrayView* array_view_; + struct AdbcGetObjectsData* get_objects_data_; +}; + struct SchemaField { std::string name; ArrowType type = NANOARROW_TYPE_UNINITIALIZED; diff --git a/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.h b/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.h index f337795..759c969 100644 --- a/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.h +++ b/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.h @@ -191,6 +191,9 @@ static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, #define _NANOARROW_CHECK_RANGE(x_, min_, max_) \ NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL) +#define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \ + NANOARROW_RETURN_NOT_OK((x_ <= max_) ? NANOARROW_OK : EINVAL) + /// \brief Return code for success. /// \ingroup nanoarrow-errors #define NANOARROW_OK 0 @@ -1625,6 +1628,10 @@ static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_ static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view, int64_t i); +/// \brief Get the index to use into the relevant list child array +static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* array_view, + int64_t i); + /// \brief Get an element in an ArrowArrayView as an integer /// /// This function does not check for null values, that values are actually integers, or @@ -2591,22 +2598,22 @@ static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); break; case NANOARROW_TYPE_UINT32: - _NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX); + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT32_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); break; case NANOARROW_TYPE_UINT16: - _NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX); + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT16_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); break; case NANOARROW_TYPE_UINT8: - _NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX); + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT8_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); break; case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_INT16: case NANOARROW_TYPE_INT8: - _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); + _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX); return ArrowArrayAppendInt(array, value); case NANOARROW_TYPE_DOUBLE: NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, value)); @@ -2910,6 +2917,19 @@ static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* arra } } + +static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_LIST: + return array_view->buffer_views[1].data.as_int32[i]; + case NANOARROW_TYPE_LARGE_LIST: + return array_view->buffer_views[1].data.as_int64[i]; + default: + return -1; + } +} + static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view, int64_t i) { struct ArrowBufferView* data_view = &array_view->buffer_views[1]; diff --git a/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.hpp b/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.hpp index b01d2a6..468e911 100644 --- a/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.hpp +++ b/3rd_party/apache-arrow-adbc/c/vendor/nanoarrow/nanoarrow.hpp @@ -250,6 +250,8 @@ class EmptyArrayStream { static void release_wrapper(struct ArrowArrayStream* stream) { delete reinterpret_cast(stream->private_data); + stream->release = nullptr; + stream->private_data = nullptr; } }; diff --git a/lib/adbc_driver.ex b/lib/adbc_driver.ex index 1ca60e4..f55867f 100644 --- a/lib/adbc_driver.ex +++ b/lib/adbc_driver.ex @@ -6,7 +6,7 @@ defmodule Adbc.Driver do @official_drivers ~w(sqlite postgresql flightsql snowflake)a @official_driver_base_url "https://github.com/apache/arrow-adbc/releases/download/apache-arrow-adbc-" - @version "0.4.0" + @version "0.5.1" def download(driver_name, opts \\ []) when driver_name in @official_drivers do base_url = opts[:base_url] || @official_driver_base_url