Skip to content

Commit

Permalink
ARROW-180 Support PyArrow 13 (#168)
Browse files Browse the repository at this point in the history
* ARROW-180 Support PyArrow 13

* fix prerelease handling

* clean up pytest invocation

* remove Py3.12 support

* switch back to ubuntu 20

* fix manifest

* Use a manylinux wheel for linux builds

* fix handling of env variable

* fix handling of env variable

* revert wheel building on linux

* try building pyarrow from src

* fixup

* fix

* fix and try py312

* try without gcc override

* update to match pyarrow wheel target

* fixups

* cleanup

* fixups

* fix min version

* undo changes to benchmarks file

* add debug print

* clean up asv

* skip failing asv methods

* more skips

* try without setup_cache

* fixups

* more skips

* undo changes

* undo change to workflow

* undo changes to dev guide

* fix gcc version

* address review
  • Loading branch information
blink1073 authored Sep 19, 2023
1 parent 7cabba7 commit c6dd78b
Show file tree
Hide file tree
Showing 15 changed files with 131 additions and 109 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ jobs:
- name: Run benchmarks
run: |
set -eu
set -eux
run_asv () {
if [ ! -e "asv.conf.json" ] ; then
git checkout refs/bm/pr asv.conf.json
git checkout refs/bm/pr benchmarks/__init__.py
git checkout refs/bm/pr benchmarks/benchmarks.py
fi
git show --no-patch --format="%H (%s)"
asv run --python=`which python` --set-commit-hash $(git rev-parse HEAD)
asv run -e --python=`which python` --set-commit-hash $(git rev-parse HEAD)
}
asv machine --yes
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/release-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ jobs:
- uses: actions/setup-python@v4
with:
python-version: ${{env.PYTHON_VERSION}}
cache: 'pip'
cache-dependency-path: 'bindings/python/pyproject.toml'
allow-prereleases: true

- name: Set up QEMU
if: runner.os == 'Linux'
Expand Down Expand Up @@ -84,7 +87,7 @@ jobs:

make_sdist:
name: Make SDist
runs-on: ubuntu-latest
runs-on: macos-latest
steps:
- uses: actions/checkout@v3
with:
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
- uses: pre-commit/action@v3.0.0
with:
Expand All @@ -32,12 +32,12 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: ["ubuntu-20.04", "macos-latest", "windows-latest"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: [3.8, 3.9, "3.10", "3.11"]
fail-fast: false
name: CPython ${{ matrix.python-version }}-${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
with:
Expand Down Expand Up @@ -71,6 +71,7 @@ jobs:
net start MongoDB
- name: Install libbson
run: |
pip install packaging # needed for mongo-c-driver-1.24.4/build/calc_release_version.py
./build-libbson.sh
- name: Install Python dependencies
run: |
Expand All @@ -96,7 +97,7 @@ jobs:
docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Cache conda
uses: actions/cache@v3
env:
Expand Down
1 change: 1 addition & 0 deletions bindings/python/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ graft pymongoarrow

recursive-include test *
recursive-exclude docs *
recursive-exclude benchmarks *

global-exclude *.cpp
global-exclude *.dylib
Expand Down
13 changes: 3 additions & 10 deletions bindings/python/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,9 @@
"repo_subdir": "bindings/python",
"branches": ["main"],
"matrix": {
"req": {
"pyarrow": ["7.0.0"],
"pymongo": ["3.11", "4.1.1"],
"pandas": [],
"Cython": [],
"numpy": []
},
"env": {
"N_DOCS": ["20000", "1000"],
},
"N_DOCS": ["20000", "1000"]
}
},
"environment_type": "virtualenv",
"environment_type": "virtualenv"
}
46 changes: 30 additions & 16 deletions bindings/python/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class Insert(ABC):
rounds = 1

@abc.abstractmethod
def setup_cache(self):
def setup(self):
raise NotImplementedError

def time_insert_arrow(self):
Expand Down Expand Up @@ -94,7 +94,7 @@ class Read(ABC):
rounds = 1

@abc.abstractmethod
def setup_cache(self):
def setup(self):
raise NotImplementedError

# We need this because the naive methods don't always convert nested objects.
Expand Down Expand Up @@ -160,7 +160,7 @@ class ProfileReadArray(Read):
}
)

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()
base_dict = dict(
Expand Down Expand Up @@ -205,7 +205,7 @@ class ProfileReadDocument(Read):
}
)

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()
base_dict = dict(
Expand Down Expand Up @@ -247,7 +247,7 @@ class ProfileReadSmall(Read):
schema = Schema({"x": pyarrow.int64(), "y": pyarrow.float64()})
dtypes = np.dtype(np.dtype([("x", np.int64), ("y", np.float64)]))

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()
base_dict = dict(
Expand All @@ -268,7 +268,7 @@ class ProfileReadLarge(Read):
schema = Schema({k: pyarrow.float64() for k in large_doc_keys})
dtypes = np.dtype([(k, np.float64) for k in large_doc_keys])

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()

Expand All @@ -284,7 +284,7 @@ class ProfileReadExtensionSmall(Read):
schema = Schema({"x": Decimal128Type(), "y": BinaryType(10)})
dtypes = np.dtype(np.dtype([("x", np.object_), ("y", np.object_)]))

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()
base_dict = dict(
Expand All @@ -299,13 +299,20 @@ def setup_cache(self):
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
)

# This must be skipped because arrow can't read the Decimal128Type
def time_conventional_arrow(self):
pass

def time_insert_conventional(self):
pass


class ProfileReadExtensionLarge(Read):
large_doc_keys = [f"{i}" for i in range(LARGE_DOC_SIZE)]
schema = Schema({k: Decimal128Type() for k in large_doc_keys})
dtypes = np.dtype([(k, np.object_) for k in large_doc_keys])

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()

Expand All @@ -316,16 +323,20 @@ def setup_cache(self):
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
)

# This must be skipped because arrow can't read the Decimal128Type
def time_conventional_arrow(self):
pass

def time_insert_conventional(self):
pass


class ProfileInsertSmall(Insert):
large_doc_keys = [f"a{i}" for i in range(LARGE_DOC_SIZE)]
schema = Schema({"x": pyarrow.int64(), "y": pyarrow.float64()})
arrow_table = find_arrow_all(db.benchmark, {}, schema=schema)
pandas_table = find_pandas_all(db.benchmark, {}, schema=schema)
numpy_arrays = find_numpy_all(db.benchmark, {}, schema=schema)
dtypes = np.dtype([("x", np.int64), ("y", np.float64)])

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()
base_dict = dict([("x", 1), ("y", math.pi)])
Expand All @@ -334,17 +345,17 @@ def setup_cache(self):
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
)
self.arrow_table = find_arrow_all(db.benchmark, {}, schema=self.schema)
self.pandas_table = find_pandas_all(db.benchmark, {}, schema=self.schema)
self.numpy_arrays = find_numpy_all(db.benchmark, {}, schema=self.schema)


class ProfileInsertLarge(Insert):
large_doc_keys = [f"a{i}" for i in range(LARGE_DOC_SIZE)]
schema = Schema({k: pyarrow.float64() for k in large_doc_keys})
arrow_table = find_arrow_all(db.benchmark, {}, schema=schema)
pandas_table = find_pandas_all(db.benchmark, {}, schema=schema)
numpy_arrays = find_numpy_all(db.benchmark, {}, schema=schema)
dtypes = np.dtype([(k, np.float64) for k in large_doc_keys])

def setup_cache(self):
def setup(self):
coll = db.benchmark
coll.drop()
base_dict = dict([(k, math.pi) for k in self.large_doc_keys])
Expand All @@ -353,3 +364,6 @@ def setup_cache(self):
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
)
self.arrow_table = find_arrow_all(db.benchmark, {}, schema=self.schema)
self.pandas_table = find_pandas_all(db.benchmark, {}, schema=self.schema)
self.numpy_arrays = find_numpy_all(db.benchmark, {}, schema=self.schema)
2 changes: 1 addition & 1 deletion bindings/python/build-libbson.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -o errexit

# Version of libbson to build
# Keep in sync with pymongoarrow.version._MIN_LIBBSON_VERSION
LIBBSON_VERSION=${LIBBSON_VERSION:-"1.21.1"}
LIBBSON_VERSION=${LIBBSON_VERSION:-"1.23.1"}
if [ -z "$LIBBSON_VERSION" ]
then
echo "Did not provide a libbson revision ID to build"
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/docs/source/developer/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Command Line Tools. Additionally, you need CMake and pkg-config::
$ brew install cmake
$ brew install pkg-config

On Linux, you require gcc 4.8, CMake and pkg-config.
On Linux, installation requires gcc 12, CMake and pkg-config.

Windows is not yet supported.

Expand Down
12 changes: 6 additions & 6 deletions bindings/python/docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ PyMongoArrow is regularly built and tested on macOS and Linux
Python Compatibility
--------------------

PyMongoArrow is currently compatible with CPython 3.8, 3.9, 3.10 and 3.11.
PyMongoArrow is currently compatible with CPython 3.8, 3.9, 3.10, and 3.11.

Using Pip
---------
Expand Down Expand Up @@ -56,20 +56,20 @@ Dependencies

PyMongoArrow requires:

- PyMongo>=3.11 (PyMongo 4.0 is supported from 0.2)
- PyArrow>=7,<7.1
- PyMongo>=4.4
- PyArrow>=13,<13.1

To use PyMongoArrow with a PyMongo feature that requires an optional
dependency, users must install PyMongo with the given dependency manually.

.. note:: PyMongo's optional dependencies are detailed
`here <https://pymongo.readthedocs.io/en/stable/installation.html#dependencies>`_.

For example, to use PyMongoArrow with MongoDB Atlas' ``mongodb+srv://`` URIs
users must install PyMongo with the ``srv`` extra in addition to installing
For example, to use PyMongoArrow with Client-Side Field Level Encryption
users must install PyMongo with the ``encryption`` extra in addition to installing
PyMongoArrow::

$ python -m pip install 'pymongo[srv]' pymongoarrow
$ python -m pip install 'pymongo[encryption]' pymongoarrow

Applications intending to use PyMongoArrow APIs that return query result sets
as :class:`pandas.DataFrame` instances (e.g. :meth:`~pymongoarrow.api.find_pandas_all`)
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/pymongoarrow/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@

__version__ = "1.1.0.dev0"

_MIN_LIBBSON_VERSION = "1.21.0"
_MIN_LIBBSON_VERSION = "1.23.1"
74 changes: 71 additions & 3 deletions bindings/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,70 @@ requires = [
"setuptools>=47.9",
"wheel>=0.37",
"cython>=0.29",
# Must be kept in sync with the `install_requires` in `setup.cfg`
"pyarrow>=12.0,<12.1.0",
# Must be kept in sync with "project.dependencies" below.
"pyarrow>=13.0,<13.1.0",
]

[project]
name = "pymongoarrow"
description = '"Tools for using NumPy, Pandas and PyArrow with MongoDB"'
license = {text = "Apache License, Version 2.0"}
authors = [{name = "Prashant Mital"}]
maintainers = [{name = "MongoDB"}, {name = "Inc."}]
keywords = ["mongo", "mongodb", "pymongo", "arrow", "bson", "numpy", "pandas"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Operating System :: MacOS :: MacOS X",
"Operating System :: POSIX",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Database",
]
requires-python = ">=3.8"
dependencies = [
# Must be kept in sync with "build-system.requires" above.
"pyarrow >=13.0,<13.1",
"pymongo >=4.4,<5",
"pandas >=1.3.5,<3",
]
dynamic = ["version"]

[project.readme]
file = "README.rst"
content-type = "text/x-rst"

[project.urls]
Homepage = "https://github.com/mongodb-labs/mongo-arrow/tree/main/bindings/python"

[project.optional-dependencies]
test = ["pytz", "pytest"]

[tool.setuptools]
zip-safe = false
include-package-data = true
platforms = ["Linux", "Mac OS X"]

[tool.setuptools.package-data]
pymongoarrow = ["*.pxd", "*.pyx", "*.pyi", "*.so.*", "*.dylib", "*.dll", "*.pyd"]

[tool.setuptools.packages.find]
exclude = [
"test",
"docs",
]
namespaces = false

[tool.setuptools.dynamic]
version = {attr = "pymongoarrow.version.__version__"}

[tool.cibuildwheel]
skip = "pp* *-manylinux_i686 *_ppc64le *_s390x *-musllinux*"
before-build = "bash ./cibw_before_build.sh"
Expand All @@ -20,7 +80,7 @@ LIBBSON_INSTALL_DIR = "./libbson"

[tool.cibuildwheel.linux]
archs = "x86_64 aarch64"
manylinux-x86_64-image = "manylinux2014"
manylinux-x86_64-image = "manylinux_2_28"
repair-wheel-command = [
"pip install \"auditwheel>=5,<6\"",
"python addtags.py {wheel} {dest_dir}"
Expand All @@ -29,3 +89,11 @@ repair-wheel-command = [
[tool.cibuildwheel.macos]
archs = "x86_64 arm64"
test-skip = "*arm64"

[tool.pytest.ini_options]
testpaths = ["test"]
addopts = "-ra --maxfail=10 --durations=5"
faulthandler_timeout=1500
filterwarnings = [
"error"
]
Loading

0 comments on commit c6dd78b

Please sign in to comment.