From 6ebba5104126ddafec9dd17690cbca50323352bb Mon Sep 17 00:00:00 2001 From: augray Date: Mon, 16 Sep 2024 07:54:20 -0700 Subject: [PATCH] Add basic integrations (#4) Some integrations are with libs that are widely adopted, and essentially come "for free." This PR adds those and associated docs/packaging changes. --- .github/workflows/ci.yaml | 3 +- Makefile | 8 +- README.md | 114 ++++++++++++++++- pyproject.toml | 24 +++- src/airtrain/__init__.py | 2 + src/airtrain/integrations/pandas.py | 46 +++++++ src/airtrain/integrations/polars.py | 46 +++++++ .../integrations/tests/test_pandas.py | 90 +++++++++++++ .../integrations/tests/test_polars.py | 89 +++++++++++++ uv.lock | 121 +++++++++++++++++- 10 files changed, 534 insertions(+), 9 deletions(-) create mode 100644 src/airtrain/integrations/pandas.py create mode 100644 src/airtrain/integrations/polars.py create mode 100644 src/airtrain/integrations/tests/test_pandas.py create mode 100644 src/airtrain/integrations/tests/test_polars.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4282407..f3aea97 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -5,6 +5,7 @@ on: [push] jobs: test-python: runs-on: ubuntu-latest + timeout-minutes: 10 strategy: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] @@ -19,4 +20,4 @@ jobs: run: make lint - name: Run tests - run: make test + run: make ci-test diff --git a/Makefile b/Makefile index b92db7f..d5d4f33 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ py-prep: .PHONY: sync sync: - uv sync + uv sync --extra all .PHONY: fix fix: @@ -43,3 +43,9 @@ lint: .PHONY: test test: uv run pytest ./ + +.PHONY: ci-test +ci-test: + uv sync --extra polars + uv pip install pandas # uv seems to stall if py 3.12 installs this as an extra + uv run pytest ./ diff --git a/README.md b/README.md index ad490a7..402b52f 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,26 @@ This repository holds the SDK for interacting with the tool for 
improving your AI apps, RAG pipelines, and models by curating high-quality training and eval datasets. +## Installation + +To install the core package without any integrations, simply + +`pip install airtrain-py` + +You may install integrations by using pip extras. As an example, to +install the pandas integration: + +`pip install airtrain-py[pandas]` + +If you want to install all integrations, you may do the following: + +`pip install airtrain-py[all]` + +The following are available extras: + +- `pandas` +- `polars` + ## Usage Obtain your API key by going to your user settings on @@ -54,9 +74,99 @@ url = at.upload_from_dicts( [ {"foo": "some text", "bar": "more text"}, {"foo": "even more text", "bar": "so much text"}, - ] + ], + name="My Dataset name", # name is Optional ).url -# You may view your dataset at this URL +# You may view your dataset in the Airtrain dashboard at this URL +# It may take some time to complete ingestion and generation of +# automated insights. You will receive an email when it is complete. print(f"Dataset URL: {url}") ``` + +The data may be any iterable of dictionaries that can be represented using +automatically inferred [Apache Arrow](https://arrow.apache.org/docs/python/index.html) +types. If you would like to give a hint as to the Arrow schema of the data being +uploaded, you may provide one using the `schema` parameter to `upload_from_dicts`. + +### Custom Embeddings + +Airtrain produces a variety of insights into your data automatically. Some of +these insights (ex: automatic clustering) rely on embeddings of the data. 
Airtrain +will also embed your data automatically, but if you wish to provide your own embeddings +you may do so by adding the `embedding_column` parameter when you upload: + +```python +url = at.upload_from_dicts( + [ + {"foo": "some text", "bar": [0.0, 0.707, 0.707, 0.0]}, + {"foo": "even more text", "bar": [0.577, 0.577, 0.0, 0.577]}, + ], + embedding_column="bar", +).url +``` + +If you provide this argument, the embeddings must all be lists of floating point +numbers with the same length. + +### Integrations + +Airtrain provides integrations to allow for uploading data from a variety of +sources. In general most integrations take the form of an `upload_from_x(...)` +function with a signature matching that of `upload_from_dicts` except for +the first parameter specifying the data to be uploaded. Integrations may require +installing the Airtrain SDK [with extras](#installation). + +#### Pandas + +```python +import pandas as pd + +# ... + +df = pd.DataFrame( + { + "foo": ["some text", "more text", "even more"], + "bar": [1, 2, 3], + } +) + + +url = at.upload_from_pandas(df, name="My Pandas Dataset").url +``` + +You may also provide an iterable of dataframes instead of a single one. + +#### Polars + +```python +import polars as pl + +# ... + +df = pl.DataFrame( + { + "foo": ["some text", "more text", "even more"], + "bar": [1, 2, 3], + } +) + + +url = at.upload_from_polars(df, name="My Polars Dataset").url +``` + +You may also provide an iterable of dataframes instead of a single one. + + +#### Arrow + +```python +import pyarrow as pa + +# ... 
+ +table = pa.table({"foo": [1, 2, 3], "bar": ["a", "b", "c"]}) + + +url = at.upload_from_arrow_tables([table], name="My Arrow Dataset").url +``` diff --git a/pyproject.toml b/pyproject.toml index 84dca45..bbb74ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "airtrain" +name = "airtrain-py" description = "SDK for interacting with https://airtrain.ai" version = "0.0.1" requires-python = ">=3.8" @@ -33,6 +33,16 @@ classifiers = [ "Programming Language :: Python :: 3.12", ] +[project.optional-dependencies] +pandas = [ + "pandas>=1.0.0; python_version < '3.12'", + "pandas>=2.0.0; python_version >= '3.12'", +] +polars = [ + "polars>=0.19.0", +] +all = ["airtrain-py[pandas,polars]"] + [tool.uv] dev-dependencies = [ "mypy==1.11.1", @@ -80,3 +90,15 @@ ignore_missing_imports = true [[tool.mypy.overrides]] module = "pyarrow.*" ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pandas.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "polars.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "tests.*" +ignore_missing_imports = true diff --git a/src/airtrain/__init__.py b/src/airtrain/__init__.py index fe44ec0..6b568bd 100644 --- a/src/airtrain/__init__.py +++ b/src/airtrain/__init__.py @@ -4,3 +4,5 @@ upload_from_arrow_tables, upload_from_dicts, ) +from airtrain.integrations.pandas import upload_from_pandas # noqa: F401 +from airtrain.integrations.polars import upload_from_polars # noqa: F401 diff --git a/src/airtrain/integrations/pandas.py b/src/airtrain/integrations/pandas.py new file mode 100644 index 0000000..70d30c2 --- /dev/null +++ b/src/airtrain/integrations/pandas.py @@ -0,0 +1,46 @@ +from typing import Any, Iterable, Union + + +try: + import pandas as pd + + ENABLED = True +except ImportError: + ENABLED = False +import pyarrow as pa + +from airtrain.core import CreationArgs, DatasetMetadata, Unpack, upload_from_arrow_tables + + +# In case pandas is not installed 
+DataFrame = Any + + +def upload_from_pandas( + data: Union[Iterable[DataFrame], DataFrame], + **kwargs: Unpack[CreationArgs], +) -> DatasetMetadata: + """Upload an Airtrain dataset from the provided pandas DataFrame(s). + + Parameters + ---------- + data: + Either an individual pandas DataFrame or an iterable of DataFrames. + Data will be intermediately represented as pyarrow tables. + kwargs: + See `upload_from_arrow_tables` for other arguments. + + Returns + ------- + A DatasetMetadata object summarizing the created dataset. + """ + if not ENABLED: + raise ImportError( + "Pandas integration not enabled. Please install Airtrain package as " + "`airtrain-py[pandas]`" + ) + if isinstance(data, pd.DataFrame): + data = [data] + data = (pa.Table.from_pandas(df) for df in data) # type: ignore + + return upload_from_arrow_tables(data, **kwargs) diff --git a/src/airtrain/integrations/polars.py b/src/airtrain/integrations/polars.py new file mode 100644 index 0000000..f152e1a --- /dev/null +++ b/src/airtrain/integrations/polars.py @@ -0,0 +1,46 @@ +from typing import Any, Iterable, Union + + +try: + import polars as pl + + ENABLED = True +except ImportError: + ENABLED = False + +from airtrain.core import CreationArgs, DatasetMetadata, Unpack, upload_from_arrow_tables + + +# In case polars is not installed +DataFrame = Any + + +def upload_from_polars( + data: Union[Iterable[DataFrame], DataFrame], + **kwargs: Unpack[CreationArgs], +) -> DatasetMetadata: + """Upload an Airtrain dataset from the provided polars DataFrame(s). + + Parameters + ---------- + data: + Either an individual polars DataFrame or an iterable of DataFrames. + Data will be intermediately represented as pyarrow tables. + kwargs: + See `upload_from_arrow_tables` for other arguments. + + Returns + ------- + A DatasetMetadata object summarizing the created dataset. + """ + if not ENABLED: + raise ImportError( + "Polars integration not enabled. 
Please install Airtrain package as " + "`airtrain-py[polars]`" + ) + if isinstance(data, pl.DataFrame): + data = [data] + + data = (df.to_arrow() for df in data) # type: ignore + + return upload_from_arrow_tables(data, **kwargs) diff --git a/src/airtrain/integrations/tests/test_pandas.py b/src/airtrain/integrations/tests/test_pandas.py new file mode 100644 index 0000000..f33409e --- /dev/null +++ b/src/airtrain/integrations/tests/test_pandas.py @@ -0,0 +1,90 @@ +import numpy as np +import pandas as pd +import pytest + +from airtrain.core import DatasetMetadata +from airtrain.integrations.pandas import upload_from_pandas +from tests.fixtures import MockAirtrainClient, mock_client # noqa: F401 + + +def test_upload_from_pandas(mock_client: MockAirtrainClient): # noqa: F811 + df = pd.DataFrame( + [ + {"foo": 42, "bar": "a"}, + {"foo": 43, "bar": "b"}, + {"foo": 44, "bar": "c"}, + {"foo": 45, "bar": "d"}, + ] + ) + name = "Foo dataset" + result = upload_from_pandas(df, name=name) + assert isinstance(result, DatasetMetadata) + assert result.size == df.shape[0] + assert result.name == name + fake_dataset = mock_client.get_fake_dataset(result.id) + assert fake_dataset.name == name + table = fake_dataset.ingested + assert table is not None + assert table.shape[0] == df.shape[0] + assert table["foo"].to_pylist() == [42, 43, 44, 45] + assert table["bar"].to_pylist() == ["a", "b", "c", "d"] + + +def test_upload_from_pandas_multiple(mock_client: MockAirtrainClient): # noqa: F811 + df_1 = pd.DataFrame( + [ + {"foo": 42, "bar": "a"}, + {"foo": 43, "bar": "b"}, + {"foo": 44, "bar": "c"}, + {"foo": 45, "bar": "d"}, + ] + ) + df_2 = pd.DataFrame( + [ + {"foo": 46, "bar": "e"}, + {"foo": 47, "bar": "f"}, + {"foo": 48, "bar": "g"}, + {"foo": 49, "bar": "h"}, + ] + ) + result = upload_from_pandas((df_1, df_2)) + assert isinstance(result, DatasetMetadata) + assert result.size == df_1.shape[0] + df_2.shape[0] + fake_dataset = mock_client.get_fake_dataset(result.id) + table = 
fake_dataset.ingested + assert table is not None + assert table.shape[0] == result.size + assert table["foo"].to_pylist() == [42, 43, 44, 45, 46, 47, 48, 49] + assert table["bar"].to_pylist() == ["a", "b", "c", "d", "e", "f", "g", "h"] + + +def test_upload_from_pandas_embeddings(mock_client: MockAirtrainClient): # noqa: F811 + df = pd.DataFrame( + [ + {"foo": 42, "bar": np.array([1.0, 0.0, 0.0, 0.0])}, + {"foo": 43, "bar": np.array([0.0, 1.0, 0.0, 0.0])}, + {"foo": 44, "bar": np.array([0.0, 0.0, 1.0, 0.0])}, + {"foo": 45, "bar": np.array([0.0, 0.0, 0.0, 1.0])}, + ] + ) + result = upload_from_pandas(df, embedding_column="bar") + assert isinstance(result, DatasetMetadata) + assert result.size == df.shape[0] + fake_dataset = mock_client.get_fake_dataset(result.id) + table = fake_dataset.ingested + assert table is not None + assert table.shape[0] == df.shape[0] + assert table["foo"].to_pylist() == [42, 43, 44, 45] + assert table["bar"].to_pylist()[1] == [0.0, 1.0, 0.0, 0.0] + + df_bad = pd.DataFrame( + [ + {"foo": 42, "bar": np.array([1.0, 0.0, 0.0, 0.0])}, + {"foo": 43, "bar": np.array([0.0, 1.0, 0.0, 0.0])}, + {"foo": 44, "bar": np.array([0.0, 0.0, 1.0])}, + {"foo": 45, "bar": np.array([0.0, 0.0, 0.0, 1.0])}, + ] + ) + with pytest.raises(ValueError): + # one row has a different number of embedding dimensions. 
+ upload_from_pandas(df_bad, embedding_column="bar") diff --git a/src/airtrain/integrations/tests/test_polars.py b/src/airtrain/integrations/tests/test_polars.py new file mode 100644 index 0000000..1df378c --- /dev/null +++ b/src/airtrain/integrations/tests/test_polars.py @@ -0,0 +1,89 @@ +import polars as pl +import pytest + +from airtrain.core import DatasetMetadata +from airtrain.integrations.polars import upload_from_polars +from tests.fixtures import MockAirtrainClient, mock_client # noqa: F401 + + +def test_upload_from_polars(mock_client: MockAirtrainClient): # noqa: F811 + df = pl.DataFrame( + [ + {"foo": 42, "bar": "a"}, + {"foo": 43, "bar": "b"}, + {"foo": 44, "bar": "c"}, + {"foo": 45, "bar": "d"}, + ] + ) + name = "Foo dataset" + result = upload_from_polars(df, name=name) + assert isinstance(result, DatasetMetadata) + assert result.size == df.shape[0] + assert result.name == name + fake_dataset = mock_client.get_fake_dataset(result.id) + assert fake_dataset.name == name + table = fake_dataset.ingested + assert table is not None + assert table.shape[0] == df.shape[0] + assert table["foo"].to_pylist() == [42, 43, 44, 45] + assert table["bar"].to_pylist() == ["a", "b", "c", "d"] + + +def test_upload_from_polars_multiple(mock_client: MockAirtrainClient): # noqa: F811 + df_1 = pl.DataFrame( + [ + {"foo": 42, "bar": "a"}, + {"foo": 43, "bar": "b"}, + {"foo": 44, "bar": "c"}, + {"foo": 45, "bar": "d"}, + ] + ) + df_2 = pl.DataFrame( + [ + {"foo": 46, "bar": "e"}, + {"foo": 47, "bar": "f"}, + {"foo": 48, "bar": "g"}, + {"foo": 49, "bar": "h"}, + ] + ) + result = upload_from_polars((df_1, df_2)) + assert isinstance(result, DatasetMetadata) + assert result.size == df_1.shape[0] + df_2.shape[0] + fake_dataset = mock_client.get_fake_dataset(result.id) + table = fake_dataset.ingested + assert table is not None + assert table.shape[0] == result.size + assert table["foo"].to_pylist() == [42, 43, 44, 45, 46, 47, 48, 49] + assert table["bar"].to_pylist() == ["a", "b", 
"c", "d", "e", "f", "g", "h"] + + +def test_upload_from_polars_embeddings(mock_client: MockAirtrainClient): # noqa: F811 + df = pl.DataFrame( + [ + {"foo": 42, "bar": [1.0, 0.0, 0.0, 0.0]}, + {"foo": 43, "bar": [0.0, 1.0, 0.0, 0.0]}, + {"foo": 44, "bar": [0.0, 0.0, 1.0, 0.0]}, + {"foo": 45, "bar": [0.0, 0.0, 0.0, 1.0]}, + ] + ) + result = upload_from_polars(df, embedding_column="bar") + assert isinstance(result, DatasetMetadata) + assert result.size == df.shape[0] + fake_dataset = mock_client.get_fake_dataset(result.id) + table = fake_dataset.ingested + assert table is not None + assert table.shape[0] == df.shape[0] + assert table["foo"].to_pylist() == [42, 43, 44, 45] + assert table["bar"].to_pylist()[1] == [0.0, 1.0, 0.0, 0.0] + + df_bad = pl.DataFrame( + [ + {"foo": 42, "bar": [1.0, 0.0, 0.0, 0.0]}, + {"foo": 43, "bar": [0.0, 1.0, 0.0, 0.0]}, + {"foo": 44, "bar": [0.0, 0.0, 1.0]}, + {"foo": 45, "bar": [0.0, 0.0, 0.0, 1.0]}, + ] + ) + with pytest.raises(ValueError): + # one row has a different number of embedding dimensions. + upload_from_polars(df_bad, embedding_column="bar") diff --git a/uv.lock b/uv.lock index c696e94..fc3dcec 100644 --- a/uv.lock +++ b/uv.lock @@ -2,12 +2,15 @@ version = 1 requires-python = ">=3.8" resolution-markers = [ "python_full_version < '3.9'", - "python_full_version >= '3.9' and python_full_version < '3.12'", + "python_full_version < '3.9'", + "python_full_version == '3.9.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.11.*'", "python_full_version >= '3.12'", ] [[package]] -name = "airtrain" +name = "airtrain-py" version = "0.0.1" source = { editable = "." 
} dependencies = [ @@ -17,6 +20,18 @@ dependencies = [ { name = "pyarrow" }, ] +[package.optional-dependencies] +all = [ + { name = "pandas" }, + { name = "polars" }, +] +pandas = [ + { name = "pandas" }, +] +polars = [ + { name = "polars" }, +] + [package.dev-dependencies] dev = [ { name = "mypy" }, @@ -28,10 +43,14 @@ dev = [ [package.metadata] requires-dist = [ + { name = "airtrain-py", extras = ["pandas", "polars"], marker = "extra == 'all'" }, { name = "httpx", specifier = ">=0.25.0" }, { name = "numpy", marker = "python_full_version == '3.8.*'", specifier = "<=1.24.4" }, { name = "numpy", marker = "python_full_version >= '3.9'", specifier = ">=1.19.3" }, { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, + { name = "pandas", marker = "python_full_version >= '3.12' and extra == 'pandas'", specifier = ">=2.0.0" }, + { name = "pandas", marker = "python_full_version < '3.12' and extra == 'pandas'", specifier = ">=1.0.0" }, + { name = "polars", marker = "extra == 'polars'", specifier = ">=0.19.0" }, { name = "pyarrow", specifier = ">=13.0.0" }, ] @@ -170,7 +189,7 @@ name = "importlib-metadata" version = "8.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp", marker = "python_full_version < '3.12'" }, + { name = "zipp", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c0/bd/fa8ce65b0a7d4b6d143ec23b0f5fd3f7ab80121078c465bc02baeaab22dc/importlib_metadata-8.4.0.tar.gz", hash = "sha256:9a547d3bc3608b025f93d403fdd1aae741c24fbb8314df4b155675742ce303c5", size = 54320 } wheels = [ @@ -265,7 +284,10 @@ version = "1.24.4" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.9'", - "python_full_version >= '3.9' and python_full_version < '3.12'", + "python_full_version < '3.9'", + "python_full_version == '3.9.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.11.*'", ] sdist = { url = 
"https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229 } wheels = [ @@ -369,6 +391,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985 }, ] +[[package]] +name = "pandas" +version = "2.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "numpy", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/a7/824332581e258b5aa4f3763ecb2a797e5f9a54269044ba2e50ac19936b32/pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", size = 5284455 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/b2/0d4a5729ce1ce11630c4fc5d5522a33b967b3ca146c210f58efde7c40e99/pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", size = 11760908 }, + { url = "https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", size = 10823486 }, + { url = "https://files.pythonhosted.org/packages/c2/59/cb4234bc9b968c57e81861b306b10cd8170272c57b098b724d3de5eda124/pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", size = 11571897 }, + { url = "https://files.pythonhosted.org/packages/e3/59/35a2892bf09ded9c1bf3804461efe772836a5261ef5dfb4e264ce813ff99/pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", size = 12306421 }, + { url = "https://files.pythonhosted.org/packages/94/71/3a0c25433c54bb29b48e3155b959ac78f4c4f2f06f94d8318aac612cb80f/pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", size = 9540792 }, + { url = "https://files.pythonhosted.org/packages/ed/30/b97456e7063edac0e5a405128065f0cd2033adfe3716fb2256c186bd41d0/pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", size = 10664333 }, + { url = "https://files.pythonhosted.org/packages/b3/92/a5e5133421b49e901a12e02a6a7ef3a0130e10d13db8cb657fdd0cba3b90/pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", size = 11645672 }, + { url = "https://files.pythonhosted.org/packages/8f/bb/aea1fbeed5b474cb8634364718abe9030d7cc7a30bf51f40bd494bbc89a2/pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", size = 10693229 }, + { url = "https://files.pythonhosted.org/packages/d6/90/e7d387f1a416b14e59290baa7a454a90d719baebbf77433ff1bdcc727800/pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", size = 11581591 }, + { url = "https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", size = 12219370 }, + { url = "https://files.pythonhosted.org/packages/e4/a5/212b9039e25bf8ebb97e417a96660e3dc925dacd3f8653d531b8f7fd9be4/pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", size = 9482935 }, + { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692 }, + { url = "https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", size = 11653303 }, + { url = "https://files.pythonhosted.org/packages/53/c3/f8e87361f7fdf42012def602bfa2a593423c729f5cb7c97aed7f51be66ac/pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", size = 10710932 }, + { url = "https://files.pythonhosted.org/packages/a7/87/828d50c81ce0f434163bf70b925a0eec6076808e0bca312a79322b141f66/pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", size = 11684018 }, + { url = "https://files.pythonhosted.org/packages/f8/7f/5b047effafbdd34e52c9e2d7e44f729a0655efafb22198c45cf692cdc157/pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", size = 12353723 }, + { url = "https://files.pythonhosted.org/packages/ea/ae/26a2eda7fa581347d69e51f93892493b2074ef3352ac71033c9f32c52389/pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02", size = 9646403 }, + { url = 
"https://files.pythonhosted.org/packages/c3/6c/ea362eef61f05553aaf1a24b3e96b2d0603f5dc71a3bd35688a24ed88843/pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", size = 10777638 }, + { url = "https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", size = 11834160 }, + { url = "https://files.pythonhosted.org/packages/6c/1c/689c9d99bc4e5d366a5fd871f0bcdee98a6581e240f96b78d2d08f103774/pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", size = 10862752 }, + { url = "https://files.pythonhosted.org/packages/cc/b8/4d082f41c27c95bf90485d1447b647cc7e5680fea75e315669dc6e4cb398/pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", size = 11715852 }, + { url = "https://files.pythonhosted.org/packages/9e/0d/91a9fd2c202f2b1d97a38ab591890f86480ecbb596cbc56d035f6f23fdcc/pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", size = 12398496 }, + { url = "https://files.pythonhosted.org/packages/26/7d/d8aa0a2c4f3f5f8ea59fb946c8eafe8f508090ca73e2b08a9af853c1103e/pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", size = 9630766 }, + { url = "https://files.pythonhosted.org/packages/9a/f2/0ad053856debbe90c83de1b4f05915f85fd2146f20faf9daa3b320d36df3/pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", size = 10755902 }, +] + [[package]] name = "parso" version = "0.8.4" @@ -387,6 +448,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, ] +[[package]] +name = "polars" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/a9/cf169ce361224d4b397f52d6fcceb191452ecdc50813ce2aa6c60ff46e04/polars-1.6.0.tar.gz", hash = "sha256:d7e8d5e577883a9755bc3be92ecbf6f20bced68267bdb8bdb440120e905cc19c", size = 3929590 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/a6/00e9c0cc08d8b279ee576dca105fb5b6c3f812f56ce6bbefdf127773641b/polars-1.6.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6d1665c23e3574ebd47a26a5d7b619e6e73e53718c3b0bfd7d08b6a0a4ae7daa", size = 30510442 }, + { url = "https://files.pythonhosted.org/packages/95/0d/7665314925d774236404919678c197abe4818d1820387017a23f21e27815/polars-1.6.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d7f3abf085adf034720b358119c4c8e144bcc2d96010b7e7d0afa11b80da383c", size = 26758515 }, + { url = "https://files.pythonhosted.org/packages/04/1c/1a0a0a2c076bec8501ada9496afe5486c9e994558b0c80057f7e3ee6ec16/polars-1.6.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a166adb429f8ee099c9d803e7470a80c76368437a8b272c67cef9eef6d5e9da1", size = 31869680 }, + { url = "https://files.pythonhosted.org/packages/c1/95/224139dbd93ce450f194233f643f08e759f369c10c5bd62a13d615dd886c/polars-1.6.0-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:1c811b772c9476f7f0bb4445a8387d2ab6d86f5e79140b1bfba914a32788d261", size = 28441792 }, + { url = "https://files.pythonhosted.org/packages/fa/cb/8f97ea9bbe41f862cc685b1f223ee8508c60f6510918de75637b3539e62d/polars-1.6.0-cp38-abi3-win_amd64.whl", hash = "sha256:ffae15ffa80fda5cc3af44a340b565bcf7f2ab6d7854d3f967baf505710c78e2", size = 31424668 }, +] + [[package]] name = "pyarrow" version = 
"17.0.0" @@ -464,6 +538,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/b2/741130cbcf2bbfa852ed95a60dc311c9e232c7ed25bac3d9b8880a8df4ae/pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32", size = 323580 }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + [[package]] name = "python-lsp-jsonrpc" version = "1.1.2" @@ -509,6 +595,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/49/37c9659f76dbf1018d88892c14184db36ce9df09ea7d760162584aee8a58/python_lsp_server-1.12.0-py3-none-any.whl", hash = "sha256:2e912c661881d85f67f2076e4e66268b695b62bf127e07e81f58b187d4bb6eda", size = 74782 }, ] +[[package]] +name = "pytz" +version = "2024.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/90/26/9f1f00a5d021fff16dee3de13d43e5e978f3d58928e129c3a62cf7eb9738/pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812", size = 316214 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319", size = 505474 }, +] + [[package]] name = "ruff" version = "0.6.2" @@ 
-549,6 +644,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4c/69/5b229615794cf021a61fb56a9f512f65a8639260c3c9b60f8e3a6bed4e4e/ruff_lsp-0.0.35-py3-none-any.whl", hash = "sha256:7e9a5581a342122dbd83b007d56e5afbf53bef857bb264bd14e1f2de1438412c", size = 13648 }, ] +[[package]] +name = "six" +version = "1.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/71/39/171f1c67cd00715f190ba0b100d606d440a28c93c7714febeca8b79af85e/six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", size = 34041 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", size = 11053 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -576,6 +680,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, ] +[[package]] +name = "tzdata" +version = "2024.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/74/5b/e025d02cb3b66b7b76093404392d4b44343c69101cc85f4d180dd5784717/tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd", size = 190559 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252", size = 345370 }, +] + [[package]] name = "ujson" version = "5.10.0"