-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
pipedag compatibility with pydiverse.transform >=0.2 #237
base: main
Are you sure you want to change the base?
Changes from all commits
10ff115
bbdf7a9
78ca5d5
a7d144a
5e7c460
dd4f4f7
f1d5512
e1af510
3529e69
3a52808
65e58e3
ea5a599
b9c2fd7
6891d94
89589aa
21bb162
ef8b038
ba0c35a
25e8862
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,7 +37,7 @@ kazoo = ">=2.8.0" | |
dask = ">=2022.1.0" | ||
|
||
[feature.prefect.dependencies] | ||
prefect = ">=2.13.5" | ||
prefect = ">=2.13.5, <3.0.0" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and could you please also add a note that/why we pinned the prefect versions? |
||
|
||
[feature.snowflake.dependencies] | ||
snowflake-sqlalchemy = ">=1.6.1" | ||
|
@@ -58,7 +58,6 @@ tidypolars = ">=0.2.19" | |
|
||
[feature.all-tests.dependencies] | ||
# Table Hooks | ||
pydiverse-transform = ">=0.1.3, <0.2" | ||
ibis-framework = ">=5.1.0, <9" | ||
ibis-mssql = ">=5.1.0, <9" | ||
ibis-postgres = ">=5.1.0, <9" | ||
|
@@ -68,15 +67,21 @@ arrow-odbc = ">=7.0.4" | |
pyodbc = ">=4.0.35" | ||
pytsql = ">=1.1.4" | ||
|
||
[feature.pdtransform-new.dependencies] | ||
pydiverse-transform = ">0.2.0" | ||
|
||
[feature.pdtransform-old.dependencies] | ||
pydiverse-transform = "<0.2" | ||
|
||
# IBM DB2 | ||
[feature.ibm-db] | ||
# ibm-db is not available on Apple Silicon | ||
# https://ibm-data-and-ai.ideas.ibm.com/ideas/DB2CON-I-92 | ||
platforms = ["linux-64", "osx-64", "win-64"] | ||
[feature.ibm-db.target.osx.pypi-dependencies] | ||
# ibm_db is not on conda-forge for macOS | ||
# https://github.com/ibmdb/db2drivers/issues/3 | ||
ibm-db = ">=3.1.4" | ||
platforms = ["linux-64", "win-64"] # TODO "osx-arm64" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ...and that/why we removed ibm-on-osx support? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @pavelzw maybe that's also something you could elaborate on. I'm wondering if we should maybe unbundle this particular change. wdyt? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue was that ibm-db on macos didn't have a conda-forge build back then (conda-forge/ibm_db-feedstock#77) and just got one recently in conda-forge/ibm_db-feedstock#79. In a future PR (#231) i will re-add macos support for ibm-db with the new package. This lead to us using the ibm-db build on pypi for macos (in combination with telling people to install gcc next to clang) and thus activating
this would require a change of the lockfile which will likely lead to ci breakages. if it's not high effort to fix these breakages (or introduce corresponding pinnings in pixi.toml), we could do it 🤷🏻 |
||
#[feature.ibm-db.target.osx.pypi-dependencies] | ||
## ibm_db is not on conda-forge for macOS | ||
## https://github.com/ibmdb/db2drivers/issues/3 | ||
#ibm-db = ">=3.1.4" | ||
[feature.ibm-db.target.linux.dependencies] | ||
ibm_db = ">=3.1.4" | ||
[feature.ibm-db.target.win.dependencies] | ||
|
@@ -132,8 +137,8 @@ connectorx = ">=0.3.3" | |
connectorx = ">=0.3.3" | ||
|
||
[environments] | ||
default = ["py312", "all-tests", "dev", "filelock", "zookeeper", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
all-tests = ["py312", "all-tests", "dev", "filelock", "zookeeper", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
default = ["py312", "all-tests", "pdtransform-new", "dev", "filelock", "zookeeper", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
all-tests = ["py312", "all-tests", "pdtransform-new", "dev", "filelock", "zookeeper", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
docs = ["docs"] | ||
release = { features=["release"], no-default-feature=true } | ||
py39 = ["py39", "dev", "zookeeper", "pd2", "sa2", "dask", "duckdb1"] | ||
|
@@ -143,11 +148,10 @@ py312 = ["py312", "dev", "zookeeper", "pd2", "sa2", "dask", "duckdb1"] | |
py39pdsa1 = ["py39", "dev", "zookeeper", "pd1", "sa1", "dask", "connectorx-noarm", "duckdb0"] | ||
py311pdsa1 = ["py311", "dev", "zookeeper", "pd1", "sa1", "dask", "connectorx-noarm", "duckdb0"] | ||
py312pdsa1 = ["py39", "dev", "zookeeper", "pd1", "sa1", "dask", "connectorx-noarm", "duckdb0"] | ||
py39all = ["py39", "dev", "zookeeper", "all-tests", "filelock", "dask", "prefect", "pd2", "sa2", "snowflake", "tidypolars"] | ||
py310all = ["py310", "dev", "zookeeper", "all-tests", "filelock", "dask", "prefect", "pd2", "sa2", "snowflake", "tidypolars"] | ||
py311all = ["py311", "dev", "zookeeper", "all-tests", "filelock", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
py312all = ["py312", "dev", "zookeeper", "all-tests", "filelock", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
py39pdsa1all = ["py39", "dev", "zookeeper", "pd1", "sa1", "connectorx-noarm", "all-tests", "filelock", "dask", "prefect", "snowflake", "tidypolars"] | ||
py311pdsa1all = ["py311", "dev", "zookeeper", "pd1", "sa1", "connectorx-noarm", "all-tests", "filelock", "dask", "prefect", "snowflake"] | ||
py39ibm = ["py39", "dev", "zookeeper", "pd2", "sa2", "dask", "ibm-db", "all-tests", "tidypolars"] | ||
py312ibm = ["py312", "dev", "zookeeper", "pd2", "sa2", "dask", "ibm-db", "all-tests"] | ||
py310all = ["py310", "dev", "zookeeper", "all-tests", "pdtransform-new", "filelock", "dask", "prefect", "pd2", "sa2", "snowflake", "tidypolars"] | ||
py311all = ["py311", "dev", "zookeeper", "all-tests", "pdtransform-new", "filelock", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
py312all = ["py312", "dev", "zookeeper", "all-tests", "pdtransform-new", "filelock", "dask", "prefect", "pd2", "sa2", "snowflake"] | ||
py39pdsa1all = ["py39", "dev", "zookeeper", "pd1", "sa1", "connectorx-noarm", "all-tests", "pdtransform-old", "filelock", "dask", "prefect", "snowflake", "tidypolars"] | ||
py311pdsa1all = ["py311", "dev", "zookeeper", "pd1", "sa1", "connectorx-noarm", "all-tests", "pdtransform-old", "filelock", "dask", "prefect", "snowflake"] | ||
py310ibm = ["py310", "dev", "zookeeper", "pd2", "sa2", "dask", "ibm-db", "all-tests", "pdtransform-old", "tidypolars"] | ||
py312ibm = ["py312", "dev", "zookeeper", "pd2", "sa2", "dask", "ibm-db", "all-tests", "pdtransform-old"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,12 +190,37 @@ def retrieve( | |
|
||
try: | ||
import pydiverse.transform as pdt | ||
|
||
try: | ||
from pydiverse.transform.eager import PandasTableImpl | ||
|
||
_ = PandasTableImpl | ||
|
||
pdt_old = pdt | ||
pdt_new = None | ||
except ImportError: | ||
try: | ||
# detect if 0.2 or >0.2 is active | ||
# this import would only work in <=0.2 | ||
from pydiverse.transform.extended import Polars | ||
|
||
# ensures a "used" state for the import, preventing black from deleting it | ||
_ = Polars | ||
|
||
pdt_old = None | ||
pdt_new = pdt | ||
except ImportError: | ||
raise NotImplementedError( | ||
"pydiverse.transform 0.2.0 isn't supported" | ||
) from None | ||
except ImportError: | ||
pdt = None | ||
pdt_old = None | ||
pdt_new = None | ||
|
||
|
||
@ParquetTableCache.register_table(pdt) | ||
class PydiverseTransformTableHook(TableHook[ParquetTableCache]): | ||
@ParquetTableCache.register_table(pdt_old) | ||
class PydiverseTransformTableHookOld(TableHook[ParquetTableCache]): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there no more indicative naming possible here instead of just Old/New? 😅 |
||
@classmethod | ||
def can_materialize(cls, type_) -> bool: | ||
return issubclass(type_, pdt.Table) | ||
|
@@ -240,3 +265,52 @@ def retrieve( | |
return pdt.Table(PandasTableImpl(table.name, df)) | ||
|
||
raise ValueError(f"Invalid type {as_type}") | ||
|
||
|
||
@ParquetTableCache.register_table(pdt_new) | ||
class PydiverseTransformTableHookNew(TableHook[ParquetTableCache]): | ||
@classmethod | ||
def can_materialize(cls, type_) -> bool: | ||
return issubclass(type_, pdt.Table) | ||
|
||
@classmethod | ||
def can_retrieve(cls, type_) -> bool: | ||
from pydiverse.transform.extended import Pandas | ||
|
||
return issubclass(type_, Pandas) | ||
|
||
@classmethod | ||
def materialize( | ||
cls, store: ParquetTableCache, table: Table[pdt.Table], stage_name: str | ||
): | ||
from pydiverse.transform.extended import ( | ||
Polars, | ||
export, | ||
) | ||
|
||
t = table.obj | ||
table = table.copy_without_obj() | ||
|
||
try: | ||
table.obj = t >> export(Polars()) | ||
hook = store.get_hook_subclass(PolarsTableHook) | ||
return hook.materialize(store, table, stage_name) | ||
except Exception as e: | ||
raise TypeError(f"Unsupported type {type(t._ast).__name__}") from e | ||
|
||
@classmethod | ||
def retrieve( | ||
cls, | ||
store: ParquetTableCache, | ||
table: Table, | ||
stage_name: str | None, | ||
as_type: type, | ||
): | ||
from pydiverse.transform.extended import Polars | ||
|
||
if isinstance(as_type, Polars): | ||
hook = store.get_hook_subclass(PandasTableHook) | ||
df = hook.retrieve(store, table, stage_name, pd.DataFrame) | ||
return pdt.Table(df) | ||
|
||
raise ValueError(f"Invalid type {as_type}") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you please add to the PR description why we decided to drop Python 3.9 from the tests?