Skip to content

Commit

Permalink
ci: use duckdb instead of ibis to test interchange-only support (vega…
Browse files Browse the repository at this point in the history
…#3672)

Co-authored-by: dangotbanned <[email protected]>
  • Loading branch information
MarcoGorelli and dangotbanned authored Nov 3, 2024
1 parent c28dbb9 commit c5d3bdf
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 65 deletions.
5 changes: 3 additions & 2 deletions altair/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ class SupportsGeoInterface(Protocol):


def is_data_type(obj: Any) -> TypeIs[DataType]:
    """
    Return ``True`` when ``obj`` is one of the supported data containers.

    An object qualifies when it is either:

    - a ``dict`` or an instance of ``SupportsGeoInterface``; or
    - something that ``nw.from_native`` can wrap as a ``nw.DataFrame`` when
      called with ``eager_or_interchange_only=True``.

    ``strict=False`` is passed to ``nw.from_native`` and the result is only
    inspected with ``isinstance``, so an object that does not come back as a
    ``nw.DataFrame`` makes this function return ``False``.
    """
    # Cheap structural checks first; the Narwhals conversion only runs when
    # they do not already settle the answer.
    if isinstance(obj, (dict, SupportsGeoInterface)):
        return True
    return isinstance(
        nw.from_native(obj, eager_or_interchange_only=True, strict=False),
        nw.DataFrame,
    )


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ all = [
dev = [
"hatch",
"ruff>=0.6.0",
"ibis-framework[polars]",
"duckdb>=1.0",
"ipython[kernel]",
"pandas>=1.1.3",
"pytest",
Expand Down
60 changes: 48 additions & 12 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@

import pkgutil
import re
import sys
from importlib.util import find_spec
from typing import TYPE_CHECKING
from pathlib import Path
from typing import TYPE_CHECKING, Any

import pytest

from tests import examples_arguments_syntax, examples_methods_syntax

if TYPE_CHECKING:
import sys
from collections.abc import Collection, Iterator, Mapping
from collections.abc import Callable, Collection, Iterator, Mapping
from re import Pattern

if sys.version_info >= (3, 11):
Expand All @@ -24,6 +25,21 @@
"pytest.MarkDecorator | Collection[pytest.MarkDecorator | pytest.Mark]"
)


def windows_has_tzdata() -> bool:
    """
    Report whether ``~/Downloads/tzdata`` exists.

    Adapted from PyArrow (``python/pyarrow/tests/util.py``): this is the
    default location where ``tz.cpp`` will look for the timezone database
    (until that is made configurable at run-time).

    Used to skip tests on Windows when the tz database is not configured.
    See https://github.com/vega/altair/issues/3050.
    """
    tzdata_dir = Path.home().joinpath("Downloads", "tzdata")
    return tzdata_dir.exists()


slow: pytest.MarkDecorator = pytest.mark.slow()
"""
Custom ``pytest.mark`` decorator.
Expand Down Expand Up @@ -69,17 +85,37 @@
"""


def skip_requires_pyarrow(
    fn: Callable[..., Any] | None = None, /, *, requires_tzdata: bool = False
) -> Callable[..., Any]:
    """
    Decorator that skips a test when the `pyarrow`_ import would fail.

    Can be used bare (``@skip_requires_pyarrow``) or called with options
    (``@skip_requires_pyarrow(requires_tzdata=True)``).

    Parameters
    ----------
    fn
        The test function, when used as a bare decorator.
        Leave as ``None`` to get a decorator back instead.
    requires_tzdata
        When ``True``, the test is additionally marked as expected to fail
        on Windows if the timezone database is not installed.
        See https://github.com/vega/altair/issues/3050

    Returns
    -------
    The marked test function when ``fn`` is given, otherwise a decorator
    that applies the marks to the function it receives.

    .. _pyarrow:
        https://pypi.org/project/pyarrow/
    """
    marks = [
        pytest.mark.skipif(
            find_spec("pyarrow") is None, reason="`pyarrow` not installed."
        )
    ]
    if requires_tzdata:
        marks.append(
            pytest.mark.xfail(
                sys.platform == "win32" and not windows_has_tzdata(),
                reason="Timezone database is not installed on Windows",
            )
        )

    def wrap(test_fn: Callable[..., Any], /) -> Callable[..., Any]:
        # Apply every mark to the test function individually.
        # Calling one mark decorator with another mark decorator does not
        # compose them: the argument is not a test function or class, so it
        # is appended to the mark's positional arguments. For ``xfail`` those
        # are conditions, and a mark object is always truthy, which would
        # turn the test into an unconditional xfail and drop the ``skipif``.
        for mark in marks:
            test_fn = mark(test_fn)
        return test_fn

    return wrap if fn is None else wrap(fn)


def id_func_str_only(val) -> str:
Expand Down
20 changes: 1 addition & 19 deletions tests/utils/test_to_values_narwhals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import re
import sys
from datetime import datetime
from pathlib import Path

import narwhals.stable.v1 as nw
import pandas as pd
Expand All @@ -11,23 +9,7 @@
from tests import skip_requires_pyarrow


def windows_has_tzdata():
    """
    Report whether ``~/Downloads/tzdata`` exists.

    Adapted from PyArrow (``python/pyarrow/tests/util.py``): this is the
    default location where ``tz.cpp`` will look for the timezone database
    (until that is made configurable at run-time).
    """
    tzdata_dir = Path.home() / "Downloads" / "tzdata"
    return tzdata_dir.exists()


# Skip test on Windows when the tz database is not configured.
# See https://github.com/vega/altair/issues/3050.
@pytest.mark.skipif(
sys.platform == "win32" and not windows_has_tzdata(),
reason="Timezone database is not installed on Windows",
)
@skip_requires_pyarrow
@skip_requires_pyarrow(requires_tzdata=True)
def test_arrow_timestamp_conversion():
"""Test that arrow timestamp values are converted to ISO-8601 strings."""
import pyarrow as pa
Expand Down
4 changes: 0 additions & 4 deletions tests/utils/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import io
import json
import sys

import narwhals.stable.v1 as nw
import numpy as np
Expand Down Expand Up @@ -121,9 +120,6 @@ def test_sanitize_dataframe_arrow_columns():


@skip_requires_pyarrow
@pytest.mark.xfail(
sys.platform == "win32", reason="Timezone database is not installed on Windows"
)
def test_sanitize_pyarrow_table_columns() -> None:
import pyarrow as pa

Expand Down
75 changes: 48 additions & 27 deletions tests/vegalite/v5/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from importlib.util import find_spec
from typing import TYPE_CHECKING

import ibis
import duckdb
import jsonschema
import narwhals.stable.v1 as nw
import pandas as pd
Expand All @@ -26,7 +26,7 @@
import altair as alt
from altair.utils.core import use_signature
from altair.utils.schemapi import Optional, SchemaValidationError, Undefined
from tests import skip_requires_vl_convert, slow
from tests import skip_requires_pyarrow, skip_requires_vl_convert, slow

if TYPE_CHECKING:
from typing import Any
Expand Down Expand Up @@ -1607,51 +1607,72 @@ def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch):
assert "numpy" not in sys.modules


@skip_requires_pyarrow(requires_tzdata=True)
def test_interchange_with_date_32():
    """Chart a DuckDB relation that contains a date32 column."""
    # Test that objects which Narwhals only supports at the interchange
    # level can be plotted when they contain date32 columns.
    # `df` looks unused, but the SQL below refers to it by name, hence the
    # `noqa`.
    df = pl.DataFrame(  # noqa: F841
        {"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]}
    )
    rel = duckdb.sql("select * from df")
    result = alt.Chart(rel).mark_line().encode(x="a", y="b").to_dict()
    # Dates are expected to be serialized as ISO-8601 strings at midnight.
    assert next(iter(result["datasets"].values())) == [
        {"a": 1, "b": "2020-01-01T00:00:00"},
        {"a": 2, "b": "2020-01-02T00:00:00"},
        {"a": 3, "b": "2020-01-03T00:00:00"},
    ]


@pytest.mark.skipif(
Version("1.5") > PANDAS_VERSION,
reason="A warning is thrown on old pandas versions",
)
@pytest.mark.xfail(
sys.platform == "win32", reason="Timezone database is not installed on Windows"
)
def test_ibis_with_vegafusion(monkeypatch: pytest.MonkeyPatch):
ibis.set_backend("polars")
df = pl.DataFrame(
@skip_requires_pyarrow(requires_tzdata=True)
def test_interchange_with_vegafusion(monkeypatch: pytest.MonkeyPatch):
# Test that objects which Narwhals only supports at the interchange
# level don't get converted to PyArrow unnecessarily when plotted
# with the vegafusion transformer.
# TODO: this test can be drastically simplified once there is some level of
# DuckDB support in VegaFusion, as it can then just be `alt.Chart(rel)`
# without DuckDBWithInterchangeSupport.
df = pl.DataFrame( # noqa: F841
{
"a": [1, 2, 3],
"b": [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)],
}
)
tbl = ibis.memtable(df)
rel = duckdb.sql("select * from df")

class DuckDBWithInterchangeSupport:
"""
DuckDB doesn't (yet?) support the interchange protocol.
So, we create a DuckDB wrapper which defers to PyArrow's
implementation of the protocol.
"""

def __init__(self, rel: duckdb.DuckDBPyRelation) -> None:
self._rel = rel

def __dataframe__(self, allow_copy: bool = True) -> object:
return self._rel.to_arrow_table().__dataframe__()

rel_df = DuckDBWithInterchangeSupport(rel)
# "poison" `arrow_table_from_dfi_dataframe` to check that it does not get called
# if we use the vegafusion transformer
monkeypatch.setattr(
"altair.utils.data.arrow_table_from_dfi_dataframe", lambda x: 1 / 0
)
tbl = ibis.memtable(df)

# Narwhals doesn't fully support our custom DuckDBWithInterchangeSupport,
# so we need to overwrite `to_native`
def to_native(df, strict):
if isinstance(df, nw.DataFrame):
return rel_df
return df

monkeypatch.setattr("narwhals.stable.v1.to_native", to_native)

with alt.data_transformers.enable("vegafusion"):
result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict(format="vega")
result = (
alt.Chart(rel_df).mark_line().encode(x="a", y="b").to_dict(format="vega")
)
assert next(iter(result["data"]))["values"] == [
{"a": 1, "b": "2020-01-01T00:00:00.000"},
{"a": 2, "b": "2020-01-02T00:00:00.000"},
Expand Down

0 comments on commit c5d3bdf

Please sign in to comment.