Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into config-typed-dict
Browse files Browse the repository at this point in the history
  • Loading branch information
dangotbanned committed Sep 3, 2024
2 parents 9e1c8c4 + db97a0b commit 9c94918
Show file tree
Hide file tree
Showing 35 changed files with 2,275 additions and 1,847 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,6 @@ Untitled*.ipynb

# hatch, doc generation
data.json

# type stubs
typings/
55 changes: 37 additions & 18 deletions RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

hatch env prune

2. Make certain your branch is in sync with head:
2. Make certain your branch is in sync with head. If you work on a fork, replace `origin` with `upstream`:

git pull upstream main
git pull origin main

3. Do a clean doc build:

Expand All @@ -17,48 +17,67 @@
Navigate to http://localhost:8000 and ensure it looks OK (particularly
do a visual scan of the gallery thumbnails).

4. Update version to, e.g. 5.0.0:
4. Create a new release branch:

git switch -c version_5.0.0

5. Update version to, e.g. 5.0.0:

- in ``altair/__init__.py``
- in ``doc/conf.py``

5. Commit change and push to main:
6. Commit changes and push:

git add . -u
git commit -m "chore: bump version to 5.0.0"
git push upstream main
git commit -m "chore: Bump version to 5.0.0"
git push

7. Merge the release branch into main and make sure that all required checks pass

6. Tag the release:
8. Tag the release:

git tag -a v5.0.0 -m "version 5.0.0 release"
git push upstream v5.0.0
git push origin v5.0.0

7. Build source & wheel distributions:
9. On main, build source & wheel distributions. If you work on a fork, replace `origin` with `upstream`:

git switch main
git pull origin main
hatch clean # clean old builds & distributions
hatch build # create a source distribution and universal wheel

8. publish to PyPI (Requires correct PyPI owner permissions):
10. Publish to PyPI (Requires correct PyPI owner permissions):

hatch publish

9. build and publish docs (Requires write-access to altair-viz/altair-viz.github.io):
11. Build and publish docs (Requires write-access to altair-viz/altair-viz.github.io):

hatch run doc:publish-clean-build

10. update version to, e.g. 5.1.0dev:
12. On main, tag the release. If you work on a fork, replace `origin` with `upstream`:

git tag -a v5.0.0 -m "Version 5.0.0 release"
git push origin v5.0.0

13. Create a new branch:

git switch -c maint_5.1.0dev

14. Update version and add 'dev' suffix, e.g. 5.1.0dev:

- in ``altair/__init__.py``
- in ``doc/conf.py``

11. Commit change and push to main:
15. Commit changes and push:

git add . -u
git commit -m "chore: bump version to 5.1.0dev"
git push upstream main
git commit -m "chore: Bump version to 5.1.0dev"
git push
16. Merge maintenance branch into main

12. Double-check that a conda-forge pull request is generated from the updated
pip package by the conda-forge bot (may take up to ~an hour):
17. Double-check that a conda-forge pull request is generated from the updated
pip package by the conda-forge bot (may take up to several hours):
https://github.com/conda-forge/altair-feedstock/pulls

13. Publish a new release in https://github.com/vega/altair/releases/
18. Publish a new release in https://github.com/vega/altair/releases/
3 changes: 1 addition & 2 deletions altair/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from .core import (
SHORTHAND_KEYS,
SchemaBase,
display_traceback,
infer_encoding_types,
infer_vegalite_type_for_pandas,
Expand All @@ -13,7 +12,7 @@
from .deprecation import AltairDeprecationWarning, deprecated, deprecated_warn
from .html import spec_to_html
from .plugin_registry import PluginRegistry
from .schemapi import Optional, Undefined, is_undefined
from .schemapi import Optional, SchemaBase, Undefined, is_undefined

__all__ = (
"SHORTHAND_KEYS",
Expand Down
6 changes: 0 additions & 6 deletions altair/utils/_vegafusion_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
)
from weakref import WeakValueDictionary

import narwhals.stable.v1 as nw

from altair.utils._importers import import_vegafusion
from altair.utils.core import DataFrameLike
from altair.utils.data import (
Expand Down Expand Up @@ -71,10 +69,6 @@ def vegafusion_data_transformer(
data: DataType | None = None, max_rows: int = 100000
) -> Callable[..., Any] | _VegaFusionReturnType:
"""VegaFusion Data Transformer."""
# Vegafusion does not support Narwhals, so if `data` is a Narwhals
# object, we make sure to extract the native object and let Vegafusion handle it.
# `strict=False` passes `data` through as-is if it is not a Narwhals object.
data = nw.to_native(data, strict=False)
if data is None:
return vegafusion_data_transformer
elif isinstance(data, DataFrameLike) and not isinstance(data, SupportsGeoInterface):
Expand Down
76 changes: 39 additions & 37 deletions altair/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,7 @@ def to_values(data: DataType) -> ToValuesReturnType:
# `strict=False` passes `data` through as-is if it is not a Narwhals object.
data_native = nw.to_native(data, strict=False)
if isinstance(data_native, SupportsGeoInterface):
if _is_pandas_dataframe(data_native):
data_native = sanitize_pandas_dataframe(data_native)
# Maybe the type could be further clarified here that it is
# SupportGeoInterface and then the ignore statement is not needed?
data_sanitized = sanitize_geo_interface(data_native.__geo_interface__)
return {"values": data_sanitized}
return {"values": _from_geo_interface(data_native)}
elif _is_pandas_dataframe(data_native):
data_native = sanitize_pandas_dataframe(data_native)
return {"values": data_native.to_dict(orient="records")}
Expand Down Expand Up @@ -350,32 +345,45 @@ def _compute_data_hash(data_str: str) -> str:
return hashlib.sha256(data_str.encode()).hexdigest()[:32]


def _from_geo_interface(data: SupportsGeoInterface | Any) -> dict[str, Any]:
    """
    Sanitize a ``__geo_interface__`` w/ pre-sanitize step for ``pandas`` if needed.

    Notes
    -----
    Split out to resolve typing issues related to:
    - Intersection types
    - ``typing.TypeGuard``
    - ``pd.DataFrame.__getattr__``
    """
    # pandas frames need their own sanitize pass before the geo-interface one.
    if _is_pandas_dataframe(data):
        data = sanitize_pandas_dataframe(data)
    return sanitize_geo_interface(data.__geo_interface__)


def _data_to_json_string(data: DataType) -> str:
    """
    Return a JSON string representation of the input data.

    Raises
    ------
    KeyError
        If ``data`` is a dict without a ``"values"`` entry.
    NotImplementedError
        If ``data`` is neither a dict nor a DataFrame that narwhals can wrap.
    """
    check_data_type(data)
    if isinstance(data, SupportsGeoInterface):
        # Geo-interface objects get a dedicated sanitize step (incl. pandas).
        return json.dumps(_from_geo_interface(data))
    elif _is_pandas_dataframe(data):
        data = sanitize_pandas_dataframe(data)
        return data.to_json(orient="records", double_precision=15)
    elif isinstance(data, dict):
        if "values" not in data:
            msg = "values expected in data dict, but not present."
            raise KeyError(msg)
        return json.dumps(data["values"], sort_keys=True)
    # Fall back to narwhals for any other eager DataFrame type.
    try:
        data_nw = nw.from_native(data, eager_only=True)
    except TypeError as exc:
        msg = "to_json only works with data expressed as a DataFrame or as a dict"
        raise NotImplementedError(msg) from exc
    data_nw = sanitize_narwhals_dataframe(data_nw)
    return json.dumps(data_nw.rows(named=True))


def _data_to_csv_string(data: dict | pd.DataFrame | DataFrameLike) -> str:
def _data_to_csv_string(data: DataType) -> str:
"""Return a CSV string representation of the input data."""
check_data_type(data)
if isinstance(data, SupportsGeoInterface):
Expand All @@ -398,18 +406,12 @@ def _data_to_csv_string(data: dict | pd.DataFrame | DataFrameLike) -> str:
msg = "pandas is required to convert a dict to a CSV string"
raise ImportError(msg) from exc
return pd.DataFrame.from_dict(data["values"]).to_csv(index=False)
elif isinstance(data, DataFrameLike):
# experimental interchange dataframe support
import pyarrow as pa
import pyarrow.csv as pa_csv

pa_table = arrow_table_from_dfi_dataframe(data)
csv_buffer = pa.BufferOutputStream()
pa_csv.write_csv(pa_table, csv_buffer)
return csv_buffer.getvalue().to_pybytes().decode()
else:
msg = "to_csv only works with data expressed as " "a DataFrame or as a dict"
raise NotImplementedError(msg)
try:
data_nw = nw.from_native(data, eager_only=True)
except TypeError as exc:
msg = "to_csv only works with data expressed as a DataFrame or as a dict"
raise NotImplementedError(msg) from exc
return data_nw.write_csv()


def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> pa.Table:
Expand Down
61 changes: 53 additions & 8 deletions altair/utils/execeval.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,70 @@
from __future__ import annotations

import ast
import sys
from typing import TYPE_CHECKING, Any, Callable, Literal, overload

if TYPE_CHECKING:
from os import PathLike

from _typeshed import ReadableBuffer

if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self


class _CatchDisplay:
"""Class to temporarily catch sys.displayhook."""

def __init__(self):
self.output = None
def __init__(self) -> None:
self.output: Any | None = None

def __enter__(self):
self.old_hook = sys.displayhook
def __enter__(self) -> Self:
self.old_hook: Callable[[object], Any] = sys.displayhook
sys.displayhook = self
return self

def __exit__(self, type, value, traceback):
def __exit__(self, type, value, traceback) -> Literal[False]:
sys.displayhook = self.old_hook
# Returning False will cause exceptions to propagate
return False

def __call__(self, output):
def __call__(self, output: Any) -> None:
self.output = output


def eval_block(code, namespace=None, filename="<string>"):
@overload
def eval_block(
code: str | Any,
namespace: dict[str, Any] | None = ...,
filename: str | ReadableBuffer | PathLike[Any] = ...,
*,
strict: Literal[False] = ...,
) -> Any | None: ...
@overload
def eval_block(
code: str | Any,
namespace: dict[str, Any] | None = ...,
filename: str | ReadableBuffer | PathLike[Any] = ...,
*,
strict: Literal[True] = ...,
) -> Any: ...
def eval_block(
code: str | Any,
namespace: dict[str, Any] | None = None,
filename: str | ReadableBuffer | PathLike[Any] = "<string>",
*,
strict: bool = False,
) -> Any | None:
"""
Execute a multi-line block of code in the given namespace.
If the final statement in the code is an expression, return
the result of the expression.
If ``strict``, raise a ``TypeError`` when the return value would be ``None``.
"""
tree = ast.parse(code, filename="<ast>", mode="exec")
if namespace is None:
Expand All @@ -50,4 +87,12 @@ def eval_block(code, namespace=None, filename="<string>"):
)
exec(compiled, namespace)

return catch_display.output
if strict:
output = catch_display.output
if output is None:
msg = f"Expected a non-None value but got {output!r}"
raise TypeError(msg)
else:
return output
else:
return catch_display.output
Loading

0 comments on commit 9c94918

Please sign in to comment.