Skip to content

Commit

Permalink
chore(python): Add unstable warning to hive_schema functionality (#…
Browse files: browse the repository at this point in the history
  • Loading branch information
stinodego authored Apr 6, 2024
1 parent: 93b194e; commit: ac1b088
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions py-polars/polars/io/parquet/functions.py
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@

import polars._reexport as pl
from polars._utils.deprecation import deprecate_renamed_parameter
from polars._utils.unstable import issue_unstable_warning
from polars._utils.various import is_int_sequence, normalize_filepath
from polars.convert import from_arrow
from polars.dependencies import _PYARROW_AVAILABLE
@@ -75,6 +76,10 @@ def read_parquet(
hive_schema
The column names and data types of the columns by which the data is partitioned.
If set to `None` (default), the schema of the Hive partitions is inferred.
.. warning::
This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.
rechunk
Make sure that all columns are contiguous in memory by
aggregating the chunks into a single array.
@@ -123,6 +128,10 @@ def read_parquet(
benchmarking the parquet-reader as `rechunk` can be an expensive operation
that should not contribute to the timings.
"""
if hive_schema is not None:
msg = "The `hive_schema` parameter of `read_parquet` is considered unstable."
issue_unstable_warning(msg)

# Dispatch to pyarrow if requested
if use_pyarrow:
if not _PYARROW_AVAILABLE:
@@ -269,6 +278,10 @@ def scan_parquet(
hive_schema
The column names and data types of the columns by which the data is partitioned.
If set to `None` (default), the schema of the Hive partitions is inferred.
.. warning::
This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.
rechunk
In case of reading multiple files via a glob pattern rechunk the final DataFrame
into contiguous memory chunks.
@@ -315,6 +328,10 @@ def scan_parquet(
... }
>>> pl.scan_parquet(source, storage_options=storage_options) # doctest: +SKIP
"""
if hive_schema is not None:
msg = "The `hive_schema` parameter of `scan_parquet` is considered unstable."
issue_unstable_warning(msg)

if isinstance(source, (str, Path)):
source = normalize_filepath(source)
else:
Expand Down

0 comments on commit ac1b088

Please sign in to comment.