diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index f810028676e1..dadd25a25821 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -98,6 +98,7 @@
     TooManyRowsReturnedError,
 )
 from polars.functions import col, lit
+from polars.polars import PyDataFrame
 from polars.selectors import _expand_selector_dicts, _expand_selectors
 from polars.type_aliases import DbWriteMode, JaxExportType, TorchExportType
 
@@ -121,7 +122,6 @@
     from polars import DataType, Expr, LazyFrame, Series
     from polars.interchange.dataframe import PolarsDataFrame
     from polars.ml.torch import PolarsDataset
-    from polars.polars import PyDataFrame
     from polars.type_aliases import (
         AsofJoinStrategy,
         AvroCompression,
@@ -418,6 +418,47 @@ def __init__(
             )
             raise TypeError(msg)
 
+    @classmethod
+    def deserialize(cls, source: str | Path | IOBase) -> Self:
+        """
+        Read a serialized DataFrame from a file.
+
+        Parameters
+        ----------
+        source
+            Path to a file or a file-like object (by file-like object, we refer to
+            objects that have a `read()` method, such as a file handler (e.g.
+            via builtin `open` function) or `BytesIO`).
+
+        See Also
+        --------
+        DataFrame.serialize
+
+        Examples
+        --------
+        >>> import io
+        >>> df = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
+        >>> json = df.serialize()
+        >>> pl.DataFrame.deserialize(io.StringIO(json))
+        shape: (3, 2)
+        ┌─────┬─────┐
+        │ a   ┆ b   │
+        │ --- ┆ --- │
+        │ i64 ┆ f64 │
+        ╞═════╪═════╡
+        │ 1   ┆ 4.0 │
+        │ 2   ┆ 5.0 │
+        │ 3   ┆ 6.0 │
+        └─────┴─────┘
+        """
+        # The native reader parses bytes, so re-encode in-memory text buffers.
+        if isinstance(source, StringIO):
+            source = BytesIO(source.getvalue().encode())
+        elif isinstance(source, (str, Path)):
+            source = normalize_filepath(source)
+
+        return cls._from_pydf(PyDataFrame.deserialize(source))
+
     @classmethod
     def _from_pydf(cls, py_df: PyDataFrame) -> Self:
         """Construct Polars DataFrame from FFI PyDataFrame object."""
diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py
index 9fabcda950cd..88f7942e9c69 100644
--- a/py-polars/polars/expr/expr.py
+++ b/py-polars/polars/expr/expr.py
@@ -340,7 +340,7 @@ def function(s: Series) -> Series:  # pragma: no cover
     @classmethod
     def deserialize(cls, source: str | Path | IOBase) -> Self:
         """
-        Read an expression from a JSON file.
+        Read a serialized expression from a file.
 
         Parameters
         ----------
@@ -351,10 +351,9 @@ def deserialize(cls, source: str | Path | IOBase) -> Self:
 
         Warnings
         --------
-        This function uses :mod:`pickle` under some circumstances, and as
-        such inherits the security implications. Deserializing can execute
-        arbitrary code so it should only be attempted on trusted data.
-        pickle is only used when the logical plan contains python UDFs.
+        This function uses :mod:`pickle` when the logical plan contains Python UDFs,
+        and as such inherits the security implications. Deserializing can execute
+        arbitrary code, so it should only be attempted on trusted data.
 
         See Also
         --------
diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
index 58e743b173c9..aa2a3025e924 100644
--- a/py-polars/polars/lazyframe/frame.py
+++ b/py-polars/polars/lazyframe/frame.py
@@ -348,7 +348,7 @@ def _scan_python_function(
     @classmethod
     def deserialize(cls, source: str | Path | IOBase) -> Self:
         """
-        Read a logical plan from a JSON file to construct a LazyFrame.
+        Read a logical plan from a file to construct a LazyFrame.
 
         Parameters
         ----------
@@ -359,11 +359,9 @@ def deserialize(cls, source: str | Path | IOBase) -> Self:
 
         Warnings
         --------
-        This function uses :mod:`pickle` under some circumstances, and as
-        such inherits the security implications. Deserializing can execute
-        arbitrary code so it should only be attempted on trusted data.
-        pickle is only used when the logical plan contains python UDFs.
-
+        This function uses :mod:`pickle` when the logical plan contains Python UDFs,
+        and as such inherits the security implications. Deserializing can execute
+        arbitrary code, so it should only be attempted on trusted data.
 
         See Also
         --------
diff --git a/py-polars/src/dataframe/io.rs b/py-polars/src/dataframe/io.rs
index 8f2d8a21e545..151e80435c80 100644
--- a/py-polars/src/dataframe/io.rs
+++ b/py-polars/src/dataframe/io.rs
@@ -183,6 +183,30 @@ impl PyDataFrame {
         Ok(PyDataFrame::new(df))
     }
 
+    /// Deserialize a DataFrame from the JSON bytes behind `py_f`.
+    #[staticmethod]
+    #[cfg(feature = "json")]
+    pub fn deserialize(py: Python, mut py_f: Bound<PyAny>) -> PyResult<Self> {
+        use crate::file::read_if_bytesio;
+        py_f = read_if_bytesio(py_f);
+        let mmap_bytes_r = get_mmap_bytes_reader(&py_f)?;
+
+        // Release the GIL while the JSON is parsed.
+        py.allow_threads(move || {
+            let mmap_read: ReaderBytes = (&mmap_bytes_r).into();
+            let bytes = mmap_read.deref();
+            match serde_json::from_slice::<DataFrame>(bytes) {
+                Ok(df) => Ok(df.into()),
+                Err(e) => {
+                    // Report the serde error message as a Polars ComputeError.
+                    let msg = format!("{e}");
+                    let e = PyPolarsErr::from(PolarsError::ComputeError(msg.into()));
+                    Err(PyErr::from(e))
+                },
+            }
+        })
+    }
+
     #[staticmethod]
     #[cfg(feature = "json")]
     pub fn read_json(