From 82f717b45ecb3c580cd788a41d717dd2f2ae539a Mon Sep 17 00:00:00 2001 From: Lava <34743145+CanglongCl@users.noreply.github.com> Date: Mon, 1 Apr 2024 05:23:00 -0700 Subject: [PATCH] feat(python): Add `read_clipboard` and `DataFrame.write_clipboard` (#15272) --- Cargo.lock | 103 +++++++++++++++++++++++++ Cargo.toml | 1 + py-polars/Cargo.toml | 3 + py-polars/docs/source/reference/io.rst | 8 ++ py-polars/polars/__init__.py | 2 + py-polars/polars/dataframe/frame.py | 22 ++++++ py-polars/polars/io/__init__.py | 2 + py-polars/polars/io/clipboard.py | 36 +++++++++ py-polars/src/functions/io.rs | 24 ++++++ py-polars/src/lib.rs | 6 ++ 10 files changed, 207 insertions(+) create mode 100644 py-polars/polars/io/clipboard.py diff --git a/Cargo.lock b/Cargo.lock index 93c1a6c43eaa..8be56fa3464f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,22 @@ dependencies = [ "uuid", ] +[[package]] +name = "arboard" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2041f1943049c7978768d84e6d0fd95de98b76d6c4727b09e78ec253d29fa58" +dependencies = [ + "clipboard-win", + "log", + "objc", + "objc-foundation", + "objc_id", + "parking_lot", + "thiserror", + "x11rb", +] + [[package]] name = "argminmax" version = "0.6.2" @@ -712,6 +728,12 @@ dependencies = [ "serde", ] +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + [[package]] name = "block-buffer" version = "0.10.4" @@ -925,6 +947,15 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "clipboard-win" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d517d4b86184dbb111d3556a10f1c8a04da7428d2987bf1081602bf11c3aa9ee" +dependencies = [ + "error-code", +] + [[package]] name 
= "cmake" version = "0.1.50" @@ -1311,6 +1342,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "error-code" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0474425d51df81997e2f90a21591180b38eccf27292d755f3e30750225c175b" + [[package]] name = "ethnum" version = "1.5.0" @@ -1485,6 +1522,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "gethostname" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818" +dependencies = [ + "libc", + "windows-targets 0.48.5", +] + [[package]] name = "getrandom" version = "0.2.12" @@ -2110,6 +2157,15 @@ dependencies = [ "libc", ] +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + [[package]] name = "matrixmultiply" version = "0.3.8" @@ -2349,6 +2405,35 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + +[[package]] +name = "objc-foundation" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1add1b659e36c9607c7aab864a76c7a4c2760cd0cd2e120f3fb8b952c7e22bf9" +dependencies = [ + "block", + "objc", + "objc_id", +] + +[[package]] +name = "objc_id" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c92d4ddb4bd7b50d730c215ff871754d0da6b2178849f8a2a2ab69712d0c073b" +dependencies = [ + "objc", +] + [[package]] name = "object" version = "0.32.2" @@ -3108,6 +3193,7 @@ name = "py-polars" version = "0.20.17" dependencies = [ "ahash", + "arboard", "built", "ciborium", "either", @@ 
-4788,6 +4874,23 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "x11rb" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8f25ead8c7e4cba123243a6367da5d3990e0d3affa708ea19dce96356bd9f1a" +dependencies = [ + "gethostname", + "rustix", + "x11rb-protocol", +] + +[[package]] +name = "x11rb-protocol" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e63e71c4b8bd9ffec2c963173a4dc4cbde9ee96961d4fcb4429db9929b606c34" + [[package]] name = "xmlparser" version = "0.13.6" diff --git a/Cargo.toml b/Cargo.toml index 7d75530dc2e1..ef4b780ce9bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,7 @@ version_check = "0.9.4" xxhash-rust = { version = "0.8.6", features = ["xxh3"] } zstd = "0.13" uuid = { version = "1.7.0", features = ["v4"] } +arboard = { version = "3.3.2", default-features = false } polars = { version = "0.38.3", path = "crates/polars", default-features = false } polars-compute = { version = "0.38.3", path = "crates/polars-compute", default-features = false } diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index a48000225eab..85bcbf976fdd 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -17,6 +17,7 @@ polars-plan = { workspace = true } polars-utils = { workspace = true } ahash = { workspace = true } +arboard = { workspace = true, optional = true } ciborium = { workspace = true } either = { workspace = true } itoa = { workspace = true } @@ -126,6 +127,7 @@ search_sorted = ["polars/search_sorted"] decompress = ["polars/decompress-fast"] regex = ["polars/regex"] csv = ["polars/csv"] +clipboard = ["arboard"] object = ["polars/object"] extract_jsonpath = ["polars/extract_jsonpath"] pivot = ["polars/pivot"] @@ -204,6 +206,7 @@ io = [ "avro", "csv", "cloud", + "clipboard", ] optimizations = [ diff --git a/py-polars/docs/source/reference/io.rst b/py-polars/docs/source/reference/io.rst index d3c45469f94a..1f088958a3c0 100644 --- 
a/py-polars/docs/source/reference/io.rst +++ b/py-polars/docs/source/reference/io.rst @@ -11,6 +11,14 @@ Avro read_avro DataFrame.write_avro +Clipboard +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_clipboard + DataFrame.write_clipboard + CSV ~~~ .. autosummary:: diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py index 4691a6f13cc2..492c3e437f63 100644 --- a/py-polars/polars/__init__.py +++ b/py-polars/polars/__init__.py @@ -180,6 +180,7 @@ ) from polars.io import ( read_avro, + read_clipboard, read_csv, read_csv_batched, read_database, @@ -316,6 +317,7 @@ "scan_ndjson", "scan_parquet", "scan_pyarrow_dataset", + "read_clipboard", # polars.stringcache "StringCache", "disable_string_cache", diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 2d0e9cde550e..a34c4aae5b84 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -118,6 +118,7 @@ with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import PyDataFrame from polars.polars import dtype_str_repr as _dtype_str_repr + from polars.polars import write_clipboard_string as _write_clipboard_string if TYPE_CHECKING: import sys @@ -2595,6 +2596,27 @@ def write_csv( return None + def write_clipboard(self, *, separator: str = "\t", **kwargs: Any) -> None: + """ + Copy `DataFrame` in csv format to the system clipboard with `write_csv`. + + Useful for pasting into Excel or other similar spreadsheet software. + + Parameters + ---------- + separator + Separate CSV fields with this symbol. + kwargs + Additional arguments to pass to `write_csv`. + + See Also + -------- + polars.read_clipboard: Read a DataFrame from the clipboard. + write_csv: Write to comma-separated values (CSV) file. 
+ """ + result: str = self.write_csv(file=None, separator=separator, **kwargs) + _write_clipboard_string(result) + def write_avro( self, file: str | Path | IO[bytes], diff --git a/py-polars/polars/io/__init__.py b/py-polars/polars/io/__init__.py index 395f15bd4c94..35f61f1fb596 100644 --- a/py-polars/polars/io/__init__.py +++ b/py-polars/polars/io/__init__.py @@ -1,6 +1,7 @@ """Functions for reading data.""" from polars.io.avro import read_avro +from polars.io.clipboard import read_clipboard from polars.io.csv import read_csv, read_csv_batched, scan_csv from polars.io.database import read_database, read_database_uri from polars.io.delta import read_delta, scan_delta @@ -35,4 +36,5 @@ "scan_ndjson", "scan_parquet", "scan_pyarrow_dataset", + "read_clipboard", ] diff --git a/py-polars/polars/io/clipboard.py b/py-polars/polars/io/clipboard.py new file mode 100644 index 000000000000..aa441ded0429 --- /dev/null +++ b/py-polars/polars/io/clipboard.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import contextlib +from io import StringIO +from typing import TYPE_CHECKING, Any + +from polars.io.csv.functions import read_csv + +with contextlib.suppress(ImportError): + from polars.polars import read_clipboard_string as _read_clipboard_string + +if TYPE_CHECKING: + from polars import DataFrame + + +def read_clipboard(separator: str = "\t", **kwargs: Any) -> DataFrame: + """ + Read text from clipboard and pass to `read_csv`. + + Useful for reading data copied from Excel or other similar spreadsheet software. + + Parameters + ---------- + separator + Single byte character to use as separator parsing csv from clipboard. + kwargs + Additional arguments passed to `read_csv`. + + See Also + -------- + read_csv : Read a csv file into a DataFrame. + DataFrame.write_clipboard : Write a DataFrame to the clipboard. 
+ """ + csv_string: str = _read_clipboard_string() + io_string = StringIO(csv_string) + return read_csv(source=io_string, separator=separator, **kwargs) diff --git a/py-polars/src/functions/io.rs b/py-polars/src/functions/io.rs index 4f79dc46f873..212d16b19210 100644 --- a/py-polars/src/functions/io.rs +++ b/py-polars/src/functions/io.rs @@ -56,3 +56,27 @@ fn fields_to_pydict(fields: &Vec<Field>, dict: &PyDict, py: Python) -> PyResult< } Ok(()) } + +#[cfg(feature = "clipboard")] +#[pyfunction] +pub fn read_clipboard_string() -> PyResult<String> { + use arboard; + let mut clipboard = + arboard::Clipboard::new().map_err(|e| PyPolarsErr::Other(format!("{e}")))?; + let result = clipboard + .get_text() + .map_err(|e| PyPolarsErr::Other(format!("{e}")))?; + Ok(result) +} + +#[cfg(feature = "clipboard")] +#[pyfunction] +pub fn write_clipboard_string(s: &str) -> PyResult<()> { + use arboard; + let mut clipboard = + arboard::Clipboard::new().map_err(|e| PyPolarsErr::Other(format!("{e}")))?; + clipboard + .set_text(s) + .map_err(|e| PyPolarsErr::Other(format!("{e}")))?; + Ok(()) +} diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index e1c978f595d7..787e747a8b56 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -217,6 +217,12 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> { #[cfg(feature = "parquet")] m.add_wrapped(wrap_pyfunction!(functions::read_parquet_schema)) .unwrap(); + #[cfg(feature = "clipboard")] + m.add_wrapped(wrap_pyfunction!(functions::read_clipboard_string)) + .unwrap(); + #[cfg(feature = "clipboard")] + m.add_wrapped(wrap_pyfunction!(functions::write_clipboard_string)) + .unwrap(); // Functions - meta m.add_wrapped(wrap_pyfunction!(functions::get_index_type))