Skip to content

add PyBackedStr and PyBackedBytes #3802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions newsfragments/3802.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `PyBackedStr` and `PyBackedBytes`, as alternatives to `&str` and `&bytes` where a Python object owns the data.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ pub mod marshal;
pub mod sync;
pub mod panic;
pub mod prelude;
pub mod pybacked;
pub mod pycell;
pub mod pyclass;
pub mod pyclass_init;
Expand Down
186 changes: 186 additions & 0 deletions src/pybacked.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
//! Contains types for working with Python objects that own the underlying data.

use std::{ops::Deref, ptr::NonNull};

use crate::{
types::{
any::PyAnyMethods, bytearray::PyByteArrayMethods, bytes::PyBytesMethods,
string::PyStringMethods, PyByteArray, PyBytes, PyString,
},
Bound, DowncastError, FromPyObject, Py, PyAny, PyErr, PyResult,
};

/// A wrapper around `str` where the storage is owned by a Python `bytes` or `str` object.
///
/// This type gives access to the underlying data via a `Deref` implementation.
pub struct PyBackedStr {
#[allow(dead_code)] // only held so that the storage is not dropped
storage: Py<PyAny>,
data: NonNull<[u8]>,
}

impl Deref for PyBackedStr {
type Target = str;
fn deref(&self) -> &str {
// Safety: `data` is known to be immutable utf8 string and owned by self
unsafe { std::str::from_utf8_unchecked(self.data.as_ref()) }
}
}

impl TryFrom<Bound<'_, PyString>> for PyBackedStr {
type Error = PyErr;
fn try_from(py_string: Bound<'_, PyString>) -> Result<Self, Self::Error> {
#[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
{
let s = py_string.to_str()?;
let data = NonNull::from(s.as_bytes());
Ok(Self {
storage: py_string.as_any().to_owned().unbind(),
data,
})
}
#[cfg(not(any(Py_3_10, not(Py_LIMITED_API))))]
{
let bytes = py_string.encode_utf8()?;
let b = bytes.as_bytes();
let data = NonNull::from(b);
Ok(Self {
storage: bytes.into_any().unbind(),
data,
})
}
}
}

impl FromPyObject<'_> for PyBackedStr {
fn extract_bound(obj: &Bound<'_, PyAny>) -> PyResult<Self> {
let py_string = obj.downcast::<PyString>()?.to_owned();
Self::try_from(py_string)
}
}

/// A wrapper around `[u8]` where the storage is either owned by a Python `bytes` object, or a Rust `Box<[u8]>`.
///
/// This type gives access to the underlying data via a `Deref` implementation.
pub struct PyBackedBytes {
#[allow(dead_code)] // only held so that the storage is not dropped
storage: PyBackedBytesStorage,
data: NonNull<[u8]>,
}

#[allow(dead_code)]
enum PyBackedBytesStorage {
Python(Py<PyBytes>),
Rust(Box<[u8]>),
}

impl Deref for PyBackedBytes {
type Target = [u8];
fn deref(&self) -> &[u8] {
// Safety: `data` is known to be immutable and owned by self
unsafe { self.data.as_ref() }
}
}

impl From<Bound<'_, PyBytes>> for PyBackedBytes {
fn from(py_bytes: Bound<'_, PyBytes>) -> Self {
let b = py_bytes.as_bytes();
let data = NonNull::from(b);
Self {
storage: PyBackedBytesStorage::Python(py_bytes.to_owned().unbind()),
data,
}
}
}

impl From<Bound<'_, PyByteArray>> for PyBackedBytes {
fn from(py_bytearray: Bound<'_, PyByteArray>) -> Self {
let s = py_bytearray.to_vec().into_boxed_slice();
let data = NonNull::from(s.as_ref());
Self {
storage: PyBackedBytesStorage::Rust(s),
data,
}
}
}

impl FromPyObject<'_> for PyBackedBytes {
fn extract_bound(obj: &Bound<'_, PyAny>) -> PyResult<Self> {
if let Ok(bytes) = obj.downcast::<PyBytes>() {
Ok(Self::from(bytes.to_owned()))
} else if let Ok(bytearray) = obj.downcast::<PyByteArray>() {
Ok(Self::from(bytearray.to_owned()))
} else {
Err(DowncastError::new(obj, "`bytes` or `bytearray`").into())
}
}
}

#[cfg(test)]
mod test {
use super::*;
use crate::Python;

#[test]
fn py_backed_str_empty() {
Python::with_gil(|py| {
let s = PyString::new_bound(py, "");
let py_backed_str = s.extract::<PyBackedStr>().unwrap();
assert_eq!(&*py_backed_str, "");
});
}

#[test]
fn py_backed_str() {
Python::with_gil(|py| {
let s = PyString::new_bound(py, "hello");
let py_backed_str = s.extract::<PyBackedStr>().unwrap();
assert_eq!(&*py_backed_str, "hello");
});
}

#[test]
fn py_backed_str_try_from() {
Python::with_gil(|py| {
let s = PyString::new_bound(py, "hello");
let py_backed_str = PyBackedStr::try_from(s).unwrap();
assert_eq!(&*py_backed_str, "hello");
});
}

#[test]
fn py_backed_bytes_empty() {
Python::with_gil(|py| {
let b = PyBytes::new_bound(py, &[]);
let py_backed_bytes = b.extract::<PyBackedBytes>().unwrap();
assert_eq!(&*py_backed_bytes, &[]);
});
}

#[test]
fn py_backed_bytes() {
Python::with_gil(|py| {
let b = PyBytes::new_bound(py, b"abcde");
let py_backed_bytes = b.extract::<PyBackedBytes>().unwrap();
assert_eq!(&*py_backed_bytes, b"abcde");
});
}

#[test]
fn py_backed_bytes_from_bytes() {
Python::with_gil(|py| {
let b = PyBytes::new_bound(py, b"abcde");
let py_backed_bytes = PyBackedBytes::from(b);
assert_eq!(&*py_backed_bytes, b"abcde");
});
}

#[test]
fn py_backed_bytes_from_bytearray() {
Python::with_gil(|py| {
let b = PyByteArray::new_bound(py, b"abcde");
let py_backed_bytes = PyBackedBytes::from(b);
assert_eq!(&*py_backed_bytes, b"abcde");
});
}
}