Skip to content

Commit 1aaede5

Browse files
committed
add PyBackedStr and PyBackedBytes
1 parent 94b7d7e commit 1aaede5

File tree

2 files changed

+122
-0
lines changed

2 files changed

+122
-0
lines changed

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ pub mod marshal;
426426
pub mod sync;
427427
pub mod panic;
428428
pub mod prelude;
429+
pub mod py_backed;
429430
pub mod pycell;
430431
pub mod pyclass;
431432
pub mod pyclass_init;

src/py_backed.rs

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//! This module provides some wrappers around `str` and `[u8]` where the storage is owned by a Python `str` or `bytes` object.
2+
//!
3+
//! This can help avoid copying text or byte data sourced from Python.
4+
5+
use std::ops::Deref;
6+
7+
use crate::{
8+
types::{
9+
any::PyAnyMethods, bytearray::PyByteArrayMethods, bytes::PyBytesMethods,
10+
string::PyStringMethods, PyByteArray, PyBytes, PyString,
11+
},
12+
Bound, DowncastError, FromPyObject, Py, PyAny, PyResult,
13+
};
14+
15+
/// A wrapper around `str` where the storage is owned by a Python `bytes` or `str` object.
16+
///
17+
/// This type gives access to the underlying data via a `Deref` implementation.
18+
pub struct PyBackedStr {
19+
#[allow(dead_code)]
20+
storage: PyBackedStrStorage,
21+
data: *const u8,
22+
length: usize,
23+
}
24+
25+
#[allow(dead_code)]
26+
enum PyBackedStrStorage {
27+
String(Py<PyString>),
28+
Bytes(Py<PyBytes>),
29+
}
30+
31+
impl Deref for PyBackedStr {
32+
type Target = str;
33+
fn deref(&self) -> &str {
34+
unsafe {
35+
// Safety: `data` is a pointer to the start of a valid UTF-8 string of length `length`.
36+
std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.data, self.length))
37+
}
38+
}
39+
}
40+
41+
impl FromPyObject<'_> for PyBackedStr {
42+
fn extract_bound(obj: &Bound<'_, PyAny>) -> PyResult<Self> {
43+
let py_string = obj.downcast::<PyString>()?;
44+
#[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
45+
{
46+
let s = py_string.to_str()?;
47+
let data = s.as_ptr();
48+
let length = s.len();
49+
Ok(Self {
50+
storage: PyBackedStrStorage::String(py_string.to_owned().unbind()),
51+
data,
52+
length,
53+
})
54+
}
55+
#[cfg(not(any(Py_3_10, not(Py_LIMITED_API))))]
56+
{
57+
let bytes = py_string.encode_utf8()?;
58+
let b = bytes.as_bytes();
59+
let data = b.as_ptr();
60+
let length = b.len();
61+
Ok(Self {
62+
storage: PyBackedStrStorage::Bytes(bytes.unbind()),
63+
data,
64+
length,
65+
})
66+
}
67+
}
68+
}
69+
70+
/// A wrapper around `[u8]` where the storage is either owned by a Python `bytes` object, or a Rust `Vec<u8>`.
71+
///
72+
/// This type gives access to the underlying data via a `Deref` implementation.
73+
pub struct PyBackedBytes {
74+
#[allow(dead_code)] // only held so that the storage is not dropped
75+
storage: PyBackedBytesStorage,
76+
data: *const u8,
77+
length: usize,
78+
}
79+
80+
enum PyBackedBytesStorage {
81+
Python(Py<PyBytes>),
82+
Rust(Vec<u8>),
83+
}
84+
85+
impl Deref for PyBackedBytes {
86+
type Target = [u8];
87+
fn deref(&self) -> &[u8] {
88+
unsafe {
89+
// Safety: `data` is a pointer to the start of a buffer of length `length`.
90+
std::slice::from_raw_parts(self.data, self.length)
91+
}
92+
}
93+
}
94+
95+
impl FromPyObject<'_> for PyBackedBytes {
96+
fn extract_bound(obj: &Bound<'_, PyAny>) -> PyResult<Self> {
97+
if let Ok(bytes) = obj.downcast::<PyBytes>() {
98+
let b = bytes.as_bytes();
99+
let data = b.as_ptr();
100+
let len = b.len();
101+
return Ok(Self {
102+
storage: PyBackedBytesStorage::Python(bytes.to_owned().unbind()),
103+
data,
104+
length: len,
105+
});
106+
}
107+
108+
if let Ok(bytearray) = obj.downcast::<PyByteArray>() {
109+
let s = bytearray.to_vec();
110+
let data = s.as_ptr();
111+
let len = s.len();
112+
return Ok(Self {
113+
storage: PyBackedBytesStorage::Rust(s),
114+
data,
115+
length: len,
116+
});
117+
}
118+
119+
return Err(DowncastError::new(obj, "`bytes` or `bytearray`").into());
120+
}
121+
}

0 commit comments

Comments
 (0)