@@ -305,6 +305,9 @@ pub trait PyStringMethods<'py> {
305
305
/// replaced with `U+FFFD REPLACEMENT CHARACTER`.
306
306
fn to_string_lossy ( & self ) -> Cow < ' _ , str > ;
307
307
308
+ /// Encodes this string as a Python `bytes` object, using UTF-8 encoding; returns an error if the string cannot be encoded (for example, when it contains a lone surrogate).
309
+ fn encode_utf8 ( & self ) -> PyResult < Bound < ' py , PyBytes > > ;
310
+
308
311
/// Obtains the raw data backing the Python string.
309
312
///
310
313
/// If the Python string object was created through legacy APIs, its internal storage format
@@ -337,6 +340,14 @@ impl<'py> PyStringMethods<'py> for Bound<'py, PyString> {
337
340
self . as_borrowed ( ) . to_string_lossy ( )
338
341
}
339
342
343
+ fn encode_utf8 ( & self ) -> PyResult < Bound < ' py , PyBytes > > { // Encodes via the PyUnicode_AsUTF8String FFI call and returns the result typed as PyBytes.
344
+ unsafe { // SAFETY (per the CPython C API docs; confirm): the call yields a new reference to a bytes object or NULL, so the unchecked downcast to PyBytes is presumed sound.
345
+ ffi:: PyUnicode_AsUTF8String ( self . as_ptr ( ) )
346
+ . assume_owned_or_err ( self . py ( ) )
347
+ . downcast_into_unchecked :: < PyBytes > ( )
348
+ }
349
+ }
350
+
340
351
#[ cfg( not( Py_LIMITED_API ) ) ]
341
352
unsafe fn data ( & self ) -> PyResult < PyStringData < ' _ > > {
342
353
self . as_borrowed ( ) . data ( )
@@ -371,11 +382,7 @@ impl<'a> Borrowed<'a, '_, PyString> {
371
382
372
383
#[ cfg( not( any( Py_3_10 , not( Py_LIMITED_API ) ) ) ) ]
373
384
{
374
- let bytes = unsafe {
375
- ffi:: PyUnicode_AsUTF8String ( self . as_ptr ( ) )
376
- . assume_owned_or_err ( self . py ( ) ) ?
377
- . downcast_into_unchecked :: < PyBytes > ( )
378
- } ;
385
+ let bytes = self . encode_utf8 ( ) ?;
379
386
Ok ( Cow :: Owned (
380
387
unsafe { str:: from_utf8_unchecked ( bytes. as_bytes ( ) ) } . to_owned ( ) ,
381
388
) )
@@ -535,6 +542,28 @@ mod tests {
535
542
} )
536
543
}
537
544
545
+ #[ test]
546
+ fn test_encode_utf8_unicode ( ) { // Checks that non-ASCII text (CJK characters plus an emoji) encodes to the same bytes as the Rust string.
547
+ Python :: with_gil ( |py| {
548
+ let s = "哈哈🐈" ;
549
+ let obj = PyString :: new_bound ( py, s) ;
550
+ assert_eq ! ( s. as_bytes( ) , obj. encode_utf8( ) . unwrap( ) . as_bytes( ) ) ;
551
+ } )
552
+ }
553
+
554
+ #[ test]
555
+ fn test_encode_utf8_surrogate ( ) { // Checks that encoding a Python string holding only the escape \ud800 (a lone surrogate) reports an error.
556
+ Python :: with_gil ( |py| {
557
+ let obj: PyObject = py. eval ( r"'\ud800'" , None , None ) . unwrap ( ) . into ( ) ;
558
+ assert ! ( obj
559
+ . bind( py)
560
+ . downcast:: <PyString >( )
561
+ . unwrap( )
562
+ . encode_utf8( )
563
+ . is_err( ) ) ;
564
+ } )
565
+ }
566
+
538
567
#[ test]
539
568
fn test_to_string_lossy ( ) {
540
569
Python :: with_gil ( |py| {
0 commit comments