Skip to content

Commit 3f340bb

Browse files
committed
Clean up Python bindings.
1 parent 39a2e80 commit 3f340bb

File tree

7 files changed

+32
-21
lines changed

7 files changed

+32
-21
lines changed

bindings/python/src/decoders.rs

+11-6
Original file line numberDiff line numberDiff line change
@@ -424,12 +424,12 @@ pub struct PyBPEDecoder {}
424424
impl PyBPEDecoder {
425425
#[getter]
426426
fn get_suffix(self_: PyRef<Self>) -> String {
427-
getter!(self_, BPE, suffix.clone())
427+
getter!(self_, BPE, suffix.to_string())
428428
}
429429

430430
#[setter]
431431
fn set_suffix(self_: PyRef<Self>, suffix: String) {
432-
setter!(self_, BPE, suffix, suffix);
432+
setter!(self_, BPE, suffix, suffix.into());
433433
}
434434

435435
#[new]
@@ -455,22 +455,27 @@ pub struct PyCTCDecoder {}
455455
impl PyCTCDecoder {
456456
#[getter]
457457
fn get_pad_token(self_: PyRef<Self>) -> String {
458-
getter!(self_, CTC, pad_token.clone())
458+
getter!(self_, CTC, pad_token.to_string())
459459
}
460460

461461
#[setter]
462462
fn set_pad_token(self_: PyRef<Self>, pad_token: String) {
463-
setter!(self_, CTC, pad_token, pad_token);
463+
setter!(self_, CTC, pad_token, pad_token.into());
464464
}
465465

466466
#[getter]
467467
fn get_word_delimiter_token(self_: PyRef<Self>) -> String {
468-
getter!(self_, CTC, word_delimiter_token.clone())
468+
getter!(self_, CTC, word_delimiter_token.clone()).to_string()
469469
}
470470

471471
#[setter]
472472
fn set_word_delimiter_token(self_: PyRef<Self>, word_delimiter_token: String) {
473-
setter!(self_, CTC, word_delimiter_token, word_delimiter_token);
473+
setter!(
474+
self_,
475+
CTC,
476+
word_delimiter_token,
477+
word_delimiter_token.into()
478+
);
474479
}
475480

476481
#[getter]

bindings/python/src/encoding.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ impl PyEncoding {
129129
fn get_tokens(&self) -> Vec<String> {
130130
self.encoding
131131
.get_tokens()
132-
.into_iter()
132+
.iter()
133133
.map(|x| x.to_string())
134134
.collect()
135135
}

bindings/python/src/models.rs

-1
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,6 @@ impl PyBPE {
590590
e
591591
))
592592
})
593-
.into()
594593
}
595594

596595
/// Instantiate a BPE model from the given files.

bindings/python/src/normalizers.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -521,12 +521,12 @@ pub struct PyPrepend {}
521521
impl PyPrepend {
522522
#[getter]
523523
fn get_prepend(self_: PyRef<Self>) -> String {
524-
getter!(self_, Prepend, prepend)
524+
getter!(self_, Prepend, prepend).into()
525525
}
526526

527527
#[setter]
528528
fn set_prepend(self_: PyRef<Self>, prepend: String) {
529-
setter!(self_, Prepend, prepend, prepend)
529+
setter!(self_, Prepend, prepend, prepend.into())
530530
}
531531

532532
#[new]
@@ -624,12 +624,12 @@ impl PyReplace {
624624

625625
#[getter]
626626
fn get_content(self_: PyRef<Self>) -> String {
627-
getter!(self_, Replace, content)
627+
getter!(self_, Replace, content).to_string()
628628
}
629629

630630
#[setter]
631631
fn set_content(self_: PyRef<Self>, content: String) {
632-
setter!(self_, Replace, content, content)
632+
setter!(self_, Replace, content, content.into())
633633
}
634634
}
635635

bindings/python/src/processors.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ impl PyTemplateProcessing {
721721

722722
#[getter]
723723
fn get_single(self_: PyRef<Self>) -> String {
724-
getter!(self_, Template, get_single())
724+
getter!(self_, Template, get_single()).into()
725725
}
726726

727727
#[setter]
@@ -847,7 +847,7 @@ mod test {
847847
fn get_subtype() {
848848
Python::with_gil(|py| {
849849
let py_proc = PyPostProcessor::new(PyPostProcessorTypeWrapper::Single(Arc::new(
850-
RwLock::new(BertProcessing::new(("SEP".into(), 0), ("CLS".into(), 1)).into()),
850+
RwLock::new(BertProcessing::new(("SEP", 0), ("CLS", 1)).into()),
851851
)));
852852
let py_bert = py_proc.get_as_subtype(py).unwrap();
853853
assert_eq!(
@@ -859,7 +859,7 @@ mod test {
859859

860860
#[test]
861861
fn serialize() {
862-
let rs_processing = BertProcessing::new(("SEP".into(), 0), ("CLS".into(), 1));
862+
let rs_processing = BertProcessing::new(("SEP", 0), ("CLS", 1));
863863
let rs_wrapper: PostProcessorWrapper = rs_processing.clone().into();
864864
let rs_processing_ser = serde_json::to_string(&rs_processing).unwrap();
865865
let rs_wrapper_ser = serde_json::to_string(&rs_wrapper).unwrap();

bindings/python/src/tokenizer.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,9 @@ impl PyTokenizer {
871871
}
872872
"pad_id" => params.pad_id = value.extract()?,
873873
"pad_type_id" => params.pad_type_id = value.extract()?,
874-
"pad_token" => params.pad_token = value.extract()?,
874+
"pad_token" => {
875+
params.pad_token = value.extract::<String>()?.to_compact_string()
876+
}
875877
"max_length" => {
876878
println!(
877879
"enable_padding(max_length=X) is deprecated, \
@@ -927,7 +929,7 @@ impl PyTokenizer {
927929
)?;
928930
dict.set_item("pad_to_multiple_of", params.pad_to_multiple_of)?;
929931
dict.set_item("pad_id", params.pad_id)?;
930-
dict.set_item("pad_token", &params.pad_token)?;
932+
dict.set_item("pad_token", &*params.pad_token)?;
931933
dict.set_item("pad_type_id", params.pad_type_id)?;
932934
dict.set_item("direction", params.direction.as_ref())?;
933935

bindings/python/src/utils/normalization.rs

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use super::regex::PyRegex;
22
use super::{DestroyPtr, RefMutContainer, RefMutGuard};
33
use crate::error::ToPyResult;
4+
use compact_str::ToCompactString;
45
use pyo3::exceptions;
56
use pyo3::prelude::*;
67
use pyo3::types::*;
@@ -38,17 +39,21 @@ impl Pattern for PyPattern {
3839
impl From<PyPattern> for tk::normalizers::replace::ReplacePattern {
3940
fn from(pattern: PyPattern) -> Self {
4041
match pattern {
41-
PyPattern::Str(s) => Self::String(s.to_owned()),
42-
PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
42+
PyPattern::Str(s) => Self::String(s.into()),
43+
PyPattern::Regex(r) => {
44+
Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.to_compact_string()))
45+
}
4346
}
4447
}
4548
}
4649

4750
impl From<PyPattern> for tk::pre_tokenizers::split::SplitPattern {
4851
fn from(pattern: PyPattern) -> Self {
4952
match pattern {
50-
PyPattern::Str(s) => Self::String(s.to_owned()),
51-
PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
53+
PyPattern::Str(s) => Self::String(s.into()),
54+
PyPattern::Regex(r) => {
55+
Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.to_compact_string()))
56+
}
5257
}
5358
}
5459
}

0 commit comments

Comments (0)