diff --git a/Changelog.md b/Changelog.md index a205615d..bd2df2e0 100644 --- a/Changelog.md +++ b/Changelog.md @@ -15,10 +15,21 @@ ### New Features +- [#826]: Implement `From<String>` and `From<Cow<str>>` for `quick_xml::de::Text`. +- [#826]: Make `SimpleTypeDeserializer` and `SimpleTypeSerializer` public. +- [#826]: Implement `IntoDeserializer` for `&mut Deserializer`. + ### Bug Fixes - [#655]: Do not write indent before and after `$text` fields and those `$value` fields that are serialized as a text (for example, `usize` or `String`). +- [#826]: Handle only those boolean representations that are allowed by [Xml Schema] + which are only `"true"`, `"1"`, `"false"`, and `"0"`. Previously the following values + were also accepted: + |`bool` |XML content + |-------|------------------------------------------------------------- + |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"` + |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"` ### Misc Changes @@ -34,6 +45,9 @@ `Vec` in `$value` fields. They cannot be deserialized back with the same result - [#827]: Make `escape` and it variants take a `impl Into>` argument and implement `From<(&'a str, Cow<'a, str>)>` on `Attribute` +- [#826]: Removed `DeError::InvalidInt`, `DeError::InvalidFloat` and `DeError::InvalidBoolean`. + Now the responsibility for returning the error lies with the visitor of the type. 
+ See rationale in https://github.com/serde-rs/serde/pull/2811 [#227]: https://github.com/tafia/quick-xml/issues/227 [#655]: https://github.com/tafia/quick-xml/issues/655 @@ -41,7 +55,9 @@ [#811]: https://github.com/tafia/quick-xml/pull/811 [#820]: https://github.com/tafia/quick-xml/pull/820 [#823]: https://github.com/tafia/quick-xml/pull/823 +[#826]: https://github.com/tafia/quick-xml/pull/826 [#827]: https://github.com/tafia/quick-xml/pull/827 +[Xml Schema]: https://www.w3.org/TR/xmlschema11-2/#boolean ## 0.36.2 -- 2024-09-20 diff --git a/src/de/key.rs b/src/de/key.rs index 0c003730..39ad3759 100644 --- a/src/de/key.rs +++ b/src/de/key.rs @@ -1,5 +1,4 @@ use crate::de::simple_type::UnitOnly; -use crate::de::str2bool; use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::name::QName; @@ -14,7 +13,10 @@ macro_rules! deserialize_num { where V: Visitor<'de>, { - visitor.$visit(self.name.parse()?) + match self.name.parse() { + Ok(number) => visitor.$visit(number), + Err(_) => self.name.deserialize_str(visitor), + } } }; } @@ -134,17 +136,12 @@ impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> { /// According to the , /// valid boolean representations are only `"true"`, `"false"`, `"1"`, - /// and `"0"`. But this method also handles following: - /// - /// |`bool` |XML content - /// |-------|------------------------------------------------------------- - /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"` - /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"` + /// and `"0"`. 
fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { - str2bool(self.name.as_ref(), visitor) + self.name.deserialize_bool(visitor) } deserialize_num!(deserialize_i8, visit_i8); diff --git a/src/de/map.rs b/src/de/map.rs index 3989a64d..9f2a9876 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -5,13 +5,14 @@ use crate::{ de::resolver::EntityResolver, de::simple_type::SimpleTypeDeserializer, de::text::TextDeserializer, - de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY}, + de::{DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY}, encoding::Decoder, errors::serialize::DeError, errors::Error, events::attributes::IterState, events::BytesStart, name::QName, + utils::CowRef, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor}; diff --git a/src/de/mod.rs b/src/de/mod.rs index 578a88de..484c31b0 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1834,7 +1834,7 @@ // Also, macros should be imported before using them use serde::serde_if_integer128; -macro_rules! deserialize_type { +macro_rules! deserialize_num { ($deserialize:ident => $visit:ident, $($mut:tt)?) => { fn $deserialize($($mut)? self, visitor: V) -> Result where @@ -1842,7 +1842,13 @@ macro_rules! deserialize_type { { // No need to unescape because valid integer representations cannot be escaped let text = self.read_string()?; - visitor.$visit(text.parse()?) + match text.parse() { + Ok(number) => visitor.$visit(number), + Err(_) => match text { + Cow::Borrowed(t) => visitor.visit_str(t), + Cow::Owned(t) => visitor.visit_string(t), + } + } } }; } @@ -1851,31 +1857,33 @@ macro_rules! deserialize_type { /// byte arrays, booleans and identifiers. macro_rules! deserialize_primitives { ($($mut:tt)?) 
=> { - deserialize_type!(deserialize_i8 => visit_i8, $($mut)?); - deserialize_type!(deserialize_i16 => visit_i16, $($mut)?); - deserialize_type!(deserialize_i32 => visit_i32, $($mut)?); - deserialize_type!(deserialize_i64 => visit_i64, $($mut)?); + deserialize_num!(deserialize_i8 => visit_i8, $($mut)?); + deserialize_num!(deserialize_i16 => visit_i16, $($mut)?); + deserialize_num!(deserialize_i32 => visit_i32, $($mut)?); + deserialize_num!(deserialize_i64 => visit_i64, $($mut)?); - deserialize_type!(deserialize_u8 => visit_u8, $($mut)?); - deserialize_type!(deserialize_u16 => visit_u16, $($mut)?); - deserialize_type!(deserialize_u32 => visit_u32, $($mut)?); - deserialize_type!(deserialize_u64 => visit_u64, $($mut)?); + deserialize_num!(deserialize_u8 => visit_u8, $($mut)?); + deserialize_num!(deserialize_u16 => visit_u16, $($mut)?); + deserialize_num!(deserialize_u32 => visit_u32, $($mut)?); + deserialize_num!(deserialize_u64 => visit_u64, $($mut)?); serde_if_integer128! { - deserialize_type!(deserialize_i128 => visit_i128, $($mut)?); - deserialize_type!(deserialize_u128 => visit_u128, $($mut)?); + deserialize_num!(deserialize_i128 => visit_i128, $($mut)?); + deserialize_num!(deserialize_u128 => visit_u128, $($mut)?); } - deserialize_type!(deserialize_f32 => visit_f32, $($mut)?); - deserialize_type!(deserialize_f64 => visit_f64, $($mut)?); + deserialize_num!(deserialize_f32 => visit_f32, $($mut)?); + deserialize_num!(deserialize_f64 => visit_f64, $($mut)?); fn deserialize_bool($($mut)? self, visitor: V) -> Result where V: Visitor<'de>, { - let text = self.read_string()?; - - str2bool(&text, visitor) + let text = match self.read_string()? { + Cow::Borrowed(s) => CowRef::Input(s), + Cow::Owned(s) => CowRef::Owned(s), + }; + text.deserialize_bool(visitor) } /// Character represented as [strings](#method.deserialize_str). 
@@ -1998,8 +2006,9 @@ mod simple_type; mod text; mod var; +pub use self::resolver::{EntityResolver, PredefinedEntityResolver}; +pub use self::simple_type::SimpleTypeDeserializer; pub use crate::errors::serialize::DeError; -pub use resolver::{EntityResolver, PredefinedEntityResolver}; use crate::{ de::map::ElementMapAccess, @@ -2008,8 +2017,11 @@ use crate::{ events::{BytesCData, BytesEnd, BytesStart, BytesText, Event}, name::QName, reader::Reader, + utils::CowRef, +}; +use serde::de::{ + self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor, }; -use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor}; use std::borrow::Cow; #[cfg(feature = "overlapped-lists")] use std::collections::VecDeque; @@ -2058,6 +2070,22 @@ impl<'a> From<&'a str> for Text<'a> { } } +impl<'a> From for Text<'a> { + #[inline] + fn from(text: String) -> Self { + Self { + text: Cow::Owned(text), + } + } +} + +impl<'a> From> for Text<'a> { + #[inline] + fn from(text: Cow<'a, str>) -> Self { + Self { text } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// Simplified event which contains only these variants that used by deserializer @@ -2287,7 +2315,7 @@ where } /// Deserialize from a reader. This method will do internal copies of data -/// readed from `reader`. If you want have a `&str` input and want to borrow +/// read from `reader`. If you want have a `&str` input and want to borrow /// as much as possible, use [`from_str`]. 
pub fn from_reader(reader: R) -> Result where @@ -2298,49 +2326,6 @@ where T::deserialize(&mut de) } -// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean, -// valid boolean representations are only "true", "false", "1", and "0" -fn str2bool<'de, V>(value: &str, visitor: V) -> Result -where - V: de::Visitor<'de>, -{ - match value { - "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => { - visitor.visit_bool(true) - } - "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => { - visitor.visit_bool(false) - } - _ => Err(DeError::InvalidBoolean(value.into())), - } -} - -fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result -where - V: Visitor<'de>, -{ - #[cfg(feature = "encoding")] - { - let value = decoder.decode(value)?; - // No need to unescape because valid boolean representations cannot be escaped - str2bool(value.as_ref(), visitor) - } - - #[cfg(not(feature = "encoding"))] - { - // No need to unescape because valid boolean representations cannot be escaped - match value { - b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => { - visitor.visit_bool(true) - } - b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => { - visitor.visit_bool(false) - } - e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())), - } - } -} - //////////////////////////////////////////////////////////////////////////////////////////////////// /// A structure that deserializes XML into Rust values. 
@@ -3007,6 +2992,19 @@ where } } +impl<'de, 'a, R, E> IntoDeserializer<'de, DeError> for &'a mut Deserializer<'de, R, E> +where + R: XmlRead<'de>, + E: EntityResolver, +{ + type Deserializer = Self; + + #[inline] + fn into_deserializer(self) -> Self { + self + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// Helper struct that contains a state for an algorithm of converting events diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs index d78db8fc..fefa9e97 100644 --- a/src/de/simple_type.rs +++ b/src/de/simple_type.rs @@ -3,34 +3,47 @@ //! [simple types]: https://www.w3schools.com/xml/el_simpletype.asp //! [as defined]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition -use crate::de::{deserialize_bool, str2bool, Text}; +use crate::de::Text; use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::escape::unescape; use crate::utils::CowRef; use memchr::memchr; use serde::de::value::UnitDeserializer; -use serde::de::{DeserializeSeed, Deserializer, EnumAccess, SeqAccess, VariantAccess, Visitor}; +use serde::de::{ + DeserializeSeed, Deserializer, EnumAccess, IntoDeserializer, SeqAccess, VariantAccess, Visitor, +}; use serde::serde_if_integer128; use std::borrow::Cow; use std::ops::Range; macro_rules! deserialize_num { - ($method:ident, $visit:ident) => { + ($method:ident => $visit:ident) => { + #[inline] fn $method(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.$visit(self.content.as_str().parse()?) + let text: &str = self.content.as_ref(); + match text.parse() { + Ok(number) => visitor.$visit(number), + Err(_) => self.content.deserialize_str(visitor), + } } }; - ($method:ident => $visit:ident) => { +} + +macro_rules! deserialize_primitive { + ($method:ident) => { fn $method(self, visitor: V) -> Result where V: Visitor<'de>, { - let string = self.decode()?; - visitor.$visit(string.as_str().parse()?) 
+ let de = AtomicDeserializer { + content: self.decode()?, + escaped: self.escaped, + }; + de.$method(visitor) } }; } @@ -84,47 +97,6 @@ impl<'de, 'a> Content<'de, 'a> { Content::Owned(s, offset) => s.split_at(*offset).1, } } - - /// Supply to the visitor a borrowed string, a string slice, or an owned - /// string depending on the kind of input. Unlike [`Self::deserialize_item`], - /// the whole [`Self::Owned`] string will be passed to the visitor. - /// - /// Calls - /// - `visitor.visit_borrowed_str` if data borrowed from the input - /// - `visitor.visit_str` if data borrowed from another source - /// - `visitor.visit_string` if data owned by this type - #[inline] - fn deserialize_all(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match self { - Content::Input(s) => visitor.visit_borrowed_str(s), - Content::Slice(s) => visitor.visit_str(s), - Content::Owned(s, _) => visitor.visit_string(s), - } - } - - /// Supply to the visitor a borrowed string, a string slice, or an owned - /// string depending on the kind of input. Unlike [`Self::deserialize_all`], - /// only part of [`Self::Owned`] string will be passed to the visitor. 
- /// - /// Calls - /// - `visitor.visit_borrowed_str` if data borrowed from the input - /// - `visitor.visit_str` if data borrowed from another source - /// - `visitor.visit_string` if data owned by this type - #[inline] - fn deserialize_item(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match self { - Content::Input(s) => visitor.visit_borrowed_str(s), - Content::Slice(s) => visitor.visit_str(s), - Content::Owned(s, 0) => visitor.visit_string(s), - Content::Owned(s, offset) => visitor.visit_str(s.split_at(offset).1), - } - } } /// A deserializer that handles ordinary [simple type definition][item] with @@ -151,7 +123,7 @@ impl<'de, 'a> Content<'de, 'a> { /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition struct AtomicDeserializer<'de, 'a> { /// Content of the attribute value, text content or CDATA content - content: Content<'de, 'a>, + content: CowRef<'de, 'a, str>, /// If `true`, `content` in an escaped form and should be unescaped before use escaped: bool, } @@ -169,36 +141,31 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> { /// According to the , /// valid boolean representations are only `"true"`, `"false"`, `"1"`, - /// and `"0"`. But this method also handles following: - /// - /// |`bool` |XML content - /// |-------|------------------------------------------------------------- - /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"` - /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"` + /// and `"0"`. 
fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { - str2bool(self.content.as_str(), visitor) + self.content.deserialize_bool(visitor) } - deserialize_num!(deserialize_i8, visit_i8); - deserialize_num!(deserialize_i16, visit_i16); - deserialize_num!(deserialize_i32, visit_i32); - deserialize_num!(deserialize_i64, visit_i64); + deserialize_num!(deserialize_i8 => visit_i8); + deserialize_num!(deserialize_i16 => visit_i16); + deserialize_num!(deserialize_i32 => visit_i32); + deserialize_num!(deserialize_i64 => visit_i64); - deserialize_num!(deserialize_u8, visit_u8); - deserialize_num!(deserialize_u16, visit_u16); - deserialize_num!(deserialize_u32, visit_u32); - deserialize_num!(deserialize_u64, visit_u64); + deserialize_num!(deserialize_u8 => visit_u8); + deserialize_num!(deserialize_u16 => visit_u16); + deserialize_num!(deserialize_u32 => visit_u32); + deserialize_num!(deserialize_u64 => visit_u64); serde_if_integer128! { - deserialize_num!(deserialize_i128, visit_i128); - deserialize_num!(deserialize_u128, visit_u128); + deserialize_num!(deserialize_i128 => visit_i128); + deserialize_num!(deserialize_u128 => visit_u128); } - deserialize_num!(deserialize_f32, visit_f32); - deserialize_num!(deserialize_f64, visit_f64); + deserialize_num!(deserialize_f32 => visit_f32); + deserialize_num!(deserialize_f64 => visit_f64); /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_char(self, visitor: V) -> Result @@ -223,12 +190,12 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> { V: Visitor<'de>, { if self.escaped { - match unescape(self.content.as_str())? { - Cow::Borrowed(_) => self.content.deserialize_item(visitor), + match unescape(self.content.as_ref())? 
{ + Cow::Borrowed(_) => self.content.deserialize_str(visitor), Cow::Owned(s) => visitor.visit_string(s), } } else { - self.content.deserialize_item(visitor) + self.content.deserialize_str(visitor) } } @@ -245,7 +212,8 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> { where V: Visitor<'de>, { - if self.content.as_str().is_empty() { + let text: &str = self.content.as_ref(); + if text.is_empty() { visitor.visit_none() } else { visitor.visit_some(self) @@ -402,10 +370,24 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { } return match memchr(DELIMITER, string.as_bytes()) { // No delimiters in the `content`, deserialize it as a whole atomic - None => seed.deserialize(AtomicDeserializer { - content, - escaped: self.escaped, - }), + None => match content { + Content::Input(s) => seed.deserialize(AtomicDeserializer { + content: CowRef::Input(s), + escaped: self.escaped, + }), + Content::Slice(s) => seed.deserialize(AtomicDeserializer { + content: CowRef::Slice(s), + escaped: self.escaped, + }), + Content::Owned(s, 0) => seed.deserialize(AtomicDeserializer { + content: CowRef::Owned(s), + escaped: self.escaped, + }), + Content::Owned(s, offset) => seed.deserialize(AtomicDeserializer { + content: CowRef::Slice(s.split_at(offset).1), + escaped: self.escaped, + }), + }, // `content` started with a space, skip them all Some(0) => { // Skip all spaces @@ -432,7 +414,7 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { self.content = Some(Content::Input(rest)); seed.deserialize(AtomicDeserializer { - content: Content::Input(item), + content: CowRef::Input(item), escaped: self.escaped, }) } @@ -441,7 +423,7 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { self.content = Some(Content::Slice(rest)); seed.deserialize(AtomicDeserializer { - content: Content::Slice(item), + content: CowRef::Slice(item), escaped: self.escaped, }) } @@ -450,7 +432,7 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { Content::Owned(s, skip) => { let item = 
s.split_at(skip + end).0; let result = seed.deserialize(AtomicDeserializer { - content: Content::Slice(item), + content: CowRef::Slice(item), escaped: self.escaped, }); @@ -483,27 +465,37 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { /// - mixed text / CDATA content (`<...>text`) /// /// This deserializer processes items as following: -/// - numbers are parsed from a text content using [`FromStr`]; +/// - numbers are parsed from a text content using [`FromStr`]; in case of error +/// [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], or [`Visitor::visit_string`] +/// is called; it is the responsibility of the type to return an error if it is +/// not able to process the passed data; /// - booleans converted from the text according to the XML [specification]: /// - `"true"` and `"1"` converted to `true`; /// - `"false"` and `"0"` converted to `false`; +/// - everything else calls [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], +/// or [`Visitor::visit_string`]; it is the responsibility of the type to return +/// an error if it is not able to process the passed data; /// - strings returned as is; /// - characters also returned as strings. If string contain more than one character /// or empty, it is responsibility of a type to return an error; /// - `Option` always deserialized as `Some` using the same deserializer. /// If attribute or text content is missed, then the deserializer even wouldn't /// be used, so if it is used, then the value should be; -/// - units (`()`) and unit structs always deserialized successfully; +/// - units (`()`) and unit structs always deserialized successfully, the content is ignored; /// - newtype structs forwards deserialization to the inner type using the same /// deserializer; /// - sequences, tuples and tuple structs are deserialized as `xs:list`s. 
Only /// sequences of primitive types is possible to deserialize this way and they /// should be delimited by a space (` `, `\t`, `\r`, or `\n`); -/// - structs and maps delegates to [`Self::deserialize_str`]; +/// - structs and maps delegate to [`Self::deserialize_str`] which calls +/// [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], or [`Visitor::visit_string`]; +/// the type is responsible for returning an error if it cannot process the passed data; /// - enums: -/// - unit variants: just return `()`; -/// - newtype variants: deserialize from [`UnitDeserializer`]; -/// - tuple and struct variants: call [`Visitor::visit_unit`]; +/// - the variant name is deserialized using the same deserializer; +/// - the content is deserialized using the deserializer that always returns unit (`()`): +/// - unit variants: just return `()`; +/// - newtype variants: deserialize from [`UnitDeserializer`]; +/// - tuple and struct variants: call [`Visitor::visit_unit`]; /// - identifiers are deserialized as strings. /// /// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition @@ -521,7 +513,9 @@ pub struct SimpleTypeDeserializer<'de, 'a> { } impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { - /// Creates a deserializer from a value, that possible borrowed from input + /// Creates a deserializer from a value that is possibly borrowed from the input. + /// + /// It is assumed that `text` does not have entities. pub fn from_text(text: Cow<'de, str>) -> Self { let content = match text { Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()), @@ -529,14 +523,20 @@ impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { }; Self::new(content, false, Decoder::utf8()) } - /// Creates a deserializer from a value, that possible borrowed from input + /// Creates a deserializer from an XML text node that is possibly borrowed from the input. + /// + /// It is assumed that `text` does not have entities. + /// + /// This constructor is used internally to deserialize from text nodes. 
pub fn from_text_content(value: Text<'de>) -> Self { Self::from_text(value.text) } - /// Creates a deserializer from a part of value at specified range + /// Creates a deserializer from a part of value at specified range. + /// + /// This constructor used internally to deserialize from attribute values. #[allow(clippy::ptr_arg)] - pub fn from_part( + pub(crate) fn from_part( value: &'a Cow<'de, [u8]>, range: Range, escaped: bool, @@ -562,19 +562,19 @@ impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { /// Decodes raw bytes using the encoding specified. /// The method will borrow if has the UTF-8 compatible representation. #[inline] - fn decode<'b>(&'b self) -> Result, DeError> { + fn decode<'b>(&'b self) -> Result, DeError> { Ok(match self.content { CowRef::Input(content) => match self.decoder.decode(content)? { - Cow::Borrowed(content) => Content::Input(content), - Cow::Owned(content) => Content::Owned(content, 0), + Cow::Borrowed(content) => CowRef::Input(content), + Cow::Owned(content) => CowRef::Owned(content), }, CowRef::Slice(content) => match self.decoder.decode(content)? { - Cow::Borrowed(content) => Content::Slice(content), - Cow::Owned(content) => Content::Owned(content, 0), + Cow::Borrowed(content) => CowRef::Slice(content), + Cow::Owned(content) => CowRef::Owned(content), }, CowRef::Owned(ref content) => match self.decoder.decode(content)? 
{ - Cow::Borrowed(content) => Content::Slice(content), - Cow::Owned(content) => Content::Owned(content, 0), + Cow::Borrowed(content) => CowRef::Slice(content), + Cow::Owned(content) => CowRef::Owned(content), }, }) } @@ -591,30 +591,27 @@ impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> { self.deserialize_str(visitor) } - fn deserialize_bool(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - deserialize_bool(&self.content, self.decoder, visitor) - } + deserialize_primitive!(deserialize_bool); - deserialize_num!(deserialize_i8 => visit_i8); - deserialize_num!(deserialize_i16 => visit_i16); - deserialize_num!(deserialize_i32 => visit_i32); - deserialize_num!(deserialize_i64 => visit_i64); + deserialize_primitive!(deserialize_i8); + deserialize_primitive!(deserialize_i16); + deserialize_primitive!(deserialize_i32); + deserialize_primitive!(deserialize_i64); - deserialize_num!(deserialize_u8 => visit_u8); - deserialize_num!(deserialize_u16 => visit_u16); - deserialize_num!(deserialize_u32 => visit_u32); - deserialize_num!(deserialize_u64 => visit_u64); + deserialize_primitive!(deserialize_u8); + deserialize_primitive!(deserialize_u16); + deserialize_primitive!(deserialize_u32); + deserialize_primitive!(deserialize_u64); serde_if_integer128! 
{ - deserialize_num!(deserialize_i128 => visit_i128); - deserialize_num!(deserialize_u128 => visit_u128); + deserialize_primitive!(deserialize_i128); + deserialize_primitive!(deserialize_u128); } - deserialize_num!(deserialize_f32 => visit_f32); - deserialize_num!(deserialize_f64 => visit_f64); + deserialize_primitive!(deserialize_f32); + deserialize_primitive!(deserialize_f64); + + deserialize_primitive!(deserialize_str); /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] @@ -625,21 +622,6 @@ impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> { self.deserialize_str(visitor) } - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let content = self.decode()?; - if self.escaped { - match unescape(content.as_str())? { - Cow::Borrowed(_) => content.deserialize_all(visitor), - Cow::Owned(s) => visitor.visit_string(s), - } - } else { - content.deserialize_all(visitor) - } - } - /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] fn deserialize_string(self, visitor: V) -> Result @@ -710,8 +692,13 @@ impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> { where V: Visitor<'de>, { + let content = match self.decode()? 
{ + CowRef::Input(s) => Content::Input(s), + CowRef::Slice(s) => Content::Slice(s), + CowRef::Owned(s) => Content::Owned(s, 0), + }; visitor.visit_seq(ListIter { - content: Some(self.decode()?), + content: Some(content), escaped: self.escaped, }) } @@ -785,6 +772,15 @@ impl<'de, 'a> EnumAccess<'de> for SimpleTypeDeserializer<'de, 'a> { } } +impl<'de, 'a> IntoDeserializer<'de, DeError> for SimpleTypeDeserializer<'de, 'a> { + type Deserializer = Self; + + #[inline] + fn into_deserializer(self) -> Self { + self + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] @@ -904,6 +900,7 @@ mod tests { use super::*; use crate::se::simple_type::AtomicSerializer; use pretty_assertions::assert_eq; + use std::ops::Deref; /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! deserialized_to_only { @@ -911,7 +908,7 @@ mod tests { #[test] fn $name() { let de = AtomicDeserializer { - content: Content::Input($input), + content: CowRef::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); @@ -928,7 +925,7 @@ mod tests { #[test] fn $name() { let de = AtomicDeserializer { - content: Content::Input($input), + content: CowRef::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); @@ -958,7 +955,7 @@ mod tests { #[test] fn $name() { let de = AtomicDeserializer { - content: Content::Input($input), + content: CowRef::Input($input), escaped: true, }; let err = <$type as Deserialize>::deserialize(de).unwrap_err(); @@ -1054,13 +1051,13 @@ mod tests { #[cfg(feature = "encoding")] fn owned_data() { let de = AtomicDeserializer { - content: Content::Owned("string slice".into(), 7), + content: CowRef::Owned("string slice".into()), escaped: true, }; - assert_eq!(de.content.as_str(), "slice"); + assert_eq!(de.content.deref(), "string slice"); let data: String = Deserialize::deserialize(de).unwrap(); - assert_eq!(data, 
"slice"); + assert_eq!(data, "string slice"); } /// Checks that deserialization from a content borrowed from some @@ -1068,10 +1065,10 @@ mod tests { #[test] fn borrowed_from_deserializer() { let de = AtomicDeserializer { - content: Content::Slice("string slice"), + content: CowRef::Slice("string slice"), escaped: true, }; - assert_eq!(de.content.as_str(), "string slice"); + assert_eq!(de.content.deref(), "string slice"); let data: String = Deserialize::deserialize(de).unwrap(); assert_eq!(data, "string slice"); diff --git a/src/de/text.rs b/src/de/text.rs index f3129e48..a72f3147 100644 --- a/src/de/text.rs +++ b/src/de/text.rs @@ -1,7 +1,8 @@ use crate::{ de::simple_type::SimpleTypeDeserializer, - de::{str2bool, Text, TEXT_KEY}, + de::{Text, TEXT_KEY}, errors::serialize::DeError, + utils::CowRef, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor}; @@ -17,17 +18,23 @@ use std::borrow::Cow; /// over tags / text within it's parent tag. /// /// This deserializer processes items as following: -/// - numbers are parsed from a text content using [`FromStr`]; +/// - numbers are parsed from a text content using [`FromStr`]; in case of error +/// [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], or [`Visitor::visit_string`] +/// is called; it is responsibility of the type to return an error if it does +/// not able to process passed data; /// - booleans converted from the text according to the XML [specification]: /// - `"true"` and `"1"` converted to `true`; /// - `"false"` and `"0"` converted to `false`; +/// - everything else calls [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], +/// or [`Visitor::visit_string`]; it is responsibility of the type to return +/// an error if it does not able to process passed data; /// - strings returned as is; /// - characters also returned as strings. 
If string contain more than one character /// or empty, it is responsibility of a type to return an error; /// - `Option`: /// - empty text is deserialized as `None`; /// - everything else is deserialized as `Some` using the same deserializer; -/// - units (`()`) and unit structs always deserialized successfully; +/// - units (`()`) and unit structs always deserialized successfully, the content is ignored; /// - newtype structs forwards deserialization to the inner type using the same /// deserializer; /// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`] diff --git a/src/errors.rs b/src/errors.rs index 6b661020..5a15a5ad 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -276,7 +276,6 @@ pub mod serialize { use std::borrow::Cow; #[cfg(feature = "overlapped-lists")] use std::num::NonZeroUsize; - use std::num::{ParseFloatError, ParseIntError}; use std::str::Utf8Error; /// (De)serialization error @@ -286,12 +285,6 @@ pub mod serialize { Custom(String), /// Xml parsing error InvalidXml(Error), - /// Cannot parse to integer - InvalidInt(ParseIntError), - /// Cannot parse to float - InvalidFloat(ParseFloatError), - /// Cannot parse specified value to boolean - InvalidBoolean(String), /// This error indicates an error in the [`Deserialize`](serde::Deserialize) /// implementation when read a map or a struct: `MapAccess::next_value[_seed]` /// was called before `MapAccess::next_key[_seed]`. 
@@ -322,9 +315,6 @@ pub mod serialize { match self { Self::Custom(s) => f.write_str(s), Self::InvalidXml(e) => e.fmt(f), - Self::InvalidInt(e) => write!(f, "invalid integral value: {}", e), - Self::InvalidFloat(e) => write!(f, "invalid floating-point value: {}", e), - Self::InvalidBoolean(v) => write!(f, "invalid boolean value '{}'", v), Self::KeyNotRead => f.write_str("invalid `Deserialize` implementation: `MapAccess::next_value[_seed]` was called before `MapAccess::next_key[_seed]`"), Self::UnexpectedStart(e) => { f.write_str("unexpected `Event::Start(")?; @@ -342,8 +332,6 @@ pub mod serialize { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::InvalidXml(e) => Some(e), - Self::InvalidInt(e) => Some(e), - Self::InvalidFloat(e) => Some(e), _ => None, } } @@ -383,20 +371,6 @@ pub mod serialize { } } - impl From for DeError { - #[inline] - fn from(e: ParseIntError) -> Self { - Self::InvalidInt(e) - } - } - - impl From for DeError { - #[inline] - fn from(e: ParseFloatError) -> Self { - Self::InvalidFloat(e) - } - } - /// Serialization error #[derive(Clone, Debug)] pub enum SeError { diff --git a/src/se/mod.rs b/src/se/mod.rs index e9eaa0fc..c8a71d46 100644 --- a/src/se/mod.rs +++ b/src/se/mod.rs @@ -82,13 +82,15 @@ mod text; use self::content::ContentSerializer; use self::element::{ElementSerializer, Map, Struct, Tuple}; use crate::de::TEXT_KEY; -pub use crate::errors::serialize::SeError; use crate::writer::Indentation; use serde::ser::{self, Serialize}; use serde::serde_if_integer128; use std::fmt::Write; use std::str::from_utf8; +pub use self::simple_type::SimpleTypeSerializer; +pub use crate::errors::serialize::SeError; + /// Serialize struct into a `Write`r. /// /// Returns the classification of the last written type. 
diff --git a/src/utils.rs b/src/utils.rs index 95b3b65d..3912bde8 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -92,6 +92,48 @@ where } } +impl<'i, 's> CowRef<'i, 's, str> { + /// Supply to the visitor a borrowed string, a string slice, or an owned + /// string depending on the kind of input. The whole [`Self::Owned`] string + /// is passed to the visitor. + /// + /// Calls + /// - `visitor.visit_borrowed_str` if data borrowed from the input + /// - `visitor.visit_str` if data borrowed from another source + /// - `visitor.visit_string` if data owned by this type + #[cfg(feature = "serialize")] + pub fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'i>, + E: Error, + { + match self { + Self::Input(s) => visitor.visit_borrowed_str(s), + Self::Slice(s) => visitor.visit_str(s), + Self::Owned(s) => visitor.visit_string(s), + } + } + + /// Calls [`Visitor::visit_bool`] with `true` or `false` if text contains + /// [valid] boolean representation, otherwise calls [`Self::deserialize_str`]. + /// + /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`. + /// + /// [valid]: https://www.w3.org/TR/xmlschema11-2/#boolean + #[cfg(feature = "serialize")] + pub fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'i>, + E: Error, + { + match self.as_ref() { + "1" | "true" => visitor.visit_bool(true), + "0" | "false" => visitor.visit_bool(false), + _ => self.deserialize_str(visitor), + } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// Wrapper around `Vec` that has a human-readable debug representation: