Skip to content

Commit

Permalink
Merge pull request #826 from Mingun/more-deserializers
Browse files Browse the repository at this point in the history
Fix boolean parsing and make `SimpleTypeSerializer` and `SimpleTypeDeserializer` public
  • Loading branch information
Mingun authored Oct 20, 2024
2 parents 3197e64 + 83d2957 commit a6c5fc5
Show file tree
Hide file tree
Showing 9 changed files with 275 additions and 241 deletions.
16 changes: 16 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,21 @@

### New Features

- [#826]: Implement `From<String>` and `From<Cow<str>>` for `quick_xml::de::Text`.
- [#826]: Make `SimpleTypeDeserializer` and `SimpleTypeSerializer` public.
- [#826]: Implement `IntoDeserializer` for `&mut Deserializer`.

### Bug Fixes

- [#655]: Do not write indent before and after `$text` fields and those `$value` fields
that are serialized as a text (for example, `usize` or `String`).
- [#826]: Handle only those boolean representations that are allowed by [Xml Schema],
namely `"true"`, `"1"`, `"false"`, and `"0"`. Previously the following values
were also accepted:
|`bool` |XML content
|-------|-------------------------------------------------------------
|`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
|`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`

### Misc Changes

Expand All @@ -34,14 +45,19 @@
`Vec<String>` in `$value` fields. They cannot be deserialized back with the same result
- [#827]: Make `escape` and its variants take an `impl Into<Cow<str>>` argument and implement
`From<(&'a str, Cow<'a, str>)>` on `Attribute`
- [#826]: Removed `DeError::InvalidInt`, `DeError::InvalidFloat` and `DeError::InvalidBoolean`.
Now the responsibility for returning the error lies with the visitor of the type.
See rationale in https://github.com/serde-rs/serde/pull/2811

[#227]: https://github.com/tafia/quick-xml/issues/227
[#655]: https://github.com/tafia/quick-xml/issues/655
[#810]: https://github.com/tafia/quick-xml/pull/810
[#811]: https://github.com/tafia/quick-xml/pull/811
[#820]: https://github.com/tafia/quick-xml/pull/820
[#823]: https://github.com/tafia/quick-xml/pull/823
[#826]: https://github.com/tafia/quick-xml/pull/826
[#827]: https://github.com/tafia/quick-xml/pull/827
[Xml Schema]: https://www.w3.org/TR/xmlschema11-2/#boolean


## 0.36.2 -- 2024-09-20
Expand Down
15 changes: 6 additions & 9 deletions src/de/key.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::de::simple_type::UnitOnly;
use crate::de::str2bool;
use crate::encoding::Decoder;
use crate::errors::serialize::DeError;
use crate::name::QName;
Expand All @@ -14,7 +13,10 @@ macro_rules! deserialize_num {
where
V: Visitor<'de>,
{
visitor.$visit(self.name.parse()?)
match self.name.parse() {
Ok(number) => visitor.$visit(number),
Err(_) => self.name.deserialize_str(visitor),
}
}
};
}
Expand Down Expand Up @@ -134,17 +136,12 @@ impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {

/// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
/// valid boolean representations are only `"true"`, `"false"`, `"1"`,
/// and `"0"`. But this method also handles following:
///
/// |`bool` |XML content
/// |-------|-------------------------------------------------------------
/// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
/// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`
/// and `"0"`.
fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
str2bool(self.name.as_ref(), visitor)
self.name.deserialize_bool(visitor)
}

deserialize_num!(deserialize_i8, visit_i8);
Expand Down
3 changes: 2 additions & 1 deletion src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ use crate::{
de::resolver::EntityResolver,
de::simple_type::SimpleTypeDeserializer,
de::text::TextDeserializer,
de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
de::{DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
encoding::Decoder,
errors::serialize::DeError,
errors::Error,
events::attributes::IterState,
events::BytesStart,
name::QName,
utils::CowRef,
};
use serde::de::value::BorrowedStrDeserializer;
use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
Expand Down
124 changes: 61 additions & 63 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1834,15 +1834,21 @@
// Also, macros should be imported before using them
use serde::serde_if_integer128;

macro_rules! deserialize_type {
macro_rules! deserialize_num {
($deserialize:ident => $visit:ident, $($mut:tt)?) => {
fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
// No need to unescape because valid integer representations cannot be escaped
let text = self.read_string()?;
visitor.$visit(text.parse()?)
match text.parse() {
Ok(number) => visitor.$visit(number),
Err(_) => match text {
Cow::Borrowed(t) => visitor.visit_str(t),
Cow::Owned(t) => visitor.visit_string(t),
}
}
}
};
}
Expand All @@ -1851,31 +1857,33 @@ macro_rules! deserialize_type {
/// byte arrays, booleans and identifiers.
macro_rules! deserialize_primitives {
($($mut:tt)?) => {
deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);
deserialize_num!(deserialize_i8 => visit_i8, $($mut)?);
deserialize_num!(deserialize_i16 => visit_i16, $($mut)?);
deserialize_num!(deserialize_i32 => visit_i32, $($mut)?);
deserialize_num!(deserialize_i64 => visit_i64, $($mut)?);

deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);
deserialize_num!(deserialize_u8 => visit_u8, $($mut)?);
deserialize_num!(deserialize_u16 => visit_u16, $($mut)?);
deserialize_num!(deserialize_u32 => visit_u32, $($mut)?);
deserialize_num!(deserialize_u64 => visit_u64, $($mut)?);

serde_if_integer128! {
deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
deserialize_num!(deserialize_i128 => visit_i128, $($mut)?);
deserialize_num!(deserialize_u128 => visit_u128, $($mut)?);
}

deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);
deserialize_num!(deserialize_f32 => visit_f32, $($mut)?);
deserialize_num!(deserialize_f64 => visit_f64, $($mut)?);

fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
let text = self.read_string()?;

str2bool(&text, visitor)
let text = match self.read_string()? {
Cow::Borrowed(s) => CowRef::Input(s),
Cow::Owned(s) => CowRef::Owned(s),
};
text.deserialize_bool(visitor)
}

/// Character represented as [strings](#method.deserialize_str).
Expand Down Expand Up @@ -1998,8 +2006,9 @@ mod simple_type;
mod text;
mod var;

pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
pub use self::simple_type::SimpleTypeDeserializer;
pub use crate::errors::serialize::DeError;
pub use resolver::{EntityResolver, PredefinedEntityResolver};

use crate::{
de::map::ElementMapAccess,
Expand All @@ -2008,8 +2017,11 @@ use crate::{
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
reader::Reader,
utils::CowRef,
};
use serde::de::{
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
};
use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
use std::borrow::Cow;
#[cfg(feature = "overlapped-lists")]
use std::collections::VecDeque;
Expand Down Expand Up @@ -2058,6 +2070,22 @@ impl<'a> From<&'a str> for Text<'a> {
}
}

/// Builds a [`Text`] that owns its content by taking over the given `String`.
impl<'a> From<String> for Text<'a> {
    #[inline]
    fn from(text: String) -> Self {
        // The string is moved into the `Cow`, so no copy of the data is made.
        let text = Cow::Owned(text);
        Self { text }
    }
}

impl<'a> From<Cow<'a, str>> for Text<'a> {
#[inline]
fn from(text: Cow<'a, str>) -> Self {
Self { text }
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Simplified event which contains only these variants that used by deserializer
Expand Down Expand Up @@ -2287,7 +2315,7 @@ where
}

/// Deserialize from a reader. This method will do internal copies of data
/// readed from `reader`. If you want have a `&str` input and want to borrow
/// read from `reader`. If you have a `&str` input and want to borrow
/// as much as possible, use [`from_str`].
pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
where
Expand All @@ -2298,49 +2326,6 @@ where
T::deserialize(&mut de)
}

/// Maps a textual boolean onto [`Visitor::visit_bool`](de::Visitor::visit_bool).
///
/// Recognized spellings of `true`: `"true"`, `"1"`, `"True"`, `"TRUE"`, `"t"`,
/// `"Yes"`, `"YES"`, `"yes"`, `"y"`.
///
/// Recognized spellings of `false`: `"false"`, `"0"`, `"False"`, `"FALSE"`, `"f"`,
/// `"No"`, `"NO"`, `"no"`, `"n"`.
///
/// # Errors
///
/// Returns [`DeError::InvalidBoolean`] carrying the offending text when `value`
/// is none of the spellings above.
// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
// valid boolean representations are only "true", "false", "1", and "0"
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
where
    V: de::Visitor<'de>,
{
    // Classify the text first, then make a single call into the visitor.
    let flag = match value {
        "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => true,
        "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => false,
        _ => return Err(DeError::InvalidBoolean(value.into())),
    };
    visitor.visit_bool(flag)
}

/// Interprets raw bytes as a boolean and hands the result to `visitor`.
///
/// With the `encoding` feature enabled the bytes are decoded with `decoder`
/// first and the resulting text is classified by [`str2bool`]. Without that
/// feature the bytes are compared against the accepted spellings directly and
/// `decoder` is only used to build the error value.
///
/// # Errors
///
/// Returns [`DeError::InvalidBoolean`] when the content is not a recognized
/// boolean spelling, or propagates the error from `decoder.decode` when
/// decoding fails.
fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    #[cfg(feature = "encoding")]
    {
        // No need to unescape because valid boolean representations cannot be escaped
        let decoded = decoder.decode(value)?;
        str2bool(decoded.as_ref(), visitor)
    }

    #[cfg(not(feature = "encoding"))]
    {
        // No need to unescape because valid boolean representations cannot be escaped
        let flag = match value {
            b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => true,
            b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => false,
            // Decode only on the failure path, to report the rejected text.
            other => return Err(DeError::InvalidBoolean(decoder.decode(other)?.into())),
        };
        visitor.visit_bool(flag)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A structure that deserializes XML into Rust values.
Expand Down Expand Up @@ -3007,6 +2992,19 @@ where
}
}

/// Lets a mutable reference to the XML [`Deserializer`] be used wherever serde
/// expects an [`IntoDeserializer`]. The conversion is the identity: the
/// reference is returned unchanged.
impl<'de, 'a, R, E> IntoDeserializer<'de, DeError> for &'a mut Deserializer<'de, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    type Deserializer = Self;

    #[inline]
    fn into_deserializer(self) -> Self::Deserializer {
        self
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Helper struct that contains a state for an algorithm of converting events
Expand Down
Loading

0 comments on commit a6c5fc5

Please sign in to comment.