Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix boolean parsing and make SimpleTypeSerializer and SimpleTypeDeserializer public #826

Merged
merged 8 commits into from
Oct 20, 2024
16 changes: 16 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,21 @@

### New Features

- [#826]: Implement `From<String>` and `From<Cow<str>>` for `quick_xml::de::Text`.
- [#826]: Make `SimpleTypeDeserializer` and `SimpleTypeSerializer` public.
- [#826]: Implement `IntoDeserializer` for `&mut Deserializer`.

### Bug Fixes

- [#655]: Do not write indent before and after `$text` fields and those `$value` fields
that are serialized as text (for example, `usize` or `String`).
- [#826]: Handle only those boolean representations that are allowed by [Xml Schema],
which are only `"true"`, `"1"`, `"false"`, and `"0"`. Previously the following values
were also accepted:
|`bool` |XML content
|-------|-------------------------------------------------------------
|`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
|`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`

### Misc Changes

Expand All @@ -34,14 +45,19 @@
`Vec<String>` in `$value` fields. They cannot be deserialized back with the same result
- [#827]: Make `escape` and its variants take an `impl Into<Cow<str>>` argument and implement
`From<(&'a str, Cow<'a, str>)>` on `Attribute`
- [#826]: Removed `DeError::InvalidInt`, `DeError::InvalidFloat` and `DeError::InvalidBoolean`.
Now the responsibility for returning the error lies with the visitor of the type.
See rationale in https://github.com/serde-rs/serde/pull/2811

[#227]: https://github.com/tafia/quick-xml/issues/227
[#655]: https://github.com/tafia/quick-xml/issues/655
[#810]: https://github.com/tafia/quick-xml/pull/810
[#811]: https://github.com/tafia/quick-xml/pull/811
[#820]: https://github.com/tafia/quick-xml/pull/820
[#823]: https://github.com/tafia/quick-xml/pull/823
[#826]: https://github.com/tafia/quick-xml/pull/826
[#827]: https://github.com/tafia/quick-xml/pull/827
[Xml Schema]: https://www.w3.org/TR/xmlschema11-2/#boolean


## 0.36.2 -- 2024-09-20
Expand Down
15 changes: 6 additions & 9 deletions src/de/key.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::de::simple_type::UnitOnly;
use crate::de::str2bool;
use crate::encoding::Decoder;
use crate::errors::serialize::DeError;
use crate::name::QName;
Expand All @@ -14,7 +13,10 @@ macro_rules! deserialize_num {
where
V: Visitor<'de>,
{
visitor.$visit(self.name.parse()?)
match self.name.parse() {
Ok(number) => visitor.$visit(number),
Err(_) => self.name.deserialize_str(visitor),
}
}
};
}
Expand Down Expand Up @@ -134,17 +136,12 @@ impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {

/// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
/// valid boolean representations are only `"true"`, `"false"`, `"1"`,
/// and `"0"`. But this method also handles following:
///
/// |`bool` |XML content
/// |-------|-------------------------------------------------------------
/// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
/// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`
/// and `"0"`.
fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
str2bool(self.name.as_ref(), visitor)
self.name.deserialize_bool(visitor)
}

deserialize_num!(deserialize_i8, visit_i8);
Expand Down
3 changes: 2 additions & 1 deletion src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ use crate::{
de::resolver::EntityResolver,
de::simple_type::SimpleTypeDeserializer,
de::text::TextDeserializer,
de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
de::{DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
encoding::Decoder,
errors::serialize::DeError,
errors::Error,
events::attributes::IterState,
events::BytesStart,
name::QName,
utils::CowRef,
};
use serde::de::value::BorrowedStrDeserializer;
use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
Expand Down
124 changes: 61 additions & 63 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1834,15 +1834,21 @@
// Also, macros should be imported before using them
use serde::serde_if_integer128;

macro_rules! deserialize_type {
macro_rules! deserialize_num {
($deserialize:ident => $visit:ident, $($mut:tt)?) => {
fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
// No need to unescape because valid integer representations cannot be escaped
let text = self.read_string()?;
visitor.$visit(text.parse()?)
match text.parse() {
Ok(number) => visitor.$visit(number),
Err(_) => match text {
Cow::Borrowed(t) => visitor.visit_str(t),
Cow::Owned(t) => visitor.visit_string(t),
}
}
}
};
}
Expand All @@ -1851,31 +1857,33 @@ macro_rules! deserialize_type {
/// byte arrays, booleans and identifiers.
macro_rules! deserialize_primitives {
($($mut:tt)?) => {
deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);
deserialize_num!(deserialize_i8 => visit_i8, $($mut)?);
deserialize_num!(deserialize_i16 => visit_i16, $($mut)?);
deserialize_num!(deserialize_i32 => visit_i32, $($mut)?);
deserialize_num!(deserialize_i64 => visit_i64, $($mut)?);

deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);
deserialize_num!(deserialize_u8 => visit_u8, $($mut)?);
deserialize_num!(deserialize_u16 => visit_u16, $($mut)?);
deserialize_num!(deserialize_u32 => visit_u32, $($mut)?);
deserialize_num!(deserialize_u64 => visit_u64, $($mut)?);

serde_if_integer128! {
deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
deserialize_num!(deserialize_i128 => visit_i128, $($mut)?);
deserialize_num!(deserialize_u128 => visit_u128, $($mut)?);
}

deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);
deserialize_num!(deserialize_f32 => visit_f32, $($mut)?);
deserialize_num!(deserialize_f64 => visit_f64, $($mut)?);

fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
let text = self.read_string()?;

str2bool(&text, visitor)
let text = match self.read_string()? {
Cow::Borrowed(s) => CowRef::Input(s),
Cow::Owned(s) => CowRef::Owned(s),
};
text.deserialize_bool(visitor)
}

/// Character represented as [strings](#method.deserialize_str).
Expand Down Expand Up @@ -1998,8 +2006,9 @@ mod simple_type;
mod text;
mod var;

pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
pub use self::simple_type::SimpleTypeDeserializer;
pub use crate::errors::serialize::DeError;
pub use resolver::{EntityResolver, PredefinedEntityResolver};

use crate::{
de::map::ElementMapAccess,
Expand All @@ -2008,8 +2017,11 @@ use crate::{
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
reader::Reader,
utils::CowRef,
};
use serde::de::{
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
};
use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
use std::borrow::Cow;
#[cfg(feature = "overlapped-lists")]
use std::collections::VecDeque;
Expand Down Expand Up @@ -2058,6 +2070,22 @@ impl<'a> From<&'a str> for Text<'a> {
}
}

/// Builds a [`Text`] that owns its content, taking over the given `String`
/// without copying it.
impl<'a> From<String> for Text<'a> {
    #[inline]
    fn from(text: String) -> Self {
        // The string is moved into the `Owned` variant, so no borrow of the
        // input is kept.
        let text = Cow::Owned(text);
        Self { text }
    }
}

/// Wraps an existing `Cow<str>` into a [`Text`] as is: a borrowed value stays
/// borrowed and an owned value stays owned.
impl<'a> From<Cow<'a, str>> for Text<'a> {
    #[inline]
    fn from(content: Cow<'a, str>) -> Self {
        Self { text: content }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Simplified event which contains only these variants that used by deserializer
Expand Down Expand Up @@ -2287,7 +2315,7 @@ where
}

/// Deserialize from a reader. This method will do internal copies of data
/// readed from `reader`. If you want have a `&str` input and want to borrow
/// read from `reader`. If you have a `&str` input and want to borrow
/// as much as possible, use [`from_str`].
pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
where
Expand All @@ -2298,49 +2326,6 @@ where
T::deserialize(&mut de)
}

/// Converts a textual boolean representation into a call to
/// [`Visitor::visit_bool`](de::Visitor::visit_bool).
///
/// Recognized spellings:
///
/// |`bool` |Text
/// |-------|----------------------------------------------------------------------------
/// |`true` |`"true"`, `"1"`, `"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
/// |`false`|`"false"`, `"0"`, `"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`
///
/// # Errors
///
/// Returns [`DeError::InvalidBoolean`] carrying the rejected text when `value`
/// is none of the spellings above.
// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
// valid boolean representations are only "true", "false", "1", and "0"
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
where
    V: de::Visitor<'de>,
{
    // First classify the text, then hand the result to the visitor in one place.
    let recognized = match value {
        "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => Some(true),
        "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => Some(false),
        _ => None,
    };

    match recognized {
        Some(flag) => visitor.visit_bool(flag),
        None => Err(DeError::InvalidBoolean(value.into())),
    }
}

/// Interprets raw bytes as a boolean and forwards it to `visitor.visit_bool`.
///
/// With the `encoding` feature the bytes are first decoded using `decoder`
/// and the resulting text is passed to `str2bool`. Without that feature the
/// bytes are compared directly against the accepted spellings, and `decoder`
/// is used only to render unrecognized input for the error.
///
/// # Errors
///
/// Returns [`DeError::InvalidBoolean`] when the input is not an accepted
/// boolean spelling, or propagates the error produced by `decoder.decode`.
fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    #[cfg(feature = "encoding")]
    {
        // No need to unescape because valid boolean representations cannot be escaped
        let decoded = decoder.decode(value)?;
        str2bool(decoded.as_ref(), visitor)
    }

    #[cfg(not(feature = "encoding"))]
    {
        // No need to unescape because valid boolean representations cannot be escaped
        let flag = match value {
            b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => true,
            b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => false,
            // Decode only on the failure path, to build a readable error payload.
            unknown => return Err(DeError::InvalidBoolean(decoder.decode(unknown)?.into())),
        };
        visitor.visit_bool(flag)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A structure that deserializes XML into Rust values.
Expand Down Expand Up @@ -3007,6 +2992,19 @@ where
}
}

/// Lets a mutable reference to a [`Deserializer`] be passed where an
/// [`IntoDeserializer`] is expected. The conversion is the identity: the very
/// same reference is returned.
impl<'de, 'a, R, E> IntoDeserializer<'de, DeError> for &'a mut Deserializer<'de, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    type Deserializer = Self;

    #[inline]
    fn into_deserializer(self) -> Self::Deserializer {
        self
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Helper struct that contains a state for an algorithm of converting events
Expand Down
Loading