diff --git a/Changelog.md b/Changelog.md index 5f0ddd0d..b0ffa638 100644 --- a/Changelog.md +++ b/Changelog.md @@ -33,6 +33,10 @@ XML specification. See the updated `custom_entities` example! ### Bug Fixes +- [#868]: Allow to have both `$text` and `$value` special fields in one struct. Previously + any text will be recognized as `$value` field even when `$text` field is also presented. +- [#868]: Skip text events when deserialize a sequence of items overlapped with text (including CDATA). + ### Misc Changes - [#863]: Remove `From> for BytesStart<'a>` because now `BytesStart` stores the @@ -42,6 +46,7 @@ XML specification. See the updated `custom_entities` example! [#766]: https://github.com/tafia/quick-xml/pull/766 [#863]: https://github.com/tafia/quick-xml/pull/863 +[#868]: https://github.com/tafia/quick-xml/pull/868 [general entity]: https://www.w3.org/TR/xml11/#gen-entity diff --git a/src/de/map.rs b/src/de/map.rs index c5540459..224d5bd1 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -192,6 +192,9 @@ where /// value for VALUE_KEY field /// ``` has_value_field: bool, + /// If `true`, then the deserialized struct has a field with a special name: + /// [`TEXT_KEY`]. + has_text_field: bool, } impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E> @@ -212,6 +215,7 @@ where source: ValueSource::Unknown, fields, has_value_field: fields.contains(&VALUE_KEY), + has_text_field: fields.contains(&TEXT_KEY), }) } @@ -264,10 +268,8 @@ where } else { // try getting from events (value) match self.de.peek()? { - // We shouldn't have both `$value` and `$text` fields in the same - // struct, so if we have `$value` field, the we should deserialize - // text content to `$value` - DeEvent::Text(_) if self.has_value_field => { + // If we have dedicated "$text" field, it will not be passed to "$value" field + DeEvent::Text(_) if self.has_value_field && !self.has_text_field => { self.source = ValueSource::Content; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the @@ -609,7 +611,7 @@ where _ => unreachable!(), } } else { - TagFilter::Exclude(self.map.fields) + TagFilter::Exclude(self.map.fields, self.map.has_text_field) }; visitor.visit_seq(MapValueSeqAccess { #[cfg(feature = "overlapped-lists")] @@ -850,15 +852,27 @@ enum TagFilter<'de> { Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag /// A `SeqAccess` interested in tags with any name, except explicitly listed. /// Excluded tags are used as struct field names and therefore should not - /// fall into a `$value` category - Exclude(&'static [&'static str]), + /// fall into a `$value` category. + /// + /// The `bool` represents the having of a `$text` special field in fields array. + /// It is used to exclude text events when `$text` fields is defined together with + /// `$value` fieldб and `$value` accepts sequence. + Exclude(&'static [&'static str], bool), } impl<'de> TagFilter<'de> { fn is_suitable(&self, start: &BytesStart) -> Result { match self { Self::Include(n) => Ok(n.name() == start.name()), - Self::Exclude(fields) => not_in(fields, start), + Self::Exclude(fields, _) => not_in(fields, start), + } + } + const fn need_skip_text(&self) -> bool { + match self { + // If we look only for tags, we should skip any $text keys + Self::Include(_) => true, + // If we look fo any data, we should exclude $text keys if it in the list + Self::Exclude(_, has_text_field) => *has_text_field, } } } @@ -942,9 +956,17 @@ where self.map.de.skip()?; continue; } + // Skip any text events if sequence expects only specific tag names + #[cfg(feature = "overlapped-lists")] + DeEvent::Text(_) if self.filter.need_skip_text() => { + self.map.de.skip()?; + continue; + } // Stop iteration when list elements ends #[cfg(not(feature = "overlapped-lists"))] DeEvent::Start(e) if !self.filter.is_suitable(e)? => Ok(None), + #[cfg(not(feature = "overlapped-lists"))] + DeEvent::Text(_) if self.filter.need_skip_text() => Ok(None), // Stop iteration after reaching a closing tag // The matching tag name is guaranteed by the reader diff --git a/src/de/mod.rs b/src/de/mod.rs index 2b7d0f3c..3c04ba14 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1506,6 +1506,28 @@ //! The only difference is in how complex types and sequences are serialized. //! If you doubt which one you should select, begin with [`$value`](#value). //! +//! If you have both `$text` and `$value` in you struct, then text events will be +//! mapped to the `$text` field: +//! +//! ``` +//! # use serde::Deserialize; +//! # use quick_xml::de::from_str; +//! #[derive(Deserialize, PartialEq, Debug)] +//! struct TextAndValue { +//! #[serde(rename = "$text")] +//! text: Option, +//! +//! #[serde(rename = "$value")] +//! value: Option, +//! } +//! +//! let object: TextAndValue = from_str("text ").unwrap(); +//! assert_eq!(object, TextAndValue { +//! text: Some("text and CDATA".to_string()), +//! value: None, +//! }); +//! ``` +//! //! ## `$text` //! `$text` is used when you want to write your XML as a text or a CDATA content. //! More formally, field with that name represents simple type definition with @@ -1744,12 +1766,6 @@ //! assert_eq!(object, obj); //! ``` //! -//! ---------------------------------------------------------------------------- -//! -//! You can have either `$text` or `$value` field in your structs. Unfortunately, -//! that is not enforced, so you can theoretically have both, but you should -//! avoid that. -//! //! //! //! Frequently Used Patterns diff --git a/tests/serde-de-seq.rs b/tests/serde-de-seq.rs index a1aaa6dc..d8287ba6 100644 --- a/tests/serde-de-seq.rs +++ b/tests/serde-de-seq.rs @@ -821,6 +821,28 @@ mod fixed_name { ); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + item: [(); 2], + } + + from_str::( + r#" + + + + text + + + "#, + ) + .unwrap(); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by [`xs:list`s](https://www.w3schools.com/xml/el_list.asp) mod xs_list { @@ -1656,6 +1678,36 @@ mod fixed_name { ); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + item: Vec<()>, + } + + let data: List = from_str( + r#" + + + + text + + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + text: (), + item: vec![(); 2], + } + ); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by `xs:list`s mod xs_list { @@ -2883,6 +2935,29 @@ mod variable_name { ); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + #[serde(rename = "$value")] + value: [(); 2], + } + + from_str::( + r#" + + + + text + + + "#, + ) + .unwrap(); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by `xs:list`s mod xs_list { @@ -3929,6 +4004,37 @@ mod variable_name { .unwrap_err(); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + #[serde(rename = "$value")] + value: Vec<()>, + } + + let data: List = from_str( + r#" + + + + text + + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + text: (), + value: vec![(); 2], + } + ); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by `xs:list`s mod xs_list { diff --git a/tests/serde-issues.rs b/tests/serde-issues.rs index 7826669d..f9d4ee1e 100644 --- a/tests/serde-issues.rs +++ b/tests/serde-issues.rs @@ -570,3 +570,86 @@ fn issue683() { } ); } + +/// Regression test for https://github.com/tafia/quick-xml/issues/868. +#[test] +fn issue868() { + #[derive(Debug, PartialEq, Deserialize)] + #[serde(rename = "root")] + pub struct Root { + #[serde(rename = "key")] + pub key: Key, + } + + #[derive(Debug, PartialEq, Deserialize)] + #[serde(rename = "key")] + pub struct Key { + #[serde(rename = "value")] + pub values: Option>, + } + + #[derive(Debug, PartialEq, Deserialize)] + #[serde(rename = "Value")] + pub struct Value { + #[serde(rename = "@id")] + pub id: String, + + #[serde(rename = "$text")] + pub text: Option, + } + let xml = r#" + + + + text1 + text2 + text3 + text4d + text5 + text6 + + "#; + let data = quick_xml::de::from_str::(xml); + #[cfg(feature = "overlapped-lists")] + assert_eq!( + data.unwrap(), + Root { + key: Key { + values: Some(vec![ + Value { + id: "1".to_string(), + text: Some("text1".to_string()) + }, + Value { + id: "2".to_string(), + text: Some("text2".to_string()) + }, + Value { + id: "3".to_string(), + text: Some("text3".to_string()) + }, + Value { + id: "4".to_string(), + text: Some("text4".to_string()) + }, + Value { + id: "5".to_string(), + text: Some("text5".to_string()) + }, + Value { + id: "6".to_string(), + text: Some("text6".to_string()) + }, + ]), + }, + } + ); + #[cfg(not(feature = "overlapped-lists"))] + match data { + Err(quick_xml::DeError::Custom(e)) => assert_eq!(e, "duplicate field `value`"), + e => panic!( + r#"Expected `Err(Custom("duplicate field `value`"))`, but got `{:?}`"#, + e + ), + } +}