Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly use xsi:nil to deserialize null values via serde #842

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,16 @@ where
where
V: Visitor<'de>,
{
match self.map.de.peek()? {
let _ = self.map.de.peek()?;
match self.map.de.readable_peek().expect("This exists as we called peek before") {
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Start(start)
// if the `xsi:nil` attribute is set to true we got a none value
if start.has_nil_attr(&self.map.de.reader.reader) =>
{
self.map.de.skip_nil_tag()?;
visitor.visit_none()
}
_ => visitor.visit_some(self),
}
}
Expand Down
86 changes: 75 additions & 11 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2010,14 +2010,15 @@ pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
pub use self::simple_type::SimpleTypeDeserializer;
pub use crate::errors::serialize::DeError;

use crate::name::{LocalName, Namespace, PrefixDeclaration, ResolveResult};
use crate::{
de::map::ElementMapAccess,
encoding::Decoder,
errors::Error,
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
reader::Reader,
utils::CowRef,
NsReader,
};
use serde::de::{
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
Expand Down Expand Up @@ -2534,6 +2535,17 @@ where
}
}

fn readable_peek(&self) -> Option<&DeEvent<'de>> {
#[cfg(not(feature = "overlapped-lists"))]
{
self.peek.as_ref()
}
#[cfg(feature = "overlapped-lists")]
{
self.read.front()
}
}

fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
// Replay skipped or peeked events
#[cfg(feature = "overlapped-lists")]
Expand Down Expand Up @@ -2764,6 +2776,14 @@ where
}
self.reader.read_to_end(name)
}

fn skip_nil_tag(&mut self) -> Result<(), DeError> {
let DeEvent::Start(start) = self.next()? else {
unreachable!("Only call this if the next event is a start event")
};
let name = start.name();
self.read_to_end(name)
}
}

impl<'de> Deserializer<'de, SliceReader<'de>> {
Expand All @@ -2783,7 +2803,7 @@ where
/// Create new deserializer that will borrow data from the specified string
/// and use specified entity resolver.
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
let mut reader = Reader::from_str(source);
let mut reader = NsReader::from_str(source);
let config = reader.config_mut();
config.expand_empty_elements = true;

Expand Down Expand Up @@ -2826,7 +2846,7 @@ where
/// will borrow instead of copy. If you have `&[u8]` which is known to represent
/// UTF-8, you can decode it first before using [`from_str`].
pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
let mut reader = Reader::from_reader(reader);
let mut reader = NsReader::from_reader(reader);
let config = reader.config_mut();
config.expand_empty_elements = true;

Expand Down Expand Up @@ -2945,9 +2965,17 @@ where
where
V: Visitor<'de>,
{
match self.peek()? {
let _ = self.peek()?;
match self.readable_peek().expect("This exists as we called peek before") {
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Eof => visitor.visit_none(),
DeEvent::Start(start)
// if the `xsi:nil` attribute is set to true we got a none value
if start.has_nil_attr(&self.reader.reader) =>
{
self.skip_nil_tag()?;
visitor.visit_none()
}
_ => visitor.visit_some(self),
}
}
Expand Down Expand Up @@ -3071,14 +3099,22 @@ pub trait XmlRead<'i> {

/// A copy of the reader's decoder used to decode strings.
fn decoder(&self) -> Decoder;

/// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
///
/// See [`NsReader::resolve_attribute`] for details
fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>);

/// Get the current default namespace
fn default_namespace(&self) -> Option<Namespace<'_>>;
}

/// XML input source that reads from a std::io input stream.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
reader: Reader<R>,
reader: NsReader<R>,
start_trimmer: StartTrimmer,
buf: Vec<u8>,
}
Expand Down Expand Up @@ -3113,7 +3149,7 @@ impl<R: BufRead> IoReader<R> {
/// assert_eq!(reader.error_position(), 28);
/// assert_eq!(reader.buffer_position(), 41);
/// ```
pub const fn get_ref(&self) -> &Reader<R> {
pub const fn get_ref(&self) -> &NsReader<R> {
&self.reader
}
}
Expand All @@ -3140,14 +3176,28 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
fn decoder(&self) -> Decoder {
self.reader.decoder()
}

fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
self.reader.resolve_attribute(name)
}

fn default_namespace(&self) -> Option<Namespace<'_>> {
self.reader.prefixes().find_map(|(key, value)| {
if PrefixDeclaration::Default == key {
Some(value)
} else {
None
}
})
}
}

/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
reader: Reader<&'de [u8]>,
reader: NsReader<&'de [u8]>,
start_trimmer: StartTrimmer,
}

Expand Down Expand Up @@ -3180,7 +3230,7 @@ impl<'de> SliceReader<'de> {
/// assert_eq!(reader.error_position(), 28);
/// assert_eq!(reader.buffer_position(), 41);
/// ```
pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
pub const fn get_ref(&self) -> &NsReader<&'de [u8]> {
&self.reader
}
}
Expand All @@ -3205,6 +3255,20 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
fn decoder(&self) -> Decoder {
self.reader.decoder()
}

fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
self.reader.resolve_attribute(name)
}

fn default_namespace(&self) -> Option<Namespace<'_>> {
self.reader.prefixes().find_map(|(key, value)| {
if PrefixDeclaration::Default == key {
Some(value)
} else {
None
}
})
}
}

#[cfg(test)]
Expand Down Expand Up @@ -3781,12 +3845,12 @@ mod tests {
"#;

let mut reader1 = IoReader {
reader: Reader::from_reader(s.as_bytes()),
reader: NsReader::from_reader(s.as_bytes()),
start_trimmer: StartTrimmer::default(),
buf: Vec::new(),
};
let mut reader2 = SliceReader {
reader: Reader::from_str(s),
reader: NsReader::from_str(s),
start_trimmer: StartTrimmer::default(),
};

Expand All @@ -3812,7 +3876,7 @@ mod tests {
"#;

let mut reader = SliceReader {
reader: Reader::from_str(s),
reader: NsReader::from_str(s),
start_trimmer: StartTrimmer::default(),
};

Expand Down
35 changes: 34 additions & 1 deletion src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,17 @@ use crate::errors::{Error, IllFormedError};
use crate::escape::{
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
};
#[cfg(feature = "serialize")]
use crate::name::Namespace;
use crate::name::{LocalName, QName};
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
use attributes::{AttrError, Attribute, Attributes};

#[cfg(feature = "serialize")]
const XSI_NAMESPACE_URL: Namespace = Namespace(b"http://www.w3.org/2001/XMLSchema-instance");

/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
Expand Down Expand Up @@ -232,6 +237,34 @@ impl<'a> BytesStart<'a> {
Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
}
}

/// This method checks if the current tag has a `xsi::nil` attribute
///
/// This attribute should be used for deciding if the value is not set
/// according to https://www.w3.org/TR/xmlschema-1/#xsi_nil
#[cfg(feature = "serialize")]
pub(crate) fn has_nil_attr(&self, reader: &dyn crate::de::XmlRead) -> bool {
use crate::name::ResolveResult;

let default_ns = reader.default_namespace();
self.attributes().any(|attr| {
if let Ok(attr) = attr {
let value_is_true = &*attr.value == b"true" || &*attr.value == b"1";
let might_be_nil_attr = attr.key.0.ends_with(b"nil");
if value_is_true && might_be_nil_attr {
let (res, local_name) = reader.resolve_attribute(attr.key);
(matches!(res, ResolveResult::Bound(XSI_NAMESPACE_URL))
|| (matches!(res, ResolveResult::Unbound)
&& default_ns == Some(XSI_NAMESPACE_URL)))
&& local_name.as_ref() == b"nil"
} else {
false
}
} else {
false
}
})
}
}

/// Attribute-related methods
Expand Down Expand Up @@ -278,7 +311,7 @@ impl<'a> BytesStart<'a> {
}

/// Returns an iterator over the attributes of this tag.
pub fn attributes(&self) -> Attributes {
pub fn attributes<'b>(&'b self) -> Attributes<'b> {
Attributes::wrap(&self.buf, self.name_len, false)
}

Expand Down
Loading