From 4ac34a08a7627d2b72624c30a47a091aaebbea51 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 6 Jun 2023 19:26:25 -0700 Subject: [PATCH 1/4] Re-use UnvalidatedStr impls in NormalizedPropertyNameStr --- components/properties/src/provider/names.rs | 109 ++++++++++++-------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/components/properties/src/provider/names.rs b/components/properties/src/provider/names.rs index 0ab6563fcf2..ab2324616ea 100644 --- a/components/properties/src/provider/names.rs +++ b/components/properties/src/provider/names.rs @@ -14,14 +14,14 @@ use alloc::boxed::Box; use core::cmp::Ordering; -use core::fmt; use core::str; use icu_provider::prelude::*; use tinystr::TinyStr4; -use zerovec::{maps::ZeroMapKV, ule::VarULE, VarZeroSlice, VarZeroVec, ZeroMap, ZeroVec}; +use zerovec::ule::{UnvalidatedStr, VarULE}; +use zerovec::{maps::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroMap, ZeroVec}; /// This is a property name that can be "loose matched" as according to /// [PropertyValueAliases.txt](https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt) @@ -39,48 +39,72 @@ use zerovec::{maps::ZeroMapKV, ule::VarULE, VarZeroSlice, VarZeroVec, ZeroMap, Z /// including in SemVer minor releases. While the serde representation of data structs is guaranteed /// to be stable, their Rust representation might not be. Use with caution. 
/// +/// +/// # Examples +/// +/// Using a [`NormalizedPropertyNameStr`] as the key of a [`ZeroMap`]: +/// +/// ``` +/// use icu_properties::provider::names::NormalizedPropertyNameStr; +/// use zerovec::ZeroMap; +/// +/// let map: ZeroMap = [ +/// (NormalizedPropertyNameStr::from_str("A_BC"), 11), +/// (NormalizedPropertyNameStr::from_str("dEf"), 22), +/// (NormalizedPropertyNameStr::from_str("G_H-I"), 33), +/// ] +/// .into_iter() +/// .collect(); +/// +/// let key_approx = NormalizedPropertyNameStr::from_str("AB-C"); +/// let key_exact = NormalizedPropertyNameStr::from_str("A_BC"); +/// +/// // Strict lookup: +/// assert_eq!(None, map.get_copied(key_approx)); +/// assert_eq!(Some(11), map.get_copied(key_exact)); +/// +/// // Loose lookup: +/// assert_eq!(Some(11), map.get_copied_by(|u| u.cmp_loose(key_approx))); +/// assert_eq!(Some(11), map.get_copied_by(|u| u.cmp_loose(key_exact))); +/// ``` #[derive(PartialEq, Eq)] // VarULE wants these to be byte equality -#[derive(VarULE)] +#[derive(Debug, VarULE)] #[repr(transparent)] -pub struct NormalizedPropertyNameStr([u8]); +pub struct NormalizedPropertyNameStr(UnvalidatedStr); -#[cfg(feature = "datagen")] +/// This impl requires enabling the optional `serde` Cargo feature of the `icu_properties` crate +#[cfg(feature = "serde")] impl serde::Serialize for NormalizedPropertyNameStr { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - use serde::ser::Error; - if serializer.is_human_readable() { - let s = str::from_utf8(&self.0) - .map_err(|_| S::Error::custom("Attempted to datagen invalid string property"))?; - serializer.serialize_str(s) - } else { - serializer.serialize_bytes(&self.0) - } + self.0.serialize(serializer) } } +/// This impl requires enabling the optional `serde` Cargo feature of the `icu_properties` crate #[cfg(feature = "serde")] impl<'de> serde::Deserialize<'de> for Box { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { - use alloc::borrow::Cow; - 
let s; // lifetime lengthening - let b; - // Can be improved with https://github.com/unicode-org/icu4x/issues/2310 - // the allocations here are fine, in normal ICU4X code they'll only get hit - // during human-readable deserialization - let bytes = if deserializer.is_human_readable() { - s = >::deserialize(deserializer)?; - s.as_bytes() - } else { - b = >::deserialize(deserializer)?; - &b - }; - Ok(NormalizedPropertyNameStr::boxed_from_bytes(bytes)) + >::deserialize(deserializer).map(From::from) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `icu_properties` crate +#[cfg(feature = "serde")] +impl<'de, 'a> serde::Deserialize<'de> for &'a NormalizedPropertyNameStr +where + 'de: 'a, +{ + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + <&UnvalidatedStr>::deserialize(deserializer).map(From::from) } } @@ -134,30 +158,31 @@ impl Ord for NormalizedPropertyNameStr { } } -impl fmt::Debug for NormalizedPropertyNameStr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if let Ok(s) = str::from_utf8(&self.0) { - f.write_str(s) - } else { - f.write_str("(invalid utf8)") - } +impl<'a> From<&'a UnvalidatedStr> for &'a NormalizedPropertyNameStr { + fn from(value: &'a UnvalidatedStr) -> Self { + // Safety: repr(transparent) + unsafe { core::mem::transmute(value) } + } +} + +impl From> for Box { + fn from(value: Box) -> Self { + // Safety: repr(transparent) + unsafe { core::mem::transmute(value) } } } impl NormalizedPropertyNameStr { - pub(crate) fn cmp_loose(&self, other: &Self) -> Ordering { + /// Perform the loose comparison as defined in [`NormalizedPropertyNameStr`]. 
+ pub fn cmp_loose(&self, other: &Self) -> Ordering { let self_iter = self.0.iter().copied().filter_map(normalize_char); let other_iter = other.0.iter().copied().filter_map(normalize_char); self_iter.cmp(other_iter) } - #[cfg(feature = "serde")] - /// Get a `Box` from a byte slice - pub fn boxed_from_bytes(b: &[u8]) -> Box { - #[allow(clippy::expect_used)] // Self has no invariants - // can be cleaned up with https://github.com/unicode-org/icu4x/issues/2310 - let this = Self::parse_byte_slice(b).expect("NormalizedPropertyName has no invariants"); - zerovec::ule::encode_varule_to_box(&this) + /// Convert a string to a [`NormalizedPropertyNameStr`]. + pub fn from_str(s: &str) -> &Self { + UnvalidatedStr::from_str(s).into() } } From 2d046619ad0d5aefef345d055952864ef97e78ca Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 6 Jun 2023 19:28:17 -0700 Subject: [PATCH 2/4] Add back boxed_from_bytes --- components/properties/src/provider/names.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/components/properties/src/provider/names.rs b/components/properties/src/provider/names.rs index ab2324616ea..7ae676f4324 100644 --- a/components/properties/src/provider/names.rs +++ b/components/properties/src/provider/names.rs @@ -184,6 +184,15 @@ impl NormalizedPropertyNameStr { pub fn from_str(s: &str) -> &Self { UnvalidatedStr::from_str(s).into() } + + /// Get a `Box` from a byte slice + pub fn boxed_from_bytes(b: &[u8]) -> Box { + #[allow(clippy::expect_used)] // Self has no invariants + // can be cleaned up with https://github.com/unicode-org/icu4x/issues/2310 + let this = Self::parse_byte_slice(b).expect("NormalizedPropertyName has no invariants"); + + zerovec::ule::encode_varule_to_box(&this) + } } /// A set of characters and strings which share a particular property value. From 83b9ea72b6c230473fe714f3a956228d259dc9ad Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Tue, 6 Jun 2023 22:02:10 -0700 Subject: [PATCH 3/4] Cleanup --- components/properties/src/provider/names.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/components/properties/src/provider/names.rs b/components/properties/src/provider/names.rs index 7ae676f4324..895ddf002dd 100644 --- a/components/properties/src/provider/names.rs +++ b/components/properties/src/provider/names.rs @@ -69,20 +69,10 @@ use zerovec::{maps::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroMap, ZeroVec}; /// ``` #[derive(PartialEq, Eq)] // VarULE wants these to be byte equality #[derive(Debug, VarULE)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] #[repr(transparent)] pub struct NormalizedPropertyNameStr(UnvalidatedStr); -/// This impl requires enabling the optional `serde` Cargo feature of the `icu_properties` crate -#[cfg(feature = "serde")] -impl serde::Serialize for NormalizedPropertyNameStr { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.0.serialize(serializer) - } -} - /// This impl requires enabling the optional `serde` Cargo feature of the `icu_properties` crate #[cfg(feature = "serde")] impl<'de> serde::Deserialize<'de> for Box { @@ -187,11 +177,7 @@ impl NormalizedPropertyNameStr { /// Get a `Box` from a byte slice pub fn boxed_from_bytes(b: &[u8]) -> Box { - #[allow(clippy::expect_used)] // Self has no invariants - // can be cleaned up with https://github.com/unicode-org/icu4x/issues/2310 - let this = Self::parse_byte_slice(b).expect("NormalizedPropertyName has no invariants"); - - zerovec::ule::encode_varule_to_box(&this) + Box::::from(UnvalidatedStr::from_boxed_bytes(b.into())) } } From ad886cc41120b8f1869fa4e41b63c51b1df3f93b Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Tue, 6 Jun 2023 22:08:51 -0700 Subject: [PATCH 4/4] Const functions --- components/properties/src/provider/names.rs | 40 ++++++++++----------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/components/properties/src/provider/names.rs b/components/properties/src/provider/names.rs index 895ddf002dd..baf5fc1f289 100644 --- a/components/properties/src/provider/names.rs +++ b/components/properties/src/provider/names.rs @@ -80,7 +80,7 @@ impl<'de> serde::Deserialize<'de> for Box { where D: serde::Deserializer<'de>, { - >::deserialize(deserializer).map(From::from) + >::deserialize(deserializer).map(NormalizedPropertyNameStr::cast_box) } } @@ -94,7 +94,7 @@ where where D: serde::Deserializer<'de>, { - <&UnvalidatedStr>::deserialize(deserializer).map(From::from) + <&UnvalidatedStr>::deserialize(deserializer).map(NormalizedPropertyNameStr::cast_ref) } } @@ -148,20 +148,6 @@ impl Ord for NormalizedPropertyNameStr { } } -impl<'a> From<&'a UnvalidatedStr> for &'a NormalizedPropertyNameStr { - fn from(value: &'a UnvalidatedStr) -> Self { - // Safety: repr(transparent) - unsafe { core::mem::transmute(value) } - } -} - -impl From> for Box { - fn from(value: Box) -> Self { - // Safety: repr(transparent) - unsafe { core::mem::transmute(value) } - } -} - impl NormalizedPropertyNameStr { /// Perform the loose comparison as defined in [`NormalizedPropertyNameStr`]. pub fn cmp_loose(&self, other: &Self) -> Ordering { @@ -170,14 +156,26 @@ impl NormalizedPropertyNameStr { self_iter.cmp(other_iter) } - /// Convert a string to a [`NormalizedPropertyNameStr`]. - pub fn from_str(s: &str) -> &Self { - UnvalidatedStr::from_str(s).into() + /// Convert a string reference to a [`NormalizedPropertyNameStr`]. + pub const fn from_str(s: &str) -> &Self { + Self::cast_ref(UnvalidatedStr::from_str(s)) + } + + /// Convert a [`UnvalidatedStr`] reference to a [`NormalizedPropertyNameStr`] reference. 
+ pub const fn cast_ref(value: &UnvalidatedStr) -> &Self { + // Safety: repr(transparent) + unsafe { core::mem::transmute(value) } + } + + /// Convert a [`UnvalidatedStr`] box to a [`NormalizedPropertyNameStr`] box. + pub const fn cast_box(value: Box) -> Box { + // Safety: repr(transparent) + unsafe { core::mem::transmute(value) } + } - /// Get a `Box` from a byte slice + /// Get a [`NormalizedPropertyNameStr`] box from a byte slice. pub fn boxed_from_bytes(b: &[u8]) -> Box { - Box::::from(UnvalidatedStr::from_boxed_bytes(b.into())) + Self::cast_box(UnvalidatedStr::from_boxed_bytes(b.into())) } }