Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use zeroslice! macros in ICU4X #3455

Merged
merged 11 commits into from
Jun 2, 2023
14 changes: 5 additions & 9 deletions components/collator/src/elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use icu_properties::CanonicalCombiningClass;
use smallvec::SmallVec;
use zerovec::ule::AsULE;
use zerovec::ule::RawBytesULE;
use zerovec::ZeroSlice;
use zerovec::{zeroslice, ZeroSlice};

use crate::provider::CollationDataV1;

Expand Down Expand Up @@ -147,17 +147,13 @@ pub(crate) const FFFD_CE: CollationElement = CollationElement(FFFD_CE_VALUE);
pub(crate) const FFFD_CE32_VALUE: u32 = 0xFFFD0505;
pub(crate) const FFFD_CE32: CollationElement32 = CollationElement32(FFFD_CE32_VALUE);

pub(crate) const EMPTY_U16: &ZeroSlice<u16> =
ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([]));
pub(crate) const EMPTY_U16: &ZeroSlice<u16> = zeroslice![];
const SINGLE_REPLACEMENT_CHARACTER_U16: &ZeroSlice<u16> =
ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([
REPLACEMENT_CHARACTER as u16
]));

pub(crate) const EMPTY_CHAR: &ZeroSlice<char> = ZeroSlice::new_empty();
zeroslice![u16; <u16 as AsULE>::ULE::from_unsigned; REPLACEMENT_CHARACTER as u16];

pub(crate) const EMPTY_CHAR: &ZeroSlice<char> = zeroslice![];
const SINGLE_REPLACEMENT_CHARACTER_CHAR: &ZeroSlice<char> =
ZeroSlice::<char>::from_ule_slice(&<char as AsULE>::ULE::from_array([REPLACEMENT_CHARACTER]));
zeroslice![char; <char as AsULE>::ULE::from_aligned; REPLACEMENT_CHARACTER];

/// If `opt` is `Some`, unwrap it. If `None`, panic if debug assertions
/// are enabled and return `default` if debug assertions are not enabled.
Expand Down
6 changes: 3 additions & 3 deletions components/collator/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use icu_collections::char16trie::Char16TrieIterator;
use icu_collections::codepointtrie::CodePointTrie;
use icu_provider::prelude::*;
use zerovec::ule::AsULE;
use zerovec::ZeroSlice;
use zerovec::ZeroVec;
use zerovec::{zeroslice, ZeroSlice};

use crate::elements::CollationElement;
use crate::elements::CollationElement32;
Expand All @@ -39,9 +39,9 @@ use super::CaseFirst;
use super::MaxVariable;

const SINGLE_U32: &ZeroSlice<u32> =
ZeroSlice::<u32>::from_ule_slice(&<u32 as AsULE>::ULE::from_array([FFFD_CE32_VALUE]));
zeroslice![u32; <u32 as AsULE>::ULE::from_unsigned; FFFD_CE32_VALUE];
const SINGLE_U64: &ZeroSlice<u64> =
ZeroSlice::<u64>::from_ule_slice(&<u64 as AsULE>::ULE::from_array([FFFD_CE_VALUE]));
zeroslice![u64; <u64 as AsULE>::ULE::from_unsigned; FFFD_CE_VALUE];

fn data_ce_to_primary(data_ce: u64, c: char) -> u32 {
// Collation::getThreeBytePrimaryForOffsetData
Expand Down
17 changes: 7 additions & 10 deletions components/collections/src/codepointinvlist/cpinvlist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use alloc::vec::Vec;
use core::{char, ops::RangeBounds, ops::RangeInclusive};
use yoke::Yokeable;
use zerofrom::ZeroFrom;
use zerovec::{ule::AsULE, ZeroSlice, ZeroVec};
use zerovec::{ule::AsULE, zerovec, ZeroVec};

use super::CodePointInversionListError;
use crate::codepointinvlist::utils::{deconstruct_range, is_valid_zv};
Expand All @@ -17,15 +17,12 @@ use crate::codepointinvlist::utils::{deconstruct_range, is_valid_zv};
const BMP_MAX: u32 = 0xFFFF;

/// Represents the inversion list for a set of all code points in the Basic Multilingual Plane.
const BMP_INV_LIST_SLICE: &ZeroSlice<u32> =
ZeroSlice::<u32>::from_ule_slice(&<u32 as AsULE>::ULE::from_array([0x0, BMP_MAX + 1]));
const BMP_INV_LIST_VEC: ZeroVec<u32> =
zerovec![u32; <u32 as AsULE>::ULE::from_unsigned; 0x0, BMP_MAX + 1];

/// Represents the inversion list for all of the code points in the Unicode range.
const ALL_SLICE: &ZeroSlice<u32> =
ZeroSlice::<u32>::from_ule_slice(&<u32 as AsULE>::ULE::from_array([
0x0,
(char::MAX as u32) + 1,
]));
const ALL_VEC: ZeroVec<u32> =
zerovec![u32; <u32 as AsULE>::ULE::from_unsigned; 0x0, (char::MAX as u32) + 1];

/// A membership wrapper for [`CodePointInversionList`].
///
Expand Down Expand Up @@ -297,7 +294,7 @@ impl<'data> CodePointInversionList<'data> {
/// ```
pub fn all() -> Self {
Self {
inv_list: ALL_SLICE.as_zerovec(),
inv_list: ALL_VEC,
size: (char::MAX as usize) + 1,
}
}
Expand Down Expand Up @@ -326,7 +323,7 @@ impl<'data> CodePointInversionList<'data> {
/// ```
pub fn bmp() -> Self {
Self {
inv_list: BMP_INV_LIST_SLICE.as_zerovec(),
inv_list: BMP_INV_LIST_VEC,
size: (BMP_MAX as usize) + 1,
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ fn one_hundred_code_points(sample_str: &str) -> String {
fn get_trie_small() -> CodePointTrie<'static, u8> {
CodePointTrie::try_new(
tries::gc_small::HEADER,
tries::gc_small::INDEX.as_zerovec(),
tries::gc_small::DATA.as_zerovec(),
tries::gc_small::INDEX,
tries::gc_small::DATA,
)
.unwrap()
}
Expand All @@ -43,8 +43,8 @@ fn get_trie_small() -> CodePointTrie<'static, u8> {
fn get_trie_fast() -> CodePointTrie<'static, u8> {
CodePointTrie::try_new(
tries::gc_fast::HEADER,
tries::gc_fast::INDEX.as_zerovec(),
tries::gc_fast::DATA.as_zerovec(),
tries::gc_fast::INDEX,
tries::gc_fast::DATA,
)
.unwrap()
}
Expand Down
8 changes: 4 additions & 4 deletions components/collections/src/codepointtrie/benches/iai_cpt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ const SAMPLE_STRING_MIXED: &str = "Dèclaråcion ЗАГАЛЬНА 世界人权
fn get_trie_small() -> CodePointTrie<'static, u8> {
CodePointTrie::try_new(
tries::gc_small::HEADER,
tries::gc_small::INDEX.as_zerovec(),
tries::gc_small::DATA.as_zerovec(),
tries::gc_small::INDEX,
tries::gc_small::DATA,
)
.unwrap()
}

fn get_trie_fast() -> CodePointTrie<'static, u8> {
CodePointTrie::try_new(
tries::gc_fast::HEADER,
tries::gc_fast::INDEX.as_zerovec(),
tries::gc_fast::DATA.as_zerovec(),
tries::gc_fast::INDEX,
tries::gc_fast::DATA,
)
.unwrap()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_collections::codepointtrie::{CodePointTrieHeader, TrieType};
use zerovec::{ule::RawBytesULE, ZeroSlice};
use zerovec::ule::AsULE;
use zerovec::{zerovec, ZeroVec};

#[rustfmt::skip]
pub static INDEX: &ZeroSlice<u16> = ZeroSlice::<u16>::from_ule_slice(&RawBytesULE::<2>::from_array([
pub const INDEX: ZeroVec<u16> = zerovec![u16; <u16 as AsULE>::ULE::from_unsigned;
0,0x40,0x7f,0xbf,0xff,0x12e,0x16d,0x1ad,0x1e5,0x224,0x250,0x28e,0x2ce,0x2de,0x31e,0x34f,
0x38c,0x3bc,0x3fa,0x43a,0x44a,0x47b,0x4b2,0x4f2,0x532,0x572,0x5a3,0x5cf,0x60f,0x644,0x65e,0x69e,
0x6de,0x71e,0x756,0x78d,0x7ca,0x809,0x848,0x887,0x8c6,0x905,0x944,0x983,0x9c3,0xa01,0xa3f,0xa7f,
Expand Down Expand Up @@ -198,10 +199,10 @@ pub static INDEX: &ZeroSlice<u16> = ZeroSlice::<u16>::from_ule_slice(&RawBytesUL
0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x66a,0x380,0x380,
0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,
0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0x380,0xaa0
]));
];

#[rustfmt::skip]
pub static DATA: &ZeroSlice<u8> = ZeroSlice::<u8>::from_ule_slice(&[
pub const DATA: ZeroVec<u8> = zerovec![u8; core::convert::identity;
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
0xc,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18,0x17,0x13,0x17,0x17,
Expand Down Expand Up @@ -1111,7 +1112,7 @@ pub static DATA: &ZeroSlice<u8> = ZeroSlice::<u8>::from_ule_slice(&[
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x10,0x10,
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x11,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0
]);
];

pub static HEADER: CodePointTrieHeader = CodePointTrieHeader {
high_start: 0x110000,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_collections::codepointtrie::{CodePointTrieHeader, TrieType};
use zerovec::{ule::RawBytesULE, ZeroSlice};
use zerovec::ule::AsULE;
use zerovec::{zerovec, ZeroVec};

#[rustfmt::skip]
pub static INDEX: &ZeroSlice<u16> = ZeroSlice::<u16>::from_ule_slice(&RawBytesULE::<2>::from_array([
pub const INDEX: ZeroVec<u16> = zerovec![u16; <u16 as AsULE>::ULE::from_unsigned;
0,0x40,0x7f,0xbf,0xff,0x12e,0x16d,0x1ad,0x1e5,0x224,0x250,0x28e,0x2ce,0x2de,0x31e,0x34f,
0x38c,0x3bc,0x3fa,0x43a,0x44a,0x47b,0x4b2,0x4f2,0x532,0x572,0x5a3,0x5cf,0x60f,0x644,0x65e,0x69e,
0x6de,0x71e,0x756,0x78d,0x7ca,0x809,0x848,0x887,0x8c6,0x905,0x944,0x983,0x9c3,0xa01,0xa3f,0xa7f,
Expand Down Expand Up @@ -220,10 +221,10 @@ pub static INDEX: &ZeroSlice<u16> = ZeroSlice::<u16>::from_ule_slice(&RawBytesUL
0x767,0x767,0x767,0x767,0x767,0x767,0x767,0x767,0x767,0x767,0x767,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,
0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,
0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0xb9d
]));
];

#[rustfmt::skip]
pub static DATA: &ZeroSlice<u8> = ZeroSlice::<u8>::from_ule_slice(&[
pub const DATA: ZeroVec<u8> = zerovec![u8; core::convert::identity;
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
0xc,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18,0x17,0x13,0x17,0x17,
Expand Down Expand Up @@ -848,7 +849,7 @@ pub static DATA: &ZeroSlice<u8> = ZeroSlice::<u8>::from_ule_slice(&[
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x10,0x10,
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x11,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0,0,0
]);
];

pub static HEADER: CodePointTrieHeader = CodePointTrieHeader {
high_start: 0x110000,
Expand Down
8 changes: 3 additions & 5 deletions components/normalizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ use utf16_iter::Utf16CharsEx;
use utf8_iter::Utf8CharsEx;
use write16::Write16;
use zerofrom::ZeroFrom;
use zerovec::ule::AsULE;
use zerovec::ZeroSlice;
use zerovec::{zeroslice, ZeroSlice};

#[derive(Debug)]
enum SupplementPayloadHolder {
Expand Down Expand Up @@ -244,10 +243,9 @@ fn char_from_u16(u: u16) -> char {
char_from_u32(u32::from(u))
}

const EMPTY_U16: &ZeroSlice<u16> =
ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([]));
const EMPTY_U16: &ZeroSlice<u16> = zeroslice![];

const EMPTY_CHAR: &ZeroSlice<char> = ZeroSlice::new_empty();
const EMPTY_CHAR: &ZeroSlice<char> = zeroslice![];

#[inline(always)]
fn in_inclusive_range(c: char, start: char, end: char) -> bool {
Expand Down
36 changes: 27 additions & 9 deletions utils/zerovec/src/ule/plain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ macro_rules! impl_const_constructors {
}

macro_rules! impl_byte_slice_type {
($type:ty, $size:literal) => {
($single_fn:ident, $type:ty, $size:literal) => {
impl From<$type> for RawBytesULE<$size> {
#[inline]
fn from(value: $type) -> Self {
Expand All @@ -132,6 +132,24 @@ macro_rules! impl_byte_slice_type {
// EqULE is true because $type and RawBytesULE<$size>
// have the same byte sequence on little-endian
unsafe impl EqULE for $type {}

impl RawBytesULE<$size> {
pub const fn $single_fn(v: $type) -> Self {
RawBytesULE(v.to_le_bytes())
}
}
};
}

// Thin wrapper over `impl_byte_slice_type!` for unsigned integer types:
// forwards the type and its byte width unchanged, and fixes the name of the
// generated `const fn` constructor on `RawBytesULE` to `from_unsigned`.
macro_rules! impl_byte_slice_unsigned_type {
    ($int:ty, $width:literal) => {
        impl_byte_slice_type!(from_unsigned, $int, $width);
    };
}

// Thin wrapper over `impl_byte_slice_type!` for signed integer types:
// forwards the type and its byte width unchanged, and fixes the name of the
// generated `const fn` constructor on `RawBytesULE` to `from_signed`.
macro_rules! impl_byte_slice_signed_type {
    ($int:ty, $width:literal) => {
        impl_byte_slice_type!(from_signed, $int, $width);
    };
}

Expand All @@ -140,15 +158,15 @@ impl_byte_slice_size!(u32, 4);
impl_byte_slice_size!(u64, 8);
impl_byte_slice_size!(u128, 16);

impl_byte_slice_type!(u16, 2);
impl_byte_slice_type!(u32, 4);
impl_byte_slice_type!(u64, 8);
impl_byte_slice_type!(u128, 16);
impl_byte_slice_unsigned_type!(u16, 2);
impl_byte_slice_unsigned_type!(u32, 4);
impl_byte_slice_unsigned_type!(u64, 8);
impl_byte_slice_unsigned_type!(u128, 16);

impl_byte_slice_type!(i16, 2);
impl_byte_slice_type!(i32, 4);
impl_byte_slice_type!(i64, 8);
impl_byte_slice_type!(i128, 16);
impl_byte_slice_signed_type!(i16, 2);
impl_byte_slice_signed_type!(i32, 4);
impl_byte_slice_signed_type!(i64, 8);
impl_byte_slice_signed_type!(i128, 16);

impl_const_constructors!(u8, 1);
impl_const_constructors!(u16, 2);
Expand Down
9 changes: 9 additions & 0 deletions utils/zerovec/src/ule/unvalidated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,15 @@ impl UnvalidatedChar {
}
}

impl RawBytesULE<3> {
/// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling
/// [`AsULE::to_unaligned`].
#[inline]
pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self {
RawBytesULE(uc.0)
}
}

impl AsULE for UnvalidatedChar {
type ULE = RawBytesULE<3>;

Expand Down
Loading