diff --git a/components/segmenter/src/dictionary.rs b/components/segmenter/src/complex/dictionary.rs similarity index 93% rename from components/segmenter/src/dictionary.rs rename to components/segmenter/src/complex/dictionary.rs index ac0ccb19f43..76f3e025c4a 100644 --- a/components/segmenter/src/dictionary.rs +++ b/components/segmenter/src/complex/dictionary.rs @@ -10,7 +10,7 @@ use icu_collections::char16trie::{Char16Trie, TrieResult}; use icu_provider::prelude::*; /// A trait for dictionary based iterator -pub trait DictionaryType<'l, 's> { +trait DictionaryType<'l, 's> { /// The iterator over characters. type IterAttr: Iterator + Clone; @@ -21,7 +21,7 @@ pub trait DictionaryType<'l, 's> { fn char_len(c: Self::CharType) -> usize; } -pub struct DictionaryBreakIterator< +struct DictionaryBreakIterator< 'l, 's, Y: DictionaryType<'l, 's> + ?Sized, @@ -137,13 +137,13 @@ impl<'l, 's> DictionaryType<'l, 's> for char { } } -pub(crate) struct DictionarySegmenter<'l> { +pub(super) struct DictionarySegmenter<'l> { dict: &'l UCharDictionaryBreakDataV1<'l>, grapheme: &'l RuleBreakDataV1<'l>, } impl<'l> DictionarySegmenter<'l> { - pub fn new( + pub(super) fn new( dict: &'l DataPayload, grapheme: &'l DataPayload, ) -> Self { @@ -155,12 +155,9 @@ impl<'l> DictionarySegmenter<'l> { } /// Create a dictionary based break iterator for an `str` (a UTF-8 string). - pub fn segment_str<'s>( - &'s self, - input: &'s str, - ) -> DictionaryBreakIterator<'l, 's, char, GraphemeClusterBreakIteratorUtf8> { + pub(super) fn segment_str(&'l self, input: &'l str) -> impl Iterator + 'l { let grapheme_iter = GraphemeClusterSegmenter::new_and_segment_str(input, self.grapheme); - DictionaryBreakIterator { + DictionaryBreakIterator:: { trie: Char16Trie::new(self.dict.trie_data.clone()), iter: input.char_indices(), len: input.len(), @@ -169,12 +166,9 @@ impl<'l> DictionarySegmenter<'l> { } /// Create a dictionary based break iterator for a UTF-16 string. - pub fn segment_utf16<'s>( - &'s self, - input: &'s [u16], - ) -> DictionaryBreakIterator<'l, 's, u32, GraphemeClusterBreakIteratorUtf16> { + pub(super) fn segment_utf16(&'l self, input: &'l [u16]) -> impl Iterator + 'l { let grapheme_iter = GraphemeClusterSegmenter::new_and_segment_utf16(input, self.grapheme); - DictionaryBreakIterator { + DictionaryBreakIterator:: { trie: Char16Trie::new(self.dict.trie_data.clone()), iter: Utf16Indices::new(input), len: input.len(), @@ -186,11 +180,8 @@ impl<'l> DictionarySegmenter<'l> { #[cfg(test)] #[cfg(feature = "serde")] mod tests { - use crate::{ - dictionary::DictionarySegmenter, provider::DictionaryForWordOnlyAutoV1Marker, - LineSegmenter, WordSegmenter, - }; - use icu_provider::prelude::*; + use super::*; + use crate::{provider::DictionaryForWordOnlyAutoV1Marker, LineSegmenter, WordSegmenter}; use icu_provider_adapters::fork::ForkByKeyProvider; use icu_provider_fs::FsDataProvider; use std::path::PathBuf; diff --git a/components/segmenter/src/language.rs b/components/segmenter/src/complex/language.rs similarity index 96% rename from components/segmenter/src/language.rs rename to components/segmenter/src/complex/language.rs index 801ed918e6b..327eea5e20b 100644 --- a/components/segmenter/src/language.rs +++ b/components/segmenter/src/complex/language.rs @@ -3,7 +3,7 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #[derive(PartialEq, Debug, Copy, Clone)] -pub enum Language { +pub(super) enum Language { Burmese, ChineseOrJapanese, Khmer, @@ -43,12 +43,12 @@ fn get_language(codepoint: u32) -> Language { /// This struct is an iterator that returns the string per language from the /// given string. -pub struct LanguageIterator<'s> { +pub(super) struct LanguageIterator<'s> { rest: &'s str, } impl<'s> LanguageIterator<'s> { - pub fn new(input: &'s str) -> Self { + pub(super) fn new(input: &'s str) -> Self { Self { rest: input } } } @@ -70,12 +70,12 @@ impl<'s> Iterator for LanguageIterator<'s> { } } -pub struct LanguageIteratorUtf16<'s> { +pub(super) struct LanguageIteratorUtf16<'s> { rest: &'s [u16], } impl<'s> LanguageIteratorUtf16<'s> { - pub fn new(input: &'s [u16]) -> Self { + pub(super) fn new(input: &'s [u16]) -> Self { Self { rest: input } } } diff --git a/components/segmenter/src/math_helper.rs b/components/segmenter/src/complex/lstm/matrix.rs similarity index 83% rename from components/segmenter/src/math_helper.rs rename to components/segmenter/src/complex/lstm/matrix.rs index 545dca8d14c..faeeece1394 100644 --- a/components/segmenter/src/math_helper.rs +++ b/components/segmenter/src/complex/lstm/matrix.rs @@ -14,13 +14,13 @@ use num_traits::Float; /// `tanh` computes the tanh function for a scalar value. #[inline] -pub fn tanh(x: f32) -> f32 { +fn tanh(x: f32) -> f32 { x.tanh() } /// `sigmoid` computes the sigmoid function for a scalar value. #[inline] -pub fn sigmoid(x: f32) -> f32 { +fn sigmoid(x: f32) -> f32 { 1.0 / (1.0 + (-x).exp()) } @@ -30,20 +30,20 @@ pub fn sigmoid(x: f32) -> f32 { /// submatrices. For example, indexing into a matrix of size 5x4x3 returns a /// matrix of size 4x3. For more information, see [`MatrixOwned::submatrix`]. #[derive(Debug, Clone)] -pub struct MatrixOwned { +pub(super) struct MatrixOwned { data: Vec, dims: [usize; D], } impl MatrixOwned { - pub fn as_borrowed(&self) -> MatrixBorrowed { + pub(super) fn as_borrowed(&self) -> MatrixBorrowed { MatrixBorrowed { data: &self.data, dims: self.dims, } } - pub fn new_zero(dims: [usize; D]) -> Self { + pub(super) fn new_zero(dims: [usize; D]) -> Self { let total_len = dims.iter().product::(); MatrixOwned { data: vec![0.0; total_len], @@ -58,7 +58,7 @@ impl MatrixOwned { /// /// The type parameter `M` should be `D - 1`. #[inline] - pub fn submatrix(&self, index: usize) -> Option> { + pub(super) fn submatrix(&self, index: usize) -> Option> { // This assertion is based on const generics; it should always succeed and be elided. assert_eq!(M, D - 1); let (range, dims) = self.as_borrowed().submatrix_range(index); @@ -66,7 +66,7 @@ impl MatrixOwned { Some(MatrixBorrowed { data, dims }) } - pub fn as_mut(&mut self) -> MatrixBorrowedMut { + pub(super) fn as_mut(&mut self) -> MatrixBorrowedMut { MatrixBorrowedMut { data: &mut self.data, dims: self.dims, @@ -75,7 +75,10 @@ impl MatrixOwned { /// A mutable version of [`Self::submatrix`]. #[inline] - pub fn submatrix_mut(&mut self, index: usize) -> Option> { + pub(super) fn submatrix_mut( + &mut self, + index: usize, + ) -> Option> { // This assertion is based on const generics; it should always succeed and be elided. assert_eq!(M, D - 1); let (range, dims) = self.as_borrowed().submatrix_range(index); @@ -86,26 +89,26 @@ impl MatrixOwned { /// A `D`-dimensional, borrowed matrix. #[derive(Debug, Clone, Copy)] -pub struct MatrixBorrowed<'a, const D: usize> { +pub(super) struct MatrixBorrowed<'a, const D: usize> { data: &'a [f32], dims: [usize; D], } impl<'a, const D: usize> MatrixBorrowed<'a, D> { #[cfg(debug_assertions)] - pub fn debug_assert_dims(&self, dims: [usize; D]) { + pub(super) fn debug_assert_dims(&self, dims: [usize; D]) { debug_assert_eq!(dims, self.dims); let expected_len = dims.iter().product::(); debug_assert_eq!(expected_len, self.data.len()); } - pub fn as_slice(&self) -> &'a [f32] { + pub(super) fn as_slice(&self) -> &'a [f32] { self.data } /// See [`MatrixOwned::submatrix`]. #[inline] - pub fn submatrix(&self, index: usize) -> Option> { + pub(super) fn submatrix(&self, index: usize) -> Option> { // This assertion is based on const generics; it should always succeed and be elided. assert_eq!(M, D - 1); let (range, dims) = self.submatrix_range(index); @@ -129,21 +132,21 @@ macro_rules! impl_basic_dim { ($t1:path, $t2:path, $t3:path) => { impl<'a> $t1 { #[allow(dead_code)] - pub fn dim(&self) -> usize { + pub(super) fn dim(&self) -> usize { let [dim] = self.dims; dim } } impl<'a> $t2 { #[allow(dead_code)] - pub fn dim(&self) -> (usize, usize) { + pub(super) fn dim(&self) -> (usize, usize) { let [d0, d1] = self.dims; (d0, d1) } } impl<'a> $t3 { #[allow(dead_code)] - pub fn dim(&self) -> (usize, usize, usize) { + pub(super) fn dim(&self) -> (usize, usize, usize) { let [d0, d1, d2] = self.dims; (d0, d1, d2) } @@ -165,24 +168,24 @@ impl_basic_dim!( impl_basic_dim!(MatrixZero<'a, 1>, MatrixZero<'a, 2>, MatrixZero<'a, 3>); /// A `D`-dimensional, mutably borrowed matrix. -pub struct MatrixBorrowedMut<'a, const D: usize> { - pub(crate) data: &'a mut [f32], - pub(crate) dims: [usize; D], +pub(super) struct MatrixBorrowedMut<'a, const D: usize> { + pub(super) data: &'a mut [f32], + pub(super) dims: [usize; D], } impl<'a, const D: usize> MatrixBorrowedMut<'a, D> { - pub fn as_borrowed(&self) -> MatrixBorrowed { + pub(super) fn as_borrowed(&self) -> MatrixBorrowed { MatrixBorrowed { data: self.data, dims: self.dims, } } - pub fn as_mut_slice(&mut self) -> &mut [f32] { + pub(super) fn as_mut_slice(&mut self) -> &mut [f32] { self.data } - pub fn copy_submatrix(&mut self, from: usize, to: usize) { + pub(super) fn copy_submatrix(&mut self, from: usize, to: usize) { let (range_from, _) = self.as_borrowed().submatrix_range::(from); let (range_to, _) = self.as_borrowed().submatrix_range::(to); if let (Some(_), Some(_)) = ( @@ -195,7 +198,7 @@ impl<'a, const D: usize> MatrixBorrowedMut<'a, D> { } #[must_use] - pub fn add(&mut self, other: MatrixZero<'_, D>) -> Option<()> { + pub(super) fn add(&mut self, other: MatrixZero<'_, D>) -> Option<()> { debug_assert_eq!(self.dims, other.dims); // TODO: Vectorize? for i in 0..self.data.len() { @@ -205,26 +208,26 @@ impl<'a, const D: usize> MatrixBorrowedMut<'a, D> { } /// Mutates this matrix by applying a softmax transformation. - pub fn softmax_transform(&mut self) { + pub(super) fn softmax_transform(&mut self) { let sm = self.data.iter().map(|v| v.exp()).sum::(); self.data.iter_mut().for_each(|v| { *v = v.exp() / sm; }); } - pub fn sigmoid_transform(&mut self) { + pub(super) fn sigmoid_transform(&mut self) { for x in &mut self.data.iter_mut() { *x = sigmoid(*x); } } - pub fn tanh_transform(&mut self) { + pub(super) fn tanh_transform(&mut self) { for x in &mut self.data.iter_mut() { *x = tanh(*x); } } - pub fn convolve( + pub(super) fn convolve( &mut self, i: MatrixBorrowed<'_, D>, c: MatrixBorrowed<'_, D>, @@ -247,7 +250,7 @@ impl<'a, const D: usize> MatrixBorrowedMut<'a, D> { } } - pub fn mul_tanh(&mut self, o: MatrixBorrowed<'_, D>, c: MatrixBorrowed<'_, D>) { + pub(super) fn mul_tanh(&mut self, o: MatrixBorrowed<'_, D>, c: MatrixBorrowed<'_, D>) { let o = o.as_slice(); let c = c.as_slice(); let len = self.data.len(); @@ -267,7 +270,7 @@ impl<'a, const D: usize> MatrixBorrowedMut<'a, D> { impl<'a> MatrixBorrowed<'a, 1> { #[allow(dead_code)] // could be useful - pub fn dot_1d(&self, other: MatrixZero<1>) -> f32 { + pub(super) fn dot_1d(&self, other: MatrixZero<1>) -> f32 { debug_assert_eq!(self.dims, other.dims); unrolled_dot_1(self.data, other.data) } @@ -278,7 +281,7 @@ impl<'a> MatrixBorrowedMut<'a, 1> { /// /// Note: For better dot product efficiency, if `b` is MxN, then `a` should be N; /// this is the opposite of standard practice. - pub fn add_dot_2d(&mut self, a: MatrixBorrowed<1>, b: MatrixZero<2>) { + pub(super) fn add_dot_2d(&mut self, a: MatrixBorrowed<1>, b: MatrixZero<2>) { let m = a.dim(); let n = self.as_borrowed().dim(); debug_assert_eq!( @@ -312,7 +315,7 @@ impl<'a> MatrixBorrowedMut<'a, 2> { /// Calculate the dot product of a and b, adding the result to self. /// /// Self should be _MxN_; `a`, _O_; and `b`, _MxNxO_. - pub fn add_dot_3d_1(&mut self, a: MatrixBorrowed<1>, b: MatrixZero<3>) { + pub(super) fn add_dot_3d_1(&mut self, a: MatrixBorrowed<1>, b: MatrixZero<3>) { let m = a.dim(); let n = self.as_borrowed().dim().0 * self.as_borrowed().dim().1; debug_assert_eq!( @@ -352,7 +355,7 @@ impl<'a> MatrixBorrowedMut<'a, 2> { /// Calculate the dot product of a and b, adding the result to self. /// /// Self should be _MxN_; `a`, _O_; and `b`, _MxNxO_. - pub fn add_dot_3d_2(&mut self, a: MatrixZero<1>, b: MatrixZero<3>) { + pub(super) fn add_dot_3d_2(&mut self, a: MatrixZero<1>, b: MatrixZero<3>) { let m = a.dim(); let n = self.as_borrowed().dim().0 * self.as_borrowed().dim().1; debug_assert_eq!( @@ -392,30 +395,53 @@ impl<'a> MatrixBorrowedMut<'a, 2> { /// A `D`-dimensional matrix borrowed from a [`ZeroSlice`]. #[derive(Debug, Clone, Copy)] -pub struct MatrixZero<'a, const D: usize> { +pub(super) struct MatrixZero<'a, const D: usize> { data: &'a ZeroSlice, dims: [usize; D], } -impl<'a, const D: usize> MatrixZero<'a, D> { - pub fn from_parts_unchecked(data: &'a ZeroSlice, dims: [usize; D]) -> Self { - Self { data, dims } +impl<'a> From<&'a crate::provider::LstmMatrix1<'a>> for MatrixZero<'a, 1> { + fn from(other: &'a crate::provider::LstmMatrix1<'a>) -> Self { + Self { + data: &other.data, + dims: other.dims.map(|x| x as usize), + } } +} +impl<'a> From<&'a crate::provider::LstmMatrix2<'a>> for MatrixZero<'a, 2> { + fn from(other: &'a crate::provider::LstmMatrix2<'a>) -> Self { + Self { + data: &other.data, + dims: other.dims.map(|x| x as usize), + } + } +} + +impl<'a> From<&'a crate::provider::LstmMatrix3<'a>> for MatrixZero<'a, 3> { + fn from(other: &'a crate::provider::LstmMatrix3<'a>) -> Self { + Self { + data: &other.data, + dims: other.dims.map(|x| x as usize), + } + } +} + +impl<'a, const D: usize> MatrixZero<'a, D> { #[allow(clippy::wrong_self_convention)] // same convention as slice::to_vec - pub fn to_owned(&self) -> MatrixOwned { + pub(super) fn to_owned(&self) -> MatrixOwned { MatrixOwned { data: self.data.iter().collect(), dims: self.dims, } } - pub fn as_slice(&self) -> &ZeroSlice { + pub(super) fn as_slice(&self) -> &ZeroSlice { self.data } #[cfg(debug_assertions)] - pub fn debug_assert_dims(&self, dims: [usize; D]) { + pub(super) fn debug_assert_dims(&self, dims: [usize; D]) { debug_assert_eq!(dims, self.dims); let expected_len = dims.iter().product::(); debug_assert_eq!(expected_len, self.data.len()); @@ -423,7 +449,7 @@ impl<'a, const D: usize> MatrixZero<'a, D> { /// See [`MatrixOwned::submatrix`]. #[inline] - pub fn submatrix(&self, index: usize) -> Option> { + pub(super) fn submatrix(&self, index: usize) -> Option> { // This assertion is based on const generics; it should always succeed and be elided. assert_eq!(M, D - 1); let (range, dims) = self.submatrix_range(index); diff --git a/components/segmenter/src/lstm.rs b/components/segmenter/src/complex/lstm/mod.rs similarity index 92% rename from components/segmenter/src/lstm.rs rename to components/segmenter/src/complex/lstm/mod.rs index 98e49a2dd27..6cb249e5457 100644 --- a/components/segmenter/src/lstm.rs +++ b/components/segmenter/src/complex/lstm/mod.rs @@ -3,7 +3,6 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::grapheme::GraphemeClusterSegmenter; -use crate::math_helper::{MatrixBorrowedMut, MatrixOwned, MatrixZero}; use crate::provider::*; use alloc::boxed::Box; use alloc::string::String; @@ -12,9 +11,12 @@ use core::char::{decode_utf16, REPLACEMENT_CHARACTER}; use icu_provider::DataPayload; use zerovec::{maps::ZeroMapBorrowed, ule::UnvalidatedStr}; +mod matrix; +use matrix::*; + // A word break iterator using LSTM model. Input string have to be same language. -pub struct LstmSegmenterIterator<'s> { +struct LstmSegmenterIterator<'s> { input: &'s str, bies_str: Box<[Bies]>, pos: usize, @@ -37,7 +39,7 @@ impl Iterator for LstmSegmenterIterator<'_> { } } -pub struct LstmSegmenterIteratorUtf16 { +struct LstmSegmenterIteratorUtf16 { bies_str: Box<[Bies]>, pos: usize, } @@ -56,7 +58,7 @@ impl Iterator for LstmSegmenterIteratorUtf16 { } } -pub(crate) struct LstmSegmenter<'l> { +pub(super) struct LstmSegmenter<'l> { dic: ZeroMapBorrowed<'l, UnvalidatedStr, u16>, embedding: MatrixZero<'l, 2>, fw_w: MatrixZero<'l, 3>, @@ -72,28 +74,28 @@ pub(crate) struct LstmSegmenter<'l> { impl<'l> LstmSegmenter<'l> { /// Returns `Err` if grapheme data is required but not present - pub fn new( + pub(super) fn new( lstm: &'l DataPayload, grapheme: &'l DataPayload, ) -> Self { let LstmDataV1::Float32(lstm) = lstm.get(); Self { dic: lstm.dic.as_borrowed(), - embedding: lstm.embedding.as_matrix_zero(), - fw_w: lstm.fw_w.as_matrix_zero(), - fw_u: lstm.fw_u.as_matrix_zero(), - fw_b: lstm.fw_b.as_matrix_zero(), - bw_w: lstm.bw_w.as_matrix_zero(), - bw_u: lstm.bw_u.as_matrix_zero(), - bw_b: lstm.bw_b.as_matrix_zero(), - time_w: lstm.time_w.as_matrix_zero(), - time_b: lstm.time_b.as_matrix_zero(), + embedding: MatrixZero::from(&lstm.embedding), + fw_w: MatrixZero::from(&lstm.fw_w), + fw_u: MatrixZero::from(&lstm.fw_u), + fw_b: MatrixZero::from(&lstm.fw_b), + bw_w: MatrixZero::from(&lstm.bw_w), + bw_u: MatrixZero::from(&lstm.bw_u), + bw_b: MatrixZero::from(&lstm.bw_b), + time_w: MatrixZero::from(&lstm.time_w), + time_b: MatrixZero::from(&lstm.time_b), grapheme: (lstm.model == ModelType::GraphemeClusters).then(|| grapheme.get()), } } /// Create an LSTM based break iterator for an `str` (a UTF-8 string). - pub fn segment_str<'s>(&self, input: &'s str) -> LstmSegmenterIterator<'s> { + pub(super) fn segment_str<'s>(&self, input: &'s str) -> impl Iterator + 's { let lstm_output = self.produce_bies(input); LstmSegmenterIterator { input, @@ -104,7 +106,7 @@ impl<'l> LstmSegmenter<'l> { } /// Create an LSTM based break iterator for a UTF-16 string. - pub fn segment_utf16(&self, input: &[u16]) -> LstmSegmenterIteratorUtf16 { + pub(super) fn segment_utf16(&self, input: &[u16]) -> impl Iterator { let input: String = decode_utf16(input.iter().copied()) .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)) .collect(); @@ -265,7 +267,7 @@ impl<'l> LstmSegmenter<'l> { // TODO(#421): Use common BIES normalizer code #[derive(Debug, PartialEq, Copy, Clone)] -pub enum Bies { +enum Bies { B, I, E, @@ -318,21 +320,21 @@ mod tests { /// Each test case has two attributs: `unseg` which denots the unsegmented line, and `true_bies` which indicates the Bies /// sequence representing the true segmentation. #[derive(PartialEq, Debug, Deserialize)] - pub struct TestCase { - pub unseg: String, - pub expected_bies: String, - pub true_bies: String, + struct TestCase { + unseg: String, + expected_bies: String, + true_bies: String, } /// `TestTextData` is a struct to store a vector of `TestCase` that represents a test text. #[derive(PartialEq, Debug, Deserialize)] - pub struct TestTextData { - pub testcases: Vec, + struct TestTextData { + testcases: Vec, } #[derive(Debug)] - pub struct TestText { - pub data: TestTextData, + struct TestText { + data: TestTextData, } fn load_test_text(filename: &str) -> TestTextData { diff --git a/components/segmenter/src/complex.rs b/components/segmenter/src/complex/mod.rs similarity index 98% rename from components/segmenter/src/complex.rs rename to components/segmenter/src/complex/mod.rs index 00b01f8afe0..774204b7edb 100644 --- a/components/segmenter/src/complex.rs +++ b/components/segmenter/src/complex/mod.rs @@ -2,15 +2,20 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::dictionary::DictionarySegmenter; -use crate::language::*; -#[cfg(feature = "lstm")] -use crate::lstm::LstmSegmenter; use crate::provider::*; use alloc::vec::Vec; use icu_locid::{locale, Locale}; use icu_provider::prelude::*; +mod dictionary; +use dictionary::*; +mod language; +use language::*; +#[cfg(feature = "lstm")] +mod lstm; +#[cfg(feature = "lstm")] +use lstm::*; + #[cfg(not(feature = "lstm"))] type DictOrLstm = Result, core::convert::Infallible>; #[cfg(not(feature = "lstm"))] diff --git a/components/segmenter/src/lib.rs b/components/segmenter/src/lib.rs index c4ca8a2e7b2..b809ed72dae 100644 --- a/components/segmenter/src/lib.rs +++ b/components/segmenter/src/lib.rs @@ -126,11 +126,9 @@ extern crate alloc; mod complex; -mod dictionary; mod error; mod indices; mod iterator_helpers; -mod language; mod rule_segmenter; mod grapheme; @@ -144,11 +142,6 @@ pub mod provider; #[doc(hidden)] pub mod symbols; -#[cfg(feature = "lstm")] -mod lstm; -#[cfg(feature = "lstm")] -mod math_helper; - // Main Segmenter and BreakIterator public types pub use crate::grapheme::GraphemeClusterBreakIterator; pub use crate::grapheme::GraphemeClusterSegmenter; diff --git a/components/segmenter/src/provider/lstm.rs b/components/segmenter/src/provider/lstm.rs index 61fc0d3e11f..6a85680e4ca 100644 --- a/components/segmenter/src/provider/lstm.rs +++ b/components/segmenter/src/provider/lstm.rs @@ -26,9 +26,9 @@ macro_rules! lstm_matrix { pub struct $name<'data> { // Invariant: dims.product() == data.len() #[allow(missing_docs)] - dims: [u16; $generic], + pub(crate) dims: [u16; $generic], #[allow(missing_docs)] - data: ZeroVec<'data, f32>, + pub(crate) data: ZeroVec<'data, f32>, } impl<'data> $name<'data> { @@ -52,14 +52,6 @@ macro_rules! lstm_matrix { ) -> Self { Self { dims, data } } - - #[cfg(feature = "lstm")] - pub(crate) fn as_matrix_zero(&self) -> crate::math_helper::MatrixZero<$generic> { - crate::math_helper::MatrixZero::from_parts_unchecked( - &self.data, - self.dims.map(|x| x as usize), - ) - } } #[cfg(feature = "serde")]