Skip to content

Commit

Permalink
make clone(), drop(), and from() (when not inserting) lock-free
Browse files Browse the repository at this point in the history
  • Loading branch information
drash-course committed Jul 5, 2024
1 parent 9b65ad5 commit 941c453
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 48 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ keywords = ["string", "interner", "caching"]

[dependencies]
left-right = "0.11.5"
lockfree = "0.5.1"
once_cell = "1.19.0"
radix_trie = "0.2.1"
serde = { version = "1.0", optional = true }
Expand Down
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ scaling for writes.

## Planned Improvements

- Make `IString::from` (in the already interned case), `IString::clone`, and `IString::drop`
lock free.

- Replace or rewrite the radix tree to make it reuse the string storage, instead of storing a clone
of each interned string.
Currently the crate uses 2x the interned string storage space because of this (1x in storage,
Expand Down
137 changes: 126 additions & 11 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@ mod storage;

/// An immutable and interned string.
///
/// Reading an `IString`'s contents is very fast, lock free and wait free (thanks to `left_right`).
/// Can be shared and read from any number of threads.
/// Scales linearly with the number of reading threads.
/// Reading an `IString`'s contents is very fast, lock-free and wait-free.
/// It can be shared and read from any number of threads.
/// It scales linearly with the number of reading threads.
///
/// `IString` provides `Hash` and `Eq` implementations that run in O(1),
/// perfect for a high-performance `HashMap<IString, _>`.
///
/// The tradeoff is that creating a new `IString` is comparatively slower:
/// - Creating a new `IString` with a string that is already interned is generally fast.
/// It acquires a global lock.
/// - Creating a new `IString` with a string that isn't already interned is much slower.
/// It acquired a global lock and waits for all readers to finish reading.
/// - Creating a new `IString` with a string that is already interned is fast and lock-free.
/// - Creating a new `IString` with a string that isn't already interned is slower.
/// It acquires a global lock and waits for all readers to finish reading.
#[derive(Eq, PartialEq, Ord, Hash)]
pub struct IString {
pub(crate) key: IStringKey
Expand All @@ -22,18 +24,46 @@ pub struct IString {
// Indispensable traits impl : From, Drop, Deref

impl From<String> for IString {
    /// Interns `string`, taking ownership of it so that its allocation can be reused.
    ///
    /// The cost is O(N), N being `string.len()`.
    /// When the string is already interned the call is lock-free;
    /// when it is not, a global lock is taken.
    ///
    /// # Example
    ///
    /// ```
    /// use interned_string::IString;
    ///
    /// let my_istring = IString::from("hello".to_string());
    /// ```
    #[inline]
    fn from(string: String) -> Self {
        // could block
        let key = SHARED_STORAGE.insert_or_retain(string);
        Self { key }
    }
}

impl From<&str> for IString {
/// Intern the given `&str` by cloning its contents.
///
/// This operation runs in O(N) where N is the `string.len()`.
/// If the string was already interned, this operation is lock-free.
/// Otherwise, a global lock is acquired.
///
/// # Example
///
/// ```
/// use interned_string::IString;
///
/// let my_istring = IString::from("hello");
/// ```
#[inline]
fn from(string: &str) -> Self {
Self {
// could block
key: SHARED_STORAGE.insert_or_retain(String::from(string))
}
}
Expand All @@ -42,13 +72,31 @@ impl From<&str> for IString {
impl Drop for IString {
#[inline]
fn drop(&mut self) {
SHARED_STORAGE.release(self)
THREAD_LOCAL_READER.with(|tl_reader| {
tl_reader.release(self);
});
}
}

impl Deref for IString {
type Target = str;

/// Returns a reference to the string's contents.
///
/// This operation runs in O(1) and is lock-free.
///
/// # Example
/// ```
/// use interned_string::Intern;
///
/// fn foo(string: &str) {
/// println!("{string}")
/// }
///
/// let my_istring = "hello".intern();
/// // implicit call to Deref::deref
/// foo(&my_istring);
/// ```
#[inline]
fn deref(&self) -> &Self::Target {
THREAD_LOCAL_READER.with(|reader: &ThreadLocalReader| {
Expand All @@ -58,41 +106,63 @@ impl Deref for IString {
}

impl AsRef<str> for IString {
/// Returns a reference to the string's contents.
///
/// This operation runs in O(1) and is lock-free.
///
/// # Example
/// ```
/// use interned_string::Intern;
///
/// let my_istring = "Hello, World!".intern();
/// let (hello, world) = my_istring.as_ref().split_at(5);
/// ```
#[inline]
fn as_ref(&self) -> &str {
THREAD_LOCAL_READER.with(|reader: &ThreadLocalReader| {
reader.read(self)
THREAD_LOCAL_READER.with(|tl_reader: &ThreadLocalReader| {
tl_reader.read(self)
})
}
}

// Common traits impl that can't be derived : Clone, PartialOrd, Debug, Display, Default

impl Clone for IString {
/// Returns a copy of the `IString`.
///
/// This operation runs in O(1) and is lock-free.
#[inline]
fn clone(&self) -> Self {
SHARED_STORAGE.retain(self.key);
THREAD_LOCAL_READER.with(|reader: &ThreadLocalReader| {
reader.retain(self.key)
});

Self { key: self.key }
}
}

impl PartialOrd for IString {
#[inline]
fn lt(&self, other: &Self) -> bool {
self.deref().lt(other.deref())
}

#[inline]
fn le(&self, other: &Self) -> bool {
self.deref().le(other.deref())
}

#[inline]
fn gt(&self, other: &Self) -> bool {
self.deref().gt(other.deref())
}

#[inline]
fn ge(&self, other: &Self) -> bool {
self.deref().ge(other.deref())
}

#[inline]
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.deref().partial_cmp(other.deref())
}
Expand All @@ -114,6 +184,8 @@ impl std::fmt::Display for IString {
}

impl Default for IString {
/// Creates an empty `IString`.
#[inline]
fn default() -> Self {
Self::from(String::default())
}
Expand All @@ -126,19 +198,61 @@ pub trait Intern {
}

impl Intern for String {
    /// Interns this `String`, consuming it so that its allocation can be reused.
    ///
    /// The cost is O(N), N being `string.len()`.
    /// When the string is already interned the call is lock-free;
    /// when it is not, a global lock is taken.
    ///
    /// # Example
    ///
    /// ```
    /// use interned_string::Intern;
    ///
    /// let my_istring = "hello".to_string().intern();
    /// ```
    #[inline]
    fn intern(self) -> IString {
        // Goes through `From<String> for IString` via the blanket `Into` impl.
        self.into()
    }
}

impl Intern for &str {
    /// Interns this `&str`; its contents are cloned.
    ///
    /// The cost is O(N), N being `string.len()`.
    /// When the string is already interned the call is lock-free;
    /// when it is not, a global lock is taken.
    ///
    /// # Example
    ///
    /// ```
    /// use interned_string::Intern;
    ///
    /// let my_istring = "hello".intern();
    /// ```
    #[inline]
    fn intern(self) -> IString {
        // Goes through `From<&str> for IString` via the blanket `Into` impl.
        self.into()
    }
}

// Garbage collection

impl IString {
    /// Frees, right away, every interned string that is no longer used.
    ///
    /// Reach for this when memory usage must drop immediately and spending
    /// some CPU time on it is acceptable.
    /// The call acquires a global lock and waits for all readers to finish reading,
    /// so it is best invoked while the program has nothing else to do.
    ///
    /// Calling it is optional: memory is always eventually freed.
    ///
    /// # Panics
    ///
    /// Panics if locking the shared writer returns an error
    /// (presumably a poisoned mutex — TODO confirm the lock type).
    pub fn collect_garbage_now() {
        // The guard is held until the end of the function, i.e. for the whole collection.
        let mut writer = SHARED_STORAGE.writer.lock().unwrap();
        writer.collect_garbage();
    }
}

#[cfg(feature = "serde")]
mod feature_serde {
use serde::{de::Visitor, Deserialize, Serialize};
Expand Down Expand Up @@ -370,6 +484,7 @@ mod tests {

// reset the writer for the next test
let mut writer = SHARED_STORAGE.writer.lock().unwrap();
writer.drain_channel_ops();
writer.write_handle.append(storage::StringStorageOp::DropUnusedStrings);
writer.write_handle.publish();
drop(writer);
Expand Down
Loading

0 comments on commit 941c453

Please sign in to comment.