Perform Schlinkert pruning both forwards and reverse, picking whichever saves more words #43

Merged: 9 commits, May 1, 2023
2 changes: 1 addition & 1 deletion Cargo.lock


2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "tidy"
version = "0.2.88"
version = "0.2.90"
authors = ["sts10 <sschlinkert@gmail.com>"]
license = "MIT"
edition = "2021"
6 changes: 4 additions & 2 deletions readme.markdown
@@ -371,9 +371,11 @@ I occasionally [build releases](https://github.com/sts10/tidy/releases) of Tidy

## For Tidy developers

Run all code tests: `cargo test`
* Run all code tests: `cargo test`
* Generate docs: `cargo doc --document-private-items --no-deps`. Add `--open` flag to open docs after generation. Locally, docs are printed to `./target/doc/tidy/index.html`.
* Check license compatibility of Tidy's dependencies: `cargo deny check licenses` (requires that you [have cargo-deny installed locally](https://github.com/EmbarkStudios/cargo-deny#install-cargo-deny))

Generate docs: `cargo doc --document-private-items --no-deps`. Add `--open` flag to open docs after generation. Locally, docs are printed to `./target/doc/tidy/index.html`.
Pull Requests welcome!

## Blog posts related to this project

7 changes: 5 additions & 2 deletions src/display_information/mod.rs
@@ -2,7 +2,7 @@

pub mod uniquely_decodable;
use crate::count_characters;
use crate::display_information::uniquely_decodable::check_decodability;
use crate::display_information::uniquely_decodable::is_uniquely_decodable;
use crate::parse_delimiter;
use crate::split_and_vectorize;

@@ -78,7 +78,10 @@ pub fn display_list_information(

// At least for now, this one is EXPENSIVE
if level >= 4 {
eprintln!("Uniquely decodable? : {}", check_decodability(&list));
eprintln!(
"Uniquely decodable? : {}",
is_uniquely_decodable(&list)
);
}

let entropy_per_word = calc_entropy_per_word(list.len());
7 changes: 3 additions & 4 deletions src/display_information/uniquely_decodable.rs
@@ -6,10 +6,9 @@
/// very closely.
use std::collections::HashSet;

pub fn check_decodability(c: &[String]) -> bool {
// Right off the bat, convert inputted Slice to a HashSet
// Since we always want this list to be unique, and we're
// going to eventually calculate a disjoint boolean!
/// Return true if the list is uniquely decodable, false if not. I
/// don't _think_ we need to check reversed words in this case.
pub fn is_uniquely_decodable(c: &[String]) -> bool {
let c = vec_to_hash(c);
sardinas_patterson_theorem(c)
}
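For readers unfamiliar with the renamed check, here is a minimal, self-contained sketch of a Sardinas-Patterson style decodability test. Names are illustrative; Tidy's actual implementation in `uniquely_decodable.rs` differs in detail.

```rust
use std::collections::HashSet;

/// Dangling suffixes: for each pair where a word in `a` is a proper prefix
/// of a word in `b`, collect the leftover (non-empty) suffix.
fn residuals(a: &HashSet<String>, b: &HashSet<String>) -> HashSet<String> {
    let mut out = HashSet::new();
    for x in a {
        for y in b {
            if y.len() > x.len() && y.starts_with(x.as_str()) {
                out.insert(y[x.len()..].to_string());
            }
        }
    }
    out
}

/// Sardinas-Patterson sketch: iterate the dangling-suffix sets. The code is
/// NOT uniquely decodable if any set intersects the codeword set; it IS
/// uniquely decodable if a set becomes empty or the sets start repeating.
fn is_uniquely_decodable_sketch(words: &[String]) -> bool {
    let c: HashSet<String> = words.iter().cloned().collect();
    let mut cn = residuals(&c, &c);
    let mut seen: HashSet<Vec<String>> = HashSet::new();
    loop {
        if !cn.is_disjoint(&c) {
            return false; // a dangling suffix is itself a codeword
        }
        if cn.is_empty() {
            return true;
        }
        // Record the current set (sorted, for a stable key) to detect cycles.
        let mut key: Vec<String> = cn.iter().cloned().collect();
        key.sort();
        if !seen.insert(key) {
            return true; // states repeat without ever hitting C
        }
        let mut next = residuals(&c, &cn);
        next.extend(residuals(&cn, &c));
        cn = next;
    }
}
```

The suffix sets are always subsets of the (finite) set of codeword suffixes, so the cycle check guarantees termination.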
36 changes: 33 additions & 3 deletions src/list_manipulations.rs
@@ -88,7 +88,7 @@ pub fn delete_before_first_char(s: &str, ch: char) -> &str {
/// [a separate repo](https://github.com/sts10/splitter/blob/main/src/lib.rs).
pub fn delete_after_first_char(s: &str, ch: char) -> &str {
match memchr(ch as u8, s.as_bytes()) {
None => s, // not found => return the whole string
None => s, // delimiting character not found in string s, so return the whole string
Some(pos) => &s[0..pos],
}
}
@@ -141,13 +141,43 @@ pub fn guarantee_maximum_prefix_length(
/// Executes Schlinkert prune. Attempts to make list uniquely decodable
/// by removing the fewest number of code words possible. Adapted from
/// Sardinas-Patterson algorithm.
/// Runs the word list both as given and with each word reversed, preferring
/// whichever run preserves more words from the given list.
pub fn schlinkert_prune(list: &[String]) -> Vec<String> {
let offenders_to_remove = get_sardinas_patterson_final_intersection(list);
// Clumsily clone the list into a new variable.
let mut new_list = list.to_owned();
new_list.retain(|x| !offenders_to_remove.contains(x));
// First, simply find the "offenders" with the list as given.
let offenders_to_remove_forwards = get_sardinas_patterson_final_intersection(list);
// Now, reverse all words before running the Schlinkert prune.
// This will give a different list of offending words -- and potentially FEWER
// than running the prune forwards. (We call reverse_all_words function
// twice because we have to un-reverse all the offending words at the end.)
let offenders_to_remove_backwards = reverse_all_words(
&get_sardinas_patterson_final_intersection(&reverse_all_words(list)),
);
// If running the prune on the reversed words yielded fewer offenders,
// we'll remove those offending words, since our goal is to remove
// as few words as possible.
if offenders_to_remove_forwards.len() <= offenders_to_remove_backwards.len() {
new_list.retain(|x| !offenders_to_remove_forwards.contains(x));
} else {
new_list.retain(|x| !offenders_to_remove_backwards.contains(x));
}
new_list
}

/// Reverse all words on given list. For example,
/// `["hotdog", "hamburger", "alligator"]` becomes
/// `["godtoh", "regrubmah", "rotagilla"]`
/// Uses graphemes to ensure it handles accented characters correctly.
pub fn reverse_all_words(list: &[String]) -> Vec<String> {
let mut reversed_list = vec![];
for word in list {
reversed_list.push(word.graphemes(true).rev().collect::<String>());
}
reversed_list
}

use unicode_segmentation::UnicodeSegmentation;
/// Given a word and a `usize` of `length`, this function returns
/// the first `length` characters of that word. This length is
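The two-direction selection added above can be sketched independently of the Sardinas-Patterson machinery by abstracting the offender search into a closure. `prune_both_ways` and `find_offenders` are hypothetical names for illustration; the real code calls `get_sardinas_patterson_final_intersection`, and reverses grapheme clusters rather than `char`s.

```rust
/// Sketch: run an offender search on the list as given and on the list with
/// every word reversed, then drop whichever offender set is smaller.
/// Reversing here is char-level; Tidy reverses grapheme clusters instead.
fn prune_both_ways<F>(list: &[String], find_offenders: F) -> Vec<String>
where
    F: Fn(&[String]) -> Vec<String>,
{
    let reverse = |l: &[String]| -> Vec<String> {
        l.iter().map(|w| w.chars().rev().collect()).collect()
    };
    let forwards = find_offenders(list);
    let reversed_input = reverse(list);
    // Offenders found on the reversed list must be un-reversed before removal.
    let rev_offenders = find_offenders(reversed_input.as_slice());
    let backwards = reverse(rev_offenders.as_slice());
    let offenders = if forwards.len() <= backwards.len() {
        forwards
    } else {
        backwards
    };
    let mut kept = list.to_owned();
    kept.retain(|w| !offenders.contains(w));
    kept
}
```

Note the `<=` tie-break: when both directions flag the same number of offenders, the forwards result is kept, matching the merged code.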
13 changes: 13 additions & 0 deletions tests/list_manipulation_tests.rs
@@ -1,5 +1,6 @@
mod list_manipulation_tests {
use tidy::dice::print_as_dice; // not exactly sure why I need this here...
use tidy::list_manipulations::reverse_all_words;
use tidy::*;

fn make_lists() -> (Vec<String>, Vec<String>, Vec<String>, Vec<String>) {
@@ -660,6 +661,18 @@ mod list_manipulation_tests {
);
}

#[test]
fn can_reverse_list() {
let list = vec![
"hotdog".to_string(),
"hamburger".to_string(),
"alligator".to_string(),
"😀😁😆".to_string(),
];
let rev_list = reverse_all_words(&list);
assert_eq!(rev_list, ["godtoh", "regrubmah", "rotagilla", "😆😁😀"]);
}

#[test]
fn can_print_dice_rolls_of_base_6() {
assert_eq!(print_as_dice(0, 6, 7776, false), "11111".to_string());
37 changes: 37 additions & 0 deletions tests/pruning_tests.rs
@@ -0,0 +1,37 @@
mod pruning_tests {
use tidy::display_information::uniquely_decodable::is_uniquely_decodable;
use tidy::*;

#[test]
fn can_run_schlinkert_prune_on_reversed_list_if_it_saves_more_words() {
let list: Vec<String> = vec![
"news",
"paper",
"newspaper",
"donkey",
"newsdonkey",
"ghost",
"newsghost",
"radish",
"newsradish",
]
.iter()
.map(|w| w.to_string())
.collect();

let this_tidy_request = TidyRequest {
list: list,
should_schlinkert_prune: true,
..Default::default()
};
let new_list = tidy_list(this_tidy_request);
// If the Schlinkert prune were done forwards, only
// 5 words would be saved. But if we Schlinkert
// prune the reversed list, we save 8 words.
assert!(new_list.len() == 8);
// And now let's confirm that the new list is indeed
// uniquely decodable, at least as far as Tidy is able
// to confirm.
assert!(is_uniquely_decodable(&new_list));
}
}
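Intuition for the 8-vs-5 difference in this test: run forwards, the prune must remove four words, but on the reversed list only `swen` (`news` reversed) is flagged, so just `news` is dropped. The remaining eight words are then prefix-free, and prefix-free codes are always uniquely decodable. A quick check of that claim (`is_prefix_free` is a hypothetical helper, not part of Tidy):

```rust
/// A code is uniquely decodable if it is prefix-free: no word is a proper
/// prefix of another. (Prefix-freeness is sufficient, not necessary.)
fn is_prefix_free(words: &[&str]) -> bool {
    words
        .iter()
        .all(|a| words.iter().all(|b| a == b || !b.starts_with(*a)))
}
```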
20 changes: 10 additions & 10 deletions tests/uniquely_decodable_tests.rs
@@ -1,5 +1,5 @@
mod uniquely_decodable_tests {
use tidy::display_information::uniquely_decodable::check_decodability;
use tidy::display_information::uniquely_decodable::is_uniquely_decodable;

#[test]
fn can_determine_a_list_with_prefix_words_is_not_uniquely_decodable() {
@@ -8,7 +8,7 @@ mod uniquely_decodable_tests {
.map(|x| x.to_string())
.collect();

assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));

let list2: Vec<String> = vec![
"spill".to_string(),
@@ -17,7 +17,7 @@
"spills".to_string(),
"unmoved".to_string(),
];
assert!(!check_decodability(&list2));
assert!(!is_uniquely_decodable(&list2));
}

#[test]
@@ -39,7 +39,7 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));
}

#[test]
@@ -48,7 +48,7 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));
}

#[test]
@@ -57,26 +57,26 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));

let list: Vec<String> = vec!["0", "10", "010", "101"]
.iter()
.map(|w| w.to_string())
.collect();
assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));

let list: Vec<String> = vec!["0", "01", "011", "0111"]
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));

// '0, 1, 00, 11' is not a uniquely decodable code
let list: Vec<String> = vec!["0", "1", "00", "11"]
.iter()
.map(|w| w.to_string())
.collect();
assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));
}

#[test]
@@ -87,6 +87,6 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));
}
}