From 3d1e852cc57e1a8d7aa91afa4bbe7e06174caca8 Mon Sep 17 00:00:00 2001
From: Greg Miller
Date: Fri, 26 May 2023 07:40:29 -0700
Subject: [PATCH] test basic minimization

---
 .../automaton/TestStringsToAutomaton.java     | 44 ++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java
index b9c86a775eca..09b3730c88fd 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java
@@ -43,6 +43,7 @@ public void testBasic() throws Exception {
 
     Automaton a = build(terms, false);
     checkAutomaton(terms, a, false);
+    checkMinimized(a);
   }
 
   public void testBasicBinary() throws Exception {
@@ -51,6 +52,35 @@ public void testBasicBinary() throws Exception {
 
     Automaton a = build(terms, true);
     checkAutomaton(terms, a, true);
+    checkMinimized(a);
+  }
+
+  public void testRandomMinimized() throws Exception {
+    int iters = RandomizedTest.isNightly() ? 20 : 5;
+    for (int i = 0; i < iters; i++) {
+      boolean buildBinary = random().nextBoolean();
+      int size = random().nextInt(2, 50);
+      Set<BytesRef> terms = new HashSet<>();
+      List<Automaton> automatonList = new ArrayList<>(size);
+      for (int j = 0; j < size; j++) {
+        if (buildBinary) {
+          BytesRef t = TestUtil.randomBinaryTerm(random(), 8);
+          terms.add(t);
+          automatonList.add(Automata.makeBinary(t));
+        } else {
+          String s = TestUtil.randomRealisticUnicodeString(random(), 8);
+          terms.add(newBytesRef(s));
+          automatonList.add(Automata.makeString(s));
+        }
+      }
+      List<BytesRef> sortedTerms = terms.stream().sorted().toList();
+
+      Automaton expected =
+          MinimizationOperations.minimize(
+              Operations.union(automatonList), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+      Automaton actual = build(sortedTerms, buildBinary);
+      assertSameAutomaton(expected, actual);
+    }
   }
 
   public void testRandomUnicodeOnly() throws Exception {
@@ -119,6 +149,18 @@ private void checkAutomaton(List<BytesRef> expected, Automaton a, boolean isBina
     }
   }
 
+  private void checkMinimized(Automaton a) {
+    Automaton minimized =
+        MinimizationOperations.minimize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    assertSameAutomaton(minimized, a);
+  }
+
+  private static void assertSameAutomaton(Automaton a, Automaton b) {
+    assertEquals(a.getNumStates(), b.getNumStates());
+    assertEquals(a.getNumTransitions(), b.getNumTransitions());
+    assertTrue(Operations.sameLanguage(a, b));
+  }
+
   private List<BytesRef> basicTerms() {
     List<BytesRef> terms = new ArrayList<>();
     terms.add(newBytesRef("dog"));
@@ -137,7 +179,7 @@ private Automaton build(Collection<BytesRef> terms, boolean asBinary) throws IOE
     }
   }
 
-  private static class TermIterator implements BytesRefIterator {
+  private static final class TermIterator implements BytesRefIterator {
     private final Iterator<BytesRef> it;
 
     TermIterator(Collection<BytesRef> terms) {