Skip to content

Commit

Permalink
add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
gsmiller committed May 21, 2023
1 parent 2e22e77 commit c2b042d
Showing 1 changed file with 81 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,51 @@
*/
package org.apache.lucene.util.automaton;

import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Util;

public class TestDaciukMihovAutomatonBuilder extends LuceneTestCase {

/** Builds an automaton from the small fixed term set (non-binary mode) and verifies it. */
public void testBasic() throws Exception {
  List<BytesRef> sortedTerms = basicTerms();
  // The fixture is not in sorted order; sort it before building and checking.
  Collections.sort(sortedTerms);

  checkAutomaton(sortedTerms, build(sortedTerms, false), false);
}

/** Same as {@code testBasic}, but builds and checks the automaton in binary mode. */
public void testBasicBinary() throws Exception {
  List<BytesRef> sortedTerms = basicTerms();
  // The fixture is not in sorted order; sort it before building and checking.
  Collections.sort(sortedTerms);

  checkAutomaton(sortedTerms, build(sortedTerms, true), true);
}

/** Runs the randomized test with binary terms disabled. */
public void testRandomUnicodeOnly() throws Exception {
  final boolean allowBinary = false;
  testRandom(allowBinary);
}

/** Runs the randomized test with binary terms enabled. */
public void testRandomBinary() throws Exception {
  final boolean allowBinary = true;
  testRandom(allowBinary);
}

public void testLargeTerms() throws Exception {
byte[] b10k = new byte[10_000];
Arrays.fill(b10k, (byte) 'a');
Expand All @@ -46,6 +79,54 @@ public void testLargeTerms() throws Exception {
build(Collections.singleton(new BytesRef(b1k)), false); // no exception
}

/**
 * Repeatedly generates a random set of distinct terms, builds an automaton from the sorted terms,
 * and verifies it with {@code checkAutomaton}.
 *
 * @param allowBinary when true, some terms come from {@code TestUtil.randomBinaryTerm} instead of
 *     being random unicode strings, and the automaton is built and checked in binary mode
 */
private void testRandom(boolean allowBinary) throws Exception {
  // Nightly runs get more rounds than regular runs.
  final int rounds;
  if (RandomizedTest.isNightly()) {
    rounds = 50;
  } else {
    rounds = 10;
  }

  for (int round = 0; round < rounds; round++) {
    int termCount = random().nextInt(500, 2_000);
    // A set, so that terms generated more than once are kept only once.
    Set<BytesRef> unique = new HashSet<>(termCount);
    for (int added = 0; added < termCount; added++) {
      // When binary is allowed, occasionally use a random bytes term that isn't necessarily
      // valid unicode.
      boolean useRawBytes = allowBinary && random().nextInt(10) < 2;
      BytesRef term =
          useRawBytes
              ? newBytesRef(TestUtil.randomBinaryTerm(random()))
              : newBytesRef(TestUtil.randomRealisticUnicodeString(random()));
      unique.add(term);
    }

    List<BytesRef> ordered = new ArrayList<>(unique);
    Collections.sort(ordered);
    checkAutomaton(ordered, build(ordered, allowBinary), allowBinary);
  }
}

/**
 * Asserts that the automaton matches the expected terms in both directions: every expected term
 * is accepted by the compiled run automaton, and every string yielded by a {@code
 * FiniteStringsIterator} over the compiled automaton is one of the expected terms.
 *
 * @param expected the terms the automaton was built from
 * @param a the automaton under test
 * @param isBinary passed through to {@code CompiledAutomaton}; also selects how a term is
 *     rendered in failure messages of the first check
 */
private void checkAutomaton(List<BytesRef> expected, Automaton a, boolean isBinary) {
  CompiledAutomaton c = new CompiledAutomaton(a, true, false, isBinary);
  ByteRunAutomaton runAutomaton = c.runAutomaton;

  // Direction 1: every expected term must be accepted.
  for (BytesRef t : expected) {
    String readable = isBinary ? t.toString() : t.utf8ToString();
    assertTrue(
        readable + " should be found but wasn't", runAutomaton.run(t.bytes, t.offset, t.length));
  }

  // Direction 2: every string the iterator yields must be an expected term. Use a hash set for
  // the membership test; List.contains is a linear scan per yielded string, which is quadratic
  // for the randomized tests.
  Set<BytesRef> expectedSet = new HashSet<>(expected);
  BytesRefBuilder scratch = new BytesRefBuilder();
  FiniteStringsIterator it = new FiniteStringsIterator(c.automaton);
  for (IntsRef r = it.next(); r != null; r = it.next()) {
    BytesRef t = Util.toBytesRef(r, scratch);
    assertTrue(t + " was produced by the automaton but wasn't expected", expectedSet.contains(t));
  }
}

/**
 * Returns a new mutable list holding the small fixed term set used by the basic tests. The terms
 * are deliberately not in sorted order; callers sort the list themselves.
 */
private List<BytesRef> basicTerms() {
  String[] words = {"dog", "day", "dad", "cats", "cat"};
  List<BytesRef> terms = new ArrayList<>(words.length);
  for (String word : words) {
    terms.add(newBytesRef(word));
  }
  return terms;
}

private Automaton build(Collection<BytesRef> terms, boolean asBinary) throws IOException {
if (random().nextBoolean()) {
return DaciukMihovAutomatonBuilder.build(terms, asBinary);
Expand Down

0 comments on commit c2b042d

Please sign in to comment.