From 7cff986540d9249f983551430af1692f7d85c556 Mon Sep 17 00:00:00 2001 From: Chris Fournier Date: Thu, 27 Apr 2023 12:57:24 -0400 Subject: [PATCH 1/8] Add test for LUCENE-10181 --- .../TestGraphTokenStreamFiniteStrings.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java index e68b892d01ce..88a14696cac8 100644 --- a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.util.graph; +import java.util.ArrayList; import java.util.Iterator; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -660,4 +661,27 @@ public void testMultipleSidePathsWithGaps() throws Exception { it.next(), new String[] {"king", "alfred", "saxons", "ruled"}, new int[] {1, 1, 3, 1}); assertFalse(it.hasNext()); } + + public void testLongTokenStreamStackOverflowError() throws Exception { + ArrayList tokens = + new ArrayList() { + { + add(token("turbo", 1, 1)); + add(token("fast", 0, 2)); + add(token("charged", 1, 1)); + add(token("wi", 1, 1)); + add(token("wifi", 0, 2)); + add(token("fi", 1, 1)); + } + }; + + // Add in too many tokens to get a high depth graph + for (int i = 0; i < 1024 * 10; i++) { + tokens.add(token("network", 1, 1)); + } + + TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0])); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts); + graph.articulationPoints(); // This will cause a java.lang.StackOverflowError + } } From fb01e86dede939ffa3dc97b5efc16d1ff460bf23 Mon Sep 17 00:00:00 2001 From: Chris Fournier Date: Thu, 27 Apr 2023 12:57:33 -0400 Subject: [PATCH 2/8] Add fix for LUCENE-10181 --- .../lucene/util/graph/GraphTokenStreamFiniteStrings.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java index 6711dfb6230f..3ea862c53013 100644 --- a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java @@ -46,6 +46,7 @@ */ public final class GraphTokenStreamFiniteStrings { + private static final int MAX_RECURSION_DEPTH = 1024; private AttributeSource[] tokens = new AttributeSource[4]; private final Automaton det; private final Transition transition = new Transition(); @@ -269,7 +270,7 @@ private static void articulationPointsRecurse( int numT = a.initTransition(state, t); for (int i = 0; i < numT; i++) { a.getNextTransition(t); - if (visited.get(t.dest) == false) { + if (visited.get(t.dest) == false && d < MAX_RECURSION_DEPTH) { parent[t.dest] = state; articulationPointsRecurse(a, t.dest, d + 1, depth, low, parent, visited, points); childCount++; From 8375cd858cda2ade85b3a9338f7cd2f84ba6c8ea Mon Sep 17 00:00:00 2001 From: Chris Fournier Date: Thu, 27 Apr 2023 13:08:57 -0400 Subject: [PATCH 3/8] Add changes --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 332e8eb4f608..623e255e2fff 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -86,6 +86,8 @@ Bug Fixes * GITHUB#12220: Hunspell: disallow hidden title-case entries from compound middle/end +* LUCENE-10181: Restrict GraphTokenStreamFiniteStrings#articulationPointsRecurse recursion depth. (Chris Fournier) + Other --------------------- From 975cd44471f2e4ac3717bcc2031f6b6a1507025f Mon Sep 17 00:00:00 2001 From: Chris Fournier Date: Thu, 27 Apr 2023 16:25:02 -0400 Subject: [PATCH 4/8] Adjust implementation to still return articulation points --- .../graph/GraphTokenStreamFiniteStrings.java | 10 ++++-- .../TestGraphTokenStreamFiniteStrings.java | 36 ++++++++++++++++--- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java index 3ea862c53013..048cf0ab00c9 100644 --- a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java @@ -18,6 +18,7 @@ package org.apache.lucene.util.graph; import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WORK_LIMIT; +import static org.apache.lucene.util.automaton.Operations.MAX_RECURSION_LEVEL; import java.io.IOException; import java.util.ArrayList; @@ -46,7 +47,6 @@ */ public final class GraphTokenStreamFiniteStrings { - private static final int MAX_RECURSION_DEPTH = 1024; private AttributeSource[] tokens = new AttributeSource[4]; private final Automaton det; private final Transition transition = new Transition(); @@ -270,9 +270,13 @@ private static void articulationPointsRecurse( int numT = a.initTransition(state, t); for (int i = 0; i < numT; i++) { a.getNextTransition(t); - if (visited.get(t.dest) == false && d < MAX_RECURSION_DEPTH) { + if (visited.get(t.dest) == false) { parent[t.dest] = state; - articulationPointsRecurse(a, t.dest, d + 1, depth, low, parent, visited, points); + if (d < MAX_RECURSION_LEVEL) { + articulationPointsRecurse(a, t.dest, d + 1, depth, low, parent, visited, points); + } else { + continue; + } childCount++; if (low[t.dest] >= depth[state]) { isArticulation = true; diff --git a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java index 88a14696cac8..56fadc901445 100644 --- a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java @@ -16,6 +16,8 @@ */ package org.apache.lucene.util.graph; +import static org.apache.lucene.util.automaton.Operations.MAX_RECURSION_LEVEL; + import java.util.ArrayList; import java.util.Iterator; import org.apache.lucene.analysis.TokenStream; @@ -663,12 +665,11 @@ public void testMultipleSidePathsWithGaps() throws Exception { } public void testLongTokenStreamStackOverflowError() throws Exception { + ArrayList tokens = new ArrayList() { { - add(token("turbo", 1, 1)); - add(token("fast", 0, 2)); - add(token("charged", 1, 1)); + add(token("fast", 1, 1)); add(token("wi", 1, 1)); add(token("wifi", 0, 2)); add(token("fi", 1, 1)); @@ -682,6 +683,33 @@ public void testLongTokenStreamStackOverflowError() throws Exception { TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0])); GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts); - graph.articulationPoints(); // This will cause a java.lang.StackOverflowError + + Iterator it = graph.getFiniteStrings(); + assertTrue(it.hasNext()); + it.next(); + assertTrue(it.hasNext()); + it.next(); + assertFalse(it.hasNext()); + + int[] points = graph.articulationPoints(); // This will cause a java.lang.StackOverflowError + assertEquals(points[0], 1); + assertEquals(points[1], 3); + assertEquals(points.length, MAX_RECURSION_LEVEL - 2); + + assertFalse(graph.hasSidePath(0)); + it = graph.getFiniteStrings(0, 1); + assertTrue(it.hasNext()); + assertTokenStream(it.next(), new String[] {"fast"}, new int[] {1}); + assertFalse(it.hasNext()); + Term[] terms = graph.getTerms("field", 0); + assertArrayEquals(terms, new Term[] {new Term("field", "fast")}); + + assertTrue(graph.hasSidePath(1)); + it = graph.getFiniteStrings(1, 3); + assertTrue(it.hasNext()); + assertTokenStream(it.next(), new String[] {"wi", "fi"}, new int[] {1, 1}); + assertTrue(it.hasNext()); + assertTokenStream(it.next(), new String[] {"wifi"}, new int[] {1}); + assertFalse(it.hasNext()); } } From bb96830b490bda51fec69fd3bb5adf51c366bce7 Mon Sep 17 00:00:00 2001 From: Chris Fournier Date: Tue, 2 May 2023 09:09:22 -0400 Subject: [PATCH 5/8] Throw IllegalArgumentException --- .../graph/GraphTokenStreamFiniteStrings.java | 2 +- .../TestGraphTokenStreamFiniteStrings.java | 28 +------------------ 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java index 048cf0ab00c9..4a00119cc30b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java @@ -275,7 +275,7 @@ private static void articulationPointsRecurse( if (d < MAX_RECURSION_LEVEL) { articulationPointsRecurse(a, t.dest, d + 1, depth, low, parent, visited, points); } else { - continue; + throw new IllegalArgumentException("Exceeded maximum recursion level during graph analysis"); } childCount++; if (low[t.dest] >= depth[state]) { diff --git a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java index 56fadc901445..2cbf41ea605e 100644 --- a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java @@ -684,32 +684,6 @@ public void testLongTokenStreamStackOverflowError() throws Exception { TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0])); GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts); - Iterator it = graph.getFiniteStrings(); - assertTrue(it.hasNext()); - it.next(); - assertTrue(it.hasNext()); - it.next(); - assertFalse(it.hasNext()); - - int[] points = graph.articulationPoints(); // This will cause a java.lang.StackOverflowError - assertEquals(points[0], 1); - assertEquals(points[1], 3); - assertEquals(points.length, MAX_RECURSION_LEVEL - 2); - - assertFalse(graph.hasSidePath(0)); - it = graph.getFiniteStrings(0, 1); - assertTrue(it.hasNext()); - assertTokenStream(it.next(), new String[] {"fast"}, new int[] {1}); - assertFalse(it.hasNext()); - Term[] terms = graph.getTerms("field", 0); - assertArrayEquals(terms, new Term[] {new Term("field", "fast")}); - - assertTrue(graph.hasSidePath(1)); - it = graph.getFiniteStrings(1, 3); - assertTrue(it.hasNext()); - assertTokenStream(it.next(), new String[] {"wi", "fi"}, new int[] {1, 1}); - assertTrue(it.hasNext()); - assertTokenStream(it.next(), new String[] {"wifi"}, new int[] {1}); - assertFalse(it.hasNext()); + assertThrows(IllegalArgumentException.class, () -> graph.articulationPoints()); } } From b616ff52eb9082261294057dea43cccd1a1818bc Mon Sep 17 00:00:00 2001 From: Chris Fournier Date: Mon, 12 Jun 2023 11:01:00 -0400 Subject: [PATCH 6/8] Address reviewer comments --- lucene/CHANGES.txt | 4 ++-- .../lucene/util/graph/GraphTokenStreamFiniteStrings.java | 3 ++- .../util/graph/TestGraphTokenStreamFiniteStrings.java | 6 ++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 623e255e2fff..48626a8349c9 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -86,8 +86,6 @@ Bug Fixes * GITHUB#12220: Hunspell: disallow hidden title-case entries from compound middle/end -* LUCENE-10181: Restrict GraphTokenStreamFiniteStrings#articulationPointsRecurse recursion depth. (Chris Fournier) - Other --------------------- @@ -192,6 +190,8 @@ Bug Fixes * GITHUB#12352: [Tessellator] Improve the checks that validate the diagonal between two polygon nodes so the resulting polygons are valid counter clockwise polygons. (Ignacio Vera) +* LUCENE-10181: Restrict GraphTokenStreamFiniteStrings#articulationPointsRecurse recursion depth. (Chris Fournier) + Other --------------------- (No changes) diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java index 4a00119cc30b..11bf116a796e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java @@ -275,7 +275,8 @@ private static void articulationPointsRecurse( if (d < MAX_RECURSION_LEVEL) { articulationPointsRecurse(a, t.dest, d + 1, depth, low, parent, visited, points); } else { - throw new IllegalArgumentException("Exceeded maximum recursion level during graph analysis"); + throw new IllegalArgumentException( + "Exceeded maximum recursion level during graph analysis"); } childCount++; if (low[t.dest] >= depth[state]) { diff --git a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java index 2cbf41ea605e..4df3a0eb029a 100644 --- a/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java @@ -16,8 +16,6 @@ */ package org.apache.lucene.util.graph; -import static org.apache.lucene.util.automaton.Operations.MAX_RECURSION_LEVEL; - import java.util.ArrayList; import java.util.Iterator; import org.apache.lucene.analysis.TokenStream; @@ -677,13 +675,13 @@ public void testLongTokenStreamStackOverflowError() throws Exception { }; // Add in too many tokens to get a high depth graph - for (int i = 0; i < 1024 * 10; i++) { + for (int i = 0; i < 1024 + 1; i++) { tokens.add(token("network", 1, 1)); } TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0])); GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts); - assertThrows(IllegalArgumentException.class, () -> graph.articulationPoints()); + assertThrows(IllegalArgumentException.class, graph::articulationPoints); } } From 940f7e14880964c59b6c5d5200113c4663650013 Mon Sep 17 00:00:00 2001 From: Chris Fournier Date: Mon, 12 Jun 2023 11:15:30 -0400 Subject: [PATCH 7/8] Add constant removed from org.apache.lucene.util.automaton.Operations --- .../lucene/util/graph/GraphTokenStreamFiniteStrings.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java index 11bf116a796e..db01d3afd6e8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java @@ -18,7 +18,6 @@ package org.apache.lucene.util.graph; import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WORK_LIMIT; -import static org.apache.lucene.util.automaton.Operations.MAX_RECURSION_LEVEL; import java.io.IOException; import java.util.ArrayList; @@ -46,6 +45,8 @@ * different paths of the {@link Automaton}. */ public final class GraphTokenStreamFiniteStrings { + /** Maximum level of recursion allowed in recursive operations. */ + public static final int MAX_RECURSION_LEVEL = 1000; private AttributeSource[] tokens = new AttributeSource[4]; private final Automaton det; From be470dde14bad69c62688ba4af5e91b227e533a4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 12 Jun 2023 17:58:08 +0200 Subject: [PATCH 8/8] Update GraphTokenStreamFiniteStrings.java --- .../apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java index db01d3afd6e8..321c6ff133a0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java +++ b/lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java @@ -46,7 +46,7 @@ */ public final class GraphTokenStreamFiniteStrings { /** Maximum level of recursion allowed in recursive operations. */ - public static final int MAX_RECURSION_LEVEL = 1000; + private static final int MAX_RECURSION_LEVEL = 1000; private AttributeSource[] tokens = new AttributeSource[4]; private final Automaton det;