-
Notifications
You must be signed in to change notification settings - Fork 76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Team3 regex Disjunctive Normal Form #127
Changes from all commits
0365b08
7c9fa56
8416c34
af09fda
6dc1801
bcfa0c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
import java.util.ArrayList; | ||
import java.util.HashSet; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.StringJoiner; | ||
|
@@ -293,6 +294,152 @@ private static String toLuceneQueryString(GramBooleanQuery query) { | |
} | ||
} | ||
} | ||
|
||
public boolean isEmpty() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's better to make it a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For isEmpty(), I'd rather leave it public, because it kinda override the isEmpty function, just like how I override toString() in this class. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then we should add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. Although it will override anyway, it can serve as a reminder. |
||
if (this.operandSet.size() > 0) { | ||
return false; | ||
} | ||
for (GramBooleanQuery subQuery : this.subQuerySet) { | ||
if (! subQuery.isEmpty()) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
// "AND" two DNF trees (trees are assumed to be in DNF form) | ||
// Apply distributive laws: | ||
// a AND (b OR c) = (a AND b) OR (a AND c) | ||
// (a OR b) AND (c OR d) = (a AND c) OR (a AND d) OR (b AND c) OR (c AND d) | ||
private static GramBooleanQuery andDNF(GramBooleanQuery left, GramBooleanQuery right) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function is basically: for each node in left query, AND it with each node in right query. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We had an earlier discussion about this topic. I assume this logic is based on RE2J. Do you think it's better to use the cleaner design we talked before, in which we treat all such nodes uniformly? It can simplify this logic. We can do the refactoring as a separate PR. (It's optional though.) |
||
if (left.isEmpty()) { | ||
return right; | ||
} | ||
if (right.isEmpty()) { | ||
return left; | ||
} | ||
GramBooleanQuery resultQuery = new GramBooleanQuery(QueryOp.OR); | ||
for (String leftOperand : left.operandSet) { | ||
for (String rightOperand : right.operandSet) { | ||
GramBooleanQuery tempQuery = new GramBooleanQuery(QueryOp.AND); | ||
tempQuery.operandSet.add(leftOperand); | ||
tempQuery.operandSet.add(rightOperand); | ||
resultQuery.subQuerySet.add(tempQuery); | ||
} | ||
for (GramBooleanQuery rightSubQuery : right.subQuerySet) { | ||
GramBooleanQuery tempQuery = new GramBooleanQuery(QueryOp.AND); | ||
tempQuery.operandSet.add(leftOperand); | ||
tempQuery.operandSet.addAll(rightSubQuery.operandSet); | ||
resultQuery.subQuerySet.add(tempQuery); | ||
} | ||
} | ||
for (GramBooleanQuery leftSubQuery : left.subQuerySet) { | ||
for (String rightOperand : right.operandSet) { | ||
GramBooleanQuery tempQuery = new GramBooleanQuery(QueryOp.AND); | ||
tempQuery.operandSet.addAll(leftSubQuery.operandSet); | ||
tempQuery.operandSet.add(rightOperand); | ||
resultQuery.subQuerySet.add(tempQuery); | ||
} | ||
for (GramBooleanQuery rightSubQuery : right.subQuerySet) { | ||
GramBooleanQuery tempQuery = new GramBooleanQuery(QueryOp.AND); | ||
tempQuery.operandSet.addAll(leftSubQuery.operandSet); | ||
tempQuery.operandSet.addAll(rightSubQuery.operandSet); | ||
resultQuery.subQuerySet.add(tempQuery); | ||
} | ||
} | ||
return resultQuery; | ||
} | ||
|
||
|
||
/** | ||
* Simplify a tree, which is assumed to be already in DNF form | ||
* Apply Absorption laws: a OR (a AND b) = a | ||
* | ||
* Simplification is extremely important, because it removes lots of redundant information, | ||
* thus enabling comparison of two trees, | ||
* | ||
* @param DNFQuery | ||
* @return simplifiedDNFQuery | ||
*/ | ||
public static GramBooleanQuery simplifyDNF(GramBooleanQuery query) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function checks all sub-queries and see if this queries is replaceable by another one. The final result will only contain necessary queries. |
||
GramBooleanQuery result = new GramBooleanQuery(QueryOp.OR); | ||
result.operandSet.addAll(query.operandSet); | ||
|
||
Iterator<GramBooleanQuery> outerIterator = query.subQuerySet.iterator(); | ||
OuterLoop: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why use |
||
while (outerIterator.hasNext()) { | ||
GramBooleanQuery outerAndQuery = outerIterator.next(); | ||
for (String operand : query.operandSet) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can use some Java8 lambda function to simply the code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Of course. I really like to use these Java 8 features, they make me feel Java code could be simpler for the first time :) So an issue in this class is we are putting leaf-nodes in a set of Strings, non-leaf nodes in another set. So I have to go through two sets every time. Professor Li also pointed that out a while ago and suggested we should put them in a same set. I'm considering refactoring this to make the code simple and elegant. Probably I'll do it when I'm bored on my flight to China :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 💯 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Coding on an airplane can benefit from the power of "git" :-) |
||
if (outerAndQuery.operandSet.contains(operand)) { | ||
continue OuterLoop; | ||
} | ||
} | ||
Iterator<GramBooleanQuery> innerIterator = query.subQuerySet.iterator(); | ||
while (innerIterator.hasNext()) { | ||
GramBooleanQuery innerAndQuery = innerIterator.next(); | ||
if (outerAndQuery != innerAndQuery) { | ||
if (outerAndQuery.operandSet.containsAll(innerAndQuery.operandSet)) { | ||
outerIterator.remove(); | ||
continue OuterLoop; | ||
} | ||
} | ||
} | ||
// if reach this code, then add a copy of it to result | ||
GramBooleanQuery tempQuery = new GramBooleanQuery(QueryOp.AND); | ||
tempQuery.operandSet.addAll(outerAndQuery.operandSet); | ||
result.subQuerySet.add(tempQuery); | ||
} | ||
|
||
return query; | ||
} | ||
|
||
|
||
/** | ||
* The query tree generated by the translator is messy with possibly lots of redundant information. | ||
* This function transforms it into Disjunctive normal form (DNF), which is an OR of different ANDs. | ||
* | ||
* To transform a tree to DNF form, the following laws are applied recursively from bottom to top: | ||
* Associative laws: (a OR b) OR c = a OR (b OR c) = a OR b OR c, when transforming OR nodes, | ||
* Distributive laws: a AND (b OR c) = (a AND b) OR (a AND c), when transforming AND nodes, | ||
* | ||
* For each node, its children will be transformed to DNF form first, then | ||
* if it's OR, apply associative laws, if it's AND, apply distributive laws. | ||
* Then recursively apply the same rules all the way up to the top node. | ||
* | ||
* The result is NOT simplified. Must call simplifyDNF() to obtain the optimal tree. | ||
* | ||
* @param query | ||
* @return DNFQuery | ||
*/ | ||
public static GramBooleanQuery toDNF(GramBooleanQuery query) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this function has any requirements about the argument There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so, the purpose of it is to transform any boolean expression to DNF. We can change it to package only, since no one else is using it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add an example to the comments. |
||
if (query.operator == QueryOp.AND) { | ||
GramBooleanQuery firstOrNode = new GramBooleanQuery(QueryOp.OR); | ||
if (query.operandSet.size() != 0) { | ||
GramBooleanQuery firstAndNode = new GramBooleanQuery(QueryOp.AND); | ||
firstAndNode.operandSet.addAll(query.operandSet); | ||
firstOrNode.subQuerySet.add(firstAndNode); | ||
} | ||
|
||
ArrayList<GramBooleanQuery> subDNFList = new ArrayList<>(); | ||
for (GramBooleanQuery subQuery : query.subQuerySet) { | ||
subDNFList.add(toDNF(subQuery)); | ||
} | ||
|
||
GramBooleanQuery result = subDNFList.stream().reduce(firstOrNode, (left, right) -> andDNF(left, right)); | ||
return result; | ||
} else if (query.operator == QueryOp.OR) { | ||
GramBooleanQuery result = new GramBooleanQuery(QueryOp.OR); | ||
result.operandSet.addAll(query.operandSet); | ||
for (GramBooleanQuery subQuery : query.subQuerySet) { | ||
GramBooleanQuery newSubQuery = toDNF(subQuery); | ||
result.subQuerySet.addAll(newSubQuery.subQuerySet); | ||
result.operandSet.addAll(newSubQuery.operandSet); | ||
} | ||
return result; | ||
} | ||
|
||
// ANY or NONE, no need to simplify | ||
return query; | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A general comment: in the description of this PR, can you add an example to illustrate what this PR does? It can help the review a lot.