-
Notifications
You must be signed in to change notification settings - Fork 76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Issue #31] (Team4) Keyword Operator #85
Changes from 29 commits
29bef31
b6affd2
5e64dce
ce33cc3
00f1b4d
4487b20
31f12ed
c103be7
f5f58f6
712bcdd
2d8afe6
18b19b6
7e3619f
17b42d6
5f1f044
ed297d7
eb74d2f
8fb8d92
db4a530
774f5da
b4ec071
7671fbf
05315cb
3b2927c
8ddf519
5ba14f6
1357ac2
9bdf5d0
43d9604
a782b1f
ea5d6c0
11bff36
c0e4ba1
365285e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,32 @@ | ||
package edu.uci.ics.textdb.common.utils; | ||
|
||
import java.io.StringReader; | ||
import java.text.ParseException; | ||
import java.util.ArrayList; | ||
import java.util.Date; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | ||
import org.apache.lucene.document.DateTools; | ||
import org.apache.lucene.document.DateTools.Resolution; | ||
import org.apache.lucene.document.Field.Store; | ||
import org.apache.lucene.document.IntField; | ||
import org.apache.lucene.index.IndexableField; | ||
|
||
import edu.uci.ics.textdb.api.common.Attribute; | ||
import edu.uci.ics.textdb.api.common.FieldType; | ||
import edu.uci.ics.textdb.api.common.IField; | ||
import edu.uci.ics.textdb.api.common.ITuple; | ||
import edu.uci.ics.textdb.api.common.Schema; | ||
import edu.uci.ics.textdb.common.constants.SchemaConstants; | ||
import edu.uci.ics.textdb.common.field.DataTuple; | ||
import edu.uci.ics.textdb.common.field.DateField; | ||
import edu.uci.ics.textdb.common.field.DoubleField; | ||
import edu.uci.ics.textdb.common.field.IntegerField; | ||
import edu.uci.ics.textdb.common.field.ListField; | ||
import edu.uci.ics.textdb.common.field.Span; | ||
import edu.uci.ics.textdb.common.field.StringField; | ||
import edu.uci.ics.textdb.common.field.TextField; | ||
|
||
|
@@ -72,4 +85,60 @@ public static IndexableField getLuceneField(FieldType fieldType, | |
} | ||
return luceneField; | ||
} | ||
/** | ||
* @about Modifies schema, fields and creates a new span tuple | ||
*/ | ||
public static ITuple getSpanTuple( List<IField> fieldList, List<Span> spanList, Schema spanSchema) { | ||
IField spanListField = new ListField<Span>(new ArrayList<>(spanList)); | ||
List<IField> fieldListDuplicate = new ArrayList<>(fieldList); | ||
fieldListDuplicate.add(spanListField); | ||
|
||
IField[] fieldsDuplicate = fieldListDuplicate.toArray(new IField[fieldListDuplicate.size()]); | ||
return new DataTuple(spanSchema, fieldsDuplicate); | ||
} | ||
|
||
/** | ||
* | ||
* @param schema | ||
* @about Creating a new schema object, and adding SPAN_LIST_ATTRIBUTE to | ||
* the schema. SPAN_LIST_ATTRIBUTE is of type List | ||
*/ | ||
public static Schema createSpanSchema(Schema schema) { | ||
List<Attribute> dataTupleAttributes = schema.getAttributes(); | ||
//spanAttributes contains all attributes of dataTupleAttributes and an additional SPAN_LIST_ATTRIBUTE | ||
Attribute[] spanAttributes = new Attribute[dataTupleAttributes.size() + 1]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do we "+1"? Explain in comments? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. will do. |
||
for (int count = 0; count < dataTupleAttributes.size(); count++) { | ||
spanAttributes[count] = dataTupleAttributes.get(count); | ||
} | ||
spanAttributes[spanAttributes.length - 1] = SchemaConstants.SPAN_LIST_ATTRIBUTE; | ||
Schema spanSchema = new Schema(spanAttributes); | ||
return spanSchema; | ||
} | ||
|
||
/** | ||
* Tokenizes the query string using the given analyser | ||
* @param analyzer | ||
* @param query | ||
* @return ArrayList<String> list of results | ||
*/ | ||
public static ArrayList<String> tokenizeQuery(Analyzer analyzer, String query) { | ||
HashSet<String> resultSet = new HashSet<>(); | ||
ArrayList<String> result = new ArrayList<String>(); | ||
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(query)); | ||
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); | ||
|
||
try{ | ||
tokenStream.reset(); | ||
while (tokenStream.incrementToken()) { | ||
String term = charTermAttribute.toString(); | ||
resultSet.add(term); | ||
} | ||
tokenStream.close(); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
} | ||
result.addAll(resultSet); | ||
|
||
return result; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package edu.uci.ics.textdb.dataflow.common; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import edu.uci.ics.textdb.api.common.Attribute; | ||
import edu.uci.ics.textdb.common.utils.Utils; | ||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; | ||
import org.apache.lucene.queryparser.classic.ParseException; | ||
import org.apache.lucene.search.BooleanClause; | ||
import org.apache.lucene.search.BooleanQuery; | ||
import org.apache.lucene.search.Query; | ||
import edu.uci.ics.textdb.api.common.IPredicate; | ||
import edu.uci.ics.textdb.api.common.ITuple; | ||
import edu.uci.ics.textdb.common.exception.DataFlowException; | ||
|
||
/** | ||
* @author prakul | ||
* | ||
*/ | ||
|
||
/** | ||
* This class handles creation of predicate for querying using Keyword Matcher | ||
*/ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add high-level comments to explain the purpose of this class. |
||
public class KeywordPredicate implements IPredicate{ | ||
|
||
private final List<Attribute> attributeList; | ||
private final String[] fields; | ||
private final String query; | ||
private final Query queryObject; | ||
private ArrayList<String> tokens; | ||
private Analyzer analyzer; | ||
|
||
public KeywordPredicate(String query, List<Attribute> attributeList, Analyzer analyzer ) throws DataFlowException{ | ||
try { | ||
this.query = query; | ||
this.attributeList = attributeList; | ||
String[] temp = new String[attributeList.size()]; | ||
|
||
for(int i=0; i < attributeList.size(); i++){ | ||
temp[i] = attributeList.get(i).getFieldName(); | ||
} | ||
this.fields = temp; | ||
this.tokens = Utils.tokenizeQuery(analyzer, this.query); | ||
this.analyzer = analyzer; | ||
this.queryObject = createQueryObject(); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
throw new DataFlowException(e.getMessage(), e); | ||
} | ||
} | ||
|
||
@Override | ||
public boolean satisfy(ITuple tuple) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a "TODO" here? |
||
|
||
//This method is necessary for the interface implementation, and it's really not used. | ||
return true; | ||
} | ||
|
||
/** | ||
* Creates a Query object as a boolean Query on all attributes. | ||
* Example: For creating a query like | ||
* (TestConstants.DESCRIPTION + ":lin" + " AND " + TestConstants.LAST_NAME + ":lin") | ||
* we provide a list of AttributeFields (Description, Last_name) to search on and a query string (lin) | ||
* | ||
* TODO #88:BooleanQuery() is deprecated. In future a better solution could be worked out in Query builder layer | ||
|
||
* @return QueryObject | ||
* @throws ParseException | ||
*/ | ||
private Query createQueryObject() throws ParseException { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use an example to explain the purpose of this function. |
||
BooleanQuery booleanQuery = new BooleanQuery(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My Intellij IDE shows that "BooleanQuery" is deprecated. |
||
MultiFieldQueryParser parser = new MultiFieldQueryParser(this.fields, this.analyzer); | ||
for(String searchToken: this.tokens){ | ||
Query termQuery = parser.parse(searchToken); | ||
booleanQuery.add(termQuery, BooleanClause.Occur.MUST); | ||
} | ||
return booleanQuery; | ||
} | ||
|
||
public String getQuery(){ | ||
return query; | ||
} | ||
|
||
public List<Attribute> getAttributeList() { | ||
return attributeList; | ||
} | ||
public Query getQueryObject(){return this.queryObject;} | ||
|
||
public ArrayList<String> getTokens(){return this.tokens;} | ||
|
||
public Analyzer getAnalyzer(){ | ||
return analyzer; | ||
} | ||
|
||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use an example to explain this function.
Does it really "modifies schema"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@sandeepreddy602 Please take a look
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't modify the schema. Will change the comments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@sandeepreddy602 and @prakul : was the comment modified accordingly?