Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue #31] (Team 4) Enabling positional indexing in Lucene for TEXT type #103

Merged
merged 11 commits into from
May 18, 2016
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexOptions;

import edu.uci.ics.textdb.api.common.Attribute;
import edu.uci.ics.textdb.api.common.FieldType;
Expand Down Expand Up @@ -49,18 +50,18 @@ public static IField getField(FieldType fieldType, String fieldValue) throws Par
case TEXT:
field = new TextField(fieldValue);
break;

default:
break;
}
return field;
}

public static IndexableField getLuceneField(FieldType fieldType,
String fieldName, Object fieldValue) {
String fieldName, Object fieldValue) {
IndexableField luceneField = null;
switch(fieldType){
case STRING:
case STRING:
luceneField = new org.apache.lucene.document.StringField(
fieldName, (String) fieldValue, Store.YES);
break;
Expand All @@ -78,10 +79,20 @@ public static IndexableField getLuceneField(FieldType fieldType,
luceneField = new org.apache.lucene.document.StringField(fieldName, dateString, Store.YES);
break;
case TEXT:
luceneField = new org.apache.lucene.document.TextField(
fieldName, (String) fieldValue, Store.YES);
break;

org.apache.lucene.document.FieldType luceneFieldType = new org.apache.lucene.document.FieldType();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comments to the codebase: "By default we enable positional indexing in Lucene so that we can return information about character offsets and token offsets."

luceneFieldType.setIndexOptions( IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS );
luceneFieldType.setStored(true);
luceneFieldType.setStoreTermVectors( true );
luceneFieldType.setStoreTermVectorOffsets( true );
luceneFieldType.setStoreTermVectorPayloads( true );
luceneFieldType.setStoreTermVectorPositions( true );
luceneFieldType.setTokenized( true );

luceneField = new org.apache.lucene.document.Field(
fieldName,(String) fieldValue,luceneFieldType);

break;

}
return luceneField;
}
Expand All @@ -96,10 +107,10 @@ public static ITuple getSpanTuple( List<IField> fieldList, List<Span> spanList,
IField[] fieldsDuplicate = fieldListDuplicate.toArray(new IField[fieldListDuplicate.size()]);
return new DataTuple(spanSchema, fieldsDuplicate);
}

/**
*
* @param schema
*
* @param schema
* @about Creating a new schema object, and adding SPAN_LIST_ATTRIBUTE to
* the schema. SPAN_LIST_ATTRIBUTE is of type List
*/
Expand Down