Skip to content

Commit

Permalink
Change Postings back to using FOR in Lucene99PostingsFormat
Browse files Browse the repository at this point in the history
We are still keeping PFOR for positions only.
This is a partial revert of apache#69; the revert brings back ForDeltaUtil.
  • Loading branch information
slow-J committed Nov 6, 2023
1 parent 8d4f9e5 commit fc88d53
Show file tree
Hide file tree
Showing 65 changed files with 6,550 additions and 189 deletions.
28 changes: 25 additions & 3 deletions gradle/generation/forUtil.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ configure(project(":lucene:core")) {
description "Regenerate gen_ForUtil.py"
group "generation"

def genDir = file("src/java/org/apache/lucene/codecs/lucene90")
def genDir = file("src/java/org/apache/lucene/codecs/lucene99")
def genScript = file("${genDir}/gen_ForUtil.py")
def genOutput = file("${genDir}/ForUtil.java")

Expand All @@ -44,7 +44,7 @@ configure(project(":lucene:core")) {

configure(project(":lucene:backward-codecs")) {

task generateForUtilInternal() {
task generateForUtil84Internal() {
description "Regenerate gen_ForUtil.py"
group "generation"

Expand All @@ -64,6 +64,28 @@ configure(project(":lucene:backward-codecs")) {
}
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForUtilInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil84Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])

// Gradle task that runs the gen_ForUtil.py script found in the backward-codecs
// lucene90 source directory. ForUtil.java in that same directory is the declared output.
task generateForUtil90Internal() {
// NOTE(review): the description names gen_ForUtil.py, but that script is the declared
// input below; the declared output is ForUtil.java. Confirm whether the wording is intended.
description "Regenerate gen_ForUtil.py"
group "generation"

// The generator script and the generated Java file live side by side in genDir.
def genDir = file("src/java/org/apache/lucene/backward_codecs/lucene90")
def genScript = file("${genDir}/gen_ForUtil.py")
def genOutput = file("${genDir}/ForUtil.java")

// Declare the script as the task input and the generated file as the task output.
inputs.file genScript
outputs.file genOutput

doLast {
// Run the script with the project's configured python3, using genDir as the working
// directory (presumably so the script writes ForUtil.java next to itself; TODO confirm).
quietExec {
workingDir genDir
executable project.externalTool("python3")
// -B is the Python interpreter option that suppresses writing .pyc bytecode files.
args = [ '-B', genScript ]
}
}
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil90Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
}

2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ Optimizations

* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)

* GITHUB#12696: Change Postings back to using FOR in Lucene99PostingsFormat. (Jakub Slowinski)

Bug Fixes
---------------------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/ForUtil.java": "861cab516c7424e6323831c16f0f521499391a90",
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/gen_ForUtil.py": "b66e2f8012759d6d5ce0d73fabb329ae4a391aa0"
}
3 changes: 2 additions & 1 deletion lucene/backward-codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat;
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat,
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat,
org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat,
org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import org.apache.lucene.store.DataInput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_START;
import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_START;

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;

import java.io.IOException;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.AbstractList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down Expand Up @@ -92,8 +92,7 @@ public Lucene90SkipWriter(
}
}

public void setField(
boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
this.fieldHasPositions = fieldHasPositions;
this.fieldHasOffsets = fieldHasOffsets;
this.fieldHasPayloads = fieldHasPayloads;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;
import java.io.IOException;
import org.apache.lucene.store.DataInput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene90.Lucene90FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
Expand All @@ -37,7 +38,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,17 @@
* field names. These are used to store auxiliary information about the document, such as its
* title, url, or an identifier to access a database. The set of stored fields are what is
* returned for each hit when searching. This is keyed by document number.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term dictionary}. A
* dictionary containing all of the terms used in all of the indexed fields of all of the
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term dictionary}.
* A dictionary containing all of the terms used in all of the indexed fields of all of the
* documents. The dictionary also contains the number of documents which contain the term, and
* pointers to the term's frequency and proximity data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Frequency data}. For
* each term in the dictionary, the numbers of all the documents that contain that term, and
* the frequency of the term in that document, unless frequencies are omitted ({@link
* org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Proximity data}. For
* each term in the dictionary, the positions that the term occurs in each document. Note that
* this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Frequency
* data}. For each term in the dictionary, the numbers of all the documents that contain that
* term, and the frequency of the term in that document, unless frequencies are omitted
* ({@link org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Proximity
* data}. For each term in the dictionary, the positions that the term occurs in each
* document. Note that this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Normalization factors}. For
* each field in each document, a value is stored that is multiplied into the score for hits
* on that field.
Expand Down Expand Up @@ -255,27 +255,27 @@
* <td>The stored fields for documents</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>.tim</td>
* <td>The term dictionary, stores term info</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>.tip</td>
* <td>The index into the Term Dictionary</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>.doc</td>
* <td>Contains the list of docs which contain each term along with frequency</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>.pos</td>
* <td>Stores position information about where a term occurs in the index</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>.pay</td>
* <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
* </tr>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene90.Lucene90FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
Expand All @@ -37,7 +38,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,17 @@
* field names. These are used to store auxiliary information about the document, such as its
* title, url, or an identifier to access a database. The set of stored fields are what is
* returned for each hit when searching. This is keyed by document number.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term dictionary}. A
* dictionary containing all of the terms used in all of the indexed fields of all of the
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term dictionary}.
* A dictionary containing all of the terms used in all of the indexed fields of all of the
* documents. The dictionary also contains the number of documents which contain the term, and
* pointers to the term's frequency and proximity data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Frequency data}. For
* each term in the dictionary, the numbers of all the documents that contain that term, and
* the frequency of the term in that document, unless frequencies are omitted ({@link
* org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Proximity data}. For
* each term in the dictionary, the positions that the term occurs in each document. Note that
* this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Frequency
* data}. For each term in the dictionary, the numbers of all the documents that contain that
* term, and the frequency of the term in that document, unless frequencies are omitted
* ({@link org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Proximity
* data}. For each term in the dictionary, the positions that the term occurs in each
* document. Note that this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Normalization factors}. For
* each field in each document, a value is stored that is multiplied into the score for hits
* on that field.
Expand Down Expand Up @@ -255,27 +255,27 @@
* <td>The stored fields for documents</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>.tim</td>
* <td>The term dictionary, stores term info</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>.tip</td>
* <td>The index into the Term Dictionary</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>.doc</td>
* <td>Contains the list of docs which contain each term along with frequency</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>.pos</td>
* <td>Stores position information about where a term occurs in the index</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>.pay</td>
* <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
* </tr>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.backward_codecs.lucene94;

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
Expand All @@ -36,7 +37,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat;
Expand Down
Loading

0 comments on commit fc88d53

Please sign in to comment.