LUCENE-9116: Remove long[] from PostingsWriterBase#encodeTerm. #1158

Merged on Jan 17, 2020 (2 commits). This diff shows the changes from the first commit only.
lucene/CHANGES.txt: 6 additions & 0 deletions
@@ -88,6 +88,12 @@ API Changes
yield Passages sized a little different due to the fact that the sizing pivot is now the center of the first match and
not its left edge.

+ * LUCENE-9116: PostingsWriterBase and PostingsReaderBase no longer support
+   setting a field's metadata via a `long[]`. (Adrien Grand)
+
+ * LUCENE-9116: The FSTOrd postings format has been removed.
+   (Adrien Grand)

New Features
---------------------
(No changes)
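The API change itself is easiest to see side by side. Under the old contract, setField returned the number of metadata longs the format needed per term, the terms dictionary allocated and serialized that long[] itself, and encodeTerm/decodeTerm split each term's metadata between the array and an opaque byte stream. Under the new contract the postings format writes and reads everything through the DataOutput/DataInput directly. A minimal sketch of the two PostingsWriterBase hooks, reconstructed from the hunks below:

    // Before: the long[] channel, sized by setField's return value.
    public abstract int setField(FieldInfo fieldInfo);
    public abstract void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo,
                                    BlockTermState state, boolean absolute) throws IOException;

    // After: one channel; the format owns its whole per-term encoding.
    public abstract void setField(FieldInfo fieldInfo);
    public abstract void encodeTerm(DataOutput out, FieldInfo fieldInfo,
                                    BlockTermState state, boolean absolute) throws IOException;

PostingsReaderBase#decodeTerm changes the same way, dropping its leading long[] parameter.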
@@ -154,7 +154,7 @@ public void close() throws IOException {
}

@Override
- public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
+ public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
@@ -167,11 +167,11 @@ public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
termState.payStartFP = 0;
}

- termState.docStartFP += longs[0];
+ termState.docStartFP += in.readVLong();
if (fieldHasPositions) {
- termState.posStartFP += longs[1];
+ termState.posStartFP += in.readVLong();
if (fieldHasOffsets || fieldHasPayloads) {
- termState.payStartFP += longs[2];
+ termState.payStartFP += in.readVLong();
}
}
if (termState.docFreq == 1) {
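The hunk above is the reader half of the delta scheme: file pointers into the doc, pos, and pay files are stored per term as deltas against the previous term of the block, and decodeTerm is called with absolute=true for the first term so the accumulators start from zero. A condensed sketch of the flow, using the names from the diff (the reset logic under absolute is assumed from the surrounding context):

    // absolute == true on the first term of a metadata block: the accumulators
    // were reset to 0, so the first vLong read is an absolute file pointer.
    termState.docStartFP += in.readVLong();          // always written
    if (fieldHasPositions) {
      termState.posStartFP += in.readVLong();        // only when positions are indexed
      if (fieldHasOffsets || fieldHasPayloads) {
        termState.payStartFP += in.readVLong();      // only when a payload/offset file exists
      }
    }

The per-term byte layout is effectively unchanged: the same vLong deltas the terms dictionary used to write on the format's behalf are now read by the format itself.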
@@ -187,20 +187,11 @@ public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
}

@Override
- public int setField(FieldInfo fieldInfo) {
+ public void setField(FieldInfo fieldInfo) {
super.setField(fieldInfo);
skipWriter.setField(writePositions, writeOffsets, writePayloads);
lastState = emptyState;
fieldHasNorms = fieldInfo.hasNorms();
- if (writePositions) {
- if (writePayloads || writeOffsets) {
- return 3; // doc + pos + pay FP
- } else {
- return 2; // doc + pos FP
- }
- } else {
- return 1; // doc FP
- }
}

@Override
@@ -463,16 +454,16 @@ public void finishTerm(BlockTermState _state) throws IOException {
}

@Override
- public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+ public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
IntBlockTermState state = (IntBlockTermState)_state;
if (absolute) {
lastState = emptyState;
}
- longs[0] = state.docStartFP - lastState.docStartFP;
+ out.writeVLong(state.docStartFP - lastState.docStartFP);
if (writePositions) {
- longs[1] = state.posStartFP - lastState.posStartFP;
+ out.writeVLong(state.posStartFP - lastState.posStartFP);
if (writePayloads || writeOffsets) {
- longs[2] = state.payStartFP - lastState.payStartFP;
+ out.writeVLong(state.payStartFP - lastState.payStartFP);
}
}
if (state.singletonDocID != -1) {
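The writer half mirrors it: each file pointer goes out as a vLong delta against lastState, which is reset to emptyState at block boundaries (absolute == true). Note also that setField now returns void; its old return value, the 1/2/3 visible in the removed block above, was exactly the length of the long[] the dictionary had to allocate. A sketch of the symmetric encode, with the caveat that updating lastState to the current state happens at the end of encodeTerm, below the lines shown in the hunk:

    if (absolute) {
      lastState = emptyState;                        // block boundary: encode absolutely
    }
    out.writeVLong(state.docStartFP - lastState.docStartFP);
    if (writePositions) {
      out.writeVLong(state.posStartFP - lastState.posStartFP);
      if (writePayloads || writeOffsets) {
        out.writeVLong(state.payStartFP - lastState.payStartFP);
      }
    }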
@@ -145,7 +145,6 @@ public BlockTermsReader(TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader,
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
- final int longsSize = in.readVInt();
if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
}
@@ -155,7 +154,7 @@ public BlockTermsReader(TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader,
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
}
- FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
+ FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
}
@@ -223,17 +222,15 @@ private class FieldReader extends Terms implements Accountable {
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
- final int longsSize;

- FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
+ FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.termsStartPointer = termsStartPointer;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
- this.longsSize = longsSize;
}

@Override
@@ -326,7 +323,6 @@ calls next() (which is not "typical"), then we'll do the real seek */
private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
private int metaDataUpto;

- private long[] longs;
private byte[] bytes;
private ByteArrayDataInput bytesReader;

@@ -343,7 +339,6 @@ public SegmentTermsEnum() throws IOException {
termSuffixes = new byte[128];
docFreqBytes = new byte[64];
//System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
- longs = new long[longsSize];
}

// TODO: we may want an alternate mode here which is
@@ -826,10 +821,7 @@ private void decodeMetaData() throws IOException {
//System.out.println(" totTF=" + state.totalTermFreq);
}
// metadata
- for (int i = 0; i < longs.length; i++) {
- longs[i] = bytesReader.readVLong();
- }
- postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
+ postingsReader.decodeTerm(bytesReader, fieldInfo, state, absolute);
metaDataUpto++;
absolute = false;
}
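With no long[] to size, BlockTermsReader no longer records longsSize in its per-field header, and SegmentTermsEnum no longer pre-decodes a vLong prefix before handing the metadata bytes to the postings reader. The simplified field-header read looks roughly like this (a sketch; the reads before sumTotalTermFreq follow the constructor context above):

    final long sumTotalTermFreq = in.readVLong();
    // when frequencies are omitted, sumDocFreq == sumTotalTermFreq (one value written)
    final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS
        ? sumTotalTermFreq : in.readVLong();
    final int docCount = in.readVInt();
    // previously followed by: final int longsSize = in.readVInt();  (removed)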
@@ -81,17 +81,15 @@ private static class FieldMetaData {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
- public final int longsSize;

- public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
+ public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.termsStartPointer = termsStartPointer;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}
}

@@ -176,7 +174,6 @@ public void close() throws IOException {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
- out.writeVInt(field.longsSize);
}
writeTrailer(dirStart);
CodecUtil.writeFooter(out);
@@ -206,7 +203,6 @@ class TermsWriter {
long sumTotalTermFreq;
long sumDocFreq;
int docCount;
- int longsSize;

private TermEntry[] pendingTerms;

@@ -226,7 +222,7 @@ class TermsWriter {
}
termsStartPointer = out.getFilePointer();
this.postingsWriter = postingsWriter;
- this.longsSize = postingsWriter.setField(fieldInfo);
+ postingsWriter.setField(fieldInfo);
}

private final BytesRefBuilder lastPrevTerm = new BytesRefBuilder();
@@ -285,8 +281,7 @@ void finish() throws IOException {
termsStartPointer,
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 ? sumTotalTermFreq : -1,
sumDocFreq,
- docsSeen.cardinality(),
- longsSize));
+ docsSeen.cardinality()));
}
}

@@ -307,7 +302,6 @@ private int sharedPrefix(BytesRef term1, BytesRef term2) {
}

private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
- private final ByteBuffersDataOutput bufferWriter = ByteBuffersDataOutput.newResettableInstance();

private void flushBlock() throws IOException {
//System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@@ -353,16 +347,10 @@ private void flushBlock() throws IOException {
bytesWriter.reset();

// 4th pass: write the metadata
- long[] longs = new long[longsSize];
boolean absolute = true;
for(int termCount=0;termCount<pendingCount;termCount++) {
final BlockTermState state = pendingTerms[termCount].state;
- postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
- for (int i = 0; i < longsSize; i++) {
- bytesWriter.writeVLong(longs[i]);
- }
- bufferWriter.copyTo(bytesWriter);
- bufferWriter.reset();
+ postingsWriter.encodeTerm(bytesWriter, fieldInfo, state, absolute);
absolute = false;
}
out.writeVInt(Math.toIntExact(bytesWriter.size()));
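flushBlock's metadata pass shows why the old contract was awkward: per term, encodeTerm filled the long[] and wrote its remaining bytes into a side buffer (bufferWriter), then the dictionary vLong-encoded the longs into bytesWriter and copied the side buffer after them. The new pass is one call per term into a single buffer; a sketch of the simplified loop, assembled from the hunk above:

    // 4th pass: write the metadata
    boolean absolute = true;
    for (int termCount = 0; termCount < pendingCount; termCount++) {
      final BlockTermState state = pendingTerms[termCount].state;
      postingsWriter.encodeTerm(bytesWriter, fieldInfo, state, absolute);
      absolute = false;                              // subsequent terms are deltas
    }
    out.writeVInt(Math.toIntExact(bytesWriter.size()));  // length-prefix the block's metadata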
@@ -130,7 +130,6 @@ public OrdsBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
- final int longsSize = in.readVInt();
// System.out.println(" longsSize=" + longsSize);

BytesRef minTerm = readBytesRef(in);
@@ -147,7 +146,7 @@ public OrdsBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
final long indexStartFP = indexIn.readVLong();
OrdsFieldReader previous = fields.put(fieldInfo.name,
new OrdsFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
- indexStartFP, longsSize, indexIn, minTerm, maxTerm));
+ indexStartFP, indexIn, minTerm, maxTerm));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
}
@@ -143,12 +143,11 @@ private static class FieldMetaData {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
- private final int longsSize;
public final BytesRef minTerm;
public final BytesRef maxTerm;

public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long indexStartFP,
- long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize,
+ long sumTotalTermFreq, long sumDocFreq, int docCount,
BytesRef minTerm, BytesRef maxTerm) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
@@ -159,7 +158,6 @@ public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long indexStartFP,
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
- this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
}
@@ -424,7 +422,6 @@ private void append(FSTCompiler<Output> fstCompiler, FST<Output> subIndex, long

class TermsWriter {
private final FieldInfo fieldInfo;
- private final int longsSize;
private long numTerms;
final FixedBitSet docsSeen;
long sumTotalTermFreq;
@@ -439,8 +436,6 @@ class TermsWriter {
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
private int[] prefixStarts = new int[8];

- private final long[] longs;

// Pending stack of terms and blocks. As terms arrive (in sorted order)
// we append to this stack, and once the top of the stack has enough
// terms starting with a common prefix, we write a new block with
@@ -633,13 +628,7 @@ private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLead
}

// Write term meta data
- postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
- for (int pos = 0; pos < longsSize; pos++) {
- assert longs[pos] >= 0;
- metaWriter.writeVLong(longs[pos]);
- }
- bytesWriter.copyTo(metaWriter);
- bytesWriter.reset();
+ postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
}
totalTermCount = end-start;
@@ -684,13 +673,7 @@ private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLead
// separate anymore:

// Write term meta data
- postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
- for (int pos = 0; pos < longsSize; pos++) {
- assert longs[pos] >= 0;
- metaWriter.writeVLong(longs[pos]);
- }
- bytesWriter.copyTo(metaWriter);
- bytesWriter.reset();
+ postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;

totalTermCount++;
@@ -763,8 +746,7 @@ private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLead
TermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
docsSeen = new FixedBitSet(maxDoc);
- this.longsSize = postingsWriter.setField(fieldInfo);
- this.longs = new long[longsSize];
+ postingsWriter.setField(fieldInfo);
}

/** Writes one term's worth of postings. */
@@ -874,7 +856,6 @@ public void finish() throws IOException {
sumTotalTermFreq,
sumDocFreq,
docsSeen.cardinality(),
- longsSize,
minTerm, maxTerm));
} else {
assert docsSeen.cardinality() == 0;
@@ -884,7 +865,6 @@ public void finish() throws IOException {
private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
- private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
}

private boolean closed;
@@ -916,7 +896,6 @@ public void close() throws IOException {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
- out.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
writeBytesRef(out, field.minTerm);
writeBytesRef(out, field.maxTerm);
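A subtle consequence is visible in the removed assert longs[pos] >= 0 lines above: the old channel could only carry values that survive non-negative vLong encoding, so formats had to contort their metadata into that shape. Writing straight to the DataOutput lifts that restriction. As a hypothetical illustration (not code from this PR; MyTermState and its fields are invented), a format could now pick zig-zag or fixed-width encodings where they fit better:

    // Hypothetical encodeTerm body for some custom postings format.
    @Override
    public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state,
                           boolean absolute) throws IOException {
      MyTermState state = (MyTermState) _state;
      out.writeZLong(state.fpDelta);   // zig-zag vLong: negative deltas are representable
      out.writeLong(state.checksum);   // fixed 8 bytes, no vLong framing
    }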
@@ -46,15 +46,14 @@ final class OrdsFieldReader extends Terms implements Accountable {
final Output rootCode;
final BytesRef minTerm;
final BytesRef maxTerm;
- final int longsSize;
final OrdsBlockTreeTermsReader parent;

final FST<Output> index;
//private boolean DEBUG;

OrdsFieldReader(OrdsBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms,
Output rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
- long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
+ long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@@ -65,7 +64,6 @@ final class OrdsFieldReader extends Terms implements Accountable {
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
- this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
// if (DEBUG) {