Skip to content

Commit

Permalink
LUCENE-9116: Remove long[] from PostingsWriterBase#encodeTerm. (#1149) (#1158)
Browse files Browse the repository at this point in the history

All the metadata can be directly encoded in the `DataOutput`.
  • Loading branch information
jpountz committed Jan 27, 2020
1 parent d614bb8 commit ace4fcc
Show file tree
Hide file tree
Showing 39 changed files with 128 additions and 1,781 deletions.
6 changes: 6 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ API Changes
yield Passages sized a little differently because the sizing pivot is now the center of the first match and
not its left edge.

* LUCENE-9116: PostingsWriterBase and PostingsReaderBase no longer support
setting a field's metadata via a `long[]`. (Adrien Grand)

* LUCENE-9116: The FSTOrd postings format has been removed.
(Adrien Grand)

* LUCENE-8369: Remove obsolete spatial module. (Nick Knize, David Smiley)

* LUCENE-8621: Refactor LatLonShape, XYShape, and all query and utility classes to core. (Nick Knize)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public void close() throws IOException {
}

@Override
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
Expand All @@ -167,11 +167,11 @@ public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTer
termState.payStartFP = 0;
}

termState.docStartFP += longs[0];
termState.docStartFP += in.readVLong();
if (fieldHasPositions) {
termState.posStartFP += longs[1];
termState.posStartFP += in.readVLong();
if (fieldHasOffsets || fieldHasPayloads) {
termState.payStartFP += longs[2];
termState.payStartFP += in.readVLong();
}
}
if (termState.docFreq == 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,20 +187,11 @@ public void init(IndexOutput termsOut, SegmentWriteState state) throws IOExcepti
}

@Override
public int setField(FieldInfo fieldInfo) {
public void setField(FieldInfo fieldInfo) {
super.setField(fieldInfo);
skipWriter.setField(writePositions, writeOffsets, writePayloads);
lastState = emptyState;
fieldHasNorms = fieldInfo.hasNorms();
if (writePositions) {
if (writePayloads || writeOffsets) {
return 3; // doc + pos + pay FP
} else {
return 2; // doc + pos FP
}
} else {
return 1; // doc FP
}
}

@Override
Expand Down Expand Up @@ -463,16 +454,16 @@ public void finishTerm(BlockTermState _state) throws IOException {
}

@Override
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
IntBlockTermState state = (IntBlockTermState)_state;
if (absolute) {
lastState = emptyState;
}
longs[0] = state.docStartFP - lastState.docStartFP;
out.writeVLong(state.docStartFP - lastState.docStartFP);
if (writePositions) {
longs[1] = state.posStartFP - lastState.posStartFP;
out.writeVLong(state.posStartFP - lastState.posStartFP);
if (writePayloads || writeOffsets) {
longs[2] = state.payStartFP - lastState.payStartFP;
out.writeVLong(state.payStartFP - lastState.payStartFP);
}
}
if (state.singletonDocID != -1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ public BlockTermsReader(TermsIndexReaderBase indexReader, PostingsReaderBase pos
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
}
Expand All @@ -155,7 +154,7 @@ public BlockTermsReader(TermsIndexReaderBase indexReader, PostingsReaderBase pos
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
}
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
}
Expand Down Expand Up @@ -223,17 +222,15 @@ private class FieldReader extends Terms implements Accountable {
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
final int longsSize;

FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.termsStartPointer = termsStartPointer;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}

@Override
Expand Down Expand Up @@ -326,7 +323,6 @@ calls next() (which is not "typical"), then we'll do the real seek */
private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
private int metaDataUpto;

private long[] longs;
private byte[] bytes;
private ByteArrayDataInput bytesReader;

Expand All @@ -343,7 +339,6 @@ public SegmentTermsEnum() throws IOException {
termSuffixes = new byte[128];
docFreqBytes = new byte[64];
//System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
longs = new long[longsSize];
}

// TODO: we may want an alternate mode here which is
Expand Down Expand Up @@ -826,10 +821,7 @@ private void decodeMetaData() throws IOException {
//System.out.println(" totTF=" + state.totalTermFreq);
}
// metadata
for (int i = 0; i < longs.length; i++) {
longs[i] = bytesReader.readVLong();
}
postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
postingsReader.decodeTerm(bytesReader, fieldInfo, state, absolute);
metaDataUpto++;
absolute = false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,15 @@ private static class FieldMetaData {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
public final int longsSize;

public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.termsStartPointer = termsStartPointer;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}
}

Expand Down Expand Up @@ -176,7 +174,6 @@ public void close() throws IOException {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
}
writeTrailer(dirStart);
CodecUtil.writeFooter(out);
Expand Down Expand Up @@ -206,7 +203,6 @@ class TermsWriter {
long sumTotalTermFreq;
long sumDocFreq;
int docCount;
int longsSize;

private TermEntry[] pendingTerms;

Expand All @@ -226,7 +222,7 @@ class TermsWriter {
}
termsStartPointer = out.getFilePointer();
this.postingsWriter = postingsWriter;
this.longsSize = postingsWriter.setField(fieldInfo);
postingsWriter.setField(fieldInfo);
}

private final BytesRefBuilder lastPrevTerm = new BytesRefBuilder();
Expand Down Expand Up @@ -285,8 +281,7 @@ void finish() throws IOException {
termsStartPointer,
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 ? sumTotalTermFreq : -1,
sumDocFreq,
docsSeen.cardinality(),
longsSize));
docsSeen.cardinality()));
}
}

Expand All @@ -307,7 +302,6 @@ private int sharedPrefix(BytesRef term1, BytesRef term2) {
}

private final RAMOutputStream bytesWriter = new RAMOutputStream();
private final RAMOutputStream bufferWriter = new RAMOutputStream();

private void flushBlock() throws IOException {
//System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
Expand Down Expand Up @@ -353,16 +347,10 @@ private void flushBlock() throws IOException {
bytesWriter.reset();

// 4th pass: write the metadata
long[] longs = new long[longsSize];
boolean absolute = true;
for(int termCount=0;termCount<pendingCount;termCount++) {
final BlockTermState state = pendingTerms[termCount].state;
postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
for (int i = 0; i < longsSize; i++) {
bytesWriter.writeVLong(longs[i]);
}
bufferWriter.writeTo(bytesWriter);
bufferWriter.reset();
postingsWriter.encodeTerm(bytesWriter, fieldInfo, state, absolute);
absolute = false;
}
out.writeVInt((int) bytesWriter.getFilePointer());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ public OrdsBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadSt
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
// System.out.println(" longsSize=" + longsSize);

BytesRef minTerm = readBytesRef(in);
Expand All @@ -147,7 +146,7 @@ public OrdsBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadSt
final long indexStartFP = indexIn.readVLong();
OrdsFieldReader previous = fields.put(fieldInfo.name,
new OrdsFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
indexStartFP, longsSize, indexIn, minTerm, maxTerm));
indexStartFP, indexIn, minTerm, maxTerm));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,11 @@ private static class FieldMetaData {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
private final int longsSize;
public final BytesRef minTerm;
public final BytesRef maxTerm;

public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long indexStartFP,
long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize,
long sumTotalTermFreq, long sumDocFreq, int docCount,
BytesRef minTerm, BytesRef maxTerm) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
Expand All @@ -159,7 +158,6 @@ public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long i
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
}
Expand Down Expand Up @@ -428,7 +426,6 @@ private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdO

class TermsWriter {
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
final FixedBitSet docsSeen;
long sumTotalTermFreq;
Expand All @@ -443,8 +440,6 @@ class TermsWriter {
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
private int[] prefixStarts = new int[8];

private final long[] longs;

// Pending stack of terms and blocks. As terms arrive (in sorted order)
// we append to this stack, and once the top of the stack has enough
// terms starting with a common prefix, we write a new block with
Expand Down Expand Up @@ -637,13 +632,7 @@ private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLead
}

// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.writeTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
}
totalTermCount = end-start;
Expand Down Expand Up @@ -688,13 +677,7 @@ private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLead
// separate anymore:

// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.writeTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;

totalTermCount++;
Expand Down Expand Up @@ -767,8 +750,7 @@ private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLead
TermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
docsSeen = new FixedBitSet(maxDoc);
this.longsSize = postingsWriter.setField(fieldInfo);
this.longs = new long[longsSize];
postingsWriter.setField(fieldInfo);
}

/** Writes one term's worth of postings. */
Expand Down Expand Up @@ -878,7 +860,6 @@ public void finish() throws IOException {
sumTotalTermFreq,
sumDocFreq,
docsSeen.cardinality(),
longsSize,
minTerm, maxTerm));
} else {
assert docsSeen.cardinality() == 0;
Expand All @@ -888,7 +869,6 @@ public void finish() throws IOException {
private final RAMOutputStream suffixWriter = new RAMOutputStream();
private final RAMOutputStream statsWriter = new RAMOutputStream();
private final RAMOutputStream metaWriter = new RAMOutputStream();
private final RAMOutputStream bytesWriter = new RAMOutputStream();
}

private boolean closed;
Expand Down Expand Up @@ -920,7 +900,6 @@ public void close() throws IOException {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
writeBytesRef(out, field.minTerm);
writeBytesRef(out, field.maxTerm);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,14 @@ final class OrdsFieldReader extends Terms implements Accountable {
final Output rootCode;
final BytesRef minTerm;
final BytesRef maxTerm;
final int longsSize;
final OrdsBlockTreeTermsReader parent;

final FST<Output> index;
//private boolean DEBUG;

OrdsFieldReader(OrdsBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms,
Output rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
Expand All @@ -65,7 +64,6 @@ final class OrdsFieldReader extends Terms implements Accountable {
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
// if (DEBUG) {
Expand Down
Loading

0 comments on commit ace4fcc

Please sign in to comment.