Skip to content

Commit

Permalink
ESQL: Compute infrastructure for LEFT JOIN (elastic#118889)
Browse files Browse the repository at this point in the history
This adds some infrastructure that we can use to run LOOKUP JOIN using
real LEFT JOIN semantics.

Right now if LOOKUP JOIN matches many rows in the `lookup` index we
merge all of the values into a multivalued field. So the number of rows
emitted from LOOKUP JOIN is the same as the number of rows that come
into LOOKUP JOIN.

This change builds the infrastructure to emit one row per match, mostly
reusing the infrastructure from ENRICH.
  • Loading branch information
nik9000 committed Jan 15, 2025
1 parent 1ea495e commit e4cca58
Show file tree
Hide file tree
Showing 16 changed files with 883 additions and 46 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,46 @@ default boolean mvSortedAscending() {
/**
 * Expand multivalued fields into one row per value. Returns the same block if there aren't any multivalued
 * fields to expand. The returned block needs to be closed by the caller to release the block's resources.
 * TODO: pass BlockFactory
 *
 * @return a block with one row per value, or this same block when there are no multivalued
 *         fields to expand; in both cases the caller must close the returned block
 */
Block expand();

/**
 * Build a {@link Block} with a {@code null} inserted {@code before} each
 * listed position.
 * <p>
 * Note: {@code before} must be non-decreasing. A value equal to this block's
 * position count appends a {@code null} after the last position. The same
 * value may be listed more than once to insert several {@code null}s in a row.
 * </p>
 * <p>
 * If {@code before} is empty the result is a copy of this block with no
 * {@code null}s added.
 * </p>
 *
 * @param before positions in this block before which a {@code null} is inserted
 * @return a newly built block holding this block's positions plus the inserted {@code null}s
 */
default Block insertNulls(IntVector before) {
    // TODO remove default and scatter to implementation where it can be a lot more efficient
    int myCount = getPositionCount();
    int beforeCount = before.getPositionCount();
    try (Builder builder = elementType().newBlockBuilder(myCount + beforeCount, blockFactory())) {
        if (beforeCount == 0) {
            // Nothing to insert. Reading before.getInt(0) below would be out of bounds,
            // so copy every position across unchanged instead.
            builder.copyFrom(this, 0, myCount);
            return builder.build();
        }
        int beforeP = 0;
        int nextNull = before.getInt(beforeP);
        for (int mainP = 0; mainP < myCount; mainP++) {
            // A position may be listed several times, so keep inserting while it matches.
            while (mainP == nextNull) {
                builder.appendNull();
                beforeP++;
                if (beforeP >= beforeCount) {
                    // No more nulls to insert: bulk copy the rest of the block and finish.
                    builder.copyFrom(this, mainP, myCount);
                    return builder.build();
                }
                nextNull = before.getInt(beforeP);
            }
            // This line right below this is the super inefficient one.
            builder.copyFrom(this, mainP, mainP + 1);
        }
        // Whatever is left in `before` must point just past the last position (trailing nulls).
        assert nextNull == myCount;
        while (beforeP < beforeCount) {
            nextNull = before.getInt(beforeP++);
            assert nextNull == myCount;
            builder.appendNull();
        }
        return builder.build();
    }
}

/**
* Builds {@link Block}s. Typically, you use one of its direct subinterfaces like {@link IntBlock.Builder}.
* This is {@link Releasable} and should be released after building the block or if building the block fails.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ $endif$
int valueCount = getValueCount(pos);
int first = getFirstValueIndex(pos);
if (valueCount == 1) {
builder.append$Type$(get$Type$(getFirstValueIndex(pos)$if(BytesRef)$, scratch$endif$));
builder.append$Type$(get$Type$(first$if(BytesRef)$, scratch$endif$));
} else {
builder.beginPositionEntry();
for (int c = 0; c < valueCount; c++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,32 @@
import java.util.Objects;

/**
* Combines values at the given blocks with the same positions into a single position for the blocks at the given channels
* Combines values at the given blocks with the same positions into a single position
* for the blocks at the given channels.
* <p>
* Example, input pages consisting of three blocks:
* positions | field-1 | field-2 |
* -----------------------------------
* </p>
* <pre>{@code
* | positions | field-1 | field-2 |
* ------------------------------------
* Page 1:
* 1 | a,b | 2020 |
* 1 | c | 2021 |
* ---------------------------------
* | 1 | a,b | 2020 |
* | 1 | c | 2021 |
* Page 2:
* 2 | a,e | 2021 |
* ---------------------------------
* | 2 | a,e | 2021 |
* Page 3:
* 4 | d | null |
* ---------------------------------
* | 4 | d | null |
* }</pre>
* Output:
* <pre>{@code
* | field-1 | field-2 |
* ---------------------------
* | null | null |
* | a,b,c | 2020,2021 |
* | a,e | 2021 |
* | null | null |
* | d | null |
* }</pre>
*/
public final class MergePositionsOperator implements Operator {
private boolean finished = false;
Expand Down
Loading

0 comments on commit e4cca58

Please sign in to comment.