Skip to content

Commit

Permalink
Make knn graph conn writing more consistent (#14174)
Browse files Browse the repository at this point in the history
* Make graph writing more consistent

* correct concurrent connected components logic
  • Loading branch information
benwtrent committed Jan 28, 2025
1 parent ffd7b67 commit 6ac6b33
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -244,13 +244,14 @@ private HnswGraph reconstructAndWriteGraph(
nodesByLevel.add(null);

int maxOrd = graph.size();
+ int[] scratch = new int[graph.maxConn() * 2];
NodesIterator nodesOnLevel0 = graph.getNodesOnLevel(0);
levelNodeOffsets[0] = new int[nodesOnLevel0.size()];
while (nodesOnLevel0.hasNext()) {
int node = nodesOnLevel0.nextInt();
NeighborArray neighbors = graph.getNeighbors(0, newToOldMap[node]);
long offset = vectorIndex.getFilePointer();
- reconstructAndWriteNeighbours(neighbors, oldToNewMap, maxOrd);
+ reconstructAndWriteNeighbours(neighbors, oldToNewMap, scratch, maxOrd);
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offset);
}

Expand All @@ -267,7 +268,7 @@ private HnswGraph reconstructAndWriteGraph(
for (int node : newNodes) {
NeighborArray neighbors = graph.getNeighbors(level, newToOldMap[node]);
long offset = vectorIndex.getFilePointer();
- reconstructAndWriteNeighbours(neighbors, oldToNewMap, maxOrd);
+ reconstructAndWriteNeighbours(neighbors, oldToNewMap, scratch, maxOrd);
levelNodeOffsets[level][nodeOffsetIndex++] =
Math.toIntExact(vectorIndex.getFilePointer() - offset);
}
Expand Down Expand Up @@ -309,25 +310,33 @@ public NodesIterator getNodesOnLevel(int level) {
};
}

- private void reconstructAndWriteNeighbours(NeighborArray neighbors, int[] oldToNewMap, int maxOrd)
- throws IOException {
+ private void reconstructAndWriteNeighbours(
+ NeighborArray neighbors, int[] oldToNewMap, int[] scratch, int maxOrd) throws IOException {
int size = neighbors.size();
- vectorIndex.writeVInt(size);
-
// Destructively modify; it's ok we are discarding it after this
int[] nnodes = neighbors.nodes();
for (int i = 0; i < size; i++) {
nnodes[i] = oldToNewMap[nnodes[i]];
}
Arrays.sort(nnodes, 0, size);
+ int actualSize = 0;
+ if (size > 0) {
+ scratch[0] = nnodes[0];
+ actualSize = 1;
+ }
// Now that we have sorted, do delta encoding to minimize the required bits to store the
// information
- for (int i = size - 1; i > 0; --i) {
+ for (int i = 1; i < size; i++) {
assert nnodes[i] < maxOrd : "node too large: " + nnodes[i] + ">=" + maxOrd;
- nnodes[i] -= nnodes[i - 1];
+ if (nnodes[i - 1] == nnodes[i]) {
+ continue;
+ }
+ scratch[actualSize++] = nnodes[i] - nnodes[i - 1];
}
- for (int i = 0; i < size; i++) {
- vectorIndex.writeVInt(nnodes[i]);
+ // Write the size after duplicates are removed
+ vectorIndex.writeVInt(actualSize);
+ for (int i = 0; i < actualSize; i++) {
+ vectorIndex.writeVInt(scratch[i]);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ public OnHeapHnswGraph build(int maxOrd) throws IOException {
});
}
taskExecutor.invokeAll(futures);
- finish();
- frozen = true;
- return workers[0].getCompletedGraph();
+ return getCompletedGraph();
}

@Override
Expand Down

0 comments on commit 6ac6b33

Please sign in to comment.