From 55e3266ae934538d18d17452583b8cd1da4b450f Mon Sep 17 00:00:00 2001 From: Mengdi Lin Date: Tue, 14 May 2024 15:49:52 -0700 Subject: [PATCH] interrupt for NNDescent (#3432) Summary: Addresses the issue in https://github.com/facebookresearch/faiss/issues/3173 for `IndexNNDescent`, I see that there is already interrupt implemented for it's [search](https://fburl.com/code/iwn3tqic) API, so I looked into it's `add` API. For a given dataset nb = 10 mil, iter = 10, K = 32, d = 32 on a CPU only machine reveals that bulk of the cost comes from [nndescent](https://fburl.com/code/5rdb1p5o). For every iteration of `nndescent` takes around ~12 seconds, ~70-80% of the time is spent on `join` method (~10 seconds per iteration) and ~20-30% spent on `update` (~2 second per iteration). Adding the interrupt on the `join` should suffice on quickly terminating the program when users hit ctrl+C (happy to move the interrupt elsewhere if we think otherwise) Reviewed By: junjieqi, mdouze Differential Revision: D57300514 --- faiss/impl/NNDescent.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/faiss/impl/NNDescent.cpp b/faiss/impl/NNDescent.cpp index b609aba390..5afcdaf5b7 100644 --- a/faiss/impl/NNDescent.cpp +++ b/faiss/impl/NNDescent.cpp @@ -154,15 +154,20 @@ NNDescent::NNDescent(const int d, const int K) : K(K), d(d) { NNDescent::~NNDescent() {} void NNDescent::join(DistanceComputer& qdis) { + idx_t check_period = InterruptCallback::get_period_hint(d * search_L); + for (idx_t i0 = 0; i0 < (idx_t)ntotal; i0 += check_period) { + idx_t i1 = std::min(i0 + check_period, (idx_t)ntotal); #pragma omp parallel for default(shared) schedule(dynamic, 100) - for (int n = 0; n < ntotal; n++) { - graph[n].join([&](int i, int j) { - if (i != j) { - float dist = qdis.symmetric_dis(i, j); - graph[i].insert(j, dist); - graph[j].insert(i, dist); - } - }); + for (idx_t n = i0; n < i1; n++) { + graph[n].join([&](int i, int j) { + if (i != j) { + float dist = qdis.symmetric_dis(i, j); + graph[i].insert(j, dist); + graph[j].insert(i, dist); + } + }); + } + InterruptCallback::check(); } }